Repo created
This commit is contained in:
parent
81b91f4139
commit
f8c34fa5ee
22732 changed files with 4815320 additions and 2 deletions
|
|
@ -0,0 +1,5 @@
|
|||
# Extra include rule for files matching opus_inst\.h: third_party/opus.
specific_include_rules = {
    "opus_inst\.h": ["+third_party/opus"],
}
|
||||
|
|
@ -0,0 +1,52 @@
|
|||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_coding/codecs/opus/audio_coder_opus_common.h"
|
||||
|
||||
#include "absl/strings/string_view.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Looks up `param` among the parameters of `format`. Returns the stored string
// value, or absl::nullopt when no such parameter exists.
absl::optional<std::string> GetFormatParameter(const SdpAudioFormat& format,
                                               absl::string_view param) {
  const std::string key(param);
  const auto entry = format.parameters.find(key);
  if (entry != format.parameters.end()) {
    return entry->second;
  }
  return absl::nullopt;
}
|
||||
|
||||
// Parses a comma-separated string "1,2,0,6" into a std::vector<unsigned char>.
|
||||
template <>
|
||||
absl::optional<std::vector<unsigned char>> GetFormatParameter(
|
||||
const SdpAudioFormat& format,
|
||||
absl::string_view param) {
|
||||
std::vector<unsigned char> result;
|
||||
const std::string comma_separated_list =
|
||||
GetFormatParameter(format, param).value_or("");
|
||||
size_t pos = 0;
|
||||
while (pos < comma_separated_list.size()) {
|
||||
const size_t next_comma = comma_separated_list.find(',', pos);
|
||||
const size_t distance_to_next_comma = next_comma == std::string::npos
|
||||
? std::string::npos
|
||||
: (next_comma - pos);
|
||||
auto substring_with_number =
|
||||
comma_separated_list.substr(pos, distance_to_next_comma);
|
||||
auto conv = rtc::StringToNumber<int>(substring_with_number);
|
||||
if (!conv.has_value()) {
|
||||
return absl::nullopt;
|
||||
}
|
||||
result.push_back(*conv);
|
||||
pos += substring_with_number.size() + 1;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
|
@ -0,0 +1,89 @@
|
|||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_CODER_OPUS_COMMON_H_
|
||||
#define MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_CODER_OPUS_COMMON_H_
|
||||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/audio_codecs/audio_decoder.h"
|
||||
#include "api/audio_codecs/audio_format.h"
|
||||
#include "rtc_base/string_to_number.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
absl::optional<std::string> GetFormatParameter(const SdpAudioFormat& format,
|
||||
absl::string_view param);
|
||||
|
||||
template <typename T>
|
||||
absl::optional<T> GetFormatParameter(const SdpAudioFormat& format,
|
||||
absl::string_view param) {
|
||||
return rtc::StringToNumber<T>(GetFormatParameter(format, param).value_or(""));
|
||||
}
|
||||
|
||||
template <>
|
||||
absl::optional<std::vector<unsigned char>> GetFormatParameter(
|
||||
const SdpAudioFormat& format,
|
||||
absl::string_view param);
|
||||
|
||||
class OpusFrame : public AudioDecoder::EncodedAudioFrame {
|
||||
public:
|
||||
OpusFrame(AudioDecoder* decoder,
|
||||
rtc::Buffer&& payload,
|
||||
bool is_primary_payload)
|
||||
: decoder_(decoder),
|
||||
payload_(std::move(payload)),
|
||||
is_primary_payload_(is_primary_payload) {}
|
||||
|
||||
size_t Duration() const override {
|
||||
int ret;
|
||||
if (is_primary_payload_) {
|
||||
ret = decoder_->PacketDuration(payload_.data(), payload_.size());
|
||||
} else {
|
||||
ret = decoder_->PacketDurationRedundant(payload_.data(), payload_.size());
|
||||
}
|
||||
return (ret < 0) ? 0 : static_cast<size_t>(ret);
|
||||
}
|
||||
|
||||
bool IsDtxPacket() const override { return payload_.size() <= 2; }
|
||||
|
||||
absl::optional<DecodeResult> Decode(
|
||||
rtc::ArrayView<int16_t> decoded) const override {
|
||||
AudioDecoder::SpeechType speech_type = AudioDecoder::kSpeech;
|
||||
int ret;
|
||||
if (is_primary_payload_) {
|
||||
ret = decoder_->Decode(
|
||||
payload_.data(), payload_.size(), decoder_->SampleRateHz(),
|
||||
decoded.size() * sizeof(int16_t), decoded.data(), &speech_type);
|
||||
} else {
|
||||
ret = decoder_->DecodeRedundant(
|
||||
payload_.data(), payload_.size(), decoder_->SampleRateHz(),
|
||||
decoded.size() * sizeof(int16_t), decoded.data(), &speech_type);
|
||||
}
|
||||
|
||||
if (ret < 0)
|
||||
return absl::nullopt;
|
||||
|
||||
return DecodeResult{static_cast<size_t>(ret), speech_type};
|
||||
}
|
||||
|
||||
private:
|
||||
AudioDecoder* const decoder_;
|
||||
const rtc::Buffer payload_;
|
||||
const bool is_primary_payload_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_CODER_OPUS_COMMON_H_
|
||||
|
|
@ -0,0 +1,182 @@
|
|||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_coding/codecs/opus/audio_decoder_multi_channel_opus_impl.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/memory/memory.h"
|
||||
#include "modules/audio_coding/codecs/opus/audio_coder_opus_common.h"
|
||||
#include "rtc_base/string_to_number.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
std::unique_ptr<AudioDecoderMultiChannelOpusImpl>
|
||||
AudioDecoderMultiChannelOpusImpl::MakeAudioDecoder(
|
||||
AudioDecoderMultiChannelOpusConfig config) {
|
||||
if (!config.IsOk()) {
|
||||
RTC_DCHECK_NOTREACHED();
|
||||
return nullptr;
|
||||
}
|
||||
// Fill the pointer with a working decoder through the C interface. This
|
||||
// allocates memory.
|
||||
OpusDecInst* dec_state = nullptr;
|
||||
const int error = WebRtcOpus_MultistreamDecoderCreate(
|
||||
&dec_state, config.num_channels, config.num_streams,
|
||||
config.coupled_streams, config.channel_mapping.data());
|
||||
if (error != 0) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Pass the ownership to DecoderImpl. Not using 'make_unique' because the
|
||||
// c-tor is private.
|
||||
return std::unique_ptr<AudioDecoderMultiChannelOpusImpl>(
|
||||
new AudioDecoderMultiChannelOpusImpl(dec_state, config));
|
||||
}
|
||||
|
||||
// Stores the already-created decoder state and its config, then initializes
// the decoder. `dec_state` must be non-null; it is freed in the destructor.
AudioDecoderMultiChannelOpusImpl::AudioDecoderMultiChannelOpusImpl(
    OpusDecInst* dec_state,
    AudioDecoderMultiChannelOpusConfig config)
    // `config` is a by-value sink parameter: move it to avoid copying the
    // channel mapping vector a second time.
    : dec_state_(dec_state), config_(std::move(config)) {
  RTC_DCHECK(dec_state);
  WebRtcOpus_DecoderInit(dec_state_);
}
|
||||
|
||||
// Frees the decoder state that was handed to the constructor.
AudioDecoderMultiChannelOpusImpl::~AudioDecoderMultiChannelOpusImpl() {
  WebRtcOpus_DecoderFree(dec_state_);
}
|
||||
|
||||
// Builds a decoder config from the SDP format. The "num_streams",
// "coupled_streams" and "channel_mapping" parameters are all required; if any
// of them is missing or unparsable, or the resulting config is not OK,
// absl::nullopt is returned.
absl::optional<AudioDecoderMultiChannelOpusConfig>
AudioDecoderMultiChannelOpusImpl::SdpToConfig(const SdpAudioFormat& format) {
  const auto num_streams = GetFormatParameter<int>(format, "num_streams");
  if (!num_streams) {
    return absl::nullopt;
  }

  const auto coupled_streams =
      GetFormatParameter<int>(format, "coupled_streams");
  if (!coupled_streams) {
    return absl::nullopt;
  }

  const auto channel_mapping =
      GetFormatParameter<std::vector<unsigned char>>(format, "channel_mapping");
  if (!channel_mapping) {
    return absl::nullopt;
  }

  AudioDecoderMultiChannelOpusConfig config;
  config.num_channels = format.num_channels;
  config.num_streams = *num_streams;
  config.coupled_streams = *coupled_streams;
  config.channel_mapping = *channel_mapping;
  if (config.IsOk()) {
    return config;
  }
  return absl::nullopt;
}
|
||||
|
||||
std::vector<AudioDecoder::ParseResult>
|
||||
AudioDecoderMultiChannelOpusImpl::ParsePayload(rtc::Buffer&& payload,
|
||||
uint32_t timestamp) {
|
||||
std::vector<ParseResult> results;
|
||||
|
||||
if (PacketHasFec(payload.data(), payload.size())) {
|
||||
const int duration =
|
||||
PacketDurationRedundant(payload.data(), payload.size());
|
||||
RTC_DCHECK_GE(duration, 0);
|
||||
rtc::Buffer payload_copy(payload.data(), payload.size());
|
||||
std::unique_ptr<EncodedAudioFrame> fec_frame(
|
||||
new OpusFrame(this, std::move(payload_copy), false));
|
||||
results.emplace_back(timestamp - duration, 1, std::move(fec_frame));
|
||||
}
|
||||
std::unique_ptr<EncodedAudioFrame> frame(
|
||||
new OpusFrame(this, std::move(payload), true));
|
||||
results.emplace_back(timestamp, 0, std::move(frame));
|
||||
return results;
|
||||
}
|
||||
|
||||
int AudioDecoderMultiChannelOpusImpl::DecodeInternal(const uint8_t* encoded,
|
||||
size_t encoded_len,
|
||||
int sample_rate_hz,
|
||||
int16_t* decoded,
|
||||
SpeechType* speech_type) {
|
||||
RTC_DCHECK_EQ(sample_rate_hz, 48000);
|
||||
int16_t temp_type = 1; // Default is speech.
|
||||
int ret =
|
||||
WebRtcOpus_Decode(dec_state_, encoded, encoded_len, decoded, &temp_type);
|
||||
if (ret > 0)
|
||||
ret *= static_cast<int>(
|
||||
config_.num_channels); // Return total number of samples.
|
||||
*speech_type = ConvertSpeechType(temp_type);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int AudioDecoderMultiChannelOpusImpl::DecodeRedundantInternal(
|
||||
const uint8_t* encoded,
|
||||
size_t encoded_len,
|
||||
int sample_rate_hz,
|
||||
int16_t* decoded,
|
||||
SpeechType* speech_type) {
|
||||
if (!PacketHasFec(encoded, encoded_len)) {
|
||||
// This packet is a RED packet.
|
||||
return DecodeInternal(encoded, encoded_len, sample_rate_hz, decoded,
|
||||
speech_type);
|
||||
}
|
||||
|
||||
RTC_DCHECK_EQ(sample_rate_hz, 48000);
|
||||
int16_t temp_type = 1; // Default is speech.
|
||||
int ret = WebRtcOpus_DecodeFec(dec_state_, encoded, encoded_len, decoded,
|
||||
&temp_type);
|
||||
if (ret > 0)
|
||||
ret *= static_cast<int>(
|
||||
config_.num_channels); // Return total number of samples.
|
||||
*speech_type = ConvertSpeechType(temp_type);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Re-runs WebRtcOpus_DecoderInit on the existing decoder state.
void AudioDecoderMultiChannelOpusImpl::Reset() {
  WebRtcOpus_DecoderInit(dec_state_);
}
|
||||
|
||||
int AudioDecoderMultiChannelOpusImpl::PacketDuration(const uint8_t* encoded,
|
||||
size_t encoded_len) const {
|
||||
return WebRtcOpus_DurationEst(dec_state_, encoded, encoded_len);
|
||||
}
|
||||
|
||||
int AudioDecoderMultiChannelOpusImpl::PacketDurationRedundant(
|
||||
const uint8_t* encoded,
|
||||
size_t encoded_len) const {
|
||||
if (!PacketHasFec(encoded, encoded_len)) {
|
||||
// This packet is a RED packet.
|
||||
return PacketDuration(encoded, encoded_len);
|
||||
}
|
||||
|
||||
return WebRtcOpus_FecDurationEst(encoded, encoded_len, 48000);
|
||||
}
|
||||
|
||||
bool AudioDecoderMultiChannelOpusImpl::PacketHasFec(const uint8_t* encoded,
|
||||
size_t encoded_len) const {
|
||||
int fec;
|
||||
fec = WebRtcOpus_PacketHasFec(encoded, encoded_len);
|
||||
return (fec == 1);
|
||||
}
|
||||
|
||||
// This decoder always reports a sample rate of 48000 Hz.
int AudioDecoderMultiChannelOpusImpl::SampleRateHz() const {
  constexpr int kSampleRateHz = 48000;
  return kSampleRateHz;
}
|
||||
|
||||
size_t AudioDecoderMultiChannelOpusImpl::Channels() const {
|
||||
return config_.num_channels;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
|
@ -0,0 +1,74 @@
|
|||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_DECODER_MULTI_CHANNEL_OPUS_IMPL_H_
|
||||
#define MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_DECODER_MULTI_CHANNEL_OPUS_IMPL_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "api/audio_codecs/audio_decoder.h"
|
||||
#include "api/audio_codecs/audio_format.h"
|
||||
#include "api/audio_codecs/opus/audio_decoder_multi_channel_opus_config.h"
|
||||
#include "modules/audio_coding/codecs/opus/opus_interface.h"
|
||||
#include "rtc_base/buffer.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class AudioDecoderMultiChannelOpusImpl final : public AudioDecoder {
|
||||
public:
|
||||
static std::unique_ptr<AudioDecoderMultiChannelOpusImpl> MakeAudioDecoder(
|
||||
AudioDecoderMultiChannelOpusConfig config);
|
||||
|
||||
~AudioDecoderMultiChannelOpusImpl() override;
|
||||
|
||||
AudioDecoderMultiChannelOpusImpl(const AudioDecoderMultiChannelOpusImpl&) =
|
||||
delete;
|
||||
AudioDecoderMultiChannelOpusImpl& operator=(
|
||||
const AudioDecoderMultiChannelOpusImpl&) = delete;
|
||||
|
||||
std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
|
||||
uint32_t timestamp) override;
|
||||
void Reset() override;
|
||||
int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override;
|
||||
int PacketDurationRedundant(const uint8_t* encoded,
|
||||
size_t encoded_len) const override;
|
||||
bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const override;
|
||||
int SampleRateHz() const override;
|
||||
size_t Channels() const override;
|
||||
|
||||
static absl::optional<AudioDecoderMultiChannelOpusConfig> SdpToConfig(
|
||||
const SdpAudioFormat& format);
|
||||
|
||||
protected:
|
||||
int DecodeInternal(const uint8_t* encoded,
|
||||
size_t encoded_len,
|
||||
int sample_rate_hz,
|
||||
int16_t* decoded,
|
||||
SpeechType* speech_type) override;
|
||||
int DecodeRedundantInternal(const uint8_t* encoded,
|
||||
size_t encoded_len,
|
||||
int sample_rate_hz,
|
||||
int16_t* decoded,
|
||||
SpeechType* speech_type) override;
|
||||
|
||||
private:
|
||||
AudioDecoderMultiChannelOpusImpl(OpusDecInst* dec_state,
|
||||
AudioDecoderMultiChannelOpusConfig config);
|
||||
|
||||
OpusDecInst* dec_state_;
|
||||
const AudioDecoderMultiChannelOpusConfig config_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_DECODER_MULTI_CHANNEL_OPUS_IMPL_H_
|
||||
|
|
@ -0,0 +1,148 @@
|
|||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "api/audio_codecs/opus/audio_decoder_multi_channel_opus.h"
|
||||
|
||||
#include "modules/audio_coding/codecs/opus/audio_coder_opus_common.h"
|
||||
#include "test/gmock.h"
|
||||
#include "test/gtest.h"
|
||||
|
||||
namespace webrtc {
|
||||
using ::testing::NiceMock;
|
||||
using ::testing::Return;
|
||||
|
||||
// Exercises the string and integer flavours of GetFormatParameter.
TEST(AudioDecoderMultiOpusTest, GetFormatParameter) {
  const SdpAudioFormat sdp_format("multiopus", 48000, 4,
                                  {{"channel_mapping", "0,1,2,3"},
                                   {"coupled_streams", "2"},
                                   {"num_streams", "2"}});

  // String lookup returns the raw value.
  const absl::optional<std::string> expected_mapping("0,1,2,3");
  EXPECT_EQ(GetFormatParameter(sdp_format, "channel_mapping"),
            expected_mapping);

  // Integer lookup converts the value.
  const absl::optional<int> expected_coupled_streams(2);
  EXPECT_EQ(GetFormatParameter<int>(sdp_format, "coupled_streams"),
            expected_coupled_streams);

  // Unknown keys give no value.
  EXPECT_EQ(GetFormatParameter(sdp_format, "missing"), absl::nullopt);

  // A comma-separated list is not a valid single integer.
  EXPECT_EQ(GetFormatParameter<int>(sdp_format, "channel_mapping"),
            absl::nullopt);
}
|
||||
|
||||
// Verifies that SdpToConfig rejects SDP formats with bad channel mappings.
TEST(AudioDecoderMultiOpusTest, InvalidChannelMappings) {
  {
    // Can't use channel 3 if there are only 2 channels.
    const SdpAudioFormat sdp_format("multiopus", 48000, 2,
                                    {{"channel_mapping", "3,0"},
                                     {"coupled_streams", "1"},
                                     {"num_streams", "2"}});
    const absl::optional<AudioDecoderMultiChannelOpus::Config> decoder_config =
        AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format);
    EXPECT_FALSE(decoder_config.has_value());
  }
  {
    // The mapping is too long. There are only 5 channels, but 6 elements in the
    // mapping.
    const SdpAudioFormat sdp_format("multiopus", 48000, 5,
                                    {{"channel_mapping", "0,1,2,3,4,5"},
                                     {"coupled_streams", "0"},
                                     {"num_streams", "2"}});
    const absl::optional<AudioDecoderMultiChannelOpus::Config> decoder_config =
        AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format);
    EXPECT_FALSE(decoder_config.has_value());
  }
  {
    // The mapping doesn't parse correctly. "num_streams" is supplied so that
    // the conversion gets past the num_streams lookup and actually attempts to
    // parse the mapping; without it this case would be rejected for the
    // missing parameter instead.
    const SdpAudioFormat sdp_format("multiopus", 48000, 5,
                                    {{"channel_mapping", "0,1,two,3,4"},
                                     {"coupled_streams", "0"},
                                     {"num_streams", "5"}});
    const absl::optional<AudioDecoderMultiChannelOpus::Config> decoder_config =
        AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format);
    EXPECT_FALSE(decoder_config.has_value());
  }
}
|
||||
|
||||
// A well-formed SDP format must yield an OK config carrying the parsed values.
TEST(AudioDecoderMultiOpusTest, ValidSdpToConfigProducesCorrectConfig) {
  const SdpAudioFormat sdp_format("multiopus", 48000, 4,
                                  {{"channel_mapping", "3,1,2,0"},
                                   {"coupled_streams", "2"},
                                   {"num_streams", "2"}});

  const absl::optional<AudioDecoderMultiChannelOpus::Config> decoder_config =
      AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format);

  ASSERT_TRUE(decoder_config.has_value());
  EXPECT_TRUE(decoder_config->IsOk());
  EXPECT_EQ(decoder_config->coupled_streams, 2);

  const std::vector<unsigned char> expected_mapping({3, 1, 2, 0});
  EXPECT_THAT(decoder_config->channel_mapping,
              ::testing::ContainerEq(expected_mapping));
}
|
||||
|
||||
// Malformed SDP formats must not produce a config.
TEST(AudioDecoderMultiOpusTest, InvalidSdpToConfigDoesNotProduceConfig) {
  {
    // The key is spelled "coupled_stream", so "coupled_streams" is absent.
    const SdpAudioFormat misspelled_key_format("multiopus", 48000, 4,
                                               {{"channel_mapping", "0,1,2,3"},
                                                {"coupled_stream", "2"},
                                                {"num_streams", "2"}});
    EXPECT_FALSE(
        AudioDecoderMultiChannelOpus::SdpToConfig(misspelled_key_format)
            .has_value());
  }

  {
    // The last separator in the mapping is a space instead of a comma.
    const SdpAudioFormat bad_separator_format("multiopus", 48000, 4,
                                              {{"channel_mapping", "0,1,2 3"},
                                               {"coupled_streams", "2"},
                                               {"num_streams", "2"}});
    EXPECT_FALSE(
        AudioDecoderMultiChannelOpus::SdpToConfig(bad_separator_format)
            .has_value());
  }
}
|
||||
|
||||
// A config derived from a valid SDP format can be turned into a decoder.
TEST(AudioDecoderMultiOpusTest, CodecsCanBeCreated) {
  const SdpAudioFormat sdp_format("multiopus", 48000, 4,
                                  {{"channel_mapping", "0,1,2,3"},
                                   {"coupled_streams", "2"},
                                   {"num_streams", "2"}});

  const absl::optional<AudioDecoderMultiChannelOpus::Config> decoder_config =
      AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format);
  ASSERT_TRUE(decoder_config.has_value());

  const std::unique_ptr<AudioDecoder> opus_decoder =
      AudioDecoderMultiChannelOpus::MakeAudioDecoder(*decoder_config);
  EXPECT_TRUE(opus_decoder != nullptr);
}
|
||||
|
||||
// Every format reported by AppendSupportedDecoders must convert to a config
// from which a decoder can be created.
TEST(AudioDecoderMultiOpusTest, AdvertisedCodecsCanBeCreated) {
  std::vector<AudioCodecSpec> advertised_specs;
  AudioDecoderMultiChannelOpus::AppendSupportedDecoders(&advertised_specs);

  EXPECT_FALSE(advertised_specs.empty());

  for (const AudioCodecSpec& advertised : advertised_specs) {
    const absl::optional<AudioDecoderMultiChannelOpus::Config> decoder_config =
        AudioDecoderMultiChannelOpus::SdpToConfig(advertised.format);
    ASSERT_TRUE(decoder_config.has_value());

    const std::unique_ptr<AudioDecoder> opus_decoder =
        AudioDecoderMultiChannelOpus::MakeAudioDecoder(*decoder_config);
    EXPECT_TRUE(opus_decoder != nullptr);
  }
}
|
||||
} // namespace webrtc
|
||||
|
|
@ -0,0 +1,149 @@
|
|||
/*
|
||||
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_coding/codecs/opus/audio_decoder_opus.h"
|
||||
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_coding/codecs/opus/audio_coder_opus_common.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "system_wrappers/include/field_trial.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Creates and initializes an Opus decoder. Debug builds check that
// `num_channels` is 1 or 2, that `sample_rate_hz` is 16000 or 48000, and that
// decoder creation succeeded. PLC generation is controlled by the
// "WebRTC-Audio-OpusGeneratePlc" field trial.
AudioDecoderOpusImpl::AudioDecoderOpusImpl(size_t num_channels,
                                           int sample_rate_hz)
    : channels_(num_channels),
      sample_rate_hz_(sample_rate_hz),
      generate_plc_(field_trial::IsEnabled("WebRTC-Audio-OpusGeneratePlc")) {
  RTC_DCHECK(num_channels == 1 || num_channels == 2);
  RTC_DCHECK(sample_rate_hz == 16000 || sample_rate_hz == 48000);

  // The C interface fills in `dec_state_` and reports success as 0.
  const int error =
      WebRtcOpus_DecoderCreate(&dec_state_, channels_, sample_rate_hz_);
  RTC_DCHECK(error == 0);

  WebRtcOpus_DecoderInit(dec_state_);
}
|
||||
|
||||
// Frees the decoder state created in the constructor.
AudioDecoderOpusImpl::~AudioDecoderOpusImpl() {
  WebRtcOpus_DecoderFree(dec_state_);
}
|
||||
|
||||
// Wraps `payload` in one or two OpusFrames. When the packet carries FEC, a
// redundant frame (priority 1) is emitted first, timestamped `duration`
// earlier than `timestamp`; the primary frame (priority 0) always follows.
std::vector<AudioDecoder::ParseResult> AudioDecoderOpusImpl::ParsePayload(
    rtc::Buffer&& payload,
    uint32_t timestamp) {
  std::vector<ParseResult> parsed;

  const bool has_fec = PacketHasFec(payload.data(), payload.size());
  if (has_fec) {
    const int duration =
        PacketDurationRedundant(payload.data(), payload.size());
    RTC_DCHECK_GE(duration, 0);
    // The FEC frame gets its own copy of the bytes because the primary frame
    // below takes over `payload`.
    std::unique_ptr<EncodedAudioFrame> redundant_frame =
        std::make_unique<OpusFrame>(
            this, rtc::Buffer(payload.data(), payload.size()), false);
    parsed.emplace_back(timestamp - duration, 1, std::move(redundant_frame));
  }

  std::unique_ptr<EncodedAudioFrame> primary_frame =
      std::make_unique<OpusFrame>(this, std::move(payload), true);
  parsed.emplace_back(timestamp, 0, std::move(primary_frame));
  return parsed;
}
|
||||
|
||||
int AudioDecoderOpusImpl::DecodeInternal(const uint8_t* encoded,
|
||||
size_t encoded_len,
|
||||
int sample_rate_hz,
|
||||
int16_t* decoded,
|
||||
SpeechType* speech_type) {
|
||||
RTC_DCHECK_EQ(sample_rate_hz, sample_rate_hz_);
|
||||
int16_t temp_type = 1; // Default is speech.
|
||||
int ret =
|
||||
WebRtcOpus_Decode(dec_state_, encoded, encoded_len, decoded, &temp_type);
|
||||
if (ret > 0)
|
||||
ret *= static_cast<int>(channels_); // Return total number of samples.
|
||||
*speech_type = ConvertSpeechType(temp_type);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int AudioDecoderOpusImpl::DecodeRedundantInternal(const uint8_t* encoded,
|
||||
size_t encoded_len,
|
||||
int sample_rate_hz,
|
||||
int16_t* decoded,
|
||||
SpeechType* speech_type) {
|
||||
if (!PacketHasFec(encoded, encoded_len)) {
|
||||
// This packet is a RED packet.
|
||||
return DecodeInternal(encoded, encoded_len, sample_rate_hz, decoded,
|
||||
speech_type);
|
||||
}
|
||||
|
||||
RTC_DCHECK_EQ(sample_rate_hz, sample_rate_hz_);
|
||||
int16_t temp_type = 1; // Default is speech.
|
||||
int ret = WebRtcOpus_DecodeFec(dec_state_, encoded, encoded_len, decoded,
|
||||
&temp_type);
|
||||
if (ret > 0)
|
||||
ret *= static_cast<int>(channels_); // Return total number of samples.
|
||||
*speech_type = ConvertSpeechType(temp_type);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Re-runs WebRtcOpus_DecoderInit on the existing decoder state.
void AudioDecoderOpusImpl::Reset() {
  WebRtcOpus_DecoderInit(dec_state_);
}
|
||||
|
||||
int AudioDecoderOpusImpl::PacketDuration(const uint8_t* encoded,
|
||||
size_t encoded_len) const {
|
||||
return WebRtcOpus_DurationEst(dec_state_, encoded, encoded_len);
|
||||
}
|
||||
|
||||
int AudioDecoderOpusImpl::PacketDurationRedundant(const uint8_t* encoded,
|
||||
size_t encoded_len) const {
|
||||
if (!PacketHasFec(encoded, encoded_len)) {
|
||||
// This packet is a RED packet.
|
||||
return PacketDuration(encoded, encoded_len);
|
||||
}
|
||||
|
||||
return WebRtcOpus_FecDurationEst(encoded, encoded_len, sample_rate_hz_);
|
||||
}
|
||||
|
||||
bool AudioDecoderOpusImpl::PacketHasFec(const uint8_t* encoded,
|
||||
size_t encoded_len) const {
|
||||
int fec;
|
||||
fec = WebRtcOpus_PacketHasFec(encoded, encoded_len);
|
||||
return (fec == 1);
|
||||
}
|
||||
|
||||
int AudioDecoderOpusImpl::SampleRateHz() const {
|
||||
return sample_rate_hz_;
|
||||
}
|
||||
|
||||
size_t AudioDecoderOpusImpl::Channels() const {
|
||||
return channels_;
|
||||
}
|
||||
|
||||
void AudioDecoderOpusImpl::GeneratePlc(
|
||||
size_t requested_samples_per_channel,
|
||||
rtc::BufferT<int16_t>* concealment_audio) {
|
||||
if (!generate_plc_) {
|
||||
return;
|
||||
}
|
||||
int plc_size = WebRtcOpus_PlcDuration(dec_state_) * channels_;
|
||||
concealment_audio->AppendData(plc_size, [&](rtc::ArrayView<int16_t> decoded) {
|
||||
int16_t temp_type = 1;
|
||||
int ret =
|
||||
WebRtcOpus_Decode(dec_state_, nullptr, 0, decoded.data(), &temp_type);
|
||||
if (ret < 0) {
|
||||
return 0;
|
||||
}
|
||||
return ret;
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_DECODER_OPUS_H_
|
||||
#define MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_DECODER_OPUS_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "api/audio_codecs/audio_decoder.h"
|
||||
#include "modules/audio_coding/codecs/opus/opus_interface.h"
|
||||
#include "rtc_base/buffer.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class AudioDecoderOpusImpl final : public AudioDecoder {
|
||||
public:
|
||||
explicit AudioDecoderOpusImpl(size_t num_channels,
|
||||
int sample_rate_hz = 48000);
|
||||
~AudioDecoderOpusImpl() override;
|
||||
|
||||
AudioDecoderOpusImpl(const AudioDecoderOpusImpl&) = delete;
|
||||
AudioDecoderOpusImpl& operator=(const AudioDecoderOpusImpl&) = delete;
|
||||
|
||||
std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
|
||||
uint32_t timestamp) override;
|
||||
void Reset() override;
|
||||
int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override;
|
||||
int PacketDurationRedundant(const uint8_t* encoded,
|
||||
size_t encoded_len) const override;
|
||||
bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const override;
|
||||
int SampleRateHz() const override;
|
||||
size_t Channels() const override;
|
||||
void GeneratePlc(size_t requested_samples_per_channel,
|
||||
rtc::BufferT<int16_t>* concealment_audio) override;
|
||||
|
||||
protected:
|
||||
int DecodeInternal(const uint8_t* encoded,
|
||||
size_t encoded_len,
|
||||
int sample_rate_hz,
|
||||
int16_t* decoded,
|
||||
SpeechType* speech_type) override;
|
||||
int DecodeRedundantInternal(const uint8_t* encoded,
|
||||
size_t encoded_len,
|
||||
int sample_rate_hz,
|
||||
int16_t* decoded,
|
||||
SpeechType* speech_type) override;
|
||||
|
||||
private:
|
||||
OpusDecInst* dec_state_;
|
||||
const size_t channels_;
|
||||
const int sample_rate_hz_;
|
||||
const bool generate_plc_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_DECODER_OPUS_H_
|
||||
|
|
@ -0,0 +1,366 @@
|
|||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
/*
|
||||
* LEFT TO DO:
|
||||
* - WRITE TESTS for the stuff in this file.
|
||||
* - Check the creation, maybe make it safer by returning an empty optional or
|
||||
* unique_ptr. --- It looks OK, but RecreateEncoderInstance can perhaps crash
* on a valid config. Can run it in the fuzzer for some time. Should probably
* also fuzz the config.
|
||||
*/
|
||||
|
||||
#include "modules/audio_coding/codecs/opus/audio_encoder_multi_channel_opus_impl.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/strings/match.h"
|
||||
#include "modules/audio_coding/codecs/opus/audio_coder_opus_common.h"
|
||||
#include "rtc_base/arraysize.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/logging.h"
|
||||
#include "rtc_base/string_to_number.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
// Recommended bitrates for one channel:
|
||||
// 8-12 kb/s for NB speech,
|
||||
// 16-20 kb/s for WB speech,
|
||||
// 28-40 kb/s for FB speech,
|
||||
// 48-64 kb/s for FB mono music, and
|
||||
// 64-128 kb/s for FB stereo music.
|
||||
// The current implementation multiplies these values by the number of channels.
|
||||
constexpr int kOpusBitrateNbBps = 12000;
|
||||
constexpr int kOpusBitrateWbBps = 20000;
|
||||
constexpr int kOpusBitrateFbBps = 32000;
|
||||
|
||||
constexpr int kDefaultMaxPlaybackRate = 48000;
|
||||
// These two lists must be sorted from low to high
|
||||
#if WEBRTC_OPUS_SUPPORT_120MS_PTIME
|
||||
constexpr int kOpusSupportedFrameLengths[] = {10, 20, 40, 60, 120};
|
||||
#else
|
||||
constexpr int kOpusSupportedFrameLengths[] = {10, 20, 40, 60};
|
||||
#endif
|
||||
|
||||
int GetBitrateBps(const AudioEncoderMultiChannelOpusConfig& config) {
|
||||
RTC_DCHECK(config.IsOk());
|
||||
return config.bitrate_bps;
|
||||
}
|
||||
int GetMaxPlaybackRate(const SdpAudioFormat& format) {
|
||||
const auto param = GetFormatParameter<int>(format, "maxplaybackrate");
|
||||
if (param && *param >= 8000) {
|
||||
return std::min(*param, kDefaultMaxPlaybackRate);
|
||||
}
|
||||
return kDefaultMaxPlaybackRate;
|
||||
}
|
||||
|
||||
int GetFrameSizeMs(const SdpAudioFormat& format) {
|
||||
const auto ptime = GetFormatParameter<int>(format, "ptime");
|
||||
if (ptime.has_value()) {
|
||||
// Pick the next highest supported frame length from
|
||||
// kOpusSupportedFrameLengths.
|
||||
for (const int supported_frame_length : kOpusSupportedFrameLengths) {
|
||||
if (supported_frame_length >= *ptime) {
|
||||
return supported_frame_length;
|
||||
}
|
||||
}
|
||||
// If none was found, return the largest supported frame length.
|
||||
return *(std::end(kOpusSupportedFrameLengths) - 1);
|
||||
}
|
||||
|
||||
return AudioEncoderOpusConfig::kDefaultFrameSizeMs;
|
||||
}
|
||||
|
||||
int CalculateDefaultBitrate(int max_playback_rate, size_t num_channels) {
|
||||
const int bitrate = [&] {
|
||||
if (max_playback_rate <= 8000) {
|
||||
return kOpusBitrateNbBps * rtc::dchecked_cast<int>(num_channels);
|
||||
} else if (max_playback_rate <= 16000) {
|
||||
return kOpusBitrateWbBps * rtc::dchecked_cast<int>(num_channels);
|
||||
} else {
|
||||
return kOpusBitrateFbBps * rtc::dchecked_cast<int>(num_channels);
|
||||
}
|
||||
}();
|
||||
RTC_DCHECK_GE(bitrate, AudioEncoderMultiChannelOpusConfig::kMinBitrateBps);
|
||||
return bitrate;
|
||||
}
|
||||
|
||||
// Get the maxaveragebitrate parameter in string-form, so we can properly figure
|
||||
// out how invalid it is and accurately log invalid values.
|
||||
int CalculateBitrate(int max_playback_rate_hz,
|
||||
size_t num_channels,
|
||||
absl::optional<std::string> bitrate_param) {
|
||||
const int default_bitrate =
|
||||
CalculateDefaultBitrate(max_playback_rate_hz, num_channels);
|
||||
|
||||
if (bitrate_param) {
|
||||
const auto bitrate = rtc::StringToNumber<int>(*bitrate_param);
|
||||
if (bitrate) {
|
||||
const int chosen_bitrate =
|
||||
std::max(AudioEncoderOpusConfig::kMinBitrateBps,
|
||||
std::min(*bitrate, AudioEncoderOpusConfig::kMaxBitrateBps));
|
||||
if (bitrate != chosen_bitrate) {
|
||||
RTC_LOG(LS_WARNING) << "Invalid maxaveragebitrate " << *bitrate
|
||||
<< " clamped to " << chosen_bitrate;
|
||||
}
|
||||
return chosen_bitrate;
|
||||
}
|
||||
RTC_LOG(LS_WARNING) << "Invalid maxaveragebitrate \"" << *bitrate_param
|
||||
<< "\" replaced by default bitrate " << default_bitrate;
|
||||
}
|
||||
|
||||
return default_bitrate;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
std::unique_ptr<AudioEncoder>
|
||||
AudioEncoderMultiChannelOpusImpl::MakeAudioEncoder(
|
||||
const AudioEncoderMultiChannelOpusConfig& config,
|
||||
int payload_type) {
|
||||
if (!config.IsOk()) {
|
||||
RTC_DCHECK_NOTREACHED();
|
||||
return nullptr;
|
||||
}
|
||||
return std::make_unique<AudioEncoderMultiChannelOpusImpl>(config,
|
||||
payload_type);
|
||||
}
|
||||
|
||||
AudioEncoderMultiChannelOpusImpl::AudioEncoderMultiChannelOpusImpl(
|
||||
const AudioEncoderMultiChannelOpusConfig& config,
|
||||
int payload_type)
|
||||
: payload_type_(payload_type), inst_(nullptr) {
|
||||
RTC_DCHECK(0 <= payload_type && payload_type <= 127);
|
||||
|
||||
RTC_CHECK(RecreateEncoderInstance(config));
|
||||
}
|
||||
|
||||
// Frees the Opus encoder instance; a non-zero result from the free call is
// treated as fatal.
AudioEncoderMultiChannelOpusImpl::~AudioEncoderMultiChannelOpusImpl() {
  RTC_CHECK_EQ(0, WebRtcOpus_EncoderFree(inst_));
}
|
||||
|
||||
size_t AudioEncoderMultiChannelOpusImpl::SufficientOutputBufferSize() const {
|
||||
// Calculate the number of bytes we expect the encoder to produce,
|
||||
// then multiply by two to give a wide margin for error.
|
||||
const size_t bytes_per_millisecond =
|
||||
static_cast<size_t>(GetBitrateBps(config_) / (1000 * 8) + 1);
|
||||
const size_t approx_encoded_bytes =
|
||||
Num10msFramesPerPacket() * 10 * bytes_per_millisecond;
|
||||
return 2 * approx_encoded_bytes;
|
||||
}
|
||||
|
||||
// Rebuilds the encoder instance from the currently stored config; failure to
// do so is fatal.
void AudioEncoderMultiChannelOpusImpl::Reset() {
  RTC_CHECK(RecreateEncoderInstance(config_));
}
|
||||
|
||||
// Reports the configured frame length as both ends of the range, since this
// encoder uses a single frame size.
absl::optional<std::pair<TimeDelta, TimeDelta>>
AudioEncoderMultiChannelOpusImpl::GetFrameLengthRange() const {
  const TimeDelta frame_length = TimeDelta::Millis(config_.frame_size_ms);
  return std::pair<TimeDelta, TimeDelta>(frame_length, frame_length);
}
|
||||
|
||||
// If the given config is OK, recreate the Opus encoder instance with those
|
||||
// settings, save the config, and return true. Otherwise, do nothing and return
|
||||
// false.
|
||||
bool AudioEncoderMultiChannelOpusImpl::RecreateEncoderInstance(
|
||||
const AudioEncoderMultiChannelOpusConfig& config) {
|
||||
if (!config.IsOk())
|
||||
return false;
|
||||
config_ = config;
|
||||
if (inst_)
|
||||
RTC_CHECK_EQ(0, WebRtcOpus_EncoderFree(inst_));
|
||||
input_buffer_.clear();
|
||||
input_buffer_.reserve(Num10msFramesPerPacket() * SamplesPer10msFrame());
|
||||
RTC_CHECK_EQ(
|
||||
0, WebRtcOpus_MultistreamEncoderCreate(
|
||||
&inst_, config.num_channels,
|
||||
config.application ==
|
||||
AudioEncoderMultiChannelOpusConfig::ApplicationMode::kVoip
|
||||
? 0
|
||||
: 1,
|
||||
config.num_streams, config.coupled_streams,
|
||||
config.channel_mapping.data()));
|
||||
const int bitrate = GetBitrateBps(config);
|
||||
RTC_CHECK_EQ(0, WebRtcOpus_SetBitRate(inst_, bitrate));
|
||||
RTC_LOG(LS_VERBOSE) << "Set Opus bitrate to " << bitrate << " bps.";
|
||||
if (config.fec_enabled) {
|
||||
RTC_CHECK_EQ(0, WebRtcOpus_EnableFec(inst_));
|
||||
RTC_LOG(LS_VERBOSE) << "Opus enable FEC";
|
||||
} else {
|
||||
RTC_CHECK_EQ(0, WebRtcOpus_DisableFec(inst_));
|
||||
RTC_LOG(LS_VERBOSE) << "Opus disable FEC";
|
||||
}
|
||||
RTC_CHECK_EQ(
|
||||
0, WebRtcOpus_SetMaxPlaybackRate(inst_, config.max_playback_rate_hz));
|
||||
RTC_LOG(LS_VERBOSE) << "Set Opus playback rate to "
|
||||
<< config.max_playback_rate_hz << " hz.";
|
||||
|
||||
// Use the DEFAULT complexity.
|
||||
RTC_CHECK_EQ(
|
||||
0, WebRtcOpus_SetComplexity(inst_, AudioEncoderOpusConfig().complexity));
|
||||
RTC_LOG(LS_VERBOSE) << "Set Opus coding complexity to "
|
||||
<< AudioEncoderOpusConfig().complexity;
|
||||
|
||||
if (config.dtx_enabled) {
|
||||
RTC_CHECK_EQ(0, WebRtcOpus_EnableDtx(inst_));
|
||||
RTC_LOG(LS_VERBOSE) << "Opus enable DTX";
|
||||
} else {
|
||||
RTC_CHECK_EQ(0, WebRtcOpus_DisableDtx(inst_));
|
||||
RTC_LOG(LS_VERBOSE) << "Opus disable DTX";
|
||||
}
|
||||
|
||||
if (config.cbr_enabled) {
|
||||
RTC_CHECK_EQ(0, WebRtcOpus_EnableCbr(inst_));
|
||||
RTC_LOG(LS_VERBOSE) << "Opus enable CBR";
|
||||
} else {
|
||||
RTC_CHECK_EQ(0, WebRtcOpus_DisableCbr(inst_));
|
||||
RTC_LOG(LS_VERBOSE) << "Opus disable CBR";
|
||||
}
|
||||
num_channels_to_encode_ = NumChannels();
|
||||
next_frame_length_ms_ = config_.frame_size_ms;
|
||||
RTC_LOG(LS_VERBOSE) << "Set Opus frame length to " << config_.frame_size_ms
|
||||
<< " ms";
|
||||
return true;
|
||||
}
|
||||
|
||||
// Translates an SDP format into an encoder config.
//
// Returns nullopt when the format is not "multiopus" (case-insensitive) at
// 48000 Hz, when any of the "num_streams", "coupled_streams" or
// "channel_mapping" parameters cannot be obtained, or when the resulting
// config fails IsOk().
absl::optional<AudioEncoderMultiChannelOpusConfig>
AudioEncoderMultiChannelOpusImpl::SdpToConfig(const SdpAudioFormat& format) {
  const bool is_multiopus = absl::EqualsIgnoreCase(format.name, "multiopus");
  if (!is_multiopus || format.clockrate_hz != 48000) {
    return absl::nullopt;
  }

  // A boolean fmtp flag counts as set only when its value is exactly "1".
  const auto flag_is_set = [&format](const char* key) {
    return GetFormatParameter(format, key) == "1";
  };

  AudioEncoderMultiChannelOpusConfig config;
  config.num_channels = format.num_channels;
  config.frame_size_ms = GetFrameSizeMs(format);
  config.max_playback_rate_hz = GetMaxPlaybackRate(format);
  config.fec_enabled = flag_is_set("useinbandfec");
  config.dtx_enabled = flag_is_set("usedtx");
  config.cbr_enabled = flag_is_set("cbr");
  config.bitrate_bps =
      CalculateBitrate(config.max_playback_rate_hz, config.num_channels,
                       GetFormatParameter(format, "maxaveragebitrate"));
  if (config.num_channels == 1) {
    config.application =
        AudioEncoderMultiChannelOpusConfig::ApplicationMode::kVoip;
  } else {
    config.application =
        AudioEncoderMultiChannelOpusConfig::ApplicationMode::kAudio;
  }

  // Advertise every frame length this file supports.
  config.supported_frame_lengths_ms.assign(
      std::begin(kOpusSupportedFrameLengths),
      std::end(kOpusSupportedFrameLengths));

  const auto num_streams = GetFormatParameter<int>(format, "num_streams");
  if (!num_streams) {
    return absl::nullopt;
  }
  config.num_streams = *num_streams;

  const auto coupled_streams =
      GetFormatParameter<int>(format, "coupled_streams");
  if (!coupled_streams) {
    return absl::nullopt;
  }
  config.coupled_streams = *coupled_streams;

  const auto channel_mapping =
      GetFormatParameter<std::vector<unsigned char>>(format, "channel_mapping");
  if (!channel_mapping) {
    return absl::nullopt;
  }
  config.channel_mapping = *channel_mapping;

  if (config.IsOk()) {
    return config;
  }
  return absl::nullopt;
}
|
||||
|
||||
// Describes the codec for `config`: 48000 Hz, the configured channel count and
// bitrate, with the AudioEncoderOpusConfig bitrate limits. Comfort noise and
// network adaptation are both reported as unsupported.
AudioCodecInfo AudioEncoderMultiChannelOpusImpl::QueryAudioEncoder(
    const AudioEncoderMultiChannelOpusConfig& config) {
  RTC_DCHECK(config.IsOk());
  AudioCodecInfo codec_info(48000, config.num_channels, config.bitrate_bps,
                            AudioEncoderOpusConfig::kMinBitrateBps,
                            AudioEncoderOpusConfig::kMaxBitrateBps);
  codec_info.allow_comfort_noise = false;
  codec_info.supports_network_adaption = false;
  return codec_info;
}
|
||||
|
||||
size_t AudioEncoderMultiChannelOpusImpl::Num10msFramesPerPacket() const {
|
||||
return static_cast<size_t>(rtc::CheckedDivExact(config_.frame_size_ms, 10));
|
||||
}
|
||||
size_t AudioEncoderMultiChannelOpusImpl::SamplesPer10msFrame() const {
|
||||
return rtc::CheckedDivExact(48000, 100) * config_.num_channels;
|
||||
}
|
||||
// The encoder always reports a 48000 Hz sample rate.
int AudioEncoderMultiChannelOpusImpl::SampleRateHz() const {
  constexpr int kSampleRateHz = 48000;
  return kSampleRateHz;
}
|
||||
size_t AudioEncoderMultiChannelOpusImpl::NumChannels() const {
|
||||
return config_.num_channels;
|
||||
}
|
||||
size_t AudioEncoderMultiChannelOpusImpl::Num10MsFramesInNextPacket() const {
|
||||
return Num10msFramesPerPacket();
|
||||
}
|
||||
size_t AudioEncoderMultiChannelOpusImpl::Max10MsFramesInAPacket() const {
|
||||
return Num10msFramesPerPacket();
|
||||
}
|
||||
int AudioEncoderMultiChannelOpusImpl::GetTargetBitrate() const {
|
||||
return GetBitrateBps(config_);
|
||||
}
|
||||
|
||||
// Buffers `audio` until a full packet's worth of samples is available, then
// encodes the buffer with Opus and appends the result to `encoded`.
//
// Returns a default EncodedInfo while the packet is still incomplete.
// Otherwise the returned info carries the encoded size, the RTP timestamp of
// the first buffered samples, and the payload type. Buffering more than one
// packet's worth of samples, or an Opus encode error, is fatal.
AudioEncoder::EncodedInfo AudioEncoderMultiChannelOpusImpl::EncodeImpl(
    uint32_t rtp_timestamp,
    rtc::ArrayView<const int16_t> audio,
    rtc::Buffer* encoded) {
  // The packet is stamped with the time of the first samples it contains.
  if (input_buffer_.empty()) {
    first_timestamp_in_buffer_ = rtp_timestamp;
  }
  input_buffer_.insert(input_buffer_.end(), audio.cbegin(), audio.cend());

  const bool packet_incomplete =
      input_buffer_.size() < (Num10msFramesPerPacket() * SamplesPer10msFrame());
  if (packet_incomplete) {
    return EncodedInfo();
  }
  RTC_CHECK_EQ(input_buffer_.size(),
               Num10msFramesPerPacket() * SamplesPer10msFrame());

  const size_t max_encoded_bytes = SufficientOutputBufferSize();
  // Writes the encoded packet into `output` and returns its size in bytes.
  const auto encode_into = [&](rtc::ArrayView<uint8_t> output) {
    int status = WebRtcOpus_Encode(
        inst_, &input_buffer_[0],
        rtc::CheckedDivExact(input_buffer_.size(), config_.num_channels),
        rtc::saturated_cast<int16_t>(max_encoded_bytes), output.data());

    RTC_CHECK_GE(status, 0);  // Fails only if fed invalid data.

    return static_cast<size_t>(status);
  };

  EncodedInfo info;
  info.encoded_bytes = encoded->AppendData(max_encoded_bytes, encode_into);
  input_buffer_.clear();

  // Will use new packet size for next encoding.
  config_.frame_size_ms = next_frame_length_ms_;

  info.encoded_timestamp = first_timestamp_in_buffer_;
  info.payload_type = payload_type_;
  info.send_even_if_empty = true;  // Allows Opus to send empty packets.
  info.speech = true;
  info.encoder_type = CodecType::kOther;
  return info;
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
|
@ -0,0 +1,92 @@
|
|||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_ENCODER_MULTI_CHANNEL_OPUS_IMPL_H_
|
||||
#define MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_ENCODER_MULTI_CHANNEL_OPUS_IMPL_H_
|
||||
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/audio_codecs/audio_encoder.h"
|
||||
#include "api/audio_codecs/audio_format.h"
|
||||
#include "api/audio_codecs/opus/audio_encoder_multi_channel_opus_config.h"
|
||||
#include "api/units/time_delta.h"
|
||||
#include "modules/audio_coding/codecs/opus/opus_interface.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class RtcEventLog;
|
||||
|
||||
class AudioEncoderMultiChannelOpusImpl final : public AudioEncoder {
|
||||
public:
|
||||
AudioEncoderMultiChannelOpusImpl(
|
||||
const AudioEncoderMultiChannelOpusConfig& config,
|
||||
int payload_type);
|
||||
~AudioEncoderMultiChannelOpusImpl() override;
|
||||
|
||||
AudioEncoderMultiChannelOpusImpl(const AudioEncoderMultiChannelOpusImpl&) =
|
||||
delete;
|
||||
AudioEncoderMultiChannelOpusImpl& operator=(
|
||||
const AudioEncoderMultiChannelOpusImpl&) = delete;
|
||||
|
||||
// Static interface for use by BuiltinAudioEncoderFactory.
|
||||
static constexpr const char* GetPayloadName() { return "multiopus"; }
|
||||
static absl::optional<AudioCodecInfo> QueryAudioEncoder(
|
||||
const SdpAudioFormat& format);
|
||||
|
||||
int SampleRateHz() const override;
|
||||
size_t NumChannels() const override;
|
||||
size_t Num10MsFramesInNextPacket() const override;
|
||||
size_t Max10MsFramesInAPacket() const override;
|
||||
int GetTargetBitrate() const override;
|
||||
|
||||
void Reset() override;
|
||||
absl::optional<std::pair<TimeDelta, TimeDelta>> GetFrameLengthRange()
|
||||
const override;
|
||||
|
||||
protected:
|
||||
EncodedInfo EncodeImpl(uint32_t rtp_timestamp,
|
||||
rtc::ArrayView<const int16_t> audio,
|
||||
rtc::Buffer* encoded) override;
|
||||
|
||||
private:
|
||||
static absl::optional<AudioEncoderMultiChannelOpusConfig> SdpToConfig(
|
||||
const SdpAudioFormat& format);
|
||||
static AudioCodecInfo QueryAudioEncoder(
|
||||
const AudioEncoderMultiChannelOpusConfig& config);
|
||||
static std::unique_ptr<AudioEncoder> MakeAudioEncoder(
|
||||
const AudioEncoderMultiChannelOpusConfig&,
|
||||
int payload_type);
|
||||
|
||||
size_t Num10msFramesPerPacket() const;
|
||||
size_t SamplesPer10msFrame() const;
|
||||
size_t SufficientOutputBufferSize() const;
|
||||
bool RecreateEncoderInstance(
|
||||
const AudioEncoderMultiChannelOpusConfig& config);
|
||||
void SetFrameLength(int frame_length_ms);
|
||||
void SetNumChannelsToEncode(size_t num_channels_to_encode);
|
||||
void SetProjectedPacketLossRate(float fraction);
|
||||
|
||||
AudioEncoderMultiChannelOpusConfig config_;
|
||||
const int payload_type_;
|
||||
std::vector<int16_t> input_buffer_;
|
||||
OpusEncInst* inst_;
|
||||
uint32_t first_timestamp_in_buffer_;
|
||||
size_t num_channels_to_encode_;
|
||||
int next_frame_length_ms_;
|
||||
|
||||
friend struct AudioEncoderMultiChannelOpus;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_ENCODER_MULTI_CHANNEL_OPUS_IMPL_H_
|
||||
|
|
@ -0,0 +1,156 @@
|
|||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "api/audio_codecs/opus/audio_encoder_multi_channel_opus.h"
|
||||
|
||||
#include "test/gmock.h"
|
||||
|
||||
namespace webrtc {
|
||||
using ::testing::NiceMock;
|
||||
using ::testing::Return;
|
||||
|
||||
namespace {
|
||||
constexpr int kOpusPayloadType = 120;
|
||||
} // namespace
|
||||
|
||||
// Checks which channel_mapping / stream-count combinations SdpToConfig()
// accepts.
TEST(AudioEncoderMultiOpusTest, CheckConfigValidity) {
  // Parses a 48 kHz "multiopus" format with the given channel count and
  // multistream parameters.
  const auto parse = [](size_t num_channels, const char* channel_mapping,
                        const char* coupled_streams, const char* num_streams) {
    const SdpAudioFormat sdp_format("multiopus", 48000, num_channels,
                                    {{"channel_mapping", channel_mapping},
                                     {"coupled_streams", coupled_streams},
                                     {"num_streams", num_streams}});
    return AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format);
  };

  {
    // Maps input channel 0 to coded channel 3, which doesn't exist.
    const absl::optional<AudioEncoderMultiChannelOpus::Config> encoder_config =
        parse(2, "3,0", "1", "2");
    EXPECT_FALSE(encoder_config.has_value());
  }
  {
    // The mapping is too short.
    const absl::optional<AudioEncoderMultiChannelOpus::Config> encoder_config =
        parse(2, "0", "1", "2");
    EXPECT_FALSE(encoder_config.has_value());
  }
  {
    // Coded channel 0 would come from all of input channels 0, 1 and 2.
    const absl::optional<AudioEncoderMultiChannelOpus::Config> encoder_config =
        parse(3, "0,0,0", "0", "1");
    EXPECT_FALSE(encoder_config.has_value());
  }
  {
    // This is fine, because channels 1 and 2 are set to be ignored.
    const absl::optional<AudioEncoderMultiChannelOpus::Config> encoder_config =
        parse(3, "0,255,255", "0", "1");
    ASSERT_TRUE(encoder_config.has_value());
    EXPECT_TRUE(encoder_config->IsOk());
  }
  {
    // This is NOT fine, because nothing says how coded channel 1 should be
    // coded.
    const absl::optional<AudioEncoderMultiChannelOpus::Config> encoder_config =
        parse(3, "0,255,255", "0", "2");
    EXPECT_FALSE(encoder_config.has_value());
  }
}
|
||||
|
||||
// A valid six-channel format must round-trip its multistream parameters into
// the parsed config.
TEST(AudioEncoderMultiOpusTest, ConfigValuesAreParsedCorrectly) {
  const std::vector<unsigned char> expected_mapping = {0, 4, 1, 2, 3, 5};
  SdpAudioFormat sdp_format("multiopus", 48000, 6,
                            {{"minptime", "10"},
                             {"useinbandfec", "1"},
                             {"channel_mapping", "0,4,1,2,3,5"},
                             {"num_streams", "4"},
                             {"coupled_streams", "2"}});

  const absl::optional<AudioEncoderMultiChannelOpus::Config> encoder_config =
      AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format);
  ASSERT_TRUE(encoder_config.has_value());

  EXPECT_EQ(encoder_config->coupled_streams, 2);
  EXPECT_EQ(encoder_config->num_streams, 4);
  EXPECT_THAT(encoder_config->channel_mapping,
              testing::ContainerEq(expected_mapping));
}
|
||||
|
||||
// An invalid format yields no config; a valid one yields a config from which
// an encoder can be created.
TEST(AudioEncoderMultiOpusTest, CreateFromValidConfig) {
  {
    // Two streams, but only one coded channel is mapped: rejected.
    const SdpAudioFormat invalid_format("multiopus", 48000, 3,
                                        {{"channel_mapping", "0,255,255"},
                                         {"coupled_streams", "0"},
                                         {"num_streams", "2"}});
    const absl::optional<AudioEncoderMultiChannelOpus::Config> encoder_config =
        AudioEncoderMultiChannelOpus::SdpToConfig(invalid_format);
    ASSERT_FALSE(encoder_config.has_value());
  }
  {
    const SdpAudioFormat valid_format("multiopus", 48000, 3,
                                      {{"channel_mapping", "1,255,0"},
                                       {"coupled_streams", "1"},
                                       {"num_streams", "1"}});
    const absl::optional<AudioEncoderMultiChannelOpus::Config> encoder_config =
        AudioEncoderMultiChannelOpus::SdpToConfig(valid_format);
    ASSERT_TRUE(encoder_config.has_value());

    const std::vector<unsigned char> expected_mapping = {1, 255, 0};
    EXPECT_THAT(encoder_config->channel_mapping,
                testing::ContainerEq(expected_mapping));
    EXPECT_TRUE(encoder_config->IsOk());

    // Creating an encoder from a valid config should work.
    const std::unique_ptr<AudioEncoder> opus_encoder =
        AudioEncoderMultiChannelOpus::MakeAudioEncoder(*encoder_config,
                                                       kOpusPayloadType);
    EXPECT_TRUE(opus_encoder);
  }
}
|
||||
|
||||
// Every format advertised by AppendSupportedEncoders() must parse into a
// config that produces an encoder.
TEST(AudioEncoderMultiOpusTest, AdvertisedCodecsCanBeCreated) {
  std::vector<AudioCodecSpec> advertised;
  AudioEncoderMultiChannelOpus::AppendSupportedEncoders(&advertised);

  EXPECT_FALSE(advertised.empty());

  for (const AudioCodecSpec& spec : advertised) {
    const absl::optional<AudioEncoderMultiChannelOpus::Config> encoder_config =
        AudioEncoderMultiChannelOpus::SdpToConfig(spec.format);
    ASSERT_TRUE(encoder_config.has_value());

    const std::unique_ptr<AudioEncoder> opus_encoder =
        AudioEncoderMultiChannelOpus::MakeAudioEncoder(*encoder_config,
                                                       kOpusPayloadType);
    EXPECT_TRUE(opus_encoder);
  }
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
|
@ -0,0 +1,824 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_coding/codecs/opus/audio_encoder_opus.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "absl/strings/match.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "modules/audio_coding/audio_network_adaptor/audio_network_adaptor_impl.h"
|
||||
#include "modules/audio_coding/audio_network_adaptor/controller_manager.h"
|
||||
#include "modules/audio_coding/codecs/opus/audio_coder_opus_common.h"
|
||||
#include "modules/audio_coding/codecs/opus/opus_interface.h"
|
||||
#include "rtc_base/arraysize.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/logging.h"
|
||||
#include "rtc_base/numerics/exp_filter.h"
|
||||
#include "rtc_base/numerics/safe_conversions.h"
|
||||
#include "rtc_base/numerics/safe_minmax.h"
|
||||
#include "rtc_base/string_encode.h"
|
||||
#include "rtc_base/string_to_number.h"
|
||||
#include "rtc_base/time_utils.h"
|
||||
#include "system_wrappers/include/field_trial.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
// Codec parameters for Opus.
|
||||
// draft-spittka-payload-rtp-opus-03
|
||||
|
||||
// Recommended bitrates:
|
||||
// 8-12 kb/s for NB speech,
|
||||
// 16-20 kb/s for WB speech,
|
||||
// 28-40 kb/s for FB speech,
|
||||
// 48-64 kb/s for FB mono music, and
|
||||
// 64-128 kb/s for FB stereo music.
|
||||
// The current implementation applies the following values to mono signals,
|
||||
// and multiplies them by 2 for stereo.
|
||||
// Default mono bitrates (bps). CalculateDefaultBitrate() selects one of them
// from the max playback rate: <= 8000 Hz, <= 16000 Hz, or anything above.
constexpr int kOpusBitrateNbBps = 12000;
|
||||
constexpr int kOpusBitrateWbBps = 20000;
|
||||
constexpr int kOpusBitrateFbBps = 32000;
|
||||
|
||||
// Clock rate an SDP format must declare to be accepted by SdpToConfig(); also
// the clock rate of the format advertised by AppendSupportedEncoders().
constexpr int kRtpTimestampRateHz = 48000;
|
||||
// Upper bound for, and fallback value of, the "maxplaybackrate" parameter
// (see GetMaxPlaybackRate()).
constexpr int kDefaultMaxPlaybackRate = 48000;
|
||||
|
||||
// These two lists must be sorted from low to high
// (kANASupportedFrameLengths feeds FindSupportedFrameLengths(), which DCHECKs
// that its output is sorted; GetFrameSizeMs() scans kOpusSupportedFrameLengths
// for the first entry >= ptime).
|
||||
#if WEBRTC_OPUS_SUPPORT_120MS_PTIME
|
||||
constexpr int kANASupportedFrameLengths[] = {20, 40, 60, 120};
|
||||
constexpr int kOpusSupportedFrameLengths[] = {10, 20, 40, 60, 120};
|
||||
#else
|
||||
constexpr int kANASupportedFrameLengths[] = {20, 40, 60};
|
||||
constexpr int kOpusSupportedFrameLengths[] = {10, 20, 40, 60};
|
||||
#endif
|
||||
|
||||
// PacketLossFractionSmoother uses an exponential filter with a time constant
|
||||
// of -1.0 / ln(0.9999) = 10000 ms.
|
||||
constexpr float kAlphaForPacketLossFractionSmoother = 0.9999f;
|
||||
constexpr float kMaxPacketLossFraction = 0.2f;
|
||||
|
||||
int CalculateDefaultBitrate(int max_playback_rate, size_t num_channels) {
|
||||
const int bitrate = [&] {
|
||||
if (max_playback_rate <= 8000) {
|
||||
return kOpusBitrateNbBps * rtc::dchecked_cast<int>(num_channels);
|
||||
} else if (max_playback_rate <= 16000) {
|
||||
return kOpusBitrateWbBps * rtc::dchecked_cast<int>(num_channels);
|
||||
} else {
|
||||
return kOpusBitrateFbBps * rtc::dchecked_cast<int>(num_channels);
|
||||
}
|
||||
}();
|
||||
RTC_DCHECK_GE(bitrate, AudioEncoderOpusConfig::kMinBitrateBps);
|
||||
RTC_DCHECK_LE(bitrate, AudioEncoderOpusConfig::kMaxBitrateBps);
|
||||
return bitrate;
|
||||
}
|
||||
|
||||
// Get the maxaveragebitrate parameter in string-form, so we can properly figure
|
||||
// out how invalid it is and accurately log invalid values.
|
||||
int CalculateBitrate(int max_playback_rate_hz,
|
||||
size_t num_channels,
|
||||
absl::optional<std::string> bitrate_param) {
|
||||
const int default_bitrate =
|
||||
CalculateDefaultBitrate(max_playback_rate_hz, num_channels);
|
||||
|
||||
if (bitrate_param) {
|
||||
const auto bitrate = rtc::StringToNumber<int>(*bitrate_param);
|
||||
if (bitrate) {
|
||||
const int chosen_bitrate =
|
||||
std::max(AudioEncoderOpusConfig::kMinBitrateBps,
|
||||
std::min(*bitrate, AudioEncoderOpusConfig::kMaxBitrateBps));
|
||||
if (bitrate != chosen_bitrate) {
|
||||
RTC_LOG(LS_WARNING) << "Invalid maxaveragebitrate " << *bitrate
|
||||
<< " clamped to " << chosen_bitrate;
|
||||
}
|
||||
return chosen_bitrate;
|
||||
}
|
||||
RTC_LOG(LS_WARNING) << "Invalid maxaveragebitrate \"" << *bitrate_param
|
||||
<< "\" replaced by default bitrate " << default_bitrate;
|
||||
}
|
||||
|
||||
return default_bitrate;
|
||||
}
|
||||
|
||||
int GetChannelCount(const SdpAudioFormat& format) {
|
||||
const auto param = GetFormatParameter(format, "stereo");
|
||||
if (param == "1") {
|
||||
return 2;
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
int GetMaxPlaybackRate(const SdpAudioFormat& format) {
|
||||
const auto param = GetFormatParameter<int>(format, "maxplaybackrate");
|
||||
if (param && *param >= 8000) {
|
||||
return std::min(*param, kDefaultMaxPlaybackRate);
|
||||
}
|
||||
return kDefaultMaxPlaybackRate;
|
||||
}
|
||||
|
||||
int GetFrameSizeMs(const SdpAudioFormat& format) {
|
||||
const auto ptime = GetFormatParameter<int>(format, "ptime");
|
||||
if (ptime) {
|
||||
// Pick the next highest supported frame length from
|
||||
// kOpusSupportedFrameLengths.
|
||||
for (const int supported_frame_length : kOpusSupportedFrameLengths) {
|
||||
if (supported_frame_length >= *ptime) {
|
||||
return supported_frame_length;
|
||||
}
|
||||
}
|
||||
// If none was found, return the largest supported frame length.
|
||||
return *(std::end(kOpusSupportedFrameLengths) - 1);
|
||||
}
|
||||
|
||||
return AudioEncoderOpusConfig::kDefaultFrameSizeMs;
|
||||
}
|
||||
|
||||
// Replaces the contents of `out` with every entry of kANASupportedFrameLengths
// that lies in [min_frame_length_ms, max_frame_length_ms], preserving order.
void FindSupportedFrameLengths(int min_frame_length_ms,
                               int max_frame_length_ms,
                               std::vector<int>* out) {
  out->clear();
  for (const int frame_length_ms : kANASupportedFrameLengths) {
    const bool out_of_range = frame_length_ms < min_frame_length_ms ||
                              frame_length_ms > max_frame_length_ms;
    if (out_of_range) {
      continue;
    }
    out->push_back(frame_length_ms);
  }
  RTC_DCHECK(std::is_sorted(out->begin(), out->end()));
}
|
||||
|
||||
int GetBitrateBps(const AudioEncoderOpusConfig& config) {
|
||||
RTC_DCHECK(config.IsOk());
|
||||
return *config.bitrate_bps;
|
||||
}
|
||||
|
||||
std::vector<float> GetBitrateMultipliers() {
|
||||
constexpr char kBitrateMultipliersName[] =
|
||||
"WebRTC-Audio-OpusBitrateMultipliers";
|
||||
const bool use_bitrate_multipliers =
|
||||
webrtc::field_trial::IsEnabled(kBitrateMultipliersName);
|
||||
if (use_bitrate_multipliers) {
|
||||
const std::string field_trial_string =
|
||||
webrtc::field_trial::FindFullName(kBitrateMultipliersName);
|
||||
std::vector<std::string> pieces;
|
||||
rtc::tokenize(field_trial_string, '-', &pieces);
|
||||
if (pieces.size() < 2 || pieces[0] != "Enabled") {
|
||||
RTC_LOG(LS_WARNING) << "Invalid parameters for "
|
||||
<< kBitrateMultipliersName
|
||||
<< ", not using custom values.";
|
||||
return std::vector<float>();
|
||||
}
|
||||
std::vector<float> multipliers(pieces.size() - 1);
|
||||
for (size_t i = 1; i < pieces.size(); i++) {
|
||||
if (!rtc::FromString(pieces[i], &multipliers[i - 1])) {
|
||||
RTC_LOG(LS_WARNING)
|
||||
<< "Invalid parameters for " << kBitrateMultipliersName
|
||||
<< ", not using custom values.";
|
||||
return std::vector<float>();
|
||||
}
|
||||
}
|
||||
RTC_LOG(LS_INFO) << "Using custom bitrate multipliers: "
|
||||
<< field_trial_string;
|
||||
return multipliers;
|
||||
}
|
||||
return std::vector<float>();
|
||||
}
|
||||
|
||||
// Scales `bitrate` (bps) by the multiplier for its whole-kbps bucket.
// `multipliers[0]` applies to 5 kbps, `multipliers[1]` to 6 kbps, and so on.
// Bitrates outside the range covered by `multipliers` are returned unchanged.
int GetMultipliedBitrate(int bitrate, const std::vector<float>& multipliers) {
  // The multipliers are valid from 5 kbps.
  constexpr size_t kFirstCoveredKbps = 5;
  const size_t bitrate_kbps = static_cast<size_t>(bitrate / 1000);
  const bool covered = bitrate_kbps >= kFirstCoveredKbps &&
                       bitrate_kbps < multipliers.size() + kFirstCoveredKbps;
  if (!covered) {
    return bitrate;
  }
  const float factor = multipliers[bitrate_kbps - kFirstCoveredKbps];
  return static_cast<int>(factor * bitrate);
}
|
||||
} // namespace
|
||||
|
||||
void AudioEncoderOpusImpl::AppendSupportedEncoders(
|
||||
std::vector<AudioCodecSpec>* specs) {
|
||||
const SdpAudioFormat fmt = {"opus",
|
||||
kRtpTimestampRateHz,
|
||||
2,
|
||||
{{"minptime", "10"}, {"useinbandfec", "1"}}};
|
||||
const AudioCodecInfo info = QueryAudioEncoder(*SdpToConfig(fmt));
|
||||
specs->push_back({fmt, info});
|
||||
}
|
||||
|
||||
// Describes the codec for `config`: its sample rate, channel count and
// bitrate, with the AudioEncoderOpusConfig bitrate limits. Comfort noise is
// reported as not allowed and network adaptation as supported.
AudioCodecInfo AudioEncoderOpusImpl::QueryAudioEncoder(
    const AudioEncoderOpusConfig& config) {
  RTC_DCHECK(config.IsOk());
  AudioCodecInfo codec_info(config.sample_rate_hz, config.num_channels,
                            *config.bitrate_bps,
                            AudioEncoderOpusConfig::kMinBitrateBps,
                            AudioEncoderOpusConfig::kMaxBitrateBps);
  codec_info.allow_comfort_noise = false;
  codec_info.supports_network_adaption = true;
  return codec_info;
}
|
||||
|
||||
std::unique_ptr<AudioEncoder> AudioEncoderOpusImpl::MakeAudioEncoder(
|
||||
const AudioEncoderOpusConfig& config,
|
||||
int payload_type) {
|
||||
if (!config.IsOk()) {
|
||||
RTC_DCHECK_NOTREACHED();
|
||||
return nullptr;
|
||||
}
|
||||
return std::make_unique<AudioEncoderOpusImpl>(config, payload_type);
|
||||
}
|
||||
|
||||
// Translates an SDP format into an encoder config.
//
// Returns nullopt unless the format is "opus" (case-insensitive) at
// kRtpTimestampRateHz with two channels. The encoder's own channel count comes
// from the "stereo" parameter instead. A config that fails IsOk() is not
// expected here: debug builds assert, and nullopt is returned.
absl::optional<AudioEncoderOpusConfig> AudioEncoderOpusImpl::SdpToConfig(
    const SdpAudioFormat& format) {
  const bool is_opus = absl::EqualsIgnoreCase(format.name, "opus");
  if (!is_opus || format.clockrate_hz != kRtpTimestampRateHz ||
      format.num_channels != 2) {
    return absl::nullopt;
  }

  // A boolean fmtp flag counts as set only when its value is exactly "1".
  const auto flag_is_set = [&format](absl::string_view key) {
    return GetFormatParameter(format, key) == "1";
  };

  AudioEncoderOpusConfig config;
  config.num_channels = GetChannelCount(format);
  config.frame_size_ms = GetFrameSizeMs(format);
  config.max_playback_rate_hz = GetMaxPlaybackRate(format);
  config.fec_enabled = flag_is_set("useinbandfec");
  config.dtx_enabled = flag_is_set("usedtx");
  config.cbr_enabled = flag_is_set("cbr");
  config.bitrate_bps =
      CalculateBitrate(config.max_playback_rate_hz, config.num_channels,
                       GetFormatParameter(format, "maxaveragebitrate"));
  if (config.num_channels == 1) {
    config.application = AudioEncoderOpusConfig::ApplicationMode::kVoip;
  } else {
    config.application = AudioEncoderOpusConfig::ApplicationMode::kAudio;
  }

  constexpr int kMinANAFrameLength = kANASupportedFrameLengths[0];
  constexpr int kMaxANAFrameLength =
      kANASupportedFrameLengths[arraysize(kANASupportedFrameLengths) - 1];

  // For now, minptime and maxptime are only used with ANA. If ptime is outside
  // of this range, it will get adjusted once ANA takes hold. Ideally, we'd know
  // if ANA was to be used when setting up the config, and adjust accordingly.
  const int min_frame_length_ms =
      GetFormatParameter<int>(format, "minptime").value_or(kMinANAFrameLength);
  const int max_frame_length_ms =
      GetFormatParameter<int>(format, "maxptime").value_or(kMaxANAFrameLength);
  FindSupportedFrameLengths(min_frame_length_ms, max_frame_length_ms,
                            &config.supported_frame_lengths_ms);

  if (config.IsOk()) {
    return config;
  }
  RTC_DCHECK_NOTREACHED();
  return absl::nullopt;
}
|
||||
|
||||
// Picks the complexity for the bitrate in `config`. Returns nullopt while the
// bitrate lies inside the hysteresis window
// [threshold - window, threshold + window]; otherwise returns
// `low_rate_complexity` at or below the threshold and `complexity` above it.
absl::optional<int> AudioEncoderOpusImpl::GetNewComplexity(
    const AudioEncoderOpusConfig& config) {
  RTC_DCHECK(config.IsOk());
  const int bitrate_bps = GetBitrateBps(config);
  const int window_low_bps =
      config.complexity_threshold_bps - config.complexity_threshold_window_bps;
  const int window_high_bps =
      config.complexity_threshold_bps + config.complexity_threshold_window_bps;

  if (window_low_bps <= bitrate_bps && bitrate_bps <= window_high_bps) {
    // Within the hysteresis window; make no change.
    return absl::nullopt;
  }
  if (bitrate_bps <= config.complexity_threshold_bps) {
    return config.low_rate_complexity;
  }
  return config.complexity;
}
|
||||
|
||||
// Decides whether the encoder bandwidth should change for the bitrate in
// `config`. Above kAutomaticThreshold the choice is left to Opus (OPUS_AUTO).
// At or below it, the result depends on the bandwidth currently reported by
// `inst`: switch up to wideband above kMaxNarrowbandBitrate, down to
// narrowband below kMinWidebandBitrate, otherwise no change (nullopt).
absl::optional<int> AudioEncoderOpusImpl::GetNewBandwidth(
    const AudioEncoderOpusConfig& config,
    OpusEncInst* inst) {
  constexpr int kMinWidebandBitrate = 8000;
  constexpr int kMaxNarrowbandBitrate = 9000;
  constexpr int kAutomaticThreshold = 11000;
  RTC_DCHECK(config.IsOk());

  const int bitrate_bps = GetBitrateBps(config);
  if (bitrate_bps > kAutomaticThreshold) {
    return OPUS_AUTO;
  }

  const int current_bandwidth = WebRtcOpus_GetBandwidth(inst);
  RTC_DCHECK_GE(current_bandwidth, 0);

  const bool should_widen = bitrate_bps > kMaxNarrowbandBitrate &&
                            current_bandwidth < OPUS_BANDWIDTH_WIDEBAND;
  if (should_widen) {
    return OPUS_BANDWIDTH_WIDEBAND;
  }
  const bool should_narrow = bitrate_bps < kMinWidebandBitrate &&
                             current_bandwidth > OPUS_BANDWIDTH_NARROWBAND;
  if (should_narrow) {
    return OPUS_BANDWIDTH_NARROWBAND;
  }
  return absl::nullopt;
}
|
||||
|
||||
// Smooths reported packet loss fractions with an exponential filter, using
// the time elapsed between samples as the filter exponent.
class AudioEncoderOpusImpl::PacketLossFractionSmoother {
 public:
  explicit PacketLossFractionSmoother()
      : last_sample_time_ms_(rtc::TimeMillis()),
        smoother_(kAlphaForPacketLossFractionSmoother) {}

  // Returns the smoothed packet loss fraction, or 0 while the filter has not
  // produced a value yet.
  float GetAverage() const {
    const float filtered = smoother_.filtered();
    if (filtered == rtc::ExpFilter::kValueUndefined) {
      return 0.0f;
    }
    return filtered;
  }

  // Feeds a new packet loss observation into the filter.
  void AddSample(float packet_loss_fraction) {
    const int64_t now_ms = rtc::TimeMillis();
    const float elapsed_ms = static_cast<float>(now_ms - last_sample_time_ms_);
    smoother_.Apply(elapsed_ms, packet_loss_fraction);
    last_sample_time_ms_ = now_ms;
  }

 private:
  // Time of construction or of the most recent AddSample() call.
  int64_t last_sample_time_ms_;

  // Exponential filter holding the smoothed packet loss fraction.
  rtc::ExpFilter smoother_;
};
|
||||
|
||||
AudioEncoderOpusImpl::AudioEncoderOpusImpl(const AudioEncoderOpusConfig& config,
|
||||
int payload_type)
|
||||
: AudioEncoderOpusImpl(
|
||||
config,
|
||||
payload_type,
|
||||
[this](absl::string_view config_string, RtcEventLog* event_log) {
|
||||
return DefaultAudioNetworkAdaptorCreator(config_string, event_log);
|
||||
},
|
||||
// We choose 5sec as initial time constant due to empirical data.
|
||||
std::make_unique<SmoothingFilterImpl>(5000)) {}
|
||||
|
||||
// Dependency-injection constructor. Reads the two field trials, stores the
// injected adaptor factory and bitrate smoother, then creates the Opus
// encoder instance for `config` (fatal if that fails).
AudioEncoderOpusImpl::AudioEncoderOpusImpl(
    const AudioEncoderOpusConfig& config,
    int payload_type,
    const AudioNetworkAdaptorCreator& audio_network_adaptor_creator,
    std::unique_ptr<SmoothingFilter> bitrate_smoother)
    : payload_type_(payload_type),
      // Stable-target adaptation is on unless the trial explicitly disables
      // it.
      use_stable_target_for_adaptation_(!webrtc::field_trial::IsDisabled(
          "WebRTC-Audio-StableTargetAdaptation")),
      // Bandwidth adjustment is off unless the trial explicitly enables it.
      adjust_bandwidth_(
          webrtc::field_trial::IsEnabled("WebRTC-AdjustOpusBandwidth")),
      bitrate_changed_(true),
      bitrate_multipliers_(GetBitrateMultipliers()),
      packet_loss_rate_(0.0),
      inst_(nullptr),
      packet_loss_fraction_smoother_(
          std::make_unique<PacketLossFractionSmoother>()),
      audio_network_adaptor_creator_(audio_network_adaptor_creator),
      bitrate_smoother_(std::move(bitrate_smoother)),
      consecutive_dtx_frames_(0) {
  RTC_DCHECK(0 <= payload_type && payload_type <= 127);

  // Sanity check of the redundant payload type field that we want to get rid
  // of. See https://bugs.chromium.org/p/webrtc/issues/detail?id=7847
  RTC_CHECK(config.payload_type == -1 || config.payload_type == payload_type);

  const bool encoder_created = RecreateEncoderInstance(config);
  RTC_CHECK(encoder_created);
  SetProjectedPacketLossRate(packet_loss_rate_);
}
|
||||
|
||||
AudioEncoderOpusImpl::AudioEncoderOpusImpl(int payload_type,
|
||||
const SdpAudioFormat& format)
|
||||
: AudioEncoderOpusImpl(*SdpToConfig(format), payload_type) {}
|
||||
|
||||
// Releases the Opus encoder instance; a failing free is fatal.
AudioEncoderOpusImpl::~AudioEncoderOpusImpl() {
  const auto free_status = WebRtcOpus_EncoderFree(inst_);
  RTC_CHECK_EQ(0, free_status);
}
|
||||
|
||||
int AudioEncoderOpusImpl::SampleRateHz() const {
|
||||
return config_.sample_rate_hz;
|
||||
}
|
||||
|
||||
size_t AudioEncoderOpusImpl::NumChannels() const {
|
||||
return config_.num_channels;
|
||||
}
|
||||
|
||||
int AudioEncoderOpusImpl::RtpTimestampRateHz() const {
|
||||
return kRtpTimestampRateHz;
|
||||
}
|
||||
|
||||
size_t AudioEncoderOpusImpl::Num10MsFramesInNextPacket() const {
|
||||
return Num10msFramesPerPacket();
|
||||
}
|
||||
|
||||
size_t AudioEncoderOpusImpl::Max10MsFramesInAPacket() const {
|
||||
return Num10msFramesPerPacket();
|
||||
}
|
||||
|
||||
int AudioEncoderOpusImpl::GetTargetBitrate() const {
|
||||
return GetBitrateBps(config_);
|
||||
}
|
||||
|
||||
// Rebuilds the Opus encoder instance from the active config; failure is
// fatal.
void AudioEncoderOpusImpl::Reset() {
  const bool recreated = RecreateEncoderInstance(config_);
  RTC_CHECK(recreated);
}
|
||||
|
||||
bool AudioEncoderOpusImpl::SetFec(bool enable) {
|
||||
if (enable) {
|
||||
RTC_CHECK_EQ(0, WebRtcOpus_EnableFec(inst_));
|
||||
} else {
|
||||
RTC_CHECK_EQ(0, WebRtcOpus_DisableFec(inst_));
|
||||
}
|
||||
config_.fec_enabled = enable;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AudioEncoderOpusImpl::SetDtx(bool enable) {
|
||||
if (enable) {
|
||||
RTC_CHECK_EQ(0, WebRtcOpus_EnableDtx(inst_));
|
||||
} else {
|
||||
RTC_CHECK_EQ(0, WebRtcOpus_DisableDtx(inst_));
|
||||
}
|
||||
config_.dtx_enabled = enable;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AudioEncoderOpusImpl::GetDtx() const {
|
||||
return config_.dtx_enabled;
|
||||
}
|
||||
|
||||
// Maps `application` onto the Opus application mode and recreates the encoder
// instance with it. Returns whether the recreated config was accepted.
bool AudioEncoderOpusImpl::SetApplication(Application application) {
  AudioEncoderOpusConfig updated_config = config_;
  if (application == Application::kSpeech) {
    updated_config.application = AudioEncoderOpusConfig::ApplicationMode::kVoip;
  } else if (application == Application::kAudio) {
    updated_config.application =
        AudioEncoderOpusConfig::ApplicationMode::kAudio;
  }
  return RecreateEncoderInstance(updated_config);
}
|
||||
|
||||
void AudioEncoderOpusImpl::SetMaxPlaybackRate(int frequency_hz) {
|
||||
auto conf = config_;
|
||||
conf.max_playback_rate_hz = frequency_hz;
|
||||
RTC_CHECK(RecreateEncoderInstance(conf));
|
||||
}
|
||||
|
||||
bool AudioEncoderOpusImpl::EnableAudioNetworkAdaptor(
|
||||
const std::string& config_string,
|
||||
RtcEventLog* event_log) {
|
||||
audio_network_adaptor_ =
|
||||
audio_network_adaptor_creator_(config_string, event_log);
|
||||
return audio_network_adaptor_.get() != nullptr;
|
||||
}
|
||||
|
||||
// Destroys the audio network adaptor, if any.
void AudioEncoderOpusImpl::DisableAudioNetworkAdaptor() {
  audio_network_adaptor_.reset();
}
|
||||
|
||||
void AudioEncoderOpusImpl::OnReceivedUplinkPacketLossFraction(
|
||||
float uplink_packet_loss_fraction) {
|
||||
if (audio_network_adaptor_) {
|
||||
audio_network_adaptor_->SetUplinkPacketLossFraction(
|
||||
uplink_packet_loss_fraction);
|
||||
ApplyAudioNetworkAdaptor();
|
||||
}
|
||||
packet_loss_fraction_smoother_->AddSample(uplink_packet_loss_fraction);
|
||||
float average_fraction_loss = packet_loss_fraction_smoother_->GetAverage();
|
||||
SetProjectedPacketLossRate(average_fraction_loss);
|
||||
}
|
||||
|
||||
void AudioEncoderOpusImpl::OnReceivedTargetAudioBitrate(
|
||||
int target_audio_bitrate_bps) {
|
||||
SetTargetBitrate(target_audio_bitrate_bps);
|
||||
}
|
||||
|
||||
void AudioEncoderOpusImpl::OnReceivedUplinkBandwidth(
|
||||
int target_audio_bitrate_bps,
|
||||
absl::optional<int64_t> bwe_period_ms,
|
||||
absl::optional<int64_t> stable_target_bitrate_bps) {
|
||||
if (audio_network_adaptor_) {
|
||||
audio_network_adaptor_->SetTargetAudioBitrate(target_audio_bitrate_bps);
|
||||
if (use_stable_target_for_adaptation_) {
|
||||
if (stable_target_bitrate_bps)
|
||||
audio_network_adaptor_->SetUplinkBandwidth(*stable_target_bitrate_bps);
|
||||
} else {
|
||||
// We give smoothed bitrate allocation to audio network adaptor as
|
||||
// the uplink bandwidth.
|
||||
// The BWE spikes should not affect the bitrate smoother more than 25%.
|
||||
// To simplify the calculations we use a step response as input signal.
|
||||
// The step response of an exponential filter is
|
||||
// u(t) = 1 - e^(-t / time_constant).
|
||||
// In order to limit the affect of a BWE spike within 25% of its value
|
||||
// before
|
||||
// the next BWE update, we would choose a time constant that fulfills
|
||||
// 1 - e^(-bwe_period_ms / time_constant) < 0.25
|
||||
// Then 4 * bwe_period_ms is a good choice.
|
||||
if (bwe_period_ms)
|
||||
bitrate_smoother_->SetTimeConstantMs(*bwe_period_ms * 4);
|
||||
bitrate_smoother_->AddSample(target_audio_bitrate_bps);
|
||||
}
|
||||
|
||||
ApplyAudioNetworkAdaptor();
|
||||
} else {
|
||||
if (!overhead_bytes_per_packet_) {
|
||||
RTC_LOG(LS_INFO)
|
||||
<< "AudioEncoderOpusImpl: Overhead unknown, target audio bitrate "
|
||||
<< target_audio_bitrate_bps << " bps is ignored.";
|
||||
return;
|
||||
}
|
||||
const int overhead_bps = static_cast<int>(
|
||||
*overhead_bytes_per_packet_ * 8 * 100 / Num10MsFramesInNextPacket());
|
||||
SetTargetBitrate(
|
||||
std::min(AudioEncoderOpusConfig::kMaxBitrateBps,
|
||||
std::max(AudioEncoderOpusConfig::kMinBitrateBps,
|
||||
target_audio_bitrate_bps - overhead_bps)));
|
||||
}
|
||||
}
|
||||
void AudioEncoderOpusImpl::OnReceivedUplinkBandwidth(
|
||||
int target_audio_bitrate_bps,
|
||||
absl::optional<int64_t> bwe_period_ms) {
|
||||
OnReceivedUplinkBandwidth(target_audio_bitrate_bps, bwe_period_ms,
|
||||
absl::nullopt);
|
||||
}
|
||||
|
||||
// Unpacks a bitrate allocation update (target bitrate in bps, BWE period in
// ms, stable target bitrate in bps) and hands it to
// OnReceivedUplinkBandwidth().
void AudioEncoderOpusImpl::OnReceivedUplinkAllocation(
    BitrateAllocationUpdate update) {
  OnReceivedUplinkBandwidth(
      /*target_audio_bitrate_bps=*/update.target_bitrate.bps(),
      /*bwe_period_ms=*/update.bwe_period.ms(),
      /*stable_target_bitrate_bps=*/update.stable_target_bitrate.bps());
}
|
||||
|
||||
// Forwards a round-trip time report to the audio network adaptor and applies
// its resulting decisions. Does nothing when no adaptor is active.
void AudioEncoderOpusImpl::OnReceivedRtt(int rtt_ms) {
  if (audio_network_adaptor_) {
    audio_network_adaptor_->SetRtt(rtt_ms);
    ApplyAudioNetworkAdaptor();
  }
}
|
||||
|
||||
void AudioEncoderOpusImpl::OnReceivedOverhead(
|
||||
size_t overhead_bytes_per_packet) {
|
||||
if (audio_network_adaptor_) {
|
||||
audio_network_adaptor_->SetOverhead(overhead_bytes_per_packet);
|
||||
ApplyAudioNetworkAdaptor();
|
||||
} else {
|
||||
overhead_bytes_per_packet_ = overhead_bytes_per_packet;
|
||||
}
|
||||
}
|
||||
|
||||
void AudioEncoderOpusImpl::SetReceiverFrameLengthRange(
|
||||
int min_frame_length_ms,
|
||||
int max_frame_length_ms) {
|
||||
// Ensure that `SetReceiverFrameLengthRange` is called before
|
||||
// `EnableAudioNetworkAdaptor`, otherwise we need to recreate
|
||||
// `audio_network_adaptor_`, which is not a needed use case.
|
||||
RTC_DCHECK(!audio_network_adaptor_);
|
||||
FindSupportedFrameLengths(min_frame_length_ms, max_frame_length_ms,
|
||||
&config_.supported_frame_lengths_ms);
|
||||
}
|
||||
|
||||
// Buffers `audio` until a full packet's worth of samples is available, then
// encodes the buffered samples with Opus and appends the payload to
// `encoded`. Returns a default EncodedInfo while the packet is still being
// filled.
AudioEncoder::EncodedInfo AudioEncoderOpusImpl::EncodeImpl(
    uint32_t rtp_timestamp,
    rtc::ArrayView<const int16_t> audio,
    rtc::Buffer* encoded) {
  MaybeUpdateUplinkBandwidth();

  // A packet carries the timestamp of the first chunk buffered for it.
  if (input_buffer_.empty()) {
    first_timestamp_in_buffer_ = rtp_timestamp;
  }
  input_buffer_.insert(input_buffer_.end(), audio.cbegin(), audio.cend());

  const size_t samples_per_packet =
      Num10msFramesPerPacket() * SamplesPer10msFrame();
  if (input_buffer_.size() < samples_per_packet) {
    return EncodedInfo();
  }
  RTC_CHECK_EQ(input_buffer_.size(), samples_per_packet);

  const size_t max_encoded_bytes = SufficientOutputBufferSize();
  const auto encode_into = [&](rtc::ArrayView<uint8_t> output) {
    const int status = WebRtcOpus_Encode(
        inst_, input_buffer_.data(),
        rtc::CheckedDivExact(input_buffer_.size(), config_.num_channels),
        rtc::saturated_cast<int16_t>(max_encoded_bytes), output.data());

    RTC_CHECK_GE(status, 0);  // Fails only if fed invalid data.

    return static_cast<size_t>(status);
  };
  EncodedInfo info;
  info.encoded_bytes = encoded->AppendData(max_encoded_bytes, encode_into);
  input_buffer_.clear();

  // Payloads of at most two bytes are treated as DTX frames.
  const bool dtx_frame = info.encoded_bytes <= 2;

  // The new packet size is used from the next encoding onwards.
  config_.frame_size_ms = next_frame_length_ms_;

  if (adjust_bandwidth_ && bitrate_changed_) {
    const absl::optional<int> new_bandwidth = GetNewBandwidth(config_, inst_);
    if (new_bandwidth) {
      RTC_CHECK_EQ(0, WebRtcOpus_SetBandwidth(inst_, *new_bandwidth));
    }
    bitrate_changed_ = false;
  }

  info.encoded_timestamp = first_timestamp_in_buffer_;
  info.payload_type = payload_type_;
  info.send_even_if_empty = true;  // Allows Opus to send empty packets.
  // After 20 DTX frames (MAX_CONSECUTIVE_DTX) Opus sends a frame coding the
  // background noise. That frame is not flagged as speech, even though there
  // is some probability that it is.
  info.speech = !dtx_frame && (consecutive_dtx_frames_ != 20);
  info.encoder_type = CodecType::kOpus;

  // Increase or reset the DTX counter.
  if (dtx_frame) {
    consecutive_dtx_frames_ = consecutive_dtx_frames_ + 1;
  } else {
    consecutive_dtx_frames_ = 0;
  }

  return info;
}
|
||||
|
||||
size_t AudioEncoderOpusImpl::Num10msFramesPerPacket() const {
|
||||
return static_cast<size_t>(rtc::CheckedDivExact(config_.frame_size_ms, 10));
|
||||
}
|
||||
|
||||
size_t AudioEncoderOpusImpl::SamplesPer10msFrame() const {
|
||||
return rtc::CheckedDivExact(config_.sample_rate_hz, 100) *
|
||||
config_.num_channels;
|
||||
}
|
||||
|
||||
size_t AudioEncoderOpusImpl::SufficientOutputBufferSize() const {
|
||||
// Calculate the number of bytes we expect the encoder to produce,
|
||||
// then multiply by two to give a wide margin for error.
|
||||
const size_t bytes_per_millisecond =
|
||||
static_cast<size_t>(GetBitrateBps(config_) / (1000 * 8) + 1);
|
||||
const size_t approx_encoded_bytes =
|
||||
Num10msFramesPerPacket() * 10 * bytes_per_millisecond;
|
||||
return 2 * approx_encoded_bytes;
|
||||
}
|
||||
|
||||
// If the given config is OK, recreate the Opus encoder instance with those
|
||||
// settings, save the config, and return true. Otherwise, do nothing and return
|
||||
// false.
|
||||
bool AudioEncoderOpusImpl::RecreateEncoderInstance(
|
||||
const AudioEncoderOpusConfig& config) {
|
||||
if (!config.IsOk())
|
||||
return false;
|
||||
config_ = config;
|
||||
if (inst_)
|
||||
RTC_CHECK_EQ(0, WebRtcOpus_EncoderFree(inst_));
|
||||
input_buffer_.clear();
|
||||
input_buffer_.reserve(Num10msFramesPerPacket() * SamplesPer10msFrame());
|
||||
RTC_CHECK_EQ(0, WebRtcOpus_EncoderCreate(
|
||||
&inst_, config.num_channels,
|
||||
config.application ==
|
||||
AudioEncoderOpusConfig::ApplicationMode::kVoip
|
||||
? 0
|
||||
: 1,
|
||||
config.sample_rate_hz));
|
||||
const int bitrate = GetBitrateBps(config);
|
||||
RTC_CHECK_EQ(0, WebRtcOpus_SetBitRate(inst_, bitrate));
|
||||
RTC_LOG(LS_VERBOSE) << "Set Opus bitrate to " << bitrate << " bps.";
|
||||
if (config.fec_enabled) {
|
||||
RTC_CHECK_EQ(0, WebRtcOpus_EnableFec(inst_));
|
||||
} else {
|
||||
RTC_CHECK_EQ(0, WebRtcOpus_DisableFec(inst_));
|
||||
}
|
||||
RTC_CHECK_EQ(
|
||||
0, WebRtcOpus_SetMaxPlaybackRate(inst_, config.max_playback_rate_hz));
|
||||
// Use the default complexity if the start bitrate is within the hysteresis
|
||||
// window.
|
||||
complexity_ = GetNewComplexity(config).value_or(config.complexity);
|
||||
RTC_CHECK_EQ(0, WebRtcOpus_SetComplexity(inst_, complexity_));
|
||||
bitrate_changed_ = true;
|
||||
if (config.dtx_enabled) {
|
||||
RTC_CHECK_EQ(0, WebRtcOpus_EnableDtx(inst_));
|
||||
} else {
|
||||
RTC_CHECK_EQ(0, WebRtcOpus_DisableDtx(inst_));
|
||||
}
|
||||
RTC_CHECK_EQ(0,
|
||||
WebRtcOpus_SetPacketLossRate(
|
||||
inst_, static_cast<int32_t>(packet_loss_rate_ * 100 + .5)));
|
||||
if (config.cbr_enabled) {
|
||||
RTC_CHECK_EQ(0, WebRtcOpus_EnableCbr(inst_));
|
||||
} else {
|
||||
RTC_CHECK_EQ(0, WebRtcOpus_DisableCbr(inst_));
|
||||
}
|
||||
num_channels_to_encode_ = NumChannels();
|
||||
next_frame_length_ms_ = config_.frame_size_ms;
|
||||
return true;
|
||||
}
|
||||
|
||||
void AudioEncoderOpusImpl::SetFrameLength(int frame_length_ms) {
|
||||
if (next_frame_length_ms_ != frame_length_ms) {
|
||||
RTC_LOG(LS_VERBOSE) << "Update Opus frame length "
|
||||
<< "from " << next_frame_length_ms_ << " ms "
|
||||
<< "to " << frame_length_ms << " ms.";
|
||||
}
|
||||
next_frame_length_ms_ = frame_length_ms;
|
||||
}
|
||||
|
||||
void AudioEncoderOpusImpl::SetNumChannelsToEncode(
|
||||
size_t num_channels_to_encode) {
|
||||
RTC_DCHECK_GT(num_channels_to_encode, 0);
|
||||
RTC_DCHECK_LE(num_channels_to_encode, config_.num_channels);
|
||||
|
||||
if (num_channels_to_encode_ == num_channels_to_encode)
|
||||
return;
|
||||
|
||||
RTC_CHECK_EQ(0, WebRtcOpus_SetForceChannels(inst_, num_channels_to_encode));
|
||||
num_channels_to_encode_ = num_channels_to_encode;
|
||||
}
|
||||
|
||||
void AudioEncoderOpusImpl::SetProjectedPacketLossRate(float fraction) {
|
||||
fraction = std::min(std::max(fraction, 0.0f), kMaxPacketLossFraction);
|
||||
if (packet_loss_rate_ != fraction) {
|
||||
packet_loss_rate_ = fraction;
|
||||
RTC_CHECK_EQ(
|
||||
0, WebRtcOpus_SetPacketLossRate(
|
||||
inst_, static_cast<int32_t>(packet_loss_rate_ * 100 + .5)));
|
||||
}
|
||||
}
|
||||
|
||||
void AudioEncoderOpusImpl::SetTargetBitrate(int bits_per_second) {
|
||||
const int new_bitrate = rtc::SafeClamp<int>(
|
||||
bits_per_second, AudioEncoderOpusConfig::kMinBitrateBps,
|
||||
AudioEncoderOpusConfig::kMaxBitrateBps);
|
||||
if (config_.bitrate_bps && *config_.bitrate_bps != new_bitrate) {
|
||||
config_.bitrate_bps = new_bitrate;
|
||||
RTC_DCHECK(config_.IsOk());
|
||||
const int bitrate = GetBitrateBps(config_);
|
||||
RTC_CHECK_EQ(
|
||||
0, WebRtcOpus_SetBitRate(
|
||||
inst_, GetMultipliedBitrate(bitrate, bitrate_multipliers_)));
|
||||
RTC_LOG(LS_VERBOSE) << "Set Opus bitrate to " << bitrate << " bps.";
|
||||
bitrate_changed_ = true;
|
||||
}
|
||||
|
||||
const auto new_complexity = GetNewComplexity(config_);
|
||||
if (new_complexity && complexity_ != *new_complexity) {
|
||||
complexity_ = *new_complexity;
|
||||
RTC_CHECK_EQ(0, WebRtcOpus_SetComplexity(inst_, complexity_));
|
||||
}
|
||||
}
|
||||
|
||||
void AudioEncoderOpusImpl::ApplyAudioNetworkAdaptor() {
|
||||
auto config = audio_network_adaptor_->GetEncoderRuntimeConfig();
|
||||
|
||||
if (config.bitrate_bps)
|
||||
SetTargetBitrate(*config.bitrate_bps);
|
||||
if (config.frame_length_ms)
|
||||
SetFrameLength(*config.frame_length_ms);
|
||||
if (config.enable_dtx)
|
||||
SetDtx(*config.enable_dtx);
|
||||
if (config.num_channels)
|
||||
SetNumChannelsToEncode(*config.num_channels);
|
||||
}
|
||||
|
||||
std::unique_ptr<AudioNetworkAdaptor>
|
||||
AudioEncoderOpusImpl::DefaultAudioNetworkAdaptorCreator(
|
||||
absl::string_view config_string,
|
||||
RtcEventLog* event_log) const {
|
||||
AudioNetworkAdaptorImpl::Config config;
|
||||
config.event_log = event_log;
|
||||
return std::unique_ptr<AudioNetworkAdaptor>(new AudioNetworkAdaptorImpl(
|
||||
config, ControllerManagerImpl::Create(
|
||||
config_string, NumChannels(), supported_frame_lengths_ms(),
|
||||
AudioEncoderOpusConfig::kMinBitrateBps,
|
||||
num_channels_to_encode_, next_frame_length_ms_,
|
||||
GetTargetBitrate(), config_.fec_enabled, GetDtx())));
|
||||
}
|
||||
|
||||
void AudioEncoderOpusImpl::MaybeUpdateUplinkBandwidth() {
|
||||
if (audio_network_adaptor_ && !use_stable_target_for_adaptation_) {
|
||||
int64_t now_ms = rtc::TimeMillis();
|
||||
if (!bitrate_smoother_last_update_time_ ||
|
||||
now_ms - *bitrate_smoother_last_update_time_ >=
|
||||
config_.uplink_bandwidth_update_interval_ms) {
|
||||
absl::optional<float> smoothed_bitrate = bitrate_smoother_->GetAverage();
|
||||
if (smoothed_bitrate)
|
||||
audio_network_adaptor_->SetUplinkBandwidth(*smoothed_bitrate);
|
||||
bitrate_smoother_last_update_time_ = now_ms;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the audio network adaptor's statistics, or default-constructed
// stats when no adaptor is active.
ANAStats AudioEncoderOpusImpl::GetANAStats() const {
  if (!audio_network_adaptor_) {
    return ANAStats();
  }
  return audio_network_adaptor_->GetStats();
}
|
||||
|
||||
// Returns the (min, max) frame length the encoder may produce. Without an
// audio network adaptor both ends equal the configured frame size. With one,
// the range spans the first and last supported frame lengths, or nullopt if
// that list is empty.
absl::optional<std::pair<TimeDelta, TimeDelta>>
AudioEncoderOpusImpl::GetFrameLengthRange() const {
  if (!audio_network_adaptor_) {
    return {{TimeDelta::Millis(config_.frame_size_ms),
             TimeDelta::Millis(config_.frame_size_ms)}};
  }
  const auto& supported_lengths_ms = config_.supported_frame_lengths_ms;
  if (supported_lengths_ms.empty()) {
    return absl::nullopt;
  }
  return {{TimeDelta::Millis(supported_lengths_ms.front()),
           TimeDelta::Millis(supported_lengths_ms.back())}};
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
|
@ -0,0 +1,184 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_ENCODER_OPUS_H_
|
||||
#define MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_ENCODER_OPUS_H_
|
||||
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/audio_codecs/audio_encoder.h"
|
||||
#include "api/audio_codecs/audio_format.h"
|
||||
#include "api/audio_codecs/opus/audio_encoder_opus_config.h"
|
||||
#include "common_audio/smoothing_filter.h"
|
||||
#include "modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor.h"
|
||||
#include "modules/audio_coding/codecs/opus/opus_interface.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class RtcEventLog;
|
||||
|
||||
class AudioEncoderOpusImpl final : public AudioEncoder {
|
||||
public:
|
||||
// Returns empty if the current bitrate falls within the hysteresis window,
|
||||
// defined by complexity_threshold_bps +/- complexity_threshold_window_bps.
|
||||
// Otherwise, returns the current complexity depending on whether the
|
||||
// current bitrate is above or below complexity_threshold_bps.
|
||||
static absl::optional<int> GetNewComplexity(
|
||||
const AudioEncoderOpusConfig& config);
|
||||
|
||||
// Returns OPUS_AUTO if the the current bitrate is above wideband threshold.
|
||||
// Returns empty if it is below, but bandwidth coincides with the desired one.
|
||||
// Otherwise returns the desired bandwidth.
|
||||
static absl::optional<int> GetNewBandwidth(
|
||||
const AudioEncoderOpusConfig& config,
|
||||
OpusEncInst* inst);
|
||||
|
||||
using AudioNetworkAdaptorCreator =
|
||||
std::function<std::unique_ptr<AudioNetworkAdaptor>(absl::string_view,
|
||||
RtcEventLog*)>;
|
||||
|
||||
AudioEncoderOpusImpl(const AudioEncoderOpusConfig& config, int payload_type);
|
||||
|
||||
// Dependency injection for testing.
|
||||
AudioEncoderOpusImpl(
|
||||
const AudioEncoderOpusConfig& config,
|
||||
int payload_type,
|
||||
const AudioNetworkAdaptorCreator& audio_network_adaptor_creator,
|
||||
std::unique_ptr<SmoothingFilter> bitrate_smoother);
|
||||
|
||||
AudioEncoderOpusImpl(int payload_type, const SdpAudioFormat& format);
|
||||
~AudioEncoderOpusImpl() override;
|
||||
|
||||
AudioEncoderOpusImpl(const AudioEncoderOpusImpl&) = delete;
|
||||
AudioEncoderOpusImpl& operator=(const AudioEncoderOpusImpl&) = delete;
|
||||
|
||||
int SampleRateHz() const override;
|
||||
size_t NumChannels() const override;
|
||||
int RtpTimestampRateHz() const override;
|
||||
size_t Num10MsFramesInNextPacket() const override;
|
||||
size_t Max10MsFramesInAPacket() const override;
|
||||
int GetTargetBitrate() const override;
|
||||
|
||||
void Reset() override;
|
||||
bool SetFec(bool enable) override;
|
||||
|
||||
// Set Opus DTX. Once enabled, Opus stops transmission, when it detects
|
||||
// voice being inactive. During that, it still sends 2 packets (one for
|
||||
// content, one for signaling) about every 400 ms.
|
||||
bool SetDtx(bool enable) override;
|
||||
bool GetDtx() const override;
|
||||
|
||||
bool SetApplication(Application application) override;
|
||||
void SetMaxPlaybackRate(int frequency_hz) override;
|
||||
bool EnableAudioNetworkAdaptor(const std::string& config_string,
|
||||
RtcEventLog* event_log) override;
|
||||
void DisableAudioNetworkAdaptor() override;
|
||||
void OnReceivedUplinkPacketLossFraction(
|
||||
float uplink_packet_loss_fraction) override;
|
||||
void OnReceivedTargetAudioBitrate(int target_audio_bitrate_bps) override;
|
||||
void OnReceivedUplinkBandwidth(
|
||||
int target_audio_bitrate_bps,
|
||||
absl::optional<int64_t> bwe_period_ms) override;
|
||||
void OnReceivedUplinkAllocation(BitrateAllocationUpdate update) override;
|
||||
void OnReceivedRtt(int rtt_ms) override;
|
||||
void OnReceivedOverhead(size_t overhead_bytes_per_packet) override;
|
||||
void SetReceiverFrameLengthRange(int min_frame_length_ms,
|
||||
int max_frame_length_ms) override;
|
||||
ANAStats GetANAStats() const override;
|
||||
absl::optional<std::pair<TimeDelta, TimeDelta> > GetFrameLengthRange()
|
||||
const override;
|
||||
rtc::ArrayView<const int> supported_frame_lengths_ms() const {
|
||||
return config_.supported_frame_lengths_ms;
|
||||
}
|
||||
|
||||
// Getters for testing.
|
||||
float packet_loss_rate() const { return packet_loss_rate_; }
|
||||
AudioEncoderOpusConfig::ApplicationMode application() const {
|
||||
return config_.application;
|
||||
}
|
||||
bool fec_enabled() const { return config_.fec_enabled; }
|
||||
size_t num_channels_to_encode() const { return num_channels_to_encode_; }
|
||||
int next_frame_length_ms() const { return next_frame_length_ms_; }
|
||||
|
||||
protected:
|
||||
EncodedInfo EncodeImpl(uint32_t rtp_timestamp,
|
||||
rtc::ArrayView<const int16_t> audio,
|
||||
rtc::Buffer* encoded) override;
|
||||
|
||||
private:
|
||||
class PacketLossFractionSmoother;
|
||||
|
||||
static absl::optional<AudioEncoderOpusConfig> SdpToConfig(
|
||||
const SdpAudioFormat& format);
|
||||
static void AppendSupportedEncoders(std::vector<AudioCodecSpec>* specs);
|
||||
static AudioCodecInfo QueryAudioEncoder(const AudioEncoderOpusConfig& config);
|
||||
static std::unique_ptr<AudioEncoder> MakeAudioEncoder(
|
||||
const AudioEncoderOpusConfig&,
|
||||
int payload_type);
|
||||
|
||||
size_t Num10msFramesPerPacket() const;
|
||||
size_t SamplesPer10msFrame() const;
|
||||
size_t SufficientOutputBufferSize() const;
|
||||
bool RecreateEncoderInstance(const AudioEncoderOpusConfig& config);
|
||||
void SetFrameLength(int frame_length_ms);
|
||||
void SetNumChannelsToEncode(size_t num_channels_to_encode);
|
||||
void SetProjectedPacketLossRate(float fraction);
|
||||
|
||||
void OnReceivedUplinkBandwidth(
|
||||
int target_audio_bitrate_bps,
|
||||
absl::optional<int64_t> bwe_period_ms,
|
||||
absl::optional<int64_t> link_capacity_allocation);
|
||||
|
||||
// TODO(minyue): remove "override" when we can deprecate
|
||||
// `AudioEncoder::SetTargetBitrate`.
|
||||
void SetTargetBitrate(int target_bps) override;
|
||||
|
||||
void ApplyAudioNetworkAdaptor();
|
||||
std::unique_ptr<AudioNetworkAdaptor> DefaultAudioNetworkAdaptorCreator(
|
||||
absl::string_view config_string,
|
||||
RtcEventLog* event_log) const;
|
||||
|
||||
void MaybeUpdateUplinkBandwidth();
|
||||
|
||||
AudioEncoderOpusConfig config_;
|
||||
const int payload_type_;
|
||||
const bool use_stable_target_for_adaptation_;
|
||||
const bool adjust_bandwidth_;
|
||||
bool bitrate_changed_;
|
||||
// A multiplier for bitrates at 5 kbps and higher. The target bitrate
|
||||
// will be multiplied by these multipliers, each multiplier is applied to a
|
||||
// 1 kbps range.
|
||||
std::vector<float> bitrate_multipliers_;
|
||||
float packet_loss_rate_;
|
||||
std::vector<int16_t> input_buffer_;
|
||||
OpusEncInst* inst_;
|
||||
uint32_t first_timestamp_in_buffer_;
|
||||
size_t num_channels_to_encode_;
|
||||
int next_frame_length_ms_;
|
||||
int complexity_;
|
||||
std::unique_ptr<PacketLossFractionSmoother> packet_loss_fraction_smoother_;
|
||||
const AudioNetworkAdaptorCreator audio_network_adaptor_creator_;
|
||||
std::unique_ptr<AudioNetworkAdaptor> audio_network_adaptor_;
|
||||
absl::optional<size_t> overhead_bytes_per_packet_;
|
||||
const std::unique_ptr<SmoothingFilter> bitrate_smoother_;
|
||||
absl::optional<int64_t> bitrate_smoother_last_update_time_;
|
||||
int consecutive_dtx_frames_;
|
||||
|
||||
friend struct AudioEncoderOpus;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_ENCODER_OPUS_H_
|
||||
|
|
@ -0,0 +1,914 @@
|
|||
/*
|
||||
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "api/audio_codecs/opus/audio_encoder_opus.h"
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "common_audio/mocks/mock_smoothing_filter.h"
|
||||
#include "modules/audio_coding/audio_network_adaptor/mock/mock_audio_network_adaptor.h"
|
||||
#include "modules/audio_coding/codecs/opus/audio_encoder_opus.h"
|
||||
#include "modules/audio_coding/codecs/opus/opus_interface.h"
|
||||
#include "modules/audio_coding/neteq/tools/audio_loop.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/fake_clock.h"
|
||||
#include "test/field_trial.h"
|
||||
#include "test/gmock.h"
|
||||
#include "test/gtest.h"
|
||||
#include "test/testsupport/file_utils.h"
|
||||
|
||||
namespace webrtc {
|
||||
using ::testing::NiceMock;
|
||||
using ::testing::Return;
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr int kDefaultOpusPayloadType = 105;
|
||||
constexpr int kDefaultOpusRate = 32000;
|
||||
constexpr int kDefaultOpusPacSize = 960;
|
||||
constexpr int64_t kInitialTimeUs = 12345678;
|
||||
|
||||
// Test helper: builds an Opus config from a 48 kHz, two-channel "opus" SDP
// format carrying `params`. SdpToConfig() returns an optional, so a failed
// conversion is reported through RTC_CHECK instead of dereferencing an empty
// optional.
AudioEncoderOpusConfig CreateConfigWithParameters(
    const CodecParameterMap& params) {
  const SdpAudioFormat format("opus", 48000, 2, params);
  const auto config = AudioEncoderOpus::SdpToConfig(format);
  RTC_CHECK(config.has_value())
      << "SdpToConfig rejected the test SDP format.";
  return *config;
}
|
||||
|
||||
struct AudioEncoderOpusStates {
|
||||
MockAudioNetworkAdaptor* mock_audio_network_adaptor;
|
||||
MockSmoothingFilter* mock_bitrate_smoother;
|
||||
std::unique_ptr<AudioEncoderOpusImpl> encoder;
|
||||
std::unique_ptr<rtc::ScopedFakeClock> fake_clock;
|
||||
AudioEncoderOpusConfig config;
|
||||
};
|
||||
|
||||
std::unique_ptr<AudioEncoderOpusStates> CreateCodec(int sample_rate_hz,
|
||||
size_t num_channels) {
|
||||
std::unique_ptr<AudioEncoderOpusStates> states =
|
||||
std::make_unique<AudioEncoderOpusStates>();
|
||||
states->mock_audio_network_adaptor = nullptr;
|
||||
states->fake_clock.reset(new rtc::ScopedFakeClock());
|
||||
states->fake_clock->SetTime(Timestamp::Micros(kInitialTimeUs));
|
||||
|
||||
MockAudioNetworkAdaptor** mock_ptr = &states->mock_audio_network_adaptor;
|
||||
AudioEncoderOpusImpl::AudioNetworkAdaptorCreator creator =
|
||||
[mock_ptr](absl::string_view, RtcEventLog* event_log) {
|
||||
std::unique_ptr<MockAudioNetworkAdaptor> adaptor(
|
||||
new NiceMock<MockAudioNetworkAdaptor>());
|
||||
EXPECT_CALL(*adaptor, Die());
|
||||
*mock_ptr = adaptor.get();
|
||||
return adaptor;
|
||||
};
|
||||
|
||||
AudioEncoderOpusConfig config;
|
||||
config.frame_size_ms = rtc::CheckedDivExact(kDefaultOpusPacSize, 48);
|
||||
config.sample_rate_hz = sample_rate_hz;
|
||||
config.num_channels = num_channels;
|
||||
config.bitrate_bps = kDefaultOpusRate;
|
||||
config.application = num_channels == 1
|
||||
? AudioEncoderOpusConfig::ApplicationMode::kVoip
|
||||
: AudioEncoderOpusConfig::ApplicationMode::kAudio;
|
||||
config.supported_frame_lengths_ms.push_back(config.frame_size_ms);
|
||||
states->config = config;
|
||||
|
||||
std::unique_ptr<MockSmoothingFilter> bitrate_smoother(
|
||||
new MockSmoothingFilter());
|
||||
states->mock_bitrate_smoother = bitrate_smoother.get();
|
||||
|
||||
states->encoder.reset(
|
||||
new AudioEncoderOpusImpl(states->config, kDefaultOpusPayloadType, creator,
|
||||
std::move(bitrate_smoother)));
|
||||
return states;
|
||||
}
|
||||
|
||||
// Returns a runtime config with every field checked by
// CheckEncoderRuntimeConfig() populated: 40000 bps, 60 ms frames, DTX off and
// one channel.
AudioEncoderRuntimeConfig CreateEncoderRuntimeConfig() {
  constexpr int kBitrate = 40000;
  constexpr int kFrameLength = 60;
  constexpr bool kEnableDtx = false;
  constexpr size_t kNumChannels = 1;

  AudioEncoderRuntimeConfig runtime_config;
  runtime_config.num_channels = kNumChannels;
  runtime_config.enable_dtx = kEnableDtx;
  runtime_config.frame_length_ms = kFrameLength;
  runtime_config.bitrate_bps = kBitrate;
  return runtime_config;
}
|
||||
|
||||
void CheckEncoderRuntimeConfig(const AudioEncoderOpusImpl* encoder,
|
||||
const AudioEncoderRuntimeConfig& config) {
|
||||
EXPECT_EQ(*config.bitrate_bps, encoder->GetTargetBitrate());
|
||||
EXPECT_EQ(*config.frame_length_ms, encoder->next_frame_length_ms());
|
||||
EXPECT_EQ(*config.enable_dtx, encoder->GetDtx());
|
||||
EXPECT_EQ(*config.num_channels, encoder->num_channels_to_encode());
|
||||
}
|
||||
|
||||
// Create 10ms audio data blocks for a total packet size of "packet_size_ms".
|
||||
std::unique_ptr<test::AudioLoop> Create10msAudioBlocks(
|
||||
const std::unique_ptr<AudioEncoderOpusImpl>& encoder,
|
||||
int packet_size_ms) {
|
||||
const std::string file_name =
|
||||
test::ResourcePath("audio_coding/testfile32kHz", "pcm");
|
||||
|
||||
std::unique_ptr<test::AudioLoop> speech_data(new test::AudioLoop());
|
||||
int audio_samples_per_ms =
|
||||
rtc::CheckedDivExact(encoder->SampleRateHz(), 1000);
|
||||
if (!speech_data->Init(
|
||||
file_name,
|
||||
packet_size_ms * audio_samples_per_ms *
|
||||
encoder->num_channels_to_encode(),
|
||||
10 * audio_samples_per_ms * encoder->num_channels_to_encode()))
|
||||
return nullptr;
|
||||
return speech_data;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
class AudioEncoderOpusTest : public ::testing::TestWithParam<int> {
|
||||
protected:
|
||||
int sample_rate_hz_{GetParam()};
|
||||
};
|
||||
INSTANTIATE_TEST_SUITE_P(Param,
|
||||
AudioEncoderOpusTest,
|
||||
::testing::Values(16000, 48000));
|
||||
|
||||
// A mono codec from CreateCodec() reports the kVoip application mode.
TEST_P(AudioEncoderOpusTest, DefaultApplicationModeMono) {
  constexpr size_t kNumChannels = 1;
  auto states = CreateCodec(sample_rate_hz_, kNumChannels);
  AudioEncoderOpusImpl& encoder = *states->encoder;
  EXPECT_EQ(AudioEncoderOpusConfig::ApplicationMode::kVoip,
            encoder.application());
}
|
||||
|
||||
// A stereo codec from CreateCodec() reports the kAudio application mode.
TEST_P(AudioEncoderOpusTest, DefaultApplicationModeStereo) {
  constexpr size_t kNumChannels = 2;
  auto states = CreateCodec(sample_rate_hz_, kNumChannels);
  AudioEncoderOpusImpl& encoder = *states->encoder;
  EXPECT_EQ(AudioEncoderOpusConfig::ApplicationMode::kAudio,
            encoder.application());
}
|
||||
|
||||
// Requesting the kSpeech application switches a stereo codec to kVoip.
TEST_P(AudioEncoderOpusTest, ChangeApplicationMode) {
  auto states = CreateCodec(sample_rate_hz_, 2);
  AudioEncoderOpusImpl& encoder = *states->encoder;

  const bool changed =
      encoder.SetApplication(AudioEncoder::Application::kSpeech);
  EXPECT_TRUE(changed);
  EXPECT_EQ(AudioEncoderOpusConfig::ApplicationMode::kVoip,
            encoder.application());
}
|
||||
|
||||
// Reset() must keep whichever application mode is currently selected.
TEST_P(AudioEncoderOpusTest, ResetWontChangeApplicationMode) {
  using ApplicationMode = AudioEncoderOpusConfig::ApplicationMode;
  auto states = CreateCodec(sample_rate_hz_, 2);
  AudioEncoderOpusImpl& encoder = *states->encoder;

  // A reset on the freshly created stereo codec leaves it in kAudio.
  encoder.Reset();
  EXPECT_EQ(ApplicationMode::kAudio, encoder.application());

  // Switch to kVoip.
  const bool changed =
      encoder.SetApplication(AudioEncoder::Application::kSpeech);
  EXPECT_TRUE(changed);
  EXPECT_EQ(ApplicationMode::kVoip, encoder.application());

  // A second reset leaves it in kVoip.
  encoder.Reset();
  EXPECT_EQ(ApplicationMode::kVoip, encoder.application());
}
|
||||
|
||||
// DTX can be switched on and off again, and GetDtx() reflects each change.
TEST_P(AudioEncoderOpusTest, ToggleDtx) {
  auto states = CreateCodec(sample_rate_hz_, 2);
  AudioEncoderOpusImpl& encoder = *states->encoder;

  // Switch DTX on.
  const bool enabled_ok = encoder.SetDtx(true);
  EXPECT_TRUE(enabled_ok);
  EXPECT_TRUE(encoder.GetDtx());

  // Switch DTX off.
  const bool disabled_ok = encoder.SetDtx(false);
  EXPECT_TRUE(disabled_ok);
  EXPECT_FALSE(encoder.GetDtx());
}
|
||||
|
||||
// Without an audio network adaptor, the target bitrate is expected to equal
// the uplink bandwidth minus the per-packet overhead rate, limited to the
// range [kMinBitrateBps, kMaxBitrateBps].
TEST_P(AudioEncoderOpusTest,
       OnReceivedUplinkBandwidthWithoutAudioNetworkAdaptor) {
  auto states = CreateCodec(sample_rate_hz_, 1);
  // Constants are replicated from audio_encoder_opus.cc.
  const int kMinBitrateBps = 6000;
  const int kMaxBitrateBps = 510000;
  const int kOverheadBytesPerPacket = 64;
  states->encoder->OnReceivedOverhead(kOverheadBytesPerPacket);
  // Overhead in bits per second: bits per packet times packets per second.
  const int kOverheadBps = 8 * kOverheadBytesPerPacket *
                           rtc::CheckedDivExact(48000, kDefaultOpusPacSize);
  // Set a too low bitrate.
  states->encoder->OnReceivedUplinkBandwidth(kMinBitrateBps + kOverheadBps - 1,
                                             absl::nullopt);
  EXPECT_EQ(kMinBitrateBps, states->encoder->GetTargetBitrate());
  // Set a too high bitrate.
  states->encoder->OnReceivedUplinkBandwidth(kMaxBitrateBps + kOverheadBps + 1,
                                             absl::nullopt);
  EXPECT_EQ(kMaxBitrateBps, states->encoder->GetTargetBitrate());
  // Set the minimum rate.
  states->encoder->OnReceivedUplinkBandwidth(kMinBitrateBps + kOverheadBps,
                                             absl::nullopt);
  EXPECT_EQ(kMinBitrateBps, states->encoder->GetTargetBitrate());
  // Set the maximum rate.
  states->encoder->OnReceivedUplinkBandwidth(kMaxBitrateBps + kOverheadBps,
                                             absl::nullopt);
  EXPECT_EQ(kMaxBitrateBps, states->encoder->GetTargetBitrate());
  // Set rates from kMinBitrateBps up to 32000 bps.
  for (int rate = kMinBitrateBps + kOverheadBps; rate <= 32000 + kOverheadBps;
       rate += 1000) {
    states->encoder->OnReceivedUplinkBandwidth(rate, absl::nullopt);
    EXPECT_EQ(rate - kOverheadBps, states->encoder->GetTargetBitrate());
  }
}
|
||||
|
||||
// SetReceiverFrameLengthRange() restricts the supported frame lengths to the
// ones that fall inside the requested range.
TEST_P(AudioEncoderOpusTest, SetReceiverFrameLengthRange) {
  using ::testing::ElementsAre;
  auto states = CreateCodec(sample_rate_hz_, 2);
  AudioEncoderOpusImpl& encoder = *states->encoder;

  // Until `SetReceiverFrameLengthRange` is called, the only entry in
  // `supported_frame_lengths_ms` is the frame length currently in use.
  EXPECT_THAT(encoder.supported_frame_lengths_ms(),
              ElementsAre(encoder.next_frame_length_ms()));

  encoder.SetReceiverFrameLengthRange(0, 12345);
  encoder.SetReceiverFrameLengthRange(21, 60);
  EXPECT_THAT(encoder.supported_frame_lengths_ms(), ElementsAre(40, 60));

  encoder.SetReceiverFrameLengthRange(20, 59);
  EXPECT_THAT(encoder.supported_frame_lengths_ms(), ElementsAre(20, 40));
}
|
||||
|
||||
// A packet loss report is forwarded to the audio network adaptor, and the
// runtime config the adaptor returns is applied to the encoder.
TEST_P(AudioEncoderOpusTest,
       InvokeAudioNetworkAdaptorOnReceivedUplinkPacketLossFraction) {
  auto states = CreateCodec(sample_rate_hz_, 2);
  states->encoder->EnableAudioNetworkAdaptor("", nullptr);
  // The mock adaptor only exists once the adaptor has been enabled.
  MockAudioNetworkAdaptor& adaptor = *states->mock_audio_network_adaptor;

  const auto runtime_config = CreateEncoderRuntimeConfig();
  EXPECT_CALL(adaptor, GetEncoderRuntimeConfig())
      .WillOnce(Return(runtime_config));

  // The adaptor is a mock, so the exact loss fraction does not matter.
  constexpr float kUplinkPacketLoss = 0.1f;
  EXPECT_CALL(adaptor, SetUplinkPacketLossFraction(kUplinkPacketLoss));
  states->encoder->OnReceivedUplinkPacketLossFraction(kUplinkPacketLoss);

  CheckEncoderRuntimeConfig(states->encoder.get(), runtime_config);
}
|
||||
|
||||
// With the stable-target field trial disabled, an uplink bandwidth report is
// passed to the adaptor as the target audio bitrate and to the bitrate
// smoother as a sample. The adaptor's runtime config is then applied.
TEST_P(AudioEncoderOpusTest,
       InvokeAudioNetworkAdaptorOnReceivedUplinkBandwidth) {
  test::ScopedFieldTrials override_field_trials(
      "WebRTC-Audio-StableTargetAdaptation/Disabled/");
  auto states = CreateCodec(sample_rate_hz_, 2);
  states->encoder->EnableAudioNetworkAdaptor("", nullptr);
  // The mock adaptor only exists once the adaptor has been enabled.
  MockAudioNetworkAdaptor& adaptor = *states->mock_audio_network_adaptor;
  MockSmoothingFilter& smoother = *states->mock_bitrate_smoother;

  const auto runtime_config = CreateEncoderRuntimeConfig();
  EXPECT_CALL(adaptor, GetEncoderRuntimeConfig())
      .WillOnce(Return(runtime_config));

  // The adaptor is a mock, so the exact target bitrate does not matter.
  constexpr int kTargetAudioBitrate = 30000;
  constexpr int64_t kProbingIntervalMs = 3000;
  EXPECT_CALL(adaptor, SetTargetAudioBitrate(kTargetAudioBitrate));
  EXPECT_CALL(smoother, SetTimeConstantMs(kProbingIntervalMs * 4));
  EXPECT_CALL(smoother, AddSample(kTargetAudioBitrate));
  states->encoder->OnReceivedUplinkBandwidth(kTargetAudioBitrate,
                                             kProbingIntervalMs);

  CheckEncoderRuntimeConfig(states->encoder.get(), runtime_config);
}
|
||||
|
||||
// A bitrate allocation update passes its target bitrate and its stable target
// bitrate on to the adaptor, whose runtime config is then applied.
TEST_P(AudioEncoderOpusTest,
       InvokeAudioNetworkAdaptorOnReceivedUplinkAllocation) {
  auto states = CreateCodec(sample_rate_hz_, 2);
  states->encoder->EnableAudioNetworkAdaptor("", nullptr);
  // The mock adaptor only exists once the adaptor has been enabled.
  MockAudioNetworkAdaptor& adaptor = *states->mock_audio_network_adaptor;

  const auto runtime_config = CreateEncoderRuntimeConfig();
  EXPECT_CALL(adaptor, GetEncoderRuntimeConfig())
      .WillOnce(Return(runtime_config));

  BitrateAllocationUpdate allocation;
  allocation.target_bitrate = DataRate::BitsPerSec(30000);
  allocation.stable_target_bitrate = DataRate::BitsPerSec(20000);
  allocation.bwe_period = TimeDelta::Millis(200);
  EXPECT_CALL(adaptor, SetTargetAudioBitrate(allocation.target_bitrate.bps()));
  EXPECT_CALL(adaptor,
              SetUplinkBandwidth(allocation.stable_target_bitrate.bps()));
  states->encoder->OnReceivedUplinkAllocation(allocation);

  CheckEncoderRuntimeConfig(states->encoder.get(), runtime_config);
}
|
||||
|
||||
// An RTT report is forwarded to the audio network adaptor, and the runtime
// config the adaptor returns is applied to the encoder.
TEST_P(AudioEncoderOpusTest, InvokeAudioNetworkAdaptorOnReceivedRtt) {
  auto states = CreateCodec(sample_rate_hz_, 2);
  states->encoder->EnableAudioNetworkAdaptor("", nullptr);
  // The mock adaptor only exists once the adaptor has been enabled.
  MockAudioNetworkAdaptor& adaptor = *states->mock_audio_network_adaptor;

  const auto runtime_config = CreateEncoderRuntimeConfig();
  EXPECT_CALL(adaptor, GetEncoderRuntimeConfig())
      .WillOnce(Return(runtime_config));

  // The adaptor is a mock, so the exact RTT does not matter.
  constexpr int kRtt = 30;
  EXPECT_CALL(adaptor, SetRtt(kRtt));
  states->encoder->OnReceivedRtt(kRtt);

  CheckEncoderRuntimeConfig(states->encoder.get(), runtime_config);
}
|
||||
|
||||
// An overhead report is forwarded to the audio network adaptor, and the
// runtime config the adaptor returns is applied to the encoder.
TEST_P(AudioEncoderOpusTest, InvokeAudioNetworkAdaptorOnReceivedOverhead) {
  auto states = CreateCodec(sample_rate_hz_, 2);
  states->encoder->EnableAudioNetworkAdaptor("", nullptr);
  // The mock adaptor only exists once the adaptor has been enabled.
  MockAudioNetworkAdaptor& adaptor = *states->mock_audio_network_adaptor;

  const auto runtime_config = CreateEncoderRuntimeConfig();
  EXPECT_CALL(adaptor, GetEncoderRuntimeConfig())
      .WillOnce(Return(runtime_config));

  // The adaptor is a mock, so the exact overhead does not matter.
  constexpr size_t kOverhead = 64;
  EXPECT_CALL(adaptor, SetOverhead(kOverhead));
  states->encoder->OnReceivedOverhead(kOverhead);

  CheckEncoderRuntimeConfig(states->encoder.get(), runtime_config);
}
|
||||
|
||||
// The second packet loss report must be smoothed against the first one
// instead of replacing it.
TEST_P(AudioEncoderOpusTest,
       PacketLossFractionSmoothedOnSetUplinkPacketLossFraction) {
  auto states = CreateCodec(sample_rate_hz_, 2);
  AudioEncoderOpusImpl& encoder = *states->encoder;

  // These values are picked so that the test fails when no smoothing happens.
  constexpr float kPacketLossFraction_1 = 0.02f;
  constexpr float kPacketLossFraction_2 = 0.198f;
  // `kSecondSampleTimeMs` keeps the arithmetic simple because
  // 0.9999 ^ 6931 = 0.5.
  constexpr int64_t kSecondSampleTimeMs = 6931;

  // The first sample passes through unfiltered.
  encoder.OnReceivedUplinkPacketLossFraction(kPacketLossFraction_1);
  EXPECT_FLOAT_EQ(0.02f, encoder.packet_loss_rate());

  const TimeDelta elapsed = TimeDelta::Millis(kSecondSampleTimeMs);
  states->fake_clock->AdvanceTime(elapsed);
  encoder.OnReceivedUplinkPacketLossFraction(kPacketLossFraction_2);

  // The smoothed packet loss fraction should now be
  // (0.02 + 0.198) / 2 = 0.109.
  EXPECT_NEAR(0.109f, encoder.packet_loss_rate(), 0.001);
}
|
||||
|
||||
// A reported loss fraction of 0.5 yields a packet loss rate of 0.2.
TEST_P(AudioEncoderOpusTest, PacketLossRateUpperBounded) {
  auto states = CreateCodec(sample_rate_hz_, 2);
  AudioEncoderOpusImpl& encoder = *states->encoder;

  encoder.OnReceivedUplinkPacketLossFraction(0.5);
  EXPECT_FLOAT_EQ(0.2f, encoder.packet_loss_rate());
}
|
||||
|
||||
// An uplink bandwidth report that arrives before any overhead report must
// leave the target bitrate at its default.
TEST_P(AudioEncoderOpusTest, DoNotInvokeSetTargetBitrateIfOverheadUnknown) {
  auto states = CreateCodec(sample_rate_hz_, 2);
  AudioEncoderOpusImpl& encoder = *states->encoder;

  encoder.OnReceivedUplinkBandwidth(kDefaultOpusRate * 2, absl::nullopt);

  // `OnReceivedOverhead` was never called, so the codec bitrate is expected to
  // be unchanged.
  EXPECT_EQ(kDefaultOpusRate, encoder.GetTargetBitrate());
}
|
||||
|
||||
// Verifies that the complexity adaptation in the config works as intended.
|
||||
TEST(AudioEncoderOpusTest, ConfigComplexityAdaptation) {
|
||||
AudioEncoderOpusConfig config;
|
||||
config.low_rate_complexity = 8;
|
||||
config.complexity = 6;
|
||||
|
||||
// Bitrate within hysteresis window. Expect empty output.
|
||||
config.bitrate_bps = 12500;
|
||||
EXPECT_EQ(absl::nullopt, AudioEncoderOpusImpl::GetNewComplexity(config));
|
||||
|
||||
// Bitrate below hysteresis window. Expect higher complexity.
|
||||
config.bitrate_bps = 10999;
|
||||
EXPECT_EQ(8, AudioEncoderOpusImpl::GetNewComplexity(config));
|
||||
|
||||
// Bitrate within hysteresis window. Expect empty output.
|
||||
config.bitrate_bps = 12500;
|
||||
EXPECT_EQ(absl::nullopt, AudioEncoderOpusImpl::GetNewComplexity(config));
|
||||
|
||||
// Bitrate above hysteresis window. Expect lower complexity.
|
||||
config.bitrate_bps = 14001;
|
||||
EXPECT_EQ(6, AudioEncoderOpusImpl::GetNewComplexity(config));
|
||||
}
|
||||
|
||||
// Verifies that the bandwidth adaptation in the config works as intended.
|
||||
TEST_P(AudioEncoderOpusTest, ConfigBandwidthAdaptation) {
|
||||
AudioEncoderOpusConfig config;
|
||||
const size_t opus_rate_khz = rtc::CheckedDivExact(sample_rate_hz_, 1000);
|
||||
const std::vector<int16_t> silence(
|
||||
opus_rate_khz * config.frame_size_ms * config.num_channels, 0);
|
||||
constexpr size_t kMaxBytes = 1000;
|
||||
uint8_t bitstream[kMaxBytes];
|
||||
|
||||
OpusEncInst* inst;
|
||||
EXPECT_EQ(0, WebRtcOpus_EncoderCreate(
|
||||
&inst, config.num_channels,
|
||||
config.application ==
|
||||
AudioEncoderOpusConfig::ApplicationMode::kVoip
|
||||
? 0
|
||||
: 1,
|
||||
sample_rate_hz_));
|
||||
|
||||
// Bitrate below minmum wideband. Expect narrowband.
|
||||
config.bitrate_bps = absl::optional<int>(7999);
|
||||
auto bandwidth = AudioEncoderOpusImpl::GetNewBandwidth(config, inst);
|
||||
EXPECT_EQ(absl::optional<int>(OPUS_BANDWIDTH_NARROWBAND), bandwidth);
|
||||
WebRtcOpus_SetBandwidth(inst, *bandwidth);
|
||||
// It is necessary to encode here because Opus has some logic in the encoder
|
||||
// that goes from the user-set bandwidth to the used and returned one.
|
||||
WebRtcOpus_Encode(inst, silence.data(),
|
||||
rtc::CheckedDivExact(silence.size(), config.num_channels),
|
||||
kMaxBytes, bitstream);
|
||||
|
||||
// Bitrate not yet above maximum narrowband. Expect empty.
|
||||
config.bitrate_bps = absl::optional<int>(9000);
|
||||
bandwidth = AudioEncoderOpusImpl::GetNewBandwidth(config, inst);
|
||||
EXPECT_EQ(absl::optional<int>(), bandwidth);
|
||||
|
||||
// Bitrate above maximum narrowband. Expect wideband.
|
||||
config.bitrate_bps = absl::optional<int>(9001);
|
||||
bandwidth = AudioEncoderOpusImpl::GetNewBandwidth(config, inst);
|
||||
EXPECT_EQ(absl::optional<int>(OPUS_BANDWIDTH_WIDEBAND), bandwidth);
|
||||
WebRtcOpus_SetBandwidth(inst, *bandwidth);
|
||||
// It is necessary to encode here because Opus has some logic in the encoder
|
||||
// that goes from the user-set bandwidth to the used and returned one.
|
||||
WebRtcOpus_Encode(inst, silence.data(),
|
||||
rtc::CheckedDivExact(silence.size(), config.num_channels),
|
||||
kMaxBytes, bitstream);
|
||||
|
||||
// Bitrate not yet below minimum wideband. Expect empty.
|
||||
config.bitrate_bps = absl::optional<int>(8000);
|
||||
bandwidth = AudioEncoderOpusImpl::GetNewBandwidth(config, inst);
|
||||
EXPECT_EQ(absl::optional<int>(), bandwidth);
|
||||
|
||||
// Bitrate above automatic threshold. Expect automatic.
|
||||
config.bitrate_bps = absl::optional<int>(12001);
|
||||
bandwidth = AudioEncoderOpusImpl::GetNewBandwidth(config, inst);
|
||||
EXPECT_EQ(absl::optional<int>(OPUS_AUTO), bandwidth);
|
||||
|
||||
EXPECT_EQ(0, WebRtcOpus_EncoderFree(inst));
|
||||
}
|
||||
|
||||
// After a populated runtime config has been applied, a following empty one
// must leave the encoder settings as they were.
TEST_P(AudioEncoderOpusTest, EmptyConfigDoesNotAffectEncoderSettings) {
  auto states = CreateCodec(sample_rate_hz_, 2);
  states->encoder->EnableAudioNetworkAdaptor("", nullptr);
  // The mock adaptor only exists once the adaptor has been enabled.
  MockAudioNetworkAdaptor& adaptor = *states->mock_audio_network_adaptor;

  const auto populated_config = CreateEncoderRuntimeConfig();
  const AudioEncoderRuntimeConfig empty_config;

  // The first overhead report yields the populated config, the second the
  // empty one.
  EXPECT_CALL(adaptor, GetEncoderRuntimeConfig())
      .WillOnce(Return(populated_config))
      .WillOnce(Return(empty_config));

  constexpr size_t kOverhead = 64;
  EXPECT_CALL(adaptor, SetOverhead(kOverhead)).Times(2);
  states->encoder->OnReceivedOverhead(kOverhead);
  states->encoder->OnReceivedOverhead(kOverhead);

  CheckEncoderRuntimeConfig(states->encoder.get(), populated_config);
}
|
||||
|
||||
// With the stable-target field trial disabled, Encode() hands the smoothed
// bitrate to the adaptor as uplink bandwidth, but only once per update
// interval.
TEST_P(AudioEncoderOpusTest, UpdateUplinkBandwidthInAudioNetworkAdaptor) {
  test::ScopedFieldTrials override_field_trials(
      "WebRTC-Audio-StableTargetAdaptation/Disabled/");
  auto states = CreateCodec(sample_rate_hz_, 2);
  states->encoder->EnableAudioNetworkAdaptor("", nullptr);
  // The mock adaptor only exists once the adaptor has been enabled.
  MockAudioNetworkAdaptor& adaptor = *states->mock_audio_network_adaptor;
  MockSmoothingFilter& smoother = *states->mock_bitrate_smoother;

  const size_t opus_rate_khz = rtc::CheckedDivExact(sample_rate_hz_, 1000);
  const std::vector<int16_t> audio(opus_rate_khz * 10 * 2, 0);
  const rtc::ArrayView<const int16_t> audio_view(audio.data(), audio.size());
  rtc::Buffer encoded;

  EXPECT_CALL(smoother, GetAverage()).WillOnce(Return(50000));
  EXPECT_CALL(adaptor, SetUplinkBandwidth(50000));
  states->encoder->Encode(0, audio_view, &encoded);

  // Repeat the uplink bandwidth update check several times.
  for (int round = 0; round < 5; ++round) {
    // One millisecond short of the interval: no update is expected.
    states->fake_clock->AdvanceTime(TimeDelta::Millis(
        states->config.uplink_bandwidth_update_interval_ms - 1));
    states->encoder->Encode(0, audio_view, &encoded);

    // The interval has elapsed: an update is expected.
    EXPECT_CALL(smoother, GetAverage()).WillOnce(Return(40000));
    EXPECT_CALL(adaptor, SetUplinkBandwidth(40000));
    states->fake_clock->AdvanceTime(TimeDelta::Millis(1));
    states->encoder->Encode(0, audio_view, &encoded);
  }
}
|
||||
|
||||
// After an uplink bandwidth report of 0, the encoder still produces a
// non-empty packet for every second 10 ms block.
TEST_P(AudioEncoderOpusTest, EncodeAtMinBitrate) {
  auto states = CreateCodec(sample_rate_hz_, 1);
  constexpr int kNumPacketsToEncode = 2;
  auto audio_frames =
      Create10msAudioBlocks(states->encoder, kNumPacketsToEncode * 20);
  ASSERT_TRUE(audio_frames) << "Create10msAudioBlocks failed";
  AudioEncoderOpusImpl& encoder = *states->encoder;
  rtc::Buffer encoded;
  // The timestamp value itself is irrelevant to this test.
  const uint32_t rtp_timestamp = 12345;

  encoder.OnReceivedUplinkBandwidth(0, absl::nullopt);
  int packets_left = kNumPacketsToEncode;
  while (packets_left > 0) {
    // The first 10 ms block is not enough for a 20 ms packet, so nothing may
    // be emitted yet.
    encoder.Encode(rtp_timestamp, audio_frames->GetNextBlock(), &encoded);
    EXPECT_EQ(0u, encoded.size());

    // The second block completes the packet.
    encoder.Encode(rtp_timestamp, audio_frames->GetNextBlock(), &encoded);
    EXPECT_GT(encoded.size(), 0u);
    encoded.Clear();
    --packets_left;
  }
}
|
||||
|
||||
// Checks the config produced from an "opus" SDP format without parameters.
TEST(AudioEncoderOpusTest, TestConfigDefaults) {
  const auto config_opt = AudioEncoderOpus::SdpToConfig({"opus", 48000, 2});
  ASSERT_TRUE(config_opt);
  const AudioEncoderOpusConfig& defaults = *config_opt;

  EXPECT_EQ(48000, defaults.max_playback_rate_hz);
  EXPECT_EQ(1u, defaults.num_channels);
  EXPECT_FALSE(defaults.fec_enabled);
  EXPECT_FALSE(defaults.dtx_enabled);
  EXPECT_EQ(20, defaults.frame_size_ms);
}
|
||||
|
||||
// Checks how each supported SDP format parameter maps into the config.
TEST(AudioEncoderOpusTest, TestConfigFromParams) {
  // "stereo" selects the channel count.
  const auto mono = CreateConfigWithParameters({{"stereo", "0"}});
  EXPECT_EQ(1U, mono.num_channels);
  const auto stereo = CreateConfigWithParameters({{"stereo", "1"}});
  EXPECT_EQ(2U, stereo.num_channels);

  // "useinbandfec" toggles FEC.
  const auto fec_off = CreateConfigWithParameters({{"useinbandfec", "0"}});
  EXPECT_FALSE(fec_off.fec_enabled);
  const auto fec_on = CreateConfigWithParameters({{"useinbandfec", "1"}});
  EXPECT_TRUE(fec_on.fec_enabled);

  // "usedtx" toggles DTX.
  const auto dtx_off = CreateConfigWithParameters({{"usedtx", "0"}});
  EXPECT_FALSE(dtx_off.dtx_enabled);
  const auto dtx_on = CreateConfigWithParameters({{"usedtx", "1"}});
  EXPECT_TRUE(dtx_on.dtx_enabled);

  // "cbr" toggles constant bitrate.
  const auto cbr_off = CreateConfigWithParameters({{"cbr", "0"}});
  EXPECT_FALSE(cbr_off.cbr_enabled);
  const auto cbr_on = CreateConfigWithParameters({{"cbr", "1"}});
  EXPECT_TRUE(cbr_on.cbr_enabled);

  // "maxplaybackrate" and "maxaveragebitrate" are taken over as given.
  const auto playback_rate =
      CreateConfigWithParameters({{"maxplaybackrate", "12345"}});
  EXPECT_EQ(12345, playback_rate.max_playback_rate_hz);
  const auto average_bitrate =
      CreateConfigWithParameters({{"maxaveragebitrate", "96000"}});
  EXPECT_EQ(96000, average_bitrate.bitrate_bps);

  // "maxptime" and "minptime" bound the supported frame lengths.
  const auto max_ptime = CreateConfigWithParameters({{"maxptime", "40"}});
  for (const int length_ms : max_ptime.supported_frame_lengths_ms) {
    EXPECT_LE(length_ms, 40);
  }
  const auto min_ptime = CreateConfigWithParameters({{"minptime", "40"}});
  for (const int length_ms : min_ptime.supported_frame_lengths_ms) {
    EXPECT_GE(length_ms, 40);
  }

  // "ptime" sets the frame size.
  const auto ptime_40 = CreateConfigWithParameters({{"ptime", "40"}});
  EXPECT_EQ(40, ptime_40.frame_size_ms);

  // Out-of-range "ptime" values end up at the nearest supported frame length.
  constexpr int kMinSupportedFrameLength = 10;
  constexpr int kMaxSupportedFrameLength =
      WEBRTC_OPUS_SUPPORT_120MS_PTIME ? 120 : 60;

  const auto ptime_too_small = CreateConfigWithParameters({{"ptime", "1"}});
  EXPECT_EQ(kMinSupportedFrameLength, ptime_too_small.frame_size_ms);

  const auto ptime_too_large = CreateConfigWithParameters({{"ptime", "2000"}});
  EXPECT_EQ(kMaxSupportedFrameLength, ptime_too_large.frame_size_ms);
}
|
||||
|
||||
// Checks that invalid SDP format parameter values either fall back to the
// defaults or, for "maxaveragebitrate", are limited to the range
// [6000, 510000].
TEST(AudioEncoderOpusTest, TestConfigFromInvalidParams) {
  const webrtc::SdpAudioFormat format("opus", 48000, 2);
  const auto default_config = *AudioEncoderOpus::SdpToConfig(format);
#if WEBRTC_OPUS_SUPPORT_120MS_PTIME
  const std::vector<int> default_supported_frame_lengths_ms({20, 40, 60, 120});
#else
  const std::vector<int> default_supported_frame_lengths_ms({20, 40, 60});
#endif

  AudioEncoderOpusConfig config;
  config = CreateConfigWithParameters({{"stereo", "invalid"}});
  EXPECT_EQ(default_config.num_channels, config.num_channels);

  config = CreateConfigWithParameters({{"useinbandfec", "invalid"}});
  EXPECT_EQ(default_config.fec_enabled, config.fec_enabled);

  config = CreateConfigWithParameters({{"usedtx", "invalid"}});
  EXPECT_EQ(default_config.dtx_enabled, config.dtx_enabled);

  // The "cbr" case must look at `cbr_enabled`. Checking `dtx_enabled` here
  // would leave the "cbr" parameter untested.
  config = CreateConfigWithParameters({{"cbr", "invalid"}});
  EXPECT_EQ(default_config.cbr_enabled, config.cbr_enabled);

  config = CreateConfigWithParameters({{"maxplaybackrate", "0"}});
  EXPECT_EQ(default_config.max_playback_rate_hz, config.max_playback_rate_hz);

  config = CreateConfigWithParameters({{"maxplaybackrate", "-23"}});
  EXPECT_EQ(default_config.max_playback_rate_hz, config.max_playback_rate_hz);

  config = CreateConfigWithParameters({{"maxplaybackrate", "not a number!"}});
  EXPECT_EQ(default_config.max_playback_rate_hz, config.max_playback_rate_hz);

  config = CreateConfigWithParameters({{"maxaveragebitrate", "0"}});
  EXPECT_EQ(6000, config.bitrate_bps);

  config = CreateConfigWithParameters({{"maxaveragebitrate", "-1000"}});
  EXPECT_EQ(6000, config.bitrate_bps);

  config = CreateConfigWithParameters({{"maxaveragebitrate", "1024000"}});
  EXPECT_EQ(510000, config.bitrate_bps);

  config = CreateConfigWithParameters({{"maxaveragebitrate", "not a number!"}});
  EXPECT_EQ(default_config.bitrate_bps, config.bitrate_bps);

  config = CreateConfigWithParameters({{"maxptime", "invalid"}});
  EXPECT_EQ(default_supported_frame_lengths_ms,
            config.supported_frame_lengths_ms);

  config = CreateConfigWithParameters({{"minptime", "invalid"}});
  EXPECT_EQ(default_supported_frame_lengths_ms,
            config.supported_frame_lengths_ms);

  config = CreateConfigWithParameters({{"ptime", "invalid"}});
  EXPECT_EQ(default_supported_frame_lengths_ms,
            config.supported_frame_lengths_ms);
}
|
||||
|
||||
// With "maxptime" and "ptime" both set to 10, the encoder's frame length
// range has 10 ms as both of its ends.
TEST(AudioEncoderOpusTest, GetFrameLenghtRange) {
  AudioEncoderOpusConfig config =
      CreateConfigWithParameters({{"maxptime", "10"}, {"ptime", "10"}});
  std::unique_ptr<AudioEncoder> encoder =
      AudioEncoderOpus::MakeAudioEncoder(config, kDefaultOpusPayloadType);

  const auto ten_ms = webrtc::TimeDelta::Millis(10);
  const absl::optional<std::pair<webrtc::TimeDelta, webrtc::TimeDelta>>
      expected_range = std::make_pair(ten_ms, ten_ms);
  EXPECT_EQ(encoder->GetFrameLengthRange(), expected_range);
}
|
||||
|
||||
// Test that bitrate will be overridden by the "maxaveragebitrate" parameter.
|
||||
// Also test that the "maxaveragebitrate" can't be set to values outside the
|
||||
// range of 6000 and 510000
|
||||
TEST(AudioEncoderOpusTest, SetSendCodecOpusMaxAverageBitrate) {
|
||||
// Ignore if less than 6000.
|
||||
const auto config1 = AudioEncoderOpus::SdpToConfig(
|
||||
{"opus", 48000, 2, {{"maxaveragebitrate", "5999"}}});
|
||||
EXPECT_EQ(6000, config1->bitrate_bps);
|
||||
|
||||
// Ignore if larger than 510000.
|
||||
const auto config2 = AudioEncoderOpus::SdpToConfig(
|
||||
{"opus", 48000, 2, {{"maxaveragebitrate", "510001"}}});
|
||||
EXPECT_EQ(510000, config2->bitrate_bps);
|
||||
|
||||
const auto config3 = AudioEncoderOpus::SdpToConfig(
|
||||
{"opus", 48000, 2, {{"maxaveragebitrate", "200000"}}});
|
||||
EXPECT_EQ(200000, config3->bitrate_bps);
|
||||
}
|
||||
|
||||
// Test maxplaybackrate <= 8000 triggers Opus narrow band mode.
|
||||
TEST(AudioEncoderOpusTest, SetMaxPlaybackRateNb) {
|
||||
auto config = CreateConfigWithParameters({{"maxplaybackrate", "8000"}});
|
||||
EXPECT_EQ(8000, config.max_playback_rate_hz);
|
||||
EXPECT_EQ(12000, config.bitrate_bps);
|
||||
|
||||
config = CreateConfigWithParameters(
|
||||
{{"maxplaybackrate", "8000"}, {"stereo", "1"}});
|
||||
EXPECT_EQ(8000, config.max_playback_rate_hz);
|
||||
EXPECT_EQ(24000, config.bitrate_bps);
|
||||
}
|
||||
|
||||
// Test 8000 < maxplaybackrate <= 12000 triggers Opus medium band mode.
|
||||
TEST(AudioEncoderOpusTest, SetMaxPlaybackRateMb) {
|
||||
auto config = CreateConfigWithParameters({{"maxplaybackrate", "8001"}});
|
||||
EXPECT_EQ(8001, config.max_playback_rate_hz);
|
||||
EXPECT_EQ(20000, config.bitrate_bps);
|
||||
|
||||
config = CreateConfigWithParameters(
|
||||
{{"maxplaybackrate", "8001"}, {"stereo", "1"}});
|
||||
EXPECT_EQ(8001, config.max_playback_rate_hz);
|
||||
EXPECT_EQ(40000, config.bitrate_bps);
|
||||
}
|
||||
|
||||
// Test 12000 < maxplaybackrate <= 16000 triggers Opus wide band mode.
|
||||
TEST(AudioEncoderOpusTest, SetMaxPlaybackRateWb) {
|
||||
auto config = CreateConfigWithParameters({{"maxplaybackrate", "12001"}});
|
||||
EXPECT_EQ(12001, config.max_playback_rate_hz);
|
||||
EXPECT_EQ(20000, config.bitrate_bps);
|
||||
|
||||
config = CreateConfigWithParameters(
|
||||
{{"maxplaybackrate", "12001"}, {"stereo", "1"}});
|
||||
EXPECT_EQ(12001, config.max_playback_rate_hz);
|
||||
EXPECT_EQ(40000, config.bitrate_bps);
|
||||
}
|
||||
|
||||
// Test 16000 < maxplaybackrate <= 24000 triggers Opus super wide band mode.
|
||||
TEST(AudioEncoderOpusTest, SetMaxPlaybackRateSwb) {
|
||||
auto config = CreateConfigWithParameters({{"maxplaybackrate", "16001"}});
|
||||
EXPECT_EQ(16001, config.max_playback_rate_hz);
|
||||
EXPECT_EQ(32000, config.bitrate_bps);
|
||||
|
||||
config = CreateConfigWithParameters(
|
||||
{{"maxplaybackrate", "16001"}, {"stereo", "1"}});
|
||||
EXPECT_EQ(16001, config.max_playback_rate_hz);
|
||||
EXPECT_EQ(64000, config.bitrate_bps);
|
||||
}
|
||||
|
||||
// Test 24000 < maxplaybackrate triggers Opus full band mode.
|
||||
TEST(AudioEncoderOpusTest, SetMaxPlaybackRateFb) {
|
||||
auto config = CreateConfigWithParameters({{"maxplaybackrate", "24001"}});
|
||||
EXPECT_EQ(24001, config.max_playback_rate_hz);
|
||||
EXPECT_EQ(32000, config.bitrate_bps);
|
||||
|
||||
config = CreateConfigWithParameters(
|
||||
{{"maxplaybackrate", "24001"}, {"stereo", "1"}});
|
||||
EXPECT_EQ(24001, config.max_playback_rate_hz);
|
||||
EXPECT_EQ(64000, config.bitrate_bps);
|
||||
}
|
||||
|
||||
// Encodes a file containing speech and silence with DTX enabled and expects
// a run of more than 15 consecutive packets flagged as non-speech.
//
// Opening the input file is a fatal assertion: if the audio loop cannot be
// initialized there is nothing meaningful to encode, so the test stops there
// instead of feeding blocks from an uninitialized loop to the encoder.
TEST_P(AudioEncoderOpusTest, OpusFlagDtxAsNonSpeech) {
  // Create encoder with DTX enabled.
  AudioEncoderOpusConfig config;
  config.dtx_enabled = true;
  config.sample_rate_hz = sample_rate_hz_;
  constexpr int payload_type = 17;
  const auto encoder = AudioEncoderOpus::MakeAudioEncoder(config, payload_type);

  // Open file containing speech and silence.
  const std::string kInputFileName =
      webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm");
  test::AudioLoop audio_loop;
  // Use the file as if it were sampled at our desired input rate.
  const size_t max_loop_length_samples =
      sample_rate_hz_ * 10;  // Max 10 second loop.
  const size_t input_block_size_samples =
      10 * sample_rate_hz_ / 1000;  // 10 ms.
  ASSERT_TRUE(audio_loop.Init(kInputFileName, max_loop_length_samples,
                              input_block_size_samples));

  // Encode.
  AudioEncoder::EncodedInfo info;
  rtc::Buffer encoded(500);
  int nonspeech_frames = 0;
  int max_nonspeech_frames = 0;
  int dtx_frames = 0;
  int max_dtx_frames = 0;
  uint32_t rtp_timestamp = 0u;
  for (size_t i = 0; i < 500; ++i) {
    encoded.Clear();

    // Every second call to the encoder will generate an Opus packet, so
    // `info` describes that packet after the inner loop.
    for (int j = 0; j < 2; j++) {
      info =
          encoder->Encode(rtp_timestamp, audio_loop.GetNextBlock(), &encoded);
      rtp_timestamp += input_block_size_samples;
    }

    // Bookkeeping of number of DTX frames. A packet of at most 2 bytes is
    // counted as a DTX frame.
    if (info.encoded_bytes <= 2) {
      ++dtx_frames;
    } else {
      if (dtx_frames > max_dtx_frames)
        max_dtx_frames = dtx_frames;
      dtx_frames = 0;
    }

    // Bookkeeping of number of non-speech frames. A run is only recorded in
    // the maximum once a speech frame ends it.
    if (info.speech == 0) {
      ++nonspeech_frames;
    } else {
      if (nonspeech_frames > max_nonspeech_frames)
        max_nonspeech_frames = nonspeech_frames;
      nonspeech_frames = 0;
    }
  }

  // Maximum number of consecutive non-speech packets should exceed 15.
  EXPECT_GT(max_nonspeech_frames, 15);
}
|
||||
|
||||
// With the "avoid noise pumping during DTX" field trial enabled, this test
// first encodes a speech file until the encoder enters DTX and captures 200 ms
// of the input that follows. That captured segment is then replayed in six
// bursts: three scaled by 1.4 and three scaled by 0.0. No non-empty packet
// (> 2 bytes) may be produced during the amplified bursts, while at least one
// must be produced during the attenuated bursts.
TEST(AudioEncoderOpusTest, OpusDtxFilteringHighEnergyRefreshPackets) {
  test::ScopedFieldTrials override_field_trials(
      "WebRTC-Audio-OpusAvoidNoisePumpingDuringDtx/Enabled/");

  constexpr int kSampleRateHz = 16000;
  constexpr int payload_type = 17;
  constexpr size_t kMaxLoopLengthSamples = kSampleRateHz * 11.6f;
  constexpr size_t kInputBlockSizeSamples = kSampleRateHz / 100;
  constexpr size_t kSilenceDurationSamples = kSampleRateHz * 0.2f;

  const std::string kInputFileName =
      webrtc::test::ResourcePath("audio_coding/testfile16kHz", "pcm");

  AudioEncoderOpusConfig config;
  config.dtx_enabled = true;
  config.sample_rate_hz = kSampleRateHz;
  const auto encoder = AudioEncoderOpus::MakeAudioEncoder(config, payload_type);

  test::AudioLoop audio_loop;
  EXPECT_TRUE(audio_loop.Init(kInputFileName, kMaxLoopLengthSamples,
                              kInputBlockSizeSamples));

  AudioEncoder::EncodedInfo info;
  rtc::Buffer encoded(500);

  // Phase 1: encode the file and keep the part that corresponds to silence.
  std::array<int16_t, kSilenceDurationSamples> silence;
  uint32_t rtp_timestamp = 0;
  bool last_packet_dtx_frame = false;
  bool opus_entered_dtx = false;
  bool silence_filled = false;
  size_t timestamp_start_silence = 0;
  while (!silence_filled && rtp_timestamp < kMaxLoopLengthSamples) {
    encoded.Clear();
    // Every second call to the encoder will generate an Opus packet.
    for (int block = 0; block < 2; ++block) {
      auto next_frame = audio_loop.GetNextBlock();
      info = encoder->Encode(rtp_timestamp, next_frame, &encoded);
      if (opus_entered_dtx) {
        // Offset of this block relative to the point where DTX started.
        size_t silence_offset = rtp_timestamp - timestamp_start_silence;
        silence_filled = silence_offset >= kSilenceDurationSamples;
        if (!silence_filled) {
          std::copy(next_frame.begin(), next_frame.end(),
                    silence.begin() + silence_offset);
        }
      }
      rtp_timestamp += kInputBlockSizeSamples;
    }

    // An empty payload is only acceptable directly after a DTX packet.
    EXPECT_TRUE(info.encoded_bytes > 0 || last_packet_dtx_frame);
    if (info.encoded_bytes > 0) {
      last_packet_dtx_frame = info.encoded_bytes <= 2;
    }

    const bool packet_is_dtx = info.encoded_bytes <= 2;
    if (packet_is_dtx && !opus_entered_dtx) {
      timestamp_start_silence = rtp_timestamp;
    }
    opus_entered_dtx = packet_is_dtx;
  }

  EXPECT_TRUE(silence_filled);

  // Phase 2: replay the captured 200 ms in 6 bursts, the first three with a
  // gain of 1.4 and the last three with a gain of 0.0, counting the packets
  // larger than 2 bytes produced in each group.
  int number_non_empty_packets_during_increase = 0;
  int number_non_empty_packets_during_decrease = 0;
  for (size_t burst = 0; burst < 6; ++burst) {
    const uint32_t burst_start_timestamp = rtp_timestamp;
    const bool increase_noise = burst < 3;
    const float gain = increase_noise ? 1.4f : 0.0f;
    while (rtp_timestamp < burst_start_timestamp + kSilenceDurationSamples) {
      encoded.Clear();
      // Every second call to the encoder will generate an Opus packet.
      for (int block = 0; block < 2; ++block) {
        std::array<int16_t, kInputBlockSizeSamples> scaled_frame;
        size_t frame_offset = rtp_timestamp - burst_start_timestamp;
        const auto frame_begin = silence.begin() + frame_offset;
        std::transform(frame_begin, frame_begin + kInputBlockSizeSamples,
                       scaled_frame.begin(),
                       [gain](float s) { return gain * s; });
        info = encoder->Encode(rtp_timestamp, scaled_frame, &encoded);
        rtp_timestamp += kInputBlockSizeSamples;
      }

      EXPECT_TRUE(info.encoded_bytes > 0 || last_packet_dtx_frame);
      if (info.encoded_bytes > 0) {
        last_packet_dtx_frame = info.encoded_bytes <= 2;
      }

      // Tracking the number of non empty packets.
      if (info.encoded_bytes > 2) {
        if (increase_noise) {
          number_non_empty_packets_during_increase++;
        } else {
          number_non_empty_packets_during_decrease++;
        }
      }
    }
  }

  // Non-empty packets are only expected in the decreased-energy bursts.
  EXPECT_EQ(number_non_empty_packets_during_increase, 0);
  EXPECT_GT(number_non_empty_packets_during_decrease, 0);
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
|
@ -0,0 +1,152 @@
|
|||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "api/audio_codecs/opus/audio_decoder_opus.h"
|
||||
#include "api/audio_codecs/opus/audio_encoder_opus.h"
|
||||
#include "common_audio/include/audio_util.h"
|
||||
#include "common_audio/window_generator.h"
|
||||
#include "modules/audio_coding/codecs/opus/test/lapped_transform.h"
|
||||
#include "modules/audio_coding/neteq/tools/audio_loop.h"
|
||||
#include "test/field_trial.h"
|
||||
#include "test/gtest.h"
|
||||
#include "test/testsupport/file_utils.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
// Audio format used throughout this test.
constexpr size_t kNumChannels = 1u;
constexpr int kSampleRateHz = 48000;

// Block sizes, in samples, derived from the sample rate.
constexpr size_t kMaxLoopLengthSamples = 50 * kSampleRateHz;   // 50 seconds.
constexpr size_t kInputBlockSizeSamples = kSampleRateHz / 100;  // 10 ms
constexpr size_t kOutputBlockSizeSamples = kSampleRateHz / 50;  // 20 ms

// Spectral analysis parameters. `kNarrowbandSize` is the number of FFT bins
// given by 4000 * kFftSize / kSampleRateHz.
constexpr size_t kFftSize = 1024;
constexpr size_t kNarrowbandSize = 4000 * kFftSize / kSampleRateHz;
constexpr float kKbdAlpha = 1.5f;
|
||||
|
||||
class PowerRatioEstimator : public LappedTransform::Callback {
|
||||
public:
|
||||
PowerRatioEstimator() : low_pow_(0.f), high_pow_(0.f) {
|
||||
WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_);
|
||||
transform_.reset(new LappedTransform(kNumChannels, 0u,
|
||||
kInputBlockSizeSamples, window_,
|
||||
kFftSize, kFftSize / 2, this));
|
||||
}
|
||||
|
||||
void ProcessBlock(float* data) { transform_->ProcessChunk(&data, nullptr); }
|
||||
|
||||
float PowerRatio() { return high_pow_ / low_pow_; }
|
||||
|
||||
protected:
|
||||
void ProcessAudioBlock(const std::complex<float>* const* input,
|
||||
size_t num_input_channels,
|
||||
size_t num_freq_bins,
|
||||
size_t num_output_channels,
|
||||
std::complex<float>* const* output) override {
|
||||
float low_pow = 0.f;
|
||||
float high_pow = 0.f;
|
||||
for (size_t i = 0u; i < num_input_channels; ++i) {
|
||||
for (size_t j = 0u; j < kNarrowbandSize; ++j) {
|
||||
float low_mag = std::abs(input[i][j]);
|
||||
low_pow += low_mag * low_mag;
|
||||
float high_mag = std::abs(input[i][j + kNarrowbandSize]);
|
||||
high_pow += high_mag * high_mag;
|
||||
}
|
||||
}
|
||||
low_pow_ += low_pow / (num_input_channels * kFftSize);
|
||||
high_pow_ += high_pow / (num_input_channels * kFftSize);
|
||||
}
|
||||
|
||||
private:
|
||||
std::unique_ptr<LappedTransform> transform_;
|
||||
float window_[kFftSize];
|
||||
float low_pow_;
|
||||
float high_pow_;
|
||||
};
|
||||
|
||||
// Pushes 1000 input blocks from `audio_loop` through `encoder`, decodes every
// packet the encoder emits with `decoder`, and returns the power ratio that a
// PowerRatioEstimator reports for the successfully decoded audio.
float EncodedPowerRatio(AudioEncoder* encoder,
                        AudioDecoder* decoder,
                        test::AudioLoop* audio_loop) {
  constexpr size_t kBufferSize = 500;
  constexpr size_t kNumInputBlocks = 1000;

  rtc::Buffer encoded(kBufferSize);
  std::vector<int16_t> decoded(kOutputBlockSizeSamples);
  std::vector<float> decoded_float(kOutputBlockSizeSamples);
  AudioDecoder::SpeechType speech_type = AudioDecoder::kSpeech;
  PowerRatioEstimator power_ratio_estimator;

  uint32_t rtp_timestamp = 0u;
  for (size_t block = 0; block < kNumInputBlocks; ++block) {
    encoded.Clear();
    encoder->Encode(rtp_timestamp, audio_loop->GetNextBlock(), &encoded);
    rtp_timestamp += kInputBlockSizeSamples;

    // Not every call yields a packet; skip until the encoder has output.
    if (encoded.size() == 0) {
      continue;
    }

    const int decode_result = decoder->Decode(
        encoded.data(), encoded.size(), kSampleRateHz,
        decoded.size() * sizeof(decoded[0]), decoded.data(), &speech_type);
    // Only positive return values are fed to the estimator.
    if (decode_result <= 0) {
      continue;
    }

    S16ToFloat(decoded.data(), decoded.size(), decoded_float.data());
    power_ratio_estimator.ProcessBlock(decoded_float.data());
  }
  return power_ratio_estimator.PowerRatio();
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// TODO(ivoc): Remove this test, WebRTC-AdjustOpusBandwidth is obsolete.
|
||||
TEST(BandwidthAdaptationTest, BandwidthAdaptationTest) {
|
||||
test::ScopedFieldTrials override_field_trials(
|
||||
"WebRTC-AdjustOpusBandwidth/Enabled/");
|
||||
|
||||
constexpr float kMaxNarrowbandRatio = 0.0035f;
|
||||
constexpr float kMinWidebandRatio = 0.01f;
|
||||
|
||||
// Create encoder.
|
||||
AudioEncoderOpusConfig enc_config;
|
||||
enc_config.bitrate_bps = absl::optional<int>(7999);
|
||||
enc_config.num_channels = kNumChannels;
|
||||
constexpr int payload_type = 17;
|
||||
auto encoder = AudioEncoderOpus::MakeAudioEncoder(enc_config, payload_type);
|
||||
|
||||
// Create decoder.
|
||||
AudioDecoderOpus::Config dec_config;
|
||||
dec_config.num_channels = kNumChannels;
|
||||
auto decoder = AudioDecoderOpus::MakeAudioDecoder(dec_config);
|
||||
|
||||
// Open speech file.
|
||||
const std::string kInputFileName =
|
||||
webrtc::test::ResourcePath("audio_coding/speech_mono_32_48kHz", "pcm");
|
||||
test::AudioLoop audio_loop;
|
||||
EXPECT_EQ(kSampleRateHz, encoder->SampleRateHz());
|
||||
ASSERT_TRUE(audio_loop.Init(kInputFileName, kMaxLoopLengthSamples,
|
||||
kInputBlockSizeSamples));
|
||||
|
||||
EXPECT_LT(EncodedPowerRatio(encoder.get(), decoder.get(), &audio_loop),
|
||||
kMaxNarrowbandRatio);
|
||||
|
||||
encoder->OnReceivedTargetAudioBitrate(9000);
|
||||
EXPECT_LT(EncodedPowerRatio(encoder.get(), decoder.get(), &audio_loop),
|
||||
kMaxNarrowbandRatio);
|
||||
|
||||
encoder->OnReceivedTargetAudioBitrate(9001);
|
||||
EXPECT_GT(EncodedPowerRatio(encoder.get(), decoder.get(), &audio_loop),
|
||||
kMinWidebandRatio);
|
||||
|
||||
encoder->OnReceivedTargetAudioBitrate(8000);
|
||||
EXPECT_GT(EncodedPowerRatio(encoder.get(), decoder.get(), &audio_loop),
|
||||
kMinWidebandRatio);
|
||||
|
||||
encoder->OnReceivedTargetAudioBitrate(12001);
|
||||
EXPECT_GT(EncodedPowerRatio(encoder.get(), decoder.get(), &audio_loop),
|
||||
kMinWidebandRatio);
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
|
@ -0,0 +1,105 @@
|
|||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "api/audio_codecs/opus/audio_encoder_opus.h"
|
||||
#include "api/test/metrics/global_metrics_logger_and_exporter.h"
|
||||
#include "api/test/metrics/metric.h"
|
||||
#include "modules/audio_coding/neteq/tools/audio_loop.h"
|
||||
#include "rtc_base/time_utils.h"
|
||||
#include "test/gtest.h"
|
||||
#include "test/testsupport/file_utils.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
using ::webrtc::test::GetGlobalMetricsLogger;
|
||||
using ::webrtc::test::ImprovementDirection;
|
||||
using ::webrtc::test::Unit;
|
||||
|
||||
int64_t RunComplexityTest(const AudioEncoderOpusConfig& config) {
|
||||
// Create encoder.
|
||||
constexpr int payload_type = 17;
|
||||
const auto encoder = AudioEncoderOpus::MakeAudioEncoder(config, payload_type);
|
||||
// Open speech file.
|
||||
const std::string kInputFileName =
|
||||
webrtc::test::ResourcePath("audio_coding/speech_mono_32_48kHz", "pcm");
|
||||
test::AudioLoop audio_loop;
|
||||
constexpr int kSampleRateHz = 48000;
|
||||
EXPECT_EQ(kSampleRateHz, encoder->SampleRateHz());
|
||||
constexpr size_t kMaxLoopLengthSamples =
|
||||
kSampleRateHz * 10; // 10 second loop.
|
||||
constexpr size_t kInputBlockSizeSamples =
|
||||
10 * kSampleRateHz / 1000; // 60 ms.
|
||||
EXPECT_TRUE(audio_loop.Init(kInputFileName, kMaxLoopLengthSamples,
|
||||
kInputBlockSizeSamples));
|
||||
// Encode.
|
||||
const int64_t start_time_ms = rtc::TimeMillis();
|
||||
AudioEncoder::EncodedInfo info;
|
||||
rtc::Buffer encoded(500);
|
||||
uint32_t rtp_timestamp = 0u;
|
||||
for (size_t i = 0; i < 10000; ++i) {
|
||||
encoded.Clear();
|
||||
info = encoder->Encode(rtp_timestamp, audio_loop.GetNextBlock(), &encoded);
|
||||
rtp_timestamp += kInputBlockSizeSamples;
|
||||
}
|
||||
return rtc::TimeMillis() - start_time_ms;
|
||||
}
|
||||
|
||||
// This test encodes an audio file using Opus twice with different bitrates
|
||||
// (~11 kbps and 15.5 kbps). The runtime for each is measured, and the ratio
|
||||
// between the two is calculated and tracked. This test explicitly sets the
|
||||
// low_rate_complexity to 9. When running on desktop platforms, this is the same
|
||||
// as the regular complexity, and the expectation is that the resulting ratio
|
||||
// should be less than 100% (since the encoder runs faster at lower bitrates,
|
||||
// given a fixed complexity setting). On the other hand, when running on
|
||||
// mobiles, the regular complexity is 5, and we expect the resulting ratio to
|
||||
// be higher, since we have explicitly asked for a higher complexity setting at
|
||||
// the lower rate.
|
||||
TEST(AudioEncoderOpusComplexityAdaptationTest, Adaptation_On) {
|
||||
// Create config.
|
||||
AudioEncoderOpusConfig config;
|
||||
// The limit -- including the hysteresis window -- at which the complexity
|
||||
// shuold be increased.
|
||||
config.bitrate_bps = 11000 - 1;
|
||||
config.low_rate_complexity = 9;
|
||||
int64_t runtime_10999bps = RunComplexityTest(config);
|
||||
|
||||
config.bitrate_bps = 15500;
|
||||
int64_t runtime_15500bps = RunComplexityTest(config);
|
||||
|
||||
GetGlobalMetricsLogger()->LogSingleValueMetric(
|
||||
"opus_encoding_complexity_ratio", "adaptation_on",
|
||||
100.0 * runtime_10999bps / runtime_15500bps, Unit::kPercent,
|
||||
ImprovementDirection::kNeitherIsBetter);
|
||||
}
|
||||
|
||||
// This test is identical to the one above, but without the complexity
|
||||
// adaptation enabled (neither on desktop, nor on mobile). The expectation is
|
||||
// that the resulting ratio is less than 100% at all times.
|
||||
TEST(AudioEncoderOpusComplexityAdaptationTest, Adaptation_Off) {
|
||||
// Create config.
|
||||
AudioEncoderOpusConfig config;
|
||||
// The limit -- including the hysteresis window -- at which the complexity
|
||||
// shuold be increased (but not in this test since complexity adaptation is
|
||||
// disabled).
|
||||
config.bitrate_bps = 11000 - 1;
|
||||
int64_t runtime_10999bps = RunComplexityTest(config);
|
||||
|
||||
config.bitrate_bps = 15500;
|
||||
int64_t runtime_15500bps = RunComplexityTest(config);
|
||||
|
||||
GetGlobalMetricsLogger()->LogSingleValueMetric(
|
||||
"opus_encoding_complexity_ratio", "adaptation_off",
|
||||
100.0 * runtime_10999bps / runtime_15500bps, Unit::kPercent,
|
||||
ImprovementDirection::kNeitherIsBetter);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace webrtc
|
||||
|
|
@ -0,0 +1,248 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "modules/audio_coding/codecs/opus/opus_interface.h"
|
||||
#include "test/gtest.h"
|
||||
#include "test/testsupport/file_utils.h"
|
||||
|
||||
using std::get;
|
||||
using std::string;
|
||||
using std::tuple;
|
||||
using ::testing::TestWithParam;
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Define coding parameter as <channels, bit_rate, filename, extension>.
|
||||
typedef tuple<size_t, int, string, string> coding_param;
|
||||
// One FEC configuration exercised by the packet loss test.
struct mode {
  // Whether FEC is enabled on the encoder.
  bool fec;
  // Packet loss rate, in percent, handed to the encoder.
  uint8_t target_packet_loss_rate;
};
|
||||
|
||||
// Duration of one encoded block, in milliseconds.
constexpr int kOpusBlockDurationMs = 20;
// Sample rate in kHz; multiplied by the block duration to get the block
// length in samples.
constexpr int kOpusSamplingKhz = 48;
|
||||
|
||||
// Parameterized fixture that encodes a looped PCM file with Opus and decodes
// it again while simulating packet loss.
class OpusFecTest : public TestWithParam<coding_param> {
 protected:
  OpusFecTest();

  // Loads the input file, allocates the buffers and creates the codecs.
  void SetUp() override;
  // Frees the codecs.
  void TearDown() override;

  // Encodes the block at `data_pointer_` into `bit_stream_`.
  virtual void EncodeABlock();

  // Decodes into `out_data_`, recovering the previous block (via FEC or PLC)
  // when `lost_previous` is set and decoding the current one unless
  // `lost_current` is set.
  virtual void DecodeABlock(bool lost_previous, bool lost_current);

  // Block geometry. `block_length_sample_` is initialized from the two fields
  // before it, so the declaration order of these three must be kept.
  int block_duration_ms_;
  int sampling_khz_;
  size_t block_length_sample_;

  // Coding parameters taken from the test parameter.
  size_t channels_;
  int bit_rate_;

  // Read position in `in_data_` and sizes of the buffers below.
  size_t data_pointer_;
  size_t loop_length_samples_;
  size_t max_bytes_;
  size_t encoded_bytes_;

  WebRtcOpusEncInst* opus_encoder_;
  WebRtcOpusDecInst* opus_decoder_;

  std::string in_filename_;

  std::unique_ptr<int16_t[]> in_data_;
  std::unique_ptr<int16_t[]> out_data_;
  std::unique_ptr<uint8_t[]> bit_stream_;
};
|
||||
|
||||
void OpusFecTest::SetUp() {
|
||||
channels_ = get<0>(GetParam());
|
||||
bit_rate_ = get<1>(GetParam());
|
||||
printf("Coding %zu channel signal at %d bps.\n", channels_, bit_rate_);
|
||||
|
||||
in_filename_ = test::ResourcePath(get<2>(GetParam()), get<3>(GetParam()));
|
||||
|
||||
FILE* fp = fopen(in_filename_.c_str(), "rb");
|
||||
ASSERT_FALSE(fp == NULL);
|
||||
|
||||
// Obtain file size.
|
||||
fseek(fp, 0, SEEK_END);
|
||||
loop_length_samples_ = ftell(fp) / sizeof(int16_t);
|
||||
rewind(fp);
|
||||
|
||||
// Allocate memory to contain the whole file.
|
||||
in_data_.reset(
|
||||
new int16_t[loop_length_samples_ + block_length_sample_ * channels_]);
|
||||
|
||||
// Copy the file into the buffer.
|
||||
ASSERT_EQ(fread(&in_data_[0], sizeof(int16_t), loop_length_samples_, fp),
|
||||
loop_length_samples_);
|
||||
fclose(fp);
|
||||
|
||||
// The audio will be used in a looped manner. To ease the acquisition of an
|
||||
// audio frame that crosses the end of the excerpt, we add an extra block
|
||||
// length of samples to the end of the array, starting over again from the
|
||||
// beginning of the array. Audio frames cross the end of the excerpt always
|
||||
// appear as a continuum of memory.
|
||||
memcpy(&in_data_[loop_length_samples_], &in_data_[0],
|
||||
block_length_sample_ * channels_ * sizeof(int16_t));
|
||||
|
||||
// Maximum number of bytes in output bitstream.
|
||||
max_bytes_ = block_length_sample_ * channels_ * sizeof(int16_t);
|
||||
|
||||
out_data_.reset(new int16_t[2 * block_length_sample_ * channels_]);
|
||||
bit_stream_.reset(new uint8_t[max_bytes_]);
|
||||
|
||||
// If channels_ == 1, use Opus VOIP mode, otherwise, audio mode.
|
||||
int app = channels_ == 1 ? 0 : 1;
|
||||
|
||||
// Create encoder memory.
|
||||
EXPECT_EQ(0, WebRtcOpus_EncoderCreate(&opus_encoder_, channels_, app, 48000));
|
||||
EXPECT_EQ(0, WebRtcOpus_DecoderCreate(&opus_decoder_, channels_, 48000));
|
||||
// Set bitrate.
|
||||
EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_, bit_rate_));
|
||||
}
|
||||
|
||||
// Releases the encoder and decoder instances; both calls are expected to
// report success (0).
void OpusFecTest::TearDown() {
  const auto encoder_free_result = WebRtcOpus_EncoderFree(opus_encoder_);
  EXPECT_EQ(0, encoder_free_result);
  const auto decoder_free_result = WebRtcOpus_DecoderFree(opus_decoder_);
  EXPECT_EQ(0, decoder_free_result);
}
|
||||
|
||||
// Sets the block geometry from the file-level Opus constants and clears the
// counters and codec handles. The remaining members are filled in by SetUp().
OpusFecTest::OpusFecTest()
    : block_duration_ms_(kOpusBlockDurationMs),
      sampling_khz_(kOpusSamplingKhz),
      // Samples per block = duration in ms * samples per ms.
      block_length_sample_(
          static_cast<size_t>(block_duration_ms_ * sampling_khz_)),
      data_pointer_(0),
      max_bytes_(0),
      encoded_bytes_(0),
      opus_encoder_(nullptr),
      opus_decoder_(nullptr) {}
|
||||
|
||||
void OpusFecTest::EncodeABlock() {
|
||||
int value =
|
||||
WebRtcOpus_Encode(opus_encoder_, &in_data_[data_pointer_],
|
||||
block_length_sample_, max_bytes_, &bit_stream_[0]);
|
||||
EXPECT_GT(value, 0);
|
||||
|
||||
encoded_bytes_ = static_cast<size_t>(value);
|
||||
}
|
||||
|
||||
void OpusFecTest::DecodeABlock(bool lost_previous, bool lost_current) {
|
||||
int16_t audio_type;
|
||||
int value_1 = 0, value_2 = 0;
|
||||
|
||||
if (lost_previous) {
|
||||
// Decode previous frame.
|
||||
if (!lost_current &&
|
||||
WebRtcOpus_PacketHasFec(&bit_stream_[0], encoded_bytes_) == 1) {
|
||||
value_1 =
|
||||
WebRtcOpus_DecodeFec(opus_decoder_, &bit_stream_[0], encoded_bytes_,
|
||||
&out_data_[0], &audio_type);
|
||||
} else {
|
||||
// Call decoder PLC.
|
||||
while (value_1 < static_cast<int>(block_length_sample_)) {
|
||||
int ret = WebRtcOpus_Decode(opus_decoder_, NULL, 0, &out_data_[value_1],
|
||||
&audio_type);
|
||||
EXPECT_EQ(ret, sampling_khz_ * 10); // Should return 10 ms of samples.
|
||||
value_1 += ret;
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(static_cast<int>(block_length_sample_), value_1);
|
||||
}
|
||||
|
||||
if (!lost_current) {
|
||||
// Decode current frame.
|
||||
value_2 = WebRtcOpus_Decode(opus_decoder_, &bit_stream_[0], encoded_bytes_,
|
||||
&out_data_[value_1 * channels_], &audio_type);
|
||||
EXPECT_EQ(static_cast<int>(block_length_sample_), value_2);
|
||||
}
|
||||
}
|
||||
|
||||
// Runs three FEC configurations for `kDurationMs` of audio each, dropping
// packets at random with a probability equal to the configured target loss
// rate. Whenever FEC is disabled or the target loss rate is 0, no packet may
// carry FEC.
TEST_P(OpusFecTest, RandomPacketLossTest) {
  const int kDurationMs = 200000;
  const mode mode_set[3] = {{true, 0}, {false, 0}, {true, 50}};

  // The loss state of the last packet carries over from one mode to the next.
  bool lost_current = false;
  for (const mode& current_mode : mode_set) {
    if (current_mode.fec) {
      EXPECT_EQ(0, WebRtcOpus_EnableFec(opus_encoder_));
      EXPECT_EQ(0, WebRtcOpus_SetPacketLossRate(
                       opus_encoder_, current_mode.target_packet_loss_rate));
      printf("FEC is ON, target at packet loss rate %d percent.\n",
             current_mode.target_packet_loss_rate);
    } else {
      EXPECT_EQ(0, WebRtcOpus_DisableFec(opus_encoder_));
      printf("FEC is OFF.\n");
    }

    // In this test, we let the target packet loss rate match the actual rate.
    const int actual_packet_loss_rate = current_mode.target_packet_loss_rate;
    const bool fec_expected =
        current_mode.fec && current_mode.target_packet_loss_rate != 0;

    // Run every mode a certain time.
    int fec_frames = 0;
    for (int time_now_ms = 0; time_now_ms < kDurationMs;
         time_now_ms += block_duration_ms_) {
      EncodeABlock();

      // Check if payload has FEC.
      const int fec = WebRtcOpus_PacketHasFec(&bit_stream_[0], encoded_bytes_);
      if (!fec_expected) {
        EXPECT_EQ(fec, 0);
      } else if (fec == 1) {
        fec_frames++;
      }

      // Draw the loss state of this packet and decode accordingly.
      const bool lost_previous = lost_current;
      lost_current = rand() < actual_packet_loss_rate * (RAND_MAX / 100);
      DecodeABlock(lost_previous, lost_current);

      // `data_pointer_` is incremented and wrapped across
      // `loop_length_samples_`.
      data_pointer_ = (data_pointer_ + block_length_sample_ * channels_) %
                      loop_length_samples_;
    }

    if (current_mode.fec) {
      printf("%.2f percent frames has FEC.\n",
             static_cast<float>(fec_frames) * block_duration_ms_ / 2000);
    }
  }
}
|
||||
|
||||
// Configurations the FEC test is run with, as
// <channels, bit_rate, filename, extension>.
const coding_param param_set[] = {
    // Mono, 64 kbps.
    coding_param(1, 64000, "audio_coding/testfile32kHz", "pcm"),
    // Mono, 32 kbps.
    coding_param(1, 32000, "audio_coding/testfile32kHz", "pcm"),
    // Stereo, 64 kbps.
    coding_param(2, 64000, "audio_coding/teststereo32kHz", "pcm")};
|
||||
|
||||
// Run the FEC test once for every configuration in `param_set`.
|
||||
INSTANTIATE_TEST_SUITE_P(AllTest, OpusFecTest, ::testing::ValuesIn(param_set));
|
||||
|
||||
} // namespace webrtc
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INST_H_
|
||||
#define MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INST_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "rtc_base/ignore_wundef.h"
|
||||
|
||||
RTC_PUSH_IGNORING_WUNDEF()
|
||||
#include "opus.h"
|
||||
#include "opus_multistream.h"
|
||||
RTC_POP_IGNORING_WUNDEF()
|
||||
|
||||
struct WebRtcOpusEncInst {
|
||||
OpusEncoder* encoder;
|
||||
OpusMSEncoder* multistream_encoder;
|
||||
size_t channels;
|
||||
int in_dtx_mode;
|
||||
bool avoid_noise_pumping_during_dtx;
|
||||
int sample_rate_hz;
|
||||
float smooth_energy_non_active_frames;
|
||||
};
|
||||
|
||||
struct WebRtcOpusDecInst {
|
||||
OpusDecoder* decoder;
|
||||
OpusMSDecoder* multistream_decoder;
|
||||
int prev_decoded_samples;
|
||||
bool plc_use_prev_decoded_samples;
|
||||
size_t channels;
|
||||
int in_dtx_mode;
|
||||
int sample_rate_hz;
|
||||
};
|
||||
|
||||
#endif // MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INST_H_
|
||||
|
|
@ -0,0 +1,880 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_coding/codecs/opus/opus_interface.h"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <numeric>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "system_wrappers/include/field_trial.h"
|
||||
|
||||
// Frame duration limits, in milliseconds.
enum {
#if WEBRTC_OPUS_SUPPORT_120MS_PTIME
  // Maximum supported frame size in WebRTC is 120 ms.
  kWebRtcOpusMaxEncodeFrameSizeMs = 120,
#else
  // Maximum supported frame size in WebRTC is 60 ms.
  kWebRtcOpusMaxEncodeFrameSizeMs = 60,
#endif

  // The format allows up to 120 ms frames. Since we don't control the other
  // side, we must allow for packets of that size. NetEq is currently limited
  // to 60 ms on the receive side.
  kWebRtcOpusMaxDecodeFrameSizeMs = 120,

  // Duration of audio that each call to packet loss concealment covers.
  kWebRtcOpusPlcFrameSizeMs = 10,
};
|
||||
|
||||
// Names of the field trials consulted by this file.

// Field trial concerning the use of previously decoded samples for PLC.
constexpr char kPlcUsePrevDecodedSamplesFieldTrial[] =
    "WebRTC-Audio-OpusPlcUsePrevDecodedSamples";

// Field trial concerning noise pumping avoidance while in DTX.
constexpr char kAvoidNoisePumpingDuringDtxFieldTrial[] =
    "WebRTC-Audio-OpusAvoidNoisePumpingDuringDtx";

// Field trial concerning the "voice" signal type setting when DTX is used.
constexpr char kSetSignalVoiceWithDtxFieldTrial[] =
    "WebRTC-Audio-OpusSetSignalVoiceWithDtx";
|
||||
|
||||
// Returns the number of samples per channel contained in a frame of
// `frame_size_ms` milliseconds at `sample_rate_hz`. Debug checks require a
// positive frame size that is a multiple of 10 ms and a positive sample rate
// that is a multiple of 1000 Hz.
static int FrameSizePerChannel(int frame_size_ms, int sample_rate_hz) {
  RTC_DCHECK_GT(frame_size_ms, 0);
  RTC_DCHECK_EQ(frame_size_ms % 10, 0);
  RTC_DCHECK_GT(sample_rate_hz, 0);
  RTC_DCHECK_EQ(sample_rate_hz % 1000, 0);
  const int samples_per_ms = sample_rate_hz / 1000;
  return frame_size_ms * samples_per_ms;
}
|
||||
|
||||
// Maximum sample count per channel.
|
||||
static int MaxFrameSizePerChannel(int sample_rate_hz) {
|
||||
return FrameSizePerChannel(kWebRtcOpusMaxDecodeFrameSizeMs, sample_rate_hz);
|
||||
}
|
||||
|
||||
// Default sample count per channel.
|
||||
static int DefaultFrameSizePerChannel(int sample_rate_hz) {
|
||||
return FrameSizePerChannel(20, sample_rate_hz);
|
||||
}
|
||||
|
||||
// Returns true if the `encoded` payload corresponds to a refresh DTX packet
|
||||
// whose energy is larger than the expected for non activity packets.
|
||||
static bool WebRtcOpus_IsHighEnergyRefreshDtxPacket(
|
||||
OpusEncInst* inst,
|
||||
rtc::ArrayView<const int16_t> frame,
|
||||
rtc::ArrayView<const uint8_t> encoded) {
|
||||
if (encoded.size() <= 2) {
|
||||
return false;
|
||||
}
|
||||
int number_frames =
|
||||
frame.size() / DefaultFrameSizePerChannel(inst->sample_rate_hz);
|
||||
if (number_frames > 0 &&
|
||||
WebRtcOpus_PacketHasVoiceActivity(encoded.data(), encoded.size()) == 0) {
|
||||
const float average_frame_energy =
|
||||
std::accumulate(frame.begin(), frame.end(), 0.0f,
|
||||
[](float a, int32_t b) { return a + b * b; }) /
|
||||
number_frames;
|
||||
if (WebRtcOpus_GetInDtx(inst) == 1 &&
|
||||
average_frame_energy >= inst->smooth_energy_non_active_frames * 0.5f) {
|
||||
// This is a refresh DTX packet as the encoder is in DTX and has
|
||||
// produced a payload > 2 bytes. This refresh packet has a higher energy
|
||||
// than the smooth energy of non activity frames (with a 3 dB negative
|
||||
// margin) and, therefore, it is flagged as a high energy refresh DTX
|
||||
// packet.
|
||||
return true;
|
||||
}
|
||||
// The average energy is tracked in a similar way as the modeling of the
|
||||
// comfort noise in the Silk decoder in Opus
|
||||
// (third_party/opus/src/silk/CNG.c).
|
||||
if (average_frame_energy < inst->smooth_energy_non_active_frames * 0.5f) {
|
||||
inst->smooth_energy_non_active_frames = average_frame_energy;
|
||||
} else {
|
||||
inst->smooth_energy_non_active_frames +=
|
||||
(average_frame_energy - inst->smooth_energy_non_active_frames) *
|
||||
0.25f;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Creates a single-stream Opus encoder and stores its context in `*inst`.
//
// `application` selects the Opus application mode: 0 maps to
// OPUS_APPLICATION_VOIP and 1 maps to OPUS_APPLICATION_AUDIO.
//
// Returns 0 on success. Returns -1 if `inst` is null, `application` is not
// 0 or 1, the context cannot be allocated, or libopus fails to create the
// encoder; `*inst` is left untouched in all failure cases.
int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst,
                                 size_t channels,
                                 int32_t application,
                                 int sample_rate_hz) {
  if (!inst)
    return -1;

  int opus_app;
  switch (application) {
    case 0:
      opus_app = OPUS_APPLICATION_VOIP;
      break;
    case 1:
      opus_app = OPUS_APPLICATION_AUDIO;
      break;
    default:
      return -1;
  }

  // calloc() zero-initializes the context, so both encoder pointers start out
  // as null.
  OpusEncInst* state =
      static_cast<OpusEncInst*>(calloc(1, sizeof(OpusEncInst)));
  if (!state) {
    // Fail gracefully instead of dereferencing a null pointer in builds where
    // DCHECKs are compiled out.
    return -1;
  }

  int error = OPUS_OK;
  state->encoder = opus_encoder_create(
      sample_rate_hz, static_cast<int>(channels), opus_app, &error);

  if (error != OPUS_OK || (!state->encoder && !state->multistream_encoder)) {
    WebRtcOpus_EncoderFree(state);
    return -1;
  }

  state->in_dtx_mode = 0;
  state->channels = channels;
  state->sample_rate_hz = sample_rate_hz;
  state->smooth_energy_non_active_frames = 0.0f;
  state->avoid_noise_pumping_during_dtx =
      webrtc::field_trial::IsEnabled(kAvoidNoisePumpingDuringDtxFieldTrial);

  *inst = state;
  return 0;
}
|
||||
|
||||
// Creates a multistream Opus encoder, always running at 48000 Hz, and stores
// its context in `*inst`.
//
// `application` selects the Opus application mode: 0 maps to
// OPUS_APPLICATION_VOIP and 1 maps to OPUS_APPLICATION_AUDIO. `streams`,
// `coupled_streams` and `channel_mapping` are forwarded to
// opus_multistream_encoder_create().
//
// Returns 0 on success. Returns -1 if `inst` is null, `application` is not
// 0 or 1, the context cannot be allocated, or libopus fails to create the
// encoder; `*inst` is left untouched in all failure cases.
int16_t WebRtcOpus_MultistreamEncoderCreate(
    OpusEncInst** inst,
    size_t channels,
    int32_t application,
    size_t streams,
    size_t coupled_streams,
    const unsigned char* channel_mapping) {
  if (!inst)
    return -1;

  int opus_app;
  switch (application) {
    case 0:
      opus_app = OPUS_APPLICATION_VOIP;
      break;
    case 1:
      opus_app = OPUS_APPLICATION_AUDIO;
      break;
    default:
      return -1;
  }

  // calloc() zero-initializes the context, so both encoder pointers start out
  // as null.
  OpusEncInst* state =
      static_cast<OpusEncInst*>(calloc(1, sizeof(OpusEncInst)));
  if (!state) {
    // Fail gracefully instead of dereferencing a null pointer in builds where
    // DCHECKs are compiled out.
    return -1;
  }

  int error = OPUS_OK;
  const int sample_rate_hz = 48000;
  state->multistream_encoder = opus_multistream_encoder_create(
      sample_rate_hz, static_cast<int>(channels), static_cast<int>(streams),
      static_cast<int>(coupled_streams), channel_mapping, opus_app, &error);

  if (error != OPUS_OK || (!state->encoder && !state->multistream_encoder)) {
    WebRtcOpus_EncoderFree(state);
    return -1;
  }

  state->in_dtx_mode = 0;
  state->channels = channels;
  state->sample_rate_hz = sample_rate_hz;
  state->smooth_energy_non_active_frames = 0.0f;
  state->avoid_noise_pumping_during_dtx = false;

  *inst = state;
  return 0;
}
|
||||
|
||||
// Destroys the Opus encoder owned by `inst` and releases the context itself.
// Returns 0 on success and -1 when `inst` is null.
int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }

  // A context owns either a single-stream or a multistream encoder.
  if (!inst->encoder) {
    opus_multistream_encoder_destroy(inst->multistream_encoder);
  } else {
    opus_encoder_destroy(inst->encoder);
  }
  free(inst);
  return 0;
}
|
||||
|
||||
// Encodes `samples` samples per channel from `audio_in` into `encoded`, which
// can hold `length_encoded_buffer` bytes.
//
// Returns the number of bytes to transmit, 0 when the encoder stays in DTX and
// nothing needs to be sent, or -1 on error (too many samples or an encoder
// failure).
int WebRtcOpus_Encode(OpusEncInst* inst,
                      const int16_t* audio_in,
                      size_t samples,
                      size_t length_encoded_buffer,
                      uint8_t* encoded) {
  if (samples > 48 * kWebRtcOpusMaxEncodeFrameSizeMs) {
    return -1;
  }

  const opus_int16* const pcm = reinterpret_cast<const opus_int16*>(audio_in);
  const int frame_size = static_cast<int>(samples);
  const opus_int32 max_bytes = static_cast<opus_int32>(length_encoded_buffer);

  const int res =
      inst->encoder
          ? opus_encode(inst->encoder, pcm, frame_size, encoded, max_bytes)
          : opus_multistream_encode(inst->multistream_encoder, pcm, frame_size,
                                    encoded, max_bytes);
  if (res <= 0) {
    return -1;
  }

  if (res <= 2) {
    // Indicates DTX since the packet has nothing but a header. In principle,
    // there is no need to send this packet. However, we do transmit the first
    // occurrence to let the decoder know that the encoder enters DTX mode.
    if (inst->in_dtx_mode) {
      return 0;
    }
    inst->in_dtx_mode = 1;
    return res;
  }

  const bool is_high_energy_refresh =
      inst->avoid_noise_pumping_during_dtx &&
      WebRtcOpus_GetUseDtx(inst) == 1 &&
      WebRtcOpus_IsHighEnergyRefreshDtxPacket(
          inst, rtc::MakeArrayView(audio_in, samples),
          rtc::MakeArrayView(encoded, res));
  if (!is_high_energy_refresh) {
    inst->in_dtx_mode = 0;
    return res;
  }

  // This packet is a high energy refresh DTX packet. To avoid an increase of
  // the energy in the DTX region at the decoder, the packet is substituted by
  // a TOC byte with one empty frame. The frame count code in the TOC byte
  // (https://tools.ietf.org/html/rfc6716#section-3.1) is overwritten to always
  // indicate one frame (last two bits equal to 0).
  encoded[0] &= 0xFC;
  inst->in_dtx_mode = 1;
  // The payload is just the TOC byte and has 1 byte as length.
  return 1;
}
|
||||
|
||||
// Forwards a CTL request to whichever encoder `inst` owns: the single-stream
// encoder when `inst->encoder` is set, the multistream encoder otherwise.
// `inst` is parenthesized so that any pointer expression can be passed safely.
// `vargs` is deliberately left bare because the OPUS_* request macros expand
// to a comma-separated argument list.
#define ENCODER_CTL(inst, vargs)                       \
  ((inst)->encoder                                     \
       ? opus_encoder_ctl((inst)->encoder, vargs)      \
       : opus_multistream_encoder_ctl((inst)->multistream_encoder, vargs))
|
||||
|
||||
// Applies `rate` as the target bitrate through OPUS_SET_BITRATE. Returns the
// CTL result, or -1 when `inst` is null.
int16_t WebRtcOpus_SetBitRate(OpusEncInst* inst, int32_t rate) {
  if (!inst) {
    return -1;
  }
  return ENCODER_CTL(inst, OPUS_SET_BITRATE(rate));
}
|
||||
|
||||
// Applies `loss_rate` as the expected packet loss percentage through
// OPUS_SET_PACKET_LOSS_PERC. Returns the CTL result, or -1 when `inst` is
// null.
int16_t WebRtcOpus_SetPacketLossRate(OpusEncInst* inst, int32_t loss_rate) {
  if (!inst) {
    return -1;
  }
  return ENCODER_CTL(inst, OPUS_SET_PACKET_LOSS_PERC(loss_rate));
}
|
||||
|
||||
// Maps `frequency_hz` to the narrowest Opus bandwidth that covers it and sets
// it as the encoder's maximum bandwidth. Returns the CTL result, or -1 when
// `inst` is null.
int16_t WebRtcOpus_SetMaxPlaybackRate(OpusEncInst* inst, int32_t frequency_hz) {
  if (!inst)
    return -1;

  // Thresholds are inclusive upper bounds; anything above 24 kHz is fullband.
  const opus_int32 max_bandwidth =
      frequency_hz <= 8000    ? OPUS_BANDWIDTH_NARROWBAND
      : frequency_hz <= 12000 ? OPUS_BANDWIDTH_MEDIUMBAND
      : frequency_hz <= 16000 ? OPUS_BANDWIDTH_WIDEBAND
      : frequency_hz <= 24000 ? OPUS_BANDWIDTH_SUPERWIDEBAND
                              : OPUS_BANDWIDTH_FULLBAND;
  return ENCODER_CTL(inst, OPUS_SET_MAX_BANDWIDTH(max_bandwidth));
}
|
||||
|
||||
// Writes the value reported by OPUS_GET_MAX_BANDWIDTH to `*result_hz`.
//
// For a multistream encoder every underlying stream encoder is queried and
// all of them must report the same value. Returns 0 on success and -1 when a
// CTL call fails or the streams disagree.
int16_t WebRtcOpus_GetMaxPlaybackRate(OpusEncInst* const inst,
                                      int32_t* result_hz) {
  if (inst->encoder) {
    const int status =
        opus_encoder_ctl(inst->encoder, OPUS_GET_MAX_BANDWIDTH(result_hz));
    return status == OPUS_OK ? 0 : -1;
  }

  opus_int32 common_bandwidth = 0;
  for (int stream = 0;; ++stream) {
    OpusEncoder* stream_encoder;
    const int status = opus_multistream_encoder_ctl(
        inst->multistream_encoder,
        OPUS_MULTISTREAM_GET_ENCODER_STATE(stream, &stream_encoder));
    // OPUS_BAD_ARG ends the iteration over the stream indices.
    if (status == OPUS_BAD_ARG)
      break;
    if (status != OPUS_OK)
      return -1;

    opus_int32 stream_bandwidth;
    if (opus_encoder_ctl(stream_encoder,
                         OPUS_GET_MAX_BANDWIDTH(&stream_bandwidth)) !=
        OPUS_OK) {
      return -1;
    }

    // A value of 0 means no stream has been recorded yet.
    if (common_bandwidth != 0 && common_bandwidth != stream_bandwidth)
      return -1;
    common_bandwidth = stream_bandwidth;
  }

  *result_hz = common_bandwidth;
  return 0;
}
|
||||
|
||||
// Turns in-band FEC on via OPUS_SET_INBAND_FEC(1). Returns the CTL result, or
// -1 when `inst` is null.
int16_t WebRtcOpus_EnableFec(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }
  return ENCODER_CTL(inst, OPUS_SET_INBAND_FEC(1));
}
|
||||
|
||||
// Turns in-band FEC off via OPUS_SET_INBAND_FEC(0). Returns the CTL result, or
// -1 when `inst` is null.
int16_t WebRtcOpus_DisableFec(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }
  return ENCODER_CTL(inst, OPUS_SET_INBAND_FEC(0));
}
|
||||
|
||||
// Enables DTX on the encoder. When the `kSetSignalVoiceWithDtxFieldTrial`
// field trial is enabled, the signal type is first set to OPUS_SIGNAL_VOICE
// and a failure of that step is returned immediately. Returns the CTL result,
// or -1 when `inst` is null.
int16_t WebRtcOpus_EnableDtx(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }

  if (webrtc::field_trial::IsEnabled(kSetSignalVoiceWithDtxFieldTrial)) {
    const int signal_status =
        ENCODER_CTL(inst, OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE));
    if (signal_status != OPUS_OK) {
      return signal_status;
    }
  }
  return ENCODER_CTL(inst, OPUS_SET_DTX(1));
}
|
||||
|
||||
// Disables DTX on the encoder. When the `kSetSignalVoiceWithDtxFieldTrial`
// field trial is enabled, the signal type is first reset to OPUS_AUTO and a
// failure of that step is returned immediately. Returns the CTL result, or -1
// when `inst` is null.
int16_t WebRtcOpus_DisableDtx(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }

  if (webrtc::field_trial::IsEnabled(kSetSignalVoiceWithDtxFieldTrial)) {
    const int signal_status = ENCODER_CTL(inst, OPUS_SET_SIGNAL(OPUS_AUTO));
    if (signal_status != OPUS_OK) {
      return signal_status;
    }
  }
  return ENCODER_CTL(inst, OPUS_SET_DTX(0));
}
|
||||
|
||||
// Returns the value reported by OPUS_GET_DTX, or -1 when `inst` is null or the
// CTL call fails.
int16_t WebRtcOpus_GetUseDtx(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }

  opus_int32 dtx_setting;
  const int status = ENCODER_CTL(inst, OPUS_GET_DTX(&dtx_setting));
  if (status != 0) {
    return -1;
  }
  return dtx_setting;
}
|
||||
|
||||
// Switches the encoder to constant bitrate by disabling VBR
// (OPUS_SET_VBR(0)). Returns the CTL result, or -1 when `inst` is null.
int16_t WebRtcOpus_EnableCbr(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }
  return ENCODER_CTL(inst, OPUS_SET_VBR(0));
}
|
||||
|
||||
// Switches the encoder back to variable bitrate (OPUS_SET_VBR(1)). Returns
// the CTL result, or -1 when `inst` is null.
int16_t WebRtcOpus_DisableCbr(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }
  return ENCODER_CTL(inst, OPUS_SET_VBR(1));
}
|
||||
|
||||
// Applies `complexity` through OPUS_SET_COMPLEXITY. Returns the CTL result, or
// -1 when `inst` is null.
int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity) {
  if (!inst) {
    return -1;
  }
  return ENCODER_CTL(inst, OPUS_SET_COMPLEXITY(complexity));
}
|
||||
|
||||
// Returns the value reported by OPUS_GET_BANDWIDTH, or -1 when `inst` is null
// or the CTL call fails.
int32_t WebRtcOpus_GetBandwidth(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }

  int32_t current_bandwidth;
  const int status = ENCODER_CTL(inst, OPUS_GET_BANDWIDTH(&current_bandwidth));
  if (status != 0) {
    return -1;
  }
  return current_bandwidth;
}
|
||||
|
||||
// Applies `bandwidth` through OPUS_SET_BANDWIDTH. Returns the CTL result, or
// -1 when `inst` is null.
int16_t WebRtcOpus_SetBandwidth(OpusEncInst* inst, int32_t bandwidth) {
  if (!inst) {
    return -1;
  }
  return ENCODER_CTL(inst, OPUS_SET_BANDWIDTH(bandwidth));
}
|
||||
|
||||
// Forces the encoder to `num_channels` channels (1 or 2), or restores
// automatic selection when `num_channels` is 0. Returns the CTL result, or -1
// when `inst` is null or `num_channels` is larger than 2.
int16_t WebRtcOpus_SetForceChannels(OpusEncInst* inst, size_t num_channels) {
  if (!inst)
    return -1;

  switch (num_channels) {
    case 0:
      return ENCODER_CTL(inst, OPUS_SET_FORCE_CHANNELS(OPUS_AUTO));
    case 1:
    case 2:
      return ENCODER_CTL(inst, OPUS_SET_FORCE_CHANNELS(num_channels));
    default:
      return -1;
  }
}
|
||||
|
||||
// Returns the value reported by OPUS_GET_IN_DTX. Returns -1 when `inst` is
// null, when the CTL call fails, or when the Opus headers in use do not define
// OPUS_GET_IN_DTX.
int32_t WebRtcOpus_GetInDtx(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }
#ifdef OPUS_GET_IN_DTX
  int32_t encoder_in_dtx;
  const int status = ENCODER_CTL(inst, OPUS_GET_IN_DTX(&encoder_in_dtx));
  if (status == 0) {
    return encoder_in_dtx;
  }
#endif
  return -1;
}
|
||||
|
||||
// Creates a single-stream Opus decoder for `channels` channels at
// `sample_rate_hz` and stores its context in `*inst`. Returns 0 on success and
// -1 when `inst` is null, allocation fails, or libopus reports an error.
int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst,
                                 size_t channels,
                                 int sample_rate_hz) {
  if (inst == nullptr) {
    return -1;
  }

  // Create Opus decoder state.
  OpusDecInst* state =
      static_cast<OpusDecInst*>(calloc(1, sizeof(OpusDecInst)));
  if (state == nullptr) {
    return -1;
  }

  int error;
  state->decoder =
      opus_decoder_create(sample_rate_hz, static_cast<int>(channels), &error);
  if (error != OPUS_OK || !state->decoder) {
    // Creation failed: release whatever was allocated.
    if (state->decoder) {
      opus_decoder_destroy(state->decoder);
    }
    free(state);
    return -1;
  }

  // The decoder was created successfully; fill in the context.
  state->channels = channels;
  state->sample_rate_hz = sample_rate_hz;
  state->plc_use_prev_decoded_samples =
      webrtc::field_trial::IsEnabled(kPlcUsePrevDecodedSamplesFieldTrial);
  if (state->plc_use_prev_decoded_samples) {
    state->prev_decoded_samples =
        DefaultFrameSizePerChannel(state->sample_rate_hz);
  }
  state->in_dtx_mode = 0;
  *inst = state;
  return 0;
}
|
||||
|
||||
// Creates a multistream Opus decoder, always running at 48000 Hz, and stores
// its context in `*inst`. `streams`, `coupled_streams` and `channel_mapping`
// are forwarded to opus_multistream_decoder_create(). Returns 0 on success and
// -1 when `inst` is null, allocation fails, or libopus reports an error.
int16_t WebRtcOpus_MultistreamDecoderCreate(
    OpusDecInst** inst,
    size_t channels,
    size_t streams,
    size_t coupled_streams,
    const unsigned char* channel_mapping) {
  if (inst == nullptr) {
    return -1;
  }

  // Create Opus decoder state.
  OpusDecInst* state =
      static_cast<OpusDecInst*>(calloc(1, sizeof(OpusDecInst)));
  if (state == nullptr) {
    return -1;
  }

  // Create new memory, always at 48000 Hz.
  int error;
  state->multistream_decoder = opus_multistream_decoder_create(
      48000, channels, streams, coupled_streams, channel_mapping, &error);
  if (error != OPUS_OK || !state->multistream_decoder) {
    // Creation failed: release whatever was allocated.
    opus_multistream_decoder_destroy(state->multistream_decoder);
    free(state);
    return -1;
  }

  // The decoder was created successfully; fill in the context.
  state->channels = channels;
  state->sample_rate_hz = 48000;
  state->plc_use_prev_decoded_samples =
      webrtc::field_trial::IsEnabled(kPlcUsePrevDecodedSamplesFieldTrial);
  if (state->plc_use_prev_decoded_samples) {
    state->prev_decoded_samples =
        DefaultFrameSizePerChannel(state->sample_rate_hz);
  }
  state->in_dtx_mode = 0;
  *inst = state;
  return 0;
}
|
||||
|
||||
// Destroys the Opus decoder owned by `inst`, if any, and releases the context
// itself. Returns 0 on success and -1 when `inst` is null.
int16_t WebRtcOpus_DecoderFree(OpusDecInst* inst) {
  if (!inst) {
    return -1;
  }

  if (inst->decoder) {
    opus_decoder_destroy(inst->decoder);
  } else if (inst->multistream_decoder) {
    opus_multistream_decoder_destroy(inst->multistream_decoder);
  }
  free(inst);
  return 0;
}
|
||||
|
||||
// Returns the channel count stored in the decoder context.
size_t WebRtcOpus_DecoderChannels(OpusDecInst* inst) {
  const size_t channel_count = inst->channels;
  return channel_count;
}
|
||||
|
||||
// Resets the state of the underlying Opus decoder (single-stream or
// multistream) and clears the DTX flag of the context.
void WebRtcOpus_DecoderInit(OpusDecInst* inst) {
  if (!inst->decoder) {
    opus_multistream_decoder_ctl(inst->multistream_decoder, OPUS_RESET_STATE);
  } else {
    opus_decoder_ctl(inst->decoder, OPUS_RESET_STATE);
  }
  inst->in_dtx_mode = 0;
}
|
||||
|
||||
/* For decoder to determine if it is to output speech or comfort noise. */
|
||||
static int16_t DetermineAudioType(OpusDecInst* inst, size_t encoded_bytes) {
|
||||
// Audio type becomes comfort noise if `encoded_byte` is 1 and keeps
|
||||
// to be so if the following `encoded_byte` are 0 or 1.
|
||||
if (encoded_bytes == 0 && inst->in_dtx_mode) {
|
||||
return 2; // Comfort noise.
|
||||
} else if (encoded_bytes == 1 || encoded_bytes == 2) {
|
||||
// TODO(henrik.lundin): There is a slight risk that a 2-byte payload is in
|
||||
// fact a 1-byte TOC with a 1-byte payload. That will be erroneously
|
||||
// interpreted as comfort noise output, but such a payload is probably
|
||||
// faulty anyway.
|
||||
|
||||
// TODO(webrtc:10218): This is wrong for multistream opus. Then are several
|
||||
// single-stream packets glued together with some packet size bytes in
|
||||
// between. See https://tools.ietf.org/html/rfc6716#appendix-B
|
||||
inst->in_dtx_mode = 1;
|
||||
return 2; // Comfort noise.
|
||||
} else {
|
||||
inst->in_dtx_mode = 0;
|
||||
return 0; // Speech.
|
||||
}
|
||||
}
|
||||
|
||||
/* `frame_size` is set to maximum Opus frame size in the normal case, and
|
||||
* is set to the number of samples needed for PLC in case of losses.
|
||||
* It is up to the caller to make sure the value is correct. */
|
||||
static int DecodeNative(OpusDecInst* inst,
|
||||
const uint8_t* encoded,
|
||||
size_t encoded_bytes,
|
||||
int frame_size,
|
||||
int16_t* decoded,
|
||||
int16_t* audio_type,
|
||||
int decode_fec) {
|
||||
int res = -1;
|
||||
if (inst->decoder) {
|
||||
res = opus_decode(
|
||||
inst->decoder, encoded, static_cast<opus_int32>(encoded_bytes),
|
||||
reinterpret_cast<opus_int16*>(decoded), frame_size, decode_fec);
|
||||
} else {
|
||||
res = opus_multistream_decode(inst->multistream_decoder, encoded,
|
||||
static_cast<opus_int32>(encoded_bytes),
|
||||
reinterpret_cast<opus_int16*>(decoded),
|
||||
frame_size, decode_fec);
|
||||
}
|
||||
|
||||
if (res <= 0)
|
||||
return -1;
|
||||
|
||||
*audio_type = DetermineAudioType(inst, encoded_bytes);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Runs packet loss concealment by decoding an empty payload into `decoded`.
 * Returns the number of generated samples, or -1 on failure. */
static int DecodePlc(OpusDecInst* inst, int16_t* decoded) {
  int samples_to_conceal =
      FrameSizePerChannel(kWebRtcOpusPlcFrameSizeMs, inst->sample_rate_hz);

  if (inst->plc_use_prev_decoded_samples) {
    /* Ask for as many samples as the previous decode call produced, limited
     * to `MaxFrameSizePerChannel()`. */
    samples_to_conceal = inst->prev_decoded_samples;
    const int upper_limit = MaxFrameSizePerChannel(inst->sample_rate_hz);
    if (samples_to_conceal > upper_limit) {
      samples_to_conceal = upper_limit;
    }
  }

  /* The audio type reported for the PLC call is not used here. */
  int16_t ignored_audio_type = 0;
  const int concealed_samples = DecodeNative(
      inst, NULL, 0, samples_to_conceal, decoded, &ignored_audio_type, 0);
  return concealed_samples < 0 ? -1 : concealed_samples;
}
|
||||
|
||||
// Decodes `encoded` into `decoded` and reports the audio type in
// `*audio_type`. An empty payload (`encoded_bytes` == 0) triggers packet loss
// concealment instead. Returns the number of decoded samples, or -1 on error.
int WebRtcOpus_Decode(OpusDecInst* inst,
                      const uint8_t* encoded,
                      size_t encoded_bytes,
                      int16_t* decoded,
                      int16_t* audio_type) {
  int sample_count;
  if (encoded_bytes != 0) {
    sample_count = DecodeNative(inst, encoded, encoded_bytes,
                                MaxFrameSizePerChannel(inst->sample_rate_hz),
                                decoded, audio_type, 0);
  } else {
    *audio_type = DetermineAudioType(inst, encoded_bytes);
    sample_count = DecodePlc(inst, decoded);
  }

  if (sample_count < 0) {
    return -1;
  }

  /* Remember how many samples were produced, to be used by the PLC in case of
   * losses. */
  if (inst->plc_use_prev_decoded_samples) {
    inst->prev_decoded_samples = sample_count;
  }
  return sample_count;
}
|
||||
|
||||
// Decodes the FEC data carried in `encoded` into `decoded`. Returns 0 when the
// packet carries no FEC, -1 on a decoding error, and otherwise the number of
// decoded samples.
int WebRtcOpus_DecodeFec(OpusDecInst* inst,
                         const uint8_t* encoded,
                         size_t encoded_bytes,
                         int16_t* decoded,
                         int16_t* audio_type) {
  const bool has_fec = WebRtcOpus_PacketHasFec(encoded, encoded_bytes) == 1;
  if (!has_fec) {
    return 0;
  }

  // The FEC data is decoded with a frame size of one frame of the packet.
  const int samples_per_frame =
      opus_packet_get_samples_per_frame(encoded, inst->sample_rate_hz);
  const int sample_count =
      DecodeNative(inst, encoded, encoded_bytes, samples_per_frame, decoded,
                   audio_type, /*decode_fec=*/1);
  return sample_count < 0 ? -1 : sample_count;
}
|
||||
|
||||
// Estimates the number of samples per channel that decoding `payload` will
// produce. Returns the PLC duration for an empty payload, and 0 when the
// payload is invalid or describes more than 120 ms of audio.
int WebRtcOpus_DurationEst(OpusDecInst* inst,
                           const uint8_t* payload,
                           size_t payload_length_bytes) {
  // WebRtcOpus_Decode calls PLC when payload length is zero, so the PLC
  // duration is returned correspondingly.
  if (payload_length_bytes == 0) {
    return WebRtcOpus_PlcDuration(inst);
  }

  const int frame_count = opus_packet_get_nb_frames(
      payload, static_cast<opus_int32>(payload_length_bytes));
  if (frame_count < 0) {
    /* Invalid payload data. */
    return 0;
  }

  const int total_samples =
      frame_count *
      opus_packet_get_samples_per_frame(payload, inst->sample_rate_hz);
  // More than 120 ms' worth of samples is rejected.
  const bool too_long = total_samples > 120 * inst->sample_rate_hz / 1000;
  return too_long ? 0 : total_samples;
}
|
||||
|
||||
// Returns the number of samples per channel that a PLC call produces: a frame
// of `kWebRtcOpusPlcFrameSizeMs` by default, or the previously decoded sample
// count (capped at `MaxFrameSizePerChannel()`) when
// `plc_use_prev_decoded_samples` is set.
int WebRtcOpus_PlcDuration(OpusDecInst* inst) {
  if (!inst->plc_use_prev_decoded_samples) {
    return FrameSizePerChannel(kWebRtcOpusPlcFrameSizeMs,
                               inst->sample_rate_hz);
  }

  const int previous_samples = inst->prev_decoded_samples;
  const int upper_limit = MaxFrameSizePerChannel(inst->sample_rate_hz);
  if (previous_samples > upper_limit) {
    return upper_limit;
  }
  return previous_samples;
}
|
||||
|
||||
int WebRtcOpus_FecDurationEst(const uint8_t* payload,
|
||||
size_t payload_length_bytes,
|
||||
int sample_rate_hz) {
|
||||
if (WebRtcOpus_PacketHasFec(payload, payload_length_bytes) != 1) {
|
||||
return 0;
|
||||
}
|
||||
const int samples =
|
||||
opus_packet_get_samples_per_frame(payload, sample_rate_hz);
|
||||
const int samples_per_ms = sample_rate_hz / 1000;
|
||||
if (samples < 10 * samples_per_ms || samples > 120 * samples_per_ms) {
|
||||
/* Invalid payload duration. */
|
||||
return 0;
|
||||
}
|
||||
return samples;
|
||||
}
|
||||
|
||||
int WebRtcOpus_NumSilkFrames(const uint8_t* payload) {
|
||||
// For computing the payload length in ms, the sample rate is not important
|
||||
// since it cancels out. We use 48 kHz, but any valid sample rate would work.
|
||||
int payload_length_ms =
|
||||
opus_packet_get_samples_per_frame(payload, 48000) / 48;
|
||||
if (payload_length_ms < 10)
|
||||
payload_length_ms = 10;
|
||||
|
||||
int silk_frames;
|
||||
switch (payload_length_ms) {
|
||||
case 10:
|
||||
case 20:
|
||||
silk_frames = 1;
|
||||
break;
|
||||
case 40:
|
||||
silk_frames = 2;
|
||||
break;
|
||||
case 60:
|
||||
silk_frames = 3;
|
||||
break;
|
||||
default:
|
||||
return 0; // It is actually even an invalid packet.
|
||||
}
|
||||
return silk_frames;
|
||||
}
|
||||
|
||||
// This method is based on Definition of the Opus Audio Codec
|
||||
// (https://tools.ietf.org/html/rfc6716). Basically, this method is based on
|
||||
// parsing the LP layer of an Opus packet, particularly the LBRR flag.
|
||||
int WebRtcOpus_PacketHasFec(const uint8_t* payload,
|
||||
size_t payload_length_bytes) {
|
||||
if (payload == NULL || payload_length_bytes == 0)
|
||||
return 0;
|
||||
|
||||
// In CELT_ONLY mode, packets should not have FEC.
|
||||
if (payload[0] & 0x80)
|
||||
return 0;
|
||||
|
||||
int silk_frames = WebRtcOpus_NumSilkFrames(payload);
|
||||
if (silk_frames == 0)
|
||||
return 0; // Not valid.
|
||||
|
||||
const int channels = opus_packet_get_nb_channels(payload);
|
||||
RTC_DCHECK(channels == 1 || channels == 2);
|
||||
|
||||
// Max number of frames in an Opus packet is 48.
|
||||
opus_int16 frame_sizes[48];
|
||||
const unsigned char* frame_data[48];
|
||||
|
||||
// Parse packet to get the frames. But we only care about the first frame,
|
||||
// since we can only decode the FEC from the first one.
|
||||
if (opus_packet_parse(payload, static_cast<opus_int32>(payload_length_bytes),
|
||||
NULL, frame_data, frame_sizes, NULL) < 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (frame_sizes[0] < 1) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// A frame starts with the LP layer. The LP layer begins with two to eight
|
||||
// header bits.These consist of one VAD bit per SILK frame (up to 3),
|
||||
// followed by a single flag indicating the presence of LBRR frames.
|
||||
// For a stereo packet, these first flags correspond to the mid channel, and
|
||||
// a second set of flags is included for the side channel. Because these are
|
||||
// the first symbols decoded by the range coder and because they are coded
|
||||
// as binary values with uniform probability, they can be extracted directly
|
||||
// from the most significant bits of the first byte of compressed data.
|
||||
for (int n = 0; n < channels; n++) {
|
||||
// The LBRR bit for channel 1 is on the (`silk_frames` + 1)-th bit, and
|
||||
// that of channel 2 is on the |(`silk_frames` + 1) * 2 + 1|-th bit.
|
||||
if (frame_data[0][0] & (0x80 >> ((n + 1) * (silk_frames + 1) - 1)))
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int WebRtcOpus_PacketHasVoiceActivity(const uint8_t* payload,
|
||||
size_t payload_length_bytes) {
|
||||
if (payload == NULL || payload_length_bytes == 0)
|
||||
return 0;
|
||||
|
||||
// In CELT_ONLY mode we can not determine whether there is VAD.
|
||||
if (payload[0] & 0x80)
|
||||
return -1;
|
||||
|
||||
int silk_frames = WebRtcOpus_NumSilkFrames(payload);
|
||||
if (silk_frames == 0)
|
||||
return -1;
|
||||
|
||||
const int channels = opus_packet_get_nb_channels(payload);
|
||||
RTC_DCHECK(channels == 1 || channels == 2);
|
||||
|
||||
// Max number of frames in an Opus packet is 48.
|
||||
opus_int16 frame_sizes[48];
|
||||
const unsigned char* frame_data[48];
|
||||
|
||||
// Parse packet to get the frames.
|
||||
int frames =
|
||||
opus_packet_parse(payload, static_cast<opus_int32>(payload_length_bytes),
|
||||
NULL, frame_data, frame_sizes, NULL);
|
||||
if (frames < 0)
|
||||
return -1;
|
||||
|
||||
// Iterate over all Opus frames which may contain multiple SILK frames.
|
||||
for (int frame = 0; frame < frames; frame++) {
|
||||
if (frame_sizes[frame] < 1) {
|
||||
continue;
|
||||
}
|
||||
if (frame_data[frame][0] >> (8 - silk_frames))
|
||||
return 1;
|
||||
if (channels == 2 &&
|
||||
(frame_data[frame][0] << (silk_frames + 1)) >> (8 - silk_frames))
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,547 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INTERFACE_H_
|
||||
#define MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INTERFACE_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "modules/audio_coding/codecs/opus/opus_inst.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Opaque wrapper types for the codec state.
|
||||
typedef struct WebRtcOpusEncInst OpusEncInst;
|
||||
typedef struct WebRtcOpusDecInst OpusDecInst;
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcOpus_EncoderCreate(...)
|
||||
*
|
||||
* This function creates an Opus encoder that encodes mono or stereo.
|
||||
*
|
||||
* Input:
|
||||
* - channels : number of channels; 1 or 2.
|
||||
* - application : 0 - VOIP applications.
|
||||
* Favor speech intelligibility.
|
||||
* 1 - Audio applications.
|
||||
* Favor faithfulness to the original input.
|
||||
* - sample_rate_hz : sample rate of input audio
|
||||
*
|
||||
* Output:
|
||||
* - inst : a pointer to Encoder context that is created
|
||||
* if success.
|
||||
*
|
||||
* Return value : 0 - Success
|
||||
* -1 - Error
|
||||
*/
|
||||
int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst,
|
||||
size_t channels,
|
||||
int32_t application,
|
||||
int sample_rate_hz);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcOpus_MultistreamEncoderCreate(...)
|
||||
*
|
||||
* This function creates an Opus encoder with any supported channel count.
|
||||
*
|
||||
* Input:
|
||||
* - channels : number of channels in the input of the encoder.
|
||||
* - application : 0 - VOIP applications.
|
||||
* Favor speech intelligibility.
|
||||
* 1 - Audio applications.
|
||||
* Favor faithfulness to the original input.
|
||||
* - streams : number of streams, as described in RFC 7845.
|
||||
* - coupled_streams : number of coupled streams, as described in
|
||||
* RFC 7845.
|
||||
* - channel_mapping : the channel mapping; pointer to array of
|
||||
`channels` bytes, as described in RFC 7845.
|
||||
*
|
||||
* Output:
|
||||
* - inst : a pointer to Encoder context that is created
|
||||
* if success.
|
||||
*
|
||||
* Return value : 0 - Success
|
||||
* -1 - Error
|
||||
*/
|
||||
int16_t WebRtcOpus_MultistreamEncoderCreate(
|
||||
OpusEncInst** inst,
|
||||
size_t channels,
|
||||
int32_t application,
|
||||
size_t streams,
|
||||
size_t coupled_streams,
|
||||
const unsigned char* channel_mapping);
|
||||
|
||||
/****************************************************************************
 * WebRtcOpus_EncoderFree(...)
 *
 * This function destroys the underlying Opus encoder and releases the
 * encoder context.
 *
 * Input:
 *      - inst                  : Encoder context
 *
 * Return value                 :  0 - Success
 *                                -1 - Error (`inst` is NULL)
 */
int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcOpus_Encode(...)
|
||||
*
|
||||
* This function encodes audio as a series of Opus frames and inserts
|
||||
* it into a packet. Input buffer can be any length.
|
||||
*
|
||||
* Input:
|
||||
* - inst : Encoder context
|
||||
* - audio_in : Input speech data buffer
|
||||
* - samples : Samples per channel in audio_in
|
||||
* - length_encoded_buffer : Output buffer size
|
||||
*
|
||||
* Output:
|
||||
* - encoded : Output compressed data buffer
|
||||
*
|
||||
* Return value : >=0 - Length (in bytes) of coded data
|
||||
* -1 - Error
|
||||
*/
|
||||
int WebRtcOpus_Encode(OpusEncInst* inst,
|
||||
const int16_t* audio_in,
|
||||
size_t samples,
|
||||
size_t length_encoded_buffer,
|
||||
uint8_t* encoded);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcOpus_SetBitRate(...)
|
||||
*
|
||||
* This function adjusts the target bitrate of the encoder.
|
||||
*
|
||||
* Input:
|
||||
* - inst : Encoder context
|
||||
* - rate : New target bitrate
|
||||
*
|
||||
* Return value : 0 - Success
|
||||
* -1 - Error
|
||||
*/
|
||||
int16_t WebRtcOpus_SetBitRate(OpusEncInst* inst, int32_t rate);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcOpus_SetPacketLossRate(...)
|
||||
*
|
||||
* This function configures the encoder's expected packet loss percentage.
|
||||
*
|
||||
* Input:
|
||||
* - inst : Encoder context
|
||||
* - loss_rate : loss percentage in the range 0-100, inclusive.
|
||||
* Return value : 0 - Success
|
||||
* -1 - Error
|
||||
*/
|
||||
int16_t WebRtcOpus_SetPacketLossRate(OpusEncInst* inst, int32_t loss_rate);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcOpus_SetMaxPlaybackRate(...)
|
||||
*
|
||||
* Configures the maximum playback rate for encoding. Due to hardware
|
||||
* limitations, the receiver may render audio up to a playback rate. Opus
|
||||
* encoder can use this information to optimize for network usage and encoding
|
||||
* complexity. This will affect the audio bandwidth in the coded audio. However,
|
||||
* the input/output sample rate is not affected.
|
||||
*
|
||||
* Input:
|
||||
* - inst : Encoder context
|
||||
* - frequency_hz : Maximum playback rate in Hz.
|
||||
* This parameter can take any value. The relation
|
||||
* between the value and the Opus internal mode is
|
||||
* as follows:
|
||||
* frequency_hz <= 8000 narrow band
|
||||
* 8000 < frequency_hz <= 12000 medium band
|
||||
* 12000 < frequency_hz <= 16000 wide band
|
||||
* 16000 < frequency_hz <= 24000 super wide band
|
||||
* frequency_hz > 24000 full band
|
||||
* Return value : 0 - Success
|
||||
* -1 - Error
|
||||
*/
|
||||
int16_t WebRtcOpus_SetMaxPlaybackRate(OpusEncInst* inst, int32_t frequency_hz);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcOpus_GetMaxPlaybackRate(...)
|
||||
*
|
||||
* Queries the maximum playback rate for encoding. If different single-stream
|
||||
* encoders have different maximum playback rates, this function fails.
|
||||
*
|
||||
* Input:
|
||||
* - inst : Encoder context.
|
||||
* Output:
|
||||
* - result_hz : The maximum playback rate in Hz.
|
||||
* Return value : 0 - Success
|
||||
* -1 - Error
|
||||
*/
|
||||
int16_t WebRtcOpus_GetMaxPlaybackRate(OpusEncInst* const inst,
|
||||
int32_t* result_hz);
|
||||
|
||||
/* TODO(minyue): Check whether an API to check the FEC and the packet loss rate
|
||||
* is needed. It might not be very useful since there are not many use cases and
|
||||
* the caller can always maintain the states. */
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcOpus_EnableFec()
|
||||
*
|
||||
* This function enables FEC for encoding.
|
||||
*
|
||||
* Input:
|
||||
* - inst : Encoder context
|
||||
*
|
||||
* Return value : 0 - Success
|
||||
* -1 - Error
|
||||
*/
|
||||
int16_t WebRtcOpus_EnableFec(OpusEncInst* inst);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcOpus_DisableFec()
|
||||
*
|
||||
* This function disables FEC for encoding.
|
||||
*
|
||||
* Input:
|
||||
* - inst : Encoder context
|
||||
*
|
||||
* Return value : 0 - Success
|
||||
* -1 - Error
|
||||
*/
|
||||
int16_t WebRtcOpus_DisableFec(OpusEncInst* inst);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcOpus_EnableDtx()
|
||||
*
|
||||
* This function enables Opus internal DTX for encoding.
|
||||
*
|
||||
* Input:
|
||||
* - inst : Encoder context
|
||||
*
|
||||
* Return value : 0 - Success
|
||||
* -1 - Error
|
||||
*/
|
||||
int16_t WebRtcOpus_EnableDtx(OpusEncInst* inst);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcOpus_DisableDtx()
|
||||
*
|
||||
* This function disables Opus internal DTX for encoding.
|
||||
*
|
||||
* Input:
|
||||
* - inst : Encoder context
|
||||
*
|
||||
* Return value : 0 - Success
|
||||
* -1 - Error
|
||||
*/
|
||||
int16_t WebRtcOpus_DisableDtx(OpusEncInst* inst);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcOpus_GetUseDtx()
|
||||
*
|
||||
* This function gets the DTX configuration used for encoding.
|
||||
*
|
||||
* Input:
|
||||
* - inst : Encoder context
|
||||
*
|
||||
* Return value : 0 - Encoder does not use DTX.
|
||||
* 1 - Encoder uses DTX.
|
||||
* -1 - Error.
|
||||
*/
|
||||
int16_t WebRtcOpus_GetUseDtx(OpusEncInst* inst);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcOpus_EnableCbr()
|
||||
*
|
||||
* This function enables CBR for encoding.
|
||||
*
|
||||
* Input:
|
||||
* - inst : Encoder context
|
||||
*
|
||||
* Return value : 0 - Success
|
||||
* -1 - Error
|
||||
*/
|
||||
int16_t WebRtcOpus_EnableCbr(OpusEncInst* inst);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcOpus_DisableCbr()
|
||||
*
|
||||
* This function disables CBR for encoding.
|
||||
*
|
||||
* Input:
|
||||
* - inst : Encoder context
|
||||
*
|
||||
* Return value : 0 - Success
|
||||
* -1 - Error
|
||||
*/
|
||||
int16_t WebRtcOpus_DisableCbr(OpusEncInst* inst);
|
||||
|
||||
/*
|
||||
* WebRtcOpus_SetComplexity(...)
|
||||
*
|
||||
* This function adjusts the computational complexity. The effect is the same as
|
||||
* calling the complexity setting of Opus as an Opus encoder related CTL.
|
||||
*
|
||||
* Input:
|
||||
* - inst : Encoder context
|
||||
* - complexity : New target complexity (0-10, inclusive)
|
||||
*
|
||||
* Return value : 0 - Success
|
||||
* -1 - Error
|
||||
*/
|
||||
int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity);
|
||||
|
||||
/*
|
||||
* WebRtcOpus_GetBandwidth(...)
|
||||
*
|
||||
* This function returns the current bandwidth.
|
||||
*
|
||||
* Input:
|
||||
* - inst : Encoder context
|
||||
*
|
||||
* Return value : Bandwidth - Success
|
||||
* -1 - Error
|
||||
*/
|
||||
int32_t WebRtcOpus_GetBandwidth(OpusEncInst* inst);
|
||||
|
||||
/*
|
||||
* WebRtcOpus_SetBandwidth(...)
|
||||
*
|
||||
* By default Opus decides which bandwidth to encode the signal in depending on
|
||||
* the bitrate. This function overrules the previous setting and forces the
|
||||
* encoder to encode in narrowband/wideband/fullband/etc.
|
||||
*
|
||||
* Input:
|
||||
* - inst : Encoder context
|
||||
* - bandwidth : New target bandwidth. Valid values are:
|
||||
* OPUS_BANDWIDTH_NARROWBAND
|
||||
* OPUS_BANDWIDTH_MEDIUMBAND
|
||||
* OPUS_BANDWIDTH_WIDEBAND
|
||||
* OPUS_BANDWIDTH_SUPERWIDEBAND
|
||||
* OPUS_BANDWIDTH_FULLBAND
|
||||
*
|
||||
* Return value : 0 - Success
|
||||
* -1 - Error
|
||||
*/
|
||||
int16_t WebRtcOpus_SetBandwidth(OpusEncInst* inst, int32_t bandwidth);
|
||||
|
||||
/*
|
||||
* WebRtcOpus_GetInDtx(...)
|
||||
*
|
||||
* Gets the DTX state of the encoder.
|
||||
*
|
||||
* Input:
|
||||
* - inst : Encoder context
|
||||
*
|
||||
* Return value : -1 - Error.
|
||||
* 1 - Last encoded frame was comfort noise update during DTX.
|
||||
* 0 - Last encoded frame was encoded with encoder not in DTX.
|
||||
*/
|
||||
int32_t WebRtcOpus_GetInDtx(OpusEncInst* inst);
|
||||
|
||||
/*
|
||||
* WebRtcOpus_SetForceChannels(...)
|
||||
*
|
||||
* If the encoder is initialized as a stereo encoder, Opus will by default
|
||||
* decide whether to encode in mono or stereo based on the bitrate. This
|
||||
* function overrules the previous setting, and forces the encoder to encode
|
||||
* in auto/mono/stereo.
|
||||
*
|
||||
* If the Encoder is initialized as a mono encoder, and one tries to force
|
||||
* stereo, the function will return an error.
|
||||
*
|
||||
* Input:
|
||||
* - inst : Encoder context
|
||||
* - num_channels : 0 - Not forced
|
||||
* 1 - Mono
|
||||
* 2 - Stereo
|
||||
*
|
||||
* Return value : 0 - Success
|
||||
* -1 - Error
|
||||
*/
|
||||
int16_t WebRtcOpus_SetForceChannels(OpusEncInst* inst, size_t num_channels);
|
||||
|
||||
int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst,
|
||||
size_t channels,
|
||||
int sample_rate_hz);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcOpus_MultistreamDecoderCreate(...)
|
||||
*
|
||||
* This function creates an Opus decoder with any supported channel count.
|
||||
*
|
||||
* Input:
|
||||
* - channels : number of output channels that the decoder
|
||||
* will produce.
|
||||
* - streams : number of encoded streams, as described in
|
||||
* RFC 7845.
|
||||
* - coupled_streams : number of coupled streams, as described in
|
||||
* RFC 7845.
|
||||
* - channel_mapping : the channel mapping; pointer to array of
|
||||
* `channels` bytes, as described in RFC 7845.
|
||||
*
|
||||
* Output:
|
||||
* - inst : a pointer to a Decoder context that is created
|
||||
* if success.
|
||||
*
|
||||
* Return value : 0 - Success
|
||||
* -1 - Error
|
||||
*/
|
||||
int16_t WebRtcOpus_MultistreamDecoderCreate(
|
||||
OpusDecInst** inst,
|
||||
size_t channels,
|
||||
size_t streams,
|
||||
size_t coupled_streams,
|
||||
const unsigned char* channel_mapping);
|
||||
|
||||
int16_t WebRtcOpus_DecoderFree(OpusDecInst* inst);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcOpus_DecoderChannels(...)
|
||||
*
|
||||
* This function returns the number of channels created for Opus decoder.
|
||||
*/
|
||||
size_t WebRtcOpus_DecoderChannels(OpusDecInst* inst);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcOpus_DecoderInit(...)
|
||||
*
|
||||
* This function resets state of the decoder.
|
||||
*
|
||||
* Input:
|
||||
* - inst : Decoder context
|
||||
*/
|
||||
void WebRtcOpus_DecoderInit(OpusDecInst* inst);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcOpus_Decode(...)
|
||||
*
|
||||
* This function decodes an Opus packet into one or more audio frames at the
|
||||
* sample rate the decoder was created with.
|
||||
*
|
||||
* Input:
|
||||
* - inst : Decoder context
|
||||
* - encoded : Encoded data
|
||||
* - encoded_bytes : Bytes in encoded vector
|
||||
*
|
||||
* Output:
|
||||
* - decoded : The decoded vector
|
||||
* - audio_type : 0 normal speech, 2 comfort noise (CNG);
|
||||
* the latter is reported while the decoder is in
|
||||
* DTX mode.
|
||||
*
|
||||
* Return value : >0 - Samples per channel in decoded vector
|
||||
* -1 - Error
|
||||
*/
|
||||
int WebRtcOpus_Decode(OpusDecInst* inst,
|
||||
const uint8_t* encoded,
|
||||
size_t encoded_bytes,
|
||||
int16_t* decoded,
|
||||
int16_t* audio_type);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcOpus_DecodeFec(...)
|
||||
*
|
||||
* This function decodes the FEC data from an Opus packet into one or more audio
|
||||
* frames at the sample rate the decoder was created with.
|
||||
*
|
||||
* Input:
|
||||
* - inst : Decoder context
|
||||
* - encoded : Encoded data
|
||||
* - encoded_bytes : Bytes in encoded vector
|
||||
*
|
||||
* Output:
|
||||
* - decoded : The decoded vector (previous frame)
|
||||
*
|
||||
* Return value : >0 - Samples per channel in decoded vector
|
||||
* 0 - No FEC data in the packet
|
||||
* -1 - Error
|
||||
*/
|
||||
int WebRtcOpus_DecodeFec(OpusDecInst* inst,
|
||||
const uint8_t* encoded,
|
||||
size_t encoded_bytes,
|
||||
int16_t* decoded,
|
||||
int16_t* audio_type);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcOpus_DurationEst(...)
|
||||
*
|
||||
* This function calculates the duration of an opus packet.
|
||||
* Input:
|
||||
* - inst : Decoder context
|
||||
* - payload : Encoded data pointer
|
||||
* - payload_length_bytes : Bytes of encoded data
|
||||
*
|
||||
* Return value : The duration of the packet, in samples per
|
||||
* channel.
|
||||
*/
|
||||
int WebRtcOpus_DurationEst(OpusDecInst* inst,
|
||||
const uint8_t* payload,
|
||||
size_t payload_length_bytes);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcOpus_PlcDuration(...)
|
||||
*
|
||||
* This function calculates the duration of a frame returned by packet loss
|
||||
* concealment (PLC).
|
||||
*
|
||||
* Input:
|
||||
* - inst : Decoder context
|
||||
*
|
||||
* Return value : The duration of a frame returned by PLC, in
|
||||
* samples per channel.
|
||||
*/
|
||||
int WebRtcOpus_PlcDuration(OpusDecInst* inst);
|
||||
|
||||
/* TODO(minyue): Check whether it is needed to add a decoder context to the
|
||||
* arguments, like WebRtcOpus_DurationEst(...). In fact, the packet itself tells
|
||||
* the duration. The decoder context in WebRtcOpus_DurationEst(...) is not used.
|
||||
* So it may be advisable to remove it from WebRtcOpus_DurationEst(...). */
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcOpus_FecDurationEst(...)
|
||||
*
|
||||
* This function calculates the duration of the FEC data within an opus packet.
|
||||
* Input:
|
||||
* - payload : Encoded data pointer
|
||||
* - payload_length_bytes : Bytes of encoded data
|
||||
* - sample_rate_hz : Sample rate of output audio
|
||||
*
|
||||
* Return value : >0 - The duration of the FEC data in the
|
||||
* packet in samples per channel.
|
||||
* 0 - No FEC data in the packet.
|
||||
*/
|
||||
int WebRtcOpus_FecDurationEst(const uint8_t* payload,
|
||||
size_t payload_length_bytes,
|
||||
int sample_rate_hz);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcOpus_PacketHasFec(...)
|
||||
*
|
||||
* This function detects if an opus packet has FEC.
|
||||
* Input:
|
||||
* - payload : Encoded data pointer
|
||||
* - payload_length_bytes : Bytes of encoded data
|
||||
*
|
||||
* Return value : 0 - the packet does NOT contain FEC.
|
||||
* 1 - the packet contains FEC.
|
||||
*/
|
||||
int WebRtcOpus_PacketHasFec(const uint8_t* payload,
|
||||
size_t payload_length_bytes);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcOpus_PacketHasVoiceActivity(...)
|
||||
*
|
||||
* This function returns the SILK VAD information encoded in the opus packet.
|
||||
* For CELT-only packets that do not have VAD information, it returns -1.
|
||||
* Input:
|
||||
* - payload : Encoded data pointer
|
||||
* - payload_length_bytes : Bytes of encoded data
|
||||
*
|
||||
* Return value : 0 - no frame had the VAD flag set.
|
||||
* 1 - at least one frame had the VAD flag set.
|
||||
* -1 - VAD status could not be determined.
|
||||
*/
|
||||
int WebRtcOpus_PacketHasVoiceActivity(const uint8_t* payload,
|
||||
size_t payload_length_bytes);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INTERFACE_H_
|
||||
|
|
@ -0,0 +1,147 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_coding/codecs/opus/opus_interface.h"
|
||||
#include "modules/audio_coding/codecs/tools/audio_codec_speed_test.h"
|
||||
|
||||
using ::std::string;
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
static const int kOpusBlockDurationMs = 20;
|
||||
static const int kOpusSamplingKhz = 48;
|
||||
|
||||
class OpusSpeedTest : public AudioCodecSpeedTest {
|
||||
protected:
|
||||
OpusSpeedTest();
|
||||
void SetUp() override;
|
||||
void TearDown() override;
|
||||
float EncodeABlock(int16_t* in_data,
|
||||
uint8_t* bit_stream,
|
||||
size_t max_bytes,
|
||||
size_t* encoded_bytes) override;
|
||||
float DecodeABlock(const uint8_t* bit_stream,
|
||||
size_t encoded_bytes,
|
||||
int16_t* out_data) override;
|
||||
WebRtcOpusEncInst* opus_encoder_;
|
||||
WebRtcOpusDecInst* opus_decoder_;
|
||||
};
|
||||
|
||||
OpusSpeedTest::OpusSpeedTest()
|
||||
: AudioCodecSpeedTest(kOpusBlockDurationMs,
|
||||
kOpusSamplingKhz,
|
||||
kOpusSamplingKhz),
|
||||
opus_encoder_(NULL),
|
||||
opus_decoder_(NULL) {}
|
||||
|
||||
// Prepares the harness and then creates a 48 kHz encoder/decoder pair
// configured with the bitrate of the current test parameter.
void OpusSpeedTest::SetUp() {
  AudioCodecSpeedTest::SetUp();

  // Mono input is coded in VOIP mode (0); everything else in audio mode (1).
  int app = 1;
  if (channels_ == 1) {
    app = 0;
  }

  // Allocate the codec instances.
  EXPECT_EQ(0, WebRtcOpus_EncoderCreate(&opus_encoder_, channels_, app, 48000));
  EXPECT_EQ(0, WebRtcOpus_DecoderCreate(&opus_decoder_, channels_, 48000));

  // Apply the target bitrate.
  EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_, bit_rate_));
}
|
||||
|
||||
// Tears down the harness first, then frees the encoder and the decoder that
// SetUp() created. Both free calls are expected to report success (0).
void OpusSpeedTest::TearDown() {
  AudioCodecSpeedTest::TearDown();

  // Encoder first, decoder second.
  EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
  EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
}
|
||||
|
||||
float OpusSpeedTest::EncodeABlock(int16_t* in_data,
|
||||
uint8_t* bit_stream,
|
||||
size_t max_bytes,
|
||||
size_t* encoded_bytes) {
|
||||
clock_t clocks = clock();
|
||||
int value = WebRtcOpus_Encode(opus_encoder_, in_data, input_length_sample_,
|
||||
max_bytes, bit_stream);
|
||||
clocks = clock() - clocks;
|
||||
EXPECT_GT(value, 0);
|
||||
*encoded_bytes = static_cast<size_t>(value);
|
||||
return 1000.0 * clocks / CLOCKS_PER_SEC;
|
||||
}
|
||||
|
||||
// Decodes the `encoded_bytes` bytes found in `bit_stream` into `out_data` and
// expects the decoder to deliver exactly `output_length_sample_` samples.
// Returns the processor time spent inside WebRtcOpus_Decode(), in
// milliseconds (measured with clock()).
float OpusSpeedTest::DecodeABlock(const uint8_t* bit_stream,
                                  size_t encoded_bytes,
                                  int16_t* out_data) {
  int16_t audio_type;  // Written by the decoder; not inspected here.

  const clock_t start = clock();
  const int value = WebRtcOpus_Decode(opus_decoder_, bit_stream, encoded_bytes,
                                      out_data, &audio_type);
  const clock_t elapsed = clock() - start;

  EXPECT_EQ(output_length_sample_, static_cast<size_t>(value));
  return 1000.0 * elapsed / CLOCKS_PER_SEC;
}
|
||||
|
||||
/* Test audio length in seconds. */
|
||||
constexpr size_t kDurationSec = 400;
|
||||
|
||||
#define ADD_TEST(complexity) \
|
||||
TEST_P(OpusSpeedTest, OpusSetComplexityTest##complexity) { \
|
||||
/* Set complexity. */ \
|
||||
printf("Setting complexity to %d ...\n", complexity); \
|
||||
EXPECT_EQ(0, WebRtcOpus_SetComplexity(opus_encoder_, complexity)); \
|
||||
EncodeDecode(kDurationSec); \
|
||||
}
|
||||
|
||||
ADD_TEST(10)
|
||||
ADD_TEST(9)
|
||||
ADD_TEST(8)
|
||||
ADD_TEST(7)
|
||||
ADD_TEST(6)
|
||||
ADD_TEST(5)
|
||||
ADD_TEST(4)
|
||||
ADD_TEST(3)
|
||||
ADD_TEST(2)
|
||||
ADD_TEST(1)
|
||||
ADD_TEST(0)
|
||||
|
||||
#define ADD_BANDWIDTH_TEST(bandwidth) \
|
||||
TEST_P(OpusSpeedTest, OpusSetBandwidthTest##bandwidth) { \
|
||||
/* Set bandwidth. */ \
|
||||
printf("Setting bandwidth to %d ...\n", bandwidth); \
|
||||
EXPECT_EQ(0, WebRtcOpus_SetBandwidth(opus_encoder_, bandwidth)); \
|
||||
EncodeDecode(kDurationSec); \
|
||||
}
|
||||
|
||||
ADD_BANDWIDTH_TEST(OPUS_BANDWIDTH_NARROWBAND)
|
||||
ADD_BANDWIDTH_TEST(OPUS_BANDWIDTH_MEDIUMBAND)
|
||||
ADD_BANDWIDTH_TEST(OPUS_BANDWIDTH_WIDEBAND)
|
||||
ADD_BANDWIDTH_TEST(OPUS_BANDWIDTH_SUPERWIDEBAND)
|
||||
ADD_BANDWIDTH_TEST(OPUS_BANDWIDTH_FULLBAND)
|
||||
|
||||
// List all test cases: (channel, bit rat, filename, extension).
|
||||
const coding_param param_set[] = {
|
||||
std::make_tuple(1,
|
||||
64000,
|
||||
string("audio_coding/speech_mono_32_48kHz"),
|
||||
string("pcm"),
|
||||
true),
|
||||
std::make_tuple(1,
|
||||
32000,
|
||||
string("audio_coding/speech_mono_32_48kHz"),
|
||||
string("pcm"),
|
||||
true),
|
||||
std::make_tuple(2,
|
||||
64000,
|
||||
string("audio_coding/music_stereo_48kHz"),
|
||||
string("pcm"),
|
||||
true)};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(AllTest,
|
||||
OpusSpeedTest,
|
||||
::testing::ValuesIn(param_set));
|
||||
|
||||
} // namespace webrtc
|
||||
|
|
@ -0,0 +1,979 @@
|
|||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "modules/audio_coding/codecs/opus/opus_inst.h"
|
||||
#include "modules/audio_coding/codecs/opus/opus_interface.h"
|
||||
#include "modules/audio_coding/neteq/tools/audio_loop.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/numerics/safe_conversions.h"
|
||||
#include "test/gtest.h"
|
||||
#include "test/testsupport/file_utils.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
// Equivalent to SDP params
|
||||
// {{"channel_mapping", "0,1,2,3"}, {"coupled_streams", "2"}}.
|
||||
constexpr unsigned char kQuadChannelMapping[] = {0, 1, 2, 3};
|
||||
constexpr int kQuadTotalStreams = 2;
|
||||
constexpr int kQuadCoupledStreams = 2;
|
||||
|
||||
constexpr unsigned char kStereoChannelMapping[] = {0, 1};
|
||||
constexpr int kStereoTotalStreams = 1;
|
||||
constexpr int kStereoCoupledStreams = 1;
|
||||
|
||||
constexpr unsigned char kMonoChannelMapping[] = {0};
|
||||
constexpr int kMonoTotalStreams = 1;
|
||||
constexpr int kMonoCoupledStreams = 0;
|
||||
|
||||
// Creates an Opus encoder in `*opus_encoder`, using either the plain or the
// multistream creation function depending on `use_multistream`.
//
// The plain path accepts 1 or 2 channels at `encoder_sample_rate_hz`. The
// multistream path expects 48000 Hz and knows the stream layouts for 1, 2 and
// 4 channels; any other channel count is recorded as a test failure. All
// checks are non-fatal.
void CreateSingleOrMultiStreamEncoder(WebRtcOpusEncInst** opus_encoder,
                                      int channels,
                                      int application,
                                      bool use_multistream,
                                      int encoder_sample_rate_hz) {
  EXPECT_TRUE(channels == 1 || channels == 2 || use_multistream);

  if (!use_multistream) {
    EXPECT_EQ(0, WebRtcOpus_EncoderCreate(opus_encoder, channels, application,
                                          encoder_sample_rate_hz));
    return;
  }

  EXPECT_EQ(encoder_sample_rate_hz, 48000);
  switch (channels) {
    case 1:
      EXPECT_EQ(0, WebRtcOpus_MultistreamEncoderCreate(
                       opus_encoder, channels, application, kMonoTotalStreams,
                       kMonoCoupledStreams, kMonoChannelMapping));
      break;
    case 2:
      EXPECT_EQ(0, WebRtcOpus_MultistreamEncoderCreate(
                       opus_encoder, channels, application, kStereoTotalStreams,
                       kStereoCoupledStreams, kStereoChannelMapping));
      break;
    case 4:
      EXPECT_EQ(0, WebRtcOpus_MultistreamEncoderCreate(
                       opus_encoder, channels, application, kQuadTotalStreams,
                       kQuadCoupledStreams, kQuadChannelMapping));
      break;
    default:
      // No stream layout is defined for this channel count.
      EXPECT_TRUE(false) << channels;
      break;
  }
}
|
||||
|
||||
// Creates an Opus decoder in `*opus_decoder`, using either the plain or the
// multistream creation function depending on `use_multistream`.
//
// The plain path accepts 1 or 2 channels at `decoder_sample_rate_hz`. The
// multistream path expects 48000 Hz and knows the stream layouts for 1, 2 and
// 4 channels; any other channel count is recorded as a test failure. All
// checks are non-fatal.
void CreateSingleOrMultiStreamDecoder(WebRtcOpusDecInst** opus_decoder,
                                      int channels,
                                      bool use_multistream,
                                      int decoder_sample_rate_hz) {
  EXPECT_TRUE(channels == 1 || channels == 2 || use_multistream);

  if (!use_multistream) {
    EXPECT_EQ(0, WebRtcOpus_DecoderCreate(opus_decoder, channels,
                                          decoder_sample_rate_hz));
    return;
  }

  EXPECT_EQ(decoder_sample_rate_hz, 48000);
  switch (channels) {
    case 1:
      EXPECT_EQ(0, WebRtcOpus_MultistreamDecoderCreate(
                       opus_decoder, channels, kMonoTotalStreams,
                       kMonoCoupledStreams, kMonoChannelMapping));
      break;
    case 2:
      EXPECT_EQ(0, WebRtcOpus_MultistreamDecoderCreate(
                       opus_decoder, channels, kStereoTotalStreams,
                       kStereoCoupledStreams, kStereoChannelMapping));
      break;
    case 4:
      EXPECT_EQ(0, WebRtcOpus_MultistreamDecoderCreate(
                       opus_decoder, channels, kQuadTotalStreams,
                       kQuadCoupledStreams, kQuadChannelMapping));
      break;
    default:
      // No stream layout is defined for this channel count.
      EXPECT_TRUE(false) << channels;
      break;
  }
}
|
||||
|
||||
int SamplesPerChannel(int sample_rate_hz, int duration_ms) {
|
||||
const int samples_per_ms = rtc::CheckedDivExact(sample_rate_hz, 1000);
|
||||
return samples_per_ms * duration_ms;
|
||||
}
|
||||
|
||||
using test::AudioLoop;
|
||||
using ::testing::Combine;
|
||||
using ::testing::TestWithParam;
|
||||
using ::testing::Values;
|
||||
|
||||
// Maximum number of bytes in output bitstream.
|
||||
const size_t kMaxBytes = 2000;
|
||||
|
||||
class OpusTest
|
||||
: public TestWithParam<::testing::tuple<size_t, int, bool, int, int>> {
|
||||
protected:
|
||||
OpusTest() = default;
|
||||
|
||||
void TestDtxEffect(bool dtx, int block_length_ms);
|
||||
|
||||
void TestCbrEffect(bool dtx, int block_length_ms);
|
||||
|
||||
// Prepare `speech_data_` for encoding, read from a hard-coded file.
|
||||
// After preparation, `speech_data_.GetNextBlock()` returns a pointer to a
|
||||
// block of `block_length_ms` milliseconds. The data is looped every
|
||||
// `loop_length_ms` milliseconds.
|
||||
void PrepareSpeechData(int block_length_ms, int loop_length_ms);
|
||||
|
||||
int EncodeDecode(WebRtcOpusEncInst* encoder,
|
||||
rtc::ArrayView<const int16_t> input_audio,
|
||||
WebRtcOpusDecInst* decoder,
|
||||
int16_t* output_audio,
|
||||
int16_t* audio_type);
|
||||
|
||||
void SetMaxPlaybackRate(WebRtcOpusEncInst* encoder,
|
||||
opus_int32 expect,
|
||||
int32_t set);
|
||||
|
||||
void CheckAudioBounded(const int16_t* audio,
|
||||
size_t samples,
|
||||
size_t channels,
|
||||
uint16_t bound) const;
|
||||
|
||||
WebRtcOpusEncInst* opus_encoder_ = nullptr;
|
||||
WebRtcOpusDecInst* opus_decoder_ = nullptr;
|
||||
AudioLoop speech_data_;
|
||||
uint8_t bitstream_[kMaxBytes];
|
||||
size_t encoded_bytes_ = 0;
|
||||
const size_t channels_{std::get<0>(GetParam())};
|
||||
const int application_{std::get<1>(GetParam())};
|
||||
const bool use_multistream_{std::get<2>(GetParam())};
|
||||
const int encoder_sample_rate_hz_{std::get<3>(GetParam())};
|
||||
const int decoder_sample_rate_hz_{std::get<4>(GetParam())};
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
// Singlestream: Try all combinations.
|
||||
INSTANTIATE_TEST_SUITE_P(Singlestream,
|
||||
OpusTest,
|
||||
testing::Combine(testing::Values(1, 2),
|
||||
testing::Values(0, 1),
|
||||
testing::Values(false),
|
||||
testing::Values(16000, 48000),
|
||||
testing::Values(16000, 48000)));
|
||||
|
||||
// Multistream: Some representative cases (only 48 kHz for now).
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
Multistream,
|
||||
OpusTest,
|
||||
testing::Values(std::make_tuple(1, 0, true, 48000, 48000),
|
||||
std::make_tuple(2, 1, true, 48000, 48000),
|
||||
std::make_tuple(4, 0, true, 48000, 48000),
|
||||
std::make_tuple(4, 1, true, 48000, 48000)));
|
||||
|
||||
void OpusTest::PrepareSpeechData(int block_length_ms, int loop_length_ms) {
|
||||
std::map<int, std::string> channel_to_basename = {
|
||||
{1, "audio_coding/testfile32kHz"},
|
||||
{2, "audio_coding/teststereo32kHz"},
|
||||
{4, "audio_coding/speech_4_channels_48k_one_second"}};
|
||||
std::map<int, std::string> channel_to_suffix = {
|
||||
{1, "pcm"}, {2, "pcm"}, {4, "wav"}};
|
||||
const std::string file_name = webrtc::test::ResourcePath(
|
||||
channel_to_basename[channels_], channel_to_suffix[channels_]);
|
||||
if (loop_length_ms < block_length_ms) {
|
||||
loop_length_ms = block_length_ms;
|
||||
}
|
||||
const int sample_rate_khz =
|
||||
rtc::CheckedDivExact(encoder_sample_rate_hz_, 1000);
|
||||
EXPECT_TRUE(speech_data_.Init(file_name,
|
||||
loop_length_ms * sample_rate_khz * channels_,
|
||||
block_length_ms * sample_rate_khz * channels_));
|
||||
}
|
||||
|
||||
// Sets the maximum playback rate of `encoder` to `set` and checks that the
// value read back from the same encoder equals `expect`.
//
// Input:
//   encoder - encoder to configure and query.
//   expect  - value expected from WebRtcOpus_GetMaxPlaybackRate().
//   set     - value passed to WebRtcOpus_SetMaxPlaybackRate().
//
// All checks are non-fatal.
void OpusTest::SetMaxPlaybackRate(WebRtcOpusEncInst* encoder,
                                  opus_int32 expect,
                                  int32_t set) {
  // Initialized so that a failing query never compares an indeterminate value.
  opus_int32 bandwidth = 0;
  // Operate on the encoder that was passed in; the parameter used to be
  // ignored in favour of `opus_encoder_`.
  EXPECT_EQ(0, WebRtcOpus_SetMaxPlaybackRate(encoder, set));
  EXPECT_EQ(0, WebRtcOpus_GetMaxPlaybackRate(encoder, &bandwidth));
  EXPECT_EQ(expect, bandwidth);
}
|
||||
|
||||
void OpusTest::CheckAudioBounded(const int16_t* audio,
|
||||
size_t samples,
|
||||
size_t channels,
|
||||
uint16_t bound) const {
|
||||
for (size_t i = 0; i < samples; ++i) {
|
||||
for (size_t c = 0; c < channels; ++c) {
|
||||
ASSERT_GE(audio[i * channels + c], -bound);
|
||||
ASSERT_LE(audio[i * channels + c], bound);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Encodes `input_audio` with `encoder` into `bitstream_` and decodes the
// result with `decoder` into `output_audio`.
//
// Input:
//   encoder     - encoder instance.
//   input_audio - interleaved input; its size must be a multiple of
//                 `channels_`.
//   decoder     - decoder instance.
//
// Output:
//   output_audio   - decoded, interleaved audio.
//   audio_type     - frame type reported by the decoder.
//   encoded_bytes_ - size of the encoded packet (0 if encoding failed).
//
// Returns the number of decoded samples per channel, or -1 if encoding
// failed. The failure itself is reported through a non-fatal expectation.
int OpusTest::EncodeDecode(WebRtcOpusEncInst* encoder,
                           rtc::ArrayView<const int16_t> input_audio,
                           WebRtcOpusDecInst* decoder,
                           int16_t* output_audio,
                           int16_t* audio_type) {
  const int input_samples_per_channel =
      rtc::CheckedDivExact(input_audio.size(), channels_);
  const int encoded_bytes_int =
      WebRtcOpus_Encode(encoder, input_audio.data(), input_samples_per_channel,
                        kMaxBytes, bitstream_);
  EXPECT_GE(encoded_bytes_int, 0);
  if (encoded_bytes_int < 0) {
    // The expectation above is non-fatal. Stop here so that the negative
    // value does not wrap around to a huge packet size that would then be
    // handed to the decoder.
    encoded_bytes_ = 0;
    return -1;
  }
  encoded_bytes_ = static_cast<size_t>(encoded_bytes_int);

  if (encoded_bytes_ != 0) {
    // Regular packet: the estimated duration must match the decoded one.
    const int est_len =
        WebRtcOpus_DurationEst(decoder, bitstream_, encoded_bytes_);
    const int act_len = WebRtcOpus_Decode(decoder, bitstream_, encoded_bytes_,
                                          output_audio, audio_type);
    EXPECT_EQ(est_len, act_len);
    return act_len;
  }

  // The encoder produced no payload. Call the decoder without payload until
  // it has generated as much audio as the input block represents at the
  // decoder's sample rate.
  const int output_samples_per_channel = input_samples_per_channel *
                                         decoder_sample_rate_hz_ /
                                         encoder_sample_rate_hz_;
  int total_dtx_len = 0;
  while (total_dtx_len < output_samples_per_channel) {
    const int est_len = WebRtcOpus_DurationEst(decoder, nullptr, 0);
    const int act_len =
        WebRtcOpus_Decode(decoder, nullptr, 0,
                          &output_audio[total_dtx_len * channels_], audio_type);
    EXPECT_EQ(est_len, act_len);
    if (act_len <= 0) {
      // A decoder error or an empty frame would otherwise keep this loop
      // running forever (or move the write position backwards).
      break;
    }
    total_dtx_len += act_len;
  }
  return total_dtx_len;
}
|
||||
|
||||
// Test if encoder/decoder can enter DTX mode properly and do not enter DTX when
|
||||
// they should not. This test is signal dependent.
|
||||
void OpusTest::TestDtxEffect(bool dtx, int block_length_ms) {
|
||||
PrepareSpeechData(block_length_ms, 2000);
|
||||
const size_t input_samples =
|
||||
rtc::CheckedDivExact(encoder_sample_rate_hz_, 1000) * block_length_ms;
|
||||
const size_t output_samples =
|
||||
rtc::CheckedDivExact(decoder_sample_rate_hz_, 1000) * block_length_ms;
|
||||
|
||||
// Create encoder memory.
|
||||
CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
|
||||
use_multistream_, encoder_sample_rate_hz_);
|
||||
CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_,
|
||||
decoder_sample_rate_hz_);
|
||||
|
||||
// Set bitrate.
|
||||
EXPECT_EQ(
|
||||
0, WebRtcOpus_SetBitRate(opus_encoder_, channels_ == 1 ? 32000 : 64000));
|
||||
|
||||
// Set input audio as silence.
|
||||
std::vector<int16_t> silence(input_samples * channels_, 0);
|
||||
|
||||
// Setting DTX.
|
||||
EXPECT_EQ(0, dtx ? WebRtcOpus_EnableDtx(opus_encoder_)
|
||||
: WebRtcOpus_DisableDtx(opus_encoder_));
|
||||
|
||||
int16_t audio_type;
|
||||
int16_t* output_data_decode = new int16_t[output_samples * channels_];
|
||||
|
||||
for (int i = 0; i < 100; ++i) {
|
||||
EXPECT_EQ(output_samples,
|
||||
static_cast<size_t>(EncodeDecode(
|
||||
opus_encoder_, speech_data_.GetNextBlock(), opus_decoder_,
|
||||
output_data_decode, &audio_type)));
|
||||
// If not DTX, it should never enter DTX mode. If DTX, we do not care since
|
||||
// whether it enters DTX depends on the signal type.
|
||||
if (!dtx) {
|
||||
EXPECT_GT(encoded_bytes_, 1U);
|
||||
EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
|
||||
EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
|
||||
EXPECT_EQ(0, audio_type); // Speech.
|
||||
}
|
||||
}
|
||||
|
||||
// We input some silent segments. In DTX mode, the encoder will stop sending.
|
||||
// However, DTX may happen after a while.
|
||||
for (int i = 0; i < 30; ++i) {
|
||||
EXPECT_EQ(output_samples, static_cast<size_t>(EncodeDecode(
|
||||
opus_encoder_, silence, opus_decoder_,
|
||||
output_data_decode, &audio_type)));
|
||||
if (!dtx) {
|
||||
EXPECT_GT(encoded_bytes_, 1U);
|
||||
EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
|
||||
EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
|
||||
EXPECT_EQ(0, audio_type); // Speech.
|
||||
} else if (encoded_bytes_ == 1) {
|
||||
EXPECT_EQ(1, opus_encoder_->in_dtx_mode);
|
||||
EXPECT_EQ(1, opus_decoder_->in_dtx_mode);
|
||||
EXPECT_EQ(2, audio_type); // Comfort noise.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// When Opus is in DTX, it wakes up in a regular basis. It sends two packets,
|
||||
// one with an arbitrary size and the other of 1-byte, then stops sending for
|
||||
// a certain number of frames.
|
||||
|
||||
// `max_dtx_frames` is the maximum number of frames Opus can stay in DTX.
|
||||
// TODO(kwiberg): Why does this number depend on the encoding sample rate?
|
||||
const int max_dtx_frames =
|
||||
(encoder_sample_rate_hz_ == 16000 ? 800 : 400) / block_length_ms + 1;
|
||||
|
||||
// We run `kRunTimeMs` milliseconds of pure silence.
|
||||
const int kRunTimeMs = 4500;
|
||||
|
||||
// We check that, after a `kCheckTimeMs` milliseconds (given that the CNG in
|
||||
// Opus needs time to adapt), the absolute values of DTX decoded signal are
|
||||
// bounded by `kOutputValueBound`.
|
||||
const int kCheckTimeMs = 4000;
|
||||
|
||||
#if defined(OPUS_FIXED_POINT)
|
||||
// Fixed-point Opus generates a random (comfort) noise, which has a less
|
||||
// predictable value bound than its floating-point Opus. This value depends on
|
||||
// input signal, and the time window for checking the output values (between
|
||||
// `kCheckTimeMs` and `kRunTimeMs`).
|
||||
const uint16_t kOutputValueBound = 30;
|
||||
|
||||
#else
|
||||
const uint16_t kOutputValueBound = 2;
|
||||
#endif
|
||||
|
||||
int time = 0;
|
||||
while (time < kRunTimeMs) {
|
||||
// DTX mode is maintained for maximum `max_dtx_frames` frames.
|
||||
int i = 0;
|
||||
for (; i < max_dtx_frames; ++i) {
|
||||
time += block_length_ms;
|
||||
EXPECT_EQ(output_samples, static_cast<size_t>(EncodeDecode(
|
||||
opus_encoder_, silence, opus_decoder_,
|
||||
output_data_decode, &audio_type)));
|
||||
if (dtx) {
|
||||
if (encoded_bytes_ > 1)
|
||||
break;
|
||||
EXPECT_EQ(0U, encoded_bytes_) // Send 0 byte.
|
||||
<< "Opus should have entered DTX mode.";
|
||||
EXPECT_EQ(1, opus_encoder_->in_dtx_mode);
|
||||
EXPECT_EQ(1, opus_decoder_->in_dtx_mode);
|
||||
EXPECT_EQ(2, audio_type); // Comfort noise.
|
||||
if (time >= kCheckTimeMs) {
|
||||
CheckAudioBounded(output_data_decode, output_samples, channels_,
|
||||
kOutputValueBound);
|
||||
}
|
||||
} else {
|
||||
EXPECT_GT(encoded_bytes_, 1U);
|
||||
EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
|
||||
EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
|
||||
EXPECT_EQ(0, audio_type); // Speech.
|
||||
}
|
||||
}
|
||||
|
||||
if (dtx) {
|
||||
// With DTX, Opus must stop transmission for some time.
|
||||
EXPECT_GT(i, 1);
|
||||
}
|
||||
|
||||
// We expect a normal payload.
|
||||
EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
|
||||
EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
|
||||
EXPECT_EQ(0, audio_type); // Speech.
|
||||
|
||||
// Enters DTX again immediately.
|
||||
time += block_length_ms;
|
||||
EXPECT_EQ(output_samples, static_cast<size_t>(EncodeDecode(
|
||||
opus_encoder_, silence, opus_decoder_,
|
||||
output_data_decode, &audio_type)));
|
||||
if (dtx) {
|
||||
EXPECT_EQ(1U, encoded_bytes_); // Send 1 byte.
|
||||
EXPECT_EQ(1, opus_encoder_->in_dtx_mode);
|
||||
EXPECT_EQ(1, opus_decoder_->in_dtx_mode);
|
||||
EXPECT_EQ(2, audio_type); // Comfort noise.
|
||||
if (time >= kCheckTimeMs) {
|
||||
CheckAudioBounded(output_data_decode, output_samples, channels_,
|
||||
kOutputValueBound);
|
||||
}
|
||||
} else {
|
||||
EXPECT_GT(encoded_bytes_, 1U);
|
||||
EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
|
||||
EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
|
||||
EXPECT_EQ(0, audio_type); // Speech.
|
||||
}
|
||||
}
|
||||
|
||||
silence[0] = 10000;
|
||||
if (dtx) {
|
||||
// Verify that encoder/decoder can jump out from DTX mode.
|
||||
EXPECT_EQ(output_samples, static_cast<size_t>(EncodeDecode(
|
||||
opus_encoder_, silence, opus_decoder_,
|
||||
output_data_decode, &audio_type)));
|
||||
EXPECT_GT(encoded_bytes_, 1U);
|
||||
EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
|
||||
EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
|
||||
EXPECT_EQ(0, audio_type); // Speech.
|
||||
}
|
||||
|
||||
// Free memory.
|
||||
delete[] output_data_decode;
|
||||
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
|
||||
EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
|
||||
}
|
||||
|
||||
// Test if CBR does what we expect.
|
||||
void OpusTest::TestCbrEffect(bool cbr, int block_length_ms) {
|
||||
PrepareSpeechData(block_length_ms, 2000);
|
||||
const size_t output_samples =
|
||||
rtc::CheckedDivExact(decoder_sample_rate_hz_, 1000) * block_length_ms;
|
||||
|
||||
int32_t max_pkt_size_diff = 0;
|
||||
int32_t prev_pkt_size = 0;
|
||||
|
||||
// Create encoder memory.
|
||||
CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
|
||||
use_multistream_, encoder_sample_rate_hz_);
|
||||
CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_,
|
||||
decoder_sample_rate_hz_);
|
||||
|
||||
// Set bitrate.
|
||||
EXPECT_EQ(
|
||||
0, WebRtcOpus_SetBitRate(opus_encoder_, channels_ == 1 ? 32000 : 64000));
|
||||
|
||||
// Setting CBR.
|
||||
EXPECT_EQ(0, cbr ? WebRtcOpus_EnableCbr(opus_encoder_)
|
||||
: WebRtcOpus_DisableCbr(opus_encoder_));
|
||||
|
||||
int16_t audio_type;
|
||||
std::vector<int16_t> audio_out(output_samples * channels_);
|
||||
for (int i = 0; i < 100; ++i) {
|
||||
EXPECT_EQ(output_samples,
|
||||
static_cast<size_t>(
|
||||
EncodeDecode(opus_encoder_, speech_data_.GetNextBlock(),
|
||||
opus_decoder_, audio_out.data(), &audio_type)));
|
||||
|
||||
if (prev_pkt_size > 0) {
|
||||
int32_t diff = std::abs((int32_t)encoded_bytes_ - prev_pkt_size);
|
||||
max_pkt_size_diff = std::max(max_pkt_size_diff, diff);
|
||||
}
|
||||
prev_pkt_size = rtc::checked_cast<int32_t>(encoded_bytes_);
|
||||
}
|
||||
|
||||
if (cbr) {
|
||||
EXPECT_EQ(max_pkt_size_diff, 0);
|
||||
} else {
|
||||
EXPECT_GT(max_pkt_size_diff, 0);
|
||||
}
|
||||
|
||||
// Free memory.
|
||||
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
|
||||
EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
|
||||
}
|
||||
|
||||
// Test failing Create.
|
||||
TEST(OpusTest, OpusCreateFail) {
|
||||
WebRtcOpusEncInst* opus_encoder;
|
||||
WebRtcOpusDecInst* opus_decoder;
|
||||
|
||||
// Test to see that an invalid pointer is caught.
|
||||
EXPECT_EQ(-1, WebRtcOpus_EncoderCreate(NULL, 1, 0, 48000));
|
||||
// Invalid channel number.
|
||||
EXPECT_EQ(-1, WebRtcOpus_EncoderCreate(&opus_encoder, 257, 0, 48000));
|
||||
// Invalid applciation mode.
|
||||
EXPECT_EQ(-1, WebRtcOpus_EncoderCreate(&opus_encoder, 1, 2, 48000));
|
||||
// Invalid sample rate.
|
||||
EXPECT_EQ(-1, WebRtcOpus_EncoderCreate(&opus_encoder, 1, 0, 12345));
|
||||
|
||||
EXPECT_EQ(-1, WebRtcOpus_DecoderCreate(NULL, 1, 48000));
|
||||
// Invalid channel number.
|
||||
EXPECT_EQ(-1, WebRtcOpus_DecoderCreate(&opus_decoder, 257, 48000));
|
||||
// Invalid sample rate.
|
||||
EXPECT_EQ(-1, WebRtcOpus_DecoderCreate(&opus_decoder, 1, 12345));
|
||||
}
|
||||
|
||||
// Test failing Free.
|
||||
TEST(OpusTest, OpusFreeFail) {
|
||||
// Test to see that an invalid pointer is caught.
|
||||
EXPECT_EQ(-1, WebRtcOpus_EncoderFree(NULL));
|
||||
EXPECT_EQ(-1, WebRtcOpus_DecoderFree(NULL));
|
||||
}
|
||||
|
||||
// Test normal Create and Free.
|
||||
TEST_P(OpusTest, OpusCreateFree) {
|
||||
CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
|
||||
use_multistream_, encoder_sample_rate_hz_);
|
||||
CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_,
|
||||
decoder_sample_rate_hz_);
|
||||
EXPECT_TRUE(opus_encoder_ != NULL);
|
||||
EXPECT_TRUE(opus_decoder_ != NULL);
|
||||
// Free encoder and decoder memory.
|
||||
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
|
||||
EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
|
||||
}
|
||||
|
||||
// Issues an Opus CTL request on `inst`, dispatching to the single-stream
// encoder when `inst->encoder` is set and to the multistream encoder otherwise.
// `inst` and the whole expansion are parenthesized so the macro is safe to use
// with non-trivial arguments and inside larger expressions. `vargs` is not
// parenthesized because it is forwarded as the trailing call arguments.
#define ENCODER_CTL(inst, vargs)                  \
  ((inst)->encoder                                \
       ? opus_encoder_ctl((inst)->encoder, vargs) \
       : opus_multistream_encoder_ctl((inst)->multistream_encoder, vargs))
|
||||
|
||||
// Encodes one 20 ms speech block and decodes it again, checking the decoder
// channel count, the encoder application mode and the decoded length.
TEST_P(OpusTest, OpusEncodeDecode) {
  PrepareSpeechData(20, 20);

  // Create encoder memory.
  CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
                                   use_multistream_, encoder_sample_rate_hz_);
  CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_,
                                   decoder_sample_rate_hz_);

  // Set bitrate.
  EXPECT_EQ(
      0, WebRtcOpus_SetBitRate(opus_encoder_, channels_ == 1 ? 32000 : 64000));

  // Check number of channels for decoder.
  EXPECT_EQ(channels_, WebRtcOpus_DecoderChannels(opus_decoder_));

  // Check application mode.
  opus_int32 app;
  ENCODER_CTL(opus_encoder_, OPUS_GET_APPLICATION(&app));
  EXPECT_EQ(application_ == 0 ? OPUS_APPLICATION_VOIP : OPUS_APPLICATION_AUDIO,
            app);

  // Encode & decode.
  int16_t audio_type;
  const int decode_samples_per_channel =
      SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/20);
  // The decode buffer is owned by a vector so no manual delete[] is needed.
  std::vector<int16_t> output_data_decode(decode_samples_per_channel *
                                          channels_);
  EXPECT_EQ(decode_samples_per_channel,
            EncodeDecode(opus_encoder_, speech_data_.GetNextBlock(),
                         opus_decoder_, output_data_decode.data(),
                         &audio_type));

  // Free memory.
  EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
  EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
}
|
||||
|
||||
// Setting the bitrate must fail before the encoder exists and succeed for a
// range of bitrates once it has been created.
TEST_P(OpusTest, OpusSetBitRate) {
  // No encoder has been created yet, so the call must be rejected.
  EXPECT_EQ(-1, WebRtcOpus_SetBitRate(opus_encoder_, 60000));

  // Create the encoder and walk through several bitrates.
  CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
                                   use_multistream_, encoder_sample_rate_hz_);
  constexpr int kAcceptedBitratesBps[] = {30000, 60000, 300000, 600000};
  for (const int bitrate_bps : kAcceptedBitratesBps) {
    EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_, bitrate_bps));
  }

  // Release the encoder.
  EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
}
|
||||
|
||||
// Setting the complexity must fail before the encoder exists; afterwards 0 and
// 10 are accepted and 11 is rejected.
TEST_P(OpusTest, OpusSetComplexity) {
  // No encoder has been created yet, so the call must be rejected.
  EXPECT_EQ(-1, WebRtcOpus_SetComplexity(opus_encoder_, 9));

  CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
                                   use_multistream_, encoder_sample_rate_hz_);

  // Lowest and highest accepted values.
  EXPECT_EQ(0, WebRtcOpus_SetComplexity(opus_encoder_, 0));
  EXPECT_EQ(0, WebRtcOpus_SetComplexity(opus_encoder_, 10));
  // One past the highest accepted value.
  EXPECT_EQ(-1, WebRtcOpus_SetComplexity(opus_encoder_, 11));

  // Release the encoder.
  EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
}
|
||||
|
||||
// Checks that bandwidth can only be set/read on a created encoder, that
// out-of-range values are rejected, and that the bandwidth reported after
// encoding matches the last accepted setting.
TEST_P(OpusTest, OpusSetBandwidth) {
  if (channels_ > 2) {
    // TODO(webrtc:10217): investigate why multi-stream Opus reports
    // narrowband when it's configured with FULLBAND.
    return;
  }
  PrepareSpeechData(20, 20);

  int16_t audio_type;
  const int decode_samples_per_channel =
      SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/20);
  std::unique_ptr<int16_t[]> decoded(
      new int16_t[decode_samples_per_channel * channels_]());

  // Runs one speech block through the encoder and decoder; the bandwidth is
  // queried after each such round trip.
  const auto encode_decode_next_block = [&] {
    EncodeDecode(opus_encoder_, speech_data_.GetNextBlock(), opus_decoder_,
                 decoded.get(), &audio_type);
  };

  // Without an encoder both the setter and the getter must fail.
  EXPECT_EQ(-1,
            WebRtcOpus_SetBandwidth(opus_encoder_, OPUS_BANDWIDTH_NARROWBAND));
  EXPECT_EQ(-1, WebRtcOpus_GetBandwidth(opus_encoder_));

  // Create encoder and decoder, then try different bandwidths.
  CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
                                   use_multistream_, encoder_sample_rate_hz_);
  CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_,
                                   decoder_sample_rate_hz_);

  // Below the valid range.
  EXPECT_EQ(-1, WebRtcOpus_SetBandwidth(opus_encoder_,
                                        OPUS_BANDWIDTH_NARROWBAND - 1));

  // Narrowband.
  EXPECT_EQ(0,
            WebRtcOpus_SetBandwidth(opus_encoder_, OPUS_BANDWIDTH_NARROWBAND));
  encode_decode_next_block();
  EXPECT_EQ(OPUS_BANDWIDTH_NARROWBAND, WebRtcOpus_GetBandwidth(opus_encoder_));

  // Fullband request: a 16 kHz encoder is expected to report wideband.
  EXPECT_EQ(0, WebRtcOpus_SetBandwidth(opus_encoder_, OPUS_BANDWIDTH_FULLBAND));
  encode_decode_next_block();
  EXPECT_EQ(encoder_sample_rate_hz_ == 16000 ? OPUS_BANDWIDTH_WIDEBAND
                                             : OPUS_BANDWIDTH_FULLBAND,
            WebRtcOpus_GetBandwidth(opus_encoder_));

  // Above the valid range: rejected, and the previous setting stays in effect.
  EXPECT_EQ(
      -1, WebRtcOpus_SetBandwidth(opus_encoder_, OPUS_BANDWIDTH_FULLBAND + 1));
  encode_decode_next_block();
  EXPECT_EQ(encoder_sample_rate_hz_ == 16000 ? OPUS_BANDWIDTH_WIDEBAND
                                             : OPUS_BANDWIDTH_FULLBAND,
            WebRtcOpus_GetBandwidth(opus_encoder_));

  // Release encoder and decoder.
  EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
  EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
}
|
||||
|
||||
// Forcing the channel count must fail without an encoder. With an encoder, a
// value above the accepted maximum (2 for multi-channel configurations, 1 for
// mono) is rejected and every value from that maximum down to 0 is accepted.
TEST_P(OpusTest, OpusForceChannels) {
  // No encoder has been created yet, so the call must be rejected.
  EXPECT_EQ(-1, WebRtcOpus_SetForceChannels(opus_encoder_, 1));

  CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
                                   use_multistream_, encoder_sample_rate_hz_);
  ASSERT_NE(nullptr, opus_encoder_);

  // First value that must be rejected for this configuration.
  const size_t first_rejected = channels_ >= 2 ? 3 : 2;
  EXPECT_EQ(-1, WebRtcOpus_SetForceChannels(opus_encoder_, first_rejected));

  // Everything below it, counted down to 0, must be accepted.
  for (size_t forced = first_rejected; forced-- > 0;) {
    EXPECT_EQ(0, WebRtcOpus_SetForceChannels(opus_encoder_, forced));
  }

  EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
}
|
||||
|
||||
// Encode and decode one frame, initialize the decoder and
|
||||
// decode once more.
|
||||
TEST_P(OpusTest, OpusDecodeInit) {
|
||||
PrepareSpeechData(20, 20);
|
||||
|
||||
// Create encoder memory.
|
||||
CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
|
||||
use_multistream_, encoder_sample_rate_hz_);
|
||||
CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_,
|
||||
decoder_sample_rate_hz_);
|
||||
|
||||
// Encode & decode.
|
||||
int16_t audio_type;
|
||||
const int decode_samples_per_channel =
|
||||
SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/20);
|
||||
int16_t* output_data_decode =
|
||||
new int16_t[decode_samples_per_channel * channels_];
|
||||
EXPECT_EQ(decode_samples_per_channel,
|
||||
EncodeDecode(opus_encoder_, speech_data_.GetNextBlock(),
|
||||
opus_decoder_, output_data_decode, &audio_type));
|
||||
|
||||
WebRtcOpus_DecoderInit(opus_decoder_);
|
||||
|
||||
EXPECT_EQ(decode_samples_per_channel,
|
||||
WebRtcOpus_Decode(opus_decoder_, bitstream_, encoded_bytes_,
|
||||
output_data_decode, &audio_type));
|
||||
|
||||
// Free memory.
|
||||
delete[] output_data_decode;
|
||||
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
|
||||
EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
|
||||
}
|
||||
|
||||
// Enabling and disabling FEC must fail without an encoder and succeed once the
// encoder has been created.
TEST_P(OpusTest, OpusEnableDisableFec) {
  // No encoder has been created yet, so both calls must be rejected.
  EXPECT_EQ(-1, WebRtcOpus_EnableFec(opus_encoder_));
  EXPECT_EQ(-1, WebRtcOpus_DisableFec(opus_encoder_));

  CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
                                   use_multistream_, encoder_sample_rate_hz_);

  // With an encoder in place both calls must succeed.
  EXPECT_EQ(0, WebRtcOpus_EnableFec(opus_encoder_));
  EXPECT_EQ(0, WebRtcOpus_DisableFec(opus_encoder_));

  // Release the encoder.
  EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
}
|
||||
|
||||
// Enabling and disabling DTX must fail without an encoder; with an encoder the
// DTX flag read back through the Opus CTL must follow each call.
TEST_P(OpusTest, OpusEnableDisableDtx) {
  // No encoder has been created yet, so both calls must be rejected.
  EXPECT_EQ(-1, WebRtcOpus_EnableDtx(opus_encoder_));
  EXPECT_EQ(-1, WebRtcOpus_DisableDtx(opus_encoder_));

  CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
                                   use_multistream_, encoder_sample_rate_hz_);

  // Reads the encoder's current DTX flag via OPUS_GET_DTX.
  const auto current_dtx = [this] {
    opus_int32 flag;
    ENCODER_CTL(opus_encoder_, OPUS_GET_DTX(&flag));
    return flag;
  };

  // DTX is off by default.
  EXPECT_EQ(0, current_dtx());

  // Test to enable DTX.
  EXPECT_EQ(0, WebRtcOpus_EnableDtx(opus_encoder_));
  EXPECT_EQ(1, current_dtx());

  // Test to disable DTX.
  EXPECT_EQ(0, WebRtcOpus_DisableDtx(opus_encoder_));
  EXPECT_EQ(0, current_dtx());

  // Release the encoder.
  EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
}
|
||||
|
||||
// Runs the DTX effect test with DTX disabled for 10, 20 and 40 ms frames.
TEST_P(OpusTest, OpusDtxOff) {
  constexpr int kBlockLengthsMs[] = {10, 20, 40};
  for (const int block_length_ms : kBlockLengthsMs) {
    TestDtxEffect(false, block_length_ms);
  }
}
|
||||
|
||||
// Runs the DTX effect test with DTX enabled for 10, 20 and 40 ms frames. Only
// executed for at most two channels with application mode 0.
TEST_P(OpusTest, OpusDtxOn) {
  const bool unsupported_configuration = channels_ > 2 || application_ != 0;
  if (unsupported_configuration) {
    // DTX does not work with OPUS_APPLICATION_AUDIO at low complexity settings.
    // TODO(webrtc:10218): adapt the test to the sizes and order of multi-stream
    // DTX packets.
    return;
  }
  constexpr int kBlockLengthsMs[] = {10, 20, 40};
  for (const int block_length_ms : kBlockLengthsMs) {
    TestDtxEffect(true, block_length_ms);
  }
}
|
||||
|
||||
// Runs the CBR effect test with CBR disabled for 10, 20 and 40 ms frames.
TEST_P(OpusTest, OpusCbrOff) {
  constexpr int kBlockLengthsMs[] = {10, 20, 40};
  for (const int block_length_ms : kBlockLengthsMs) {
    TestCbrEffect(false, block_length_ms);
  }
}
|
||||
|
||||
// Runs the CBR effect test with CBR enabled for 10, 20 and 40 ms frames.
TEST_P(OpusTest, OpusCbrOn) {
  constexpr int kBlockLengthsMs[] = {10, 20, 40};
  for (const int block_length_ms : kBlockLengthsMs) {
    TestCbrEffect(true, block_length_ms);
  }
}
|
||||
|
||||
// Setting the packet loss rate must fail without an encoder; with an encoder,
// 50 is accepted while -1 and 101 are rejected.
TEST_P(OpusTest, OpusSetPacketLossRate) {
  // No encoder has been created yet, so the call must be rejected.
  EXPECT_EQ(-1, WebRtcOpus_SetPacketLossRate(opus_encoder_, 50));

  CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
                                   use_multistream_, encoder_sample_rate_hz_);

  // Accepted value.
  EXPECT_EQ(0, WebRtcOpus_SetPacketLossRate(opus_encoder_, 50));
  // Rejected values on either side.
  EXPECT_EQ(-1, WebRtcOpus_SetPacketLossRate(opus_encoder_, -1));
  EXPECT_EQ(-1, WebRtcOpus_SetPacketLossRate(opus_encoder_, 101));

  // Release the encoder.
  EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
}
|
||||
|
||||
// Setting the max playback rate must fail without an encoder. With an encoder,
// each playback rate in the table is applied together with the bandwidth the
// SetMaxPlaybackRate() helper is told to expect for it.
TEST_P(OpusTest, OpusSetMaxPlaybackRate) {
  // No encoder has been created yet, so the call must be rejected.
  EXPECT_EQ(-1, WebRtcOpus_SetMaxPlaybackRate(opus_encoder_, 20000));

  CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
                                   use_multistream_, encoder_sample_rate_hz_);

  struct PlaybackRateCase {
    opus_int32 expected_bandwidth;
    int32_t playback_rate_hz;
  };
  // Rates sit on both sides of each bandwidth boundary.
  const PlaybackRateCase kCases[] = {
      {OPUS_BANDWIDTH_FULLBAND, 48000},
      {OPUS_BANDWIDTH_FULLBAND, 24001},
      {OPUS_BANDWIDTH_SUPERWIDEBAND, 24000},
      {OPUS_BANDWIDTH_SUPERWIDEBAND, 16001},
      {OPUS_BANDWIDTH_WIDEBAND, 16000},
      {OPUS_BANDWIDTH_WIDEBAND, 12001},
      {OPUS_BANDWIDTH_MEDIUMBAND, 12000},
      {OPUS_BANDWIDTH_MEDIUMBAND, 8001},
      {OPUS_BANDWIDTH_NARROWBAND, 8000},
      {OPUS_BANDWIDTH_NARROWBAND, 4000},
  };
  for (const PlaybackRateCase& test_case : kCases) {
    SetMaxPlaybackRate(opus_encoder_, test_case.expected_bandwidth,
                       test_case.playback_rate_hz);
  }

  // Release the encoder.
  EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
}
|
||||
|
||||
// Test PLC.
|
||||
TEST_P(OpusTest, OpusDecodePlc) {
|
||||
PrepareSpeechData(20, 20);
|
||||
|
||||
// Create encoder memory.
|
||||
CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
|
||||
use_multistream_, encoder_sample_rate_hz_);
|
||||
CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_,
|
||||
decoder_sample_rate_hz_);
|
||||
|
||||
// Set bitrate.
|
||||
EXPECT_EQ(
|
||||
0, WebRtcOpus_SetBitRate(opus_encoder_, channels_ == 1 ? 32000 : 64000));
|
||||
|
||||
// Check number of channels for decoder.
|
||||
EXPECT_EQ(channels_, WebRtcOpus_DecoderChannels(opus_decoder_));
|
||||
|
||||
// Encode & decode.
|
||||
int16_t audio_type;
|
||||
const int decode_samples_per_channel =
|
||||
SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/20);
|
||||
int16_t* output_data_decode =
|
||||
new int16_t[decode_samples_per_channel * channels_];
|
||||
EXPECT_EQ(decode_samples_per_channel,
|
||||
EncodeDecode(opus_encoder_, speech_data_.GetNextBlock(),
|
||||
opus_decoder_, output_data_decode, &audio_type));
|
||||
|
||||
// Call decoder PLC.
|
||||
constexpr int kPlcDurationMs = 10;
|
||||
const int plc_samples = decoder_sample_rate_hz_ * kPlcDurationMs / 1000;
|
||||
int16_t* plc_buffer = new int16_t[plc_samples * channels_];
|
||||
EXPECT_EQ(plc_samples,
|
||||
WebRtcOpus_Decode(opus_decoder_, NULL, 0, plc_buffer, &audio_type));
|
||||
|
||||
// Free memory.
|
||||
delete[] plc_buffer;
|
||||
delete[] output_data_decode;
|
||||
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
|
||||
EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
|
||||
}
|
||||
|
||||
// Duration estimation.
|
||||
TEST_P(OpusTest, OpusDurationEstimation) {
|
||||
PrepareSpeechData(20, 20);
|
||||
|
||||
// Create.
|
||||
CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
|
||||
use_multistream_, encoder_sample_rate_hz_);
|
||||
CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_,
|
||||
decoder_sample_rate_hz_);
|
||||
|
||||
// 10 ms. We use only first 10 ms of a 20 ms block.
|
||||
auto speech_block = speech_data_.GetNextBlock();
|
||||
int encoded_bytes_int = WebRtcOpus_Encode(
|
||||
opus_encoder_, speech_block.data(),
|
||||
rtc::CheckedDivExact(speech_block.size(), 2 * channels_), kMaxBytes,
|
||||
bitstream_);
|
||||
EXPECT_GE(encoded_bytes_int, 0);
|
||||
EXPECT_EQ(SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/10),
|
||||
WebRtcOpus_DurationEst(opus_decoder_, bitstream_,
|
||||
static_cast<size_t>(encoded_bytes_int)));
|
||||
|
||||
// 20 ms
|
||||
speech_block = speech_data_.GetNextBlock();
|
||||
encoded_bytes_int =
|
||||
WebRtcOpus_Encode(opus_encoder_, speech_block.data(),
|
||||
rtc::CheckedDivExact(speech_block.size(), channels_),
|
||||
kMaxBytes, bitstream_);
|
||||
EXPECT_GE(encoded_bytes_int, 0);
|
||||
EXPECT_EQ(SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/20),
|
||||
WebRtcOpus_DurationEst(opus_decoder_, bitstream_,
|
||||
static_cast<size_t>(encoded_bytes_int)));
|
||||
|
||||
// Free memory.
|
||||
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
|
||||
EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
|
||||
}
|
||||
|
||||
// Encodes `kPackets` 20 ms frames, merges them into one packet with the Opus
// repacketizer and checks that both the duration estimate and the decoded
// length of the merged packet equal `kPackets` frames.
TEST_P(OpusTest, OpusDecodeRepacketized) {
  if (channels_ > 2) {
    // As per the Opus documentation
    // https://mf4.xiph.org/jenkins/view/opus/job/opus/ws/doc/html/group__opus__repacketizer.html#details,
    // multiple streams are not supported.
    return;
  }
  constexpr size_t kPackets = 6;

  PrepareSpeechData(20, 20 * kPackets);

  // Create encoder memory.
  CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
                                   use_multistream_, encoder_sample_rate_hz_);
  ASSERT_NE(nullptr, opus_encoder_);
  CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_,
                                   decoder_sample_rate_hz_);
  ASSERT_NE(nullptr, opus_decoder_);

  // Set bitrate.
  EXPECT_EQ(
      0, WebRtcOpus_SetBitRate(opus_encoder_, channels_ == 1 ? 32000 : 64000));

  // Check number of channels for decoder.
  EXPECT_EQ(channels_, WebRtcOpus_DecoderChannels(opus_decoder_));

  // Encode & decode.
  int16_t audio_type;
  const int decode_samples_per_channel =
      SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/20);
  std::unique_ptr<int16_t[]> output_data_decode(
      new int16_t[kPackets * decode_samples_per_channel * channels_]);
  // The repacketizer is owned by a unique_ptr so it is destroyed on every
  // exit path, including a failing ASSERT.
  std::unique_ptr<OpusRepacketizer, decltype(&opus_repacketizer_destroy)> rp(
      opus_repacketizer_create(), &opus_repacketizer_destroy);
  ASSERT_NE(nullptr, rp.get());

  size_t num_packets = 0;
  constexpr size_t kMaxCycles = 100;
  for (size_t idx = 0; idx < kMaxCycles; ++idx) {
    auto speech_block = speech_data_.GetNextBlock();
    const int encoded =
        WebRtcOpus_Encode(opus_encoder_, speech_block.data(),
                          rtc::CheckedDivExact(speech_block.size(), channels_),
                          kMaxBytes, bitstream_);
    if (encoded < 0) {
      // A negative value must not be stored in the unsigned `encoded_bytes_`.
      ADD_FAILURE() << "WebRtcOpus_Encode failed with " << encoded;
      break;
    }
    encoded_bytes_ = static_cast<size_t>(encoded);
    if (opus_repacketizer_cat(rp.get(), bitstream_,
                              rtc::checked_cast<opus_int32>(encoded_bytes_)) ==
        OPUS_OK) {
      ++num_packets;
      if (num_packets == kPackets) {
        break;
      }
    } else {
      // Opus repacketizer cannot guarantee a success. We try again if it fails.
      opus_repacketizer_init(rp.get());
      num_packets = 0;
    }
  }
  EXPECT_EQ(kPackets, num_packets);

  const opus_int32 merged_bytes =
      opus_repacketizer_out(rp.get(), bitstream_, kMaxBytes);
  EXPECT_GT(merged_bytes, 0);
  // Only inspect the merged packet when the repacketizer produced one; a
  // non-positive size would turn into a bogus unsigned length.
  if (merged_bytes > 0) {
    encoded_bytes_ = static_cast<size_t>(merged_bytes);

    EXPECT_EQ(decode_samples_per_channel * kPackets,
              static_cast<size_t>(WebRtcOpus_DurationEst(
                  opus_decoder_, bitstream_, encoded_bytes_)));

    EXPECT_EQ(decode_samples_per_channel * kPackets,
              static_cast<size_t>(
                  WebRtcOpus_Decode(opus_decoder_, bitstream_, encoded_bytes_,
                                    output_data_decode.get(), &audio_type)));
  }

  // Free memory.
  EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
  EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
}
|
||||
|
||||
// A one-byte packet with TOC 0x80 must be reported as -1 by the VAD query.
TEST(OpusVadTest, CeltUnknownStatus) {
  const uint8_t kCeltPacket[] = {0x80};
  const int vad_status = WebRtcOpus_PacketHasVoiceActivity(kCeltPacket, 1);
  EXPECT_EQ(vad_status, -1);
}
|
||||
|
||||
// A packet with the VAD bit set must be reported as having voice activity.
// The result is compared against 1 explicitly: the query returns -1 for
// packets whose status it cannot determine (see the CELT test in this file),
// and EXPECT_TRUE would accept that value as well.
TEST(OpusVadTest, Mono20msVadSet) {
  uint8_t silk20msMonoVad[] = {0x78, 0x80};
  EXPECT_EQ(WebRtcOpus_PacketHasVoiceActivity(silk20msMonoVad, 2), 1);
}
|
||||
|
||||
// A packet with the VAD bit cleared must be reported as having no voice
// activity (result 0).
TEST(OpusVadTest, Mono20MsVadUnset) {
  uint8_t silk20msMonoSilence[] = {0x78, 0x00};
  EXPECT_EQ(WebRtcOpus_PacketHasVoiceActivity(silk20msMonoSilence, 2), 0);
}
|
||||
|
||||
// A stereo packet with the VAD bit set only on the side channel must be
// reported as having voice activity. The result is compared against 1
// explicitly because the query returns -1 for undeterminable packets, which
// EXPECT_TRUE would accept as well.
TEST(OpusVadTest, Stereo20MsVadOnSideChannel) {
  uint8_t silk20msStereoVadSideChannel[] = {0x78 | 0x04, 0x20};
  EXPECT_EQ(WebRtcOpus_PacketHasVoiceActivity(silk20msStereoVadSideChannel, 2),
            1);
}
|
||||
|
||||
// A packet with two mono frames where only the second has the VAD bit set
// must be reported as having voice activity. The result is compared against 1
// explicitly because the query returns -1 for undeterminable packets, which
// EXPECT_TRUE would accept as well.
TEST(OpusVadTest, TwoOpusMonoFramesVadOnSecond) {
  uint8_t twoMonoFrames[] = {0x78 | 0x1, 0x00, 0x80};
  EXPECT_EQ(WebRtcOpus_PacketHasVoiceActivity(twoMonoFrames, 3), 1);
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
#
|
||||
# Use of this source code is governed by a BSD-style license
|
||||
# that can be found in the LICENSE file in the root of the source
|
||||
# tree. An additional intellectual property rights grant can be found
|
||||
# in the file PATENTS. All contributing project authors may
|
||||
# be found in the AUTHORS file in the root of the source tree.
|
||||
|
||||
import("../../../../../webrtc.gni")
|
||||
|
||||
visibility = [
|
||||
":*",
|
||||
"../../../:*",
|
||||
]
|
||||
|
||||
if (rtc_include_tests) {
|
||||
rtc_library("test") {
|
||||
testonly = true
|
||||
|
||||
sources = [
|
||||
"audio_ring_buffer.cc",
|
||||
"audio_ring_buffer.h",
|
||||
"blocker.cc",
|
||||
"blocker.h",
|
||||
"lapped_transform.cc",
|
||||
"lapped_transform.h",
|
||||
]
|
||||
|
||||
deps = [
|
||||
"../../../../../common_audio",
|
||||
"../../../../../common_audio:common_audio_c",
|
||||
"../../../../../rtc_base:checks",
|
||||
"../../../../../rtc_base/memory:aligned_malloc",
|
||||
]
|
||||
}
|
||||
|
||||
rtc_library("test_unittest") {
|
||||
testonly = true
|
||||
|
||||
sources = [
|
||||
"audio_ring_buffer_unittest.cc",
|
||||
"blocker_unittest.cc",
|
||||
"lapped_transform_unittest.cc",
|
||||
]
|
||||
|
||||
deps = [
|
||||
":test",
|
||||
"../../../../../common_audio",
|
||||
"../../../../../common_audio:common_audio_c",
|
||||
"../../../../../rtc_base:macromagic",
|
||||
"../../../../../test:test_support",
|
||||
"//testing/gtest",
|
||||
]
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,76 @@
|
|||
/*
|
||||
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_coding/codecs/opus/test/audio_ring_buffer.h"
|
||||
|
||||
#include "common_audio/ring_buffer.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
// This is a simple multi-channel wrapper over the ring_buffer.h C interface.
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Creates one float ring buffer of `max_frames` elements per channel.
AudioRingBuffer::AudioRingBuffer(size_t channels, size_t max_frames) {
  buffers_.reserve(channels);
  size_t remaining = channels;
  while (remaining > 0) {
    buffers_.push_back(WebRtc_CreateBuffer(max_frames, sizeof(float)));
    --remaining;
  }
}
|
||||
|
||||
// Frees every per-channel ring buffer created by the constructor.
AudioRingBuffer::~AudioRingBuffer() {
  for (size_t ch = 0; ch < buffers_.size(); ++ch) {
    WebRtc_FreeBuffer(buffers_[ch]);
  }
}
|
||||
|
||||
void AudioRingBuffer::Write(const float* const* data,
|
||||
size_t channels,
|
||||
size_t frames) {
|
||||
RTC_DCHECK_EQ(buffers_.size(), channels);
|
||||
for (size_t i = 0; i < channels; ++i) {
|
||||
const size_t written = WebRtc_WriteBuffer(buffers_[i], data[i], frames);
|
||||
RTC_CHECK_EQ(written, frames);
|
||||
}
|
||||
}
|
||||
|
||||
void AudioRingBuffer::Read(float* const* data, size_t channels, size_t frames) {
|
||||
RTC_DCHECK_EQ(buffers_.size(), channels);
|
||||
for (size_t i = 0; i < channels; ++i) {
|
||||
const size_t read =
|
||||
WebRtc_ReadBuffer(buffers_[i], nullptr, data[i], frames);
|
||||
RTC_CHECK_EQ(read, frames);
|
||||
}
|
||||
}
|
||||
|
||||
size_t AudioRingBuffer::ReadFramesAvailable() const {
|
||||
// All buffers have the same amount available.
|
||||
return WebRtc_available_read(buffers_[0]);
|
||||
}
|
||||
|
||||
size_t AudioRingBuffer::WriteFramesAvailable() const {
|
||||
// All buffers have the same amount available.
|
||||
return WebRtc_available_write(buffers_[0]);
|
||||
}
|
||||
|
||||
void AudioRingBuffer::MoveReadPositionForward(size_t frames) {
|
||||
for (auto* buf : buffers_) {
|
||||
const size_t moved =
|
||||
static_cast<size_t>(WebRtc_MoveReadPtr(buf, static_cast<int>(frames)));
|
||||
RTC_CHECK_EQ(moved, frames);
|
||||
}
|
||||
}
|
||||
|
||||
void AudioRingBuffer::MoveReadPositionBackward(size_t frames) {
|
||||
for (auto* buf : buffers_) {
|
||||
const size_t moved = static_cast<size_t>(
|
||||
-WebRtc_MoveReadPtr(buf, -static_cast<int>(frames)));
|
||||
RTC_CHECK_EQ(moved, frames);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
/*
|
||||
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_TEST_AUDIO_RING_BUFFER_H_
|
||||
#define MODULES_AUDIO_CODING_CODECS_OPUS_TEST_AUDIO_RING_BUFFER_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
struct RingBuffer;
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// A ring buffer tailored for float deinterleaved audio. Any operation that
|
||||
// cannot be performed as requested will cause a crash (e.g. insufficient data
|
||||
// in the buffer to fulfill a read request.)
|
||||
class AudioRingBuffer final {
|
||||
public:
|
||||
// Specify the number of channels and maximum number of frames the buffer will
|
||||
// contain.
|
||||
AudioRingBuffer(size_t channels, size_t max_frames);
|
||||
~AudioRingBuffer();
|
||||
|
||||
// Copies `data` to the buffer and advances the write pointer. `channels` must
|
||||
// be the same as at creation time.
|
||||
void Write(const float* const* data, size_t channels, size_t frames);
|
||||
|
||||
// Copies from the buffer to `data` and advances the read pointer. `channels`
|
||||
// must be the same as at creation time.
|
||||
void Read(float* const* data, size_t channels, size_t frames);
|
||||
|
||||
size_t ReadFramesAvailable() const;
|
||||
size_t WriteFramesAvailable() const;
|
||||
|
||||
// Moves the read position. The forward version advances the read pointer
|
||||
// towards the write pointer and the backward verison withdraws the read
|
||||
// pointer away from the write pointer (i.e. flushing and stuffing the buffer
|
||||
// respectively.)
|
||||
void MoveReadPositionForward(size_t frames);
|
||||
void MoveReadPositionBackward(size_t frames);
|
||||
|
||||
private:
|
||||
// TODO(kwiberg): Use std::vector<std::unique_ptr<RingBuffer>> instead.
|
||||
std::vector<RingBuffer*> buffers_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_CODING_CODECS_OPUS_TEST_AUDIO_RING_BUFFER_H_
|
||||
|
|
@ -0,0 +1,111 @@
|
|||
/*
|
||||
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_coding/codecs/opus/test/audio_ring_buffer.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "common_audio/channel_buffer.h"
|
||||
#include "test/gtest.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class AudioRingBufferTest
|
||||
: public ::testing::TestWithParam< ::testing::tuple<int, int, int, int> > {
|
||||
};
|
||||
|
||||
void ReadAndWriteTest(const ChannelBuffer<float>& input,
|
||||
size_t num_write_chunk_frames,
|
||||
size_t num_read_chunk_frames,
|
||||
size_t buffer_frames,
|
||||
ChannelBuffer<float>* output) {
|
||||
const size_t num_channels = input.num_channels();
|
||||
const size_t total_frames = input.num_frames();
|
||||
AudioRingBuffer buf(num_channels, buffer_frames);
|
||||
std::unique_ptr<float*[]> slice(new float*[num_channels]);
|
||||
|
||||
size_t input_pos = 0;
|
||||
size_t output_pos = 0;
|
||||
while (input_pos + buf.WriteFramesAvailable() < total_frames) {
|
||||
// Write until the buffer is as full as possible.
|
||||
while (buf.WriteFramesAvailable() >= num_write_chunk_frames) {
|
||||
buf.Write(input.Slice(slice.get(), input_pos), num_channels,
|
||||
num_write_chunk_frames);
|
||||
input_pos += num_write_chunk_frames;
|
||||
}
|
||||
// Read until the buffer is as empty as possible.
|
||||
while (buf.ReadFramesAvailable() >= num_read_chunk_frames) {
|
||||
EXPECT_LT(output_pos, total_frames);
|
||||
buf.Read(output->Slice(slice.get(), output_pos), num_channels,
|
||||
num_read_chunk_frames);
|
||||
output_pos += num_read_chunk_frames;
|
||||
}
|
||||
}
|
||||
|
||||
// Write and read the last bit.
|
||||
if (input_pos < total_frames) {
|
||||
buf.Write(input.Slice(slice.get(), input_pos), num_channels,
|
||||
total_frames - input_pos);
|
||||
}
|
||||
if (buf.ReadFramesAvailable()) {
|
||||
buf.Read(output->Slice(slice.get(), output_pos), num_channels,
|
||||
buf.ReadFramesAvailable());
|
||||
}
|
||||
EXPECT_EQ(0u, buf.ReadFramesAvailable());
|
||||
}
|
||||
|
||||
// Round-trips a known signal through the ring buffer for every parameter
// combination and checks that the samples come out unchanged.
TEST_P(AudioRingBufferTest, ReadDataMatchesWrittenData) {
  const size_t kFrames = 5000;
  const size_t write_chunk_frames = ::testing::get<0>(GetParam());
  const size_t read_chunk_frames = ::testing::get<1>(GetParam());
  const size_t buffer_frames = ::testing::get<2>(GetParam());
  const size_t num_channels = ::testing::get<3>(GetParam());

  // Fill the input so that sample j of channel i equals (i + 1) * (j + 1).
  ChannelBuffer<float> input(kFrames, static_cast<int>(num_channels));
  for (size_t ch = 0; ch < num_channels; ++ch) {
    float* const samples = input.channels()[ch];
    for (size_t frame = 0; frame < kFrames; ++frame) {
      samples[frame] = (ch + 1) * (frame + 1);
    }
  }

  ChannelBuffer<float> output(kFrames, static_cast<int>(num_channels));
  ReadAndWriteTest(input, write_chunk_frames, read_chunk_frames, buffer_frames,
                   &output);

  // Every sample that was read must equal the corresponding input sample.
  for (size_t ch = 0; ch < num_channels; ++ch) {
    for (size_t frame = 0; frame < kFrames; ++frame) {
      EXPECT_EQ(input.channels()[ch][frame], output.channels()[ch][frame]);
    }
  }
}
|
||||
|
||||
// Runs the parameterized tests over the cross product of the values below.
// Tuple order: num_write_chunk_frames, num_read_chunk_frames, buffer_frames,
// num_channels.
INSTANTIATE_TEST_SUITE_P(
    AudioRingBufferTest,
    AudioRingBufferTest,
    ::testing::Combine(
        // num_write_chunk_frames
        ::testing::Values(10, 20, 42),
        // num_read_chunk_frames
        ::testing::Values(1, 10, 17),
        // buffer_frames
        ::testing::Values(100, 256),
        // num_channels
        ::testing::Values(1, 4)));
|
||||
|
||||
// Checks that moving the read position forward skips frames and moving it
// backward makes previously passed frames readable again.
TEST_F(AudioRingBufferTest, MoveReadPosition) {
  const size_t kNumChannels = 1;
  const float kInputArray[] = {1, 2, 3, 4};
  const size_t kNumFrames = sizeof(kInputArray) / sizeof(kInputArray[0]);

  ChannelBuffer<float> input(kNumFrames, kNumChannels);
  input.SetDataForTesting(kInputArray, kNumFrames);

  AudioRingBuffer buf(kNumChannels, kNumFrames);
  buf.Write(input.channels(), kNumChannels, kNumFrames);

  ChannelBuffer<float> output(1, kNumChannels);

  // Skip the first three frames; the next frame read is the fourth one.
  buf.MoveReadPositionForward(3);
  buf.Read(output.channels(), kNumChannels, 1);
  EXPECT_EQ(4, output.channels()[0][0]);

  // The read position is now past all four frames; stepping back three
  // frames lands on the second one.
  buf.MoveReadPositionBackward(3);
  buf.Read(output.channels(), kNumChannels, 1);
  EXPECT_EQ(2, output.channels()[0][0]);
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
|
@ -0,0 +1,215 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_coding/codecs/opus/test/blocker.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace {
|
||||
|
||||
// Element-wise sum of two deinterleaved signals (basically matrix addition).
//
// For every channel i in [0, num_channels) and frame j in [0, num_frames):
//   result[i][result_start_index + j] =
//       a[i][a_start_index + j] + b[i][b_start_index + j]
//
// `result` may alias `a` or `b` as long as the matching start index is the
// same, because each element is read before the same element is written.
// All start indices are unsigned so index arithmetic never mixes signed and
// unsigned operands.
void AddFrames(const float* const* a,
               size_t a_start_index,
               const float* const* b,
               size_t b_start_index,
               size_t num_frames,
               size_t num_channels,
               float* const* result,
               size_t result_start_index) {
  for (size_t i = 0; i < num_channels; ++i) {
    for (size_t j = 0; j < num_frames; ++j) {
      result[i][j + result_start_index] =
          a[i][j + a_start_index] + b[i][j + b_start_index];
    }
  }
}
|
||||
|
||||
// For each of `num_channels` channels, copies `num_frames` floats from
// `src[ch]` starting at `src_start_index` to `dst[ch]` starting at
// `dst_start_index`. Uses memcpy, so the two ranges must not overlap.
void CopyFrames(const float* const* src,
                size_t src_start_index,
                size_t num_frames,
                size_t num_channels,
                float* const* dst,
                size_t dst_start_index) {
  const size_t bytes_per_channel = num_frames * sizeof(float);
  for (size_t ch = 0; ch < num_channels; ++ch) {
    const float* const from = src[ch] + src_start_index;
    float* const to = dst[ch] + dst_start_index;
    memcpy(to, from, bytes_per_channel);
  }
}
|
||||
|
||||
// For each of `num_channels` channels, moves `num_frames` floats from
// `src[ch]` starting at `src_start_index` to `dst[ch]` starting at
// `dst_start_index`. Uses memmove, so the two ranges may overlap (for example
// when shifting data within a single buffer).
void MoveFrames(const float* const* src,
                size_t src_start_index,
                size_t num_frames,
                size_t num_channels,
                float* const* dst,
                size_t dst_start_index) {
  const size_t bytes_per_channel = num_frames * sizeof(float);
  for (size_t ch = 0; ch < num_channels; ++ch) {
    const float* const from = src[ch] + src_start_index;
    float* const to = dst[ch] + dst_start_index;
    memmove(to, from, bytes_per_channel);
  }
}
|
||||
|
||||
// Clears `num_frames` floats in each of `num_channels` channels of `buffer`,
// beginning at frame `starting_idx`, by setting their bytes to zero.
void ZeroOut(float* const* buffer,
             size_t starting_idx,
             size_t num_frames,
             size_t num_channels) {
  const size_t bytes_per_channel = num_frames * sizeof(float);
  for (size_t ch = 0; ch < num_channels; ++ch) {
    float* const first = buffer[ch] + starting_idx;
    memset(first, 0, bytes_per_channel);
  }
}
|
||||
|
||||
// Scales every channel of `frames` in place by `window`: sample j of each
// channel is multiplied by window[j], for j in [0, num_frames).
void ApplyWindow(const float* window,
                 size_t num_frames,
                 size_t num_channels,
                 float* const* frames) {
  for (size_t ch = 0; ch < num_channels; ++ch) {
    float* const samples = frames[ch];
    for (size_t k = 0; k < num_frames; ++k) {
      samples[k] *= window[k];
    }
  }
}
|
||||
|
||||
// Greatest common divisor of `a` and `b` using the Euclidean algorithm.
// Returns the other argument when one of them is zero.
size_t gcd(size_t a, size_t b) {
  while (b != 0) {
    const size_t remainder = a % b;
    a = b;
    b = remainder;
  }
  return a;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Configures a Blocker that receives audio in chunks of `chunk_size` frames
// and hands it to `callback` in blocks of `block_size` frames, with
// consecutive blocks starting `shift_amount` frames apart.
//
// `window` must point to at least `block_size` floats; they are copied, so
// the caller keeps ownership. `callback` is stored as a raw pointer and is
// invoked from ProcessChunk().
//
// Crashes via RTC_CHECK if there are more output than input channels or if
// `shift_amount` exceeds `block_size`.
Blocker::Blocker(size_t chunk_size,
                 size_t block_size,
                 size_t num_input_channels,
                 size_t num_output_channels,
                 const float* window,
                 size_t shift_amount,
                 BlockerCallback* callback)
    : chunk_size_(chunk_size),
      block_size_(block_size),
      num_input_channels_(num_input_channels),
      num_output_channels_(num_output_channels),
      // The delay is the block size minus gcd(chunk_size, shift_amount).
      initial_delay_(block_size_ - gcd(chunk_size, shift_amount)),
      frame_offset_(0),
      // Both the input FIFO and the output accumulator hold one chunk plus
      // the initial delay.
      input_buffer_(num_input_channels_, chunk_size_ + initial_delay_),
      output_buffer_(chunk_size_ + initial_delay_, num_output_channels_),
      input_block_(block_size_, num_input_channels_),
      output_block_(block_size_, num_output_channels_),
      window_(new float[block_size_]),
      shift_amount_(shift_amount),
      callback_(callback) {
  RTC_CHECK_LE(num_output_channels_, num_input_channels_);
  RTC_CHECK_LE(shift_amount_, block_size_);

  // Keep a private copy of the window coefficients.
  memcpy(window_.get(), window, block_size_ * sizeof(float));
  // Stuff the input FIFO with `initial_delay_` frames ahead of the first real
  // input by pulling the read position back.
  input_buffer_.MoveReadPositionBackward(initial_delay_);
}
|
||||
|
||||
// Defaulted: every member releases its own storage.
Blocker::~Blocker() = default;
|
||||
|
||||
// When block_size < chunk_size the input and output buffers look like this:
|
||||
//
|
||||
// delay* chunk_size chunk_size + delay*
|
||||
// buffer: <-------------|---------------------|---------------|>
|
||||
// _a_ _b_ _c_
|
||||
//
|
||||
// On each call to ProcessChunk():
|
||||
// 1. New input gets read into sections _b_ and _c_ of the input buffer.
|
||||
// 2. We block starting from frame_offset.
|
||||
// 3. We block until we reach a block `bl` that doesn't contain any frames
|
||||
// from sections _a_ or _b_ of the input buffer.
|
||||
// 4. We window the current block, fire the callback for processing, window
|
||||
// again, and overlap/add to the output buffer.
|
||||
// 5. We copy sections _a_ and _b_ of the output buffer into output.
|
||||
// 6. For both the input and the output buffers, we copy section _c_ into
|
||||
// section _a_.
|
||||
// 7. We set the new frame_offset to be the difference between the first frame
|
||||
// of `bl` and the border between sections _b_ and _c_.
|
||||
//
|
||||
// When block_size > chunk_size the input and output buffers look like this:
|
||||
//
|
||||
// chunk_size delay* chunk_size + delay*
|
||||
// buffer: <-------------|---------------------|---------------|>
|
||||
// _a_ _b_ _c_
|
||||
//
|
||||
// On each call to ProcessChunk():
|
||||
// The procedure is the same as above, except for:
|
||||
// 1. New input gets read into section _c_ of the input buffer.
|
||||
// 3. We block until we reach a block `bl` that doesn't contain any frames
|
||||
// from section _a_ of the input buffer.
|
||||
// 5. We copy section _a_ of the output buffer into output.
|
||||
// 6. For both the input and the output buffers, we copy sections _b_ and _c_
|
||||
// into section _a_ and _b_.
|
||||
// 7. We set the new frame_offset to be the difference between the first frame
|
||||
// of `bl` and the border between sections _a_ and _b_.
|
||||
//
|
||||
// * delay here refers to initial_delay_
|
||||
//
|
||||
// TODO(claguna): Look at using ring buffers to eliminate some copies.
|
||||
void Blocker::ProcessChunk(const float* const* input,
|
||||
size_t chunk_size,
|
||||
size_t num_input_channels,
|
||||
size_t num_output_channels,
|
||||
float* const* output) {
|
||||
RTC_CHECK_EQ(chunk_size, chunk_size_);
|
||||
RTC_CHECK_EQ(num_input_channels, num_input_channels_);
|
||||
RTC_CHECK_EQ(num_output_channels, num_output_channels_);
|
||||
|
||||
input_buffer_.Write(input, num_input_channels, chunk_size_);
|
||||
size_t first_frame_in_block = frame_offset_;
|
||||
|
||||
// Loop through blocks.
|
||||
while (first_frame_in_block < chunk_size_) {
|
||||
input_buffer_.Read(input_block_.channels(), num_input_channels,
|
||||
block_size_);
|
||||
input_buffer_.MoveReadPositionBackward(block_size_ - shift_amount_);
|
||||
|
||||
ApplyWindow(window_.get(), block_size_, num_input_channels_,
|
||||
input_block_.channels());
|
||||
callback_->ProcessBlock(input_block_.channels(), block_size_,
|
||||
num_input_channels_, num_output_channels_,
|
||||
output_block_.channels());
|
||||
ApplyWindow(window_.get(), block_size_, num_output_channels_,
|
||||
output_block_.channels());
|
||||
|
||||
AddFrames(output_buffer_.channels(), first_frame_in_block,
|
||||
output_block_.channels(), 0, block_size_, num_output_channels_,
|
||||
output_buffer_.channels(), first_frame_in_block);
|
||||
|
||||
first_frame_in_block += shift_amount_;
|
||||
}
|
||||
|
||||
// Copy output buffer to output
|
||||
CopyFrames(output_buffer_.channels(), 0, chunk_size_, num_output_channels_,
|
||||
output, 0);
|
||||
|
||||
// Copy output buffer [chunk_size_, chunk_size_ + initial_delay]
|
||||
// to output buffer [0, initial_delay], zero the rest.
|
||||
MoveFrames(output_buffer_.channels(), chunk_size, initial_delay_,
|
||||
num_output_channels_, output_buffer_.channels(), 0);
|
||||
ZeroOut(output_buffer_.channels(), initial_delay_, chunk_size_,
|
||||
num_output_channels_);
|
||||
|
||||
// Calculate new starting frames.
|
||||
frame_offset_ = first_frame_in_block - chunk_size_;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
|
@ -0,0 +1,127 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_TEST_BLOCKER_H_
|
||||
#define MODULES_AUDIO_CODING_CODECS_OPUS_TEST_BLOCKER_H_
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "common_audio/channel_buffer.h"
|
||||
#include "modules/audio_coding/codecs/opus/test/audio_ring_buffer.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Interface for time-domain processing of a single block. The Blocker windows
// the input before calling ProcessBlock() and windows the output afterwards.
// The number of input channels must be >= the number of output channels.
class BlockerCallback {
 public:
  virtual ~BlockerCallback() = default;

  // Processes `num_frames` frames of `num_input_channels` deinterleaved input
  // channels and writes `num_output_channels` channels to `output`.
  virtual void ProcessBlock(const float* const* input,
                            size_t num_frames,
                            size_t num_input_channels,
                            size_t num_output_channels,
                            float* const* output) = 0;
};
|
||||
|
||||
// The main purpose of Blocker is to abstract away the fact that often we
|
||||
// receive a different number of audio frames than our transform takes. For
|
||||
// example, most FFTs work best when the fft-size is a power of 2, but suppose
|
||||
// we receive 20ms of audio at a sample rate of 48000. That comes to 960 frames
|
||||
// of audio, which is not a power of 2. Blocker allows us to specify the
|
||||
// transform and all other necessary processing via the ProcessBlock() callback
|
||||
// function without any constraints on the transform-size
|
||||
// (read: `block_size_`) or received-audio-size (read: `chunk_size_`).
|
||||
// We handle this for the multichannel audio case, allowing for different
|
||||
// numbers of input and output channels (for example, beamforming takes 2 or
|
||||
// more input channels and returns 1 output channel). Audio signals are
|
||||
// represented as deinterleaved floats in the range [-1, 1].
|
||||
//
|
||||
// Blocker is responsible for:
|
||||
// - blocking audio while handling potential discontinuities on the edges
|
||||
// of chunks
|
||||
// - windowing blocks before sending them to ProcessBlock()
|
||||
// - windowing processed blocks, and overlap-adding them together before
|
||||
// sending back a processed chunk
|
||||
//
|
||||
// To use blocker:
|
||||
// 1. Implement a BlockerCallback object `bc`.
|
||||
// 2. Instantiate a Blocker object `b`, passing in `bc`.
|
||||
// 3. As you receive audio, call b.ProcessChunk() to get processed audio.
|
||||
//
|
||||
// A small amount of delay is added to the first received chunk to deal with
|
||||
// the difference in chunk/block sizes. This delay is <= chunk_size.
|
||||
//
|
||||
// Ownership of window is retained by the caller. That is, Blocker makes a
|
||||
// copy of window and does not attempt to delete it.
|
||||
class Blocker {
|
||||
public:
|
||||
Blocker(size_t chunk_size,
|
||||
size_t block_size,
|
||||
size_t num_input_channels,
|
||||
size_t num_output_channels,
|
||||
const float* window,
|
||||
size_t shift_amount,
|
||||
BlockerCallback* callback);
|
||||
~Blocker();
|
||||
|
||||
void ProcessChunk(const float* const* input,
|
||||
size_t chunk_size,
|
||||
size_t num_input_channels,
|
||||
size_t num_output_channels,
|
||||
float* const* output);
|
||||
|
||||
size_t initial_delay() const { return initial_delay_; }
|
||||
|
||||
private:
|
||||
const size_t chunk_size_;
|
||||
const size_t block_size_;
|
||||
const size_t num_input_channels_;
|
||||
const size_t num_output_channels_;
|
||||
|
||||
// The number of frames of delay to add at the beginning of the first chunk.
|
||||
const size_t initial_delay_;
|
||||
|
||||
// The frame index into the input buffer where the first block should be read
|
||||
// from. This is necessary because shift_amount_ is not necessarily a
|
||||
// multiple of chunk_size_, so blocks won't line up at the start of the
|
||||
// buffer.
|
||||
size_t frame_offset_;
|
||||
|
||||
// Since blocks nearly always overlap, there are certain blocks that require
|
||||
// frames from the end of one chunk and the beginning of the next chunk. The
|
||||
// input and output buffers are responsible for saving those frames between
|
||||
// calls to ProcessChunk().
|
||||
//
|
||||
// Both contain |initial delay| + `chunk_size` frames. The input is a fairly
|
||||
// standard FIFO, but due to the overlap-add it's harder to use an
|
||||
// AudioRingBuffer for the output.
|
||||
AudioRingBuffer input_buffer_;
|
||||
ChannelBuffer<float> output_buffer_;
|
||||
|
||||
// Space for the input block (can't wrap because of windowing).
|
||||
ChannelBuffer<float> input_block_;
|
||||
|
||||
// Space for the output block (can't wrap because of overlap/add).
|
||||
ChannelBuffer<float> output_block_;
|
||||
|
||||
std::unique_ptr<float[]> window_;
|
||||
|
||||
// The amount of frames between the start of contiguous blocks. For example,
|
||||
// `shift_amount_` = `block_size_` / 2 for a Hann window.
|
||||
size_t shift_amount_;
|
||||
|
||||
BlockerCallback* callback_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_CODING_CODECS_OPUS_TEST_BLOCKER_H_
|
||||
|
|
@ -0,0 +1,293 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_coding/codecs/opus/test/blocker.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "rtc_base/arraysize.h"
|
||||
#include "test/gtest.h"
|
||||
|
||||
namespace {
|
||||
|
||||
// Callback Function to add 3 to every sample in the signal.
|
||||
class PlusThreeBlockerCallback : public webrtc::BlockerCallback {
|
||||
public:
|
||||
void ProcessBlock(const float* const* input,
|
||||
size_t num_frames,
|
||||
size_t num_input_channels,
|
||||
size_t num_output_channels,
|
||||
float* const* output) override {
|
||||
for (size_t i = 0; i < num_output_channels; ++i) {
|
||||
for (size_t j = 0; j < num_frames; ++j) {
|
||||
output[i][j] = input[i][j] + 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// No-op Callback Function.
|
||||
class CopyBlockerCallback : public webrtc::BlockerCallback {
|
||||
public:
|
||||
void ProcessBlock(const float* const* input,
|
||||
size_t num_frames,
|
||||
size_t num_input_channels,
|
||||
size_t num_output_channels,
|
||||
float* const* output) override {
|
||||
for (size_t i = 0; i < num_output_channels; ++i) {
|
||||
for (size_t j = 0; j < num_frames; ++j) {
|
||||
output[i][j] = input[i][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Tests blocking with a window that multiplies the signal by 2, a callback
|
||||
// that adds 3 to each sample in the signal, and different combinations of chunk
|
||||
// size, block size, and shift amount.
|
||||
class BlockerTest : public ::testing::Test {
|
||||
protected:
|
||||
void RunTest(Blocker* blocker,
|
||||
size_t chunk_size,
|
||||
size_t num_frames,
|
||||
const float* const* input,
|
||||
float* const* input_chunk,
|
||||
float* const* output,
|
||||
float* const* output_chunk,
|
||||
size_t num_input_channels,
|
||||
size_t num_output_channels) {
|
||||
size_t start = 0;
|
||||
size_t end = chunk_size - 1;
|
||||
while (end < num_frames) {
|
||||
CopyTo(input_chunk, 0, start, num_input_channels, chunk_size, input);
|
||||
blocker->ProcessChunk(input_chunk, chunk_size, num_input_channels,
|
||||
num_output_channels, output_chunk);
|
||||
CopyTo(output, start, 0, num_output_channels, chunk_size, output_chunk);
|
||||
|
||||
start += chunk_size;
|
||||
end += chunk_size;
|
||||
}
|
||||
}
|
||||
|
||||
void ValidateSignalEquality(const float* const* expected,
|
||||
const float* const* actual,
|
||||
size_t num_channels,
|
||||
size_t num_frames) {
|
||||
for (size_t i = 0; i < num_channels; ++i) {
|
||||
for (size_t j = 0; j < num_frames; ++j) {
|
||||
EXPECT_FLOAT_EQ(expected[i][j], actual[i][j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ValidateInitialDelay(const float* const* output,
|
||||
size_t num_channels,
|
||||
size_t num_frames,
|
||||
size_t initial_delay) {
|
||||
for (size_t i = 0; i < num_channels; ++i) {
|
||||
for (size_t j = 0; j < num_frames; ++j) {
|
||||
if (j < initial_delay) {
|
||||
EXPECT_FLOAT_EQ(output[i][j], 0.f);
|
||||
} else {
|
||||
EXPECT_GT(output[i][j], 0.f);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void CopyTo(float* const* dst,
|
||||
size_t start_index_dst,
|
||||
size_t start_index_src,
|
||||
size_t num_channels,
|
||||
size_t num_frames,
|
||||
const float* const* src) {
|
||||
for (size_t i = 0; i < num_channels; ++i) {
|
||||
memcpy(&dst[i][start_index_dst], &src[i][start_index_src],
|
||||
num_frames * sizeof(float));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Chunk size (5) and block size (4) are coprime; blocks advance by 2 frames.
// The window multiplies by 2 (applied before and after the callback) and the
// callback adds 3 to every sample.
TEST_F(BlockerTest, TestBlockerMutuallyPrimeChunkandBlockSize) {
  const size_t kNumInputChannels = 3;
  const size_t kNumOutputChannels = 2;
  const size_t kNumFrames = 10;
  const size_t kBlockSize = 4;
  const size_t kChunkSize = 5;
  const size_t kShiftAmount = 2;

  const float kInput[kNumInputChannels][kNumFrames] = {
      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
      {2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
      {3, 3, 3, 3, 3, 3, 3, 3, 3, 3}};
  ChannelBuffer<float> input_cb(kNumFrames, kNumInputChannels);
  input_cb.SetDataForTesting(kInput[0], sizeof(kInput) / sizeof(**kInput));

  // The expected signal has one row per *output* channel, matching what is
  // compared below and how the sibling tests declare it.
  const float kExpectedOutput[kNumOutputChannels][kNumFrames] = {
      {6, 6, 12, 20, 20, 20, 20, 20, 20, 20},
      {6, 6, 12, 28, 28, 28, 28, 28, 28, 28}};
  ChannelBuffer<float> expected_output_cb(kNumFrames, kNumOutputChannels);
  expected_output_cb.SetDataForTesting(
      kExpectedOutput[0], sizeof(kExpectedOutput) / sizeof(**kExpectedOutput));

  const float kWindow[kBlockSize] = {2.f, 2.f, 2.f, 2.f};

  ChannelBuffer<float> actual_output_cb(kNumFrames, kNumOutputChannels);
  ChannelBuffer<float> input_chunk_cb(kChunkSize, kNumInputChannels);
  ChannelBuffer<float> output_chunk_cb(kChunkSize, kNumOutputChannels);

  PlusThreeBlockerCallback callback;
  Blocker blocker(kChunkSize, kBlockSize, kNumInputChannels, kNumOutputChannels,
                  kWindow, kShiftAmount, &callback);

  RunTest(&blocker, kChunkSize, kNumFrames, input_cb.channels(),
          input_chunk_cb.channels(), actual_output_cb.channels(),
          output_chunk_cb.channels(), kNumInputChannels, kNumOutputChannels);

  ValidateSignalEquality(expected_output_cb.channels(),
                         actual_output_cb.channels(), kNumOutputChannels,
                         kNumFrames);
}
|
||||
|
||||
// Shift amount (3) and block size (4) are coprime; the chunk size is 6. The
// window multiplies by 2 (applied before and after the callback) and the
// callback adds 3 to every sample.
TEST_F(BlockerTest, TestBlockerMutuallyPrimeShiftAndBlockSize) {
  const size_t kNumInputChannels = 3;
  const size_t kNumOutputChannels = 2;
  const size_t kNumFrames = 12;
  const size_t kBlockSize = 4;
  const size_t kChunkSize = 6;
  const size_t kShiftAmount = 3;

  // Constant input: channel i holds the value i + 1 in every frame.
  const float kInput[kNumInputChannels][kNumFrames] = {
      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
      {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
      {3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}};
  ChannelBuffer<float> input_cb(kNumFrames, kNumInputChannels);
  input_cb.SetDataForTesting(kInput[0], kNumInputChannels * kNumFrames);

  const float kExpectedOutput[kNumOutputChannels][kNumFrames] = {
      {6, 10, 10, 20, 10, 10, 20, 10, 10, 20, 10, 10},
      {6, 14, 14, 28, 14, 14, 28, 14, 14, 28, 14, 14}};
  ChannelBuffer<float> expected_output_cb(kNumFrames, kNumOutputChannels);
  expected_output_cb.SetDataForTesting(kExpectedOutput[0],
                                       kNumOutputChannels * kNumFrames);

  const float kWindow[kBlockSize] = {2.f, 2.f, 2.f, 2.f};

  // Full-length result buffer plus per-chunk scratch buffers.
  ChannelBuffer<float> actual_output_cb(kNumFrames, kNumOutputChannels);
  ChannelBuffer<float> input_chunk_cb(kChunkSize, kNumInputChannels);
  ChannelBuffer<float> output_chunk_cb(kChunkSize, kNumOutputChannels);

  PlusThreeBlockerCallback callback;
  Blocker blocker(kChunkSize, kBlockSize, kNumInputChannels, kNumOutputChannels,
                  kWindow, kShiftAmount, &callback);

  RunTest(&blocker, kChunkSize, kNumFrames, input_cb.channels(),
          input_chunk_cb.channels(), actual_output_cb.channels(),
          output_chunk_cb.channels(), kNumInputChannels, kNumOutputChannels);

  ValidateSignalEquality(expected_output_cb.channels(),
                         actual_output_cb.channels(), kNumOutputChannels,
                         kNumFrames);
}
|
||||
|
||||
// Chunk size, block size and shift amount are all 4, so blocks do not
// overlap. The window multiplies by 2 (applied before and after the callback)
// and the callback adds 3 to every sample.
TEST_F(BlockerTest, TestBlockerNoOverlap) {
  const size_t kNumInputChannels = 3;
  const size_t kNumOutputChannels = 2;
  const size_t kNumFrames = 12;
  const size_t kBlockSize = 4;
  const size_t kChunkSize = 4;
  const size_t kShiftAmount = 4;

  // Constant input: channel i holds the value i + 1 in every frame.
  const float kInput[kNumInputChannels][kNumFrames] = {
      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
      {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
      {3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}};
  ChannelBuffer<float> input_cb(kNumFrames, kNumInputChannels);
  input_cb.SetDataForTesting(kInput[0], kNumInputChannels * kNumFrames);

  const float kExpectedOutput[kNumOutputChannels][kNumFrames] = {
      {10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10},
      {14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14}};
  ChannelBuffer<float> expected_output_cb(kNumFrames, kNumOutputChannels);
  expected_output_cb.SetDataForTesting(kExpectedOutput[0],
                                       kNumOutputChannels * kNumFrames);

  const float kWindow[kBlockSize] = {2.f, 2.f, 2.f, 2.f};

  // Full-length result buffer plus per-chunk scratch buffers.
  ChannelBuffer<float> actual_output_cb(kNumFrames, kNumOutputChannels);
  ChannelBuffer<float> input_chunk_cb(kChunkSize, kNumInputChannels);
  ChannelBuffer<float> output_chunk_cb(kChunkSize, kNumOutputChannels);

  PlusThreeBlockerCallback callback;
  Blocker blocker(kChunkSize, kBlockSize, kNumInputChannels, kNumOutputChannels,
                  kWindow, kShiftAmount, &callback);

  RunTest(&blocker, kChunkSize, kNumFrames, input_cb.channels(),
          input_chunk_cb.channels(), actual_output_cb.channels(),
          output_chunk_cb.channels(), kNumInputChannels, kNumOutputChannels);

  ValidateSignalEquality(expected_output_cb.channels(),
                         actual_output_cb.channels(), kNumOutputChannels,
                         kNumFrames);
}
|
||||
|
||||
// For several chunk/block/shift configurations, runs a strictly positive
// signal through a pass-through callback with an all-ones window and checks
// that exactly the first kInitialDelay[i] output frames are zero.
TEST_F(BlockerTest, InitialDelaysAreMinimum) {
  const size_t kNumInputChannels = 3;
  const size_t kNumOutputChannels = 2;
  const size_t kNumFrames = 1280;
  // Entry i of each table below describes configuration i.
  const size_t kChunkSize[] = {80,  80,  80,  80,  80,  80,
                               160, 160, 160, 160, 160, 160};
  const size_t kBlockSize[] = {64,  64,  64,  128, 128, 128,
                               128, 128, 128, 256, 256, 256};
  const size_t kShiftAmount[] = {16, 32, 64,  32, 64,  128,
                                 32, 64, 128, 64, 128, 256};
  const size_t kInitialDelay[] = {48, 48, 48, 112, 112, 112,
                                  96, 96, 96, 224, 224, 224};

  // Constant input: channel ch holds the value ch + 1 in every frame.
  float input[kNumInputChannels][kNumFrames];
  for (size_t ch = 0; ch < kNumInputChannels; ++ch) {
    for (size_t frame = 0; frame < kNumFrames; ++frame) {
      input[ch][frame] = ch + 1;
    }
  }
  ChannelBuffer<float> input_cb(kNumFrames, kNumInputChannels);
  input_cb.SetDataForTesting(input[0], kNumInputChannels * kNumFrames);

  ChannelBuffer<float> output_cb(kNumFrames, kNumOutputChannels);

  CopyBlockerCallback callback;

  for (size_t config = 0; config < arraysize(kChunkSize); ++config) {
    const size_t chunk_size = kChunkSize[config];
    const size_t block_size = kBlockSize[config];

    // All-ones window, so windowing leaves the signal untouched.
    std::unique_ptr<float[]> window(new float[block_size]);
    for (size_t k = 0; k < block_size; ++k) {
      window[k] = 1.f;
    }

    ChannelBuffer<float> input_chunk_cb(chunk_size, kNumInputChannels);
    ChannelBuffer<float> output_chunk_cb(chunk_size, kNumOutputChannels);

    Blocker blocker(chunk_size, block_size, kNumInputChannels,
                    kNumOutputChannels, window.get(), kShiftAmount[config],
                    &callback);

    RunTest(&blocker, chunk_size, kNumFrames, input_cb.channels(),
            input_chunk_cb.channels(), output_cb.channels(),
            output_chunk_cb.channels(), kNumInputChannels, kNumOutputChannels);

    ValidateInitialDelay(output_cb.channels(), kNumOutputChannels, kNumFrames,
                         kInitialDelay[config]);
  }
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
|
@ -0,0 +1,100 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_coding/codecs/opus/test/lapped_transform.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
#include "common_audio/real_fourier.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
void LappedTransform::BlockThunk::ProcessBlock(const float* const* input,
|
||||
size_t num_frames,
|
||||
size_t num_input_channels,
|
||||
size_t num_output_channels,
|
||||
float* const* output) {
|
||||
RTC_CHECK_EQ(num_input_channels, parent_->num_in_channels_);
|
||||
RTC_CHECK_EQ(num_output_channels, parent_->num_out_channels_);
|
||||
RTC_CHECK_EQ(parent_->block_length_, num_frames);
|
||||
|
||||
for (size_t i = 0; i < num_input_channels; ++i) {
|
||||
memcpy(parent_->real_buf_.Row(i), input[i], num_frames * sizeof(*input[0]));
|
||||
parent_->fft_->Forward(parent_->real_buf_.Row(i),
|
||||
parent_->cplx_pre_.Row(i));
|
||||
}
|
||||
|
||||
size_t block_length =
|
||||
RealFourier::ComplexLength(RealFourier::FftOrder(num_frames));
|
||||
RTC_CHECK_EQ(parent_->cplx_length_, block_length);
|
||||
parent_->block_processor_->ProcessAudioBlock(
|
||||
parent_->cplx_pre_.Array(), num_input_channels, parent_->cplx_length_,
|
||||
num_output_channels, parent_->cplx_post_.Array());
|
||||
|
||||
for (size_t i = 0; i < num_output_channels; ++i) {
|
||||
parent_->fft_->Inverse(parent_->cplx_post_.Row(i),
|
||||
parent_->real_buf_.Row(i));
|
||||
memcpy(output[i], parent_->real_buf_.Row(i),
|
||||
num_frames * sizeof(*input[0]));
|
||||
}
|
||||
}
|
||||
|
||||
// Constructs the transform: wires the internal Blocker to `blocker_callback_`,
// creates the FFT for `block_length` and allocates the aligned scratch
// buffers.
//
// Requirements enforced with RTC_CHECK: `num_in_channels`, `block_length` and
// `chunk_length` are positive, `callback` is non-null and `block_length` is a
// power of two.
LappedTransform::LappedTransform(size_t num_in_channels,
                                 size_t num_out_channels,
                                 size_t chunk_length,
                                 const float* window,
                                 size_t block_length,
                                 size_t shift_amount,
                                 Callback* callback)
    : blocker_callback_(this),
      num_in_channels_(num_in_channels),
      num_out_channels_(num_out_channels),
      block_length_(block_length),
      chunk_length_(chunk_length),
      block_processor_(callback),
      blocker_(chunk_length_,
               block_length_,
               num_in_channels_,
               num_out_channels_,
               window,
               shift_amount,
               &blocker_callback_),
      fft_(RealFourier::Create(RealFourier::FftOrder(block_length_))),
      cplx_length_(RealFourier::ComplexLength(fft_->order())),
      // `real_buf_` is the scratch buffer for both the forward transform (one
      // row per input channel) and the inverse transform (one row per output
      // channel), so it needs enough rows for whichever count is larger.
      // Sizing it by the input count alone overruns the buffer when there are
      // more output than input channels.
      real_buf_(std::max(num_in_channels, num_out_channels),
                block_length_,
                RealFourier::kFftBufferAlignment),
      cplx_pre_(num_in_channels,
                cplx_length_,
                RealFourier::kFftBufferAlignment),
      cplx_post_(num_out_channels,
                 cplx_length_,
                 RealFourier::kFftBufferAlignment) {
  RTC_CHECK_GT(num_in_channels_, 0);
  RTC_CHECK_GT(block_length_, 0);
  RTC_CHECK_GT(chunk_length_, 0);
  RTC_CHECK(block_processor_);

  // block_length_ must be a power of 2: such a value has exactly one bit set,
  // so clearing its lowest set bit yields zero.
  RTC_CHECK_EQ(0, block_length_ & (block_length_ - 1));
}
|
||||
|
||||
// Defaulted out of line; cleanup is left to the members' own destructors.
LappedTransform::~LappedTransform() = default;
|
||||
|
||||
// Hands one chunk to the internal Blocker, which slices it into blocks and
// invokes `blocker_callback_` for each of them. The chunk length and channel
// counts are the ones fixed at construction time.
void LappedTransform::ProcessChunk(const float* const* in_chunk,
                                   float* const* out_chunk) {
  const size_t frames_per_channel = chunk_length_;
  const size_t inputs = num_in_channels_;
  const size_t outputs = num_out_channels_;
  blocker_.ProcessChunk(in_chunk, frames_per_channel, inputs, outputs,
                        out_chunk);
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
|
@ -0,0 +1,175 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_TEST_LAPPED_TRANSFORM_H_
|
||||
#define MODULES_AUDIO_CODING_CODECS_OPUS_TEST_LAPPED_TRANSFORM_H_
|
||||
|
||||
#include <complex>
|
||||
#include <memory>
|
||||
|
||||
#include "common_audio/real_fourier.h"
|
||||
#include "modules/audio_coding/codecs/opus/test/blocker.h"
|
||||
#include "rtc_base/memory/aligned_malloc.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Wrapper class for aligned arrays. Every row (and the first dimension) are
|
||||
// aligned to the given byte alignment.
|
||||
template <typename T>
|
||||
class AlignedArray {
|
||||
public:
|
||||
AlignedArray(size_t rows, size_t cols, size_t alignment)
|
||||
: rows_(rows), cols_(cols) {
|
||||
RTC_CHECK_GT(alignment, 0);
|
||||
head_row_ =
|
||||
static_cast<T**>(AlignedMalloc(rows_ * sizeof(*head_row_), alignment));
|
||||
for (size_t i = 0; i < rows_; ++i) {
|
||||
head_row_[i] = static_cast<T*>(
|
||||
AlignedMalloc(cols_ * sizeof(**head_row_), alignment));
|
||||
}
|
||||
}
|
||||
|
||||
~AlignedArray() {
|
||||
for (size_t i = 0; i < rows_; ++i) {
|
||||
AlignedFree(head_row_[i]);
|
||||
}
|
||||
AlignedFree(head_row_);
|
||||
}
|
||||
|
||||
T* const* Array() { return head_row_; }
|
||||
|
||||
const T* const* Array() const { return head_row_; }
|
||||
|
||||
T* Row(size_t row) {
|
||||
RTC_CHECK_LE(row, rows_);
|
||||
return head_row_[row];
|
||||
}
|
||||
|
||||
const T* Row(size_t row) const {
|
||||
RTC_CHECK_LE(row, rows_);
|
||||
return head_row_[row];
|
||||
}
|
||||
|
||||
private:
|
||||
size_t rows_;
|
||||
size_t cols_;
|
||||
T** head_row_;
|
||||
};
|
||||
|
||||
// Helper class for audio processing modules which operate on frequency domain
|
||||
// input derived from the windowed time domain audio stream.
|
||||
//
|
||||
// The input audio chunk is sliced into possibly overlapping blocks, multiplied
|
||||
// by a window and transformed with an FFT implementation. The transformed data
|
||||
// is supplied to the given callback for processing. The processed output is
|
||||
// then inverse transformed into the time domain and spliced back into a chunk
|
||||
// which constitutes the final output of this processing module.
|
||||
class LappedTransform {
|
||||
public:
|
||||
class Callback {
|
||||
public:
|
||||
virtual ~Callback() {}
|
||||
|
||||
virtual void ProcessAudioBlock(const std::complex<float>* const* in_block,
|
||||
size_t num_in_channels,
|
||||
size_t frames,
|
||||
size_t num_out_channels,
|
||||
std::complex<float>* const* out_block) = 0;
|
||||
};
|
||||
|
||||
// Construct a transform instance. `chunk_length` is the number of samples in
|
||||
// each channel. `window` defines the window, owned by the caller (a copy is
|
||||
// made internally); `window` should have length equal to `block_length`.
|
||||
// `block_length` defines the length of a block, in samples.
|
||||
// `shift_amount` is in samples. `callback` is the caller-owned audio
|
||||
// processing function called for each block of the input chunk.
|
||||
LappedTransform(size_t num_in_channels,
|
||||
size_t num_out_channels,
|
||||
size_t chunk_length,
|
||||
const float* window,
|
||||
size_t block_length,
|
||||
size_t shift_amount,
|
||||
Callback* callback);
|
||||
~LappedTransform();
|
||||
|
||||
// Main audio processing helper method. Internally slices `in_chunk` into
|
||||
// blocks, transforms them to frequency domain, calls the callback for each
|
||||
// block and returns a de-blocked time domain chunk of audio through
|
||||
// `out_chunk`. Both buffers are caller-owned.
|
||||
void ProcessChunk(const float* const* in_chunk, float* const* out_chunk);
|
||||
|
||||
// Get the chunk length.
|
||||
//
|
||||
// The chunk length is the number of samples per channel that must be passed
|
||||
// to ProcessChunk via the parameter in_chunk.
|
||||
//
|
||||
// Returns the same chunk_length passed to the LappedTransform constructor.
|
||||
size_t chunk_length() const { return chunk_length_; }
|
||||
|
||||
// Get the number of input channels.
|
||||
//
|
||||
// This is the number of arrays that must be passed to ProcessChunk via
|
||||
// in_chunk.
|
||||
//
|
||||
// Returns the same num_in_channels passed to the LappedTransform constructor.
|
||||
size_t num_in_channels() const { return num_in_channels_; }
|
||||
|
||||
// Get the number of output channels.
|
||||
//
|
||||
// This is the number of arrays that must be passed to ProcessChunk via
|
||||
// out_chunk.
|
||||
//
|
||||
// Returns the same num_out_channels passed to the LappedTransform
|
||||
// constructor.
|
||||
size_t num_out_channels() const { return num_out_channels_; }
|
||||
|
||||
// Returns the initial delay.
|
||||
//
|
||||
// This is the delay introduced by the `blocker_` to be able to get and return
|
||||
// chunks of `chunk_length`, but process blocks of `block_length`.
|
||||
size_t initial_delay() const { return blocker_.initial_delay(); }
|
||||
|
||||
private:
|
||||
// Internal middleware callback, given to the blocker. Transforms each block
|
||||
// and hands it over to the processing method given at construction time.
|
||||
class BlockThunk : public BlockerCallback {
|
||||
public:
|
||||
explicit BlockThunk(LappedTransform* parent) : parent_(parent) {}
|
||||
|
||||
void ProcessBlock(const float* const* input,
|
||||
size_t num_frames,
|
||||
size_t num_input_channels,
|
||||
size_t num_output_channels,
|
||||
float* const* output) override;
|
||||
|
||||
private:
|
||||
LappedTransform* const parent_;
|
||||
} blocker_callback_;
|
||||
|
||||
const size_t num_in_channels_;
|
||||
const size_t num_out_channels_;
|
||||
|
||||
const size_t block_length_;
|
||||
const size_t chunk_length_;
|
||||
|
||||
Callback* const block_processor_;
|
||||
Blocker blocker_;
|
||||
|
||||
// TODO(alessiob): Replace RealFourier with a different FFT library.
|
||||
std::unique_ptr<RealFourier> fft_;
|
||||
const size_t cplx_length_;
|
||||
AlignedArray<float> real_buf_;
|
||||
AlignedArray<std::complex<float> > cplx_pre_;
|
||||
AlignedArray<std::complex<float> > cplx_post_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_CODING_CODECS_OPUS_TEST_LAPPED_TRANSFORM_H_
|
||||
|
|
@ -0,0 +1,203 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_coding/codecs/opus/test/lapped_transform.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
|
||||
#include "test/gtest.h"
|
||||
|
||||
using std::complex;
|
||||
|
||||
namespace {
|
||||
|
||||
class NoopCallback : public webrtc::LappedTransform::Callback {
|
||||
public:
|
||||
NoopCallback() : block_num_(0) {}
|
||||
|
||||
void ProcessAudioBlock(const complex<float>* const* in_block,
|
||||
size_t in_channels,
|
||||
size_t frames,
|
||||
size_t out_channels,
|
||||
complex<float>* const* out_block) override {
|
||||
RTC_CHECK_EQ(in_channels, out_channels);
|
||||
for (size_t i = 0; i < out_channels; ++i) {
|
||||
memcpy(out_block[i], in_block[i], sizeof(**in_block) * frames);
|
||||
}
|
||||
++block_num_;
|
||||
}
|
||||
|
||||
size_t block_num() { return block_num_; }
|
||||
|
||||
private:
|
||||
size_t block_num_;
|
||||
};
|
||||
|
||||
class FftCheckerCallback : public webrtc::LappedTransform::Callback {
|
||||
public:
|
||||
FftCheckerCallback() : block_num_(0) {}
|
||||
|
||||
void ProcessAudioBlock(const complex<float>* const* in_block,
|
||||
size_t in_channels,
|
||||
size_t frames,
|
||||
size_t out_channels,
|
||||
complex<float>* const* out_block) override {
|
||||
RTC_CHECK_EQ(in_channels, out_channels);
|
||||
|
||||
size_t full_length = (frames - 1) * 2;
|
||||
++block_num_;
|
||||
|
||||
if (block_num_ > 0) {
|
||||
ASSERT_NEAR(in_block[0][0].real(), static_cast<float>(full_length),
|
||||
1e-5f);
|
||||
ASSERT_NEAR(in_block[0][0].imag(), 0.0f, 1e-5f);
|
||||
for (size_t i = 1; i < frames; ++i) {
|
||||
ASSERT_NEAR(in_block[0][i].real(), 0.0f, 1e-5f);
|
||||
ASSERT_NEAR(in_block[0][i].imag(), 0.0f, 1e-5f);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t block_num() { return block_num_; }
|
||||
|
||||
private:
|
||||
size_t block_num_;
|
||||
};
|
||||
|
||||
// Writes `value` into the first `cols` elements of each of the first `rows`
// rows of `array`. Does nothing when `rows` or `cols` is not positive.
void SetFloatArray(float value, int rows, int cols, float* const* array) {
  for (int r = 0; r < rows; ++r) {
    // std::fill_n is a no-op for a non-positive count.
    std::fill_n(array[r], cols, value);
  }
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// With a rectangular window and a shift equal to the block length, a
// pass-through callback must reproduce the input (within 1e-5) on every
// channel, and the callback must run once per block.
TEST(LappedTransformTest, Windowless) {
  const size_t kChannels = 3;
  const size_t kChunkLength = 512;
  const size_t kBlockLength = 64;
  const size_t kShiftAmount = 64;
  NoopCallback noop;

  // Rectangular window.
  float window[kBlockLength];
  std::fill_n(window, kBlockLength, 1.0f);

  LappedTransform trans(kChannels, kChannels, kChunkLength, window,
                        kBlockLength, kShiftAmount, &noop);

  // Backing storage plus the per-channel pointer tables the API expects.
  float in_buffer[kChannels][kChunkLength];
  float out_buffer[kChannels][kChunkLength];
  float* in_chunk[kChannels];
  float* out_chunk[kChannels];
  for (size_t ch = 0; ch < kChannels; ++ch) {
    in_chunk[ch] = in_buffer[ch];
    out_chunk[ch] = out_buffer[ch];
  }

  // Input is a constant 2; output is pre-filled with a sentinel value.
  SetFloatArray(2.0f, kChannels, kChunkLength, in_chunk);
  SetFloatArray(-1.0f, kChannels, kChunkLength, out_chunk);

  trans.ProcessChunk(in_chunk, out_chunk);

  for (size_t ch = 0; ch < kChannels; ++ch) {
    for (size_t frame = 0; frame < kChunkLength; ++frame) {
      ASSERT_NEAR(out_chunk[ch][frame], 2.0f, 1e-5f);
    }
  }

  ASSERT_EQ(kChunkLength / kBlockLength, noop.block_num());
}
|
||||
|
||||
// With half-overlapping blocks and a constant sqrt(0.5) window, a pass-through
// callback must yield zeros for the first `kBlockLength - kShiftAmount`
// samples and the input value afterwards; the callback must run once per
// shift.
TEST(LappedTransformTest, IdentityProcessor) {
  const size_t kChunkLength = 512;
  const size_t kBlockLength = 64;
  const size_t kShiftAmount = 32;
  NoopCallback noop;

  // Identity window for |overlap = block_size / 2|.
  float window[kBlockLength];
  std::fill_n(window, kBlockLength, std::sqrt(0.5f));

  LappedTransform trans(1, 1, kChunkLength, window, kBlockLength, kShiftAmount,
                        &noop);

  float in_buffer[kChunkLength];
  float out_buffer[kChunkLength];
  float* in_chunk = in_buffer;
  float* out_chunk = out_buffer;

  // Input is a constant 2; output is pre-filled with a sentinel value.
  SetFloatArray(2.0f, 1, kChunkLength, &in_chunk);
  SetFloatArray(-1.0f, 1, kChunkLength, &out_chunk);

  trans.ProcessChunk(&in_chunk, &out_chunk);

  const size_t kLeadingZeros = kBlockLength - kShiftAmount;
  for (size_t frame = 0; frame < kChunkLength; ++frame) {
    const float expected = (frame < kLeadingZeros) ? 0.0f : 2.0f;
    ASSERT_NEAR(out_chunk[frame], expected, 1e-5f);
  }

  ASSERT_EQ(kChunkLength / kShiftAmount, noop.block_num());
}
|
||||
|
||||
// Feeds an all-ones signal through a rectangular, non-overlapping transform.
// FftCheckerCallback asserts on each block's spectrum, and the test checks
// that the callback ran once per block.
TEST(LappedTransformTest, Callbacks) {
  const size_t kChunkLength = 512;
  const size_t kBlockLength = 64;
  FftCheckerCallback call;

  // Rectangular window.
  float window[kBlockLength];
  std::fill_n(window, kBlockLength, 1.0f);

  LappedTransform trans(1, 1, kChunkLength, window, kBlockLength, kBlockLength,
                        &call);

  float in_buffer[kChunkLength];
  float out_buffer[kChunkLength];
  float* in_chunk = in_buffer;
  float* out_chunk = out_buffer;

  // Input is a constant 1; output is pre-filled with a sentinel value.
  SetFloatArray(1.0f, 1, kChunkLength, &in_chunk);
  SetFloatArray(-1.0f, 1, kChunkLength, &out_chunk);

  trans.ProcessChunk(&in_chunk, &out_chunk);

  ASSERT_EQ(kChunkLength / kBlockLength, call.block_num());
}
|
||||
|
||||
// Checks that chunk_length() returns the value passed to the LappedTransform
// constructor, for two different chunk lengths.
TEST(LappedTransformTest, chunk_length) {
  const size_t kBlockLength = 64;
  FftCheckerCallback call;
  const float window[kBlockLength] = {};

  const size_t kChunkLengths[] = {512, 160};
  for (const size_t expected_chunk_length : kChunkLengths) {
    // A fresh transform is built for each length under test.
    const LappedTransform trans(1, 1, expected_chunk_length, window,
                                kBlockLength, kBlockLength, &call);

    EXPECT_EQ(expected_chunk_length, trans.chunk_length());
  }
}
|
||||
|
||||
} // namespace webrtc
|
||||
Loading…
Add table
Add a link
Reference in a new issue