Repo created

Fr4nz D13trich 2025-11-22 14:04:28 +01:00
parent 81b91f4139
commit f8c34fa5ee
22732 changed files with 4815320 additions and 2 deletions


@@ -0,0 +1,5 @@
specific_include_rules = {
"opus_inst\.h": [
"+third_party/opus",
],
}
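A note on the fragment above: this is a Chromium-style DEPS rule; specific_include_rules grants only the file matching the regex (here opus_inst.h) permission to include headers from the listed directory. An illustrative include the rule would allow (the exact path is an assumption, not part of this commit):

#include "third_party/opus/src/include/opus.h"  // assumed path layout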


@@ -0,0 +1,52 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/codecs/opus/audio_coder_opus_common.h"
#include "absl/strings/string_view.h"
namespace webrtc {
absl::optional<std::string> GetFormatParameter(const SdpAudioFormat& format,
absl::string_view param) {
auto it = format.parameters.find(std::string(param));
if (it == format.parameters.end())
return absl::nullopt;
return it->second;
}
// Parses a comma-separated string "1,2,0,6" into a std::vector<unsigned char>.
template <>
absl::optional<std::vector<unsigned char>> GetFormatParameter(
const SdpAudioFormat& format,
absl::string_view param) {
std::vector<unsigned char> result;
const std::string comma_separated_list =
GetFormatParameter(format, param).value_or("");
size_t pos = 0;
while (pos < comma_separated_list.size()) {
const size_t next_comma = comma_separated_list.find(',', pos);
const size_t distance_to_next_comma = next_comma == std::string::npos
? std::string::npos
: (next_comma - pos);
auto substring_with_number =
comma_separated_list.substr(pos, distance_to_next_comma);
auto conv = rtc::StringToNumber<int>(substring_with_number);
if (!conv.has_value()) {
return absl::nullopt;
}
result.push_back(*conv);
pos += substring_with_number.size() + 1;
}
return result;
}
} // namespace webrtc
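A hedged usage sketch for the helpers above, mirroring the unit tests later in this commit; ParamsParse is a hypothetical function, not part of this commit:

#include <vector>

#include "modules/audio_coding/codecs/opus/audio_coder_opus_common.h"

// Returns true when all three multiopus SDP parameters parse (illustrative).
bool ParamsParse(const webrtc::SdpAudioFormat& format) {
  // Template form parses numbers: "2" -> 2, "two" -> absl::nullopt.
  const auto streams = webrtc::GetFormatParameter<int>(format, "num_streams");
  const auto coupled =
      webrtc::GetFormatParameter<int>(format, "coupled_streams");
  // The specialization above parses lists: "0,1,2,3" -> {0, 1, 2, 3}.
  const auto mapping = webrtc::GetFormatParameter<std::vector<unsigned char>>(
      format, "channel_mapping");
  return streams.has_value() && coupled.has_value() && mapping.has_value();
}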


@@ -0,0 +1,89 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_CODER_OPUS_COMMON_H_
#define MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_CODER_OPUS_COMMON_H_
#include <string>
#include <utility>
#include <vector>
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "api/audio_codecs/audio_decoder.h"
#include "api/audio_codecs/audio_format.h"
#include "rtc_base/string_to_number.h"
namespace webrtc {
absl::optional<std::string> GetFormatParameter(const SdpAudioFormat& format,
absl::string_view param);
template <typename T>
absl::optional<T> GetFormatParameter(const SdpAudioFormat& format,
absl::string_view param) {
return rtc::StringToNumber<T>(GetFormatParameter(format, param).value_or(""));
}
template <>
absl::optional<std::vector<unsigned char>> GetFormatParameter(
const SdpAudioFormat& format,
absl::string_view param);
class OpusFrame : public AudioDecoder::EncodedAudioFrame {
public:
OpusFrame(AudioDecoder* decoder,
rtc::Buffer&& payload,
bool is_primary_payload)
: decoder_(decoder),
payload_(std::move(payload)),
is_primary_payload_(is_primary_payload) {}
size_t Duration() const override {
int ret;
if (is_primary_payload_) {
ret = decoder_->PacketDuration(payload_.data(), payload_.size());
} else {
ret = decoder_->PacketDurationRedundant(payload_.data(), payload_.size());
}
return (ret < 0) ? 0 : static_cast<size_t>(ret);
}
bool IsDtxPacket() const override { return payload_.size() <= 2; }
absl::optional<DecodeResult> Decode(
rtc::ArrayView<int16_t> decoded) const override {
AudioDecoder::SpeechType speech_type = AudioDecoder::kSpeech;
int ret;
if (is_primary_payload_) {
ret = decoder_->Decode(
payload_.data(), payload_.size(), decoder_->SampleRateHz(),
decoded.size() * sizeof(int16_t), decoded.data(), &speech_type);
} else {
ret = decoder_->DecodeRedundant(
payload_.data(), payload_.size(), decoder_->SampleRateHz(),
decoded.size() * sizeof(int16_t), decoded.data(), &speech_type);
}
if (ret < 0)
return absl::nullopt;
return DecodeResult{static_cast<size_t>(ret), speech_type};
}
private:
AudioDecoder* const decoder_;
const rtc::Buffer payload_;
const bool is_primary_payload_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_CODER_OPUS_COMMON_H_
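A hedged sketch of how a decoder hands a payload to the OpusFrame wrapper above, mirroring the ParsePayload() implementations later in this commit; WrapPrimary is a hypothetical helper, not part of this commit:

#include <memory>
#include <utility>

#include "modules/audio_coding/codecs/opus/audio_coder_opus_common.h"

// Wraps a primary (non-FEC) payload for later decoding (illustrative).
std::unique_ptr<webrtc::AudioDecoder::EncodedAudioFrame> WrapPrimary(
    webrtc::AudioDecoder* decoder, rtc::Buffer payload) {
  // Passing false instead would route Duration() and Decode() through the
  // redundant (FEC) code path.
  return std::make_unique<webrtc::OpusFrame>(decoder, std::move(payload),
                                             /*is_primary_payload=*/true);
}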


@@ -0,0 +1,182 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/codecs/opus/audio_decoder_multi_channel_opus_impl.h"
#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "absl/memory/memory.h"
#include "modules/audio_coding/codecs/opus/audio_coder_opus_common.h"
#include "rtc_base/string_to_number.h"
namespace webrtc {
std::unique_ptr<AudioDecoderMultiChannelOpusImpl>
AudioDecoderMultiChannelOpusImpl::MakeAudioDecoder(
AudioDecoderMultiChannelOpusConfig config) {
if (!config.IsOk()) {
RTC_DCHECK_NOTREACHED();
return nullptr;
}
// Fill the pointer with a working decoder through the C interface. This
// allocates memory.
OpusDecInst* dec_state = nullptr;
const int error = WebRtcOpus_MultistreamDecoderCreate(
&dec_state, config.num_channels, config.num_streams,
config.coupled_streams, config.channel_mapping.data());
if (error != 0) {
return nullptr;
}
// Pass the ownership to DecoderImpl. Not using 'make_unique' because the
// c-tor is private.
return std::unique_ptr<AudioDecoderMultiChannelOpusImpl>(
new AudioDecoderMultiChannelOpusImpl(dec_state, config));
}
AudioDecoderMultiChannelOpusImpl::AudioDecoderMultiChannelOpusImpl(
OpusDecInst* dec_state,
AudioDecoderMultiChannelOpusConfig config)
: dec_state_(dec_state), config_(config) {
RTC_DCHECK(dec_state);
WebRtcOpus_DecoderInit(dec_state_);
}
AudioDecoderMultiChannelOpusImpl::~AudioDecoderMultiChannelOpusImpl() {
WebRtcOpus_DecoderFree(dec_state_);
}
absl::optional<AudioDecoderMultiChannelOpusConfig>
AudioDecoderMultiChannelOpusImpl::SdpToConfig(const SdpAudioFormat& format) {
AudioDecoderMultiChannelOpusConfig config;
config.num_channels = format.num_channels;
auto num_streams = GetFormatParameter<int>(format, "num_streams");
if (!num_streams.has_value()) {
return absl::nullopt;
}
config.num_streams = *num_streams;
auto coupled_streams = GetFormatParameter<int>(format, "coupled_streams");
if (!coupled_streams.has_value()) {
return absl::nullopt;
}
config.coupled_streams = *coupled_streams;
auto channel_mapping =
GetFormatParameter<std::vector<unsigned char>>(format, "channel_mapping");
if (!channel_mapping.has_value()) {
return absl::nullopt;
}
config.channel_mapping = *channel_mapping;
if (!config.IsOk()) {
return absl::nullopt;
}
return config;
}
std::vector<AudioDecoder::ParseResult>
AudioDecoderMultiChannelOpusImpl::ParsePayload(rtc::Buffer&& payload,
uint32_t timestamp) {
std::vector<ParseResult> results;
if (PacketHasFec(payload.data(), payload.size())) {
const int duration =
PacketDurationRedundant(payload.data(), payload.size());
RTC_DCHECK_GE(duration, 0);
rtc::Buffer payload_copy(payload.data(), payload.size());
std::unique_ptr<EncodedAudioFrame> fec_frame(
new OpusFrame(this, std::move(payload_copy), false));
results.emplace_back(timestamp - duration, 1, std::move(fec_frame));
}
std::unique_ptr<EncodedAudioFrame> frame(
new OpusFrame(this, std::move(payload), true));
results.emplace_back(timestamp, 0, std::move(frame));
return results;
}
int AudioDecoderMultiChannelOpusImpl::DecodeInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
RTC_DCHECK_EQ(sample_rate_hz, 48000);
int16_t temp_type = 1; // Default is speech.
int ret =
WebRtcOpus_Decode(dec_state_, encoded, encoded_len, decoded, &temp_type);
if (ret > 0)
ret *= static_cast<int>(
config_.num_channels); // Return total number of samples.
*speech_type = ConvertSpeechType(temp_type);
return ret;
}
int AudioDecoderMultiChannelOpusImpl::DecodeRedundantInternal(
const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
if (!PacketHasFec(encoded, encoded_len)) {
// This packet is a RED packet.
return DecodeInternal(encoded, encoded_len, sample_rate_hz, decoded,
speech_type);
}
RTC_DCHECK_EQ(sample_rate_hz, 48000);
int16_t temp_type = 1; // Default is speech.
int ret = WebRtcOpus_DecodeFec(dec_state_, encoded, encoded_len, decoded,
&temp_type);
if (ret > 0)
ret *= static_cast<int>(
config_.num_channels); // Return total number of samples.
*speech_type = ConvertSpeechType(temp_type);
return ret;
}
void AudioDecoderMultiChannelOpusImpl::Reset() {
WebRtcOpus_DecoderInit(dec_state_);
}
int AudioDecoderMultiChannelOpusImpl::PacketDuration(const uint8_t* encoded,
size_t encoded_len) const {
return WebRtcOpus_DurationEst(dec_state_, encoded, encoded_len);
}
int AudioDecoderMultiChannelOpusImpl::PacketDurationRedundant(
const uint8_t* encoded,
size_t encoded_len) const {
if (!PacketHasFec(encoded, encoded_len)) {
// This packet is a RED packet.
return PacketDuration(encoded, encoded_len);
}
return WebRtcOpus_FecDurationEst(encoded, encoded_len, 48000);
}
bool AudioDecoderMultiChannelOpusImpl::PacketHasFec(const uint8_t* encoded,
size_t encoded_len) const {
int fec;
fec = WebRtcOpus_PacketHasFec(encoded, encoded_len);
return (fec == 1);
}
int AudioDecoderMultiChannelOpusImpl::SampleRateHz() const {
return 48000;
}
size_t AudioDecoderMultiChannelOpusImpl::Channels() const {
return config_.num_channels;
}
} // namespace webrtc
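To make the FEC handling in ParsePayload() above concrete, a worked example with assumed numbers:

// Illustrative: a 20 ms packet at 48 kHz spans 960 RTP timestamp units. If a
// packet stamped 10000 carries FEC for the previous frame, ParsePayload()
// emits two results: {timestamp 10000 - 960 = 9040, priority 1, FEC frame}
// and {timestamp 10000, priority 0, primary frame}.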


@@ -0,0 +1,74 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_DECODER_MULTI_CHANNEL_OPUS_IMPL_H_
#define MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_DECODER_MULTI_CHANNEL_OPUS_IMPL_H_
#include <stddef.h>
#include <memory>
#include <vector>
#include "api/audio_codecs/audio_decoder.h"
#include "api/audio_codecs/audio_format.h"
#include "api/audio_codecs/opus/audio_decoder_multi_channel_opus_config.h"
#include "modules/audio_coding/codecs/opus/opus_interface.h"
#include "rtc_base/buffer.h"
namespace webrtc {
class AudioDecoderMultiChannelOpusImpl final : public AudioDecoder {
public:
static std::unique_ptr<AudioDecoderMultiChannelOpusImpl> MakeAudioDecoder(
AudioDecoderMultiChannelOpusConfig config);
~AudioDecoderMultiChannelOpusImpl() override;
AudioDecoderMultiChannelOpusImpl(const AudioDecoderMultiChannelOpusImpl&) =
delete;
AudioDecoderMultiChannelOpusImpl& operator=(
const AudioDecoderMultiChannelOpusImpl&) = delete;
std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
uint32_t timestamp) override;
void Reset() override;
int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override;
int PacketDurationRedundant(const uint8_t* encoded,
size_t encoded_len) const override;
bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const override;
int SampleRateHz() const override;
size_t Channels() const override;
static absl::optional<AudioDecoderMultiChannelOpusConfig> SdpToConfig(
const SdpAudioFormat& format);
protected:
int DecodeInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) override;
int DecodeRedundantInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) override;
private:
AudioDecoderMultiChannelOpusImpl(OpusDecInst* dec_state,
AudioDecoderMultiChannelOpusConfig config);
OpusDecInst* dec_state_;
const AudioDecoderMultiChannelOpusConfig config_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_DECODER_MULTI_CHANNEL_OPUS_IMPL_H_


@@ -0,0 +1,148 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/audio_codecs/opus/audio_decoder_multi_channel_opus.h"
#include "modules/audio_coding/codecs/opus/audio_coder_opus_common.h"
#include "test/gmock.h"
#include "test/gtest.h"
namespace webrtc {
using ::testing::NiceMock;
using ::testing::Return;
TEST(AudioDecoderMultiOpusTest, GetFormatParameter) {
const SdpAudioFormat sdp_format("multiopus", 48000, 4,
{{"channel_mapping", "0,1,2,3"},
{"coupled_streams", "2"},
{"num_streams", "2"}});
EXPECT_EQ(GetFormatParameter(sdp_format, "channel_mapping"),
absl::optional<std::string>("0,1,2,3"));
EXPECT_EQ(GetFormatParameter<int>(sdp_format, "coupled_streams"),
absl::optional<int>(2));
EXPECT_EQ(GetFormatParameter(sdp_format, "missing"), absl::nullopt);
EXPECT_EQ(GetFormatParameter<int>(sdp_format, "channel_mapping"),
absl::nullopt);
}
TEST(AudioDecoderMultiOpusTest, InvalidChannelMappings) {
{
// Can't use channel 3 if there are only 2 channels.
const SdpAudioFormat sdp_format("multiopus", 48000, 2,
{{"channel_mapping", "3,0"},
{"coupled_streams", "1"},
{"num_streams", "2"}});
const absl::optional<AudioDecoderMultiChannelOpus::Config> decoder_config =
AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format);
EXPECT_FALSE(decoder_config.has_value());
}
{
// The mapping is too long. There are only 5 channels, but 6 elements in the
// mapping.
const SdpAudioFormat sdp_format("multiopus", 48000, 5,
{{"channel_mapping", "0,1,2,3,4,5"},
{"coupled_streams", "0"},
{"num_streams", "2"}});
const absl::optional<AudioDecoderMultiChannelOpus::Config> decoder_config =
AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format);
EXPECT_FALSE(decoder_config.has_value());
}
{
// The mapping doesn't parse correctly.
const SdpAudioFormat sdp_format(
"multiopus", 48000, 5,
{{"channel_mapping", "0,1,two,3,4"}, {"coupled_streams", "0"}});
const absl::optional<AudioDecoderMultiChannelOpus::Config> decoder_config =
AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format);
EXPECT_FALSE(decoder_config.has_value());
}
}
TEST(AudioDecoderMultiOpusTest, ValidSdpToConfigProducesCorrectConfig) {
const SdpAudioFormat sdp_format("multiopus", 48000, 4,
{{"channel_mapping", "3,1,2,0"},
{"coupled_streams", "2"},
{"num_streams", "2"}});
const absl::optional<AudioDecoderMultiChannelOpus::Config> decoder_config =
AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format);
ASSERT_TRUE(decoder_config.has_value());
EXPECT_TRUE(decoder_config->IsOk());
EXPECT_EQ(decoder_config->coupled_streams, 2);
EXPECT_THAT(decoder_config->channel_mapping,
::testing::ContainerEq(std::vector<unsigned char>({3, 1, 2, 0})));
}
TEST(AudioDecoderMultiOpusTest, InvalidSdpToConfigDoesNotProduceConfig) {
{
const SdpAudioFormat sdp_format("multiopus", 48000, 4,
{{"channel_mapping", "0,1,2,3"},
{"coupled_stream", "2"},
{"num_streams", "2"}});
const absl::optional<AudioDecoderMultiChannelOpus::Config> decoder_config =
AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format);
EXPECT_FALSE(decoder_config.has_value());
}
{
const SdpAudioFormat sdp_format("multiopus", 48000, 4,
{{"channel_mapping", "0,1,2 3"},
{"coupled_streams", "2"},
{"num_streams", "2"}});
const absl::optional<AudioDecoderMultiChannelOpus::Config> decoder_config =
AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format);
EXPECT_FALSE(decoder_config.has_value());
}
}
TEST(AudioDecoderMultiOpusTest, CodecsCanBeCreated) {
const SdpAudioFormat sdp_format("multiopus", 48000, 4,
{{"channel_mapping", "0,1,2,3"},
{"coupled_streams", "2"},
{"num_streams", "2"}});
const absl::optional<AudioDecoderMultiChannelOpus::Config> decoder_config =
AudioDecoderMultiChannelOpus::SdpToConfig(sdp_format);
ASSERT_TRUE(decoder_config.has_value());
const std::unique_ptr<AudioDecoder> opus_decoder =
AudioDecoderMultiChannelOpus::MakeAudioDecoder(*decoder_config);
EXPECT_TRUE(opus_decoder);
}
TEST(AudioDecoderMultiOpusTest, AdvertisedCodecsCanBeCreated) {
std::vector<AudioCodecSpec> specs;
AudioDecoderMultiChannelOpus::AppendSupportedDecoders(&specs);
EXPECT_FALSE(specs.empty());
for (const AudioCodecSpec& spec : specs) {
const absl::optional<AudioDecoderMultiChannelOpus::Config> decoder_config =
AudioDecoderMultiChannelOpus::SdpToConfig(spec.format);
ASSERT_TRUE(decoder_config.has_value());
const std::unique_ptr<AudioDecoder> opus_decoder =
AudioDecoderMultiChannelOpus::MakeAudioDecoder(*decoder_config);
EXPECT_TRUE(opus_decoder);
}
}
} // namespace webrtc


@@ -0,0 +1,149 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/codecs/opus/audio_decoder_opus.h"
#include <memory>
#include <utility>
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "modules/audio_coding/codecs/opus/audio_coder_opus_common.h"
#include "rtc_base/checks.h"
#include "system_wrappers/include/field_trial.h"
namespace webrtc {
AudioDecoderOpusImpl::AudioDecoderOpusImpl(size_t num_channels,
int sample_rate_hz)
: channels_(num_channels),
sample_rate_hz_(sample_rate_hz),
generate_plc_(field_trial::IsEnabled("WebRTC-Audio-OpusGeneratePlc")) {
RTC_DCHECK(num_channels == 1 || num_channels == 2);
RTC_DCHECK(sample_rate_hz == 16000 || sample_rate_hz == 48000);
const int error =
WebRtcOpus_DecoderCreate(&dec_state_, channels_, sample_rate_hz_);
RTC_DCHECK(error == 0);
WebRtcOpus_DecoderInit(dec_state_);
}
AudioDecoderOpusImpl::~AudioDecoderOpusImpl() {
WebRtcOpus_DecoderFree(dec_state_);
}
std::vector<AudioDecoder::ParseResult> AudioDecoderOpusImpl::ParsePayload(
rtc::Buffer&& payload,
uint32_t timestamp) {
std::vector<ParseResult> results;
if (PacketHasFec(payload.data(), payload.size())) {
const int duration =
PacketDurationRedundant(payload.data(), payload.size());
RTC_DCHECK_GE(duration, 0);
rtc::Buffer payload_copy(payload.data(), payload.size());
std::unique_ptr<EncodedAudioFrame> fec_frame(
new OpusFrame(this, std::move(payload_copy), false));
results.emplace_back(timestamp - duration, 1, std::move(fec_frame));
}
std::unique_ptr<EncodedAudioFrame> frame(
new OpusFrame(this, std::move(payload), true));
results.emplace_back(timestamp, 0, std::move(frame));
return results;
}
int AudioDecoderOpusImpl::DecodeInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
RTC_DCHECK_EQ(sample_rate_hz, sample_rate_hz_);
int16_t temp_type = 1; // Default is speech.
int ret =
WebRtcOpus_Decode(dec_state_, encoded, encoded_len, decoded, &temp_type);
if (ret > 0)
ret *= static_cast<int>(channels_); // Return total number of samples.
*speech_type = ConvertSpeechType(temp_type);
return ret;
}
int AudioDecoderOpusImpl::DecodeRedundantInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) {
if (!PacketHasFec(encoded, encoded_len)) {
// This packet is a RED packet.
return DecodeInternal(encoded, encoded_len, sample_rate_hz, decoded,
speech_type);
}
RTC_DCHECK_EQ(sample_rate_hz, sample_rate_hz_);
int16_t temp_type = 1; // Default is speech.
int ret = WebRtcOpus_DecodeFec(dec_state_, encoded, encoded_len, decoded,
&temp_type);
if (ret > 0)
ret *= static_cast<int>(channels_); // Return total number of samples.
*speech_type = ConvertSpeechType(temp_type);
return ret;
}
void AudioDecoderOpusImpl::Reset() {
WebRtcOpus_DecoderInit(dec_state_);
}
int AudioDecoderOpusImpl::PacketDuration(const uint8_t* encoded,
size_t encoded_len) const {
return WebRtcOpus_DurationEst(dec_state_, encoded, encoded_len);
}
int AudioDecoderOpusImpl::PacketDurationRedundant(const uint8_t* encoded,
size_t encoded_len) const {
if (!PacketHasFec(encoded, encoded_len)) {
// This packet is a RED packet.
return PacketDuration(encoded, encoded_len);
}
return WebRtcOpus_FecDurationEst(encoded, encoded_len, sample_rate_hz_);
}
bool AudioDecoderOpusImpl::PacketHasFec(const uint8_t* encoded,
size_t encoded_len) const {
int fec;
fec = WebRtcOpus_PacketHasFec(encoded, encoded_len);
return (fec == 1);
}
int AudioDecoderOpusImpl::SampleRateHz() const {
return sample_rate_hz_;
}
size_t AudioDecoderOpusImpl::Channels() const {
return channels_;
}
void AudioDecoderOpusImpl::GeneratePlc(
size_t requested_samples_per_channel,
rtc::BufferT<int16_t>* concealment_audio) {
if (!generate_plc_) {
return;
}
int plc_size = WebRtcOpus_PlcDuration(dec_state_) * channels_;
concealment_audio->AppendData(plc_size, [&](rtc::ArrayView<int16_t> decoded) {
int16_t temp_type = 1;
int ret =
WebRtcOpus_Decode(dec_state_, nullptr, 0, decoded.data(), &temp_type);
if (ret < 0) {
return 0;
}
return ret;
});
}
} // namespace webrtc
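A note on the GeneratePlc() path above, with assumed numbers:

// Illustrative: a null payload makes WebRtcOpus_Decode run the codec's
// native concealment instead of decoding real data. Assuming
// WebRtcOpus_PlcDuration() reports one 10 ms frame at 48 kHz, plc_size is
// 480 samples for mono and 960 for stereo.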


@@ -0,0 +1,67 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_DECODER_OPUS_H_
#define MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_DECODER_OPUS_H_
#include <stddef.h>
#include <stdint.h>
#include <vector>
#include "api/audio_codecs/audio_decoder.h"
#include "modules/audio_coding/codecs/opus/opus_interface.h"
#include "rtc_base/buffer.h"
namespace webrtc {
class AudioDecoderOpusImpl final : public AudioDecoder {
public:
explicit AudioDecoderOpusImpl(size_t num_channels,
int sample_rate_hz = 48000);
~AudioDecoderOpusImpl() override;
AudioDecoderOpusImpl(const AudioDecoderOpusImpl&) = delete;
AudioDecoderOpusImpl& operator=(const AudioDecoderOpusImpl&) = delete;
std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
uint32_t timestamp) override;
void Reset() override;
int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override;
int PacketDurationRedundant(const uint8_t* encoded,
size_t encoded_len) const override;
bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const override;
int SampleRateHz() const override;
size_t Channels() const override;
void GeneratePlc(size_t requested_samples_per_channel,
rtc::BufferT<int16_t>* concealment_audio) override;
protected:
int DecodeInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) override;
int DecodeRedundantInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) override;
private:
OpusDecInst* dec_state_;
const size_t channels_;
const int sample_rate_hz_;
const bool generate_plc_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_DECODER_OPUS_H_


@@ -0,0 +1,366 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* LEFT TO DO:
* - WRITE TESTS for the stuff in this file.
* - Check the creation, maybe make it safer by returning an empty optional or
* unique_ptr. --- It looks OK, but RecreateEncoderInstance can perhaps crash
 * on a valid config. Can run it in the fuzzer for some time. Should probably
 * also fuzz the config.
*/
#include "modules/audio_coding/codecs/opus/audio_encoder_multi_channel_opus_impl.h"
#include <algorithm>
#include <memory>
#include <string>
#include <vector>
#include "absl/strings/match.h"
#include "modules/audio_coding/codecs/opus/audio_coder_opus_common.h"
#include "rtc_base/arraysize.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/string_to_number.h"
namespace webrtc {
namespace {
// Recommended bitrates for one channel:
// 8-12 kb/s for NB speech,
// 16-20 kb/s for WB speech,
// 28-40 kb/s for FB speech,
// 48-64 kb/s for FB mono music, and
// 64-128 kb/s for FB stereo music.
// The current implementation multiplies these values by the number of channels.
constexpr int kOpusBitrateNbBps = 12000;
constexpr int kOpusBitrateWbBps = 20000;
constexpr int kOpusBitrateFbBps = 32000;
constexpr int kDefaultMaxPlaybackRate = 48000;
// This list must be sorted from low to high
#if WEBRTC_OPUS_SUPPORT_120MS_PTIME
constexpr int kOpusSupportedFrameLengths[] = {10, 20, 40, 60, 120};
#else
constexpr int kOpusSupportedFrameLengths[] = {10, 20, 40, 60};
#endif
int GetBitrateBps(const AudioEncoderMultiChannelOpusConfig& config) {
RTC_DCHECK(config.IsOk());
return config.bitrate_bps;
}
int GetMaxPlaybackRate(const SdpAudioFormat& format) {
const auto param = GetFormatParameter<int>(format, "maxplaybackrate");
if (param && *param >= 8000) {
return std::min(*param, kDefaultMaxPlaybackRate);
}
return kDefaultMaxPlaybackRate;
}
int GetFrameSizeMs(const SdpAudioFormat& format) {
const auto ptime = GetFormatParameter<int>(format, "ptime");
if (ptime.has_value()) {
// Pick the next highest supported frame length from
// kOpusSupportedFrameLengths.
for (const int supported_frame_length : kOpusSupportedFrameLengths) {
if (supported_frame_length >= *ptime) {
return supported_frame_length;
}
}
// If none was found, return the largest supported frame length.
return *(std::end(kOpusSupportedFrameLengths) - 1);
}
return AudioEncoderOpusConfig::kDefaultFrameSizeMs;
}
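// Worked example (illustrative): ptime=25 rounds up to the next supported
// frame length, 40 ms; a ptime beyond every table entry (say 200) clamps to
// the largest supported length; a missing ptime yields the default.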
int CalculateDefaultBitrate(int max_playback_rate, size_t num_channels) {
const int bitrate = [&] {
if (max_playback_rate <= 8000) {
return kOpusBitrateNbBps * rtc::dchecked_cast<int>(num_channels);
} else if (max_playback_rate <= 16000) {
return kOpusBitrateWbBps * rtc::dchecked_cast<int>(num_channels);
} else {
return kOpusBitrateFbBps * rtc::dchecked_cast<int>(num_channels);
}
}();
RTC_DCHECK_GE(bitrate, AudioEncoderMultiChannelOpusConfig::kMinBitrateBps);
return bitrate;
}
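// Worked example (illustrative): a 4-channel stream with maxplaybackrate
// 16000 Hz defaults to kOpusBitrateWbBps * 4 = 20000 * 4 = 80000 bps; above
// 16 kHz the fullband rate applies, 32000 * 4 = 128000 bps.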
// Get the maxaveragebitrate parameter in string-form, so we can properly figure
// out how invalid it is and accurately log invalid values.
int CalculateBitrate(int max_playback_rate_hz,
size_t num_channels,
absl::optional<std::string> bitrate_param) {
const int default_bitrate =
CalculateDefaultBitrate(max_playback_rate_hz, num_channels);
if (bitrate_param) {
const auto bitrate = rtc::StringToNumber<int>(*bitrate_param);
if (bitrate) {
const int chosen_bitrate =
std::max(AudioEncoderOpusConfig::kMinBitrateBps,
std::min(*bitrate, AudioEncoderOpusConfig::kMaxBitrateBps));
if (bitrate != chosen_bitrate) {
RTC_LOG(LS_WARNING) << "Invalid maxaveragebitrate " << *bitrate
<< " clamped to " << chosen_bitrate;
}
return chosen_bitrate;
}
RTC_LOG(LS_WARNING) << "Invalid maxaveragebitrate \"" << *bitrate_param
<< "\" replaced by default bitrate " << default_bitrate;
}
return default_bitrate;
}
} // namespace
std::unique_ptr<AudioEncoder>
AudioEncoderMultiChannelOpusImpl::MakeAudioEncoder(
const AudioEncoderMultiChannelOpusConfig& config,
int payload_type) {
if (!config.IsOk()) {
RTC_DCHECK_NOTREACHED();
return nullptr;
}
return std::make_unique<AudioEncoderMultiChannelOpusImpl>(config,
payload_type);
}
AudioEncoderMultiChannelOpusImpl::AudioEncoderMultiChannelOpusImpl(
const AudioEncoderMultiChannelOpusConfig& config,
int payload_type)
: payload_type_(payload_type), inst_(nullptr) {
RTC_DCHECK(0 <= payload_type && payload_type <= 127);
RTC_CHECK(RecreateEncoderInstance(config));
}
AudioEncoderMultiChannelOpusImpl::~AudioEncoderMultiChannelOpusImpl() {
RTC_CHECK_EQ(0, WebRtcOpus_EncoderFree(inst_));
}
size_t AudioEncoderMultiChannelOpusImpl::SufficientOutputBufferSize() const {
// Calculate the number of bytes we expect the encoder to produce,
// then multiply by two to give a wide margin for error.
const size_t bytes_per_millisecond =
static_cast<size_t>(GetBitrateBps(config_) / (1000 * 8) + 1);
const size_t approx_encoded_bytes =
Num10msFramesPerPacket() * 10 * bytes_per_millisecond;
return 2 * approx_encoded_bytes;
}
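// Worked example (illustrative): at 128000 bps with 20 ms packets,
// bytes_per_millisecond = 128000 / 8000 + 1 = 17 and approx_encoded_bytes =
// 2 * 10 * 17 = 340, so 680 bytes are reserved.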
void AudioEncoderMultiChannelOpusImpl::Reset() {
RTC_CHECK(RecreateEncoderInstance(config_));
}
absl::optional<std::pair<TimeDelta, TimeDelta>>
AudioEncoderMultiChannelOpusImpl::GetFrameLengthRange() const {
return {{TimeDelta::Millis(config_.frame_size_ms),
TimeDelta::Millis(config_.frame_size_ms)}};
}
// If the given config is OK, recreate the Opus encoder instance with those
// settings, save the config, and return true. Otherwise, do nothing and return
// false.
bool AudioEncoderMultiChannelOpusImpl::RecreateEncoderInstance(
const AudioEncoderMultiChannelOpusConfig& config) {
if (!config.IsOk())
return false;
config_ = config;
if (inst_)
RTC_CHECK_EQ(0, WebRtcOpus_EncoderFree(inst_));
input_buffer_.clear();
input_buffer_.reserve(Num10msFramesPerPacket() * SamplesPer10msFrame());
RTC_CHECK_EQ(
0, WebRtcOpus_MultistreamEncoderCreate(
&inst_, config.num_channels,
config.application ==
AudioEncoderMultiChannelOpusConfig::ApplicationMode::kVoip
? 0
: 1,
config.num_streams, config.coupled_streams,
config.channel_mapping.data()));
const int bitrate = GetBitrateBps(config);
RTC_CHECK_EQ(0, WebRtcOpus_SetBitRate(inst_, bitrate));
RTC_LOG(LS_VERBOSE) << "Set Opus bitrate to " << bitrate << " bps.";
if (config.fec_enabled) {
RTC_CHECK_EQ(0, WebRtcOpus_EnableFec(inst_));
RTC_LOG(LS_VERBOSE) << "Opus enable FEC";
} else {
RTC_CHECK_EQ(0, WebRtcOpus_DisableFec(inst_));
RTC_LOG(LS_VERBOSE) << "Opus disable FEC";
}
RTC_CHECK_EQ(
0, WebRtcOpus_SetMaxPlaybackRate(inst_, config.max_playback_rate_hz));
RTC_LOG(LS_VERBOSE) << "Set Opus playback rate to "
<< config.max_playback_rate_hz << " hz.";
// Use the DEFAULT complexity.
RTC_CHECK_EQ(
0, WebRtcOpus_SetComplexity(inst_, AudioEncoderOpusConfig().complexity));
RTC_LOG(LS_VERBOSE) << "Set Opus coding complexity to "
<< AudioEncoderOpusConfig().complexity;
if (config.dtx_enabled) {
RTC_CHECK_EQ(0, WebRtcOpus_EnableDtx(inst_));
RTC_LOG(LS_VERBOSE) << "Opus enable DTX";
} else {
RTC_CHECK_EQ(0, WebRtcOpus_DisableDtx(inst_));
RTC_LOG(LS_VERBOSE) << "Opus disable DTX";
}
if (config.cbr_enabled) {
RTC_CHECK_EQ(0, WebRtcOpus_EnableCbr(inst_));
RTC_LOG(LS_VERBOSE) << "Opus enable CBR";
} else {
RTC_CHECK_EQ(0, WebRtcOpus_DisableCbr(inst_));
RTC_LOG(LS_VERBOSE) << "Opus disable CBR";
}
num_channels_to_encode_ = NumChannels();
next_frame_length_ms_ = config_.frame_size_ms;
RTC_LOG(LS_VERBOSE) << "Set Opus frame length to " << config_.frame_size_ms
<< " ms";
return true;
}
absl::optional<AudioEncoderMultiChannelOpusConfig>
AudioEncoderMultiChannelOpusImpl::SdpToConfig(const SdpAudioFormat& format) {
if (!absl::EqualsIgnoreCase(format.name, "multiopus") ||
format.clockrate_hz != 48000) {
return absl::nullopt;
}
AudioEncoderMultiChannelOpusConfig config;
config.num_channels = format.num_channels;
config.frame_size_ms = GetFrameSizeMs(format);
config.max_playback_rate_hz = GetMaxPlaybackRate(format);
config.fec_enabled = (GetFormatParameter(format, "useinbandfec") == "1");
config.dtx_enabled = (GetFormatParameter(format, "usedtx") == "1");
config.cbr_enabled = (GetFormatParameter(format, "cbr") == "1");
config.bitrate_bps =
CalculateBitrate(config.max_playback_rate_hz, config.num_channels,
GetFormatParameter(format, "maxaveragebitrate"));
config.application =
config.num_channels == 1
? AudioEncoderMultiChannelOpusConfig::ApplicationMode::kVoip
: AudioEncoderMultiChannelOpusConfig::ApplicationMode::kAudio;
config.supported_frame_lengths_ms.clear();
std::copy(std::begin(kOpusSupportedFrameLengths),
std::end(kOpusSupportedFrameLengths),
std::back_inserter(config.supported_frame_lengths_ms));
auto num_streams = GetFormatParameter<int>(format, "num_streams");
if (!num_streams.has_value()) {
return absl::nullopt;
}
config.num_streams = *num_streams;
auto coupled_streams = GetFormatParameter<int>(format, "coupled_streams");
if (!coupled_streams.has_value()) {
return absl::nullopt;
}
config.coupled_streams = *coupled_streams;
auto channel_mapping =
GetFormatParameter<std::vector<unsigned char>>(format, "channel_mapping");
if (!channel_mapping.has_value()) {
return absl::nullopt;
}
config.channel_mapping = *channel_mapping;
if (!config.IsOk()) {
return absl::nullopt;
}
return config;
}
AudioCodecInfo AudioEncoderMultiChannelOpusImpl::QueryAudioEncoder(
const AudioEncoderMultiChannelOpusConfig& config) {
RTC_DCHECK(config.IsOk());
AudioCodecInfo info(48000, config.num_channels, config.bitrate_bps,
AudioEncoderOpusConfig::kMinBitrateBps,
AudioEncoderOpusConfig::kMaxBitrateBps);
info.allow_comfort_noise = false;
info.supports_network_adaption = false;
return info;
}
size_t AudioEncoderMultiChannelOpusImpl::Num10msFramesPerPacket() const {
return static_cast<size_t>(rtc::CheckedDivExact(config_.frame_size_ms, 10));
}
size_t AudioEncoderMultiChannelOpusImpl::SamplesPer10msFrame() const {
return rtc::CheckedDivExact(48000, 100) * config_.num_channels;
}
int AudioEncoderMultiChannelOpusImpl::SampleRateHz() const {
return 48000;
}
size_t AudioEncoderMultiChannelOpusImpl::NumChannels() const {
return config_.num_channels;
}
size_t AudioEncoderMultiChannelOpusImpl::Num10MsFramesInNextPacket() const {
return Num10msFramesPerPacket();
}
size_t AudioEncoderMultiChannelOpusImpl::Max10MsFramesInAPacket() const {
return Num10msFramesPerPacket();
}
int AudioEncoderMultiChannelOpusImpl::GetTargetBitrate() const {
return GetBitrateBps(config_);
}
AudioEncoder::EncodedInfo AudioEncoderMultiChannelOpusImpl::EncodeImpl(
uint32_t rtp_timestamp,
rtc::ArrayView<const int16_t> audio,
rtc::Buffer* encoded) {
if (input_buffer_.empty())
first_timestamp_in_buffer_ = rtp_timestamp;
input_buffer_.insert(input_buffer_.end(), audio.cbegin(), audio.cend());
if (input_buffer_.size() <
(Num10msFramesPerPacket() * SamplesPer10msFrame())) {
return EncodedInfo();
}
RTC_CHECK_EQ(input_buffer_.size(),
Num10msFramesPerPacket() * SamplesPer10msFrame());
const size_t max_encoded_bytes = SufficientOutputBufferSize();
EncodedInfo info;
info.encoded_bytes = encoded->AppendData(
max_encoded_bytes, [&](rtc::ArrayView<uint8_t> encoded) {
int status = WebRtcOpus_Encode(
inst_, &input_buffer_[0],
rtc::CheckedDivExact(input_buffer_.size(), config_.num_channels),
rtc::saturated_cast<int16_t>(max_encoded_bytes), encoded.data());
RTC_CHECK_GE(status, 0); // Fails only if fed invalid data.
return static_cast<size_t>(status);
});
input_buffer_.clear();
// Will use new packet size for next encoding.
config_.frame_size_ms = next_frame_length_ms_;
info.encoded_timestamp = first_timestamp_in_buffer_;
info.payload_type = payload_type_;
info.send_even_if_empty = true; // Allows Opus to send empty packets.
info.speech = true;
info.encoder_type = CodecType::kOther;
return info;
}
} // namespace webrtc


@@ -0,0 +1,92 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_ENCODER_MULTI_CHANNEL_OPUS_IMPL_H_
#define MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_ENCODER_MULTI_CHANNEL_OPUS_IMPL_H_
#include <memory>
#include <utility>
#include <vector>
#include "absl/types/optional.h"
#include "api/audio_codecs/audio_encoder.h"
#include "api/audio_codecs/audio_format.h"
#include "api/audio_codecs/opus/audio_encoder_multi_channel_opus_config.h"
#include "api/units/time_delta.h"
#include "modules/audio_coding/codecs/opus/opus_interface.h"
namespace webrtc {
class RtcEventLog;
class AudioEncoderMultiChannelOpusImpl final : public AudioEncoder {
public:
AudioEncoderMultiChannelOpusImpl(
const AudioEncoderMultiChannelOpusConfig& config,
int payload_type);
~AudioEncoderMultiChannelOpusImpl() override;
AudioEncoderMultiChannelOpusImpl(const AudioEncoderMultiChannelOpusImpl&) =
delete;
AudioEncoderMultiChannelOpusImpl& operator=(
const AudioEncoderMultiChannelOpusImpl&) = delete;
// Static interface for use by BuiltinAudioEncoderFactory.
static constexpr const char* GetPayloadName() { return "multiopus"; }
static absl::optional<AudioCodecInfo> QueryAudioEncoder(
const SdpAudioFormat& format);
int SampleRateHz() const override;
size_t NumChannels() const override;
size_t Num10MsFramesInNextPacket() const override;
size_t Max10MsFramesInAPacket() const override;
int GetTargetBitrate() const override;
void Reset() override;
absl::optional<std::pair<TimeDelta, TimeDelta>> GetFrameLengthRange()
const override;
protected:
EncodedInfo EncodeImpl(uint32_t rtp_timestamp,
rtc::ArrayView<const int16_t> audio,
rtc::Buffer* encoded) override;
private:
static absl::optional<AudioEncoderMultiChannelOpusConfig> SdpToConfig(
const SdpAudioFormat& format);
static AudioCodecInfo QueryAudioEncoder(
const AudioEncoderMultiChannelOpusConfig& config);
static std::unique_ptr<AudioEncoder> MakeAudioEncoder(
const AudioEncoderMultiChannelOpusConfig&,
int payload_type);
size_t Num10msFramesPerPacket() const;
size_t SamplesPer10msFrame() const;
size_t SufficientOutputBufferSize() const;
bool RecreateEncoderInstance(
const AudioEncoderMultiChannelOpusConfig& config);
void SetFrameLength(int frame_length_ms);
void SetNumChannelsToEncode(size_t num_channels_to_encode);
void SetProjectedPacketLossRate(float fraction);
AudioEncoderMultiChannelOpusConfig config_;
const int payload_type_;
std::vector<int16_t> input_buffer_;
OpusEncInst* inst_;
uint32_t first_timestamp_in_buffer_;
size_t num_channels_to_encode_;
int next_frame_length_ms_;
friend struct AudioEncoderMultiChannelOpus;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_ENCODER_MULTI_CHANNEL_OPUS_IMPL_H_


@@ -0,0 +1,156 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/audio_codecs/opus/audio_encoder_multi_channel_opus.h"
#include "test/gmock.h"
namespace webrtc {
using ::testing::NiceMock;
using ::testing::Return;
namespace {
constexpr int kOpusPayloadType = 120;
} // namespace
TEST(AudioEncoderMultiOpusTest, CheckConfigValidity) {
{
const SdpAudioFormat sdp_format("multiopus", 48000, 2,
{{"channel_mapping", "3,0"},
{"coupled_streams", "1"},
{"num_streams", "2"}});
const absl::optional<AudioEncoderMultiChannelOpus::Config> encoder_config =
AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format);
// Maps input channel 0 to coded channel 3, which doesn't exist.
EXPECT_FALSE(encoder_config.has_value());
}
{
const SdpAudioFormat sdp_format("multiopus", 48000, 2,
{{"channel_mapping", "0"},
{"coupled_streams", "1"},
{"num_streams", "2"}});
const absl::optional<AudioEncoderMultiChannelOpus::Config> encoder_config =
AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format);
// The mapping is too short.
EXPECT_FALSE(encoder_config.has_value());
}
{
const SdpAudioFormat sdp_format("multiopus", 48000, 3,
{{"channel_mapping", "0,0,0"},
{"coupled_streams", "0"},
{"num_streams", "1"}});
const absl::optional<AudioEncoderMultiChannelOpus::Config> encoder_config =
AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format);
// Coded channel 0 comes from all three input channels 0, 1 and 2.
EXPECT_FALSE(encoder_config.has_value());
}
{
const SdpAudioFormat sdp_format("multiopus", 48000, 3,
{{"channel_mapping", "0,255,255"},
{"coupled_streams", "0"},
{"num_streams", "1"}});
const absl::optional<AudioEncoderMultiChannelOpus::Config> encoder_config =
AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format);
ASSERT_TRUE(encoder_config.has_value());
// This is fine, because channels 1 and 2 are set to be ignored.
EXPECT_TRUE(encoder_config->IsOk());
}
{
const SdpAudioFormat sdp_format("multiopus", 48000, 3,
{{"channel_mapping", "0,255,255"},
{"coupled_streams", "0"},
{"num_streams", "2"}});
const absl::optional<AudioEncoderMultiChannelOpus::Config> encoder_config =
AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format);
    // This is NOT fine, because nothing says how coded channel 1 should be
    // coded.
EXPECT_FALSE(encoder_config.has_value());
}
}
TEST(AudioEncoderMultiOpusTest, ConfigValuesAreParsedCorrectly) {
SdpAudioFormat sdp_format({"multiopus",
48000,
6,
{{"minptime", "10"},
{"useinbandfec", "1"},
{"channel_mapping", "0,4,1,2,3,5"},
{"num_streams", "4"},
{"coupled_streams", "2"}}});
const absl::optional<AudioEncoderMultiChannelOpus::Config> encoder_config =
AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format);
ASSERT_TRUE(encoder_config.has_value());
EXPECT_EQ(encoder_config->coupled_streams, 2);
EXPECT_EQ(encoder_config->num_streams, 4);
EXPECT_THAT(
encoder_config->channel_mapping,
testing::ContainerEq(std::vector<unsigned char>({0, 4, 1, 2, 3, 5})));
}
TEST(AudioEncoderMultiOpusTest, CreateFromValidConfig) {
{
const SdpAudioFormat sdp_format("multiopus", 48000, 3,
{{"channel_mapping", "0,255,255"},
{"coupled_streams", "0"},
{"num_streams", "2"}});
const absl::optional<AudioEncoderMultiChannelOpus::Config> encoder_config =
AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format);
ASSERT_FALSE(encoder_config.has_value());
}
{
const SdpAudioFormat sdp_format("multiopus", 48000, 3,
{{"channel_mapping", "1,255,0"},
{"coupled_streams", "1"},
{"num_streams", "1"}});
const absl::optional<AudioEncoderMultiChannelOpus::Config> encoder_config =
AudioEncoderMultiChannelOpus::SdpToConfig(sdp_format);
ASSERT_TRUE(encoder_config.has_value());
EXPECT_THAT(encoder_config->channel_mapping,
testing::ContainerEq(std::vector<unsigned char>({1, 255, 0})));
EXPECT_TRUE(encoder_config->IsOk());
const std::unique_ptr<AudioEncoder> opus_encoder =
AudioEncoderMultiChannelOpus::MakeAudioEncoder(*encoder_config,
kOpusPayloadType);
// Creating an encoder from a valid config should work.
EXPECT_TRUE(opus_encoder);
}
}
TEST(AudioEncoderMultiOpusTest, AdvertisedCodecsCanBeCreated) {
std::vector<AudioCodecSpec> specs;
AudioEncoderMultiChannelOpus::AppendSupportedEncoders(&specs);
EXPECT_FALSE(specs.empty());
for (const AudioCodecSpec& spec : specs) {
const absl::optional<AudioEncoderMultiChannelOpus::Config> encoder_config =
AudioEncoderMultiChannelOpus::SdpToConfig(spec.format);
ASSERT_TRUE(encoder_config.has_value());
const std::unique_ptr<AudioEncoder> opus_encoder =
AudioEncoderMultiChannelOpus::MakeAudioEncoder(*encoder_config,
kOpusPayloadType);
EXPECT_TRUE(opus_encoder);
}
}
} // namespace webrtc


@@ -0,0 +1,824 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/codecs/opus/audio_encoder_opus.h"
#include <algorithm>
#include <iterator>
#include <memory>
#include <string>
#include <utility>
#include "absl/strings/match.h"
#include "absl/strings/string_view.h"
#include "modules/audio_coding/audio_network_adaptor/audio_network_adaptor_impl.h"
#include "modules/audio_coding/audio_network_adaptor/controller_manager.h"
#include "modules/audio_coding/codecs/opus/audio_coder_opus_common.h"
#include "modules/audio_coding/codecs/opus/opus_interface.h"
#include "rtc_base/arraysize.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/numerics/exp_filter.h"
#include "rtc_base/numerics/safe_conversions.h"
#include "rtc_base/numerics/safe_minmax.h"
#include "rtc_base/string_encode.h"
#include "rtc_base/string_to_number.h"
#include "rtc_base/time_utils.h"
#include "system_wrappers/include/field_trial.h"
namespace webrtc {
namespace {
// Codec parameters for Opus.
// draft-spittka-payload-rtp-opus-03
// Recommended bitrates:
// 8-12 kb/s for NB speech,
// 16-20 kb/s for WB speech,
// 28-40 kb/s for FB speech,
// 48-64 kb/s for FB mono music, and
// 64-128 kb/s for FB stereo music.
// The current implementation applies the following values to mono signals,
// and multiplies them by 2 for stereo.
constexpr int kOpusBitrateNbBps = 12000;
constexpr int kOpusBitrateWbBps = 20000;
constexpr int kOpusBitrateFbBps = 32000;
constexpr int kRtpTimestampRateHz = 48000;
constexpr int kDefaultMaxPlaybackRate = 48000;
// These two lists must be sorted from low to high
#if WEBRTC_OPUS_SUPPORT_120MS_PTIME
constexpr int kANASupportedFrameLengths[] = {20, 40, 60, 120};
constexpr int kOpusSupportedFrameLengths[] = {10, 20, 40, 60, 120};
#else
constexpr int kANASupportedFrameLengths[] = {20, 40, 60};
constexpr int kOpusSupportedFrameLengths[] = {10, 20, 40, 60};
#endif
// PacketLossFractionSmoother uses an exponential filter with a time constant
// of -1.0 / ln(0.9999) = 10000 ms.
constexpr float kAlphaForPacketLossFractionSmoother = 0.9999f;
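// (Illustrative check: ln(0.9999) is roughly -1.0e-4, so -1.0 / ln(0.9999)
// works out to roughly 10000 ms, i.e. a ~10 s smoothing horizon.)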
constexpr float kMaxPacketLossFraction = 0.2f;
int CalculateDefaultBitrate(int max_playback_rate, size_t num_channels) {
const int bitrate = [&] {
if (max_playback_rate <= 8000) {
return kOpusBitrateNbBps * rtc::dchecked_cast<int>(num_channels);
} else if (max_playback_rate <= 16000) {
return kOpusBitrateWbBps * rtc::dchecked_cast<int>(num_channels);
} else {
return kOpusBitrateFbBps * rtc::dchecked_cast<int>(num_channels);
}
}();
RTC_DCHECK_GE(bitrate, AudioEncoderOpusConfig::kMinBitrateBps);
RTC_DCHECK_LE(bitrate, AudioEncoderOpusConfig::kMaxBitrateBps);
return bitrate;
}
// Get the maxaveragebitrate parameter in string-form, so we can properly figure
// out how invalid it is and accurately log invalid values.
int CalculateBitrate(int max_playback_rate_hz,
size_t num_channels,
absl::optional<std::string> bitrate_param) {
const int default_bitrate =
CalculateDefaultBitrate(max_playback_rate_hz, num_channels);
if (bitrate_param) {
const auto bitrate = rtc::StringToNumber<int>(*bitrate_param);
if (bitrate) {
const int chosen_bitrate =
std::max(AudioEncoderOpusConfig::kMinBitrateBps,
std::min(*bitrate, AudioEncoderOpusConfig::kMaxBitrateBps));
if (bitrate != chosen_bitrate) {
RTC_LOG(LS_WARNING) << "Invalid maxaveragebitrate " << *bitrate
<< " clamped to " << chosen_bitrate;
}
return chosen_bitrate;
}
RTC_LOG(LS_WARNING) << "Invalid maxaveragebitrate \"" << *bitrate_param
<< "\" replaced by default bitrate " << default_bitrate;
}
return default_bitrate;
}
int GetChannelCount(const SdpAudioFormat& format) {
const auto param = GetFormatParameter(format, "stereo");
if (param == "1") {
return 2;
} else {
return 1;
}
}
int GetMaxPlaybackRate(const SdpAudioFormat& format) {
const auto param = GetFormatParameter<int>(format, "maxplaybackrate");
if (param && *param >= 8000) {
return std::min(*param, kDefaultMaxPlaybackRate);
}
return kDefaultMaxPlaybackRate;
}
int GetFrameSizeMs(const SdpAudioFormat& format) {
const auto ptime = GetFormatParameter<int>(format, "ptime");
if (ptime) {
// Pick the next highest supported frame length from
// kOpusSupportedFrameLengths.
for (const int supported_frame_length : kOpusSupportedFrameLengths) {
if (supported_frame_length >= *ptime) {
return supported_frame_length;
}
}
// If none was found, return the largest supported frame length.
return *(std::end(kOpusSupportedFrameLengths) - 1);
}
return AudioEncoderOpusConfig::kDefaultFrameSizeMs;
}
void FindSupportedFrameLengths(int min_frame_length_ms,
int max_frame_length_ms,
std::vector<int>* out) {
out->clear();
std::copy_if(std::begin(kANASupportedFrameLengths),
std::end(kANASupportedFrameLengths), std::back_inserter(*out),
[&](int frame_length_ms) {
return frame_length_ms >= min_frame_length_ms &&
frame_length_ms <= max_frame_length_ms;
});
RTC_DCHECK(std::is_sorted(out->begin(), out->end()));
}
int GetBitrateBps(const AudioEncoderOpusConfig& config) {
RTC_DCHECK(config.IsOk());
return *config.bitrate_bps;
}
std::vector<float> GetBitrateMultipliers() {
constexpr char kBitrateMultipliersName[] =
"WebRTC-Audio-OpusBitrateMultipliers";
const bool use_bitrate_multipliers =
webrtc::field_trial::IsEnabled(kBitrateMultipliersName);
if (use_bitrate_multipliers) {
const std::string field_trial_string =
webrtc::field_trial::FindFullName(kBitrateMultipliersName);
std::vector<std::string> pieces;
rtc::tokenize(field_trial_string, '-', &pieces);
if (pieces.size() < 2 || pieces[0] != "Enabled") {
RTC_LOG(LS_WARNING) << "Invalid parameters for "
<< kBitrateMultipliersName
<< ", not using custom values.";
return std::vector<float>();
}
std::vector<float> multipliers(pieces.size() - 1);
for (size_t i = 1; i < pieces.size(); i++) {
if (!rtc::FromString(pieces[i], &multipliers[i - 1])) {
RTC_LOG(LS_WARNING)
<< "Invalid parameters for " << kBitrateMultipliersName
<< ", not using custom values.";
return std::vector<float>();
}
}
RTC_LOG(LS_INFO) << "Using custom bitrate multipliers: "
<< field_trial_string;
return multipliers;
}
return std::vector<float>();
}
int GetMultipliedBitrate(int bitrate, const std::vector<float>& multipliers) {
// The multipliers are valid from 5 kbps.
const size_t bitrate_kbps = static_cast<size_t>(bitrate / 1000);
if (bitrate_kbps < 5 || bitrate_kbps >= multipliers.size() + 5) {
return bitrate;
}
return static_cast<int>(multipliers[bitrate_kbps - 5] * bitrate);
}
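// Worked example (illustrative): with multipliers {0.8, 0.9} the table
// covers 5 and 6 kbps, so 5000 bps becomes 0.8 * 5000 = 4000 bps, while
// 7000 bps falls outside the table and is returned unchanged.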
} // namespace
void AudioEncoderOpusImpl::AppendSupportedEncoders(
std::vector<AudioCodecSpec>* specs) {
const SdpAudioFormat fmt = {"opus",
kRtpTimestampRateHz,
2,
{{"minptime", "10"}, {"useinbandfec", "1"}}};
const AudioCodecInfo info = QueryAudioEncoder(*SdpToConfig(fmt));
specs->push_back({fmt, info});
}
AudioCodecInfo AudioEncoderOpusImpl::QueryAudioEncoder(
const AudioEncoderOpusConfig& config) {
RTC_DCHECK(config.IsOk());
AudioCodecInfo info(config.sample_rate_hz, config.num_channels,
*config.bitrate_bps,
AudioEncoderOpusConfig::kMinBitrateBps,
AudioEncoderOpusConfig::kMaxBitrateBps);
info.allow_comfort_noise = false;
info.supports_network_adaption = true;
return info;
}
std::unique_ptr<AudioEncoder> AudioEncoderOpusImpl::MakeAudioEncoder(
const AudioEncoderOpusConfig& config,
int payload_type) {
if (!config.IsOk()) {
RTC_DCHECK_NOTREACHED();
return nullptr;
}
return std::make_unique<AudioEncoderOpusImpl>(config, payload_type);
}
absl::optional<AudioEncoderOpusConfig> AudioEncoderOpusImpl::SdpToConfig(
const SdpAudioFormat& format) {
if (!absl::EqualsIgnoreCase(format.name, "opus") ||
format.clockrate_hz != kRtpTimestampRateHz || format.num_channels != 2) {
return absl::nullopt;
}
AudioEncoderOpusConfig config;
config.num_channels = GetChannelCount(format);
config.frame_size_ms = GetFrameSizeMs(format);
config.max_playback_rate_hz = GetMaxPlaybackRate(format);
config.fec_enabled = (GetFormatParameter(format, "useinbandfec") == "1");
config.dtx_enabled = (GetFormatParameter(format, "usedtx") == "1");
config.cbr_enabled = (GetFormatParameter(format, "cbr") == "1");
config.bitrate_bps =
CalculateBitrate(config.max_playback_rate_hz, config.num_channels,
GetFormatParameter(format, "maxaveragebitrate"));
config.application = config.num_channels == 1
? AudioEncoderOpusConfig::ApplicationMode::kVoip
: AudioEncoderOpusConfig::ApplicationMode::kAudio;
constexpr int kMinANAFrameLength = kANASupportedFrameLengths[0];
constexpr int kMaxANAFrameLength =
kANASupportedFrameLengths[arraysize(kANASupportedFrameLengths) - 1];
// For now, minptime and maxptime are only used with ANA. If ptime is outside
// of this range, it will get adjusted once ANA takes hold. Ideally, we'd know
// if ANA was to be used when setting up the config, and adjust accordingly.
const int min_frame_length_ms =
GetFormatParameter<int>(format, "minptime").value_or(kMinANAFrameLength);
const int max_frame_length_ms =
GetFormatParameter<int>(format, "maxptime").value_or(kMaxANAFrameLength);
FindSupportedFrameLengths(min_frame_length_ms, max_frame_length_ms,
&config.supported_frame_lengths_ms);
if (!config.IsOk()) {
RTC_DCHECK_NOTREACHED();
return absl::nullopt;
}
return config;
}
absl::optional<int> AudioEncoderOpusImpl::GetNewComplexity(
const AudioEncoderOpusConfig& config) {
RTC_DCHECK(config.IsOk());
const int bitrate_bps = GetBitrateBps(config);
if (bitrate_bps >= config.complexity_threshold_bps -
config.complexity_threshold_window_bps &&
bitrate_bps <= config.complexity_threshold_bps +
config.complexity_threshold_window_bps) {
// Within the hysteresis window; make no change.
return absl::nullopt;
} else {
return bitrate_bps <= config.complexity_threshold_bps
? config.low_rate_complexity
: config.complexity;
}
}
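// Worked example (illustrative, assuming the AudioEncoderOpusConfig
// defaults complexity_threshold_bps = 12500 and
// complexity_threshold_window_bps = 1500): bitrates in [11000, 14000] bps
// return absl::nullopt (no change), 10000 bps selects low_rate_complexity,
// and 15000 bps selects the regular complexity.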
absl::optional<int> AudioEncoderOpusImpl::GetNewBandwidth(
const AudioEncoderOpusConfig& config,
OpusEncInst* inst) {
constexpr int kMinWidebandBitrate = 8000;
constexpr int kMaxNarrowbandBitrate = 9000;
constexpr int kAutomaticThreshold = 11000;
RTC_DCHECK(config.IsOk());
const int bitrate = GetBitrateBps(config);
if (bitrate > kAutomaticThreshold) {
return absl::optional<int>(OPUS_AUTO);
}
const int bandwidth = WebRtcOpus_GetBandwidth(inst);
RTC_DCHECK_GE(bandwidth, 0);
if (bitrate > kMaxNarrowbandBitrate && bandwidth < OPUS_BANDWIDTH_WIDEBAND) {
return absl::optional<int>(OPUS_BANDWIDTH_WIDEBAND);
} else if (bitrate < kMinWidebandBitrate &&
bandwidth > OPUS_BANDWIDTH_NARROWBAND) {
return absl::optional<int>(OPUS_BANDWIDTH_NARROWBAND);
}
return absl::optional<int>();
}
class AudioEncoderOpusImpl::PacketLossFractionSmoother {
public:
explicit PacketLossFractionSmoother()
: last_sample_time_ms_(rtc::TimeMillis()),
smoother_(kAlphaForPacketLossFractionSmoother) {}
// Gets the smoothed packet loss fraction.
float GetAverage() const {
float value = smoother_.filtered();
return (value == rtc::ExpFilter::kValueUndefined) ? 0.0f : value;
}
// Add new observation to the packet loss fraction smoother.
void AddSample(float packet_loss_fraction) {
int64_t now_ms = rtc::TimeMillis();
smoother_.Apply(static_cast<float>(now_ms - last_sample_time_ms_),
packet_loss_fraction);
last_sample_time_ms_ = now_ms;
}
private:
int64_t last_sample_time_ms_;
// An exponential filter is used to smooth the packet loss fraction.
rtc::ExpFilter smoother_;
};
AudioEncoderOpusImpl::AudioEncoderOpusImpl(const AudioEncoderOpusConfig& config,
int payload_type)
: AudioEncoderOpusImpl(
config,
payload_type,
[this](absl::string_view config_string, RtcEventLog* event_log) {
return DefaultAudioNetworkAdaptorCreator(config_string, event_log);
},
// We choose 5 sec as the initial time constant based on empirical data.
std::make_unique<SmoothingFilterImpl>(5000)) {}
AudioEncoderOpusImpl::AudioEncoderOpusImpl(
const AudioEncoderOpusConfig& config,
int payload_type,
const AudioNetworkAdaptorCreator& audio_network_adaptor_creator,
std::unique_ptr<SmoothingFilter> bitrate_smoother)
: payload_type_(payload_type),
use_stable_target_for_adaptation_(!webrtc::field_trial::IsDisabled(
"WebRTC-Audio-StableTargetAdaptation")),
adjust_bandwidth_(
webrtc::field_trial::IsEnabled("WebRTC-AdjustOpusBandwidth")),
bitrate_changed_(true),
bitrate_multipliers_(GetBitrateMultipliers()),
packet_loss_rate_(0.0),
inst_(nullptr),
packet_loss_fraction_smoother_(new PacketLossFractionSmoother()),
audio_network_adaptor_creator_(audio_network_adaptor_creator),
bitrate_smoother_(std::move(bitrate_smoother)),
consecutive_dtx_frames_(0) {
RTC_DCHECK(0 <= payload_type && payload_type <= 127);
// Sanity check of the redundant payload type field that we want to get rid
// of. See https://bugs.chromium.org/p/webrtc/issues/detail?id=7847
RTC_CHECK(config.payload_type == -1 || config.payload_type == payload_type);
RTC_CHECK(RecreateEncoderInstance(config));
SetProjectedPacketLossRate(packet_loss_rate_);
}
AudioEncoderOpusImpl::AudioEncoderOpusImpl(int payload_type,
const SdpAudioFormat& format)
: AudioEncoderOpusImpl(*SdpToConfig(format), payload_type) {}
AudioEncoderOpusImpl::~AudioEncoderOpusImpl() {
RTC_CHECK_EQ(0, WebRtcOpus_EncoderFree(inst_));
}
int AudioEncoderOpusImpl::SampleRateHz() const {
return config_.sample_rate_hz;
}
size_t AudioEncoderOpusImpl::NumChannels() const {
return config_.num_channels;
}
int AudioEncoderOpusImpl::RtpTimestampRateHz() const {
return kRtpTimestampRateHz;
}
size_t AudioEncoderOpusImpl::Num10MsFramesInNextPacket() const {
return Num10msFramesPerPacket();
}
size_t AudioEncoderOpusImpl::Max10MsFramesInAPacket() const {
return Num10msFramesPerPacket();
}
int AudioEncoderOpusImpl::GetTargetBitrate() const {
return GetBitrateBps(config_);
}
void AudioEncoderOpusImpl::Reset() {
RTC_CHECK(RecreateEncoderInstance(config_));
}
bool AudioEncoderOpusImpl::SetFec(bool enable) {
if (enable) {
RTC_CHECK_EQ(0, WebRtcOpus_EnableFec(inst_));
} else {
RTC_CHECK_EQ(0, WebRtcOpus_DisableFec(inst_));
}
config_.fec_enabled = enable;
return true;
}
bool AudioEncoderOpusImpl::SetDtx(bool enable) {
if (enable) {
RTC_CHECK_EQ(0, WebRtcOpus_EnableDtx(inst_));
} else {
RTC_CHECK_EQ(0, WebRtcOpus_DisableDtx(inst_));
}
config_.dtx_enabled = enable;
return true;
}
bool AudioEncoderOpusImpl::GetDtx() const {
return config_.dtx_enabled;
}
bool AudioEncoderOpusImpl::SetApplication(Application application) {
auto conf = config_;
switch (application) {
case Application::kSpeech:
conf.application = AudioEncoderOpusConfig::ApplicationMode::kVoip;
break;
case Application::kAudio:
conf.application = AudioEncoderOpusConfig::ApplicationMode::kAudio;
break;
}
return RecreateEncoderInstance(conf);
}
void AudioEncoderOpusImpl::SetMaxPlaybackRate(int frequency_hz) {
auto conf = config_;
conf.max_playback_rate_hz = frequency_hz;
RTC_CHECK(RecreateEncoderInstance(conf));
}
bool AudioEncoderOpusImpl::EnableAudioNetworkAdaptor(
const std::string& config_string,
RtcEventLog* event_log) {
audio_network_adaptor_ =
audio_network_adaptor_creator_(config_string, event_log);
return audio_network_adaptor_.get() != nullptr;
}
void AudioEncoderOpusImpl::DisableAudioNetworkAdaptor() {
audio_network_adaptor_.reset(nullptr);
}
void AudioEncoderOpusImpl::OnReceivedUplinkPacketLossFraction(
float uplink_packet_loss_fraction) {
if (audio_network_adaptor_) {
audio_network_adaptor_->SetUplinkPacketLossFraction(
uplink_packet_loss_fraction);
ApplyAudioNetworkAdaptor();
}
packet_loss_fraction_smoother_->AddSample(uplink_packet_loss_fraction);
float average_fraction_loss = packet_loss_fraction_smoother_->GetAverage();
SetProjectedPacketLossRate(average_fraction_loss);
}
void AudioEncoderOpusImpl::OnReceivedTargetAudioBitrate(
int target_audio_bitrate_bps) {
SetTargetBitrate(target_audio_bitrate_bps);
}
void AudioEncoderOpusImpl::OnReceivedUplinkBandwidth(
int target_audio_bitrate_bps,
absl::optional<int64_t> bwe_period_ms,
absl::optional<int64_t> stable_target_bitrate_bps) {
if (audio_network_adaptor_) {
audio_network_adaptor_->SetTargetAudioBitrate(target_audio_bitrate_bps);
if (use_stable_target_for_adaptation_) {
if (stable_target_bitrate_bps)
audio_network_adaptor_->SetUplinkBandwidth(*stable_target_bitrate_bps);
} else {
      // We give the smoothed bitrate allocation to the audio network adaptor
      // as the uplink bandwidth. BWE spikes should not affect the bitrate
      // smoother by more than 25%. To simplify the calculations we use a step
      // response as the input signal. The step response of an exponential
      // filter is
      //   u(t) = 1 - e^(-t / time_constant).
      // To limit the effect of a BWE spike to within 25% of its value before
      // the next BWE update, we choose a time constant that fulfills
      //   1 - e^(-bwe_period_ms / time_constant) < 0.25.
      // Then 4 * bwe_period_ms is a good choice.
if (bwe_period_ms)
bitrate_smoother_->SetTimeConstantMs(*bwe_period_ms * 4);
bitrate_smoother_->AddSample(target_audio_bitrate_bps);
}
ApplyAudioNetworkAdaptor();
} else {
if (!overhead_bytes_per_packet_) {
RTC_LOG(LS_INFO)
<< "AudioEncoderOpusImpl: Overhead unknown, target audio bitrate "
<< target_audio_bitrate_bps << " bps is ignored.";
return;
}
const int overhead_bps = static_cast<int>(
*overhead_bytes_per_packet_ * 8 * 100 / Num10MsFramesInNextPacket());
SetTargetBitrate(
std::min(AudioEncoderOpusConfig::kMaxBitrateBps,
std::max(AudioEncoderOpusConfig::kMinBitrateBps,
target_audio_bitrate_bps - overhead_bps)));
}
}
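// Sanity check of the time-constant choice above: with time_constant =
// 4 * bwe_period_ms, the step response one BWE period after a spike is
//   1 - e^(-bwe_period_ms / (4 * bwe_period_ms)) = 1 - e^(-0.25) ~= 0.22,
// which stays below the 25% bound. For the non-adaptor path, with 64 overhead
// bytes per packet and 20 ms packets (two 10 ms frames), the deducted
// overhead rate is 64 * 8 * 100 / 2 = 25600 bps, matching kOverheadBps in the
// OnReceivedUplinkBandwidthWithoutAudioNetworkAdaptor unit test.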
void AudioEncoderOpusImpl::OnReceivedUplinkBandwidth(
int target_audio_bitrate_bps,
absl::optional<int64_t> bwe_period_ms) {
OnReceivedUplinkBandwidth(target_audio_bitrate_bps, bwe_period_ms,
absl::nullopt);
}
void AudioEncoderOpusImpl::OnReceivedUplinkAllocation(
BitrateAllocationUpdate update) {
OnReceivedUplinkBandwidth(update.target_bitrate.bps(), update.bwe_period.ms(),
update.stable_target_bitrate.bps());
}
void AudioEncoderOpusImpl::OnReceivedRtt(int rtt_ms) {
if (!audio_network_adaptor_)
return;
audio_network_adaptor_->SetRtt(rtt_ms);
ApplyAudioNetworkAdaptor();
}
void AudioEncoderOpusImpl::OnReceivedOverhead(
size_t overhead_bytes_per_packet) {
if (audio_network_adaptor_) {
audio_network_adaptor_->SetOverhead(overhead_bytes_per_packet);
ApplyAudioNetworkAdaptor();
} else {
overhead_bytes_per_packet_ = overhead_bytes_per_packet;
}
}
void AudioEncoderOpusImpl::SetReceiverFrameLengthRange(
int min_frame_length_ms,
int max_frame_length_ms) {
// Ensure that `SetReceiverFrameLengthRange` is called before
// `EnableAudioNetworkAdaptor`, otherwise we need to recreate
// `audio_network_adaptor_`, which is not a needed use case.
RTC_DCHECK(!audio_network_adaptor_);
FindSupportedFrameLengths(min_frame_length_ms, max_frame_length_ms,
&config_.supported_frame_lengths_ms);
}
AudioEncoder::EncodedInfo AudioEncoderOpusImpl::EncodeImpl(
uint32_t rtp_timestamp,
rtc::ArrayView<const int16_t> audio,
rtc::Buffer* encoded) {
MaybeUpdateUplinkBandwidth();
if (input_buffer_.empty())
first_timestamp_in_buffer_ = rtp_timestamp;
input_buffer_.insert(input_buffer_.end(), audio.cbegin(), audio.cend());
if (input_buffer_.size() <
(Num10msFramesPerPacket() * SamplesPer10msFrame())) {
return EncodedInfo();
}
RTC_CHECK_EQ(input_buffer_.size(),
Num10msFramesPerPacket() * SamplesPer10msFrame());
const size_t max_encoded_bytes = SufficientOutputBufferSize();
EncodedInfo info;
info.encoded_bytes = encoded->AppendData(
max_encoded_bytes, [&](rtc::ArrayView<uint8_t> encoded) {
int status = WebRtcOpus_Encode(
inst_, &input_buffer_[0],
rtc::CheckedDivExact(input_buffer_.size(), config_.num_channels),
rtc::saturated_cast<int16_t>(max_encoded_bytes), encoded.data());
RTC_CHECK_GE(status, 0); // Fails only if fed invalid data.
return static_cast<size_t>(status);
});
input_buffer_.clear();
bool dtx_frame = (info.encoded_bytes <= 2);
// Will use new packet size for next encoding.
config_.frame_size_ms = next_frame_length_ms_;
if (adjust_bandwidth_ && bitrate_changed_) {
const auto bandwidth = GetNewBandwidth(config_, inst_);
if (bandwidth) {
RTC_CHECK_EQ(0, WebRtcOpus_SetBandwidth(inst_, *bandwidth));
}
bitrate_changed_ = false;
}
info.encoded_timestamp = first_timestamp_in_buffer_;
info.payload_type = payload_type_;
info.send_even_if_empty = true; // Allows Opus to send empty packets.
// After 20 DTX frames (MAX_CONSECUTIVE_DTX) Opus will send a frame
// coding the background noise. Avoid flagging this frame as speech
// (even though there is a probability of the frame being speech).
info.speech = !dtx_frame && (consecutive_dtx_frames_ != 20);
info.encoder_type = CodecType::kOpus;
// Increase or reset DTX counter.
consecutive_dtx_frames_ = (dtx_frame) ? (consecutive_dtx_frames_ + 1) : (0);
return info;
}
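// DTX timeline sketch for EncodeImpl: a packet of at most 2 bytes counts as a
// DTX frame. With the default 20 ms frame size, 20 consecutive DTX frames
// span roughly 400 ms, after which Opus emits a background-noise refresh
// frame; the consecutive_dtx_frames_ != 20 check above keeps that refresh
// frame from being flagged as speech.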
size_t AudioEncoderOpusImpl::Num10msFramesPerPacket() const {
return static_cast<size_t>(rtc::CheckedDivExact(config_.frame_size_ms, 10));
}
size_t AudioEncoderOpusImpl::SamplesPer10msFrame() const {
return rtc::CheckedDivExact(config_.sample_rate_hz, 100) *
config_.num_channels;
}
size_t AudioEncoderOpusImpl::SufficientOutputBufferSize() const {
// Calculate the number of bytes we expect the encoder to produce,
// then multiply by two to give a wide margin for error.
const size_t bytes_per_millisecond =
static_cast<size_t>(GetBitrateBps(config_) / (1000 * 8) + 1);
const size_t approx_encoded_bytes =
Num10msFramesPerPacket() * 10 * bytes_per_millisecond;
return 2 * approx_encoded_bytes;
}
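// Worked example for SufficientOutputBufferSize: at 32000 bps with 20 ms
// frames, bytes_per_millisecond = 32000 / 8000 + 1 = 5 and
// approx_encoded_bytes = 2 * 10 * 5 = 100, so 200 bytes are reserved for a
// packet that nominally needs about 80 bytes (32000 bps * 0.02 s / 8).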
// If the given config is OK, recreate the Opus encoder instance with those
// settings, save the config, and return true. Otherwise, do nothing and return
// false.
bool AudioEncoderOpusImpl::RecreateEncoderInstance(
const AudioEncoderOpusConfig& config) {
if (!config.IsOk())
return false;
config_ = config;
if (inst_)
RTC_CHECK_EQ(0, WebRtcOpus_EncoderFree(inst_));
input_buffer_.clear();
input_buffer_.reserve(Num10msFramesPerPacket() * SamplesPer10msFrame());
RTC_CHECK_EQ(0, WebRtcOpus_EncoderCreate(
&inst_, config.num_channels,
config.application ==
AudioEncoderOpusConfig::ApplicationMode::kVoip
? 0
: 1,
config.sample_rate_hz));
const int bitrate = GetBitrateBps(config);
RTC_CHECK_EQ(0, WebRtcOpus_SetBitRate(inst_, bitrate));
RTC_LOG(LS_VERBOSE) << "Set Opus bitrate to " << bitrate << " bps.";
if (config.fec_enabled) {
RTC_CHECK_EQ(0, WebRtcOpus_EnableFec(inst_));
} else {
RTC_CHECK_EQ(0, WebRtcOpus_DisableFec(inst_));
}
RTC_CHECK_EQ(
0, WebRtcOpus_SetMaxPlaybackRate(inst_, config.max_playback_rate_hz));
// Use the default complexity if the start bitrate is within the hysteresis
// window.
complexity_ = GetNewComplexity(config).value_or(config.complexity);
RTC_CHECK_EQ(0, WebRtcOpus_SetComplexity(inst_, complexity_));
bitrate_changed_ = true;
if (config.dtx_enabled) {
RTC_CHECK_EQ(0, WebRtcOpus_EnableDtx(inst_));
} else {
RTC_CHECK_EQ(0, WebRtcOpus_DisableDtx(inst_));
}
RTC_CHECK_EQ(0,
WebRtcOpus_SetPacketLossRate(
inst_, static_cast<int32_t>(packet_loss_rate_ * 100 + .5)));
if (config.cbr_enabled) {
RTC_CHECK_EQ(0, WebRtcOpus_EnableCbr(inst_));
} else {
RTC_CHECK_EQ(0, WebRtcOpus_DisableCbr(inst_));
}
num_channels_to_encode_ = NumChannels();
next_frame_length_ms_ = config_.frame_size_ms;
return true;
}
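// Minimal sketch of the call sequence RecreateEncoderInstance performs, for a
// hypothetical mono VoIP encoder at 16 kHz and 32 kbps with FEC, DTX and CBR
// left disabled:
//
//   OpusEncInst* inst = nullptr;
//   RTC_CHECK_EQ(0, WebRtcOpus_EncoderCreate(&inst, 1, /*application=*/0,
//                                            16000));
//   RTC_CHECK_EQ(0, WebRtcOpus_SetBitRate(inst, 32000));
//   RTC_CHECK_EQ(0, WebRtcOpus_DisableFec(inst));
//   RTC_CHECK_EQ(0, WebRtcOpus_SetMaxPlaybackRate(inst, 48000));
//   RTC_CHECK_EQ(0, WebRtcOpus_SetComplexity(inst, /*complexity=*/9));
//   RTC_CHECK_EQ(0, WebRtcOpus_DisableDtx(inst));
//   RTC_CHECK_EQ(0, WebRtcOpus_SetPacketLossRate(inst, 0));
//   RTC_CHECK_EQ(0, WebRtcOpus_DisableCbr(inst));
//   ...
//   RTC_CHECK_EQ(0, WebRtcOpus_EncoderFree(inst));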
void AudioEncoderOpusImpl::SetFrameLength(int frame_length_ms) {
if (next_frame_length_ms_ != frame_length_ms) {
RTC_LOG(LS_VERBOSE) << "Update Opus frame length "
<< "from " << next_frame_length_ms_ << " ms "
<< "to " << frame_length_ms << " ms.";
}
next_frame_length_ms_ = frame_length_ms;
}
void AudioEncoderOpusImpl::SetNumChannelsToEncode(
size_t num_channels_to_encode) {
RTC_DCHECK_GT(num_channels_to_encode, 0);
RTC_DCHECK_LE(num_channels_to_encode, config_.num_channels);
if (num_channels_to_encode_ == num_channels_to_encode)
return;
RTC_CHECK_EQ(0, WebRtcOpus_SetForceChannels(inst_, num_channels_to_encode));
num_channels_to_encode_ = num_channels_to_encode;
}
void AudioEncoderOpusImpl::SetProjectedPacketLossRate(float fraction) {
fraction = std::min(std::max(fraction, 0.0f), kMaxPacketLossFraction);
if (packet_loss_rate_ != fraction) {
packet_loss_rate_ = fraction;
RTC_CHECK_EQ(
0, WebRtcOpus_SetPacketLossRate(
inst_, static_cast<int32_t>(packet_loss_rate_ * 100 + .5)));
}
}
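// Rounding example for SetProjectedPacketLossRate: the fraction is first
// clamped to [0, kMaxPacketLossFraction] (the PacketLossRateUpperBounded unit
// test shows 0.5 being capped at 0.2); a smoothed fraction of 0.109 is then
// passed to the encoder as static_cast<int32_t>(0.109 * 100 + .5) = 11
// percent.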
void AudioEncoderOpusImpl::SetTargetBitrate(int bits_per_second) {
const int new_bitrate = rtc::SafeClamp<int>(
bits_per_second, AudioEncoderOpusConfig::kMinBitrateBps,
AudioEncoderOpusConfig::kMaxBitrateBps);
if (config_.bitrate_bps && *config_.bitrate_bps != new_bitrate) {
config_.bitrate_bps = new_bitrate;
RTC_DCHECK(config_.IsOk());
const int bitrate = GetBitrateBps(config_);
RTC_CHECK_EQ(
0, WebRtcOpus_SetBitRate(
inst_, GetMultipliedBitrate(bitrate, bitrate_multipliers_)));
RTC_LOG(LS_VERBOSE) << "Set Opus bitrate to " << bitrate << " bps.";
bitrate_changed_ = true;
}
const auto new_complexity = GetNewComplexity(config_);
if (new_complexity && complexity_ != *new_complexity) {
complexity_ = *new_complexity;
RTC_CHECK_EQ(0, WebRtcOpus_SetComplexity(inst_, complexity_));
}
}
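// Clamping example for SetTargetBitrate, assuming the limits asserted by the
// unit tests in this change (kMinBitrateBps = 6000, kMaxBitrateBps = 510000):
// a request of 5000 bps is raised to 6000 bps, 600000 bps is capped at
// 510000 bps, and values in between are applied after the bitrate
// multipliers.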
void AudioEncoderOpusImpl::ApplyAudioNetworkAdaptor() {
auto config = audio_network_adaptor_->GetEncoderRuntimeConfig();
if (config.bitrate_bps)
SetTargetBitrate(*config.bitrate_bps);
if (config.frame_length_ms)
SetFrameLength(*config.frame_length_ms);
if (config.enable_dtx)
SetDtx(*config.enable_dtx);
if (config.num_channels)
SetNumChannelsToEncode(*config.num_channels);
}
std::unique_ptr<AudioNetworkAdaptor>
AudioEncoderOpusImpl::DefaultAudioNetworkAdaptorCreator(
absl::string_view config_string,
RtcEventLog* event_log) const {
AudioNetworkAdaptorImpl::Config config;
config.event_log = event_log;
return std::unique_ptr<AudioNetworkAdaptor>(new AudioNetworkAdaptorImpl(
config, ControllerManagerImpl::Create(
config_string, NumChannels(), supported_frame_lengths_ms(),
AudioEncoderOpusConfig::kMinBitrateBps,
num_channels_to_encode_, next_frame_length_ms_,
GetTargetBitrate(), config_.fec_enabled, GetDtx())));
}
void AudioEncoderOpusImpl::MaybeUpdateUplinkBandwidth() {
if (audio_network_adaptor_ && !use_stable_target_for_adaptation_) {
int64_t now_ms = rtc::TimeMillis();
if (!bitrate_smoother_last_update_time_ ||
now_ms - *bitrate_smoother_last_update_time_ >=
config_.uplink_bandwidth_update_interval_ms) {
absl::optional<float> smoothed_bitrate = bitrate_smoother_->GetAverage();
if (smoothed_bitrate)
audio_network_adaptor_->SetUplinkBandwidth(*smoothed_bitrate);
bitrate_smoother_last_update_time_ = now_ms;
}
}
}
ANAStats AudioEncoderOpusImpl::GetANAStats() const {
if (audio_network_adaptor_) {
return audio_network_adaptor_->GetStats();
}
return ANAStats();
}
absl::optional<std::pair<TimeDelta, TimeDelta> >
AudioEncoderOpusImpl::GetFrameLengthRange() const {
if (audio_network_adaptor_) {
if (config_.supported_frame_lengths_ms.empty()) {
return absl::nullopt;
}
return {{TimeDelta::Millis(config_.supported_frame_lengths_ms.front()),
TimeDelta::Millis(config_.supported_frame_lengths_ms.back())}};
} else {
return {{TimeDelta::Millis(config_.frame_size_ms),
TimeDelta::Millis(config_.frame_size_ms)}};
}
}
} // namespace webrtc

View file

@ -0,0 +1,184 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_ENCODER_OPUS_H_
#define MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_ENCODER_OPUS_H_
#include <functional>
#include <memory>
#include <string>
#include <vector>
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "api/audio_codecs/audio_encoder.h"
#include "api/audio_codecs/audio_format.h"
#include "api/audio_codecs/opus/audio_encoder_opus_config.h"
#include "common_audio/smoothing_filter.h"
#include "modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor.h"
#include "modules/audio_coding/codecs/opus/opus_interface.h"
namespace webrtc {
class RtcEventLog;
class AudioEncoderOpusImpl final : public AudioEncoder {
public:
// Returns empty if the current bitrate falls within the hysteresis window,
// defined by complexity_threshold_bps +/- complexity_threshold_window_bps.
// Otherwise, returns the current complexity depending on whether the
// current bitrate is above or below complexity_threshold_bps.
static absl::optional<int> GetNewComplexity(
const AudioEncoderOpusConfig& config);
  // Returns OPUS_AUTO if the current bitrate is above the wideband threshold.
  // Returns empty if it is below the threshold but the current bandwidth
  // already coincides with the desired one. Otherwise, returns the desired
  // bandwidth.
static absl::optional<int> GetNewBandwidth(
const AudioEncoderOpusConfig& config,
OpusEncInst* inst);
using AudioNetworkAdaptorCreator =
std::function<std::unique_ptr<AudioNetworkAdaptor>(absl::string_view,
RtcEventLog*)>;
AudioEncoderOpusImpl(const AudioEncoderOpusConfig& config, int payload_type);
// Dependency injection for testing.
AudioEncoderOpusImpl(
const AudioEncoderOpusConfig& config,
int payload_type,
const AudioNetworkAdaptorCreator& audio_network_adaptor_creator,
std::unique_ptr<SmoothingFilter> bitrate_smoother);
AudioEncoderOpusImpl(int payload_type, const SdpAudioFormat& format);
~AudioEncoderOpusImpl() override;
AudioEncoderOpusImpl(const AudioEncoderOpusImpl&) = delete;
AudioEncoderOpusImpl& operator=(const AudioEncoderOpusImpl&) = delete;
int SampleRateHz() const override;
size_t NumChannels() const override;
int RtpTimestampRateHz() const override;
size_t Num10MsFramesInNextPacket() const override;
size_t Max10MsFramesInAPacket() const override;
int GetTargetBitrate() const override;
void Reset() override;
bool SetFec(bool enable) override;
  // Set Opus DTX. Once enabled, Opus stops transmission when it detects
  // inactive voice. While in DTX it still sends two packets (one for content,
  // one for signaling) about every 400 ms.
bool SetDtx(bool enable) override;
bool GetDtx() const override;
bool SetApplication(Application application) override;
void SetMaxPlaybackRate(int frequency_hz) override;
bool EnableAudioNetworkAdaptor(const std::string& config_string,
RtcEventLog* event_log) override;
void DisableAudioNetworkAdaptor() override;
void OnReceivedUplinkPacketLossFraction(
float uplink_packet_loss_fraction) override;
void OnReceivedTargetAudioBitrate(int target_audio_bitrate_bps) override;
void OnReceivedUplinkBandwidth(
int target_audio_bitrate_bps,
absl::optional<int64_t> bwe_period_ms) override;
void OnReceivedUplinkAllocation(BitrateAllocationUpdate update) override;
void OnReceivedRtt(int rtt_ms) override;
void OnReceivedOverhead(size_t overhead_bytes_per_packet) override;
void SetReceiverFrameLengthRange(int min_frame_length_ms,
int max_frame_length_ms) override;
ANAStats GetANAStats() const override;
absl::optional<std::pair<TimeDelta, TimeDelta> > GetFrameLengthRange()
const override;
rtc::ArrayView<const int> supported_frame_lengths_ms() const {
return config_.supported_frame_lengths_ms;
}
// Getters for testing.
float packet_loss_rate() const { return packet_loss_rate_; }
AudioEncoderOpusConfig::ApplicationMode application() const {
return config_.application;
}
bool fec_enabled() const { return config_.fec_enabled; }
size_t num_channels_to_encode() const { return num_channels_to_encode_; }
int next_frame_length_ms() const { return next_frame_length_ms_; }
protected:
EncodedInfo EncodeImpl(uint32_t rtp_timestamp,
rtc::ArrayView<const int16_t> audio,
rtc::Buffer* encoded) override;
private:
class PacketLossFractionSmoother;
static absl::optional<AudioEncoderOpusConfig> SdpToConfig(
const SdpAudioFormat& format);
static void AppendSupportedEncoders(std::vector<AudioCodecSpec>* specs);
static AudioCodecInfo QueryAudioEncoder(const AudioEncoderOpusConfig& config);
static std::unique_ptr<AudioEncoder> MakeAudioEncoder(
const AudioEncoderOpusConfig&,
int payload_type);
size_t Num10msFramesPerPacket() const;
size_t SamplesPer10msFrame() const;
size_t SufficientOutputBufferSize() const;
bool RecreateEncoderInstance(const AudioEncoderOpusConfig& config);
void SetFrameLength(int frame_length_ms);
void SetNumChannelsToEncode(size_t num_channels_to_encode);
void SetProjectedPacketLossRate(float fraction);
void OnReceivedUplinkBandwidth(
int target_audio_bitrate_bps,
absl::optional<int64_t> bwe_period_ms,
absl::optional<int64_t> link_capacity_allocation);
// TODO(minyue): remove "override" when we can deprecate
// `AudioEncoder::SetTargetBitrate`.
void SetTargetBitrate(int target_bps) override;
void ApplyAudioNetworkAdaptor();
std::unique_ptr<AudioNetworkAdaptor> DefaultAudioNetworkAdaptorCreator(
absl::string_view config_string,
RtcEventLog* event_log) const;
void MaybeUpdateUplinkBandwidth();
AudioEncoderOpusConfig config_;
const int payload_type_;
const bool use_stable_target_for_adaptation_;
const bool adjust_bandwidth_;
bool bitrate_changed_;
  // Multipliers for bitrates at 5 kbps and higher. The target bitrate is
  // multiplied by these multipliers; each multiplier applies to a 1 kbps
  // range.
std::vector<float> bitrate_multipliers_;
float packet_loss_rate_;
std::vector<int16_t> input_buffer_;
OpusEncInst* inst_;
uint32_t first_timestamp_in_buffer_;
size_t num_channels_to_encode_;
int next_frame_length_ms_;
int complexity_;
std::unique_ptr<PacketLossFractionSmoother> packet_loss_fraction_smoother_;
const AudioNetworkAdaptorCreator audio_network_adaptor_creator_;
std::unique_ptr<AudioNetworkAdaptor> audio_network_adaptor_;
absl::optional<size_t> overhead_bytes_per_packet_;
const std::unique_ptr<SmoothingFilter> bitrate_smoother_;
absl::optional<int64_t> bitrate_smoother_last_update_time_;
int consecutive_dtx_frames_;
friend struct AudioEncoderOpus;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_CODECS_OPUS_AUDIO_ENCODER_OPUS_H_

View file

@ -0,0 +1,914 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/audio_codecs/opus/audio_encoder_opus.h"
#include <array>
#include <memory>
#include <utility>
#include "absl/strings/string_view.h"
#include "common_audio/mocks/mock_smoothing_filter.h"
#include "modules/audio_coding/audio_network_adaptor/mock/mock_audio_network_adaptor.h"
#include "modules/audio_coding/codecs/opus/audio_encoder_opus.h"
#include "modules/audio_coding/codecs/opus/opus_interface.h"
#include "modules/audio_coding/neteq/tools/audio_loop.h"
#include "rtc_base/checks.h"
#include "rtc_base/fake_clock.h"
#include "test/field_trial.h"
#include "test/gmock.h"
#include "test/gtest.h"
#include "test/testsupport/file_utils.h"
namespace webrtc {
using ::testing::NiceMock;
using ::testing::Return;
namespace {
constexpr int kDefaultOpusPayloadType = 105;
constexpr int kDefaultOpusRate = 32000;
constexpr int kDefaultOpusPacSize = 960;
constexpr int64_t kInitialTimeUs = 12345678;
AudioEncoderOpusConfig CreateConfigWithParameters(
const CodecParameterMap& params) {
const SdpAudioFormat format("opus", 48000, 2, params);
return *AudioEncoderOpus::SdpToConfig(format);
}
struct AudioEncoderOpusStates {
MockAudioNetworkAdaptor* mock_audio_network_adaptor;
MockSmoothingFilter* mock_bitrate_smoother;
std::unique_ptr<AudioEncoderOpusImpl> encoder;
std::unique_ptr<rtc::ScopedFakeClock> fake_clock;
AudioEncoderOpusConfig config;
};
std::unique_ptr<AudioEncoderOpusStates> CreateCodec(int sample_rate_hz,
size_t num_channels) {
std::unique_ptr<AudioEncoderOpusStates> states =
std::make_unique<AudioEncoderOpusStates>();
states->mock_audio_network_adaptor = nullptr;
states->fake_clock.reset(new rtc::ScopedFakeClock());
states->fake_clock->SetTime(Timestamp::Micros(kInitialTimeUs));
MockAudioNetworkAdaptor** mock_ptr = &states->mock_audio_network_adaptor;
AudioEncoderOpusImpl::AudioNetworkAdaptorCreator creator =
[mock_ptr](absl::string_view, RtcEventLog* event_log) {
std::unique_ptr<MockAudioNetworkAdaptor> adaptor(
new NiceMock<MockAudioNetworkAdaptor>());
EXPECT_CALL(*adaptor, Die());
*mock_ptr = adaptor.get();
return adaptor;
};
AudioEncoderOpusConfig config;
config.frame_size_ms = rtc::CheckedDivExact(kDefaultOpusPacSize, 48);
config.sample_rate_hz = sample_rate_hz;
config.num_channels = num_channels;
config.bitrate_bps = kDefaultOpusRate;
config.application = num_channels == 1
? AudioEncoderOpusConfig::ApplicationMode::kVoip
: AudioEncoderOpusConfig::ApplicationMode::kAudio;
config.supported_frame_lengths_ms.push_back(config.frame_size_ms);
states->config = config;
std::unique_ptr<MockSmoothingFilter> bitrate_smoother(
new MockSmoothingFilter());
states->mock_bitrate_smoother = bitrate_smoother.get();
states->encoder.reset(
new AudioEncoderOpusImpl(states->config, kDefaultOpusPayloadType, creator,
std::move(bitrate_smoother)));
return states;
}
AudioEncoderRuntimeConfig CreateEncoderRuntimeConfig() {
constexpr int kBitrate = 40000;
constexpr int kFrameLength = 60;
constexpr bool kEnableDtx = false;
constexpr size_t kNumChannels = 1;
AudioEncoderRuntimeConfig config;
config.bitrate_bps = kBitrate;
config.frame_length_ms = kFrameLength;
config.enable_dtx = kEnableDtx;
config.num_channels = kNumChannels;
return config;
}
void CheckEncoderRuntimeConfig(const AudioEncoderOpusImpl* encoder,
const AudioEncoderRuntimeConfig& config) {
EXPECT_EQ(*config.bitrate_bps, encoder->GetTargetBitrate());
EXPECT_EQ(*config.frame_length_ms, encoder->next_frame_length_ms());
EXPECT_EQ(*config.enable_dtx, encoder->GetDtx());
EXPECT_EQ(*config.num_channels, encoder->num_channels_to_encode());
}
// Create 10ms audio data blocks for a total packet size of "packet_size_ms".
std::unique_ptr<test::AudioLoop> Create10msAudioBlocks(
const std::unique_ptr<AudioEncoderOpusImpl>& encoder,
int packet_size_ms) {
const std::string file_name =
test::ResourcePath("audio_coding/testfile32kHz", "pcm");
std::unique_ptr<test::AudioLoop> speech_data(new test::AudioLoop());
int audio_samples_per_ms =
rtc::CheckedDivExact(encoder->SampleRateHz(), 1000);
if (!speech_data->Init(
file_name,
packet_size_ms * audio_samples_per_ms *
encoder->num_channels_to_encode(),
10 * audio_samples_per_ms * encoder->num_channels_to_encode()))
return nullptr;
return speech_data;
}
} // namespace
class AudioEncoderOpusTest : public ::testing::TestWithParam<int> {
protected:
int sample_rate_hz_{GetParam()};
};
INSTANTIATE_TEST_SUITE_P(Param,
AudioEncoderOpusTest,
::testing::Values(16000, 48000));
TEST_P(AudioEncoderOpusTest, DefaultApplicationModeMono) {
auto states = CreateCodec(sample_rate_hz_, 1);
EXPECT_EQ(AudioEncoderOpusConfig::ApplicationMode::kVoip,
states->encoder->application());
}
TEST_P(AudioEncoderOpusTest, DefaultApplicationModeStereo) {
auto states = CreateCodec(sample_rate_hz_, 2);
EXPECT_EQ(AudioEncoderOpusConfig::ApplicationMode::kAudio,
states->encoder->application());
}
TEST_P(AudioEncoderOpusTest, ChangeApplicationMode) {
auto states = CreateCodec(sample_rate_hz_, 2);
EXPECT_TRUE(
states->encoder->SetApplication(AudioEncoder::Application::kSpeech));
EXPECT_EQ(AudioEncoderOpusConfig::ApplicationMode::kVoip,
states->encoder->application());
}
TEST_P(AudioEncoderOpusTest, ResetWontChangeApplicationMode) {
auto states = CreateCodec(sample_rate_hz_, 2);
// Trigger a reset.
states->encoder->Reset();
// Verify that the mode is still kAudio.
EXPECT_EQ(AudioEncoderOpusConfig::ApplicationMode::kAudio,
states->encoder->application());
// Now change to kVoip.
EXPECT_TRUE(
states->encoder->SetApplication(AudioEncoder::Application::kSpeech));
EXPECT_EQ(AudioEncoderOpusConfig::ApplicationMode::kVoip,
states->encoder->application());
// Trigger a reset again.
states->encoder->Reset();
// Verify that the mode is still kVoip.
EXPECT_EQ(AudioEncoderOpusConfig::ApplicationMode::kVoip,
states->encoder->application());
}
TEST_P(AudioEncoderOpusTest, ToggleDtx) {
auto states = CreateCodec(sample_rate_hz_, 2);
// Enable DTX
EXPECT_TRUE(states->encoder->SetDtx(true));
EXPECT_TRUE(states->encoder->GetDtx());
// Turn off DTX.
EXPECT_TRUE(states->encoder->SetDtx(false));
EXPECT_FALSE(states->encoder->GetDtx());
}
TEST_P(AudioEncoderOpusTest,
OnReceivedUplinkBandwidthWithoutAudioNetworkAdaptor) {
auto states = CreateCodec(sample_rate_hz_, 1);
  // Constants are replicated from audio_encoder_opus.cc.
const int kMinBitrateBps = 6000;
const int kMaxBitrateBps = 510000;
const int kOverheadBytesPerPacket = 64;
states->encoder->OnReceivedOverhead(kOverheadBytesPerPacket);
const int kOverheadBps = 8 * kOverheadBytesPerPacket *
rtc::CheckedDivExact(48000, kDefaultOpusPacSize);
// Set a too low bitrate.
states->encoder->OnReceivedUplinkBandwidth(kMinBitrateBps + kOverheadBps - 1,
absl::nullopt);
EXPECT_EQ(kMinBitrateBps, states->encoder->GetTargetBitrate());
// Set a too high bitrate.
states->encoder->OnReceivedUplinkBandwidth(kMaxBitrateBps + kOverheadBps + 1,
absl::nullopt);
EXPECT_EQ(kMaxBitrateBps, states->encoder->GetTargetBitrate());
// Set the minimum rate.
states->encoder->OnReceivedUplinkBandwidth(kMinBitrateBps + kOverheadBps,
absl::nullopt);
EXPECT_EQ(kMinBitrateBps, states->encoder->GetTargetBitrate());
// Set the maximum rate.
states->encoder->OnReceivedUplinkBandwidth(kMaxBitrateBps + kOverheadBps,
absl::nullopt);
EXPECT_EQ(kMaxBitrateBps, states->encoder->GetTargetBitrate());
  // Set rates from kMinBitrateBps up to 32000 bps.
for (int rate = kMinBitrateBps + kOverheadBps; rate <= 32000 + kOverheadBps;
rate += 1000) {
states->encoder->OnReceivedUplinkBandwidth(rate, absl::nullopt);
EXPECT_EQ(rate - kOverheadBps, states->encoder->GetTargetBitrate());
}
}
TEST_P(AudioEncoderOpusTest, SetReceiverFrameLengthRange) {
auto states = CreateCodec(sample_rate_hz_, 2);
// Before calling to `SetReceiverFrameLengthRange`,
// `supported_frame_lengths_ms` should contain only the frame length being
// used.
using ::testing::ElementsAre;
EXPECT_THAT(states->encoder->supported_frame_lengths_ms(),
ElementsAre(states->encoder->next_frame_length_ms()));
states->encoder->SetReceiverFrameLengthRange(0, 12345);
states->encoder->SetReceiverFrameLengthRange(21, 60);
EXPECT_THAT(states->encoder->supported_frame_lengths_ms(),
ElementsAre(40, 60));
states->encoder->SetReceiverFrameLengthRange(20, 59);
EXPECT_THAT(states->encoder->supported_frame_lengths_ms(),
ElementsAre(20, 40));
}
TEST_P(AudioEncoderOpusTest,
InvokeAudioNetworkAdaptorOnReceivedUplinkPacketLossFraction) {
auto states = CreateCodec(sample_rate_hz_, 2);
states->encoder->EnableAudioNetworkAdaptor("", nullptr);
auto config = CreateEncoderRuntimeConfig();
EXPECT_CALL(*states->mock_audio_network_adaptor, GetEncoderRuntimeConfig())
.WillOnce(Return(config));
  // Since we use a mock audio network adaptor, any packet loss fraction is
  // fine.
constexpr float kUplinkPacketLoss = 0.1f;
EXPECT_CALL(*states->mock_audio_network_adaptor,
SetUplinkPacketLossFraction(kUplinkPacketLoss));
states->encoder->OnReceivedUplinkPacketLossFraction(kUplinkPacketLoss);
CheckEncoderRuntimeConfig(states->encoder.get(), config);
}
TEST_P(AudioEncoderOpusTest,
InvokeAudioNetworkAdaptorOnReceivedUplinkBandwidth) {
test::ScopedFieldTrials override_field_trials(
"WebRTC-Audio-StableTargetAdaptation/Disabled/");
auto states = CreateCodec(sample_rate_hz_, 2);
states->encoder->EnableAudioNetworkAdaptor("", nullptr);
auto config = CreateEncoderRuntimeConfig();
EXPECT_CALL(*states->mock_audio_network_adaptor, GetEncoderRuntimeConfig())
.WillOnce(Return(config));
  // Since we use a mock audio network adaptor, any target audio bitrate is
  // fine.
constexpr int kTargetAudioBitrate = 30000;
constexpr int64_t kProbingIntervalMs = 3000;
EXPECT_CALL(*states->mock_audio_network_adaptor,
SetTargetAudioBitrate(kTargetAudioBitrate));
EXPECT_CALL(*states->mock_bitrate_smoother,
SetTimeConstantMs(kProbingIntervalMs * 4));
EXPECT_CALL(*states->mock_bitrate_smoother, AddSample(kTargetAudioBitrate));
states->encoder->OnReceivedUplinkBandwidth(kTargetAudioBitrate,
kProbingIntervalMs);
CheckEncoderRuntimeConfig(states->encoder.get(), config);
}
TEST_P(AudioEncoderOpusTest,
InvokeAudioNetworkAdaptorOnReceivedUplinkAllocation) {
auto states = CreateCodec(sample_rate_hz_, 2);
states->encoder->EnableAudioNetworkAdaptor("", nullptr);
auto config = CreateEncoderRuntimeConfig();
EXPECT_CALL(*states->mock_audio_network_adaptor, GetEncoderRuntimeConfig())
.WillOnce(Return(config));
BitrateAllocationUpdate update;
update.target_bitrate = DataRate::BitsPerSec(30000);
update.stable_target_bitrate = DataRate::BitsPerSec(20000);
update.bwe_period = TimeDelta::Millis(200);
EXPECT_CALL(*states->mock_audio_network_adaptor,
SetTargetAudioBitrate(update.target_bitrate.bps()));
EXPECT_CALL(*states->mock_audio_network_adaptor,
SetUplinkBandwidth(update.stable_target_bitrate.bps()));
states->encoder->OnReceivedUplinkAllocation(update);
CheckEncoderRuntimeConfig(states->encoder.get(), config);
}
TEST_P(AudioEncoderOpusTest, InvokeAudioNetworkAdaptorOnReceivedRtt) {
auto states = CreateCodec(sample_rate_hz_, 2);
states->encoder->EnableAudioNetworkAdaptor("", nullptr);
auto config = CreateEncoderRuntimeConfig();
EXPECT_CALL(*states->mock_audio_network_adaptor, GetEncoderRuntimeConfig())
.WillOnce(Return(config));
  // Since we use a mock audio network adaptor, any RTT value is fine.
constexpr int kRtt = 30;
EXPECT_CALL(*states->mock_audio_network_adaptor, SetRtt(kRtt));
states->encoder->OnReceivedRtt(kRtt);
CheckEncoderRuntimeConfig(states->encoder.get(), config);
}
TEST_P(AudioEncoderOpusTest, InvokeAudioNetworkAdaptorOnReceivedOverhead) {
auto states = CreateCodec(sample_rate_hz_, 2);
states->encoder->EnableAudioNetworkAdaptor("", nullptr);
auto config = CreateEncoderRuntimeConfig();
EXPECT_CALL(*states->mock_audio_network_adaptor, GetEncoderRuntimeConfig())
.WillOnce(Return(config));
  // Since we use a mock audio network adaptor, any overhead value is fine.
constexpr size_t kOverhead = 64;
EXPECT_CALL(*states->mock_audio_network_adaptor, SetOverhead(kOverhead));
states->encoder->OnReceivedOverhead(kOverhead);
CheckEncoderRuntimeConfig(states->encoder.get(), config);
}
TEST_P(AudioEncoderOpusTest,
PacketLossFractionSmoothedOnSetUplinkPacketLossFraction) {
auto states = CreateCodec(sample_rate_hz_, 2);
// The values are carefully chosen so that if no smoothing is made, the test
// will fail.
constexpr float kPacketLossFraction_1 = 0.02f;
constexpr float kPacketLossFraction_2 = 0.198f;
// `kSecondSampleTimeMs` is chosen to ease the calculation since
// 0.9999 ^ 6931 = 0.5.
constexpr int64_t kSecondSampleTimeMs = 6931;
// First time, no filtering.
states->encoder->OnReceivedUplinkPacketLossFraction(kPacketLossFraction_1);
EXPECT_FLOAT_EQ(0.02f, states->encoder->packet_loss_rate());
states->fake_clock->AdvanceTime(TimeDelta::Millis(kSecondSampleTimeMs));
states->encoder->OnReceivedUplinkPacketLossFraction(kPacketLossFraction_2);
// Now the output of packet loss fraction smoother should be
// (0.02 + 0.198) / 2 = 0.109.
EXPECT_NEAR(0.109f, states->encoder->packet_loss_rate(), 0.001);
}
TEST_P(AudioEncoderOpusTest, PacketLossRateUpperBounded) {
auto states = CreateCodec(sample_rate_hz_, 2);
states->encoder->OnReceivedUplinkPacketLossFraction(0.5);
EXPECT_FLOAT_EQ(0.2f, states->encoder->packet_loss_rate());
}
TEST_P(AudioEncoderOpusTest, DoNotInvokeSetTargetBitrateIfOverheadUnknown) {
auto states = CreateCodec(sample_rate_hz_, 2);
states->encoder->OnReceivedUplinkBandwidth(kDefaultOpusRate * 2,
absl::nullopt);
// Since `OnReceivedOverhead` has not been called, the codec bitrate should
// not change.
EXPECT_EQ(kDefaultOpusRate, states->encoder->GetTargetBitrate());
}
// Verifies that the complexity adaptation in the config works as intended.
TEST(AudioEncoderOpusTest, ConfigComplexityAdaptation) {
AudioEncoderOpusConfig config;
config.low_rate_complexity = 8;
config.complexity = 6;
// Bitrate within hysteresis window. Expect empty output.
config.bitrate_bps = 12500;
EXPECT_EQ(absl::nullopt, AudioEncoderOpusImpl::GetNewComplexity(config));
// Bitrate below hysteresis window. Expect higher complexity.
config.bitrate_bps = 10999;
EXPECT_EQ(8, AudioEncoderOpusImpl::GetNewComplexity(config));
// Bitrate within hysteresis window. Expect empty output.
config.bitrate_bps = 12500;
EXPECT_EQ(absl::nullopt, AudioEncoderOpusImpl::GetNewComplexity(config));
// Bitrate above hysteresis window. Expect lower complexity.
config.bitrate_bps = 14001;
EXPECT_EQ(6, AudioEncoderOpusImpl::GetNewComplexity(config));
}
// Verifies that the bandwidth adaptation in the config works as intended.
TEST_P(AudioEncoderOpusTest, ConfigBandwidthAdaptation) {
AudioEncoderOpusConfig config;
const size_t opus_rate_khz = rtc::CheckedDivExact(sample_rate_hz_, 1000);
const std::vector<int16_t> silence(
opus_rate_khz * config.frame_size_ms * config.num_channels, 0);
constexpr size_t kMaxBytes = 1000;
uint8_t bitstream[kMaxBytes];
OpusEncInst* inst;
EXPECT_EQ(0, WebRtcOpus_EncoderCreate(
&inst, config.num_channels,
config.application ==
AudioEncoderOpusConfig::ApplicationMode::kVoip
? 0
: 1,
sample_rate_hz_));
  // Bitrate below minimum wideband. Expect narrowband.
config.bitrate_bps = absl::optional<int>(7999);
auto bandwidth = AudioEncoderOpusImpl::GetNewBandwidth(config, inst);
EXPECT_EQ(absl::optional<int>(OPUS_BANDWIDTH_NARROWBAND), bandwidth);
WebRtcOpus_SetBandwidth(inst, *bandwidth);
// It is necessary to encode here because Opus has some logic in the encoder
// that goes from the user-set bandwidth to the used and returned one.
WebRtcOpus_Encode(inst, silence.data(),
rtc::CheckedDivExact(silence.size(), config.num_channels),
kMaxBytes, bitstream);
// Bitrate not yet above maximum narrowband. Expect empty.
config.bitrate_bps = absl::optional<int>(9000);
bandwidth = AudioEncoderOpusImpl::GetNewBandwidth(config, inst);
EXPECT_EQ(absl::optional<int>(), bandwidth);
// Bitrate above maximum narrowband. Expect wideband.
config.bitrate_bps = absl::optional<int>(9001);
bandwidth = AudioEncoderOpusImpl::GetNewBandwidth(config, inst);
EXPECT_EQ(absl::optional<int>(OPUS_BANDWIDTH_WIDEBAND), bandwidth);
WebRtcOpus_SetBandwidth(inst, *bandwidth);
// It is necessary to encode here because Opus has some logic in the encoder
// that goes from the user-set bandwidth to the used and returned one.
WebRtcOpus_Encode(inst, silence.data(),
rtc::CheckedDivExact(silence.size(), config.num_channels),
kMaxBytes, bitstream);
// Bitrate not yet below minimum wideband. Expect empty.
config.bitrate_bps = absl::optional<int>(8000);
bandwidth = AudioEncoderOpusImpl::GetNewBandwidth(config, inst);
EXPECT_EQ(absl::optional<int>(), bandwidth);
// Bitrate above automatic threshold. Expect automatic.
config.bitrate_bps = absl::optional<int>(12001);
bandwidth = AudioEncoderOpusImpl::GetNewBandwidth(config, inst);
EXPECT_EQ(absl::optional<int>(OPUS_AUTO), bandwidth);
EXPECT_EQ(0, WebRtcOpus_EncoderFree(inst));
}
TEST_P(AudioEncoderOpusTest, EmptyConfigDoesNotAffectEncoderSettings) {
auto states = CreateCodec(sample_rate_hz_, 2);
states->encoder->EnableAudioNetworkAdaptor("", nullptr);
auto config = CreateEncoderRuntimeConfig();
AudioEncoderRuntimeConfig empty_config;
EXPECT_CALL(*states->mock_audio_network_adaptor, GetEncoderRuntimeConfig())
.WillOnce(Return(config))
.WillOnce(Return(empty_config));
constexpr size_t kOverhead = 64;
EXPECT_CALL(*states->mock_audio_network_adaptor, SetOverhead(kOverhead))
.Times(2);
states->encoder->OnReceivedOverhead(kOverhead);
states->encoder->OnReceivedOverhead(kOverhead);
CheckEncoderRuntimeConfig(states->encoder.get(), config);
}
TEST_P(AudioEncoderOpusTest, UpdateUplinkBandwidthInAudioNetworkAdaptor) {
test::ScopedFieldTrials override_field_trials(
"WebRTC-Audio-StableTargetAdaptation/Disabled/");
auto states = CreateCodec(sample_rate_hz_, 2);
states->encoder->EnableAudioNetworkAdaptor("", nullptr);
const size_t opus_rate_khz = rtc::CheckedDivExact(sample_rate_hz_, 1000);
const std::vector<int16_t> audio(opus_rate_khz * 10 * 2, 0);
rtc::Buffer encoded;
EXPECT_CALL(*states->mock_bitrate_smoother, GetAverage())
.WillOnce(Return(50000));
EXPECT_CALL(*states->mock_audio_network_adaptor, SetUplinkBandwidth(50000));
states->encoder->Encode(
0, rtc::ArrayView<const int16_t>(audio.data(), audio.size()), &encoded);
// Repeat update uplink bandwidth tests.
for (int i = 0; i < 5; i++) {
// Don't update till it is time to update again.
states->fake_clock->AdvanceTime(TimeDelta::Millis(
states->config.uplink_bandwidth_update_interval_ms - 1));
states->encoder->Encode(
0, rtc::ArrayView<const int16_t>(audio.data(), audio.size()), &encoded);
// Update when it is time to update.
EXPECT_CALL(*states->mock_bitrate_smoother, GetAverage())
.WillOnce(Return(40000));
EXPECT_CALL(*states->mock_audio_network_adaptor, SetUplinkBandwidth(40000));
states->fake_clock->AdvanceTime(TimeDelta::Millis(1));
states->encoder->Encode(
0, rtc::ArrayView<const int16_t>(audio.data(), audio.size()), &encoded);
}
}
TEST_P(AudioEncoderOpusTest, EncodeAtMinBitrate) {
auto states = CreateCodec(sample_rate_hz_, 1);
constexpr int kNumPacketsToEncode = 2;
auto audio_frames =
Create10msAudioBlocks(states->encoder, kNumPacketsToEncode * 20);
ASSERT_TRUE(audio_frames) << "Create10msAudioBlocks failed";
rtc::Buffer encoded;
uint32_t rtp_timestamp = 12345; // Just a number not important to this test.
states->encoder->OnReceivedUplinkBandwidth(0, absl::nullopt);
for (int packet_index = 0; packet_index < kNumPacketsToEncode;
packet_index++) {
// Make sure we are not encoding before we have enough data for
// a 20ms packet.
for (int index = 0; index < 1; index++) {
states->encoder->Encode(rtp_timestamp, audio_frames->GetNextBlock(),
&encoded);
EXPECT_EQ(0u, encoded.size());
}
// Should encode now.
states->encoder->Encode(rtp_timestamp, audio_frames->GetNextBlock(),
&encoded);
EXPECT_GT(encoded.size(), 0u);
encoded.Clear();
}
}
TEST(AudioEncoderOpusTest, TestConfigDefaults) {
const auto config_opt = AudioEncoderOpus::SdpToConfig({"opus", 48000, 2});
ASSERT_TRUE(config_opt);
EXPECT_EQ(48000, config_opt->max_playback_rate_hz);
EXPECT_EQ(1u, config_opt->num_channels);
EXPECT_FALSE(config_opt->fec_enabled);
EXPECT_FALSE(config_opt->dtx_enabled);
EXPECT_EQ(20, config_opt->frame_size_ms);
}
TEST(AudioEncoderOpusTest, TestConfigFromParams) {
const auto config1 = CreateConfigWithParameters({{"stereo", "0"}});
EXPECT_EQ(1U, config1.num_channels);
const auto config2 = CreateConfigWithParameters({{"stereo", "1"}});
EXPECT_EQ(2U, config2.num_channels);
const auto config3 = CreateConfigWithParameters({{"useinbandfec", "0"}});
EXPECT_FALSE(config3.fec_enabled);
const auto config4 = CreateConfigWithParameters({{"useinbandfec", "1"}});
EXPECT_TRUE(config4.fec_enabled);
const auto config5 = CreateConfigWithParameters({{"usedtx", "0"}});
EXPECT_FALSE(config5.dtx_enabled);
const auto config6 = CreateConfigWithParameters({{"usedtx", "1"}});
EXPECT_TRUE(config6.dtx_enabled);
const auto config7 = CreateConfigWithParameters({{"cbr", "0"}});
EXPECT_FALSE(config7.cbr_enabled);
const auto config8 = CreateConfigWithParameters({{"cbr", "1"}});
EXPECT_TRUE(config8.cbr_enabled);
const auto config9 =
CreateConfigWithParameters({{"maxplaybackrate", "12345"}});
EXPECT_EQ(12345, config9.max_playback_rate_hz);
const auto config10 =
CreateConfigWithParameters({{"maxaveragebitrate", "96000"}});
EXPECT_EQ(96000, config10.bitrate_bps);
const auto config11 = CreateConfigWithParameters({{"maxptime", "40"}});
for (int frame_length : config11.supported_frame_lengths_ms) {
EXPECT_LE(frame_length, 40);
}
const auto config12 = CreateConfigWithParameters({{"minptime", "40"}});
for (int frame_length : config12.supported_frame_lengths_ms) {
EXPECT_GE(frame_length, 40);
}
const auto config13 = CreateConfigWithParameters({{"ptime", "40"}});
EXPECT_EQ(40, config13.frame_size_ms);
constexpr int kMinSupportedFrameLength = 10;
constexpr int kMaxSupportedFrameLength =
WEBRTC_OPUS_SUPPORT_120MS_PTIME ? 120 : 60;
const auto config14 = CreateConfigWithParameters({{"ptime", "1"}});
EXPECT_EQ(kMinSupportedFrameLength, config14.frame_size_ms);
const auto config15 = CreateConfigWithParameters({{"ptime", "2000"}});
EXPECT_EQ(kMaxSupportedFrameLength, config15.frame_size_ms);
}
TEST(AudioEncoderOpusTest, TestConfigFromInvalidParams) {
const webrtc::SdpAudioFormat format("opus", 48000, 2);
const auto default_config = *AudioEncoderOpus::SdpToConfig(format);
#if WEBRTC_OPUS_SUPPORT_120MS_PTIME
const std::vector<int> default_supported_frame_lengths_ms({20, 40, 60, 120});
#else
const std::vector<int> default_supported_frame_lengths_ms({20, 40, 60});
#endif
AudioEncoderOpusConfig config;
config = CreateConfigWithParameters({{"stereo", "invalid"}});
EXPECT_EQ(default_config.num_channels, config.num_channels);
config = CreateConfigWithParameters({{"useinbandfec", "invalid"}});
EXPECT_EQ(default_config.fec_enabled, config.fec_enabled);
config = CreateConfigWithParameters({{"usedtx", "invalid"}});
EXPECT_EQ(default_config.dtx_enabled, config.dtx_enabled);
config = CreateConfigWithParameters({{"cbr", "invalid"}});
EXPECT_EQ(default_config.dtx_enabled, config.dtx_enabled);
config = CreateConfigWithParameters({{"maxplaybackrate", "0"}});
EXPECT_EQ(default_config.max_playback_rate_hz, config.max_playback_rate_hz);
config = CreateConfigWithParameters({{"maxplaybackrate", "-23"}});
EXPECT_EQ(default_config.max_playback_rate_hz, config.max_playback_rate_hz);
config = CreateConfigWithParameters({{"maxplaybackrate", "not a number!"}});
EXPECT_EQ(default_config.max_playback_rate_hz, config.max_playback_rate_hz);
config = CreateConfigWithParameters({{"maxaveragebitrate", "0"}});
EXPECT_EQ(6000, config.bitrate_bps);
config = CreateConfigWithParameters({{"maxaveragebitrate", "-1000"}});
EXPECT_EQ(6000, config.bitrate_bps);
config = CreateConfigWithParameters({{"maxaveragebitrate", "1024000"}});
EXPECT_EQ(510000, config.bitrate_bps);
config = CreateConfigWithParameters({{"maxaveragebitrate", "not a number!"}});
EXPECT_EQ(default_config.bitrate_bps, config.bitrate_bps);
config = CreateConfigWithParameters({{"maxptime", "invalid"}});
EXPECT_EQ(default_supported_frame_lengths_ms,
config.supported_frame_lengths_ms);
config = CreateConfigWithParameters({{"minptime", "invalid"}});
EXPECT_EQ(default_supported_frame_lengths_ms,
config.supported_frame_lengths_ms);
config = CreateConfigWithParameters({{"ptime", "invalid"}});
EXPECT_EQ(default_supported_frame_lengths_ms,
config.supported_frame_lengths_ms);
}
TEST(AudioEncoderOpusTest, GetFrameLengthRange) {
AudioEncoderOpusConfig config =
CreateConfigWithParameters({{"maxptime", "10"}, {"ptime", "10"}});
std::unique_ptr<AudioEncoder> encoder =
AudioEncoderOpus::MakeAudioEncoder(config, kDefaultOpusPayloadType);
auto ptime = webrtc::TimeDelta::Millis(10);
absl::optional<std::pair<webrtc::TimeDelta, webrtc::TimeDelta>> range = {
{ptime, ptime}};
EXPECT_EQ(encoder->GetFrameLengthRange(), range);
}
// Test that bitrate will be overridden by the "maxaveragebitrate" parameter.
// Also test that the "maxaveragebitrate" can't be set to values outside the
// range of 6000 to 510000 bps.
TEST(AudioEncoderOpusTest, SetSendCodecOpusMaxAverageBitrate) {
// Ignore if less than 6000.
const auto config1 = AudioEncoderOpus::SdpToConfig(
{"opus", 48000, 2, {{"maxaveragebitrate", "5999"}}});
EXPECT_EQ(6000, config1->bitrate_bps);
// Ignore if larger than 510000.
const auto config2 = AudioEncoderOpus::SdpToConfig(
{"opus", 48000, 2, {{"maxaveragebitrate", "510001"}}});
EXPECT_EQ(510000, config2->bitrate_bps);
const auto config3 = AudioEncoderOpus::SdpToConfig(
{"opus", 48000, 2, {{"maxaveragebitrate", "200000"}}});
EXPECT_EQ(200000, config3->bitrate_bps);
}
// Test maxplaybackrate <= 8000 triggers Opus narrow band mode.
TEST(AudioEncoderOpusTest, SetMaxPlaybackRateNb) {
auto config = CreateConfigWithParameters({{"maxplaybackrate", "8000"}});
EXPECT_EQ(8000, config.max_playback_rate_hz);
EXPECT_EQ(12000, config.bitrate_bps);
config = CreateConfigWithParameters(
{{"maxplaybackrate", "8000"}, {"stereo", "1"}});
EXPECT_EQ(8000, config.max_playback_rate_hz);
EXPECT_EQ(24000, config.bitrate_bps);
}
// Test 8000 < maxplaybackrate <= 12000 triggers Opus medium band mode.
TEST(AudioEncoderOpusTest, SetMaxPlaybackRateMb) {
auto config = CreateConfigWithParameters({{"maxplaybackrate", "8001"}});
EXPECT_EQ(8001, config.max_playback_rate_hz);
EXPECT_EQ(20000, config.bitrate_bps);
config = CreateConfigWithParameters(
{{"maxplaybackrate", "8001"}, {"stereo", "1"}});
EXPECT_EQ(8001, config.max_playback_rate_hz);
EXPECT_EQ(40000, config.bitrate_bps);
}
// Test 12000 < maxplaybackrate <= 16000 triggers Opus wide band mode.
TEST(AudioEncoderOpusTest, SetMaxPlaybackRateWb) {
auto config = CreateConfigWithParameters({{"maxplaybackrate", "12001"}});
EXPECT_EQ(12001, config.max_playback_rate_hz);
EXPECT_EQ(20000, config.bitrate_bps);
config = CreateConfigWithParameters(
{{"maxplaybackrate", "12001"}, {"stereo", "1"}});
EXPECT_EQ(12001, config.max_playback_rate_hz);
EXPECT_EQ(40000, config.bitrate_bps);
}
// Test 16000 < maxplaybackrate <= 24000 triggers Opus super wide band mode.
TEST(AudioEncoderOpusTest, SetMaxPlaybackRateSwb) {
auto config = CreateConfigWithParameters({{"maxplaybackrate", "16001"}});
EXPECT_EQ(16001, config.max_playback_rate_hz);
EXPECT_EQ(32000, config.bitrate_bps);
config = CreateConfigWithParameters(
{{"maxplaybackrate", "16001"}, {"stereo", "1"}});
EXPECT_EQ(16001, config.max_playback_rate_hz);
EXPECT_EQ(64000, config.bitrate_bps);
}
// Test 24000 < maxplaybackrate triggers Opus full band mode.
TEST(AudioEncoderOpusTest, SetMaxPlaybackRateFb) {
auto config = CreateConfigWithParameters({{"maxplaybackrate", "24001"}});
EXPECT_EQ(24001, config.max_playback_rate_hz);
EXPECT_EQ(32000, config.bitrate_bps);
config = CreateConfigWithParameters(
{{"maxplaybackrate", "24001"}, {"stereo", "1"}});
EXPECT_EQ(24001, config.max_playback_rate_hz);
EXPECT_EQ(64000, config.bitrate_bps);
}
TEST_P(AudioEncoderOpusTest, OpusFlagDtxAsNonSpeech) {
// Create encoder with DTX enabled.
AudioEncoderOpusConfig config;
config.dtx_enabled = true;
config.sample_rate_hz = sample_rate_hz_;
constexpr int payload_type = 17;
const auto encoder = AudioEncoderOpus::MakeAudioEncoder(config, payload_type);
// Open file containing speech and silence.
const std::string kInputFileName =
webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm");
test::AudioLoop audio_loop;
// Use the file as if it were sampled at our desired input rate.
const size_t max_loop_length_samples =
sample_rate_hz_ * 10; // Max 10 second loop.
const size_t input_block_size_samples =
10 * sample_rate_hz_ / 1000; // 10 ms.
EXPECT_TRUE(audio_loop.Init(kInputFileName, max_loop_length_samples,
input_block_size_samples));
// Encode.
AudioEncoder::EncodedInfo info;
rtc::Buffer encoded(500);
int nonspeech_frames = 0;
int max_nonspeech_frames = 0;
int dtx_frames = 0;
int max_dtx_frames = 0;
uint32_t rtp_timestamp = 0u;
for (size_t i = 0; i < 500; ++i) {
encoded.Clear();
// Every second call to the encoder will generate an Opus packet.
for (int j = 0; j < 2; j++) {
info =
encoder->Encode(rtp_timestamp, audio_loop.GetNextBlock(), &encoded);
rtp_timestamp += input_block_size_samples;
}
// Bookkeeping of number of DTX frames.
if (info.encoded_bytes <= 2) {
++dtx_frames;
} else {
if (dtx_frames > max_dtx_frames)
max_dtx_frames = dtx_frames;
dtx_frames = 0;
}
// Bookkeeping of number of non-speech frames.
if (info.speech == 0) {
++nonspeech_frames;
} else {
if (nonspeech_frames > max_nonspeech_frames)
max_nonspeech_frames = nonspeech_frames;
nonspeech_frames = 0;
}
}
// Maximum number of consecutive non-speech packets should exceed 15.
EXPECT_GT(max_nonspeech_frames, 15);
}
TEST(AudioEncoderOpusTest, OpusDtxFilteringHighEnergyRefreshPackets) {
test::ScopedFieldTrials override_field_trials(
"WebRTC-Audio-OpusAvoidNoisePumpingDuringDtx/Enabled/");
const std::string kInputFileName =
webrtc::test::ResourcePath("audio_coding/testfile16kHz", "pcm");
constexpr int kSampleRateHz = 16000;
AudioEncoderOpusConfig config;
config.dtx_enabled = true;
config.sample_rate_hz = kSampleRateHz;
constexpr int payload_type = 17;
const auto encoder = AudioEncoderOpus::MakeAudioEncoder(config, payload_type);
test::AudioLoop audio_loop;
  constexpr size_t kMaxLoopLengthSamples = kSampleRateHz * 11.6f;
  constexpr size_t kInputBlockSizeSamples = kSampleRateHz / 100;
  EXPECT_TRUE(audio_loop.Init(kInputFileName, kMaxLoopLengthSamples,
                              kInputBlockSizeSamples));
AudioEncoder::EncodedInfo info;
rtc::Buffer encoded(500);
// Encode the audio file and store the last part that corresponds to silence.
constexpr size_t kSilenceDurationSamples = kSampleRateHz * 0.2f;
std::array<int16_t, kSilenceDurationSamples> silence;
uint32_t rtp_timestamp = 0;
bool last_packet_dtx_frame = false;
bool opus_entered_dtx = false;
bool silence_filled = false;
size_t timestamp_start_silence = 0;
  while (!silence_filled && rtp_timestamp < kMaxLoopLengthSamples) {
encoded.Clear();
// Every second call to the encoder will generate an Opus packet.
for (int j = 0; j < 2; j++) {
auto next_frame = audio_loop.GetNextBlock();
info = encoder->Encode(rtp_timestamp, next_frame, &encoded);
if (opus_entered_dtx) {
size_t silence_frame_start = rtp_timestamp - timestamp_start_silence;
silence_filled = silence_frame_start >= kSilenceDurationSamples;
if (!silence_filled) {
std::copy(next_frame.begin(), next_frame.end(),
silence.begin() + silence_frame_start);
}
}
rtp_timestamp += kInputBlockSizeSamples;
}
EXPECT_TRUE(info.encoded_bytes > 0 || last_packet_dtx_frame);
last_packet_dtx_frame = info.encoded_bytes > 0 ? info.encoded_bytes <= 2
: last_packet_dtx_frame;
if (info.encoded_bytes <= 2 && !opus_entered_dtx) {
timestamp_start_silence = rtp_timestamp;
}
opus_entered_dtx = info.encoded_bytes <= 2;
}
EXPECT_TRUE(silence_filled);
  // The copied 200 ms of silence is used to create six bursts that are fed to
  // the encoder: the first three with higher energy and the last three with
  // lower energy. This test verifies that the encoder only sends refresh DTX
  // packets during the last (low-energy) bursts.
int number_non_empty_packets_during_increase = 0;
int number_non_empty_packets_during_decrease = 0;
for (size_t burst = 0; burst < 6; ++burst) {
uint32_t rtp_timestamp_start = rtp_timestamp;
const bool increase_noise = burst < 3;
const float gain = increase_noise ? 1.4f : 0.0f;
while (rtp_timestamp < rtp_timestamp_start + kSilenceDurationSamples) {
encoded.Clear();
// Every second call to the encoder will generate an Opus packet.
for (int j = 0; j < 2; j++) {
std::array<int16_t, kInputBlockSizeSamples> silence_frame;
size_t silence_frame_start = rtp_timestamp - rtp_timestamp_start;
std::transform(
silence.begin() + silence_frame_start,
silence.begin() + silence_frame_start + kInputBlockSizeSamples,
silence_frame.begin(), [gain](float s) { return gain * s; });
info = encoder->Encode(rtp_timestamp, silence_frame, &encoded);
rtp_timestamp += kInputBlockSizeSamples;
}
EXPECT_TRUE(info.encoded_bytes > 0 || last_packet_dtx_frame);
last_packet_dtx_frame = info.encoded_bytes > 0 ? info.encoded_bytes <= 2
: last_packet_dtx_frame;
      // Track the number of non-empty packets.
if (increase_noise && info.encoded_bytes > 2) {
number_non_empty_packets_during_increase++;
}
if (!increase_noise && info.encoded_bytes > 2) {
number_non_empty_packets_during_decrease++;
}
}
}
  // Check that the refresh DTX packets are only sent during the
  // decreasing-energy region.
EXPECT_EQ(number_non_empty_packets_during_increase, 0);
EXPECT_GT(number_non_empty_packets_during_decrease, 0);
}
} // namespace webrtc

View file

@ -0,0 +1,152 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/audio_codecs/opus/audio_decoder_opus.h"
#include "api/audio_codecs/opus/audio_encoder_opus.h"
#include "common_audio/include/audio_util.h"
#include "common_audio/window_generator.h"
#include "modules/audio_coding/codecs/opus/test/lapped_transform.h"
#include "modules/audio_coding/neteq/tools/audio_loop.h"
#include "test/field_trial.h"
#include "test/gtest.h"
#include "test/testsupport/file_utils.h"
namespace webrtc {
namespace {
constexpr size_t kNumChannels = 1u;
constexpr int kSampleRateHz = 48000;
constexpr size_t kMaxLoopLengthSamples = kSampleRateHz * 50; // 50 seconds.
constexpr size_t kInputBlockSizeSamples = 10 * kSampleRateHz / 1000; // 10 ms
constexpr size_t kOutputBlockSizeSamples = 20 * kSampleRateHz / 1000; // 20 ms
constexpr size_t kFftSize = 1024;
constexpr size_t kNarrowbandSize = 4000 * kFftSize / kSampleRateHz;
constexpr float kKbdAlpha = 1.5f;
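// With a 1024-point FFT at 48 kHz the bin spacing is 48000 / 1024 ~= 46.9 Hz,
// so kNarrowbandSize = 4000 * 1024 / 48000 = 85 bins covers roughly the
// 0..4 kHz band; PowerRatioEstimator below compares the energy in these bins
// against the following 85 bins (about 4..8 kHz).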
class PowerRatioEstimator : public LappedTransform::Callback {
public:
PowerRatioEstimator() : low_pow_(0.f), high_pow_(0.f) {
WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_);
transform_.reset(new LappedTransform(kNumChannels, 0u,
kInputBlockSizeSamples, window_,
kFftSize, kFftSize / 2, this));
}
void ProcessBlock(float* data) { transform_->ProcessChunk(&data, nullptr); }
float PowerRatio() { return high_pow_ / low_pow_; }
protected:
void ProcessAudioBlock(const std::complex<float>* const* input,
size_t num_input_channels,
size_t num_freq_bins,
size_t num_output_channels,
std::complex<float>* const* output) override {
float low_pow = 0.f;
float high_pow = 0.f;
for (size_t i = 0u; i < num_input_channels; ++i) {
for (size_t j = 0u; j < kNarrowbandSize; ++j) {
float low_mag = std::abs(input[i][j]);
low_pow += low_mag * low_mag;
float high_mag = std::abs(input[i][j + kNarrowbandSize]);
high_pow += high_mag * high_mag;
}
}
low_pow_ += low_pow / (num_input_channels * kFftSize);
high_pow_ += high_pow / (num_input_channels * kFftSize);
}
private:
std::unique_ptr<LappedTransform> transform_;
float window_[kFftSize];
float low_pow_;
float high_pow_;
};
float EncodedPowerRatio(AudioEncoder* encoder,
AudioDecoder* decoder,
test::AudioLoop* audio_loop) {
// Encode and decode.
uint32_t rtp_timestamp = 0u;
constexpr size_t kBufferSize = 500;
rtc::Buffer encoded(kBufferSize);
std::vector<int16_t> decoded(kOutputBlockSizeSamples);
std::vector<float> decoded_float(kOutputBlockSizeSamples);
AudioDecoder::SpeechType speech_type = AudioDecoder::kSpeech;
PowerRatioEstimator power_ratio_estimator;
for (size_t i = 0; i < 1000; ++i) {
encoded.Clear();
AudioEncoder::EncodedInfo encoder_info =
encoder->Encode(rtp_timestamp, audio_loop->GetNextBlock(), &encoded);
rtp_timestamp += kInputBlockSizeSamples;
if (encoded.size() > 0) {
int decoder_info = decoder->Decode(
encoded.data(), encoded.size(), kSampleRateHz,
decoded.size() * sizeof(decoded[0]), decoded.data(), &speech_type);
if (decoder_info > 0) {
S16ToFloat(decoded.data(), decoded.size(), decoded_float.data());
power_ratio_estimator.ProcessBlock(decoded_float.data());
}
}
}
return power_ratio_estimator.PowerRatio();
}
} // namespace
// TODO(ivoc): Remove this test, WebRTC-AdjustOpusBandwidth is obsolete.
TEST(BandwidthAdaptationTest, BandwidthAdaptationTest) {
test::ScopedFieldTrials override_field_trials(
"WebRTC-AdjustOpusBandwidth/Enabled/");
constexpr float kMaxNarrowbandRatio = 0.0035f;
constexpr float kMinWidebandRatio = 0.01f;
// Create encoder.
AudioEncoderOpusConfig enc_config;
enc_config.bitrate_bps = absl::optional<int>(7999);
enc_config.num_channels = kNumChannels;
constexpr int payload_type = 17;
auto encoder = AudioEncoderOpus::MakeAudioEncoder(enc_config, payload_type);
// Create decoder.
AudioDecoderOpus::Config dec_config;
dec_config.num_channels = kNumChannels;
auto decoder = AudioDecoderOpus::MakeAudioDecoder(dec_config);
// Open speech file.
const std::string kInputFileName =
webrtc::test::ResourcePath("audio_coding/speech_mono_32_48kHz", "pcm");
test::AudioLoop audio_loop;
EXPECT_EQ(kSampleRateHz, encoder->SampleRateHz());
ASSERT_TRUE(audio_loop.Init(kInputFileName, kMaxLoopLengthSamples,
kInputBlockSizeSamples));
EXPECT_LT(EncodedPowerRatio(encoder.get(), decoder.get(), &audio_loop),
kMaxNarrowbandRatio);
encoder->OnReceivedTargetAudioBitrate(9000);
EXPECT_LT(EncodedPowerRatio(encoder.get(), decoder.get(), &audio_loop),
kMaxNarrowbandRatio);
encoder->OnReceivedTargetAudioBitrate(9001);
EXPECT_GT(EncodedPowerRatio(encoder.get(), decoder.get(), &audio_loop),
kMinWidebandRatio);
encoder->OnReceivedTargetAudioBitrate(8000);
EXPECT_GT(EncodedPowerRatio(encoder.get(), decoder.get(), &audio_loop),
kMinWidebandRatio);
encoder->OnReceivedTargetAudioBitrate(12001);
EXPECT_GT(EncodedPowerRatio(encoder.get(), decoder.get(), &audio_loop),
kMinWidebandRatio);
}
} // namespace webrtc
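A note on the spectral split used by `PowerRatioEstimator`: `kNarrowbandSize` maps the 4 kHz cutoff onto FFT bins, and the estimator compares the energy of the first `kNarrowbandSize` bins against the next `kNarrowbandSize` bins (roughly 4-8 kHz). A small sketch (hypothetical helper) making the arithmetic explicit:

```cpp
#include <cstddef>

// Number of FFT bins below a cutoff frequency, for a given FFT size and
// sample rate. With kFftSize = 1024 and kSampleRateHz = 48000, a 4 kHz
// cutoff lands at bin 85, which matches kNarrowbandSize above.
constexpr size_t BinsBelow(float cutoff_hz, size_t fft_size, int sample_rate_hz) {
  return static_cast<size_t>(cutoff_hz * fft_size / sample_rate_hz);
}
static_assert(BinsBelow(4000.f, 1024, 48000) == 85, "4 kHz cutoff -> 85 bins");
```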

View file

@ -0,0 +1,105 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/audio_codecs/opus/audio_encoder_opus.h"
#include "api/test/metrics/global_metrics_logger_and_exporter.h"
#include "api/test/metrics/metric.h"
#include "modules/audio_coding/neteq/tools/audio_loop.h"
#include "rtc_base/time_utils.h"
#include "test/gtest.h"
#include "test/testsupport/file_utils.h"
namespace webrtc {
namespace {
using ::webrtc::test::GetGlobalMetricsLogger;
using ::webrtc::test::ImprovementDirection;
using ::webrtc::test::Unit;
int64_t RunComplexityTest(const AudioEncoderOpusConfig& config) {
// Create encoder.
constexpr int payload_type = 17;
const auto encoder = AudioEncoderOpus::MakeAudioEncoder(config, payload_type);
// Open speech file.
const std::string kInputFileName =
webrtc::test::ResourcePath("audio_coding/speech_mono_32_48kHz", "pcm");
test::AudioLoop audio_loop;
constexpr int kSampleRateHz = 48000;
EXPECT_EQ(kSampleRateHz, encoder->SampleRateHz());
constexpr size_t kMaxLoopLengthSamples =
kSampleRateHz * 10; // 10 second loop.
constexpr size_t kInputBlockSizeSamples =
      10 * kSampleRateHz / 1000;  // 10 ms.
EXPECT_TRUE(audio_loop.Init(kInputFileName, kMaxLoopLengthSamples,
kInputBlockSizeSamples));
// Encode.
const int64_t start_time_ms = rtc::TimeMillis();
AudioEncoder::EncodedInfo info;
rtc::Buffer encoded(500);
uint32_t rtp_timestamp = 0u;
for (size_t i = 0; i < 10000; ++i) {
encoded.Clear();
info = encoder->Encode(rtp_timestamp, audio_loop.GetNextBlock(), &encoded);
rtp_timestamp += kInputBlockSizeSamples;
}
return rtc::TimeMillis() - start_time_ms;
}
// This test encodes an audio file using Opus twice with different bitrates
// (~11 kbps and 15.5 kbps). The runtime for each is measured, and the ratio
// between the two is calculated and tracked. This test explicitly sets the
// low_rate_complexity to 9. When running on desktop platforms, this is the same
// as the regular complexity, and the expectation is that the resulting ratio
// should be less than 100% (since the encoder runs faster at lower bitrates,
// given a fixed complexity setting). On the other hand, when running on
// mobiles, the regular complexity is 5, and we expect the resulting ratio to
// be higher, since we have explicitly asked for a higher complexity setting at
// the lower rate.
TEST(AudioEncoderOpusComplexityAdaptationTest, Adaptation_On) {
// Create config.
AudioEncoderOpusConfig config;
// The limit -- including the hysteresis window -- at which the complexity
  // should be increased.
config.bitrate_bps = 11000 - 1;
config.low_rate_complexity = 9;
int64_t runtime_10999bps = RunComplexityTest(config);
config.bitrate_bps = 15500;
int64_t runtime_15500bps = RunComplexityTest(config);
GetGlobalMetricsLogger()->LogSingleValueMetric(
"opus_encoding_complexity_ratio", "adaptation_on",
100.0 * runtime_10999bps / runtime_15500bps, Unit::kPercent,
ImprovementDirection::kNeitherIsBetter);
}
// This test is identical to the one above, but without the complexity
// adaptation enabled (neither on desktop, nor on mobile). The expectation is
// that the resulting ratio is less than 100% at all times.
TEST(AudioEncoderOpusComplexityAdaptationTest, Adaptation_Off) {
// Create config.
AudioEncoderOpusConfig config;
// The limit -- including the hysteresis window -- at which the complexity
  // should be increased (but not in this test since complexity adaptation is
// disabled).
config.bitrate_bps = 11000 - 1;
int64_t runtime_10999bps = RunComplexityTest(config);
config.bitrate_bps = 15500;
int64_t runtime_15500bps = RunComplexityTest(config);
GetGlobalMetricsLogger()->LogSingleValueMetric(
"opus_encoding_complexity_ratio", "adaptation_off",
100.0 * runtime_10999bps / runtime_15500bps, Unit::kPercent,
ImprovementDirection::kNeitherIsBetter);
}
} // namespace
} // namespace webrtc
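The metric logged by both tests is the low-bitrate encode time relative to the high-bitrate encode time, expressed in percent; a sketch of that computation (hypothetical helper, assuming runtimes measured as in `RunComplexityTest`):

```cpp
#include <cstdint>

// Ratio of encode runtimes in percent. Values below 100 mean the ~11 kbps
// encode was faster than the 15.5 kbps encode; with low_rate_complexity = 9
// on mobile, values above 100 are expected instead.
double ComplexityRatioPercent(int64_t runtime_low_ms, int64_t runtime_high_ms) {
  return 100.0 * runtime_low_ms / runtime_high_ms;
}
```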

View file

@ -0,0 +1,248 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <memory>
#include "modules/audio_coding/codecs/opus/opus_interface.h"
#include "test/gtest.h"
#include "test/testsupport/file_utils.h"
using std::get;
using std::string;
using std::tuple;
using ::testing::TestWithParam;
namespace webrtc {
// Define coding parameter as <channels, bit_rate, filename, extension>.
typedef tuple<size_t, int, string, string> coding_param;
typedef struct mode mode;
struct mode {
bool fec;
uint8_t target_packet_loss_rate;
};
const int kOpusBlockDurationMs = 20;
const int kOpusSamplingKhz = 48;
class OpusFecTest : public TestWithParam<coding_param> {
protected:
OpusFecTest();
void SetUp() override;
void TearDown() override;
virtual void EncodeABlock();
virtual void DecodeABlock(bool lost_previous, bool lost_current);
int block_duration_ms_;
int sampling_khz_;
size_t block_length_sample_;
size_t channels_;
int bit_rate_;
size_t data_pointer_;
size_t loop_length_samples_;
size_t max_bytes_;
size_t encoded_bytes_;
WebRtcOpusEncInst* opus_encoder_;
WebRtcOpusDecInst* opus_decoder_;
string in_filename_;
std::unique_ptr<int16_t[]> in_data_;
std::unique_ptr<int16_t[]> out_data_;
std::unique_ptr<uint8_t[]> bit_stream_;
};
void OpusFecTest::SetUp() {
channels_ = get<0>(GetParam());
bit_rate_ = get<1>(GetParam());
printf("Coding %zu channel signal at %d bps.\n", channels_, bit_rate_);
in_filename_ = test::ResourcePath(get<2>(GetParam()), get<3>(GetParam()));
FILE* fp = fopen(in_filename_.c_str(), "rb");
ASSERT_FALSE(fp == NULL);
// Obtain file size.
fseek(fp, 0, SEEK_END);
loop_length_samples_ = ftell(fp) / sizeof(int16_t);
rewind(fp);
// Allocate memory to contain the whole file.
in_data_.reset(
new int16_t[loop_length_samples_ + block_length_sample_ * channels_]);
// Copy the file into the buffer.
ASSERT_EQ(fread(&in_data_[0], sizeof(int16_t), loop_length_samples_, fp),
loop_length_samples_);
fclose(fp);
  // The audio will be used in a looped manner. To ease the acquisition of
  // audio frames that cross the end of the excerpt, we append an extra block
  // of samples to the end of the array, copied from the beginning of the
  // array. This way, frames that wrap around the end of the excerpt always
  // appear as one contiguous region of memory.
memcpy(&in_data_[loop_length_samples_], &in_data_[0],
block_length_sample_ * channels_ * sizeof(int16_t));
// Maximum number of bytes in output bitstream.
max_bytes_ = block_length_sample_ * channels_ * sizeof(int16_t);
out_data_.reset(new int16_t[2 * block_length_sample_ * channels_]);
bit_stream_.reset(new uint8_t[max_bytes_]);
// If channels_ == 1, use Opus VOIP mode, otherwise, audio mode.
int app = channels_ == 1 ? 0 : 1;
// Create encoder memory.
EXPECT_EQ(0, WebRtcOpus_EncoderCreate(&opus_encoder_, channels_, app, 48000));
EXPECT_EQ(0, WebRtcOpus_DecoderCreate(&opus_decoder_, channels_, 48000));
// Set bitrate.
EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_, bit_rate_));
}
void OpusFecTest::TearDown() {
// Free memory.
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
}
OpusFecTest::OpusFecTest()
: block_duration_ms_(kOpusBlockDurationMs),
sampling_khz_(kOpusSamplingKhz),
block_length_sample_(
static_cast<size_t>(block_duration_ms_ * sampling_khz_)),
data_pointer_(0),
max_bytes_(0),
encoded_bytes_(0),
opus_encoder_(NULL),
opus_decoder_(NULL) {}
void OpusFecTest::EncodeABlock() {
int value =
WebRtcOpus_Encode(opus_encoder_, &in_data_[data_pointer_],
block_length_sample_, max_bytes_, &bit_stream_[0]);
EXPECT_GT(value, 0);
encoded_bytes_ = static_cast<size_t>(value);
}
void OpusFecTest::DecodeABlock(bool lost_previous, bool lost_current) {
int16_t audio_type;
int value_1 = 0, value_2 = 0;
if (lost_previous) {
// Decode previous frame.
if (!lost_current &&
WebRtcOpus_PacketHasFec(&bit_stream_[0], encoded_bytes_) == 1) {
value_1 =
WebRtcOpus_DecodeFec(opus_decoder_, &bit_stream_[0], encoded_bytes_,
&out_data_[0], &audio_type);
} else {
// Call decoder PLC.
while (value_1 < static_cast<int>(block_length_sample_)) {
int ret = WebRtcOpus_Decode(opus_decoder_, NULL, 0, &out_data_[value_1],
&audio_type);
EXPECT_EQ(ret, sampling_khz_ * 10); // Should return 10 ms of samples.
value_1 += ret;
}
}
EXPECT_EQ(static_cast<int>(block_length_sample_), value_1);
}
if (!lost_current) {
// Decode current frame.
value_2 = WebRtcOpus_Decode(opus_decoder_, &bit_stream_[0], encoded_bytes_,
&out_data_[value_1 * channels_], &audio_type);
EXPECT_EQ(static_cast<int>(block_length_sample_), value_2);
}
}
TEST_P(OpusFecTest, RandomPacketLossTest) {
const int kDurationMs = 200000;
int time_now_ms, fec_frames;
int actual_packet_loss_rate;
bool lost_current, lost_previous;
mode mode_set[3] = {{true, 0}, {false, 0}, {true, 50}};
lost_current = false;
for (int i = 0; i < 3; i++) {
if (mode_set[i].fec) {
EXPECT_EQ(0, WebRtcOpus_EnableFec(opus_encoder_));
EXPECT_EQ(0, WebRtcOpus_SetPacketLossRate(
opus_encoder_, mode_set[i].target_packet_loss_rate));
printf("FEC is ON, target at packet loss rate %d percent.\n",
mode_set[i].target_packet_loss_rate);
} else {
EXPECT_EQ(0, WebRtcOpus_DisableFec(opus_encoder_));
printf("FEC is OFF.\n");
}
// In this test, we let the target packet loss rate match the actual rate.
actual_packet_loss_rate = mode_set[i].target_packet_loss_rate;
// Run every mode a certain time.
time_now_ms = 0;
fec_frames = 0;
while (time_now_ms < kDurationMs) {
// Encode & decode.
EncodeABlock();
// Check if payload has FEC.
int fec = WebRtcOpus_PacketHasFec(&bit_stream_[0], encoded_bytes_);
// If FEC is disabled or the target packet loss rate is set to 0, there
// should be no FEC in the bit stream.
if (!mode_set[i].fec || mode_set[i].target_packet_loss_rate == 0) {
EXPECT_EQ(fec, 0);
} else if (fec == 1) {
fec_frames++;
}
lost_previous = lost_current;
lost_current = rand() < actual_packet_loss_rate * (RAND_MAX / 100);
DecodeABlock(lost_previous, lost_current);
time_now_ms += block_duration_ms_;
// `data_pointer_` is incremented and wrapped across
// `loop_length_samples_`.
data_pointer_ = (data_pointer_ + block_length_sample_ * channels_) %
loop_length_samples_;
}
if (mode_set[i].fec) {
printf("%.2f percent frames has FEC.\n",
static_cast<float>(fec_frames) * block_duration_ms_ / 2000);
}
}
}
const coding_param param_set[] = {
std::make_tuple(1,
64000,
string("audio_coding/testfile32kHz"),
string("pcm")),
std::make_tuple(1,
32000,
string("audio_coding/testfile32kHz"),
string("pcm")),
    // 64 kbps, stereo.
    std::make_tuple(2,
                    64000,
                    string("audio_coding/teststereo32kHz"),
                    string("pcm"))};
INSTANTIATE_TEST_SUITE_P(AllTest, OpusFecTest, ::testing::ValuesIn(param_set));
} // namespace webrtc
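The loss model in `RandomPacketLossTest` is a Bernoulli draw per packet; a self-contained sketch of the same expression (helper name hypothetical):

```cpp
#include <cstdlib>

// Drop each packet independently with probability loss_rate_percent %.
// std::rand() is uniform on [0, RAND_MAX], so the comparison succeeds for
// roughly loss_rate_percent out of every 100 draws.
bool PacketLost(int loss_rate_percent) {
  return std::rand() < loss_rate_percent * (RAND_MAX / 100);
}
```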

View file

@ -0,0 +1,43 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INST_H_
#define MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INST_H_
#include <stddef.h>
#include "rtc_base/ignore_wundef.h"
RTC_PUSH_IGNORING_WUNDEF()
#include "opus.h"
#include "opus_multistream.h"
RTC_POP_IGNORING_WUNDEF()
struct WebRtcOpusEncInst {
OpusEncoder* encoder;
OpusMSEncoder* multistream_encoder;
size_t channels;
int in_dtx_mode;
bool avoid_noise_pumping_during_dtx;
int sample_rate_hz;
float smooth_energy_non_active_frames;
};
struct WebRtcOpusDecInst {
OpusDecoder* decoder;
OpusMSDecoder* multistream_decoder;
int prev_decoded_samples;
bool plc_use_prev_decoded_samples;
size_t channels;
int in_dtx_mode;
int sample_rate_hz;
};
#endif // MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INST_H_

View file

@ -0,0 +1,880 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/codecs/opus/opus_interface.h"
#include <cstdlib>
#include <numeric>
#include "api/array_view.h"
#include "rtc_base/checks.h"
#include "system_wrappers/include/field_trial.h"
enum {
#if WEBRTC_OPUS_SUPPORT_120MS_PTIME
/* Maximum supported frame size in WebRTC is 120 ms. */
kWebRtcOpusMaxEncodeFrameSizeMs = 120,
#else
/* Maximum supported frame size in WebRTC is 60 ms. */
kWebRtcOpusMaxEncodeFrameSizeMs = 60,
#endif
/* The format allows up to 120 ms frames. Since we don't control the other
* side, we must allow for packets of that size. NetEq is currently limited
* to 60 ms on the receive side. */
kWebRtcOpusMaxDecodeFrameSizeMs = 120,
// Duration of audio that each call to packet loss concealment covers.
kWebRtcOpusPlcFrameSizeMs = 10,
};
constexpr char kPlcUsePrevDecodedSamplesFieldTrial[] =
"WebRTC-Audio-OpusPlcUsePrevDecodedSamples";
constexpr char kAvoidNoisePumpingDuringDtxFieldTrial[] =
"WebRTC-Audio-OpusAvoidNoisePumpingDuringDtx";
constexpr char kSetSignalVoiceWithDtxFieldTrial[] =
"WebRTC-Audio-OpusSetSignalVoiceWithDtx";
static int FrameSizePerChannel(int frame_size_ms, int sample_rate_hz) {
RTC_DCHECK_GT(frame_size_ms, 0);
RTC_DCHECK_EQ(frame_size_ms % 10, 0);
RTC_DCHECK_GT(sample_rate_hz, 0);
RTC_DCHECK_EQ(sample_rate_hz % 1000, 0);
return frame_size_ms * (sample_rate_hz / 1000);
}
// Maximum sample count per channel.
static int MaxFrameSizePerChannel(int sample_rate_hz) {
return FrameSizePerChannel(kWebRtcOpusMaxDecodeFrameSizeMs, sample_rate_hz);
}
// Default sample count per channel.
static int DefaultFrameSizePerChannel(int sample_rate_hz) {
return FrameSizePerChannel(20, sample_rate_hz);
}
// Returns true if the `encoded` payload corresponds to a refresh DTX packet
// whose energy is larger than expected for non-activity packets.
static bool WebRtcOpus_IsHighEnergyRefreshDtxPacket(
OpusEncInst* inst,
rtc::ArrayView<const int16_t> frame,
rtc::ArrayView<const uint8_t> encoded) {
if (encoded.size() <= 2) {
return false;
}
int number_frames =
frame.size() / DefaultFrameSizePerChannel(inst->sample_rate_hz);
if (number_frames > 0 &&
WebRtcOpus_PacketHasVoiceActivity(encoded.data(), encoded.size()) == 0) {
const float average_frame_energy =
std::accumulate(frame.begin(), frame.end(), 0.0f,
[](float a, int32_t b) { return a + b * b; }) /
number_frames;
if (WebRtcOpus_GetInDtx(inst) == 1 &&
average_frame_energy >= inst->smooth_energy_non_active_frames * 0.5f) {
      // This is a refresh DTX packet, as the encoder is in DTX and has
      // produced a payload > 2 bytes. The packet has a higher energy than the
      // smoothed energy of non-activity frames (with a 3 dB negative margin)
      // and is therefore flagged as a high-energy refresh DTX packet.
return true;
}
// The average energy is tracked in a similar way as the modeling of the
// comfort noise in the Silk decoder in Opus
// (third_party/opus/src/silk/CNG.c).
if (average_frame_energy < inst->smooth_energy_non_active_frames * 0.5f) {
inst->smooth_energy_non_active_frames = average_frame_energy;
} else {
inst->smooth_energy_non_active_frames +=
(average_frame_energy - inst->smooth_energy_non_active_frames) *
0.25f;
}
}
return false;
}
int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst,
size_t channels,
int32_t application,
int sample_rate_hz) {
int opus_app;
if (!inst)
return -1;
switch (application) {
case 0:
opus_app = OPUS_APPLICATION_VOIP;
break;
case 1:
opus_app = OPUS_APPLICATION_AUDIO;
break;
default:
return -1;
}
OpusEncInst* state =
reinterpret_cast<OpusEncInst*>(calloc(1, sizeof(OpusEncInst)));
RTC_DCHECK(state);
int error;
state->encoder = opus_encoder_create(
sample_rate_hz, static_cast<int>(channels), opus_app, &error);
if (error != OPUS_OK || (!state->encoder && !state->multistream_encoder)) {
WebRtcOpus_EncoderFree(state);
return -1;
}
state->in_dtx_mode = 0;
state->channels = channels;
state->sample_rate_hz = sample_rate_hz;
state->smooth_energy_non_active_frames = 0.0f;
state->avoid_noise_pumping_during_dtx =
webrtc::field_trial::IsEnabled(kAvoidNoisePumpingDuringDtxFieldTrial);
*inst = state;
return 0;
}
int16_t WebRtcOpus_MultistreamEncoderCreate(
OpusEncInst** inst,
size_t channels,
int32_t application,
size_t streams,
size_t coupled_streams,
const unsigned char* channel_mapping) {
int opus_app;
if (!inst)
return -1;
switch (application) {
case 0:
opus_app = OPUS_APPLICATION_VOIP;
break;
case 1:
opus_app = OPUS_APPLICATION_AUDIO;
break;
default:
return -1;
}
OpusEncInst* state =
reinterpret_cast<OpusEncInst*>(calloc(1, sizeof(OpusEncInst)));
RTC_DCHECK(state);
int error;
const int sample_rate_hz = 48000;
state->multistream_encoder = opus_multistream_encoder_create(
sample_rate_hz, channels, streams, coupled_streams, channel_mapping,
opus_app, &error);
if (error != OPUS_OK || (!state->encoder && !state->multistream_encoder)) {
WebRtcOpus_EncoderFree(state);
return -1;
}
state->in_dtx_mode = 0;
state->channels = channels;
state->sample_rate_hz = sample_rate_hz;
state->smooth_energy_non_active_frames = 0.0f;
state->avoid_noise_pumping_during_dtx = false;
*inst = state;
return 0;
}
int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst) {
if (inst) {
if (inst->encoder) {
opus_encoder_destroy(inst->encoder);
} else {
opus_multistream_encoder_destroy(inst->multistream_encoder);
}
free(inst);
return 0;
} else {
return -1;
}
}
int WebRtcOpus_Encode(OpusEncInst* inst,
const int16_t* audio_in,
size_t samples,
size_t length_encoded_buffer,
uint8_t* encoded) {
int res;
if (samples > 48 * kWebRtcOpusMaxEncodeFrameSizeMs) {
return -1;
}
if (inst->encoder) {
res = opus_encode(inst->encoder, (const opus_int16*)audio_in,
static_cast<int>(samples), encoded,
static_cast<opus_int32>(length_encoded_buffer));
} else {
res = opus_multistream_encode(
inst->multistream_encoder, (const opus_int16*)audio_in,
static_cast<int>(samples), encoded,
static_cast<opus_int32>(length_encoded_buffer));
}
if (res <= 0) {
return -1;
}
if (res <= 2) {
// Indicates DTX since the packet has nothing but a header. In principle,
// there is no need to send this packet. However, we do transmit the first
// occurrence to let the decoder know that the encoder enters DTX mode.
if (inst->in_dtx_mode) {
return 0;
} else {
inst->in_dtx_mode = 1;
return res;
}
}
if (inst->avoid_noise_pumping_during_dtx && WebRtcOpus_GetUseDtx(inst) == 1 &&
WebRtcOpus_IsHighEnergyRefreshDtxPacket(
inst, rtc::MakeArrayView(audio_in, samples),
rtc::MakeArrayView(encoded, res))) {
    // This packet is a high-energy refresh DTX packet. To avoid an increase
    // of the energy in the DTX region at the decoder, the packet is
    // substituted by a TOC byte signaling one empty frame.
// The number of frames described in the TOC byte
// (https://tools.ietf.org/html/rfc6716#section-3.1) are overwritten to
// always indicate one frame (last two bits equal to 0).
encoded[0] = encoded[0] & 0b11111100;
inst->in_dtx_mode = 1;
// The payload is just the TOC byte and has 1 byte as length.
return 1;
}
inst->in_dtx_mode = 0;
return res;
}
#define ENCODER_CTL(inst, vargs) \
(inst->encoder \
? opus_encoder_ctl(inst->encoder, vargs) \
: opus_multistream_encoder_ctl(inst->multistream_encoder, vargs))
int16_t WebRtcOpus_SetBitRate(OpusEncInst* inst, int32_t rate) {
if (inst) {
return ENCODER_CTL(inst, OPUS_SET_BITRATE(rate));
} else {
return -1;
}
}
int16_t WebRtcOpus_SetPacketLossRate(OpusEncInst* inst, int32_t loss_rate) {
if (inst) {
return ENCODER_CTL(inst, OPUS_SET_PACKET_LOSS_PERC(loss_rate));
} else {
return -1;
}
}
int16_t WebRtcOpus_SetMaxPlaybackRate(OpusEncInst* inst, int32_t frequency_hz) {
opus_int32 set_bandwidth;
if (!inst)
return -1;
if (frequency_hz <= 8000) {
set_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
} else if (frequency_hz <= 12000) {
set_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
} else if (frequency_hz <= 16000) {
set_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
} else if (frequency_hz <= 24000) {
set_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
} else {
set_bandwidth = OPUS_BANDWIDTH_FULLBAND;
}
return ENCODER_CTL(inst, OPUS_SET_MAX_BANDWIDTH(set_bandwidth));
}
int16_t WebRtcOpus_GetMaxPlaybackRate(OpusEncInst* const inst,
int32_t* result_hz) {
if (inst->encoder) {
if (opus_encoder_ctl(inst->encoder, OPUS_GET_MAX_BANDWIDTH(result_hz)) ==
OPUS_OK) {
return 0;
}
return -1;
}
opus_int32 max_bandwidth;
int s;
int ret;
max_bandwidth = 0;
ret = OPUS_OK;
s = 0;
while (ret == OPUS_OK) {
OpusEncoder* enc;
opus_int32 bandwidth;
ret = ENCODER_CTL(inst, OPUS_MULTISTREAM_GET_ENCODER_STATE(s, &enc));
if (ret == OPUS_BAD_ARG)
break;
if (ret != OPUS_OK)
return -1;
if (opus_encoder_ctl(enc, OPUS_GET_MAX_BANDWIDTH(&bandwidth)) != OPUS_OK)
return -1;
if (max_bandwidth != 0 && max_bandwidth != bandwidth)
return -1;
max_bandwidth = bandwidth;
s++;
}
*result_hz = max_bandwidth;
return 0;
}
int16_t WebRtcOpus_EnableFec(OpusEncInst* inst) {
if (inst) {
return ENCODER_CTL(inst, OPUS_SET_INBAND_FEC(1));
} else {
return -1;
}
}
int16_t WebRtcOpus_DisableFec(OpusEncInst* inst) {
if (inst) {
return ENCODER_CTL(inst, OPUS_SET_INBAND_FEC(0));
} else {
return -1;
}
}
int16_t WebRtcOpus_EnableDtx(OpusEncInst* inst) {
if (inst) {
if (webrtc::field_trial::IsEnabled(kSetSignalVoiceWithDtxFieldTrial)) {
int ret = ENCODER_CTL(inst, OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE));
if (ret != OPUS_OK) {
return ret;
}
}
return ENCODER_CTL(inst, OPUS_SET_DTX(1));
} else {
return -1;
}
}
int16_t WebRtcOpus_DisableDtx(OpusEncInst* inst) {
if (inst) {
if (webrtc::field_trial::IsEnabled(kSetSignalVoiceWithDtxFieldTrial)) {
int ret = ENCODER_CTL(inst, OPUS_SET_SIGNAL(OPUS_AUTO));
if (ret != OPUS_OK) {
return ret;
}
}
return ENCODER_CTL(inst, OPUS_SET_DTX(0));
} else {
return -1;
}
}
int16_t WebRtcOpus_GetUseDtx(OpusEncInst* inst) {
if (inst) {
opus_int32 use_dtx;
if (ENCODER_CTL(inst, OPUS_GET_DTX(&use_dtx)) == 0) {
return use_dtx;
}
}
return -1;
}
int16_t WebRtcOpus_EnableCbr(OpusEncInst* inst) {
if (inst) {
return ENCODER_CTL(inst, OPUS_SET_VBR(0));
} else {
return -1;
}
}
int16_t WebRtcOpus_DisableCbr(OpusEncInst* inst) {
if (inst) {
return ENCODER_CTL(inst, OPUS_SET_VBR(1));
} else {
return -1;
}
}
int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity) {
if (inst) {
return ENCODER_CTL(inst, OPUS_SET_COMPLEXITY(complexity));
} else {
return -1;
}
}
int32_t WebRtcOpus_GetBandwidth(OpusEncInst* inst) {
if (!inst) {
return -1;
}
int32_t bandwidth;
if (ENCODER_CTL(inst, OPUS_GET_BANDWIDTH(&bandwidth)) == 0) {
return bandwidth;
} else {
return -1;
}
}
int16_t WebRtcOpus_SetBandwidth(OpusEncInst* inst, int32_t bandwidth) {
if (inst) {
return ENCODER_CTL(inst, OPUS_SET_BANDWIDTH(bandwidth));
} else {
return -1;
}
}
int16_t WebRtcOpus_SetForceChannels(OpusEncInst* inst, size_t num_channels) {
if (!inst)
return -1;
if (num_channels == 0) {
return ENCODER_CTL(inst, OPUS_SET_FORCE_CHANNELS(OPUS_AUTO));
} else if (num_channels == 1 || num_channels == 2) {
return ENCODER_CTL(inst, OPUS_SET_FORCE_CHANNELS(num_channels));
} else {
return -1;
}
}
int32_t WebRtcOpus_GetInDtx(OpusEncInst* inst) {
if (!inst) {
return -1;
}
#ifdef OPUS_GET_IN_DTX
int32_t in_dtx;
if (ENCODER_CTL(inst, OPUS_GET_IN_DTX(&in_dtx)) == 0) {
return in_dtx;
}
#endif
return -1;
}
int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst,
size_t channels,
int sample_rate_hz) {
int error;
OpusDecInst* state;
if (inst != NULL) {
// Create Opus decoder state.
state = reinterpret_cast<OpusDecInst*>(calloc(1, sizeof(OpusDecInst)));
if (state == NULL) {
return -1;
}
state->decoder =
opus_decoder_create(sample_rate_hz, static_cast<int>(channels), &error);
if (error == OPUS_OK && state->decoder) {
// Creation of memory all ok.
state->channels = channels;
state->sample_rate_hz = sample_rate_hz;
state->plc_use_prev_decoded_samples =
webrtc::field_trial::IsEnabled(kPlcUsePrevDecodedSamplesFieldTrial);
if (state->plc_use_prev_decoded_samples) {
state->prev_decoded_samples =
DefaultFrameSizePerChannel(state->sample_rate_hz);
}
state->in_dtx_mode = 0;
*inst = state;
return 0;
}
// If memory allocation was unsuccessful, free the entire state.
if (state->decoder) {
opus_decoder_destroy(state->decoder);
}
free(state);
}
return -1;
}
int16_t WebRtcOpus_MultistreamDecoderCreate(
OpusDecInst** inst,
size_t channels,
size_t streams,
size_t coupled_streams,
const unsigned char* channel_mapping) {
int error;
OpusDecInst* state;
if (inst != NULL) {
// Create Opus decoder state.
state = reinterpret_cast<OpusDecInst*>(calloc(1, sizeof(OpusDecInst)));
if (state == NULL) {
return -1;
}
// Create new memory, always at 48000 Hz.
state->multistream_decoder = opus_multistream_decoder_create(
48000, channels, streams, coupled_streams, channel_mapping, &error);
if (error == OPUS_OK && state->multistream_decoder) {
// Creation of memory all ok.
state->channels = channels;
state->sample_rate_hz = 48000;
state->plc_use_prev_decoded_samples =
webrtc::field_trial::IsEnabled(kPlcUsePrevDecodedSamplesFieldTrial);
if (state->plc_use_prev_decoded_samples) {
state->prev_decoded_samples =
DefaultFrameSizePerChannel(state->sample_rate_hz);
}
state->in_dtx_mode = 0;
*inst = state;
return 0;
}
// If memory allocation was unsuccessful, free the entire state.
opus_multistream_decoder_destroy(state->multistream_decoder);
free(state);
}
return -1;
}
int16_t WebRtcOpus_DecoderFree(OpusDecInst* inst) {
if (inst) {
if (inst->decoder) {
opus_decoder_destroy(inst->decoder);
} else if (inst->multistream_decoder) {
opus_multistream_decoder_destroy(inst->multistream_decoder);
}
free(inst);
return 0;
} else {
return -1;
}
}
size_t WebRtcOpus_DecoderChannels(OpusDecInst* inst) {
return inst->channels;
}
void WebRtcOpus_DecoderInit(OpusDecInst* inst) {
if (inst->decoder) {
opus_decoder_ctl(inst->decoder, OPUS_RESET_STATE);
} else {
opus_multistream_decoder_ctl(inst->multistream_decoder, OPUS_RESET_STATE);
}
inst->in_dtx_mode = 0;
}
/* For decoder to determine if it is to output speech or comfort noise. */
static int16_t DetermineAudioType(OpusDecInst* inst, size_t encoded_bytes) {
  // The audio type becomes comfort noise if `encoded_bytes` is 1 or 2, and
  // remains so while the following payloads are empty or 1-2 bytes long.
if (encoded_bytes == 0 && inst->in_dtx_mode) {
return 2; // Comfort noise.
} else if (encoded_bytes == 1 || encoded_bytes == 2) {
// TODO(henrik.lundin): There is a slight risk that a 2-byte payload is in
// fact a 1-byte TOC with a 1-byte payload. That will be erroneously
// interpreted as comfort noise output, but such a payload is probably
// faulty anyway.
    // TODO(webrtc:10218): This is wrong for multistream Opus, where several
    // single-stream packets are glued together with some packet-size bytes in
    // between. See https://tools.ietf.org/html/rfc6716#appendix-B
inst->in_dtx_mode = 1;
return 2; // Comfort noise.
} else {
inst->in_dtx_mode = 0;
return 0; // Speech.
}
}
/* `frame_size` is set to maximum Opus frame size in the normal case, and
* is set to the number of samples needed for PLC in case of losses.
* It is up to the caller to make sure the value is correct. */
static int DecodeNative(OpusDecInst* inst,
const uint8_t* encoded,
size_t encoded_bytes,
int frame_size,
int16_t* decoded,
int16_t* audio_type,
int decode_fec) {
int res = -1;
if (inst->decoder) {
res = opus_decode(
inst->decoder, encoded, static_cast<opus_int32>(encoded_bytes),
reinterpret_cast<opus_int16*>(decoded), frame_size, decode_fec);
} else {
res = opus_multistream_decode(inst->multistream_decoder, encoded,
static_cast<opus_int32>(encoded_bytes),
reinterpret_cast<opus_int16*>(decoded),
frame_size, decode_fec);
}
if (res <= 0)
return -1;
*audio_type = DetermineAudioType(inst, encoded_bytes);
return res;
}
static int DecodePlc(OpusDecInst* inst, int16_t* decoded) {
int16_t audio_type = 0;
int decoded_samples;
int plc_samples =
FrameSizePerChannel(kWebRtcOpusPlcFrameSizeMs, inst->sample_rate_hz);
if (inst->plc_use_prev_decoded_samples) {
/* The number of samples we ask for is `number_of_lost_frames` times
* `prev_decoded_samples_`. Limit the number of samples to maximum
* `MaxFrameSizePerChannel()`. */
plc_samples = inst->prev_decoded_samples;
const int max_samples_per_channel =
MaxFrameSizePerChannel(inst->sample_rate_hz);
plc_samples = plc_samples <= max_samples_per_channel
? plc_samples
: max_samples_per_channel;
}
decoded_samples =
DecodeNative(inst, NULL, 0, plc_samples, decoded, &audio_type, 0);
if (decoded_samples < 0) {
return -1;
}
return decoded_samples;
}
int WebRtcOpus_Decode(OpusDecInst* inst,
const uint8_t* encoded,
size_t encoded_bytes,
int16_t* decoded,
int16_t* audio_type) {
int decoded_samples;
if (encoded_bytes == 0) {
*audio_type = DetermineAudioType(inst, encoded_bytes);
decoded_samples = DecodePlc(inst, decoded);
} else {
decoded_samples = DecodeNative(inst, encoded, encoded_bytes,
MaxFrameSizePerChannel(inst->sample_rate_hz),
decoded, audio_type, 0);
}
if (decoded_samples < 0) {
return -1;
}
if (inst->plc_use_prev_decoded_samples) {
/* Update decoded sample memory, to be used by the PLC in case of losses. */
inst->prev_decoded_samples = decoded_samples;
}
return decoded_samples;
}
int WebRtcOpus_DecodeFec(OpusDecInst* inst,
const uint8_t* encoded,
size_t encoded_bytes,
int16_t* decoded,
int16_t* audio_type) {
int decoded_samples;
int fec_samples;
if (WebRtcOpus_PacketHasFec(encoded, encoded_bytes) != 1) {
return 0;
}
fec_samples =
opus_packet_get_samples_per_frame(encoded, inst->sample_rate_hz);
decoded_samples = DecodeNative(inst, encoded, encoded_bytes, fec_samples,
decoded, audio_type, 1);
if (decoded_samples < 0) {
return -1;
}
return decoded_samples;
}
int WebRtcOpus_DurationEst(OpusDecInst* inst,
const uint8_t* payload,
size_t payload_length_bytes) {
if (payload_length_bytes == 0) {
// WebRtcOpus_Decode calls PLC when payload length is zero. So we return
// PLC duration correspondingly.
return WebRtcOpus_PlcDuration(inst);
}
int frames, samples;
frames = opus_packet_get_nb_frames(
payload, static_cast<opus_int32>(payload_length_bytes));
if (frames < 0) {
/* Invalid payload data. */
return 0;
}
samples =
frames * opus_packet_get_samples_per_frame(payload, inst->sample_rate_hz);
if (samples > 120 * inst->sample_rate_hz / 1000) {
// More than 120 ms' worth of samples.
return 0;
}
return samples;
}
int WebRtcOpus_PlcDuration(OpusDecInst* inst) {
if (inst->plc_use_prev_decoded_samples) {
/* The number of samples we ask for is `number_of_lost_frames` times
* `prev_decoded_samples_`. Limit the number of samples to maximum
* `MaxFrameSizePerChannel()`. */
const int plc_samples = inst->prev_decoded_samples;
const int max_samples_per_channel =
MaxFrameSizePerChannel(inst->sample_rate_hz);
return plc_samples <= max_samples_per_channel ? plc_samples
: max_samples_per_channel;
}
return FrameSizePerChannel(kWebRtcOpusPlcFrameSizeMs, inst->sample_rate_hz);
}
int WebRtcOpus_FecDurationEst(const uint8_t* payload,
size_t payload_length_bytes,
int sample_rate_hz) {
if (WebRtcOpus_PacketHasFec(payload, payload_length_bytes) != 1) {
return 0;
}
const int samples =
opus_packet_get_samples_per_frame(payload, sample_rate_hz);
const int samples_per_ms = sample_rate_hz / 1000;
if (samples < 10 * samples_per_ms || samples > 120 * samples_per_ms) {
/* Invalid payload duration. */
return 0;
}
return samples;
}
int WebRtcOpus_NumSilkFrames(const uint8_t* payload) {
// For computing the payload length in ms, the sample rate is not important
// since it cancels out. We use 48 kHz, but any valid sample rate would work.
int payload_length_ms =
opus_packet_get_samples_per_frame(payload, 48000) / 48;
if (payload_length_ms < 10)
payload_length_ms = 10;
int silk_frames;
switch (payload_length_ms) {
case 10:
case 20:
silk_frames = 1;
break;
case 40:
silk_frames = 2;
break;
case 60:
silk_frames = 3;
break;
default:
      return 0;  // Such a packet is in fact invalid.
}
return silk_frames;
}
// This method is based on Definition of the Opus Audio Codec
// (https://tools.ietf.org/html/rfc6716). Basically, this method is based on
// parsing the LP layer of an Opus packet, particularly the LBRR flag.
int WebRtcOpus_PacketHasFec(const uint8_t* payload,
size_t payload_length_bytes) {
if (payload == NULL || payload_length_bytes == 0)
return 0;
// In CELT_ONLY mode, packets should not have FEC.
if (payload[0] & 0x80)
return 0;
int silk_frames = WebRtcOpus_NumSilkFrames(payload);
if (silk_frames == 0)
return 0; // Not valid.
const int channels = opus_packet_get_nb_channels(payload);
RTC_DCHECK(channels == 1 || channels == 2);
// Max number of frames in an Opus packet is 48.
opus_int16 frame_sizes[48];
const unsigned char* frame_data[48];
// Parse packet to get the frames. But we only care about the first frame,
// since we can only decode the FEC from the first one.
if (opus_packet_parse(payload, static_cast<opus_int32>(payload_length_bytes),
NULL, frame_data, frame_sizes, NULL) < 0) {
return 0;
}
if (frame_sizes[0] < 1) {
return 0;
}
// A frame starts with the LP layer. The LP layer begins with two to eight
  // header bits. These consist of one VAD bit per SILK frame (up to 3),
// followed by a single flag indicating the presence of LBRR frames.
// For a stereo packet, these first flags correspond to the mid channel, and
// a second set of flags is included for the side channel. Because these are
// the first symbols decoded by the range coder and because they are coded
// as binary values with uniform probability, they can be extracted directly
// from the most significant bits of the first byte of compressed data.
for (int n = 0; n < channels; n++) {
    // The LBRR bit for channel 1 is on the (`silk_frames` + 1)-th bit, and
    // that of channel 2 is on the ((`silk_frames` + 1) * 2)-th bit.
if (frame_data[0][0] & (0x80 >> ((n + 1) * (silk_frames + 1) - 1)))
return 1;
}
return 0;
}
int WebRtcOpus_PacketHasVoiceActivity(const uint8_t* payload,
size_t payload_length_bytes) {
if (payload == NULL || payload_length_bytes == 0)
return 0;
  // In CELT_ONLY mode we cannot determine whether there is VAD.
if (payload[0] & 0x80)
return -1;
int silk_frames = WebRtcOpus_NumSilkFrames(payload);
if (silk_frames == 0)
return -1;
const int channels = opus_packet_get_nb_channels(payload);
RTC_DCHECK(channels == 1 || channels == 2);
// Max number of frames in an Opus packet is 48.
opus_int16 frame_sizes[48];
const unsigned char* frame_data[48];
// Parse packet to get the frames.
int frames =
opus_packet_parse(payload, static_cast<opus_int32>(payload_length_bytes),
NULL, frame_data, frame_sizes, NULL);
if (frames < 0)
return -1;
// Iterate over all Opus frames which may contain multiple SILK frames.
for (int frame = 0; frame < frames; frame++) {
if (frame_sizes[frame] < 1) {
continue;
}
if (frame_data[frame][0] >> (8 - silk_frames))
return 1;
if (channels == 2 &&
(frame_data[frame][0] << (silk_frames + 1)) >> (8 - silk_frames))
return 1;
}
return 0;
}
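To make the LBRR bit arithmetic in `WebRtcOpus_PacketHasFec` concrete, here is a worked sketch of the mask it computes (a standalone restatement of the shift expression above, not extra source code):

```cpp
// Mask selecting the LBRR flag of channel `n` (0-based) in the first byte of
// compressed data, given the number of SILK frames per channel.
constexpr int LbrrMask(int channel, int silk_frames) {
  return 0x80 >> ((channel + 1) * (silk_frames + 1) - 1);
}
// Mono 20 ms packet (one SILK frame): one VAD bit in the MSB, LBRR flag next.
static_assert(LbrrMask(0, 1) == 0x40, "mono, 20 ms");
// Mono 60 ms packet (three SILK frames): three VAD bits, then the LBRR flag.
static_assert(LbrrMask(0, 3) == 0x10, "mono, 60 ms");
// Stereo 20 ms packet: the side channel's flags follow the mid channel's.
static_assert(LbrrMask(1, 1) == 0x10, "stereo side channel, 20 ms");
```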

View file

@ -0,0 +1,547 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INTERFACE_H_
#define MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INTERFACE_H_
#include <stddef.h>
#include <stdint.h>
#include "modules/audio_coding/codecs/opus/opus_inst.h"
#ifdef __cplusplus
extern "C" {
#endif
// Opaque wrapper types for the codec state.
typedef struct WebRtcOpusEncInst OpusEncInst;
typedef struct WebRtcOpusDecInst OpusDecInst;
/****************************************************************************
* WebRtcOpus_EncoderCreate(...)
*
* This function creates an Opus encoder that encodes mono or stereo.
*
* Input:
* - channels : number of channels; 1 or 2.
* - application : 0 - VOIP applications.
* Favor speech intelligibility.
* 1 - Audio applications.
* Favor faithfulness to the original input.
* - sample_rate_hz : sample rate of input audio
*
* Output:
* - inst : a pointer to Encoder context that is created
* if success.
*
* Return value : 0 - Success
* -1 - Error
*/
int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst,
size_t channels,
int32_t application,
int sample_rate_hz);
/****************************************************************************
* WebRtcOpus_MultistreamEncoderCreate(...)
*
* This function creates an Opus encoder with any supported channel count.
*
* Input:
* - channels : number of channels in the input of the encoder.
* - application : 0 - VOIP applications.
* Favor speech intelligibility.
* 1 - Audio applications.
* Favor faithfulness to the original input.
* - streams : number of streams, as described in RFC 7845.
* - coupled_streams : number of coupled streams, as described in
* RFC 7845.
* - channel_mapping : the channel mapping; pointer to array of
* `channel` bytes, as described in RFC 7845.
*
* Output:
* - inst : a pointer to Encoder context that is created
* if success.
*
* Return value : 0 - Success
* -1 - Error
*/
int16_t WebRtcOpus_MultistreamEncoderCreate(
OpusEncInst** inst,
size_t channels,
int32_t application,
size_t streams,
size_t coupled_streams,
const unsigned char* channel_mapping);
int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst);
/****************************************************************************
* WebRtcOpus_Encode(...)
*
* This function encodes audio as a series of Opus frames and inserts
* it into a packet. Input buffer can be any length.
*
* Input:
* - inst : Encoder context
* - audio_in : Input speech data buffer
* - samples : Samples per channel in audio_in
* - length_encoded_buffer : Output buffer size
*
* Output:
* - encoded : Output compressed data buffer
*
* Return value : >=0 - Length (in bytes) of coded data
* -1 - Error
*/
int WebRtcOpus_Encode(OpusEncInst* inst,
const int16_t* audio_in,
size_t samples,
size_t length_encoded_buffer,
uint8_t* encoded);
/****************************************************************************
* WebRtcOpus_SetBitRate(...)
*
* This function adjusts the target bitrate of the encoder.
*
* Input:
* - inst : Encoder context
* - rate : New target bitrate
*
* Return value : 0 - Success
* -1 - Error
*/
int16_t WebRtcOpus_SetBitRate(OpusEncInst* inst, int32_t rate);
/****************************************************************************
* WebRtcOpus_SetPacketLossRate(...)
*
* This function configures the encoder's expected packet loss percentage.
*
* Input:
* - inst : Encoder context
* - loss_rate : loss percentage in the range 0-100, inclusive.
* Return value : 0 - Success
* -1 - Error
*/
int16_t WebRtcOpus_SetPacketLossRate(OpusEncInst* inst, int32_t loss_rate);
/****************************************************************************
* WebRtcOpus_SetMaxPlaybackRate(...)
*
 * Configures the maximum playback rate for encoding. Due to hardware
 * limitations, the receiver may only be able to render audio up to a certain
 * playback rate. The Opus encoder can use this information to optimize for
 * network usage and encoding complexity. This affects the audio bandwidth of
 * the coded audio. However, the input/output sample rate is not affected.
*
* Input:
* - inst : Encoder context
* - frequency_hz : Maximum playback rate in Hz.
* This parameter can take any value. The relation
* between the value and the Opus internal mode is
* as following:
* frequency_hz <= 8000 narrow band
* 8000 < frequency_hz <= 12000 medium band
* 12000 < frequency_hz <= 16000 wide band
* 16000 < frequency_hz <= 24000 super wide band
* frequency_hz > 24000 full band
* Return value : 0 - Success
* -1 - Error
*/
int16_t WebRtcOpus_SetMaxPlaybackRate(OpusEncInst* inst, int32_t frequency_hz);
/****************************************************************************
* WebRtcOpus_GetMaxPlaybackRate(...)
*
* Queries the maximum playback rate for encoding. If different single-stream
* encoders have different maximum playback rates, this function fails.
*
* Input:
* - inst : Encoder context.
* Output:
* - result_hz : The maximum playback rate in Hz.
* Return value : 0 - Success
* -1 - Error
*/
int16_t WebRtcOpus_GetMaxPlaybackRate(OpusEncInst* const inst,
int32_t* result_hz);
/* TODO(minyue): Check whether an API to check the FEC and the packet loss rate
* is needed. It might not be very useful since there are not many use cases and
* the caller can always maintain the states. */
/****************************************************************************
* WebRtcOpus_EnableFec()
*
* This function enables FEC for encoding.
*
* Input:
* - inst : Encoder context
*
* Return value : 0 - Success
* -1 - Error
*/
int16_t WebRtcOpus_EnableFec(OpusEncInst* inst);
/****************************************************************************
* WebRtcOpus_DisableFec()
*
* This function disables FEC for encoding.
*
* Input:
* - inst : Encoder context
*
* Return value : 0 - Success
* -1 - Error
*/
int16_t WebRtcOpus_DisableFec(OpusEncInst* inst);
/****************************************************************************
* WebRtcOpus_EnableDtx()
*
* This function enables Opus internal DTX for encoding.
*
* Input:
* - inst : Encoder context
*
* Return value : 0 - Success
* -1 - Error
*/
int16_t WebRtcOpus_EnableDtx(OpusEncInst* inst);
/****************************************************************************
* WebRtcOpus_DisableDtx()
*
* This function disables Opus internal DTX for encoding.
*
* Input:
* - inst : Encoder context
*
* Return value : 0 - Success
* -1 - Error
*/
int16_t WebRtcOpus_DisableDtx(OpusEncInst* inst);
/****************************************************************************
* WebRtcOpus_GetUseDtx()
*
* This function gets the DTX configuration used for encoding.
*
* Input:
* - inst : Encoder context
*
* Return value : 0 - Encoder does not use DTX.
* 1 - Encoder uses DTX.
* -1 - Error.
*/
int16_t WebRtcOpus_GetUseDtx(OpusEncInst* inst);
/****************************************************************************
* WebRtcOpus_EnableCbr()
*
* This function enables CBR for encoding.
*
* Input:
* - inst : Encoder context
*
* Return value : 0 - Success
* -1 - Error
*/
int16_t WebRtcOpus_EnableCbr(OpusEncInst* inst);
/****************************************************************************
* WebRtcOpus_DisableCbr()
*
* This function disables CBR for encoding.
*
* Input:
* - inst : Encoder context
*
* Return value : 0 - Success
* -1 - Error
*/
int16_t WebRtcOpus_DisableCbr(OpusEncInst* inst);
/*
* WebRtcOpus_SetComplexity(...)
*
* This function adjusts the computational complexity. The effect is the same as
* calling the complexity setting of Opus as an Opus encoder related CTL.
*
* Input:
* - inst : Encoder context
* - complexity : New target complexity (0-10, inclusive)
*
* Return value : 0 - Success
* -1 - Error
*/
int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity);
/*
* WebRtcOpus_GetBandwidth(...)
*
* This function returns the current bandwidth.
*
* Input:
* - inst : Encoder context
*
* Return value : Bandwidth - Success
* -1 - Error
*/
int32_t WebRtcOpus_GetBandwidth(OpusEncInst* inst);
/*
* WebRtcOpus_SetBandwidth(...)
*
* By default Opus decides which bandwidth to encode the signal in depending on
 * the bitrate. This function overrules the previous setting and forces the
* encoder to encode in narrowband/wideband/fullband/etc.
*
* Input:
* - inst : Encoder context
* - bandwidth : New target bandwidth. Valid values are:
* OPUS_BANDWIDTH_NARROWBAND
* OPUS_BANDWIDTH_MEDIUMBAND
* OPUS_BANDWIDTH_WIDEBAND
* OPUS_BANDWIDTH_SUPERWIDEBAND
* OPUS_BANDWIDTH_FULLBAND
*
* Return value : 0 - Success
* -1 - Error
*/
int16_t WebRtcOpus_SetBandwidth(OpusEncInst* inst, int32_t bandwidth);
/*
* WebRtcOpus_GetInDtx(...)
*
* Gets the DTX state of the encoder.
*
* Input:
* - inst : Encoder context
*
* Return value : -1 - Error.
* 1 - Last encoded frame was comfort noise update during DTX.
* 0 - Last encoded frame was encoded with encoder not in DTX.
*/
int32_t WebRtcOpus_GetInDtx(OpusEncInst* inst);
/*
* WebRtcOpus_SetForceChannels(...)
*
* If the encoder is initialized as a stereo encoder, Opus will by default
* decide whether to encode in mono or stereo based on the bitrate. This
* function overrules the previous setting, and forces the encoder to encode
* in auto/mono/stereo.
*
* If the Encoder is initialized as a mono encoder, and one tries to force
* stereo, the function will return an error.
*
* Input:
* - inst : Encoder context
* - num_channels : 0 - Not forced
* 1 - Mono
* 2 - Stereo
*
* Return value : 0 - Success
* -1 - Error
*/
int16_t WebRtcOpus_SetForceChannels(OpusEncInst* inst, size_t num_channels);
int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst,
size_t channels,
int sample_rate_hz);
/****************************************************************************
* WebRtcOpus_MultistreamDecoderCreate(...)
*
* This function creates an Opus decoder with any supported channel count.
*
* Input:
* - channels : number of output channels that the decoder
* will produce.
* - streams : number of encoded streams, as described in
* RFC 7845.
* - coupled_streams : number of coupled streams, as described in
* RFC 7845.
* - channel_mapping : the channel mapping; pointer to array of
* `channel` bytes, as described in RFC 7845.
*
* Output:
* - inst : a pointer to a Decoder context that is created
* if success.
*
* Return value : 0 - Success
* -1 - Error
*/
int16_t WebRtcOpus_MultistreamDecoderCreate(
OpusDecInst** inst,
size_t channels,
size_t streams,
size_t coupled_streams,
const unsigned char* channel_mapping);
int16_t WebRtcOpus_DecoderFree(OpusDecInst* inst);
/****************************************************************************
* WebRtcOpus_DecoderChannels(...)
*
* This function returns the number of channels created for Opus decoder.
*/
size_t WebRtcOpus_DecoderChannels(OpusDecInst* inst);
/****************************************************************************
* WebRtcOpus_DecoderInit(...)
*
* This function resets state of the decoder.
*
* Input:
* - inst : Decoder context
*/
void WebRtcOpus_DecoderInit(OpusDecInst* inst);
/****************************************************************************
* WebRtcOpus_Decode(...)
*
* This function decodes an Opus packet into one or more audio frames at the
 * decoder's configured sampling rate.
*
* Input:
* - inst : Decoder context
* - encoded : Encoded data
* - encoded_bytes : Bytes in encoded vector
*
* Output:
* - decoded : The decoded vector
 *      - audio_type        : 0 - speech, 2 - comfort noise. Comfort noise is
 *                            reported for DTX payloads (at most 2 bytes) and
 *                            for the PLC output that follows them.
*
* Return value : >0 - Samples per channel in decoded vector
* -1 - Error
*/
int WebRtcOpus_Decode(OpusDecInst* inst,
const uint8_t* encoded,
size_t encoded_bytes,
int16_t* decoded,
int16_t* audio_type);
/****************************************************************************
* WebRtcOpus_DecodeFec(...)
*
 * This function decodes the FEC data from an Opus packet into one or more
 * audio frames at the decoder's configured sampling rate.
*
* Input:
* - inst : Decoder context
* - encoded : Encoded data
* - encoded_bytes : Bytes in encoded vector
*
* Output:
* - decoded : The decoded vector (previous frame)
*
* Return value : >0 - Samples per channel in decoded vector
* 0 - No FEC data in the packet
* -1 - Error
*/
int WebRtcOpus_DecodeFec(OpusDecInst* inst,
const uint8_t* encoded,
size_t encoded_bytes,
int16_t* decoded,
int16_t* audio_type);
/****************************************************************************
* WebRtcOpus_DurationEst(...)
*
* This function calculates the duration of an opus packet.
* Input:
* - inst : Decoder context
* - payload : Encoded data pointer
* - payload_length_bytes : Bytes of encoded data
*
* Return value : The duration of the packet, in samples per
* channel.
*/
int WebRtcOpus_DurationEst(OpusDecInst* inst,
const uint8_t* payload,
size_t payload_length_bytes);
/****************************************************************************
* WebRtcOpus_PlcDuration(...)
*
* This function calculates the duration of a frame returned by packet loss
* concealment (PLC).
*
* Input:
* - inst : Decoder context
*
* Return value : The duration of a frame returned by PLC, in
* samples per channel.
*/
int WebRtcOpus_PlcDuration(OpusDecInst* inst);
/* TODO(minyue): Check whether it is needed to add a decoder context to the
* arguments, like WebRtcOpus_DurationEst(...). In fact, the packet itself tells
* the duration. The decoder context in WebRtcOpus_DurationEst(...) is not used.
* So it may be advisable to remove it from WebRtcOpus_DurationEst(...). */
/****************************************************************************
* WebRtcOpus_FecDurationEst(...)
*
* This function calculates the duration of the FEC data within an opus packet.
* Input:
* - payload : Encoded data pointer
* - payload_length_bytes : Bytes of encoded data
* - sample_rate_hz : Sample rate of output audio
*
* Return value : >0 - The duration of the FEC data in the
* packet in samples per channel.
* 0 - No FEC data in the packet.
*/
int WebRtcOpus_FecDurationEst(const uint8_t* payload,
size_t payload_length_bytes,
int sample_rate_hz);
/****************************************************************************
* WebRtcOpus_PacketHasFec(...)
*
* This function detects if an opus packet has FEC.
* Input:
* - payload : Encoded data pointer
* - payload_length_bytes : Bytes of encoded data
*
* Return value : 0 - the packet does NOT contain FEC.
* 1 - the packet contains FEC.
*/
int WebRtcOpus_PacketHasFec(const uint8_t* payload,
size_t payload_length_bytes);
/****************************************************************************
* WebRtcOpus_PacketHasVoiceActivity(...)
*
* This function returns the SILK VAD information encoded in the opus packet.
* For CELT-only packets that do not have VAD information, it returns -1.
* Input:
* - payload : Encoded data pointer
* - payload_length_bytes : Bytes of encoded data
*
* Return value : 0 - no frame had the VAD flag set.
* 1 - at least one frame had the VAD flag set.
* -1 - VAD status could not be determined.
*/
int WebRtcOpus_PacketHasVoiceActivity(const uint8_t* payload,
size_t payload_length_bytes);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INTERFACE_H_
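Finally, a minimal usage sketch of this C API (not from the source tree; the bitrate, buffer sizes, and helper name are illustrative):

```cpp
#include <cstdint>
#include <vector>

#include "modules/audio_coding/codecs/opus/opus_interface.h"

// Encode and decode one 20 ms mono frame at 48 kHz (960 samples per channel).
// Error handling is elided for brevity.
void EncodeDecodeOneFrame(const int16_t* pcm) {
  OpusEncInst* enc = nullptr;
  OpusDecInst* dec = nullptr;
  WebRtcOpus_EncoderCreate(&enc, /*channels=*/1, /*application=*/0, 48000);
  WebRtcOpus_DecoderCreate(&dec, /*channels=*/1, 48000);
  WebRtcOpus_SetBitRate(enc, 32000);
  WebRtcOpus_EnableFec(enc);

  uint8_t packet[1500];  // Ample room for a single 32 kbps Opus frame.
  const int bytes =
      WebRtcOpus_Encode(enc, pcm, /*samples=*/960, sizeof(packet), packet);
  if (bytes > 0) {
    // The decoder may emit up to 120 ms per call, so size for 5760 samples.
    std::vector<int16_t> out(5760);
    int16_t audio_type = 0;
    WebRtcOpus_Decode(dec, packet, bytes, out.data(), &audio_type);
  }
  WebRtcOpus_EncoderFree(enc);
  WebRtcOpus_DecoderFree(dec);
}
```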

View file

@ -0,0 +1,147 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/codecs/opus/opus_interface.h"
#include "modules/audio_coding/codecs/tools/audio_codec_speed_test.h"
using ::std::string;
namespace webrtc {
static const int kOpusBlockDurationMs = 20;
static const int kOpusSamplingKhz = 48;
class OpusSpeedTest : public AudioCodecSpeedTest {
protected:
OpusSpeedTest();
void SetUp() override;
void TearDown() override;
float EncodeABlock(int16_t* in_data,
uint8_t* bit_stream,
size_t max_bytes,
size_t* encoded_bytes) override;
float DecodeABlock(const uint8_t* bit_stream,
size_t encoded_bytes,
int16_t* out_data) override;
WebRtcOpusEncInst* opus_encoder_;
WebRtcOpusDecInst* opus_decoder_;
};
OpusSpeedTest::OpusSpeedTest()
: AudioCodecSpeedTest(kOpusBlockDurationMs,
kOpusSamplingKhz,
kOpusSamplingKhz),
opus_encoder_(NULL),
opus_decoder_(NULL) {}
void OpusSpeedTest::SetUp() {
AudioCodecSpeedTest::SetUp();
// If channels_ == 1, use Opus VOIP mode, otherwise, audio mode.
int app = channels_ == 1 ? 0 : 1;
/* Create encoder memory. */
EXPECT_EQ(0, WebRtcOpus_EncoderCreate(&opus_encoder_, channels_, app, 48000));
EXPECT_EQ(0, WebRtcOpus_DecoderCreate(&opus_decoder_, channels_, 48000));
/* Set bitrate. */
EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_, bit_rate_));
}
void OpusSpeedTest::TearDown() {
AudioCodecSpeedTest::TearDown();
/* Free memory. */
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
}
float OpusSpeedTest::EncodeABlock(int16_t* in_data,
uint8_t* bit_stream,
size_t max_bytes,
size_t* encoded_bytes) {
clock_t clocks = clock();
int value = WebRtcOpus_Encode(opus_encoder_, in_data, input_length_sample_,
max_bytes, bit_stream);
clocks = clock() - clocks;
EXPECT_GT(value, 0);
*encoded_bytes = static_cast<size_t>(value);
return 1000.0 * clocks / CLOCKS_PER_SEC;
}
float OpusSpeedTest::DecodeABlock(const uint8_t* bit_stream,
size_t encoded_bytes,
int16_t* out_data) {
int value;
int16_t audio_type;
clock_t clocks = clock();
value = WebRtcOpus_Decode(opus_decoder_, bit_stream, encoded_bytes, out_data,
&audio_type);
clocks = clock() - clocks;
EXPECT_EQ(output_length_sample_, static_cast<size_t>(value));
return 1000.0 * clocks / CLOCKS_PER_SEC;
}
/* Test audio length in seconds. */
constexpr size_t kDurationSec = 400;
#define ADD_TEST(complexity) \
TEST_P(OpusSpeedTest, OpusSetComplexityTest##complexity) { \
/* Set complexity. */ \
printf("Setting complexity to %d ...\n", complexity); \
EXPECT_EQ(0, WebRtcOpus_SetComplexity(opus_encoder_, complexity)); \
EncodeDecode(kDurationSec); \
}
ADD_TEST(10)
ADD_TEST(9)
ADD_TEST(8)
ADD_TEST(7)
ADD_TEST(6)
ADD_TEST(5)
ADD_TEST(4)
ADD_TEST(3)
ADD_TEST(2)
ADD_TEST(1)
ADD_TEST(0)
#define ADD_BANDWIDTH_TEST(bandwidth) \
TEST_P(OpusSpeedTest, OpusSetBandwidthTest##bandwidth) { \
/* Set bandwidth. */ \
printf("Setting bandwidth to %d ...\n", bandwidth); \
EXPECT_EQ(0, WebRtcOpus_SetBandwidth(opus_encoder_, bandwidth)); \
EncodeDecode(kDurationSec); \
}
ADD_BANDWIDTH_TEST(OPUS_BANDWIDTH_NARROWBAND)
ADD_BANDWIDTH_TEST(OPUS_BANDWIDTH_MEDIUMBAND)
ADD_BANDWIDTH_TEST(OPUS_BANDWIDTH_WIDEBAND)
ADD_BANDWIDTH_TEST(OPUS_BANDWIDTH_SUPERWIDEBAND)
ADD_BANDWIDTH_TEST(OPUS_BANDWIDTH_FULLBAND)
// List all test cases: (channel, bit rate, filename, extension).
const coding_param param_set[] = {
std::make_tuple(1,
64000,
string("audio_coding/speech_mono_32_48kHz"),
string("pcm"),
true),
std::make_tuple(1,
32000,
string("audio_coding/speech_mono_32_48kHz"),
string("pcm"),
true),
std::make_tuple(2,
64000,
string("audio_coding/music_stereo_48kHz"),
string("pcm"),
true)};
INSTANTIATE_TEST_SUITE_P(AllTest,
OpusSpeedTest,
::testing::ValuesIn(param_set));
} // namespace webrtc
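The clock()-based stopwatch pattern in EncodeABlock()/DecodeABlock() can be factored into a small helper; the sketch below is ours, not part of the harness, but shows the same measurement idiom.
#include <ctime>
// Runs `f` once and returns the elapsed CPU time in milliseconds, mirroring
// the EncodeABlock()/DecodeABlock() timing above.
template <typename F>
float RunTimedMs(F&& f) {
  const clock_t start = clock();
  f();
  return 1000.0f * (clock() - start) / CLOCKS_PER_SEC;
}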

View file

@ -0,0 +1,979 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <memory>
#include <string>
#include "modules/audio_coding/codecs/opus/opus_inst.h"
#include "modules/audio_coding/codecs/opus/opus_interface.h"
#include "modules/audio_coding/neteq/tools/audio_loop.h"
#include "rtc_base/checks.h"
#include "rtc_base/numerics/safe_conversions.h"
#include "test/gtest.h"
#include "test/testsupport/file_utils.h"
namespace webrtc {
namespace {
// Equivalent to SDP params
// {{"channel_mapping", "0,1,2,3"}, {"coupled_streams", "2"}}.
constexpr unsigned char kQuadChannelMapping[] = {0, 1, 2, 3};
constexpr int kQuadTotalStreams = 2;
constexpr int kQuadCoupledStreams = 2;
constexpr unsigned char kStereoChannelMapping[] = {0, 1};
constexpr int kStereoTotalStreams = 1;
constexpr int kStereoCoupledStreams = 1;
constexpr unsigned char kMonoChannelMapping[] = {0};
constexpr int kMonoTotalStreams = 1;
constexpr int kMonoCoupledStreams = 0;
void CreateSingleOrMultiStreamEncoder(WebRtcOpusEncInst** opus_encoder,
int channels,
int application,
bool use_multistream,
int encoder_sample_rate_hz) {
EXPECT_TRUE(channels == 1 || channels == 2 || use_multistream);
if (use_multistream) {
EXPECT_EQ(encoder_sample_rate_hz, 48000);
if (channels == 1) {
EXPECT_EQ(0, WebRtcOpus_MultistreamEncoderCreate(
opus_encoder, channels, application, kMonoTotalStreams,
kMonoCoupledStreams, kMonoChannelMapping));
} else if (channels == 2) {
EXPECT_EQ(0, WebRtcOpus_MultistreamEncoderCreate(
opus_encoder, channels, application, kStereoTotalStreams,
kStereoCoupledStreams, kStereoChannelMapping));
} else if (channels == 4) {
EXPECT_EQ(0, WebRtcOpus_MultistreamEncoderCreate(
opus_encoder, channels, application, kQuadTotalStreams,
kQuadCoupledStreams, kQuadChannelMapping));
} else {
EXPECT_TRUE(false) << channels;
}
} else {
EXPECT_EQ(0, WebRtcOpus_EncoderCreate(opus_encoder, channels, application,
encoder_sample_rate_hz));
}
}
void CreateSingleOrMultiStreamDecoder(WebRtcOpusDecInst** opus_decoder,
int channels,
bool use_multistream,
int decoder_sample_rate_hz) {
EXPECT_TRUE(channels == 1 || channels == 2 || use_multistream);
if (use_multistream) {
EXPECT_EQ(decoder_sample_rate_hz, 48000);
if (channels == 1) {
EXPECT_EQ(0, WebRtcOpus_MultistreamDecoderCreate(
opus_decoder, channels, kMonoTotalStreams,
kMonoCoupledStreams, kMonoChannelMapping));
} else if (channels == 2) {
EXPECT_EQ(0, WebRtcOpus_MultistreamDecoderCreate(
opus_decoder, channels, kStereoTotalStreams,
kStereoCoupledStreams, kStereoChannelMapping));
} else if (channels == 4) {
EXPECT_EQ(0, WebRtcOpus_MultistreamDecoderCreate(
opus_decoder, channels, kQuadTotalStreams,
kQuadCoupledStreams, kQuadChannelMapping));
} else {
EXPECT_TRUE(false) << channels;
}
} else {
EXPECT_EQ(0, WebRtcOpus_DecoderCreate(opus_decoder, channels,
decoder_sample_rate_hz));
}
}
int SamplesPerChannel(int sample_rate_hz, int duration_ms) {
const int samples_per_ms = rtc::CheckedDivExact(sample_rate_hz, 1000);
return samples_per_ms * duration_ms;
}
using test::AudioLoop;
using ::testing::Combine;
using ::testing::TestWithParam;
using ::testing::Values;
// Maximum number of bytes in output bitstream.
const size_t kMaxBytes = 2000;
class OpusTest
: public TestWithParam<::testing::tuple<size_t, int, bool, int, int>> {
protected:
OpusTest() = default;
void TestDtxEffect(bool dtx, int block_length_ms);
void TestCbrEffect(bool dtx, int block_length_ms);
// Prepare `speech_data_` for encoding, read from a hard-coded file.
// After preparation, `speech_data_.GetNextBlock()` returns a pointer to a
// block of `block_length_ms` milliseconds. The data is looped every
// `loop_length_ms` milliseconds.
void PrepareSpeechData(int block_length_ms, int loop_length_ms);
int EncodeDecode(WebRtcOpusEncInst* encoder,
rtc::ArrayView<const int16_t> input_audio,
WebRtcOpusDecInst* decoder,
int16_t* output_audio,
int16_t* audio_type);
void SetMaxPlaybackRate(WebRtcOpusEncInst* encoder,
opus_int32 expect,
int32_t set);
void CheckAudioBounded(const int16_t* audio,
size_t samples,
size_t channels,
uint16_t bound) const;
WebRtcOpusEncInst* opus_encoder_ = nullptr;
WebRtcOpusDecInst* opus_decoder_ = nullptr;
AudioLoop speech_data_;
uint8_t bitstream_[kMaxBytes];
size_t encoded_bytes_ = 0;
const size_t channels_{std::get<0>(GetParam())};
const int application_{std::get<1>(GetParam())};
const bool use_multistream_{std::get<2>(GetParam())};
const int encoder_sample_rate_hz_{std::get<3>(GetParam())};
const int decoder_sample_rate_hz_{std::get<4>(GetParam())};
};
} // namespace
// Singlestream: Try all combinations.
INSTANTIATE_TEST_SUITE_P(Singlestream,
OpusTest,
testing::Combine(testing::Values(1, 2),
testing::Values(0, 1),
testing::Values(false),
testing::Values(16000, 48000),
testing::Values(16000, 48000)));
// Multistream: Some representative cases (only 48 kHz for now).
INSTANTIATE_TEST_SUITE_P(
Multistream,
OpusTest,
testing::Values(std::make_tuple(1, 0, true, 48000, 48000),
std::make_tuple(2, 1, true, 48000, 48000),
std::make_tuple(4, 0, true, 48000, 48000),
std::make_tuple(4, 1, true, 48000, 48000)));
void OpusTest::PrepareSpeechData(int block_length_ms, int loop_length_ms) {
std::map<int, std::string> channel_to_basename = {
{1, "audio_coding/testfile32kHz"},
{2, "audio_coding/teststereo32kHz"},
{4, "audio_coding/speech_4_channels_48k_one_second"}};
std::map<int, std::string> channel_to_suffix = {
{1, "pcm"}, {2, "pcm"}, {4, "wav"}};
const std::string file_name = webrtc::test::ResourcePath(
channel_to_basename[channels_], channel_to_suffix[channels_]);
if (loop_length_ms < block_length_ms) {
loop_length_ms = block_length_ms;
}
const int sample_rate_khz =
rtc::CheckedDivExact(encoder_sample_rate_hz_, 1000);
EXPECT_TRUE(speech_data_.Init(file_name,
loop_length_ms * sample_rate_khz * channels_,
block_length_ms * sample_rate_khz * channels_));
}
void OpusTest::SetMaxPlaybackRate(WebRtcOpusEncInst* encoder,
opus_int32 expect,
int32_t set) {
opus_int32 bandwidth;
EXPECT_EQ(0, WebRtcOpus_SetMaxPlaybackRate(opus_encoder_, set));
EXPECT_EQ(0, WebRtcOpus_GetMaxPlaybackRate(opus_encoder_, &bandwidth));
EXPECT_EQ(expect, bandwidth);
}
void OpusTest::CheckAudioBounded(const int16_t* audio,
size_t samples,
size_t channels,
uint16_t bound) const {
for (size_t i = 0; i < samples; ++i) {
for (size_t c = 0; c < channels; ++c) {
ASSERT_GE(audio[i * channels + c], -bound);
ASSERT_LE(audio[i * channels + c], bound);
}
}
}
int OpusTest::EncodeDecode(WebRtcOpusEncInst* encoder,
rtc::ArrayView<const int16_t> input_audio,
WebRtcOpusDecInst* decoder,
int16_t* output_audio,
int16_t* audio_type) {
const int input_samples_per_channel =
rtc::CheckedDivExact(input_audio.size(), channels_);
int encoded_bytes_int =
WebRtcOpus_Encode(encoder, input_audio.data(), input_samples_per_channel,
kMaxBytes, bitstream_);
EXPECT_GE(encoded_bytes_int, 0);
encoded_bytes_ = static_cast<size_t>(encoded_bytes_int);
if (encoded_bytes_ != 0) {
int est_len = WebRtcOpus_DurationEst(decoder, bitstream_, encoded_bytes_);
int act_len = WebRtcOpus_Decode(decoder, bitstream_, encoded_bytes_,
output_audio, audio_type);
EXPECT_EQ(est_len, act_len);
return act_len;
} else {
int total_dtx_len = 0;
const int output_samples_per_channel = input_samples_per_channel *
decoder_sample_rate_hz_ /
encoder_sample_rate_hz_;
while (total_dtx_len < output_samples_per_channel) {
int est_len = WebRtcOpus_DurationEst(decoder, NULL, 0);
int act_len = WebRtcOpus_Decode(decoder, NULL, 0,
&output_audio[total_dtx_len * channels_],
audio_type);
EXPECT_EQ(est_len, act_len);
total_dtx_len += act_len;
}
return total_dtx_len;
}
}
// Test that the encoder/decoder can enter DTX mode properly and does not
// enter DTX when it should not. This test is signal dependent.
void OpusTest::TestDtxEffect(bool dtx, int block_length_ms) {
PrepareSpeechData(block_length_ms, 2000);
const size_t input_samples =
rtc::CheckedDivExact(encoder_sample_rate_hz_, 1000) * block_length_ms;
const size_t output_samples =
rtc::CheckedDivExact(decoder_sample_rate_hz_, 1000) * block_length_ms;
// Create encoder memory.
CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
use_multistream_, encoder_sample_rate_hz_);
CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_,
decoder_sample_rate_hz_);
// Set bitrate.
EXPECT_EQ(
0, WebRtcOpus_SetBitRate(opus_encoder_, channels_ == 1 ? 32000 : 64000));
// Set input audio as silence.
std::vector<int16_t> silence(input_samples * channels_, 0);
// Setting DTX.
EXPECT_EQ(0, dtx ? WebRtcOpus_EnableDtx(opus_encoder_)
: WebRtcOpus_DisableDtx(opus_encoder_));
int16_t audio_type;
int16_t* output_data_decode = new int16_t[output_samples * channels_];
for (int i = 0; i < 100; ++i) {
EXPECT_EQ(output_samples,
static_cast<size_t>(EncodeDecode(
opus_encoder_, speech_data_.GetNextBlock(), opus_decoder_,
output_data_decode, &audio_type)));
// If not DTX, it should never enter DTX mode. If DTX, we do not care, since
// whether it enters DTX depends on the signal type.
if (!dtx) {
EXPECT_GT(encoded_bytes_, 1U);
EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
EXPECT_EQ(0, audio_type); // Speech.
}
}
// We input some silent segments. In DTX mode, the encoder will stop sending.
// However, DTX may only kick in after a while.
for (int i = 0; i < 30; ++i) {
EXPECT_EQ(output_samples, static_cast<size_t>(EncodeDecode(
opus_encoder_, silence, opus_decoder_,
output_data_decode, &audio_type)));
if (!dtx) {
EXPECT_GT(encoded_bytes_, 1U);
EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
EXPECT_EQ(0, audio_type); // Speech.
} else if (encoded_bytes_ == 1) {
EXPECT_EQ(1, opus_encoder_->in_dtx_mode);
EXPECT_EQ(1, opus_decoder_->in_dtx_mode);
EXPECT_EQ(2, audio_type); // Comfort noise.
break;
}
}
// When Opus is in DTX, it wakes up on a regular basis. It sends two packets,
// one with an arbitrary size and the other of 1 byte, then stops sending for
// a certain number of frames.
// `max_dtx_frames` is the maximum number of frames Opus can stay in DTX.
// TODO(kwiberg): Why does this number depend on the encoding sample rate?
const int max_dtx_frames =
(encoder_sample_rate_hz_ == 16000 ? 800 : 400) / block_length_ms + 1;
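// For example, with encoder_sample_rate_hz_ == 48000 and block_length_ms ==
// 20, max_dtx_frames == 400 / 20 + 1 == 21, i.e. at most 21 consecutive
// frames (420 ms) before the encoder is expected to wake up.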
// We run `kRunTimeMs` milliseconds of pure silence.
const int kRunTimeMs = 4500;
// We check that, after `kCheckTimeMs` milliseconds (given that the CNG in
// Opus needs time to adapt), the absolute values of the DTX-decoded signal
// are bounded by `kOutputValueBound`.
const int kCheckTimeMs = 4000;
#if defined(OPUS_FIXED_POINT)
// Fixed-point Opus generates random (comfort) noise, which has a less
// predictable value bound than floating-point Opus. This value depends on the
// input signal and on the time window for checking the output values (between
// `kCheckTimeMs` and `kRunTimeMs`).
const uint16_t kOutputValueBound = 30;
#else
const uint16_t kOutputValueBound = 2;
#endif
int time = 0;
while (time < kRunTimeMs) {
// DTX mode is maintained for at most `max_dtx_frames` frames.
int i = 0;
for (; i < max_dtx_frames; ++i) {
time += block_length_ms;
EXPECT_EQ(output_samples, static_cast<size_t>(EncodeDecode(
opus_encoder_, silence, opus_decoder_,
output_data_decode, &audio_type)));
if (dtx) {
if (encoded_bytes_ > 1)
break;
EXPECT_EQ(0U, encoded_bytes_) // Send 0 byte.
<< "Opus should have entered DTX mode.";
EXPECT_EQ(1, opus_encoder_->in_dtx_mode);
EXPECT_EQ(1, opus_decoder_->in_dtx_mode);
EXPECT_EQ(2, audio_type); // Comfort noise.
if (time >= kCheckTimeMs) {
CheckAudioBounded(output_data_decode, output_samples, channels_,
kOutputValueBound);
}
} else {
EXPECT_GT(encoded_bytes_, 1U);
EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
EXPECT_EQ(0, audio_type); // Speech.
}
}
if (dtx) {
// With DTX, Opus must stop transmission for some time.
EXPECT_GT(i, 1);
}
// We expect a normal payload.
EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
EXPECT_EQ(0, audio_type); // Speech.
// Enters DTX again immediately.
time += block_length_ms;
EXPECT_EQ(output_samples, static_cast<size_t>(EncodeDecode(
opus_encoder_, silence, opus_decoder_,
output_data_decode, &audio_type)));
if (dtx) {
EXPECT_EQ(1U, encoded_bytes_); // Send 1 byte.
EXPECT_EQ(1, opus_encoder_->in_dtx_mode);
EXPECT_EQ(1, opus_decoder_->in_dtx_mode);
EXPECT_EQ(2, audio_type); // Comfort noise.
if (time >= kCheckTimeMs) {
CheckAudioBounded(output_data_decode, output_samples, channels_,
kOutputValueBound);
}
} else {
EXPECT_GT(encoded_bytes_, 1U);
EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
EXPECT_EQ(0, audio_type); // Speech.
}
}
silence[0] = 10000;
if (dtx) {
// Verify that encoder/decoder can jump out from DTX mode.
EXPECT_EQ(output_samples, static_cast<size_t>(EncodeDecode(
opus_encoder_, silence, opus_decoder_,
output_data_decode, &audio_type)));
EXPECT_GT(encoded_bytes_, 1U);
EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
EXPECT_EQ(0, audio_type); // Speech.
}
// Free memory.
delete[] output_data_decode;
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
}
// Test if CBR does what we expect.
void OpusTest::TestCbrEffect(bool cbr, int block_length_ms) {
PrepareSpeechData(block_length_ms, 2000);
const size_t output_samples =
rtc::CheckedDivExact(decoder_sample_rate_hz_, 1000) * block_length_ms;
int32_t max_pkt_size_diff = 0;
int32_t prev_pkt_size = 0;
// Create encoder memory.
CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
use_multistream_, encoder_sample_rate_hz_);
CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_,
decoder_sample_rate_hz_);
// Set bitrate.
EXPECT_EQ(
0, WebRtcOpus_SetBitRate(opus_encoder_, channels_ == 1 ? 32000 : 64000));
// Setting CBR.
EXPECT_EQ(0, cbr ? WebRtcOpus_EnableCbr(opus_encoder_)
: WebRtcOpus_DisableCbr(opus_encoder_));
int16_t audio_type;
std::vector<int16_t> audio_out(output_samples * channels_);
for (int i = 0; i < 100; ++i) {
EXPECT_EQ(output_samples,
static_cast<size_t>(
EncodeDecode(opus_encoder_, speech_data_.GetNextBlock(),
opus_decoder_, audio_out.data(), &audio_type)));
if (prev_pkt_size > 0) {
int32_t diff = std::abs(static_cast<int32_t>(encoded_bytes_) - prev_pkt_size);
max_pkt_size_diff = std::max(max_pkt_size_diff, diff);
}
prev_pkt_size = rtc::checked_cast<int32_t>(encoded_bytes_);
}
if (cbr) {
EXPECT_EQ(max_pkt_size_diff, 0);
} else {
EXPECT_GT(max_pkt_size_diff, 0);
}
// Free memory.
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
}
// Test failing Create.
TEST(OpusTest, OpusCreateFail) {
WebRtcOpusEncInst* opus_encoder;
WebRtcOpusDecInst* opus_decoder;
// Test to see that an invalid pointer is caught.
EXPECT_EQ(-1, WebRtcOpus_EncoderCreate(NULL, 1, 0, 48000));
// Invalid channel number.
EXPECT_EQ(-1, WebRtcOpus_EncoderCreate(&opus_encoder, 257, 0, 48000));
// Invalid application mode.
EXPECT_EQ(-1, WebRtcOpus_EncoderCreate(&opus_encoder, 1, 2, 48000));
// Invalid sample rate.
EXPECT_EQ(-1, WebRtcOpus_EncoderCreate(&opus_encoder, 1, 0, 12345));
EXPECT_EQ(-1, WebRtcOpus_DecoderCreate(NULL, 1, 48000));
// Invalid channel number.
EXPECT_EQ(-1, WebRtcOpus_DecoderCreate(&opus_decoder, 257, 48000));
// Invalid sample rate.
EXPECT_EQ(-1, WebRtcOpus_DecoderCreate(&opus_decoder, 1, 12345));
}
// Test failing Free.
TEST(OpusTest, OpusFreeFail) {
// Test to see that an invalid pointer is caught.
EXPECT_EQ(-1, WebRtcOpus_EncoderFree(NULL));
EXPECT_EQ(-1, WebRtcOpus_DecoderFree(NULL));
}
// Test normal Create and Free.
TEST_P(OpusTest, OpusCreateFree) {
CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
use_multistream_, encoder_sample_rate_hz_);
CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_,
decoder_sample_rate_hz_);
EXPECT_TRUE(opus_encoder_ != NULL);
EXPECT_TRUE(opus_decoder_ != NULL);
// Free encoder and decoder memory.
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
}
#define ENCODER_CTL(inst, vargs) \
inst->encoder \
? opus_encoder_ctl(inst->encoder, vargs) \
: opus_multistream_encoder_ctl(inst->multistream_encoder, vargs)
TEST_P(OpusTest, OpusEncodeDecode) {
PrepareSpeechData(20, 20);
// Create encoder memory.
CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
use_multistream_, encoder_sample_rate_hz_);
CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_,
decoder_sample_rate_hz_);
// Set bitrate.
EXPECT_EQ(
0, WebRtcOpus_SetBitRate(opus_encoder_, channels_ == 1 ? 32000 : 64000));
// Check number of channels for decoder.
EXPECT_EQ(channels_, WebRtcOpus_DecoderChannels(opus_decoder_));
// Check application mode.
opus_int32 app;
ENCODER_CTL(opus_encoder_, OPUS_GET_APPLICATION(&app));
EXPECT_EQ(application_ == 0 ? OPUS_APPLICATION_VOIP : OPUS_APPLICATION_AUDIO,
app);
// Encode & decode.
int16_t audio_type;
const int decode_samples_per_channel =
SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/20);
int16_t* output_data_decode =
new int16_t[decode_samples_per_channel * channels_];
EXPECT_EQ(decode_samples_per_channel,
EncodeDecode(opus_encoder_, speech_data_.GetNextBlock(),
opus_decoder_, output_data_decode, &audio_type));
// Free memory.
delete[] output_data_decode;
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
}
TEST_P(OpusTest, OpusSetBitRate) {
// Test without creating encoder memory.
EXPECT_EQ(-1, WebRtcOpus_SetBitRate(opus_encoder_, 60000));
// Create encoder memory, try with different bitrates.
CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
use_multistream_, encoder_sample_rate_hz_);
EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_, 30000));
EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_, 60000));
EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_, 300000));
EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_, 600000));
// Free memory.
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
}
TEST_P(OpusTest, OpusSetComplexity) {
// Test without creating encoder memory.
EXPECT_EQ(-1, WebRtcOpus_SetComplexity(opus_encoder_, 9));
// Create encoder memory, try with different complexities.
CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
use_multistream_, encoder_sample_rate_hz_);
EXPECT_EQ(0, WebRtcOpus_SetComplexity(opus_encoder_, 0));
EXPECT_EQ(0, WebRtcOpus_SetComplexity(opus_encoder_, 10));
EXPECT_EQ(-1, WebRtcOpus_SetComplexity(opus_encoder_, 11));
// Free memory.
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
}
TEST_P(OpusTest, OpusSetBandwidth) {
if (channels_ > 2) {
// TODO(webrtc:10217): investigate why multi-stream Opus reports
// narrowband when it's configured with FULLBAND.
return;
}
PrepareSpeechData(20, 20);
int16_t audio_type;
const int decode_samples_per_channel =
SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/20);
std::unique_ptr<int16_t[]> output_data_decode(
new int16_t[decode_samples_per_channel * channels_]());
// Test without creating encoder memory.
EXPECT_EQ(-1,
WebRtcOpus_SetBandwidth(opus_encoder_, OPUS_BANDWIDTH_NARROWBAND));
EXPECT_EQ(-1, WebRtcOpus_GetBandwidth(opus_encoder_));
// Create encoder memory, try with different bandwidths.
CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
use_multistream_, encoder_sample_rate_hz_);
CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_,
decoder_sample_rate_hz_);
EXPECT_EQ(-1, WebRtcOpus_SetBandwidth(opus_encoder_,
OPUS_BANDWIDTH_NARROWBAND - 1));
EXPECT_EQ(0,
WebRtcOpus_SetBandwidth(opus_encoder_, OPUS_BANDWIDTH_NARROWBAND));
EncodeDecode(opus_encoder_, speech_data_.GetNextBlock(), opus_decoder_,
output_data_decode.get(), &audio_type);
EXPECT_EQ(OPUS_BANDWIDTH_NARROWBAND, WebRtcOpus_GetBandwidth(opus_encoder_));
EXPECT_EQ(0, WebRtcOpus_SetBandwidth(opus_encoder_, OPUS_BANDWIDTH_FULLBAND));
EncodeDecode(opus_encoder_, speech_data_.GetNextBlock(), opus_decoder_,
output_data_decode.get(), &audio_type);
EXPECT_EQ(encoder_sample_rate_hz_ == 16000 ? OPUS_BANDWIDTH_WIDEBAND
: OPUS_BANDWIDTH_FULLBAND,
WebRtcOpus_GetBandwidth(opus_encoder_));
EXPECT_EQ(
-1, WebRtcOpus_SetBandwidth(opus_encoder_, OPUS_BANDWIDTH_FULLBAND + 1));
EncodeDecode(opus_encoder_, speech_data_.GetNextBlock(), opus_decoder_,
output_data_decode.get(), &audio_type);
EXPECT_EQ(encoder_sample_rate_hz_ == 16000 ? OPUS_BANDWIDTH_WIDEBAND
: OPUS_BANDWIDTH_FULLBAND,
WebRtcOpus_GetBandwidth(opus_encoder_));
// Free memory.
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
}
TEST_P(OpusTest, OpusForceChannels) {
// Test without creating encoder memory.
EXPECT_EQ(-1, WebRtcOpus_SetForceChannels(opus_encoder_, 1));
CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
use_multistream_, encoder_sample_rate_hz_);
ASSERT_NE(nullptr, opus_encoder_);
if (channels_ >= 2) {
EXPECT_EQ(-1, WebRtcOpus_SetForceChannels(opus_encoder_, 3));
EXPECT_EQ(0, WebRtcOpus_SetForceChannels(opus_encoder_, 2));
EXPECT_EQ(0, WebRtcOpus_SetForceChannels(opus_encoder_, 1));
EXPECT_EQ(0, WebRtcOpus_SetForceChannels(opus_encoder_, 0));
} else {
EXPECT_EQ(-1, WebRtcOpus_SetForceChannels(opus_encoder_, 2));
EXPECT_EQ(0, WebRtcOpus_SetForceChannels(opus_encoder_, 1));
EXPECT_EQ(0, WebRtcOpus_SetForceChannels(opus_encoder_, 0));
}
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
}
// Encode and decode one frame, initialize the decoder and
// decode once more.
TEST_P(OpusTest, OpusDecodeInit) {
PrepareSpeechData(20, 20);
// Create encoder memory.
CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
use_multistream_, encoder_sample_rate_hz_);
CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_,
decoder_sample_rate_hz_);
// Encode & decode.
int16_t audio_type;
const int decode_samples_per_channel =
SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/20);
int16_t* output_data_decode =
new int16_t[decode_samples_per_channel * channels_];
EXPECT_EQ(decode_samples_per_channel,
EncodeDecode(opus_encoder_, speech_data_.GetNextBlock(),
opus_decoder_, output_data_decode, &audio_type));
WebRtcOpus_DecoderInit(opus_decoder_);
EXPECT_EQ(decode_samples_per_channel,
WebRtcOpus_Decode(opus_decoder_, bitstream_, encoded_bytes_,
output_data_decode, &audio_type));
// Free memory.
delete[] output_data_decode;
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
}
TEST_P(OpusTest, OpusEnableDisableFec) {
// Test without creating encoder memory.
EXPECT_EQ(-1, WebRtcOpus_EnableFec(opus_encoder_));
EXPECT_EQ(-1, WebRtcOpus_DisableFec(opus_encoder_));
// Create encoder memory.
CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
use_multistream_, encoder_sample_rate_hz_);
EXPECT_EQ(0, WebRtcOpus_EnableFec(opus_encoder_));
EXPECT_EQ(0, WebRtcOpus_DisableFec(opus_encoder_));
// Free memory.
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
}
TEST_P(OpusTest, OpusEnableDisableDtx) {
// Test without creating encoder memory.
EXPECT_EQ(-1, WebRtcOpus_EnableDtx(opus_encoder_));
EXPECT_EQ(-1, WebRtcOpus_DisableDtx(opus_encoder_));
// Create encoder memory.
CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
use_multistream_, encoder_sample_rate_hz_);
opus_int32 dtx;
// DTX is off by default.
ENCODER_CTL(opus_encoder_, OPUS_GET_DTX(&dtx));
EXPECT_EQ(0, dtx);
// Test to enable DTX.
EXPECT_EQ(0, WebRtcOpus_EnableDtx(opus_encoder_));
ENCODER_CTL(opus_encoder_, OPUS_GET_DTX(&dtx));
EXPECT_EQ(1, dtx);
// Test to disable DTX.
EXPECT_EQ(0, WebRtcOpus_DisableDtx(opus_encoder_));
ENCODER_CTL(opus_encoder_, OPUS_GET_DTX(&dtx));
EXPECT_EQ(0, dtx);
// Free memory.
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
}
TEST_P(OpusTest, OpusDtxOff) {
TestDtxEffect(false, 10);
TestDtxEffect(false, 20);
TestDtxEffect(false, 40);
}
TEST_P(OpusTest, OpusDtxOn) {
if (channels_ > 2 || application_ != 0) {
// DTX does not work with OPUS_APPLICATION_AUDIO at low complexity settings.
// TODO(webrtc:10218): adapt the test to the sizes and order of multi-stream
// DTX packets.
return;
}
TestDtxEffect(true, 10);
TestDtxEffect(true, 20);
TestDtxEffect(true, 40);
}
TEST_P(OpusTest, OpusCbrOff) {
TestCbrEffect(false, 10);
TestCbrEffect(false, 20);
TestCbrEffect(false, 40);
}
TEST_P(OpusTest, OpusCbrOn) {
TestCbrEffect(true, 10);
TestCbrEffect(true, 20);
TestCbrEffect(true, 40);
}
TEST_P(OpusTest, OpusSetPacketLossRate) {
// Test without creating encoder memory.
EXPECT_EQ(-1, WebRtcOpus_SetPacketLossRate(opus_encoder_, 50));
// Create encoder memory.
CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
use_multistream_, encoder_sample_rate_hz_);
EXPECT_EQ(0, WebRtcOpus_SetPacketLossRate(opus_encoder_, 50));
EXPECT_EQ(-1, WebRtcOpus_SetPacketLossRate(opus_encoder_, -1));
EXPECT_EQ(-1, WebRtcOpus_SetPacketLossRate(opus_encoder_, 101));
// Free memory.
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
}
TEST_P(OpusTest, OpusSetMaxPlaybackRate) {
// Test without creating encoder memory.
EXPECT_EQ(-1, WebRtcOpus_SetMaxPlaybackRate(opus_encoder_, 20000));
// Create encoder memory.
CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
use_multistream_, encoder_sample_rate_hz_);
SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_FULLBAND, 48000);
SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_FULLBAND, 24001);
SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_SUPERWIDEBAND, 24000);
SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_SUPERWIDEBAND, 16001);
SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_WIDEBAND, 16000);
SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_WIDEBAND, 12001);
SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_MEDIUMBAND, 12000);
SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_MEDIUMBAND, 8001);
SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_NARROWBAND, 8000);
SetMaxPlaybackRate(opus_encoder_, OPUS_BANDWIDTH_NARROWBAND, 4000);
// Free memory.
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
}
// Test PLC.
TEST_P(OpusTest, OpusDecodePlc) {
PrepareSpeechData(20, 20);
// Create encoder memory.
CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
use_multistream_, encoder_sample_rate_hz_);
CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_,
decoder_sample_rate_hz_);
// Set bitrate.
EXPECT_EQ(
0, WebRtcOpus_SetBitRate(opus_encoder_, channels_ == 1 ? 32000 : 64000));
// Check number of channels for decoder.
EXPECT_EQ(channels_, WebRtcOpus_DecoderChannels(opus_decoder_));
// Encode & decode.
int16_t audio_type;
const int decode_samples_per_channel =
SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/20);
int16_t* output_data_decode =
new int16_t[decode_samples_per_channel * channels_];
EXPECT_EQ(decode_samples_per_channel,
EncodeDecode(opus_encoder_, speech_data_.GetNextBlock(),
opus_decoder_, output_data_decode, &audio_type));
// Call decoder PLC.
constexpr int kPlcDurationMs = 10;
const int plc_samples = decoder_sample_rate_hz_ * kPlcDurationMs / 1000;
int16_t* plc_buffer = new int16_t[plc_samples * channels_];
EXPECT_EQ(plc_samples,
WebRtcOpus_Decode(opus_decoder_, NULL, 0, plc_buffer, &audio_type));
// Free memory.
delete[] plc_buffer;
delete[] output_data_decode;
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
}
// Duration estimation.
TEST_P(OpusTest, OpusDurationEstimation) {
PrepareSpeechData(20, 20);
// Create.
CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
use_multistream_, encoder_sample_rate_hz_);
CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_,
decoder_sample_rate_hz_);
// 10 ms. We use only the first 10 ms of a 20 ms block.
auto speech_block = speech_data_.GetNextBlock();
int encoded_bytes_int = WebRtcOpus_Encode(
opus_encoder_, speech_block.data(),
rtc::CheckedDivExact(speech_block.size(), 2 * channels_), kMaxBytes,
bitstream_);
EXPECT_GE(encoded_bytes_int, 0);
EXPECT_EQ(SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/10),
WebRtcOpus_DurationEst(opus_decoder_, bitstream_,
static_cast<size_t>(encoded_bytes_int)));
// 20 ms
speech_block = speech_data_.GetNextBlock();
encoded_bytes_int =
WebRtcOpus_Encode(opus_encoder_, speech_block.data(),
rtc::CheckedDivExact(speech_block.size(), channels_),
kMaxBytes, bitstream_);
EXPECT_GE(encoded_bytes_int, 0);
EXPECT_EQ(SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/20),
WebRtcOpus_DurationEst(opus_decoder_, bitstream_,
static_cast<size_t>(encoded_bytes_int)));
// Free memory.
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
}
TEST_P(OpusTest, OpusDecodeRepacketized) {
if (channels_ > 2) {
// As per the Opus documentation
// https://mf4.xiph.org/jenkins/view/opus/job/opus/ws/doc/html/group__opus__repacketizer.html#details,
// multiple streams are not supported.
return;
}
constexpr size_t kPackets = 6;
PrepareSpeechData(20, 20 * kPackets);
// Create encoder memory.
CreateSingleOrMultiStreamEncoder(&opus_encoder_, channels_, application_,
use_multistream_, encoder_sample_rate_hz_);
ASSERT_NE(nullptr, opus_encoder_);
CreateSingleOrMultiStreamDecoder(&opus_decoder_, channels_, use_multistream_,
decoder_sample_rate_hz_);
ASSERT_NE(nullptr, opus_decoder_);
// Set bitrate.
EXPECT_EQ(
0, WebRtcOpus_SetBitRate(opus_encoder_, channels_ == 1 ? 32000 : 64000));
// Check number of channels for decoder.
EXPECT_EQ(channels_, WebRtcOpus_DecoderChannels(opus_decoder_));
// Encode & decode.
int16_t audio_type;
const int decode_samples_per_channel =
SamplesPerChannel(decoder_sample_rate_hz_, /*ms=*/20);
std::unique_ptr<int16_t[]> output_data_decode(
new int16_t[kPackets * decode_samples_per_channel * channels_]);
OpusRepacketizer* rp = opus_repacketizer_create();
size_t num_packets = 0;
constexpr size_t kMaxCycles = 100;
for (size_t idx = 0; idx < kMaxCycles; ++idx) {
auto speech_block = speech_data_.GetNextBlock();
encoded_bytes_ =
WebRtcOpus_Encode(opus_encoder_, speech_block.data(),
rtc::CheckedDivExact(speech_block.size(), channels_),
kMaxBytes, bitstream_);
if (opus_repacketizer_cat(rp, bitstream_,
rtc::checked_cast<opus_int32>(encoded_bytes_)) ==
OPUS_OK) {
++num_packets;
if (num_packets == kPackets) {
break;
}
} else {
// The Opus repacketizer cannot guarantee success. We try again if it fails.
opus_repacketizer_init(rp);
num_packets = 0;
}
}
EXPECT_EQ(kPackets, num_packets);
encoded_bytes_ = opus_repacketizer_out(rp, bitstream_, kMaxBytes);
EXPECT_EQ(decode_samples_per_channel * kPackets,
static_cast<size_t>(WebRtcOpus_DurationEst(
opus_decoder_, bitstream_, encoded_bytes_)));
EXPECT_EQ(decode_samples_per_channel * kPackets,
static_cast<size_t>(
WebRtcOpus_Decode(opus_decoder_, bitstream_, encoded_bytes_,
output_data_decode.get(), &audio_type)));
// Free memory.
opus_repacketizer_destroy(rp);
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
}
TEST(OpusVadTest, CeltUnknownStatus) {
const uint8_t celt[] = {0x80};
EXPECT_EQ(WebRtcOpus_PacketHasVoiceActivity(celt, 1), -1);
}
TEST(OpusVadTest, Mono20msVadSet) {
uint8_t silk20msMonoVad[] = {0x78, 0x80};
EXPECT_TRUE(WebRtcOpus_PacketHasVoiceActivity(silk20msMonoVad, 2));
}
TEST(OpusVadTest, Mono20MsVadUnset) {
uint8_t silk20msMonoSilence[] = {0x78, 0x00};
EXPECT_FALSE(WebRtcOpus_PacketHasVoiceActivity(silk20msMonoSilence, 2));
}
TEST(OpusVadTest, Stereo20MsVadOnSideChannel) {
uint8_t silk20msStereoVadSideChannel[] = {0x78 | 0x04, 0x20};
EXPECT_TRUE(
WebRtcOpus_PacketHasVoiceActivity(silk20msStereoVadSideChannel, 2));
}
TEST(OpusVadTest, TwoOpusMonoFramesVadOnSecond) {
uint8_t twoMonoFrames[] = {0x78 | 0x1, 0x00, 0x80};
EXPECT_TRUE(WebRtcOpus_PacketHasVoiceActivity(twoMonoFrames, 3));
}
} // namespace webrtc
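The hard-coded payload bytes in the VAD tests above start with an Opus TOC byte. The sketch below is based on RFC 6716 section 3.1, not on this commit, and the struct and field names are ours; it shows how the leading byte splits into fields.
#include <cstdint>
struct OpusToc {
  int config;   // Bits 7..3: mode / bandwidth / frame duration.
  bool stereo;  // Bit 2.
  int code;     // Bits 1..0: frame count code.
};
// E.g. 0x80 parses to config 16, the first CELT-only configuration, which is
// why CeltUnknownStatus expects -1 from WebRtcOpus_PacketHasVoiceActivity.
inline OpusToc ParseToc(uint8_t toc) {
  return {toc >> 3, (toc & 0x04) != 0, toc & 0x03};
}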

View file

@ -0,0 +1,55 @@
# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
import("../../../../../webrtc.gni")
visibility = [
":*",
"../../../:*",
]
if (rtc_include_tests) {
rtc_library("test") {
testonly = true
sources = [
"audio_ring_buffer.cc",
"audio_ring_buffer.h",
"blocker.cc",
"blocker.h",
"lapped_transform.cc",
"lapped_transform.h",
]
deps = [
"../../../../../common_audio",
"../../../../../common_audio:common_audio_c",
"../../../../../rtc_base:checks",
"../../../../../rtc_base/memory:aligned_malloc",
]
}
rtc_library("test_unittest") {
testonly = true
sources = [
"audio_ring_buffer_unittest.cc",
"blocker_unittest.cc",
"lapped_transform_unittest.cc",
]
deps = [
":test",
"../../../../../common_audio",
"../../../../../common_audio:common_audio_c",
"../../../../../rtc_base:macromagic",
"../../../../../test:test_support",
"//testing/gtest",
]
}
}

View file

@ -0,0 +1,76 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/codecs/opus/test/audio_ring_buffer.h"
#include "common_audio/ring_buffer.h"
#include "rtc_base/checks.h"
// This is a simple multi-channel wrapper over the ring_buffer.h C interface.
namespace webrtc {
AudioRingBuffer::AudioRingBuffer(size_t channels, size_t max_frames) {
buffers_.reserve(channels);
for (size_t i = 0; i < channels; ++i)
buffers_.push_back(WebRtc_CreateBuffer(max_frames, sizeof(float)));
}
AudioRingBuffer::~AudioRingBuffer() {
for (auto* buf : buffers_)
WebRtc_FreeBuffer(buf);
}
void AudioRingBuffer::Write(const float* const* data,
size_t channels,
size_t frames) {
RTC_DCHECK_EQ(buffers_.size(), channels);
for (size_t i = 0; i < channels; ++i) {
const size_t written = WebRtc_WriteBuffer(buffers_[i], data[i], frames);
RTC_CHECK_EQ(written, frames);
}
}
void AudioRingBuffer::Read(float* const* data, size_t channels, size_t frames) {
RTC_DCHECK_EQ(buffers_.size(), channels);
for (size_t i = 0; i < channels; ++i) {
const size_t read =
WebRtc_ReadBuffer(buffers_[i], nullptr, data[i], frames);
RTC_CHECK_EQ(read, frames);
}
}
size_t AudioRingBuffer::ReadFramesAvailable() const {
// All buffers have the same amount available.
return WebRtc_available_read(buffers_[0]);
}
size_t AudioRingBuffer::WriteFramesAvailable() const {
// All buffers have the same amount available.
return WebRtc_available_write(buffers_[0]);
}
void AudioRingBuffer::MoveReadPositionForward(size_t frames) {
for (auto* buf : buffers_) {
const size_t moved =
static_cast<size_t>(WebRtc_MoveReadPtr(buf, static_cast<int>(frames)));
RTC_CHECK_EQ(moved, frames);
}
}
void AudioRingBuffer::MoveReadPositionBackward(size_t frames) {
for (auto* buf : buffers_) {
const size_t moved = static_cast<size_t>(
-WebRtc_MoveReadPtr(buf, -static_cast<int>(frames)));
RTC_CHECK_EQ(moved, frames);
}
}
} // namespace webrtc

View file

@ -0,0 +1,57 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_TEST_AUDIO_RING_BUFFER_H_
#define MODULES_AUDIO_CODING_CODECS_OPUS_TEST_AUDIO_RING_BUFFER_H_
#include <stddef.h>
#include <memory>
#include <vector>
struct RingBuffer;
namespace webrtc {
// A ring buffer tailored for float deinterleaved audio. Any operation that
// cannot be performed as requested will cause a crash (e.g. insufficient data
// in the buffer to fulfill a read request).
class AudioRingBuffer final {
public:
// Specify the number of channels and maximum number of frames the buffer will
// contain.
AudioRingBuffer(size_t channels, size_t max_frames);
~AudioRingBuffer();
// Copies `data` to the buffer and advances the write pointer. `channels` must
// be the same as at creation time.
void Write(const float* const* data, size_t channels, size_t frames);
// Copies from the buffer to `data` and advances the read pointer. `channels`
// must be the same as at creation time.
void Read(float* const* data, size_t channels, size_t frames);
size_t ReadFramesAvailable() const;
size_t WriteFramesAvailable() const;
// Moves the read position. The forward version advances the read pointer
// towards the write pointer and the backward version withdraws the read
// pointer away from the write pointer (i.e. flushing and stuffing the buffer,
// respectively).
void MoveReadPositionForward(size_t frames);
void MoveReadPositionBackward(size_t frames);
private:
// TODO(kwiberg): Use std::vector<std::unique_ptr<RingBuffer>> instead.
std::vector<RingBuffer*> buffers_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_CODECS_OPUS_TEST_AUDIO_RING_BUFFER_H_
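A short usage sketch (ours, not in the commit) against the interface above; the demo function name and sizes are illustrative.
#include <vector>
#include "modules/audio_coding/codecs/opus/test/audio_ring_buffer.h"
void AudioRingBufferDemo() {
  webrtc::AudioRingBuffer buf(/*channels=*/2, /*max_frames=*/256);
  std::vector<float> left(128, 0.5f), right(128, -0.5f);
  const float* in[] = {left.data(), right.data()};
  buf.Write(in, /*channels=*/2, /*frames=*/128);
  std::vector<float> out_l(128), out_r(128);
  float* out[] = {out_l.data(), out_r.data()};
  // Reading more than ReadFramesAvailable() would hit an RTC_CHECK and crash.
  buf.Read(out, /*channels=*/2, /*frames=*/128);
}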

View file

@ -0,0 +1,111 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/codecs/opus/test/audio_ring_buffer.h"
#include <memory>
#include "common_audio/channel_buffer.h"
#include "test/gtest.h"
namespace webrtc {
class AudioRingBufferTest
: public ::testing::TestWithParam< ::testing::tuple<int, int, int, int> > {
};
void ReadAndWriteTest(const ChannelBuffer<float>& input,
size_t num_write_chunk_frames,
size_t num_read_chunk_frames,
size_t buffer_frames,
ChannelBuffer<float>* output) {
const size_t num_channels = input.num_channels();
const size_t total_frames = input.num_frames();
AudioRingBuffer buf(num_channels, buffer_frames);
std::unique_ptr<float*[]> slice(new float*[num_channels]);
size_t input_pos = 0;
size_t output_pos = 0;
while (input_pos + buf.WriteFramesAvailable() < total_frames) {
// Write until the buffer is as full as possible.
while (buf.WriteFramesAvailable() >= num_write_chunk_frames) {
buf.Write(input.Slice(slice.get(), input_pos), num_channels,
num_write_chunk_frames);
input_pos += num_write_chunk_frames;
}
// Read until the buffer is as empty as possible.
while (buf.ReadFramesAvailable() >= num_read_chunk_frames) {
EXPECT_LT(output_pos, total_frames);
buf.Read(output->Slice(slice.get(), output_pos), num_channels,
num_read_chunk_frames);
output_pos += num_read_chunk_frames;
}
}
// Write and read the last bit.
if (input_pos < total_frames) {
buf.Write(input.Slice(slice.get(), input_pos), num_channels,
total_frames - input_pos);
}
if (buf.ReadFramesAvailable()) {
buf.Read(output->Slice(slice.get(), output_pos), num_channels,
buf.ReadFramesAvailable());
}
EXPECT_EQ(0u, buf.ReadFramesAvailable());
}
TEST_P(AudioRingBufferTest, ReadDataMatchesWrittenData) {
const size_t kFrames = 5000;
const size_t num_channels = ::testing::get<3>(GetParam());
// Initialize the input data to an increasing sequence.
ChannelBuffer<float> input(kFrames, static_cast<int>(num_channels));
for (size_t i = 0; i < num_channels; ++i)
for (size_t j = 0; j < kFrames; ++j)
input.channels()[i][j] = (i + 1) * (j + 1);
ChannelBuffer<float> output(kFrames, static_cast<int>(num_channels));
ReadAndWriteTest(input, ::testing::get<0>(GetParam()),
::testing::get<1>(GetParam()), ::testing::get<2>(GetParam()),
&output);
// Verify the read data matches the input.
for (size_t i = 0; i < num_channels; ++i)
for (size_t j = 0; j < kFrames; ++j)
EXPECT_EQ(input.channels()[i][j], output.channels()[i][j]);
}
INSTANTIATE_TEST_SUITE_P(
AudioRingBufferTest,
AudioRingBufferTest,
::testing::Combine(::testing::Values(10, 20, 42), // num_write_chunk_frames
::testing::Values(1, 10, 17), // num_read_chunk_frames
::testing::Values(100, 256), // buffer_frames
::testing::Values(1, 4))); // num_channels
TEST_F(AudioRingBufferTest, MoveReadPosition) {
const size_t kNumChannels = 1;
const float kInputArray[] = {1, 2, 3, 4};
const size_t kNumFrames = sizeof(kInputArray) / sizeof(*kInputArray);
ChannelBuffer<float> input(kNumFrames, kNumChannels);
input.SetDataForTesting(kInputArray, kNumFrames);
AudioRingBuffer buf(kNumChannels, kNumFrames);
buf.Write(input.channels(), kNumChannels, kNumFrames);
buf.MoveReadPositionForward(3);
ChannelBuffer<float> output(1, kNumChannels);
buf.Read(output.channels(), kNumChannels, 1);
EXPECT_EQ(4, output.channels()[0][0]);
buf.MoveReadPositionBackward(3);
buf.Read(output.channels(), kNumChannels, 1);
EXPECT_EQ(2, output.channels()[0][0]);
}
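// Read-pointer trace for the test above: after writing {1, 2, 3, 4},
// MoveReadPositionForward(3) skips frames 1..3, so the next read returns 4;
// MoveReadPositionBackward(3) then rewinds three frames, so the following
// read returns 2.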
} // namespace webrtc

View file

@ -0,0 +1,215 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/codecs/opus/test/blocker.h"
#include <string.h>
#include "rtc_base/checks.h"
namespace {
// Adds `a` and `b` frame by frame into `result` (basically matrix addition).
void AddFrames(const float* const* a,
size_t a_start_index,
const float* const* b,
int b_start_index,
size_t num_frames,
size_t num_channels,
float* const* result,
size_t result_start_index) {
for (size_t i = 0; i < num_channels; ++i) {
for (size_t j = 0; j < num_frames; ++j) {
result[i][j + result_start_index] =
a[i][j + a_start_index] + b[i][j + b_start_index];
}
}
}
// Copies `src` into `dst` channel by channel.
void CopyFrames(const float* const* src,
size_t src_start_index,
size_t num_frames,
size_t num_channels,
float* const* dst,
size_t dst_start_index) {
for (size_t i = 0; i < num_channels; ++i) {
memcpy(&dst[i][dst_start_index], &src[i][src_start_index],
num_frames * sizeof(dst[i][dst_start_index]));
}
}
// Moves `src` into `dst` channel by channel.
void MoveFrames(const float* const* src,
size_t src_start_index,
size_t num_frames,
size_t num_channels,
float* const* dst,
size_t dst_start_index) {
for (size_t i = 0; i < num_channels; ++i) {
memmove(&dst[i][dst_start_index], &src[i][src_start_index],
num_frames * sizeof(dst[i][dst_start_index]));
}
}
void ZeroOut(float* const* buffer,
size_t starting_idx,
size_t num_frames,
size_t num_channels) {
for (size_t i = 0; i < num_channels; ++i) {
memset(&buffer[i][starting_idx], 0,
num_frames * sizeof(buffer[i][starting_idx]));
}
}
// Pointwise multiplies each channel of `frames` with `window`. Results are
// stored in `frames`.
void ApplyWindow(const float* window,
size_t num_frames,
size_t num_channels,
float* const* frames) {
for (size_t i = 0; i < num_channels; ++i) {
for (size_t j = 0; j < num_frames; ++j) {
frames[i][j] = frames[i][j] * window[j];
}
}
}
size_t gcd(size_t a, size_t b) {
size_t tmp;
while (b) {
tmp = a;
a = b;
b = tmp % b;
}
return a;
}
} // namespace
namespace webrtc {
Blocker::Blocker(size_t chunk_size,
size_t block_size,
size_t num_input_channels,
size_t num_output_channels,
const float* window,
size_t shift_amount,
BlockerCallback* callback)
: chunk_size_(chunk_size),
block_size_(block_size),
num_input_channels_(num_input_channels),
num_output_channels_(num_output_channels),
initial_delay_(block_size_ - gcd(chunk_size, shift_amount)),
frame_offset_(0),
input_buffer_(num_input_channels_, chunk_size_ + initial_delay_),
output_buffer_(chunk_size_ + initial_delay_, num_output_channels_),
input_block_(block_size_, num_input_channels_),
output_block_(block_size_, num_output_channels_),
window_(new float[block_size_]),
shift_amount_(shift_amount),
callback_(callback) {
RTC_CHECK_LE(num_output_channels_, num_input_channels_);
RTC_CHECK_LE(shift_amount_, block_size_);
memcpy(window_.get(), window, block_size_ * sizeof(*window_.get()));
input_buffer_.MoveReadPositionBackward(initial_delay_);
}
Blocker::~Blocker() = default;
// When block_size < chunk_size the input and output buffers look like this:
//
// delay* chunk_size chunk_size + delay*
// buffer: <-------------|---------------------|---------------|>
// _a_ _b_ _c_
//
// On each call to ProcessChunk():
// 1. New input gets read into sections _b_ and _c_ of the input buffer.
// 2. We block starting from frame_offset.
// 3. We block until we reach a block `bl` that doesn't contain any frames
// from sections _a_ or _b_ of the input buffer.
// 4. We window the current block, fire the callback for processing, window
// again, and overlap/add to the output buffer.
// 5. We copy sections _a_ and _b_ of the output buffer into output.
// 6. For both the input and the output buffers, we copy section _c_ into
// section _a_.
// 7. We set the new frame_offset to be the difference between the first frame
// of `bl` and the border between sections _b_ and _c_.
//
// When block_size > chunk_size the input and output buffers look like this:
//
// chunk_size delay* chunk_size + delay*
// buffer: <-------------|---------------------|---------------|>
// _a_ _b_ _c_
//
// On each call to ProcessChunk():
// The procedure is the same as above, except for:
// 1. New input gets read into section _c_ of the input buffer.
// 3. We block until we reach a block `bl` that doesn't contain any frames
// from section _a_ of the input buffer.
// 5. We copy section _a_ of the output buffer into output.
// 6. For both the input and the output buffers, we copy sections _b_ and _c_
// into section _a_ and _b_.
// 7. We set the new frame_offset to be the difference between the first frame
// of `bl` and the border between sections _a_ and _b_.
//
// * delay here refers to initial_delay_
//
// TODO(claguna): Look at using ring buffers to eliminate some copies.
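// Worked example (ours): with chunk_size = 5, block_size = 4 and
// shift_amount = 2, as in the blocker unit tests further down in this commit,
// initial_delay_ = block_size_ - gcd(chunk_size, shift_amount) = 4 - 1 = 3.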
void Blocker::ProcessChunk(const float* const* input,
size_t chunk_size,
size_t num_input_channels,
size_t num_output_channels,
float* const* output) {
RTC_CHECK_EQ(chunk_size, chunk_size_);
RTC_CHECK_EQ(num_input_channels, num_input_channels_);
RTC_CHECK_EQ(num_output_channels, num_output_channels_);
input_buffer_.Write(input, num_input_channels, chunk_size_);
size_t first_frame_in_block = frame_offset_;
// Loop through blocks.
while (first_frame_in_block < chunk_size_) {
input_buffer_.Read(input_block_.channels(), num_input_channels,
block_size_);
input_buffer_.MoveReadPositionBackward(block_size_ - shift_amount_);
ApplyWindow(window_.get(), block_size_, num_input_channels_,
input_block_.channels());
callback_->ProcessBlock(input_block_.channels(), block_size_,
num_input_channels_, num_output_channels_,
output_block_.channels());
ApplyWindow(window_.get(), block_size_, num_output_channels_,
output_block_.channels());
AddFrames(output_buffer_.channels(), first_frame_in_block,
output_block_.channels(), 0, block_size_, num_output_channels_,
output_buffer_.channels(), first_frame_in_block);
first_frame_in_block += shift_amount_;
}
// Copy output buffer to output
CopyFrames(output_buffer_.channels(), 0, chunk_size_, num_output_channels_,
output, 0);
// Copy output buffer [chunk_size_, chunk_size_ + initial_delay_]
// to output buffer [0, initial_delay_], zero the rest.
MoveFrames(output_buffer_.channels(), chunk_size, initial_delay_,
num_output_channels_, output_buffer_.channels(), 0);
ZeroOut(output_buffer_.channels(), initial_delay_, chunk_size_,
num_output_channels_);
// Calculate new starting frames.
frame_offset_ = first_frame_in_block - chunk_size_;
}
} // namespace webrtc

View file

@ -0,0 +1,127 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_TEST_BLOCKER_H_
#define MODULES_AUDIO_CODING_CODECS_OPUS_TEST_BLOCKER_H_
#include <memory>
#include "common_audio/channel_buffer.h"
#include "modules/audio_coding/codecs/opus/test/audio_ring_buffer.h"
namespace webrtc {
// The callback function to process audio in the time domain. Input has already
// been windowed, and output will be windowed. The number of input channels
// must be >= the number of output channels.
class BlockerCallback {
public:
virtual ~BlockerCallback() {}
virtual void ProcessBlock(const float* const* input,
size_t num_frames,
size_t num_input_channels,
size_t num_output_channels,
float* const* output) = 0;
};
// The main purpose of Blocker is to abstract away the fact that often we
// receive a different number of audio frames than our transform takes. For
// example, most FFTs work best when the fft-size is a power of 2, but suppose
// we receive 20ms of audio at a sample rate of 48000. That comes to 960 frames
// of audio, which is not a power of 2. Blocker allows us to specify the
// transform and all other necessary processing via the Process() callback
// function without any constraints on the transform-size
// (read: `block_size_`) or received-audio-size (read: `chunk_size_`).
// We handle this for the multichannel audio case, allowing for different
// numbers of input and output channels (for example, beamforming takes 2 or
// more input channels and returns 1 output channel). Audio signals are
// represented as deinterleaved floats in the range [-1, 1].
//
// Blocker is responsible for:
// - blocking audio while handling potential discontinuities on the edges
// of chunks
// - windowing blocks before sending them to Process()
// - windowing processed blocks, and overlap-adding them together before
// sending back a processed chunk
//
// To use blocker:
// 1. Implement a BlockerCallback object `bc`.
// 2. Instantiate a Blocker object `b`, passing in `bc`.
// 3. As you receive audio, call b.ProcessChunk() to get processed audio.
// (A construction sketch follows this header.)
//
// A small amount of delay is added to the first received chunk to deal with
// the difference in chunk/block sizes. This delay is <= chunk_size.
//
// Ownership of window is retained by the caller. That is, Blocker makes a
// copy of window and does not attempt to delete it.
class Blocker {
public:
Blocker(size_t chunk_size,
size_t block_size,
size_t num_input_channels,
size_t num_output_channels,
const float* window,
size_t shift_amount,
BlockerCallback* callback);
~Blocker();
void ProcessChunk(const float* const* input,
size_t chunk_size,
size_t num_input_channels,
size_t num_output_channels,
float* const* output);
size_t initial_delay() const { return initial_delay_; }
private:
const size_t chunk_size_;
const size_t block_size_;
const size_t num_input_channels_;
const size_t num_output_channels_;
// The number of frames of delay to add at the beginning of the first chunk.
const size_t initial_delay_;
// The frame index into the input buffer where the first block should be read
// from. This is necessary because shift_amount_ is not necessarily a
// multiple of chunk_size_, so blocks won't line up at the start of the
// buffer.
size_t frame_offset_;
// Since blocks nearly always overlap, there are certain blocks that require
// frames from the end of one chunk and the beginning of the next chunk. The
// input and output buffers are responsible for saving those frames between
// calls to ProcessChunk().
//
// Both contain `initial_delay_` + `chunk_size_` frames. The input is a fairly
// standard FIFO, but due to the overlap-add it's harder to use an
// AudioRingBuffer for the output.
AudioRingBuffer input_buffer_;
ChannelBuffer<float> output_buffer_;
// Space for the input block (can't wrap because of windowing).
ChannelBuffer<float> input_block_;
// Space for the output block (can't wrap because of overlap/add).
ChannelBuffer<float> output_block_;
std::unique_ptr<float[]> window_;
// The number of frames between the starts of contiguous blocks. For example,
// `shift_amount_` = `block_size_` / 2 for a Hann window.
size_t shift_amount_;
BlockerCallback* callback_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_CODECS_OPUS_TEST_BLOCKER_H_
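A hedged construction sketch (ours): a Hann window with 50% overlap, following the `shift_amount_` note above. The no-op callback, window formula, and sizes are illustrative assumptions, not taken from the commit.
#include <algorithm>
#include <cmath>
#include <vector>
#include "modules/audio_coding/codecs/opus/test/blocker.h"
namespace {
// Passes input through unchanged so the sketch compiles stand-alone.
class CopyCallback : public webrtc::BlockerCallback {
 public:
  void ProcessBlock(const float* const* input,
                    size_t num_frames,
                    size_t num_input_channels,
                    size_t num_output_channels,
                    float* const* output) override {
    for (size_t c = 0; c < num_output_channels; ++c)
      std::copy(input[c], input[c] + num_frames, output[c]);
  }
};
}  // namespace
void MakeHannBlocker() {
  constexpr size_t kBlockSize = 256;  // FFT-friendly block.
  constexpr size_t kChunkSize = 480;  // 10 ms at 48 kHz.
  std::vector<float> window(kBlockSize);
  for (size_t i = 0; i < kBlockSize; ++i)
    window[i] = 0.5f - 0.5f * std::cos(2.0f * 3.14159265f * i / kBlockSize);
  CopyCallback callback;
  webrtc::Blocker blocker(kChunkSize, kBlockSize, /*num_input_channels=*/1,
                          /*num_output_channels=*/1, window.data(),
                          /*shift_amount=*/kBlockSize / 2, &callback);
  // blocker.ProcessChunk() can now be fed 480-frame chunks per channel.
}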

View file

@ -0,0 +1,293 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/codecs/opus/test/blocker.h"
#include <memory>
#include "rtc_base/arraysize.h"
#include "test/gtest.h"
namespace {
// Callback function that adds 3 to every sample in the signal.
class PlusThreeBlockerCallback : public webrtc::BlockerCallback {
public:
void ProcessBlock(const float* const* input,
size_t num_frames,
size_t num_input_channels,
size_t num_output_channels,
float* const* output) override {
for (size_t i = 0; i < num_output_channels; ++i) {
for (size_t j = 0; j < num_frames; ++j) {
output[i][j] = input[i][j] + 3;
}
}
}
};
// No-op callback function.
class CopyBlockerCallback : public webrtc::BlockerCallback {
public:
void ProcessBlock(const float* const* input,
size_t num_frames,
size_t num_input_channels,
size_t num_output_channels,
float* const* output) override {
for (size_t i = 0; i < num_output_channels; ++i) {
for (size_t j = 0; j < num_frames; ++j) {
output[i][j] = input[i][j];
}
}
}
};
} // namespace
namespace webrtc {
// Tests blocking with a window that multiplies the signal by 2, a callback
// that adds 3 to each sample in the signal, and different combinations of chunk
// size, block size, and shift amount.
class BlockerTest : public ::testing::Test {
protected:
void RunTest(Blocker* blocker,
size_t chunk_size,
size_t num_frames,
const float* const* input,
float* const* input_chunk,
float* const* output,
float* const* output_chunk,
size_t num_input_channels,
size_t num_output_channels) {
size_t start = 0;
size_t end = chunk_size - 1;
while (end < num_frames) {
CopyTo(input_chunk, 0, start, num_input_channels, chunk_size, input);
blocker->ProcessChunk(input_chunk, chunk_size, num_input_channels,
num_output_channels, output_chunk);
CopyTo(output, start, 0, num_output_channels, chunk_size, output_chunk);
start += chunk_size;
end += chunk_size;
}
}
void ValidateSignalEquality(const float* const* expected,
const float* const* actual,
size_t num_channels,
size_t num_frames) {
for (size_t i = 0; i < num_channels; ++i) {
for (size_t j = 0; j < num_frames; ++j) {
EXPECT_FLOAT_EQ(expected[i][j], actual[i][j]);
}
}
}
void ValidateInitialDelay(const float* const* output,
size_t num_channels,
size_t num_frames,
size_t initial_delay) {
for (size_t i = 0; i < num_channels; ++i) {
for (size_t j = 0; j < num_frames; ++j) {
if (j < initial_delay) {
EXPECT_FLOAT_EQ(output[i][j], 0.f);
} else {
EXPECT_GT(output[i][j], 0.f);
}
}
}
}
static void CopyTo(float* const* dst,
size_t start_index_dst,
size_t start_index_src,
size_t num_channels,
size_t num_frames,
const float* const* src) {
for (size_t i = 0; i < num_channels; ++i) {
memcpy(&dst[i][start_index_dst], &src[i][start_index_src],
num_frames * sizeof(float));
}
}
};
TEST_F(BlockerTest, TestBlockerMutuallyPrimeChunkAndBlockSize) {
const size_t kNumInputChannels = 3;
const size_t kNumOutputChannels = 2;
const size_t kNumFrames = 10;
const size_t kBlockSize = 4;
const size_t kChunkSize = 5;
const size_t kShiftAmount = 2;
const float kInput[kNumInputChannels][kNumFrames] = {
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
{2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
{3, 3, 3, 3, 3, 3, 3, 3, 3, 3}};
ChannelBuffer<float> input_cb(kNumFrames, kNumInputChannels);
input_cb.SetDataForTesting(kInput[0], sizeof(kInput) / sizeof(**kInput));
  const float kExpectedOutput[kNumOutputChannels][kNumFrames] = {
      {6, 6, 12, 20, 20, 20, 20, 20, 20, 20},
      {6, 6, 12, 28, 28, 28, 28, 28, 28, 28}};
  ChannelBuffer<float> expected_output_cb(kNumFrames, kNumOutputChannels);
expected_output_cb.SetDataForTesting(
kExpectedOutput[0], sizeof(kExpectedOutput) / sizeof(**kExpectedOutput));
const float kWindow[kBlockSize] = {2.f, 2.f, 2.f, 2.f};
ChannelBuffer<float> actual_output_cb(kNumFrames, kNumOutputChannels);
ChannelBuffer<float> input_chunk_cb(kChunkSize, kNumInputChannels);
ChannelBuffer<float> output_chunk_cb(kChunkSize, kNumOutputChannels);
PlusThreeBlockerCallback callback;
Blocker blocker(kChunkSize, kBlockSize, kNumInputChannels, kNumOutputChannels,
kWindow, kShiftAmount, &callback);
RunTest(&blocker, kChunkSize, kNumFrames, input_cb.channels(),
input_chunk_cb.channels(), actual_output_cb.channels(),
output_chunk_cb.channels(), kNumInputChannels, kNumOutputChannels);
ValidateSignalEquality(expected_output_cb.channels(),
actual_output_cb.channels(), kNumOutputChannels,
kNumFrames);
}
TEST_F(BlockerTest, TestBlockerMutuallyPrimeShiftAndBlockSize) {
const size_t kNumInputChannels = 3;
const size_t kNumOutputChannels = 2;
const size_t kNumFrames = 12;
const size_t kBlockSize = 4;
const size_t kChunkSize = 6;
const size_t kShiftAmount = 3;
const float kInput[kNumInputChannels][kNumFrames] = {
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
{2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
{3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}};
ChannelBuffer<float> input_cb(kNumFrames, kNumInputChannels);
input_cb.SetDataForTesting(kInput[0], sizeof(kInput) / sizeof(**kInput));
const float kExpectedOutput[kNumOutputChannels][kNumFrames] = {
{6, 10, 10, 20, 10, 10, 20, 10, 10, 20, 10, 10},
{6, 14, 14, 28, 14, 14, 28, 14, 14, 28, 14, 14}};
ChannelBuffer<float> expected_output_cb(kNumFrames, kNumOutputChannels);
expected_output_cb.SetDataForTesting(
kExpectedOutput[0], sizeof(kExpectedOutput) / sizeof(**kExpectedOutput));
const float kWindow[kBlockSize] = {2.f, 2.f, 2.f, 2.f};
ChannelBuffer<float> actual_output_cb(kNumFrames, kNumOutputChannels);
ChannelBuffer<float> input_chunk_cb(kChunkSize, kNumInputChannels);
ChannelBuffer<float> output_chunk_cb(kChunkSize, kNumOutputChannels);
PlusThreeBlockerCallback callback;
Blocker blocker(kChunkSize, kBlockSize, kNumInputChannels, kNumOutputChannels,
kWindow, kShiftAmount, &callback);
RunTest(&blocker, kChunkSize, kNumFrames, input_cb.channels(),
input_chunk_cb.channels(), actual_output_cb.channels(),
output_chunk_cb.channels(), kNumInputChannels, kNumOutputChannels);
ValidateSignalEquality(expected_output_cb.channels(),
actual_output_cb.channels(), kNumOutputChannels,
kNumFrames);
}
TEST_F(BlockerTest, TestBlockerNoOverlap) {
const size_t kNumInputChannels = 3;
const size_t kNumOutputChannels = 2;
const size_t kNumFrames = 12;
const size_t kBlockSize = 4;
const size_t kChunkSize = 4;
const size_t kShiftAmount = 4;
const float kInput[kNumInputChannels][kNumFrames] = {
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
{2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
{3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}};
ChannelBuffer<float> input_cb(kNumFrames, kNumInputChannels);
input_cb.SetDataForTesting(kInput[0], sizeof(kInput) / sizeof(**kInput));
const float kExpectedOutput[kNumOutputChannels][kNumFrames] = {
{10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10},
{14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14}};
ChannelBuffer<float> expected_output_cb(kNumFrames, kNumOutputChannels);
expected_output_cb.SetDataForTesting(
kExpectedOutput[0], sizeof(kExpectedOutput) / sizeof(**kExpectedOutput));
const float kWindow[kBlockSize] = {2.f, 2.f, 2.f, 2.f};
ChannelBuffer<float> actual_output_cb(kNumFrames, kNumOutputChannels);
ChannelBuffer<float> input_chunk_cb(kChunkSize, kNumInputChannels);
ChannelBuffer<float> output_chunk_cb(kChunkSize, kNumOutputChannels);
PlusThreeBlockerCallback callback;
Blocker blocker(kChunkSize, kBlockSize, kNumInputChannels, kNumOutputChannels,
kWindow, kShiftAmount, &callback);
RunTest(&blocker, kChunkSize, kNumFrames, input_cb.channels(),
input_chunk_cb.channels(), actual_output_cb.channels(),
output_chunk_cb.channels(), kNumInputChannels, kNumOutputChannels);
ValidateSignalEquality(expected_output_cb.channels(),
actual_output_cb.channels(), kNumOutputChannels,
kNumFrames);
}
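// The following test checks that the initial delay introduced by the blocker
// is minimal. The delays in its kInitialDelay table are consistent with
//   initial_delay = block_size - gcd(chunk_size, shift_amount),
// e.g. 64 - gcd(80, 16) = 48 and 256 - gcd(160, 64) = 224 (an observation
// about the test vectors, not a documented guarantee of Blocker's interface).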
TEST_F(BlockerTest, InitialDelaysAreMinimum) {
const size_t kNumInputChannels = 3;
const size_t kNumOutputChannels = 2;
const size_t kNumFrames = 1280;
const size_t kChunkSize[] = {80, 80, 80, 80, 80, 80,
160, 160, 160, 160, 160, 160};
const size_t kBlockSize[] = {64, 64, 64, 128, 128, 128,
128, 128, 128, 256, 256, 256};
const size_t kShiftAmount[] = {16, 32, 64, 32, 64, 128,
32, 64, 128, 64, 128, 256};
const size_t kInitialDelay[] = {48, 48, 48, 112, 112, 112,
96, 96, 96, 224, 224, 224};
float input[kNumInputChannels][kNumFrames];
for (size_t i = 0; i < kNumInputChannels; ++i) {
for (size_t j = 0; j < kNumFrames; ++j) {
input[i][j] = i + 1;
}
}
ChannelBuffer<float> input_cb(kNumFrames, kNumInputChannels);
input_cb.SetDataForTesting(input[0], sizeof(input) / sizeof(**input));
ChannelBuffer<float> output_cb(kNumFrames, kNumOutputChannels);
CopyBlockerCallback callback;
for (size_t i = 0; i < arraysize(kChunkSize); ++i) {
std::unique_ptr<float[]> window(new float[kBlockSize[i]]);
for (size_t j = 0; j < kBlockSize[i]; ++j) {
window[j] = 1.f;
}
ChannelBuffer<float> input_chunk_cb(kChunkSize[i], kNumInputChannels);
ChannelBuffer<float> output_chunk_cb(kChunkSize[i], kNumOutputChannels);
Blocker blocker(kChunkSize[i], kBlockSize[i], kNumInputChannels,
kNumOutputChannels, window.get(), kShiftAmount[i],
&callback);
RunTest(&blocker, kChunkSize[i], kNumFrames, input_cb.channels(),
input_chunk_cb.channels(), output_cb.channels(),
output_chunk_cb.channels(), kNumInputChannels, kNumOutputChannels);
ValidateInitialDelay(output_cb.channels(), kNumOutputChannels, kNumFrames,
kInitialDelay[i]);
}
}
} // namespace webrtc

View file

@ -0,0 +1,100 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/codecs/opus/test/lapped_transform.h"
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include "common_audio/real_fourier.h"
#include "rtc_base/checks.h"
namespace webrtc {
void LappedTransform::BlockThunk::ProcessBlock(const float* const* input,
size_t num_frames,
size_t num_input_channels,
size_t num_output_channels,
float* const* output) {
RTC_CHECK_EQ(num_input_channels, parent_->num_in_channels_);
RTC_CHECK_EQ(num_output_channels, parent_->num_out_channels_);
RTC_CHECK_EQ(parent_->block_length_, num_frames);
for (size_t i = 0; i < num_input_channels; ++i) {
memcpy(parent_->real_buf_.Row(i), input[i], num_frames * sizeof(*input[0]));
parent_->fft_->Forward(parent_->real_buf_.Row(i),
parent_->cplx_pre_.Row(i));
}
  size_t cplx_length =
      RealFourier::ComplexLength(RealFourier::FftOrder(num_frames));
  RTC_CHECK_EQ(parent_->cplx_length_, cplx_length);
parent_->block_processor_->ProcessAudioBlock(
parent_->cplx_pre_.Array(), num_input_channels, parent_->cplx_length_,
num_output_channels, parent_->cplx_post_.Array());
for (size_t i = 0; i < num_output_channels; ++i) {
parent_->fft_->Inverse(parent_->cplx_post_.Row(i),
parent_->real_buf_.Row(i));
    memcpy(output[i], parent_->real_buf_.Row(i),
           num_frames * sizeof(*output[0]));
}
}
LappedTransform::LappedTransform(size_t num_in_channels,
size_t num_out_channels,
size_t chunk_length,
const float* window,
size_t block_length,
size_t shift_amount,
Callback* callback)
: blocker_callback_(this),
num_in_channels_(num_in_channels),
num_out_channels_(num_out_channels),
block_length_(block_length),
chunk_length_(chunk_length),
block_processor_(callback),
blocker_(chunk_length_,
block_length_,
num_in_channels_,
num_out_channels_,
window,
shift_amount,
&blocker_callback_),
fft_(RealFourier::Create(RealFourier::FftOrder(block_length_))),
cplx_length_(RealFourier::ComplexLength(fft_->order())),
real_buf_(num_in_channels,
block_length_,
RealFourier::kFftBufferAlignment),
cplx_pre_(num_in_channels,
cplx_length_,
RealFourier::kFftBufferAlignment),
cplx_post_(num_out_channels,
cplx_length_,
RealFourier::kFftBufferAlignment) {
  RTC_CHECK_GT(num_in_channels_, 0);
RTC_CHECK_GT(block_length_, 0);
RTC_CHECK_GT(chunk_length_, 0);
RTC_CHECK(block_processor_);
  // `block_length_` must be a power of 2 (required by the FFT implementation).
RTC_CHECK_EQ(0, block_length_ & (block_length_ - 1));
}
LappedTransform::~LappedTransform() = default;
void LappedTransform::ProcessChunk(const float* const* in_chunk,
float* const* out_chunk) {
blocker_.ProcessChunk(in_chunk, chunk_length_, num_in_channels_,
num_out_channels_, out_chunk);
}
} // namespace webrtc

View file

@ -0,0 +1,175 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_CODECS_OPUS_TEST_LAPPED_TRANSFORM_H_
#define MODULES_AUDIO_CODING_CODECS_OPUS_TEST_LAPPED_TRANSFORM_H_
#include <complex>
#include <memory>
#include "common_audio/real_fourier.h"
#include "modules/audio_coding/codecs/opus/test/blocker.h"
#include "rtc_base/memory/aligned_malloc.h"
namespace webrtc {
// Wrapper class for aligned arrays. Every row (and the row pointer array
// itself) is aligned to the given byte alignment.
template <typename T>
class AlignedArray {
public:
AlignedArray(size_t rows, size_t cols, size_t alignment)
: rows_(rows), cols_(cols) {
RTC_CHECK_GT(alignment, 0);
head_row_ =
static_cast<T**>(AlignedMalloc(rows_ * sizeof(*head_row_), alignment));
for (size_t i = 0; i < rows_; ++i) {
head_row_[i] = static_cast<T*>(
AlignedMalloc(cols_ * sizeof(**head_row_), alignment));
}
}
~AlignedArray() {
for (size_t i = 0; i < rows_; ++i) {
AlignedFree(head_row_[i]);
}
AlignedFree(head_row_);
}
T* const* Array() { return head_row_; }
const T* const* Array() const { return head_row_; }
T* Row(size_t row) {
    RTC_CHECK_LT(row, rows_);
return head_row_[row];
}
const T* Row(size_t row) const {
    RTC_CHECK_LT(row, rows_);
return head_row_[row];
}
private:
size_t rows_;
size_t cols_;
T** head_row_;
};
// Helper class for audio processing modules which operate on frequency domain
// input derived from the windowed time domain audio stream.
//
// The input audio chunk is sliced into possibly overlapping blocks, multiplied
// by a window and transformed with an FFT implementation. The transformed data
// is supplied to the given callback for processing. The processed output is
// then inverse transformed into the time domain and spliced back into a chunk
// which constitutes the final output of this processing module.
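//
// Example usage (a minimal sketch; `MyCallback` stands for a hypothetical
// user-defined implementation of LappedTransform::Callback):
//
//   std::vector<float> window(block_length, 1.f);  // Rectangular window.
//   MyCallback callback;
//   LappedTransform transform(/*num_in_channels=*/1, /*num_out_channels=*/1,
//                             chunk_length, window.data(), block_length,
//                             /*shift_amount=*/block_length, &callback);
//   transform.ProcessChunk(in_chunk, out_chunk);  // Repeated once per chunk.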
class LappedTransform {
public:
class Callback {
public:
virtual ~Callback() {}
virtual void ProcessAudioBlock(const std::complex<float>* const* in_block,
size_t num_in_channels,
size_t frames,
size_t num_out_channels,
std::complex<float>* const* out_block) = 0;
};
  // Construct a transform instance. `chunk_length` is the number of samples
  // per channel in each chunk. `window` defines the window, owned by the
  // caller (a copy is made internally); `window` should have length equal to
  // `block_length`.
// `block_length` defines the length of a block, in samples.
// `shift_amount` is in samples. `callback` is the caller-owned audio
// processing function called for each block of the input chunk.
LappedTransform(size_t num_in_channels,
size_t num_out_channels,
size_t chunk_length,
const float* window,
size_t block_length,
size_t shift_amount,
Callback* callback);
~LappedTransform();
// Main audio processing helper method. Internally slices `in_chunk` into
// blocks, transforms them to frequency domain, calls the callback for each
// block and returns a de-blocked time domain chunk of audio through
// `out_chunk`. Both buffers are caller-owned.
void ProcessChunk(const float* const* in_chunk, float* const* out_chunk);
// Get the chunk length.
//
// The chunk length is the number of samples per channel that must be passed
// to ProcessChunk via the parameter in_chunk.
//
// Returns the same chunk_length passed to the LappedTransform constructor.
size_t chunk_length() const { return chunk_length_; }
// Get the number of input channels.
//
// This is the number of arrays that must be passed to ProcessChunk via
// in_chunk.
//
// Returns the same num_in_channels passed to the LappedTransform constructor.
size_t num_in_channels() const { return num_in_channels_; }
// Get the number of output channels.
//
// This is the number of arrays that must be passed to ProcessChunk via
// out_chunk.
//
// Returns the same num_out_channels passed to the LappedTransform
// constructor.
size_t num_out_channels() const { return num_out_channels_; }
// Returns the initial delay.
//
  // This is the delay introduced by `blocker_` so that it can accept and
  // return chunks of `chunk_length` while processing blocks of `block_length`.
size_t initial_delay() const { return blocker_.initial_delay(); }
private:
// Internal middleware callback, given to the blocker. Transforms each block
// and hands it over to the processing method given at construction time.
class BlockThunk : public BlockerCallback {
public:
explicit BlockThunk(LappedTransform* parent) : parent_(parent) {}
void ProcessBlock(const float* const* input,
size_t num_frames,
size_t num_input_channels,
size_t num_output_channels,
float* const* output) override;
private:
LappedTransform* const parent_;
} blocker_callback_;
const size_t num_in_channels_;
const size_t num_out_channels_;
const size_t block_length_;
const size_t chunk_length_;
Callback* const block_processor_;
Blocker blocker_;
// TODO(alessiob): Replace RealFourier with a different FFT library.
std::unique_ptr<RealFourier> fft_;
const size_t cplx_length_;
AlignedArray<float> real_buf_;
  AlignedArray<std::complex<float>> cplx_pre_;
  AlignedArray<std::complex<float>> cplx_post_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_CODECS_OPUS_TEST_LAPPED_TRANSFORM_H_

View file

@ -0,0 +1,203 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/codecs/opus/test/lapped_transform.h"
#include <algorithm>
#include <cmath>
#include <cstring>
#include "test/gtest.h"
using std::complex;
namespace {
class NoopCallback : public webrtc::LappedTransform::Callback {
public:
NoopCallback() : block_num_(0) {}
void ProcessAudioBlock(const complex<float>* const* in_block,
size_t in_channels,
size_t frames,
size_t out_channels,
complex<float>* const* out_block) override {
RTC_CHECK_EQ(in_channels, out_channels);
for (size_t i = 0; i < out_channels; ++i) {
memcpy(out_block[i], in_block[i], sizeof(**in_block) * frames);
}
++block_num_;
}
size_t block_num() { return block_num_; }
private:
size_t block_num_;
};
class FftCheckerCallback : public webrtc::LappedTransform::Callback {
public:
FftCheckerCallback() : block_num_(0) {}
void ProcessAudioBlock(const complex<float>* const* in_block,
size_t in_channels,
size_t frames,
size_t out_channels,
complex<float>* const* out_block) override {
RTC_CHECK_EQ(in_channels, out_channels);
size_t full_length = (frames - 1) * 2;
    ++block_num_;
    // The input is all ones, so its forward FFT should be a DC bin equal to
    // the real block length, with all other bins (nearly) zero.
    ASSERT_NEAR(in_block[0][0].real(), static_cast<float>(full_length), 1e-5f);
    ASSERT_NEAR(in_block[0][0].imag(), 0.0f, 1e-5f);
    for (size_t i = 1; i < frames; ++i) {
      ASSERT_NEAR(in_block[0][i].real(), 0.0f, 1e-5f);
      ASSERT_NEAR(in_block[0][i].imag(), 0.0f, 1e-5f);
    }
}
size_t block_num() { return block_num_; }
private:
size_t block_num_;
};
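// The arithmetic behind FftCheckerCallback: a real FFT of N samples yields
// N / 2 + 1 complex bins, so the real block length is recovered as
// full_length = (frames - 1) * 2. An all-ones block of length N transforms to
// a DC bin of magnitude N with every other bin zero, which is exactly what
// the callback asserts.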
void SetFloatArray(float value, int rows, int cols, float* const* array) {
for (int i = 0; i < rows; ++i) {
for (int j = 0; j < cols; ++j) {
array[i][j] = value;
}
}
}
} // namespace
namespace webrtc {
TEST(LappedTransformTest, Windowless) {
const size_t kChannels = 3;
const size_t kChunkLength = 512;
const size_t kBlockLength = 64;
const size_t kShiftAmount = 64;
NoopCallback noop;
// Rectangular window.
float window[kBlockLength];
std::fill(window, &window[kBlockLength], 1.0f);
LappedTransform trans(kChannels, kChannels, kChunkLength, window,
kBlockLength, kShiftAmount, &noop);
float in_buffer[kChannels][kChunkLength];
float* in_chunk[kChannels];
float out_buffer[kChannels][kChunkLength];
float* out_chunk[kChannels];
in_chunk[0] = in_buffer[0];
in_chunk[1] = in_buffer[1];
in_chunk[2] = in_buffer[2];
out_chunk[0] = out_buffer[0];
out_chunk[1] = out_buffer[1];
out_chunk[2] = out_buffer[2];
SetFloatArray(2.0f, kChannels, kChunkLength, in_chunk);
SetFloatArray(-1.0f, kChannels, kChunkLength, out_chunk);
trans.ProcessChunk(in_chunk, out_chunk);
for (size_t i = 0; i < kChannels; ++i) {
for (size_t j = 0; j < kChunkLength; ++j) {
ASSERT_NEAR(out_chunk[i][j], 2.0f, 1e-5f);
}
}
ASSERT_EQ(kChunkLength / kBlockLength, noop.block_num());
}
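// Why sqrt(0.5) acts as an identity window at 50% overlap (assuming the
// window is applied on both analysis and synthesis, which the expected values
// below reflect): each block is scaled by sqrt(0.5) * sqrt(0.5) = 0.5, and
// with kShiftAmount = kBlockLength / 2 every steady-state sample is the sum
// of two overlapping blocks, for a total gain of 2 * 0.5 = 1. The first
// kBlockLength - kShiftAmount samples are the blocker's initial delay and
// remain zero.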
TEST(LappedTransformTest, IdentityProcessor) {
const size_t kChunkLength = 512;
const size_t kBlockLength = 64;
const size_t kShiftAmount = 32;
NoopCallback noop;
  // Identity window for `overlap = block_size / 2`.
float window[kBlockLength];
std::fill(window, &window[kBlockLength], std::sqrt(0.5f));
LappedTransform trans(1, 1, kChunkLength, window, kBlockLength, kShiftAmount,
&noop);
float in_buffer[kChunkLength];
float* in_chunk = in_buffer;
float out_buffer[kChunkLength];
float* out_chunk = out_buffer;
SetFloatArray(2.0f, 1, kChunkLength, &in_chunk);
SetFloatArray(-1.0f, 1, kChunkLength, &out_chunk);
trans.ProcessChunk(&in_chunk, &out_chunk);
for (size_t i = 0; i < kChunkLength; ++i) {
ASSERT_NEAR(out_chunk[i], (i < kBlockLength - kShiftAmount) ? 0.0f : 2.0f,
1e-5f);
}
ASSERT_EQ(kChunkLength / kShiftAmount, noop.block_num());
}
TEST(LappedTransformTest, Callbacks) {
const size_t kChunkLength = 512;
const size_t kBlockLength = 64;
FftCheckerCallback call;
// Rectangular window.
float window[kBlockLength];
std::fill(window, &window[kBlockLength], 1.0f);
LappedTransform trans(1, 1, kChunkLength, window, kBlockLength, kBlockLength,
&call);
float in_buffer[kChunkLength];
float* in_chunk = in_buffer;
float out_buffer[kChunkLength];
float* out_chunk = out_buffer;
SetFloatArray(1.0f, 1, kChunkLength, &in_chunk);
SetFloatArray(-1.0f, 1, kChunkLength, &out_chunk);
trans.ProcessChunk(&in_chunk, &out_chunk);
ASSERT_EQ(kChunkLength / kBlockLength, call.block_num());
}
TEST(LappedTransformTest, ChunkLength) {
const size_t kBlockLength = 64;
FftCheckerCallback call;
const float window[kBlockLength] = {};
// Make sure that chunk_length returns the same value passed to the
// LappedTransform constructor.
{
const size_t kExpectedChunkLength = 512;
const LappedTransform trans(1, 1, kExpectedChunkLength, window,
kBlockLength, kBlockLength, &call);
EXPECT_EQ(kExpectedChunkLength, trans.chunk_length());
}
{
const size_t kExpectedChunkLength = 160;
const LappedTransform trans(1, 1, kExpectedChunkLength, window,
kBlockLength, kBlockLength, &call);
EXPECT_EQ(kExpectedChunkLength, trans.chunk_length());
}
}
} // namespace webrtc