Repo created

Fr4nz D13trich 2025-11-22 14:04:28 +01:00
parent 81b91f4139
commit f8c34fa5ee
22732 changed files with 4815320 additions and 2 deletions

modules/audio_coding/neteq/accelerate.cc (View file)

@@ -0,0 +1,105 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/accelerate.h"
#include "api/array_view.h"
#include "modules/audio_coding/neteq/audio_multi_vector.h"
namespace webrtc {
Accelerate::ReturnCodes Accelerate::Process(const int16_t* input,
size_t input_length,
bool fast_accelerate,
AudioMultiVector* output,
size_t* length_change_samples) {
// Input length must be (almost) 30 ms.
static const size_t k15ms = 120; // 15 ms = 120 samples at 8 kHz sample rate.
if (num_channels_ == 0 ||
input_length / num_channels_ < (2 * k15ms - 1) * fs_mult_) {
// Length of input data too short to do accelerate. Simply move all data
// from input to output.
output->PushBackInterleaved(
rtc::ArrayView<const int16_t>(input, input_length));
return kError;
}
return TimeStretch::Process(input, input_length, fast_accelerate, output,
length_change_samples);
}
void Accelerate::SetParametersForPassiveSpeech(size_t /*len*/,
int16_t* best_correlation,
size_t* /*peak_index*/) const {
// When the signal does not contain any active speech, the correlation does
// not matter. Simply set it to zero.
*best_correlation = 0;
}
Accelerate::ReturnCodes Accelerate::CheckCriteriaAndStretch(
const int16_t* input,
size_t input_length,
size_t peak_index,
int16_t best_correlation,
bool active_speech,
bool fast_mode,
AudioMultiVector* output) const {
// Check for strong correlation or passive speech.
// Use 8192 (0.5 in Q14) in fast mode.
const int correlation_threshold = fast_mode ? 8192 : kCorrelationThreshold;
if ((best_correlation > correlation_threshold) || !active_speech) {
// Do accelerate operation by overlap add.
// Pre-calculate common multiplication with `fs_mult_`.
// 120 corresponds to 15 ms.
size_t fs_mult_120 = fs_mult_ * 120;
if (fast_mode) {
// Fit as many multiples of `peak_index` as possible in fs_mult_120.
// TODO(henrik.lundin) Consider finding multiple correlation peaks and
// pick the one with the longest correlation lag in this case.
peak_index = (fs_mult_120 / peak_index) * peak_index;
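// Example: at 8 kHz (fs_mult_ == 1), fs_mult_120 == 120; with
// peak_index == 50, (120 / 50) * 50 == 100, so two whole pitch periods
// are removed in one pass instead of one.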
}
RTC_DCHECK_GE(fs_mult_120, peak_index); // Should be handled in Process().
// Copy first part; 0 to 15 ms.
output->PushBackInterleaved(
rtc::ArrayView<const int16_t>(input, fs_mult_120 * num_channels_));
// Copy the `peak_index` starting at 15 ms to `temp_vector`.
AudioMultiVector temp_vector(num_channels_);
temp_vector.PushBackInterleaved(rtc::ArrayView<const int16_t>(
&input[fs_mult_120 * num_channels_], peak_index * num_channels_));
// Cross-fade `temp_vector` onto the end of `output`.
output->CrossFade(temp_vector, peak_index);
// Copy the last unmodified part, 15 ms + pitch period until the end.
output->PushBackInterleaved(rtc::ArrayView<const int16_t>(
&input[(fs_mult_120 + peak_index) * num_channels_],
input_length - (fs_mult_120 + peak_index) * num_channels_));
if (active_speech) {
return kSuccess;
} else {
return kSuccessLowEnergy;
}
} else {
// Accelerate not allowed. Simply move all data from `input` to `output`.
output->PushBackInterleaved(
rtc::ArrayView<const int16_t>(input, input_length));
return kNoStretch;
}
}
Accelerate* AccelerateFactory::Create(
int sample_rate_hz,
size_t num_channels,
const BackgroundNoise& background_noise) const {
return new Accelerate(sample_rate_hz, num_channels, background_noise);
}
} // namespace webrtc
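
For context, a minimal sketch of driving this operation directly (not part of the commit; it assumes `BackgroundNoise` is constructible from the channel count, as in the neighboring NetEq sources):

#include "modules/audio_coding/neteq/accelerate.h"
#include "modules/audio_coding/neteq/audio_multi_vector.h"
#include "modules/audio_coding/neteq/background_noise.h"

void RunAccelerateOnce(const int16_t* block, size_t block_len_samples) {
  constexpr int kSampleRateHz = 8000;
  constexpr size_t kNumChannels = 1;
  webrtc::BackgroundNoise background_noise(kNumChannels);
  webrtc::Accelerate accelerate(kSampleRateHz, kNumChannels, background_noise);
  webrtc::AudioMultiVector output(kNumChannels);
  size_t samples_removed = 0;
  // `block` must hold (almost) 30 ms of audio, i.e. about 240 samples at
  // 8 kHz; otherwise Process() returns kError and copies the input unchanged.
  webrtc::Accelerate::ReturnCodes ret = accelerate.Process(
      block, block_len_samples, /*fast_accelerate=*/false, &output,
      &samples_removed);
  // On kSuccess or kSuccessLowEnergy, `output` is `samples_removed` samples
  // shorter than the input; on kNoStretch the input was copied through.
  (void)ret;
}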

modules/audio_coding/neteq/accelerate.h (View file)

@@ -0,0 +1,79 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_ACCELERATE_H_
#define MODULES_AUDIO_CODING_NETEQ_ACCELERATE_H_
#include <stddef.h>
#include <stdint.h>
#include "modules/audio_coding/neteq/time_stretch.h"
namespace webrtc {
class AudioMultiVector;
class BackgroundNoise;
// This class implements the Accelerate operation. Most of the work is done
// in the base class TimeStretch, which is shared with the PreemptiveExpand
// operation. In the Accelerate class, the operations that are specific to
// Accelerate are implemented.
class Accelerate : public TimeStretch {
public:
Accelerate(int sample_rate_hz,
size_t num_channels,
const BackgroundNoise& background_noise)
: TimeStretch(sample_rate_hz, num_channels, background_noise) {}
Accelerate(const Accelerate&) = delete;
Accelerate& operator=(const Accelerate&) = delete;
// This method performs the actual Accelerate operation. The samples are
// read from `input`, of length `input_length` elements, and are written to
// `output`. The number of samples removed through time-stretching is
// provided in the output `length_change_samples`. The method returns
// the outcome of the operation as an enumerator value. If `fast_accelerate`
// is true, the algorithm will relax the requirements on finding strong
// correlations, and may remove multiple pitch periods if possible.
ReturnCodes Process(const int16_t* input,
size_t input_length,
bool fast_accelerate,
AudioMultiVector* output,
size_t* length_change_samples);
protected:
// Sets the parameters `best_correlation` and `peak_index` to suitable
// values when the signal contains no active speech.
void SetParametersForPassiveSpeech(size_t len,
int16_t* best_correlation,
size_t* peak_index) const override;
// Checks the criteria for performing the time-stretching operation and,
// if possible, performs the time-stretching.
ReturnCodes CheckCriteriaAndStretch(const int16_t* input,
size_t input_length,
size_t peak_index,
int16_t best_correlation,
bool active_speech,
bool fast_mode,
AudioMultiVector* output) const override;
};
struct AccelerateFactory {
AccelerateFactory() {}
virtual ~AccelerateFactory() {}
virtual Accelerate* Create(int sample_rate_hz,
size_t num_channels,
const BackgroundNoise& background_noise) const;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_ACCELERATE_H_
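
The virtual `Create` on `AccelerateFactory` is the seam for substituting a custom implementation, e.g. in tests. A minimal sketch, where `InstrumentedAccelerate` is a hypothetical subclass:

#include "modules/audio_coding/neteq/accelerate.h"

// Hypothetical subclass; here it only inherits the constructor, but it could
// also override the protected hooks declared above.
class InstrumentedAccelerate : public webrtc::Accelerate {
 public:
  using webrtc::Accelerate::Accelerate;
};

struct InstrumentedAccelerateFactory : webrtc::AccelerateFactory {
  webrtc::Accelerate* Create(
      int sample_rate_hz,
      size_t num_channels,
      const webrtc::BackgroundNoise& background_noise) const override {
    return new InstrumentedAccelerate(sample_rate_hz, num_channels,
                                      background_noise);
  }
};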

modules/audio_coding/neteq/audio_decoder_unittest.cc (View file)

@@ -0,0 +1,526 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdlib.h>
#include <array>
#include <memory>
#include <string>
#include <vector>
#include "api/audio_codecs/opus/audio_encoder_opus.h"
#include "modules/audio_coding/codecs/g711/audio_decoder_pcm.h"
#include "modules/audio_coding/codecs/g711/audio_encoder_pcm.h"
#include "modules/audio_coding/codecs/g722/audio_decoder_g722.h"
#include "modules/audio_coding/codecs/g722/audio_encoder_g722.h"
#include "modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.h"
#include "modules/audio_coding/codecs/ilbc/audio_encoder_ilbc.h"
#include "modules/audio_coding/codecs/opus/audio_decoder_opus.h"
#include "modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.h"
#include "modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.h"
#include "modules/audio_coding/neteq/tools/resample_input_audio_file.h"
#include "rtc_base/system/arch.h"
#include "test/gtest.h"
#include "test/testsupport/file_utils.h"
namespace webrtc {
namespace {
constexpr int kOverheadBytesPerPacket = 50;
// The absolute difference between the input and output (the first channel) is
// compared vs `tolerance`. The parameter `delay` is used to correct for codec
// delays.
void CompareInputOutput(const std::vector<int16_t>& input,
const std::vector<int16_t>& output,
size_t num_samples,
size_t channels,
int tolerance,
int delay) {
ASSERT_LE(num_samples, input.size());
ASSERT_LE(num_samples * channels, output.size());
for (unsigned int n = 0; n < num_samples - delay; ++n) {
ASSERT_NEAR(input[n], output[channels * n + delay], tolerance)
<< "Exit test on first diff; n = " << n;
}
}
// The absolute difference between the first two channels in `output` is
// compared vs `tolerance`.
void CompareTwoChannels(const std::vector<int16_t>& output,
size_t samples_per_channel,
size_t channels,
int tolerance) {
ASSERT_GE(channels, 2u);
ASSERT_LE(samples_per_channel * channels, output.size());
for (unsigned int n = 0; n < samples_per_channel; ++n)
ASSERT_NEAR(output[channels * n], output[channels * n + 1], tolerance)
<< "Stereo samples differ.";
}
// Calculates mean-squared error between input and output (the first channel).
// The parameter `delay` is used to correct for codec delays.
double MseInputOutput(const std::vector<int16_t>& input,
const std::vector<int16_t>& output,
size_t num_samples,
size_t channels,
int delay) {
RTC_DCHECK_LT(delay, static_cast<int>(num_samples));
RTC_DCHECK_LE(num_samples, input.size());
RTC_DCHECK_LE(num_samples * channels, output.size());
if (num_samples == 0)
return 0.0;
double squared_sum = 0.0;
for (unsigned int n = 0; n < num_samples - delay; ++n) {
squared_sum += (input[n] - output[channels * n + delay]) *
(input[n] - output[channels * n + delay]);
}
return squared_sum / (num_samples - delay);
}
} // namespace
class AudioDecoderTest : public ::testing::Test {
protected:
AudioDecoderTest()
: input_audio_(
webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"),
32000),
codec_input_rate_hz_(32000), // Legacy default value.
frame_size_(0),
data_length_(0),
channels_(1),
payload_type_(17),
decoder_(NULL) {}
~AudioDecoderTest() override {}
void SetUp() override {
if (audio_encoder_)
codec_input_rate_hz_ = audio_encoder_->SampleRateHz();
// Create arrays.
ASSERT_GT(data_length_, 0u) << "The test must set data_length_ > 0";
}
void TearDown() override {
delete decoder_;
decoder_ = NULL;
}
virtual void InitEncoder() {}
// TODO(henrik.lundin) Change return type to size_t once most/all overriding
// implementations are gone.
virtual int EncodeFrame(const int16_t* input,
size_t input_len_samples,
rtc::Buffer* output) {
AudioEncoder::EncodedInfo encoded_info;
const size_t samples_per_10ms = audio_encoder_->SampleRateHz() / 100;
RTC_CHECK_EQ(samples_per_10ms * audio_encoder_->Num10MsFramesInNextPacket(),
input_len_samples);
std::unique_ptr<int16_t[]> interleaved_input(
new int16_t[channels_ * samples_per_10ms]);
for (size_t i = 0; i < audio_encoder_->Num10MsFramesInNextPacket(); ++i) {
EXPECT_EQ(0u, encoded_info.encoded_bytes);
// Duplicate the mono input signal to however many channels the test
// wants.
test::InputAudioFile::DuplicateInterleaved(input + i * samples_per_10ms,
samples_per_10ms, channels_,
interleaved_input.get());
encoded_info =
audio_encoder_->Encode(0,
rtc::ArrayView<const int16_t>(
interleaved_input.get(),
audio_encoder_->NumChannels() *
audio_encoder_->SampleRateHz() / 100),
output);
}
EXPECT_EQ(payload_type_, encoded_info.payload_type);
return static_cast<int>(encoded_info.encoded_bytes);
}
// Encodes and decodes audio. The absolute difference between the input and
// output is compared vs `tolerance`, and the mean-squared error is compared
// with `mse`. The encoded stream should contain `expected_bytes`. For stereo
// audio, the absolute difference between the two channels is compared vs
// `channel_diff_tolerance`.
void EncodeDecodeTest(size_t expected_bytes,
int tolerance,
double mse,
int delay = 0,
int channel_diff_tolerance = 0) {
ASSERT_GE(tolerance, 0) << "Test must define a tolerance >= 0";
ASSERT_GE(channel_diff_tolerance, 0)
<< "Test must define a channel_diff_tolerance >= 0";
size_t processed_samples = 0u;
size_t encoded_bytes = 0u;
InitEncoder();
std::vector<int16_t> input;
std::vector<int16_t> decoded;
while (processed_samples + frame_size_ <= data_length_) {
// Extend input vector with `frame_size_`.
input.resize(input.size() + frame_size_, 0);
// Read from input file.
ASSERT_GE(input.size() - processed_samples, frame_size_);
ASSERT_TRUE(input_audio_.Read(frame_size_, codec_input_rate_hz_,
&input[processed_samples]));
rtc::Buffer encoded;
size_t enc_len =
EncodeFrame(&input[processed_samples], frame_size_, &encoded);
// Make sure that frame_size_ * channels_ samples are allocated and available.
decoded.resize((processed_samples + frame_size_) * channels_, 0);
const std::vector<AudioDecoder::ParseResult> parse_result =
decoder_->ParsePayload(std::move(encoded), /*timestamp=*/0);
RTC_CHECK_EQ(parse_result.size(), size_t{1});
auto decode_result = parse_result[0].frame->Decode(
rtc::ArrayView<int16_t>(&decoded[processed_samples * channels_],
frame_size_ * channels_ * sizeof(int16_t)));
RTC_CHECK(decode_result.has_value());
EXPECT_EQ(frame_size_ * channels_, decode_result->num_decoded_samples);
encoded_bytes += enc_len;
processed_samples += frame_size_;
}
// For some codecs it doesn't make sense to check expected number of bytes,
// since the number can vary for different platforms. Opus is such a codec.
// In this case expected_bytes is set to 0.
if (expected_bytes) {
EXPECT_EQ(expected_bytes, encoded_bytes);
}
CompareInputOutput(input, decoded, processed_samples, channels_, tolerance,
delay);
if (channels_ == 2)
CompareTwoChannels(decoded, processed_samples, channels_,
channel_diff_tolerance);
EXPECT_LE(
MseInputOutput(input, decoded, processed_samples, channels_, delay),
mse);
}
// Encodes a payload and decodes it twice with decoder re-init before each
// decode. Verifies that the decoded result is the same.
void ReInitTest() {
InitEncoder();
std::unique_ptr<int16_t[]> input(new int16_t[frame_size_]);
ASSERT_TRUE(
input_audio_.Read(frame_size_, codec_input_rate_hz_, input.get()));
std::array<rtc::Buffer, 2> encoded;
EncodeFrame(input.get(), frame_size_, &encoded[0]);
// Make a copy.
encoded[1].SetData(encoded[0].data(), encoded[0].size());
std::array<std::vector<int16_t>, 2> outputs;
for (size_t i = 0; i < outputs.size(); ++i) {
outputs[i].resize(frame_size_ * channels_);
decoder_->Reset();
const std::vector<AudioDecoder::ParseResult> parse_result =
decoder_->ParsePayload(std::move(encoded[i]), /*timestamp=*/0);
RTC_CHECK_EQ(parse_result.size(), size_t{1});
auto decode_result = parse_result[0].frame->Decode(outputs[i]);
RTC_CHECK(decode_result.has_value());
EXPECT_EQ(frame_size_ * channels_, decode_result->num_decoded_samples);
}
EXPECT_EQ(outputs[0], outputs[1]);
}
// Call DecodePlc and verify that the correct number of samples is produced.
void DecodePlcTest() {
InitEncoder();
std::unique_ptr<int16_t[]> input(new int16_t[frame_size_]);
ASSERT_TRUE(
input_audio_.Read(frame_size_, codec_input_rate_hz_, input.get()));
rtc::Buffer encoded;
EncodeFrame(input.get(), frame_size_, &encoded);
decoder_->Reset();
std::vector<int16_t> output(frame_size_ * channels_);
const std::vector<AudioDecoder::ParseResult> parse_result =
decoder_->ParsePayload(std::move(encoded), /*timestamp=*/0);
RTC_CHECK_EQ(parse_result.size(), size_t{1});
auto decode_result = parse_result[0].frame->Decode(output);
RTC_CHECK(decode_result.has_value());
EXPECT_EQ(frame_size_ * channels_, decode_result->num_decoded_samples);
// Call DecodePlc and verify that we get one frame of data.
// (Overwrite the output from the above Decode call, but that does not
// matter.)
size_t dec_len =
decoder_->DecodePlc(/*num_frames=*/1, /*decoded=*/output.data());
EXPECT_EQ(frame_size_ * channels_, dec_len);
}
test::ResampleInputAudioFile input_audio_;
int codec_input_rate_hz_;
size_t frame_size_;
size_t data_length_;
size_t channels_;
const int payload_type_;
AudioDecoder* decoder_;
std::unique_ptr<AudioEncoder> audio_encoder_;
};
class AudioDecoderPcmUTest : public AudioDecoderTest {
protected:
AudioDecoderPcmUTest() : AudioDecoderTest() {
frame_size_ = 160;
data_length_ = 10 * frame_size_;
decoder_ = new AudioDecoderPcmU(1);
AudioEncoderPcmU::Config config;
config.frame_size_ms = static_cast<int>(frame_size_ / 8);
config.payload_type = payload_type_;
audio_encoder_.reset(new AudioEncoderPcmU(config));
}
};
class AudioDecoderPcmATest : public AudioDecoderTest {
protected:
AudioDecoderPcmATest() : AudioDecoderTest() {
frame_size_ = 160;
data_length_ = 10 * frame_size_;
decoder_ = new AudioDecoderPcmA(1);
AudioEncoderPcmA::Config config;
config.frame_size_ms = static_cast<int>(frame_size_ / 8);
config.payload_type = payload_type_;
audio_encoder_.reset(new AudioEncoderPcmA(config));
}
};
class AudioDecoderPcm16BTest : public AudioDecoderTest {
protected:
AudioDecoderPcm16BTest() : AudioDecoderTest() {
codec_input_rate_hz_ = 16000;
frame_size_ = 20 * codec_input_rate_hz_ / 1000;
data_length_ = 10 * frame_size_;
decoder_ = new AudioDecoderPcm16B(codec_input_rate_hz_, 1);
RTC_DCHECK(decoder_);
AudioEncoderPcm16B::Config config;
config.sample_rate_hz = codec_input_rate_hz_;
config.frame_size_ms =
static_cast<int>(frame_size_ / (config.sample_rate_hz / 1000));
config.payload_type = payload_type_;
audio_encoder_.reset(new AudioEncoderPcm16B(config));
}
};
class AudioDecoderIlbcTest : public AudioDecoderTest {
protected:
AudioDecoderIlbcTest() : AudioDecoderTest() {
codec_input_rate_hz_ = 8000;
frame_size_ = 240;
data_length_ = 10 * frame_size_;
decoder_ = new AudioDecoderIlbcImpl;
RTC_DCHECK(decoder_);
AudioEncoderIlbcConfig config;
config.frame_size_ms = 30;
audio_encoder_.reset(new AudioEncoderIlbcImpl(config, payload_type_));
}
// Overload the default test since iLBC's function WebRtcIlbcfix_NetEqPlc does
// not return any data. It simply resets a few states and returns 0.
void DecodePlcTest() {
InitEncoder();
std::unique_ptr<int16_t[]> input(new int16_t[frame_size_]);
ASSERT_TRUE(
input_audio_.Read(frame_size_, codec_input_rate_hz_, input.get()));
rtc::Buffer encoded;
size_t enc_len = EncodeFrame(input.get(), frame_size_, &encoded);
AudioDecoder::SpeechType speech_type;
decoder_->Reset();
std::unique_ptr<int16_t[]> output(new int16_t[frame_size_ * channels_]);
size_t dec_len = decoder_->Decode(
encoded.data(), enc_len, codec_input_rate_hz_,
frame_size_ * channels_ * sizeof(int16_t), output.get(), &speech_type);
EXPECT_EQ(frame_size_, dec_len);
// Simply call DecodePlc and verify that we get 0 as return value.
EXPECT_EQ(0U, decoder_->DecodePlc(1, output.get()));
}
};
class AudioDecoderG722Test : public AudioDecoderTest {
protected:
AudioDecoderG722Test() : AudioDecoderTest() {
codec_input_rate_hz_ = 16000;
frame_size_ = 160;
data_length_ = 10 * frame_size_;
decoder_ = new AudioDecoderG722Impl;
RTC_DCHECK(decoder_);
AudioEncoderG722Config config;
config.frame_size_ms = 10;
config.num_channels = 1;
audio_encoder_.reset(new AudioEncoderG722Impl(config, payload_type_));
}
};
class AudioDecoderG722StereoTest : public AudioDecoderTest {
protected:
AudioDecoderG722StereoTest() : AudioDecoderTest() {
channels_ = 2;
codec_input_rate_hz_ = 16000;
frame_size_ = 160;
data_length_ = 10 * frame_size_;
decoder_ = new AudioDecoderG722StereoImpl;
RTC_DCHECK(decoder_);
AudioEncoderG722Config config;
config.frame_size_ms = 10;
config.num_channels = 2;
audio_encoder_.reset(new AudioEncoderG722Impl(config, payload_type_));
}
};
class AudioDecoderOpusTest
: public AudioDecoderTest,
public testing::WithParamInterface<std::tuple<int, int>> {
protected:
AudioDecoderOpusTest() : AudioDecoderTest() {
channels_ = opus_num_channels_;
codec_input_rate_hz_ = opus_sample_rate_hz_;
frame_size_ = rtc::CheckedDivExact(opus_sample_rate_hz_, 100);
data_length_ = 10 * frame_size_;
decoder_ =
new AudioDecoderOpusImpl(opus_num_channels_, opus_sample_rate_hz_);
AudioEncoderOpusConfig config;
config.frame_size_ms = 10;
config.sample_rate_hz = opus_sample_rate_hz_;
config.num_channels = opus_num_channels_;
config.application = opus_num_channels_ == 1
? AudioEncoderOpusConfig::ApplicationMode::kVoip
: AudioEncoderOpusConfig::ApplicationMode::kAudio;
audio_encoder_ = AudioEncoderOpus::MakeAudioEncoder(config, payload_type_);
audio_encoder_->OnReceivedOverhead(kOverheadBytesPerPacket);
}
const int opus_sample_rate_hz_{std::get<0>(GetParam())};
const int opus_num_channels_{std::get<1>(GetParam())};
};
INSTANTIATE_TEST_SUITE_P(Param,
AudioDecoderOpusTest,
testing::Combine(testing::Values(16000, 48000),
testing::Values(1, 2)));
TEST_F(AudioDecoderPcmUTest, EncodeDecode) {
int tolerance = 251;
double mse = 1734.0;
EncodeDecodeTest(data_length_, tolerance, mse);
ReInitTest();
EXPECT_FALSE(decoder_->HasDecodePlc());
}
namespace {
int SetAndGetTargetBitrate(AudioEncoder* audio_encoder, int rate) {
audio_encoder->OnReceivedUplinkBandwidth(rate, absl::nullopt);
return audio_encoder->GetTargetBitrate();
}
void TestSetAndGetTargetBitratesWithFixedCodec(AudioEncoder* audio_encoder,
int fixed_rate) {
EXPECT_EQ(fixed_rate, SetAndGetTargetBitrate(audio_encoder, 32000));
EXPECT_EQ(fixed_rate, SetAndGetTargetBitrate(audio_encoder, fixed_rate - 1));
EXPECT_EQ(fixed_rate, SetAndGetTargetBitrate(audio_encoder, fixed_rate));
EXPECT_EQ(fixed_rate, SetAndGetTargetBitrate(audio_encoder, fixed_rate + 1));
}
} // namespace
TEST_F(AudioDecoderPcmUTest, SetTargetBitrate) {
TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 64000);
}
TEST_F(AudioDecoderPcmATest, EncodeDecode) {
int tolerance = 308;
double mse = 1931.0;
EncodeDecodeTest(data_length_, tolerance, mse);
ReInitTest();
EXPECT_FALSE(decoder_->HasDecodePlc());
}
TEST_F(AudioDecoderPcmATest, SetTargetBitrate) {
TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 64000);
}
TEST_F(AudioDecoderPcm16BTest, EncodeDecode) {
int tolerance = 0;
double mse = 0.0;
EncodeDecodeTest(2 * data_length_, tolerance, mse);
ReInitTest();
EXPECT_FALSE(decoder_->HasDecodePlc());
}
TEST_F(AudioDecoderPcm16BTest, SetTargetBitrate) {
TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(),
codec_input_rate_hz_ * 16);
}
TEST_F(AudioDecoderIlbcTest, EncodeDecode) {
int tolerance = 6808;
double mse = 2.13e6;
int delay = 80; // Delay from input to output.
EncodeDecodeTest(500, tolerance, mse, delay);
ReInitTest();
EXPECT_TRUE(decoder_->HasDecodePlc());
DecodePlcTest();
}
TEST_F(AudioDecoderIlbcTest, SetTargetBitrate) {
TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 13333);
}
TEST_F(AudioDecoderG722Test, EncodeDecode) {
int tolerance = 6176;
double mse = 238630.0;
int delay = 22; // Delay from input to output.
EncodeDecodeTest(data_length_ / 2, tolerance, mse, delay);
ReInitTest();
EXPECT_FALSE(decoder_->HasDecodePlc());
}
TEST_F(AudioDecoderG722Test, SetTargetBitrate) {
TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 64000);
}
TEST_F(AudioDecoderG722StereoTest, EncodeDecode) {
int tolerance = 6176;
int channel_diff_tolerance = 0;
double mse = 238630.0;
int delay = 22; // Delay from input to output.
EncodeDecodeTest(data_length_, tolerance, mse, delay, channel_diff_tolerance);
ReInitTest();
EXPECT_FALSE(decoder_->HasDecodePlc());
}
TEST_F(AudioDecoderG722StereoTest, SetTargetBitrate) {
TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 128000);
}
// TODO(http://bugs.webrtc.org/12518): Enable the test after Opus has been
// updated.
TEST_P(AudioDecoderOpusTest, DISABLED_EncodeDecode) {
constexpr int tolerance = 6176;
constexpr int channel_diff_tolerance = 6;
constexpr double mse = 238630.0;
constexpr int delay = 22; // Delay from input to output.
EncodeDecodeTest(0, tolerance, mse, delay, channel_diff_tolerance);
ReInitTest();
EXPECT_FALSE(decoder_->HasDecodePlc());
}
TEST_P(AudioDecoderOpusTest, SetTargetBitrate) {
const int overhead_rate =
8 * kOverheadBytesPerPacket * codec_input_rate_hz_ / frame_size_;
EXPECT_EQ(6000,
SetAndGetTargetBitrate(audio_encoder_.get(), 5999 + overhead_rate));
EXPECT_EQ(6000,
SetAndGetTargetBitrate(audio_encoder_.get(), 6000 + overhead_rate));
EXPECT_EQ(32000, SetAndGetTargetBitrate(audio_encoder_.get(),
32000 + overhead_rate));
EXPECT_EQ(510000, SetAndGetTargetBitrate(audio_encoder_.get(),
510000 + overhead_rate));
EXPECT_EQ(510000, SetAndGetTargetBitrate(audio_encoder_.get(),
511000 + overhead_rate));
}
} // namespace webrtc
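
A quick check of the overhead arithmetic in `AudioDecoderOpusTest.SetTargetBitrate` above: with 10 ms frames the packet rate is 100 packets/s, so the subtracted overhead comes out the same at every sample rate. A minimal sketch:

int main() {
  constexpr int kOverheadBytesPerPacket = 50;      // As in the test above.
  constexpr int kSampleRateHz = 48000;             // One of the test parameters.
  constexpr int kFrameSize = kSampleRateHz / 100;  // 10 ms frames.
  constexpr int kOverheadRate =
      8 * kOverheadBytesPerPacket * kSampleRateHz / kFrameSize;
  // 400 bits per packet at 100 packets/s: 40 kbps at 16 kHz and at 48 kHz.
  static_assert(kOverheadRate == 40000, "overhead rate is rate-independent");
  return 0;
}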

modules/audio_coding/neteq/audio_multi_vector.cc (View file)

@@ -0,0 +1,221 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/audio_multi_vector.h"
#include <algorithm>
#include "rtc_base/checks.h"
namespace webrtc {
AudioMultiVector::AudioMultiVector(size_t N) {
RTC_DCHECK_GT(N, 0);
if (N < 1)
N = 1;
for (size_t n = 0; n < N; ++n) {
channels_.push_back(new AudioVector);
}
num_channels_ = N;
}
AudioMultiVector::AudioMultiVector(size_t N, size_t initial_size) {
RTC_DCHECK_GT(N, 0);
if (N < 1)
N = 1;
for (size_t n = 0; n < N; ++n) {
channels_.push_back(new AudioVector(initial_size));
}
num_channels_ = N;
}
AudioMultiVector::~AudioMultiVector() {
std::vector<AudioVector*>::iterator it = channels_.begin();
while (it != channels_.end()) {
delete (*it);
++it;
}
}
void AudioMultiVector::Clear() {
for (size_t i = 0; i < num_channels_; ++i) {
channels_[i]->Clear();
}
}
void AudioMultiVector::Zeros(size_t length) {
for (size_t i = 0; i < num_channels_; ++i) {
channels_[i]->Clear();
channels_[i]->Extend(length);
}
}
void AudioMultiVector::CopyTo(AudioMultiVector* copy_to) const {
if (copy_to) {
for (size_t i = 0; i < num_channels_; ++i) {
channels_[i]->CopyTo(&(*copy_to)[i]);
}
}
}
void AudioMultiVector::PushBackInterleaved(
rtc::ArrayView<const int16_t> append_this) {
RTC_DCHECK_EQ(append_this.size() % num_channels_, 0);
if (append_this.empty()) {
return;
}
if (num_channels_ == 1) {
// Special case to avoid extra allocation and data shuffling.
channels_[0]->PushBack(append_this.data(), append_this.size());
return;
}
size_t length_per_channel = append_this.size() / num_channels_;
int16_t* temp_array = new int16_t[length_per_channel]; // Temporary storage.
for (size_t channel = 0; channel < num_channels_; ++channel) {
// Copy elements to `temp_array`.
for (size_t i = 0; i < length_per_channel; ++i) {
temp_array[i] = append_this[channel + i * num_channels_];
}
channels_[channel]->PushBack(temp_array, length_per_channel);
}
delete[] temp_array;
}
void AudioMultiVector::PushBack(const AudioMultiVector& append_this) {
RTC_DCHECK_EQ(num_channels_, append_this.num_channels_);
if (num_channels_ == append_this.num_channels_) {
for (size_t i = 0; i < num_channels_; ++i) {
channels_[i]->PushBack(append_this[i]);
}
}
}
void AudioMultiVector::PushBackFromIndex(const AudioMultiVector& append_this,
size_t index) {
RTC_DCHECK_LT(index, append_this.Size());
index = std::min(index, append_this.Size() - 1);
size_t length = append_this.Size() - index;
RTC_DCHECK_EQ(num_channels_, append_this.num_channels_);
if (num_channels_ == append_this.num_channels_) {
for (size_t i = 0; i < num_channels_; ++i) {
channels_[i]->PushBack(append_this[i], length, index);
}
}
}
void AudioMultiVector::PopFront(size_t length) {
for (size_t i = 0; i < num_channels_; ++i) {
channels_[i]->PopFront(length);
}
}
void AudioMultiVector::PopBack(size_t length) {
for (size_t i = 0; i < num_channels_; ++i) {
channels_[i]->PopBack(length);
}
}
size_t AudioMultiVector::ReadInterleaved(size_t length,
int16_t* destination) const {
return ReadInterleavedFromIndex(0, length, destination);
}
size_t AudioMultiVector::ReadInterleavedFromIndex(size_t start_index,
size_t length,
int16_t* destination) const {
RTC_DCHECK(destination);
size_t index = 0; // Number of elements written to `destination` so far.
RTC_DCHECK_LE(start_index, Size());
start_index = std::min(start_index, Size());
if (length + start_index > Size()) {
length = Size() - start_index;
}
if (num_channels_ == 1) {
// Special case to avoid the nested for loop below.
(*this)[0].CopyTo(length, start_index, destination);
return length;
}
for (size_t i = 0; i < length; ++i) {
for (size_t channel = 0; channel < num_channels_; ++channel) {
destination[index] = (*this)[channel][i + start_index];
++index;
}
}
return index;
}
size_t AudioMultiVector::ReadInterleavedFromEnd(size_t length,
int16_t* destination) const {
length = std::min(length, Size()); // Cannot read more than Size() elements.
return ReadInterleavedFromIndex(Size() - length, length, destination);
}
void AudioMultiVector::OverwriteAt(const AudioMultiVector& insert_this,
size_t length,
size_t position) {
RTC_DCHECK_EQ(num_channels_, insert_this.num_channels_);
// Cap `length` at the length of `insert_this`.
RTC_DCHECK_LE(length, insert_this.Size());
length = std::min(length, insert_this.Size());
if (num_channels_ == insert_this.num_channels_) {
for (size_t i = 0; i < num_channels_; ++i) {
channels_[i]->OverwriteAt(insert_this[i], length, position);
}
}
}
void AudioMultiVector::CrossFade(const AudioMultiVector& append_this,
size_t fade_length) {
RTC_DCHECK_EQ(num_channels_, append_this.num_channels_);
if (num_channels_ == append_this.num_channels_) {
for (size_t i = 0; i < num_channels_; ++i) {
channels_[i]->CrossFade(append_this[i], fade_length);
}
}
}
size_t AudioMultiVector::Channels() const {
return num_channels_;
}
size_t AudioMultiVector::Size() const {
RTC_DCHECK(channels_[0]);
return channels_[0]->Size();
}
void AudioMultiVector::AssertSize(size_t required_size) {
if (Size() < required_size) {
size_t extend_length = required_size - Size();
for (size_t channel = 0; channel < num_channels_; ++channel) {
channels_[channel]->Extend(extend_length);
}
}
}
bool AudioMultiVector::Empty() const {
RTC_DCHECK(channels_[0]);
return channels_[0]->Empty();
}
void AudioMultiVector::CopyChannel(size_t from_channel, size_t to_channel) {
RTC_DCHECK_LT(from_channel, num_channels_);
RTC_DCHECK_LT(to_channel, num_channels_);
channels_[from_channel]->CopyTo(channels_[to_channel]);
}
const AudioVector& AudioMultiVector::operator[](size_t index) const {
return *(channels_[index]);
}
AudioVector& AudioMultiVector::operator[](size_t index) {
return *(channels_[index]);
}
} // namespace webrtc
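
A minimal sketch of the interleaving contract implemented above: `PushBackInterleaved()` deinterleaves into the per-channel `AudioVector`s, and `ReadInterleaved()` restores the original sample order.

#include "modules/audio_coding/neteq/audio_multi_vector.h"
#include "rtc_base/checks.h"

void InterleaveRoundTrip() {
  // Stereo frames: {left, right} pairs.
  const int16_t interleaved[] = {101, 201, 102, 202, 103, 203};
  webrtc::AudioMultiVector vec(/*N=*/2);
  vec.PushBackInterleaved(interleaved);  // Three samples per channel.
  RTC_CHECK_EQ(3u, vec.Size());
  RTC_CHECK_EQ(101, vec[0][0]);  // Channel 0 now holds 101, 102, 103.
  RTC_CHECK_EQ(201, vec[1][0]);  // Channel 1 now holds 201, 202, 203.
  int16_t out[6];
  RTC_CHECK_EQ(6u, vec.ReadInterleaved(3, out));  // Interleaved order again.
}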

modules/audio_coding/neteq/audio_multi_vector.h (View file)

@@ -0,0 +1,138 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_AUDIO_MULTI_VECTOR_H_
#define MODULES_AUDIO_CODING_NETEQ_AUDIO_MULTI_VECTOR_H_
#include <stdint.h>
#include <string.h>
#include <vector>
#include "api/array_view.h"
#include "modules/audio_coding/neteq/audio_vector.h"
namespace webrtc {
class AudioMultiVector {
public:
// Creates an empty AudioMultiVector with `N` audio channels. `N` must be
// larger than 0.
explicit AudioMultiVector(size_t N);
// Creates an AudioMultiVector with `N` audio channels, each channel having
// an initial size. `N` must be larger than 0.
AudioMultiVector(size_t N, size_t initial_size);
virtual ~AudioMultiVector();
AudioMultiVector(const AudioMultiVector&) = delete;
AudioMultiVector& operator=(const AudioMultiVector&) = delete;
// Deletes all values and makes the vector empty.
virtual void Clear();
// Clears the vector and inserts `length` zeros into each channel.
virtual void Zeros(size_t length);
// Copies all values from this vector to `copy_to`. Any contents in `copy_to`
// are deleted. After the operation is done, `copy_to` will be an exact
// replica of this object. The source and the destination must have the same
// number of channels.
virtual void CopyTo(AudioMultiVector* copy_to) const;
// Appends the contents of `append_this` to the end of this object. The array
// is assumed to be channel-interleaved. Its length must be an integer multiple
// of this object's number of channels. The length of this object is increased
// by the length of the array divided by the number of channels.
void PushBackInterleaved(rtc::ArrayView<const int16_t> append_this);
// Appends the contents of AudioMultiVector `append_this` to this object. The
// length of this object is increased by the length of `append_this`.
virtual void PushBack(const AudioMultiVector& append_this);
// Appends the contents of AudioMultiVector `append_this` to this object,
// taken from `index` up until the end of `append_this`. The length of this
// object is increased.
virtual void PushBackFromIndex(const AudioMultiVector& append_this,
size_t index);
// Removes `length` elements from the beginning of this object, from each
// channel.
virtual void PopFront(size_t length);
// Removes `length` elements from the end of this object, from each
// channel.
virtual void PopBack(size_t length);
// Reads `length` samples from each channel and writes them interleaved to
// `destination`. The total number of elements written to `destination` is
// returned, i.e., `length` * number of channels. If the AudioMultiVector
// contains fewer than `length` samples per channel, this is reflected in the
// return value.
virtual size_t ReadInterleaved(size_t length, int16_t* destination) const;
// Like ReadInterleaved() above, but reads from `start_index` instead of from
// the beginning.
virtual size_t ReadInterleavedFromIndex(size_t start_index,
size_t length,
int16_t* destination) const;
// Like ReadInterleaved() above, but reads from the end instead of from
// the beginning.
virtual size_t ReadInterleavedFromEnd(size_t length,
int16_t* destination) const;
// Overwrites each channel in this AudioMultiVector with values taken from
// `insert_this`. The values are taken from the beginning of `insert_this` and
// are inserted starting at `position`. `length` values are written into each
// channel. If `length` and `position` are selected such that the new data
// extends beyond the end of the current AudioVector, the vector is extended
// to accommodate the new data. `length` is limited to the length of
// `insert_this`.
virtual void OverwriteAt(const AudioMultiVector& insert_this,
size_t length,
size_t position);
// Appends `append_this` to the end of the current vector. Lets the two
// vectors overlap by `fade_length` samples (per channel), and cross-fade
// linearly in this region.
virtual void CrossFade(const AudioMultiVector& append_this,
size_t fade_length);
// Returns the number of channels.
virtual size_t Channels() const;
// Returns the number of elements per channel in this AudioMultiVector.
virtual size_t Size() const;
// Verify that each channel can hold at least `required_size` elements. If
// not, extend accordingly.
virtual void AssertSize(size_t required_size);
virtual bool Empty() const;
// Copies the data between two channels in the AudioMultiVector. The method
// does not add any new channel. Thus, `from_channel` and `to_channel` must
// both be valid channel numbers.
virtual void CopyChannel(size_t from_channel, size_t to_channel);
// Accesses and modifies a channel (i.e., an AudioVector object) of this
// AudioMultiVector.
const AudioVector& operator[](size_t index) const;
AudioVector& operator[](size_t index);
protected:
std::vector<AudioVector*> channels_;
size_t num_channels_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_AUDIO_MULTI_VECTOR_H_

modules/audio_coding/neteq/audio_multi_vector_unittest.cc (View file)

@@ -0,0 +1,323 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/audio_multi_vector.h"
#include <stdlib.h>
#include <string>
#include <vector>
#include "rtc_base/numerics/safe_conversions.h"
#include "test/gtest.h"
namespace webrtc {
// This is a value-parameterized test. The test cases are instantiated with
// different values for the test parameter, which is used to determine the
// number of channels in the AudioMultiVector. Note that it is not possible
// to combine typed testing with value-parameterized testing, and since the
// tests for AudioVector already cover a number of different type parameters,
// this test focuses on testing different numbers of channels while keeping
// the value type constant.
class AudioMultiVectorTest : public ::testing::TestWithParam<size_t> {
protected:
AudioMultiVectorTest()
: num_channels_(GetParam()), // Get the test parameter.
array_interleaved_(num_channels_ * array_length()) {}
~AudioMultiVectorTest() = default;
virtual void SetUp() {
// Populate test arrays.
for (size_t i = 0; i < array_length(); ++i) {
array_[i] = static_cast<int16_t>(i);
}
int16_t* ptr = array_interleaved_.data();
// Write 100, 101, 102, ... for first channel.
// Write 200, 201, 202, ... for second channel.
// And so on.
for (size_t i = 0; i < array_length(); ++i) {
for (size_t j = 1; j <= num_channels_; ++j) {
*ptr = rtc::checked_cast<int16_t>(j * 100 + i);
++ptr;
}
}
}
size_t array_length() const { return sizeof(array_) / sizeof(array_[0]); }
const size_t num_channels_;
int16_t array_[10];
std::vector<int16_t> array_interleaved_;
};
// Create and destroy AudioMultiVector objects, both empty and with a predefined
// length.
TEST_P(AudioMultiVectorTest, CreateAndDestroy) {
AudioMultiVector vec1(num_channels_);
EXPECT_TRUE(vec1.Empty());
EXPECT_EQ(num_channels_, vec1.Channels());
EXPECT_EQ(0u, vec1.Size());
size_t initial_size = 17;
AudioMultiVector vec2(num_channels_, initial_size);
EXPECT_FALSE(vec2.Empty());
EXPECT_EQ(num_channels_, vec2.Channels());
EXPECT_EQ(initial_size, vec2.Size());
}
// Test the subscript operator [] for getting and setting.
TEST_P(AudioMultiVectorTest, SubscriptOperator) {
AudioMultiVector vec(num_channels_, array_length());
for (size_t channel = 0; channel < num_channels_; ++channel) {
for (size_t i = 0; i < array_length(); ++i) {
vec[channel][i] = static_cast<int16_t>(i);
// Make sure to use the const version.
const AudioVector& audio_vec = vec[channel];
EXPECT_EQ(static_cast<int16_t>(i), audio_vec[i]);
}
}
}
// Test the PushBackInterleaved method and the CopyTo method. The Clear
// method is also invoked.
TEST_P(AudioMultiVectorTest, PushBackInterleavedAndCopy) {
AudioMultiVector vec(num_channels_);
vec.PushBackInterleaved(array_interleaved_);
AudioMultiVector vec_copy(num_channels_);
vec.CopyTo(&vec_copy); // Copy from `vec` to `vec_copy`.
ASSERT_EQ(num_channels_, vec.Channels());
ASSERT_EQ(array_length(), vec.Size());
ASSERT_EQ(num_channels_, vec_copy.Channels());
ASSERT_EQ(array_length(), vec_copy.Size());
for (size_t channel = 0; channel < vec.Channels(); ++channel) {
for (size_t i = 0; i < array_length(); ++i) {
EXPECT_EQ(static_cast<int16_t>((channel + 1) * 100 + i), vec[channel][i]);
EXPECT_EQ(vec[channel][i], vec_copy[channel][i]);
}
}
// Clear `vec` and verify that it is empty.
vec.Clear();
EXPECT_TRUE(vec.Empty());
// Now copy the empty vector and verify that the copy becomes empty too.
vec.CopyTo(&vec_copy);
EXPECT_TRUE(vec_copy.Empty());
}
// Try to copy to a NULL pointer. Nothing should happen.
TEST_P(AudioMultiVectorTest, CopyToNull) {
AudioMultiVector vec(num_channels_);
AudioMultiVector* vec_copy = NULL;
vec.PushBackInterleaved(array_interleaved_);
vec.CopyTo(vec_copy);
}
// Test the PushBack method with another AudioMultiVector as input argument.
TEST_P(AudioMultiVectorTest, PushBackVector) {
AudioMultiVector vec1(num_channels_, array_length());
AudioMultiVector vec2(num_channels_, array_length());
// Set the first vector to [0, 1, ..., array_length() - 1] +
// 100 * channel_number.
// Set the second vector to [array_length(), array_length() + 1, ...,
// 2 * array_length() - 1] + 100 * channel_number.
for (size_t channel = 0; channel < num_channels_; ++channel) {
for (size_t i = 0; i < array_length(); ++i) {
vec1[channel][i] = static_cast<int16_t>(i + 100 * channel);
vec2[channel][i] =
static_cast<int16_t>(i + 100 * channel + array_length());
}
}
// Append vec2 to the back of vec1.
vec1.PushBack(vec2);
ASSERT_EQ(2u * array_length(), vec1.Size());
for (size_t channel = 0; channel < num_channels_; ++channel) {
for (size_t i = 0; i < 2 * array_length(); ++i) {
EXPECT_EQ(static_cast<int16_t>(i + 100 * channel), vec1[channel][i]);
}
}
}
// Test the PushBackFromIndex method.
TEST_P(AudioMultiVectorTest, PushBackFromIndex) {
AudioMultiVector vec1(num_channels_);
vec1.PushBackInterleaved(array_interleaved_);
AudioMultiVector vec2(num_channels_);
// Append vec1 to the back of vec2 (which is empty). Read vec1 from the
// second-to-last element.
vec2.PushBackFromIndex(vec1, array_length() - 2);
ASSERT_EQ(2u, vec2.Size());
for (size_t channel = 0; channel < num_channels_; ++channel) {
for (size_t i = 0; i < 2; ++i) {
EXPECT_EQ(array_interleaved_[channel +
num_channels_ * (array_length() - 2 + i)],
vec2[channel][i]);
}
}
}
// Starts with pushing some values to the vector, then test the Zeros method.
TEST_P(AudioMultiVectorTest, Zeros) {
AudioMultiVector vec(num_channels_);
vec.PushBackInterleaved(array_interleaved_);
vec.Zeros(2 * array_length());
ASSERT_EQ(num_channels_, vec.Channels());
ASSERT_EQ(2u * array_length(), vec.Size());
for (size_t channel = 0; channel < num_channels_; ++channel) {
for (size_t i = 0; i < 2 * array_length(); ++i) {
EXPECT_EQ(0, vec[channel][i]);
}
}
}
// Test the ReadInterleaved method.
TEST_P(AudioMultiVectorTest, ReadInterleaved) {
AudioMultiVector vec(num_channels_);
vec.PushBackInterleaved(array_interleaved_);
int16_t* output = new int16_t[array_interleaved_.size()];
// Read 5 samples.
size_t read_samples = 5;
EXPECT_EQ(num_channels_ * read_samples,
vec.ReadInterleaved(read_samples, output));
EXPECT_EQ(0, memcmp(array_interleaved_.data(), output,
read_samples * sizeof(int16_t)));
// Read too many samples. Expect to get all samples from the vector.
EXPECT_EQ(array_interleaved_.size(),
vec.ReadInterleaved(array_length() + 1, output));
EXPECT_EQ(0, memcmp(array_interleaved_.data(), output,
read_samples * sizeof(int16_t)));
delete[] output;
}
// Test the PopFront method.
TEST_P(AudioMultiVectorTest, PopFront) {
AudioMultiVector vec(num_channels_);
vec.PushBackInterleaved(array_interleaved_);
vec.PopFront(1); // Remove one element from each channel.
ASSERT_EQ(array_length() - 1u, vec.Size());
// Let `ptr` point to the second element of the first channel in the
// interleaved array.
int16_t* ptr = &array_interleaved_[num_channels_];
for (size_t i = 0; i < array_length() - 1; ++i) {
for (size_t channel = 0; channel < num_channels_; ++channel) {
EXPECT_EQ(*ptr, vec[channel][i]);
++ptr;
}
}
vec.PopFront(array_length()); // Remove more elements than vector size.
EXPECT_EQ(0u, vec.Size());
}
// Test the PopBack method.
TEST_P(AudioMultiVectorTest, PopBack) {
AudioMultiVector vec(num_channels_);
vec.PushBackInterleaved(array_interleaved_);
vec.PopBack(1); // Remove one element from each channel.
ASSERT_EQ(array_length() - 1u, vec.Size());
// Let `ptr` point to the first element of the first channel in the
// interleaved array.
int16_t* ptr = array_interleaved_.data();
for (size_t i = 0; i < array_length() - 1; ++i) {
for (size_t channel = 0; channel < num_channels_; ++channel) {
EXPECT_EQ(*ptr, vec[channel][i]);
++ptr;
}
}
vec.PopBack(array_length()); // Remove more elements than vector size.
EXPECT_EQ(0u, vec.Size());
}
// Test the AssertSize method.
TEST_P(AudioMultiVectorTest, AssertSize) {
AudioMultiVector vec(num_channels_, array_length());
EXPECT_EQ(array_length(), vec.Size());
// Start with asserting with smaller sizes than already allocated.
vec.AssertSize(0);
vec.AssertSize(array_length() - 1);
// Nothing should have changed.
EXPECT_EQ(array_length(), vec.Size());
// Assert with one element longer than already allocated.
vec.AssertSize(array_length() + 1);
// Expect vector to have grown.
EXPECT_EQ(array_length() + 1, vec.Size());
// Also check the individual AudioVectors.
for (size_t channel = 0; channel < vec.Channels(); ++channel) {
EXPECT_EQ(array_length() + 1u, vec[channel].Size());
}
}
// Test the OverwriteAt method with another AudioMultiVector as input.
TEST_P(AudioMultiVectorTest, OverwriteAt) {
AudioMultiVector vec1(num_channels_);
vec1.PushBackInterleaved(array_interleaved_);
AudioMultiVector vec2(num_channels_);
vec2.Zeros(3); // 3 zeros in each channel.
// Overwrite vec2 at position 5.
vec1.OverwriteAt(vec2, 3, 5);
// Verify result.
// Length remains the same.
ASSERT_EQ(array_length(), vec1.Size());
int16_t* ptr = array_interleaved_.data();
for (size_t i = 0; i < array_length() - 1; ++i) {
for (size_t channel = 0; channel < num_channels_; ++channel) {
if (i >= 5 && i <= 7) {
// Elements 5, 6, 7 should have been replaced with zeros.
EXPECT_EQ(0, vec1[channel][i]);
} else {
EXPECT_EQ(*ptr, vec1[channel][i]);
}
++ptr;
}
}
}
// Test the CopyChannel method, when the test is instantiated with at least two
// channels.
TEST_P(AudioMultiVectorTest, CopyChannel) {
if (num_channels_ < 2)
return;
AudioMultiVector vec(num_channels_);
vec.PushBackInterleaved(array_interleaved_);
// Create a reference copy.
AudioMultiVector ref(num_channels_);
ref.PushBack(vec);
// Copy from first to last channel.
vec.CopyChannel(0, num_channels_ - 1);
// Verify that the first and last channels are identical; the others should
// be left untouched.
for (size_t i = 0; i < array_length(); ++i) {
// Verify that all but the last channel are untouched.
for (size_t channel = 0; channel < num_channels_ - 1; ++channel) {
EXPECT_EQ(ref[channel][i], vec[channel][i]);
}
// Verify that the last and the first channels are identical.
EXPECT_EQ(vec[0][i], vec[num_channels_ - 1][i]);
}
}
TEST_P(AudioMultiVectorTest, PushBackEmptyArray) {
AudioMultiVector vec(num_channels_);
vec.PushBackInterleaved({});
EXPECT_TRUE(vec.Empty());
}
INSTANTIATE_TEST_SUITE_P(TestNumChannels,
AudioMultiVectorTest,
::testing::Values(static_cast<size_t>(1),
static_cast<size_t>(2),
static_cast<size_t>(5)));
} // namespace webrtc

modules/audio_coding/neteq/audio_vector.cc (View file)

@@ -0,0 +1,380 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/audio_vector.h"
#include <algorithm>
#include <memory>
#include "rtc_base/checks.h"
namespace webrtc {
AudioVector::AudioVector() : AudioVector(kDefaultInitialSize) {
Clear();
}
AudioVector::AudioVector(size_t initial_size)
: array_(new int16_t[initial_size + 1]),
capacity_(initial_size + 1),
begin_index_(0),
end_index_(capacity_ - 1) {
memset(array_.get(), 0, capacity_ * sizeof(int16_t));
}
AudioVector::~AudioVector() = default;
void AudioVector::Clear() {
end_index_ = begin_index_ = 0;
}
void AudioVector::CopyTo(AudioVector* copy_to) const {
RTC_DCHECK(copy_to);
copy_to->Reserve(Size());
CopyTo(Size(), 0, copy_to->array_.get());
copy_to->begin_index_ = 0;
copy_to->end_index_ = Size();
}
void AudioVector::CopyTo(size_t length,
size_t position,
int16_t* copy_to) const {
if (length == 0)
return;
length = std::min(length, Size() - position);
const size_t copy_index = (begin_index_ + position) % capacity_;
const size_t first_chunk_length = std::min(length, capacity_ - copy_index);
memcpy(copy_to, &array_[copy_index], first_chunk_length * sizeof(int16_t));
const size_t remaining_length = length - first_chunk_length;
if (remaining_length > 0) {
memcpy(&copy_to[first_chunk_length], array_.get(),
remaining_length * sizeof(int16_t));
}
}
void AudioVector::PushFront(const AudioVector& prepend_this) {
const size_t length = prepend_this.Size();
if (length == 0)
return;
// Although each subsequent call to PushFront does its own Reserve, it is
// always more efficient to do one big Reserve first.
Reserve(Size() + length);
const size_t first_chunk_length =
std::min(length, prepend_this.capacity_ - prepend_this.begin_index_);
const size_t remaining_length = length - first_chunk_length;
if (remaining_length > 0)
PushFront(prepend_this.array_.get(), remaining_length);
PushFront(&prepend_this.array_[prepend_this.begin_index_],
first_chunk_length);
}
void AudioVector::PushFront(const int16_t* prepend_this, size_t length) {
if (length == 0)
return;
Reserve(Size() + length);
const size_t first_chunk_length = std::min(length, begin_index_);
memcpy(&array_[begin_index_ - first_chunk_length],
&prepend_this[length - first_chunk_length],
first_chunk_length * sizeof(int16_t));
const size_t remaining_length = length - first_chunk_length;
if (remaining_length > 0) {
memcpy(&array_[capacity_ - remaining_length], prepend_this,
remaining_length * sizeof(int16_t));
}
begin_index_ = (begin_index_ + capacity_ - length) % capacity_;
}
void AudioVector::PushBack(const AudioVector& append_this) {
PushBack(append_this, append_this.Size(), 0);
}
void AudioVector::PushBack(const AudioVector& append_this,
size_t length,
size_t position) {
RTC_DCHECK_LE(position, append_this.Size());
RTC_DCHECK_LE(length, append_this.Size() - position);
if (length == 0)
return;
// Although each subsequent call to PushBack does its own Reserve, it is
// always more efficient to do one big Reserve first.
Reserve(Size() + length);
const size_t start_index =
(append_this.begin_index_ + position) % append_this.capacity_;
const size_t first_chunk_length =
std::min(length, append_this.capacity_ - start_index);
PushBack(&append_this.array_[start_index], first_chunk_length);
const size_t remaining_length = length - first_chunk_length;
if (remaining_length > 0)
PushBack(append_this.array_.get(), remaining_length);
}
void AudioVector::PushBack(const int16_t* append_this, size_t length) {
if (length == 0)
return;
Reserve(Size() + length);
const size_t first_chunk_length = std::min(length, capacity_ - end_index_);
memcpy(&array_[end_index_], append_this,
first_chunk_length * sizeof(int16_t));
const size_t remaining_length = length - first_chunk_length;
if (remaining_length > 0) {
memcpy(array_.get(), &append_this[first_chunk_length],
remaining_length * sizeof(int16_t));
}
end_index_ = (end_index_ + length) % capacity_;
}
void AudioVector::PopFront(size_t length) {
if (length == 0)
return;
length = std::min(length, Size());
begin_index_ = (begin_index_ + length) % capacity_;
}
void AudioVector::PopBack(size_t length) {
if (length == 0)
return;
// Never remove more than what is in the array.
length = std::min(length, Size());
end_index_ = (end_index_ + capacity_ - length) % capacity_;
}
void AudioVector::Extend(size_t extra_length) {
if (extra_length == 0)
return;
InsertZerosByPushBack(extra_length, Size());
}
void AudioVector::InsertAt(const int16_t* insert_this,
size_t length,
size_t position) {
if (length == 0)
return;
// Cap the insert position at the current array length.
position = std::min(Size(), position);
// When inserting at a position closer to the beginning, it is more efficient
// to insert by pushing front than by pushing back, since less data has to be
// moved; the opposite holds for positions closer to the end.
if (position <= Size() - position) {
InsertByPushFront(insert_this, length, position);
} else {
InsertByPushBack(insert_this, length, position);
}
}
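// Example: in a 10-element vector, inserting at position 2 moves only the
// two leading elements (InsertByPushFront), while inserting at position 8
// moves only the two trailing ones (InsertByPushBack).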
void AudioVector::InsertZerosAt(size_t length, size_t position) {
if (length == 0)
return;
// Cap the insert position at the current array length.
position = std::min(Size(), position);
// When inserting at a position closer to the beginning, it is more efficient
// to insert by pushing front than by pushing back, since less data has to be
// moved; the opposite holds for positions closer to the end.
if (position <= Size() - position) {
InsertZerosByPushFront(length, position);
} else {
InsertZerosByPushBack(length, position);
}
}
void AudioVector::OverwriteAt(const AudioVector& insert_this,
size_t length,
size_t position) {
RTC_DCHECK_LE(length, insert_this.Size());
if (length == 0)
return;
// Cap the insert position at the current array length.
position = std::min(Size(), position);
// Although each subsequent call to OverwriteAt does its own Reserve, it is
// always more efficient to do one big Reserve first.
size_t new_size = std::max(Size(), position + length);
Reserve(new_size);
const size_t first_chunk_length =
std::min(length, insert_this.capacity_ - insert_this.begin_index_);
OverwriteAt(&insert_this.array_[insert_this.begin_index_], first_chunk_length,
position);
const size_t remaining_length = length - first_chunk_length;
if (remaining_length > 0) {
OverwriteAt(insert_this.array_.get(), remaining_length,
position + first_chunk_length);
}
}
void AudioVector::OverwriteAt(const int16_t* insert_this,
size_t length,
size_t position) {
if (length == 0)
return;
// Cap the insert position at the current array length.
position = std::min(Size(), position);
size_t new_size = std::max(Size(), position + length);
Reserve(new_size);
const size_t overwrite_index = (begin_index_ + position) % capacity_;
const size_t first_chunk_length =
std::min(length, capacity_ - overwrite_index);
memcpy(&array_[overwrite_index], insert_this,
first_chunk_length * sizeof(int16_t));
const size_t remaining_length = length - first_chunk_length;
if (remaining_length > 0) {
memcpy(array_.get(), &insert_this[first_chunk_length],
remaining_length * sizeof(int16_t));
}
end_index_ = (begin_index_ + new_size) % capacity_;
}
void AudioVector::CrossFade(const AudioVector& append_this,
size_t fade_length) {
// Fade length cannot be longer than the current vector or `append_this`.
RTC_DCHECK_LE(fade_length, Size());
RTC_DCHECK_LE(fade_length, append_this.Size());
fade_length = std::min(fade_length, Size());
fade_length = std::min(fade_length, append_this.Size());
size_t position = Size() - fade_length + begin_index_;
// Cross fade the overlapping regions.
// `alpha` is the mixing factor in Q14.
// TODO(hlundin): Consider skipping +1 in the denominator to produce a
// smoother cross-fade, in particular at the end of the fade.
int alpha_step = 16384 / (static_cast<int>(fade_length) + 1);
int alpha = 16384;
for (size_t i = 0; i < fade_length; ++i) {
alpha -= alpha_step;
array_[(position + i) % capacity_] =
(alpha * array_[(position + i) % capacity_] +
(16384 - alpha) * append_this[i] + 8192) >>
14;
}
RTC_DCHECK_GE(alpha, 0); // Verify that the slope was correct.
// Append what is left of `append_this`.
size_t samples_to_push_back = append_this.Size() - fade_length;
if (samples_to_push_back > 0)
PushBack(append_this, samples_to_push_back, fade_length);
}
// Returns the number of elements in this AudioVector.
size_t AudioVector::Size() const {
return (end_index_ + capacity_ - begin_index_) % capacity_;
}
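// Example of the wrap-around arithmetic: with capacity_ == 8,
// begin_index_ == 6 and end_index_ == 2, Size() is (2 + 8 - 6) % 8 == 4;
// the elements live at indices 6, 7, 0 and 1.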
// Returns true if this AudioVector is empty.
bool AudioVector::Empty() const {
return begin_index_ == end_index_;
}
void AudioVector::Reserve(size_t n) {
if (capacity_ > n)
return;
const size_t length = Size();
// Reserve one more sample to remove the ambiguity between empty vector and
// full vector. Therefore `begin_index_` == `end_index_` indicates empty
// vector, and `begin_index_` == (`end_index_` + 1) % capacity indicates
// full vector.
std::unique_ptr<int16_t[]> temp_array(new int16_t[n + 1]);
CopyTo(length, 0, temp_array.get());
array_.swap(temp_array);
begin_index_ = 0;
end_index_ = length;
capacity_ = n + 1;
}
void AudioVector::InsertByPushBack(const int16_t* insert_this,
size_t length,
size_t position) {
const size_t move_chunk_length = Size() - position;
std::unique_ptr<int16_t[]> temp_array(nullptr);
if (move_chunk_length > 0) {
// TODO(minyue): see if it is possible to avoid copying to a buffer.
temp_array.reset(new int16_t[move_chunk_length]);
CopyTo(move_chunk_length, position, temp_array.get());
PopBack(move_chunk_length);
}
Reserve(Size() + length + move_chunk_length);
PushBack(insert_this, length);
if (move_chunk_length > 0)
PushBack(temp_array.get(), move_chunk_length);
}
void AudioVector::InsertByPushFront(const int16_t* insert_this,
size_t length,
size_t position) {
std::unique_ptr<int16_t[]> temp_array(nullptr);
if (position > 0) {
// TODO(minyue): see if it is possible to avoid copying to a buffer.
temp_array.reset(new int16_t[position]);
CopyTo(position, 0, temp_array.get());
PopFront(position);
}
Reserve(Size() + length + position);
PushFront(insert_this, length);
if (position > 0)
PushFront(temp_array.get(), position);
}
void AudioVector::InsertZerosByPushBack(size_t length, size_t position) {
const size_t move_chunk_length = Size() - position;
std::unique_ptr<int16_t[]> temp_array(nullptr);
if (move_chunk_length > 0) {
temp_array.reset(new int16_t[move_chunk_length]);
CopyTo(move_chunk_length, position, temp_array.get());
PopBack(move_chunk_length);
}
Reserve(Size() + length + move_chunk_length);
const size_t first_zero_chunk_length =
std::min(length, capacity_ - end_index_);
memset(&array_[end_index_], 0, first_zero_chunk_length * sizeof(int16_t));
const size_t remaining_zero_length = length - first_zero_chunk_length;
if (remaining_zero_length > 0)
memset(array_.get(), 0, remaining_zero_length * sizeof(int16_t));
end_index_ = (end_index_ + length) % capacity_;
if (move_chunk_length > 0)
PushBack(temp_array.get(), move_chunk_length);
}
void AudioVector::InsertZerosByPushFront(size_t length, size_t position) {
std::unique_ptr<int16_t[]> temp_array(nullptr);
if (position > 0) {
temp_array.reset(new int16_t[position]);
CopyTo(position, 0, temp_array.get());
PopFront(position);
}
Reserve(Size() + length + position);
const size_t first_zero_chunk_length = std::min(length, begin_index_);
memset(&array_[begin_index_ - first_zero_chunk_length], 0,
first_zero_chunk_length * sizeof(int16_t));
const size_t remaining_zero_length = length - first_zero_chunk_length;
if (remaining_zero_length > 0)
memset(&array_[capacity_ - remaining_zero_length], 0,
remaining_zero_length * sizeof(int16_t));
begin_index_ = (begin_index_ + capacity_ - length) % capacity_;
if (position > 0)
PushFront(temp_array.get(), position);
}
} // namespace webrtc
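For reference, a minimal standalone sketch of the Q14 ramp applied in CrossFade above; all names, lengths and sample values here are illustrative and not part of the commit. 16384 represents 1.0 in Q14, and the +8192 term rounds before the shift.
#include <cstdint>
#include <cstdio>
#include <vector>
int main() {
  std::vector<int16_t> fade_out(10, 0);   // Tail of the existing vector.
  std::vector<int16_t> fade_in(10, 100);  // Head of `append_this`.
  const int fade_length = static_cast<int>(fade_out.size());
  // The +1 in the denominator keeps alpha strictly positive at the last
  // sample, matching the implementation above.
  const int alpha_step = 16384 / (fade_length + 1);
  int alpha = 16384;
  for (int i = 0; i < fade_length; ++i) {
    alpha -= alpha_step;
    // Rounded Q14 mix; prints an increasing ramp from about 9 to about 91,
    // which is what the CrossFade unit test checks with EXPECT_NEAR.
    const int mixed =
        (alpha * fade_out[i] + (16384 - alpha) * fade_in[i] + 8192) >> 14;
    printf("%d ", mixed);
  }
  printf("\n");
  return 0;
}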

View file

@ -0,0 +1,172 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_AUDIO_VECTOR_H_
#define MODULES_AUDIO_CODING_NETEQ_AUDIO_VECTOR_H_
#include <string.h>
#include <cstdint>
#include <memory>
#include "rtc_base/checks.h"
namespace webrtc {
class AudioVector {
public:
// Creates an empty AudioVector.
AudioVector();
// Creates an AudioVector with an initial size.
explicit AudioVector(size_t initial_size);
virtual ~AudioVector();
AudioVector(const AudioVector&) = delete;
AudioVector& operator=(const AudioVector&) = delete;
// Deletes all values and makes the vector empty.
virtual void Clear();
// Copies all values from this vector to `copy_to`. Any contents in `copy_to`
// are deleted before the copy operation. After the operation is done,
// `copy_to` will be an exact replica of this object.
virtual void CopyTo(AudioVector* copy_to) const;
// Copies `length` values from `position` in this vector to `copy_to`.
virtual void CopyTo(size_t length, size_t position, int16_t* copy_to) const;
// Prepends the contents of AudioVector `prepend_this` to this object. The
// length of this object is increased with the length of `prepend_this`.
virtual void PushFront(const AudioVector& prepend_this);
// Same as above, but with an array `prepend_this` with `length` elements as
// source.
virtual void PushFront(const int16_t* prepend_this, size_t length);
// Same as PushFront but will append to the end of this object.
virtual void PushBack(const AudioVector& append_this);
// Appends a segment of `append_this` to the end of this object. The segment
// starts from `position` and has `length` samples.
virtual void PushBack(const AudioVector& append_this,
size_t length,
size_t position);
// Same as PushFront but will append to the end of this object.
virtual void PushBack(const int16_t* append_this, size_t length);
// Removes `length` elements from the beginning of this object.
virtual void PopFront(size_t length);
// Removes `length` elements from the end of this object.
virtual void PopBack(size_t length);
// Extends this object with `extra_length` elements at the end. The new
// elements are initialized to zero.
virtual void Extend(size_t extra_length);
// Inserts `length` elements taken from the array `insert_this` and inserts
// them at `position`. The length of the AudioVector is increased by `length`.
// `position` = 0 means that the new values are prepended to the vector.
// `position` = Size() means that the new values are appended to the vector.
virtual void InsertAt(const int16_t* insert_this,
size_t length,
size_t position);
// Like InsertAt, but inserts `length` zero elements at `position`.
virtual void InsertZerosAt(size_t length, size_t position);
// Overwrites `length` elements of this AudioVector starting from `position`
// with the first `length` values in `insert_this`. The definition of `position`
// is the same as for InsertAt(). If `length` and `position` are selected
// such that the new data extends beyond the end of the current AudioVector,
// the vector is extended to accommodate the new data.
virtual void OverwriteAt(const AudioVector& insert_this,
size_t length,
size_t position);
// Overwrites `length` elements of this AudioVector with values taken from the
// array `insert_this`, starting at `position`. The definition of `position`
// is the same as for InsertAt(). If `length` and `position` are selected
// such that the new data extends beyond the end of the current AudioVector,
// the vector is extended to accommodate the new data.
virtual void OverwriteAt(const int16_t* insert_this,
size_t length,
size_t position);
// Appends `append_this` to the end of the current vector. Lets the two
// vectors overlap by `fade_length` samples, and cross-fade linearly in this
// region.
virtual void CrossFade(const AudioVector& append_this, size_t fade_length);
// Returns the number of elements in this AudioVector.
virtual size_t Size() const;
// Returns true if this AudioVector is empty.
virtual bool Empty() const;
// Accesses and modifies an element of AudioVector.
inline const int16_t& operator[](size_t index) const {
return array_[WrapIndex(index, begin_index_, capacity_)];
}
inline int16_t& operator[](size_t index) {
return array_[WrapIndex(index, begin_index_, capacity_)];
}
private:
static const size_t kDefaultInitialSize = 10;
// This method is used by the [] operators to calculate an index within the
// capacity of the array, but without using the modulo operation (%).
static inline size_t WrapIndex(size_t index,
size_t begin_index,
size_t capacity) {
RTC_DCHECK_LT(index, capacity);
RTC_DCHECK_LT(begin_index, capacity);
size_t ix = begin_index + index;
RTC_DCHECK_GE(ix, index); // Check for overflow.
if (ix >= capacity) {
ix -= capacity;
}
RTC_DCHECK_LT(ix, capacity);
return ix;
}
void Reserve(size_t n);
void InsertByPushBack(const int16_t* insert_this,
size_t length,
size_t position);
void InsertByPushFront(const int16_t* insert_this,
size_t length,
size_t position);
void InsertZerosByPushBack(size_t length, size_t position);
void InsertZerosByPushFront(size_t length, size_t position);
std::unique_ptr<int16_t[]> array_;
size_t capacity_; // Allocated number of samples in the array.
// The index of the first sample in `array_`, except when
// `begin_index_ == end_index_`, which indicates an empty buffer.
size_t begin_index_;
// The index of the sample after the last sample in `array_`.
size_t end_index_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_AUDIO_VECTOR_H_
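A minimal sketch of the branch-based wrap used by the subscript operators above, with asserts standing in for RTC_DCHECK; illustrative only, not part of the commit.
#include <cassert>
#include <cstddef>
#include <cstdio>
size_t WrapIndexSketch(size_t index, size_t begin_index, size_t capacity) {
  assert(index < capacity);
  assert(begin_index < capacity);
  size_t ix = begin_index + index;
  // Since both operands are smaller than capacity, a single conditional
  // subtraction replaces the modulo operation.
  if (ix >= capacity) {
    ix -= capacity;
  }
  return ix;
}
int main() {
  // Capacity 8, buffer starting at physical slot 6: logical index 3 wraps
  // around to physical slot 1.
  printf("%zu\n", WrapIndexSketch(3, 6, 8));  // Prints 1.
  return 0;
}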

View file

@ -0,0 +1,384 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/audio_vector.h"
#include <stdlib.h>
#include <string>
#include "rtc_base/numerics/safe_conversions.h"
#include "test/gtest.h"
namespace webrtc {
class AudioVectorTest : public ::testing::Test {
protected:
virtual void SetUp() {
// Populate test array.
for (size_t i = 0; i < array_length(); ++i) {
array_[i] = rtc::checked_cast<int16_t>(i);
}
}
size_t array_length() const { return sizeof(array_) / sizeof(array_[0]); }
int16_t array_[10];
};
// Create and destroy AudioVector objects, both empty and with a predefined
// length.
TEST_F(AudioVectorTest, CreateAndDestroy) {
AudioVector vec1;
EXPECT_TRUE(vec1.Empty());
EXPECT_EQ(0u, vec1.Size());
size_t initial_size = 17;
AudioVector vec2(initial_size);
EXPECT_FALSE(vec2.Empty());
EXPECT_EQ(initial_size, vec2.Size());
}
// Test the subscript operator [] for getting and setting.
TEST_F(AudioVectorTest, SubscriptOperator) {
AudioVector vec(array_length());
for (size_t i = 0; i < array_length(); ++i) {
vec[i] = static_cast<int16_t>(i);
const int16_t& value = vec[i]; // Make sure to use the const version.
EXPECT_EQ(static_cast<int16_t>(i), value);
}
}
// Test the PushBack method and the CopyTo method. The Clear method is also
// invoked.
TEST_F(AudioVectorTest, PushBackAndCopy) {
AudioVector vec;
AudioVector vec_copy;
vec.PushBack(array_, array_length());
vec.CopyTo(&vec_copy); // Copy from `vec` to `vec_copy`.
ASSERT_EQ(array_length(), vec.Size());
ASSERT_EQ(array_length(), vec_copy.Size());
for (size_t i = 0; i < array_length(); ++i) {
EXPECT_EQ(array_[i], vec[i]);
EXPECT_EQ(array_[i], vec_copy[i]);
}
// Clear `vec` and verify that it is empty.
vec.Clear();
EXPECT_TRUE(vec.Empty());
// Now copy the empty vector and verify that the copy becomes empty too.
vec.CopyTo(&vec_copy);
EXPECT_TRUE(vec_copy.Empty());
}
// Test the PushBack method with another AudioVector as input argument.
TEST_F(AudioVectorTest, PushBackVector) {
static const size_t kLength = 10;
AudioVector vec1(kLength);
AudioVector vec2(kLength);
// Set the first vector to [0, 1, ..., kLength - 1].
// Set the second vector to [kLength, kLength + 1, ..., 2 * kLength - 1].
for (size_t i = 0; i < kLength; ++i) {
vec1[i] = static_cast<int16_t>(i);
vec2[i] = static_cast<int16_t>(i + kLength);
}
// Append vec2 to the back of vec1.
vec1.PushBack(vec2);
ASSERT_EQ(2 * kLength, vec1.Size());
for (size_t i = 0; i < 2 * kLength; ++i) {
EXPECT_EQ(static_cast<int16_t>(i), vec1[i]);
}
}
// Test the PushFront method.
TEST_F(AudioVectorTest, PushFront) {
AudioVector vec;
vec.PushFront(array_, array_length());
ASSERT_EQ(array_length(), vec.Size());
for (size_t i = 0; i < array_length(); ++i) {
EXPECT_EQ(array_[i], vec[i]);
}
}
// Test the PushFront method with another AudioVector as input argument.
TEST_F(AudioVectorTest, PushFrontVector) {
static const size_t kLength = 10;
AudioVector vec1(kLength);
AudioVector vec2(kLength);
// Set the first vector to [0, 1, ..., kLength - 1].
// Set the second vector to [kLength, kLength + 1, ..., 2 * kLength - 1].
for (size_t i = 0; i < kLength; ++i) {
vec1[i] = static_cast<int16_t>(i);
vec2[i] = static_cast<int16_t>(i + kLength);
}
// Prepend vec1 to the front of vec2.
vec2.PushFront(vec1);
ASSERT_EQ(2 * kLength, vec2.Size());
for (size_t i = 0; i < 2 * kLength; ++i) {
EXPECT_EQ(static_cast<int16_t>(i), vec2[i]);
}
}
// Test the PopFront method.
TEST_F(AudioVectorTest, PopFront) {
AudioVector vec;
vec.PushBack(array_, array_length());
vec.PopFront(1); // Remove one element.
EXPECT_EQ(array_length() - 1u, vec.Size());
for (size_t i = 0; i < array_length() - 1; ++i) {
EXPECT_EQ(static_cast<int16_t>(i + 1), vec[i]);
}
vec.PopFront(array_length()); // Remove more elements than vector size.
EXPECT_EQ(0u, vec.Size());
}
// Test the PopBack method.
TEST_F(AudioVectorTest, PopBack) {
AudioVector vec;
vec.PushBack(array_, array_length());
vec.PopBack(1); // Remove one element.
EXPECT_EQ(array_length() - 1u, vec.Size());
for (size_t i = 0; i < array_length() - 1; ++i) {
EXPECT_EQ(static_cast<int16_t>(i), vec[i]);
}
vec.PopBack(array_length()); // Remove more elements than vector size.
EXPECT_EQ(0u, vec.Size());
}
// Test the Extend method.
TEST_F(AudioVectorTest, Extend) {
AudioVector vec;
vec.PushBack(array_, array_length());
vec.Extend(5); // Extend with 5 elements, which should all be zeros.
ASSERT_EQ(array_length() + 5u, vec.Size());
// Verify that all are zero.
for (size_t i = array_length(); i < array_length() + 5; ++i) {
EXPECT_EQ(0, vec[i]);
}
}
// Test the InsertAt method with an insert position in the middle of the vector.
TEST_F(AudioVectorTest, InsertAt) {
AudioVector vec;
vec.PushBack(array_, array_length());
static const int kNewLength = 5;
int16_t new_array[kNewLength];
// Set array elements to {100, 101, 102, ... }.
for (int i = 0; i < kNewLength; ++i) {
new_array[i] = 100 + i;
}
int insert_position = 5;
vec.InsertAt(new_array, kNewLength, insert_position);
// Verify that the vector looks as follows:
// {0, 1, ..., `insert_position` - 1, 100, 101, ..., 100 + kNewLength - 1,
// `insert_position`, `insert_position` + 1, ..., kLength - 1}.
size_t pos = 0;
for (int i = 0; i < insert_position; ++i) {
EXPECT_EQ(array_[i], vec[pos]);
++pos;
}
for (int i = 0; i < kNewLength; ++i) {
EXPECT_EQ(new_array[i], vec[pos]);
++pos;
}
for (size_t i = insert_position; i < array_length(); ++i) {
EXPECT_EQ(array_[i], vec[pos]);
++pos;
}
}
// Test the InsertZerosAt method with an insert position in the middle of the
// vector. Use the InsertAt method as reference.
TEST_F(AudioVectorTest, InsertZerosAt) {
AudioVector vec;
AudioVector vec_ref;
vec.PushBack(array_, array_length());
vec_ref.PushBack(array_, array_length());
static const int kNewLength = 5;
int insert_position = 5;
vec.InsertZerosAt(kNewLength, insert_position);
int16_t new_array[kNewLength] = {0}; // All zero elements.
vec_ref.InsertAt(new_array, kNewLength, insert_position);
// Verify that the vectors are identical.
ASSERT_EQ(vec_ref.Size(), vec.Size());
for (size_t i = 0; i < vec.Size(); ++i) {
EXPECT_EQ(vec_ref[i], vec[i]);
}
}
// Test the InsertAt method with an insert position at the start of the vector.
TEST_F(AudioVectorTest, InsertAtBeginning) {
AudioVector vec;
vec.PushBack(array_, array_length());
static const int kNewLength = 5;
int16_t new_array[kNewLength];
// Set array elements to {100, 101, 102, ... }.
for (int i = 0; i < kNewLength; ++i) {
new_array[i] = 100 + i;
}
int insert_position = 0;
vec.InsertAt(new_array, kNewLength, insert_position);
// Verify that the vector looks as follows:
// {100, 101, ..., 100 + kNewLength - 1,
// 0, 1, ..., kLength - 1}.
size_t pos = 0;
for (int i = 0; i < kNewLength; ++i) {
EXPECT_EQ(new_array[i], vec[pos]);
++pos;
}
for (size_t i = insert_position; i < array_length(); ++i) {
EXPECT_EQ(array_[i], vec[pos]);
++pos;
}
}
// Test the InsertAt method with an insert position at the end of the vector.
TEST_F(AudioVectorTest, InsertAtEnd) {
AudioVector vec;
vec.PushBack(array_, array_length());
static const int kNewLength = 5;
int16_t new_array[kNewLength];
// Set array elements to {100, 101, 102, ... }.
for (int i = 0; i < kNewLength; ++i) {
new_array[i] = 100 + i;
}
int insert_position = rtc::checked_cast<int>(array_length());
vec.InsertAt(new_array, kNewLength, insert_position);
// Verify that the vector looks as follows:
// {0, 1, ..., kLength - 1, 100, 101, ..., 100 + kNewLength - 1 }.
size_t pos = 0;
for (size_t i = 0; i < array_length(); ++i) {
EXPECT_EQ(array_[i], vec[pos]);
++pos;
}
for (int i = 0; i < kNewLength; ++i) {
EXPECT_EQ(new_array[i], vec[pos]);
++pos;
}
}
// Test the InsertAt method with an insert position beyond the end of the
// vector. Verify that a position beyond the end of the vector does not lead to
// an error. The expected outcome is the same as if the vector end was used as
// input position. That is, the input position should be capped at the maximum
// allowed value.
TEST_F(AudioVectorTest, InsertBeyondEnd) {
AudioVector vec;
vec.PushBack(array_, array_length());
static const int kNewLength = 5;
int16_t new_array[kNewLength];
// Set array elements to {100, 101, 102, ... }.
for (int i = 0; i < kNewLength; ++i) {
new_array[i] = 100 + i;
}
int insert_position =
rtc::checked_cast<int>(array_length() + 10); // Too large.
vec.InsertAt(new_array, kNewLength, insert_position);
// Verify that the vector looks as follows:
// {0, 1, ..., kLength - 1, 100, 101, ..., 100 + kNewLength - 1 }.
size_t pos = 0;
for (size_t i = 0; i < array_length(); ++i) {
EXPECT_EQ(array_[i], vec[pos]);
++pos;
}
for (int i = 0; i < kNewLength; ++i) {
EXPECT_EQ(new_array[i], vec[pos]);
++pos;
}
}
// Test the OverwriteAt method with a position such that all of the new values
// fit within the old vector.
TEST_F(AudioVectorTest, OverwriteAt) {
AudioVector vec;
vec.PushBack(array_, array_length());
static const int kNewLength = 5;
int16_t new_array[kNewLength];
// Set array elements to {100, 101, 102, ... }.
for (int i = 0; i < kNewLength; ++i) {
new_array[i] = 100 + i;
}
size_t insert_position = 2;
vec.OverwriteAt(new_array, kNewLength, insert_position);
// Verify that the vector looks as follows:
// {0, ..., `insert_position` - 1, 100, 101, ..., 100 + kNewLength - 1,
//  `insert_position` + kNewLength, ..., kLength - 1}.
size_t pos = 0;
for (pos = 0; pos < insert_position; ++pos) {
EXPECT_EQ(array_[pos], vec[pos]);
}
for (int i = 0; i < kNewLength; ++i) {
EXPECT_EQ(new_array[i], vec[pos]);
++pos;
}
for (; pos < array_length(); ++pos) {
EXPECT_EQ(array_[pos], vec[pos]);
}
}
// Test the OverwriteAt method with a position such that some of the new values
// extend beyond the end of the current vector. This is valid, and the vector is
// expected to expand to accommodate the new values.
TEST_F(AudioVectorTest, OverwriteBeyondEnd) {
AudioVector vec;
vec.PushBack(array_, array_length());
static const int kNewLength = 5;
int16_t new_array[kNewLength];
// Set array elements to {100, 101, 102, ... }.
for (int i = 0; i < kNewLength; ++i) {
new_array[i] = 100 + i;
}
int insert_position = rtc::checked_cast<int>(array_length() - 2);
vec.OverwriteAt(new_array, kNewLength, insert_position);
ASSERT_EQ(array_length() - 2u + kNewLength, vec.Size());
// Verify that the vector looks as follows:
// {0, ..., `insert_position` - 1, 100, 101, ..., 100 + kNewLength - 1}.
int pos = 0;
for (pos = 0; pos < insert_position; ++pos) {
EXPECT_EQ(array_[pos], vec[pos]);
}
for (int i = 0; i < kNewLength; ++i) {
EXPECT_EQ(new_array[i], vec[pos]);
++pos;
}
// Verify that we checked to the end of `vec`.
EXPECT_EQ(vec.Size(), static_cast<size_t>(pos));
}
TEST_F(AudioVectorTest, CrossFade) {
static const size_t kLength = 100;
static const size_t kFadeLength = 10;
AudioVector vec1(kLength);
AudioVector vec2(kLength);
// Set all vector elements to 0 in `vec1` and 100 in `vec2`.
for (size_t i = 0; i < kLength; ++i) {
vec1[i] = 0;
vec2[i] = 100;
}
vec1.CrossFade(vec2, kFadeLength);
ASSERT_EQ(2 * kLength - kFadeLength, vec1.Size());
// First part untouched.
for (size_t i = 0; i < kLength - kFadeLength; ++i) {
EXPECT_EQ(0, vec1[i]);
}
// Check mixing zone.
for (size_t i = 0; i < kFadeLength; ++i) {
EXPECT_NEAR((i + 1) * 100 / (kFadeLength + 1),
vec1[kLength - kFadeLength + i], 1);
}
// Second part untouched.
for (size_t i = kLength; i < vec1.Size(); ++i) {
EXPECT_EQ(100, vec1[i]);
}
}
} // namespace webrtc

View file

@ -0,0 +1,300 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/background_noise.h"
#include <string.h> // memcpy
#include <algorithm> // min, max
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "modules/audio_coding/neteq/audio_multi_vector.h"
#include "modules/audio_coding/neteq/cross_correlation.h"
namespace webrtc {
namespace {
constexpr size_t kMaxSampleRate = 48000;
} // namespace
// static
constexpr size_t BackgroundNoise::kMaxLpcOrder;
BackgroundNoise::BackgroundNoise(size_t num_channels)
: num_channels_(num_channels),
channel_parameters_(new ChannelParameters[num_channels_]) {
Reset();
}
BackgroundNoise::~BackgroundNoise() {}
void BackgroundNoise::Reset() {
initialized_ = false;
for (size_t channel = 0; channel < num_channels_; ++channel) {
channel_parameters_[channel].Reset();
}
}
bool BackgroundNoise::Update(const AudioMultiVector& sync_buffer) {
bool filter_params_saved = false;
int32_t auto_correlation[kMaxLpcOrder + 1];
int16_t filter_output[kMaxLpcOrder + kResidualLength];
int16_t reflection_coefficients[kMaxLpcOrder];
int16_t lpc_coefficients[kMaxLpcOrder + 1];
for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {
ChannelParameters& parameters = channel_parameters_[channel_ix];
int16_t temp_signal_array[kVecLen + kMaxLpcOrder] = {0};
int16_t* temp_signal = &temp_signal_array[kMaxLpcOrder];
RTC_DCHECK_GE(sync_buffer.Size(), kVecLen);
sync_buffer[channel_ix].CopyTo(kVecLen, sync_buffer.Size() - kVecLen,
temp_signal);
int32_t sample_energy =
CalculateAutoCorrelation(temp_signal, kVecLen, auto_correlation);
if (sample_energy < parameters.energy_update_threshold) {
// Generate LPC coefficients.
if (auto_correlation[0] <= 0) {
// Center value in auto-correlation is not positive. Do not update.
return filter_params_saved;
}
// Regardless of whether the filter is actually updated or not,
// update energy threshold levels, since we have in fact observed
// a low energy signal.
if (sample_energy < parameters.energy_update_threshold) {
// Never go under 1.0 in average sample energy.
parameters.energy_update_threshold = std::max(sample_energy, 1);
parameters.low_energy_update_threshold = 0;
}
// Only update BGN if filter is stable, i.e., if return value from
// Levinson-Durbin function is 1.
if (WebRtcSpl_LevinsonDurbin(auto_correlation, lpc_coefficients,
reflection_coefficients,
kMaxLpcOrder) != 1) {
return filter_params_saved;
}
// Generate the CNG gain factor by looking at the energy of the residual.
WebRtcSpl_FilterMAFastQ12(temp_signal + kVecLen - kResidualLength,
filter_output, lpc_coefficients,
kMaxLpcOrder + 1, kResidualLength);
int32_t residual_energy = WebRtcSpl_DotProductWithScale(
filter_output, filter_output, kResidualLength, 0);
// Check spectral flatness.
// Comparing the residual variance with the input signal variance tells
// if the spectrum is flat or not.
// If 5 * residual_energy >= 16 * sample_energy, the spectrum is flat
// enough. Also ensure that the energy is non-zero.
if ((sample_energy > 0) &&
(int64_t{5} * residual_energy >= int64_t{16} * sample_energy)) {
// Spectrum is flat enough; save filter parameters.
// `temp_signal` + `kVecLen` - `kMaxLpcOrder` points at the first of the
// `kMaxLpcOrder` samples in the residual signal, which will form the
// filter state for the next noise generation.
SaveParameters(channel_ix, lpc_coefficients,
temp_signal + kVecLen - kMaxLpcOrder, sample_energy,
residual_energy);
filter_params_saved = true;
}
} else {
// Will only happen if `sample_energy` is not low enough. Increase the
// threshold for update so that it increases by a factor 4 in 4 seconds.
IncrementEnergyThreshold(channel_ix, sample_energy);
}
}
return filter_params_saved;
}
void BackgroundNoise::GenerateBackgroundNoise(
rtc::ArrayView<const int16_t> random_vector,
size_t channel,
int mute_slope,
bool too_many_expands,
size_t num_noise_samples,
int16_t* buffer) {
constexpr size_t kNoiseLpcOrder = kMaxLpcOrder;
int16_t scaled_random_vector[kMaxSampleRate / 8000 * 125];
RTC_DCHECK_LE(num_noise_samples, (kMaxSampleRate / 8000 * 125));
RTC_DCHECK_GE(random_vector.size(), num_noise_samples);
int16_t* noise_samples = &buffer[kNoiseLpcOrder];
if (initialized()) {
// Use background noise parameters.
memcpy(noise_samples - kNoiseLpcOrder, FilterState(channel),
sizeof(int16_t) * kNoiseLpcOrder);
int dc_offset = 0;
if (ScaleShift(channel) > 1) {
dc_offset = 1 << (ScaleShift(channel) - 1);
}
// Scale random vector to correct energy level.
WebRtcSpl_AffineTransformVector(scaled_random_vector, random_vector.data(),
Scale(channel), dc_offset,
ScaleShift(channel), num_noise_samples);
WebRtcSpl_FilterARFastQ12(scaled_random_vector, noise_samples,
Filter(channel), kNoiseLpcOrder + 1,
num_noise_samples);
SetFilterState(
channel,
{&(noise_samples[num_noise_samples - kNoiseLpcOrder]), kNoiseLpcOrder});
// Unmute the background noise.
int16_t bgn_mute_factor = MuteFactor(channel);
if (bgn_mute_factor < 16384) {
WebRtcSpl_AffineTransformVector(noise_samples, noise_samples,
bgn_mute_factor, 8192, 14,
num_noise_samples);
}
// Update mute_factor in BackgroundNoise class.
SetMuteFactor(channel, bgn_mute_factor);
} else {
// BGN parameters have not been initialized; use zero noise.
memset(noise_samples, 0, sizeof(int16_t) * num_noise_samples);
}
}
int32_t BackgroundNoise::Energy(size_t channel) const {
RTC_DCHECK_LT(channel, num_channels_);
return channel_parameters_[channel].energy;
}
void BackgroundNoise::SetMuteFactor(size_t channel, int16_t value) {
RTC_DCHECK_LT(channel, num_channels_);
channel_parameters_[channel].mute_factor = value;
}
int16_t BackgroundNoise::MuteFactor(size_t channel) const {
RTC_DCHECK_LT(channel, num_channels_);
return channel_parameters_[channel].mute_factor;
}
const int16_t* BackgroundNoise::Filter(size_t channel) const {
RTC_DCHECK_LT(channel, num_channels_);
return channel_parameters_[channel].filter;
}
const int16_t* BackgroundNoise::FilterState(size_t channel) const {
RTC_DCHECK_LT(channel, num_channels_);
return channel_parameters_[channel].filter_state;
}
void BackgroundNoise::SetFilterState(size_t channel,
rtc::ArrayView<const int16_t> input) {
RTC_DCHECK_LT(channel, num_channels_);
size_t length = std::min(input.size(), kMaxLpcOrder);
memcpy(channel_parameters_[channel].filter_state, input.data(),
length * sizeof(int16_t));
}
int16_t BackgroundNoise::Scale(size_t channel) const {
RTC_DCHECK_LT(channel, num_channels_);
return channel_parameters_[channel].scale;
}
int16_t BackgroundNoise::ScaleShift(size_t channel) const {
RTC_DCHECK_LT(channel, num_channels_);
return channel_parameters_[channel].scale_shift;
}
int32_t BackgroundNoise::CalculateAutoCorrelation(
const int16_t* signal,
size_t length,
int32_t* auto_correlation) const {
static const int kCorrelationStep = -1;
const int correlation_scale =
CrossCorrelationWithAutoShift(signal, signal, length, kMaxLpcOrder + 1,
kCorrelationStep, auto_correlation);
// Number of shifts to normalize energy to energy/sample.
int energy_sample_shift = kLogVecLen - correlation_scale;
return auto_correlation[0] >> energy_sample_shift;
}
void BackgroundNoise::IncrementEnergyThreshold(size_t channel,
int32_t sample_energy) {
// TODO(hlundin): Simplify the below threshold update. What this code
// does is simply "threshold += (increment * threshold) >> 16", but due
// to the limited-width operations, it is not exactly the same. The
// difference should be inaudible, but bit-exactness would not be
// maintained.
RTC_DCHECK_LT(channel, num_channels_);
ChannelParameters& parameters = channel_parameters_[channel];
int32_t temp_energy =
(kThresholdIncrement * parameters.low_energy_update_threshold) >> 16;
temp_energy +=
kThresholdIncrement * (parameters.energy_update_threshold & 0xFF);
temp_energy +=
(kThresholdIncrement * ((parameters.energy_update_threshold >> 8) & 0xFF))
<< 8;
parameters.low_energy_update_threshold += temp_energy;
parameters.energy_update_threshold +=
kThresholdIncrement * (parameters.energy_update_threshold >> 16);
parameters.energy_update_threshold +=
parameters.low_energy_update_threshold >> 16;
parameters.low_energy_update_threshold =
parameters.low_energy_update_threshold & 0x0FFFF;
// Update maximum energy.
// Decrease by a factor 1/1024 each time.
parameters.max_energy = parameters.max_energy - (parameters.max_energy >> 10);
if (sample_energy > parameters.max_energy) {
parameters.max_energy = sample_energy;
}
// Set `energy_update_threshold` to no less than 60 dB lower than
// `max_energy`. Adding 524288 assures proper rounding.
int32_t energy_update_threshold = (parameters.max_energy + 524288) >> 20;
if (energy_update_threshold > parameters.energy_update_threshold) {
parameters.energy_update_threshold = energy_update_threshold;
}
}
void BackgroundNoise::SaveParameters(size_t channel,
const int16_t* lpc_coefficients,
const int16_t* filter_state,
int32_t sample_energy,
int32_t residual_energy) {
RTC_DCHECK_LT(channel, num_channels_);
ChannelParameters& parameters = channel_parameters_[channel];
memcpy(parameters.filter, lpc_coefficients,
(kMaxLpcOrder + 1) * sizeof(int16_t));
memcpy(parameters.filter_state, filter_state, kMaxLpcOrder * sizeof(int16_t));
// Save energy level and update energy threshold levels.
// Never get under 1.0 in average sample energy.
parameters.energy = std::max(sample_energy, 1);
parameters.energy_update_threshold = parameters.energy;
parameters.low_energy_update_threshold = 0;
// Normalize residual_energy to 29 or 30 bits before sqrt.
int16_t norm_shift = WebRtcSpl_NormW32(residual_energy) - 1;
if (norm_shift & 0x1) {
norm_shift -= 1; // Even number of shifts required.
}
residual_energy = WEBRTC_SPL_SHIFT_W32(residual_energy, norm_shift);
// Calculate scale and shift factor.
parameters.scale = static_cast<int16_t>(WebRtcSpl_SqrtFloor(residual_energy));
// Add 13 to the `scale_shift_`, since the random numbers table is in
// Q13.
// TODO(hlundin): Move the "13" to where the `scale_shift_` is used?
parameters.scale_shift =
static_cast<int16_t>(13 + ((kLogResidualLength + norm_shift) / 2));
initialized_ = true;
}
} // namespace webrtc
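The TODO in IncrementEnergyThreshold() above notes that the split update approximates "threshold += (increment * threshold) >> 16". A standalone sketch with an assumed starting threshold of 500000 (the Reset() default) shows the two forms agreeing on this input, with the split form carrying the fractional residue in the low word; all values are illustrative.
#include <cstdint>
#include <cstdio>
int main() {
  const int32_t kThresholdIncrement = 229;  // 0.0035 in Q16.
  int32_t threshold = 500000;
  int32_t low = 0;  // Fractional (low-word) part of the threshold.
  // Split form from IncrementEnergyThreshold(), safe for limited-width ops.
  int32_t temp = (kThresholdIncrement * low) >> 16;
  temp += kThresholdIncrement * (threshold & 0xFF);
  temp += (kThresholdIncrement * ((threshold >> 8) & 0xFF)) << 8;
  low += temp;
  threshold += kThresholdIncrement * (threshold >> 16);
  threshold += low >> 16;
  low &= 0x0FFFF;
  // Simple wide-arithmetic reference from the TODO.
  const int32_t reference =
      500000 + static_cast<int32_t>((int64_t{229} * 500000) >> 16);
  printf("split=%d reference=%d\n", threshold, reference);  // Both 501747.
  return 0;
}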

View file

@ -0,0 +1,138 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_BACKGROUND_NOISE_H_
#define MODULES_AUDIO_CODING_NETEQ_BACKGROUND_NOISE_H_
#include <string.h> // size_t
#include <memory>
#include "api/array_view.h"
namespace webrtc {
// Forward declarations.
class AudioMultiVector;
class PostDecodeVad;
// This class handles estimation of background noise parameters.
class BackgroundNoise {
public:
// TODO(hlundin): For 48 kHz support, increase kMaxLpcOrder to 10.
// It will work anyway, but will probably sound a little worse.
static constexpr size_t kMaxLpcOrder = 8; // 32000 / 8000 + 4.
explicit BackgroundNoise(size_t num_channels);
virtual ~BackgroundNoise();
BackgroundNoise(const BackgroundNoise&) = delete;
BackgroundNoise& operator=(const BackgroundNoise&) = delete;
void Reset();
// Updates the parameter estimates based on the signal currently in the
// `sync_buffer`.
// Returns true if the filter parameters are updated.
bool Update(const AudioMultiVector& sync_buffer);
// Generates background noise given a random vector and writes the output to
// `buffer`.
void GenerateBackgroundNoise(rtc::ArrayView<const int16_t> random_vector,
size_t channel,
int mute_slope,
bool too_many_expands,
size_t num_noise_samples,
int16_t* buffer);
// Returns `energy_` for `channel`.
int32_t Energy(size_t channel) const;
// Sets the value of `mute_factor_` for `channel` to `value`.
void SetMuteFactor(size_t channel, int16_t value);
// Returns `mute_factor_` for `channel`.
int16_t MuteFactor(size_t channel) const;
// Returns a pointer to `filter_` for `channel`.
const int16_t* Filter(size_t channel) const;
// Returns a pointer to `filter_state_` for `channel`.
const int16_t* FilterState(size_t channel) const;
// Copies `input` to the filter state. Will not copy more than `kMaxLpcOrder`
// elements.
void SetFilterState(size_t channel, rtc::ArrayView<const int16_t> input);
// Returns `scale_` for `channel`.
int16_t Scale(size_t channel) const;
// Returns `scale_shift_` for `channel`.
int16_t ScaleShift(size_t channel) const;
// Accessors.
bool initialized() const { return initialized_; }
private:
static const int kThresholdIncrement = 229; // 0.0035 in Q16.
static const size_t kVecLen = 256;
static const int kLogVecLen = 8; // log2(kVecLen).
static const size_t kResidualLength = 64;
static const int16_t kLogResidualLength = 6; // log2(kResidualLength)
struct ChannelParameters {
// Constructor.
ChannelParameters() { Reset(); }
void Reset() {
energy = 2500;
max_energy = 0;
energy_update_threshold = 500000;
low_energy_update_threshold = 0;
memset(filter_state, 0, sizeof(filter_state));
memset(filter, 0, sizeof(filter));
filter[0] = 4096;
mute_factor = 0;
scale = 20000;
scale_shift = 24;
}
int32_t energy;
int32_t max_energy;
int32_t energy_update_threshold;
int32_t low_energy_update_threshold;
int16_t filter_state[kMaxLpcOrder];
int16_t filter[kMaxLpcOrder + 1];
int16_t mute_factor;
int16_t scale;
int16_t scale_shift;
};
int32_t CalculateAutoCorrelation(const int16_t* signal,
size_t length,
int32_t* auto_correlation) const;
// Increments the energy threshold by a factor 1 + `kThresholdIncrement`.
void IncrementEnergyThreshold(size_t channel, int32_t sample_energy);
// Updates the filter parameters.
void SaveParameters(size_t channel,
const int16_t* lpc_coefficients,
const int16_t* filter_state,
int32_t sample_energy,
int32_t residual_energy);
size_t num_channels_;
std::unique_ptr<ChannelParameters[]> channel_parameters_;
bool initialized_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_BACKGROUND_NOISE_H_
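A usage sketch, assuming this header and its implementation are linked in. Before any successful Update() the generator reports uninitialized and writes silence; the buffer must leave kMaxLpcOrder samples of filter-state room in front, as GenerateBackgroundNoise() expects.
#include <cstdint>
#include <cstdio>
#include "modules/audio_coding/neteq/background_noise.h"
int main() {
  webrtc::BackgroundNoise bgn(1);  // One channel.
  int16_t random_vector[80] = {0};
  int16_t buffer[webrtc::BackgroundNoise::kMaxLpcOrder + 80] = {0};
  bgn.GenerateBackgroundNoise(random_vector, /*channel=*/0, /*mute_slope=*/0,
                              /*too_many_expands=*/false,
                              /*num_noise_samples=*/80, buffer);
  // Prints "initialized=0 sample0=0": zero noise until Update() succeeds.
  printf("initialized=%d sample0=%d\n", bgn.initialized(),
         buffer[webrtc::BackgroundNoise::kMaxLpcOrder]);
  return 0;
}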

View file

@ -0,0 +1,26 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for BackgroundNoise class.
#include "modules/audio_coding/neteq/background_noise.h"
#include "test/gtest.h"
namespace webrtc {
TEST(BackgroundNoise, CreateAndDestroy) {
size_t channels = 1;
BackgroundNoise bgn(channels);
}
// TODO(hlundin): Write more tests.
} // namespace webrtc

View file

@ -0,0 +1,64 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/buffer_level_filter.h"
#include <stdint.h>
#include <algorithm>
#include "rtc_base/numerics/safe_conversions.h"
namespace webrtc {
BufferLevelFilter::BufferLevelFilter() {
Reset();
}
void BufferLevelFilter::Reset() {
filtered_current_level_ = 0;
level_factor_ = 253;
}
void BufferLevelFilter::Update(size_t buffer_size_samples,
int time_stretched_samples) {
// Filter:
// `filtered_current_level_` = `level_factor_` * `filtered_current_level_` +
// (1 - `level_factor_`) * `buffer_size_samples`
// `level_factor_` and `filtered_current_level_` are in Q8.
// `buffer_size_samples` is in Q0.
const int64_t filtered_current_level =
(level_factor_ * int64_t{filtered_current_level_} >> 8) +
(256 - level_factor_) * rtc::dchecked_cast<int64_t>(buffer_size_samples);
// Account for time-scale operations (accelerate and pre-emptive expand) and
// make sure that the filtered value remains non-negative.
filtered_current_level_ = rtc::saturated_cast<int>(std::max<int64_t>(
0, filtered_current_level - int64_t{time_stretched_samples} * (1 << 8)));
}
void BufferLevelFilter::SetFilteredBufferLevel(int buffer_size_samples) {
filtered_current_level_ =
rtc::saturated_cast<int>(int64_t{buffer_size_samples} * 256);
}
void BufferLevelFilter::SetTargetBufferLevel(int target_buffer_level_ms) {
if (target_buffer_level_ms <= 20) {
level_factor_ = 251;
} else if (target_buffer_level_ms <= 60) {
level_factor_ = 252;
} else if (target_buffer_level_ms <= 140) {
level_factor_ = 253;
} else {
level_factor_ = 254;
}
}
} // namespace webrtc
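A standalone sketch of the Q8 recursion in Update(); the input of 100 samples, the 10 iterations, and the 20 ms target (factor 251/256) mirror the unit tests, everything else is illustrative.
#include <cstdint>
#include <cstdio>
int main() {
  const int level_factor = 251;  // Target buffer level <= 20 ms.
  const int input_samples = 100;
  int64_t level_q8 = 0;  // Filtered level in Q8.
  for (int i = 0; i < 10; ++i) {
    level_q8 = ((level_factor * level_q8) >> 8) +
               (256 - level_factor) * input_samples;
  }
  // Round to whole samples as filtered_current_level() does. Prints 18,
  // close to the theoretical (1 - (251/256)^10) * 100, about 17.9.
  printf("%d\n", static_cast<int>((level_q8 + (1 << 7)) >> 8));
  return 0;
}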

View file

@ -0,0 +1,54 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_BUFFER_LEVEL_FILTER_H_
#define MODULES_AUDIO_CODING_NETEQ_BUFFER_LEVEL_FILTER_H_
#include <stddef.h>
#include <stdint.h>
namespace webrtc {
class BufferLevelFilter {
public:
BufferLevelFilter();
virtual ~BufferLevelFilter() {}
BufferLevelFilter(const BufferLevelFilter&) = delete;
BufferLevelFilter& operator=(const BufferLevelFilter&) = delete;
virtual void Reset();
// Updates the filter. Current buffer size is `buffer_size_samples`.
// `time_stretched_samples` is subtracted from the filtered value (thus
// bypassing the filter operation).
virtual void Update(size_t buffer_size_samples, int time_stretched_samples);
// Set the filtered buffer level to a particular value directly. This should
// only be used in case of large changes in buffer size, such as buffer
// flushes.
virtual void SetFilteredBufferLevel(int buffer_size_samples);
// The target level is used to select the appropriate filter coefficient.
virtual void SetTargetBufferLevel(int target_buffer_level_ms);
// Returns filtered current level in number of samples.
virtual int filtered_current_level() const {
// Round to nearest whole sample.
return (int64_t{filtered_current_level_} + (1 << 7)) >> 8;
}
private:
int level_factor_; // Filter factor for the buffer level filter in Q8.
int filtered_current_level_; // Filtered current buffer level in Q8.
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_BUFFER_LEVEL_FILTER_H_

View file

@ -0,0 +1,116 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for BufferLevelFilter class.
#include "modules/audio_coding/neteq/buffer_level_filter.h"
#include <math.h> // Access to pow function.
#include "rtc_base/strings/string_builder.h"
#include "test/gtest.h"
namespace webrtc {
TEST(BufferLevelFilter, CreateAndDestroy) {
BufferLevelFilter* filter = new BufferLevelFilter();
EXPECT_EQ(0, filter->filtered_current_level());
delete filter;
}
TEST(BufferLevelFilter, ConvergenceTest) {
BufferLevelFilter filter;
for (int times = 10; times <= 50; times += 10) {
for (int value = 100; value <= 200; value += 10) {
filter.Reset();
filter.SetTargetBufferLevel(20); // Makes filter coefficient 251/256.
rtc::StringBuilder ss;
ss << "times = " << times << ", value = " << value;
SCOPED_TRACE(ss.str()); // Print out the parameter values on failure.
for (int i = 0; i < times; ++i) {
filter.Update(value, 0 /* time_stretched_samples */);
}
// Expect the filtered value to be (theoretically)
// (1 - (251/256) ^ `times`) * `value`.
double expected_value_double = (1 - pow(251.0 / 256.0, times)) * value;
int expected_value = static_cast<int>(expected_value_double);
// The actual value may differ slightly from the expected value due to
// intermediate-stage rounding errors in the filter implementation.
// This is why we have to use EXPECT_NEAR with a tolerance of +/-1.
EXPECT_NEAR(expected_value, filter.filtered_current_level(), 1);
}
}
}
// Verify that target buffer level impacts on the filter convergence.
TEST(BufferLevelFilter, FilterFactor) {
BufferLevelFilter filter;
// Update 10 times with value 100.
const int kTimes = 10;
const int kValue = 100;
filter.SetTargetBufferLevel(60); // Makes filter coefficient 252/256.
for (int i = 0; i < kTimes; ++i) {
filter.Update(kValue, 0 /* time_stretched_samples */);
}
// Expect the filtered value to be
// (1 - (252/256) ^ `kTimes`) * `kValue`.
int expected_value = 15;
EXPECT_EQ(expected_value, filter.filtered_current_level());
filter.Reset();
filter.SetTargetBufferLevel(140); // Makes filter coefficient 253/256.
for (int i = 0; i < kTimes; ++i) {
filter.Update(kValue, 0 /* time_stretched_samples */);
}
// Expect the filtered value to be
// (1 - (253/256) ^ `kTimes`) * `kValue`.
expected_value = 11;
EXPECT_EQ(expected_value, filter.filtered_current_level());
filter.Reset();
filter.SetTargetBufferLevel(160); // Makes filter coefficient 254/256.
for (int i = 0; i < kTimes; ++i) {
filter.Update(kValue, 0 /* time_stretched_samples */);
}
// Expect the filtered value to be
// (1 - (254/256) ^ `kTimes`) * `kValue`.
expected_value = 8;
EXPECT_EQ(expected_value, filter.filtered_current_level());
}
TEST(BufferLevelFilter, TimeStretchedSamples) {
BufferLevelFilter filter;
filter.SetTargetBufferLevel(20); // Makes filter coefficient 251/256.
// Update 10 times with value 100.
const int kTimes = 10;
const int kValue = 100;
const int kTimeStretchedSamples = 3;
for (int i = 0; i < kTimes; ++i) {
filter.Update(kValue, 0);
}
// Expect the filtered value to be
// (1 - (251/256) ^ `kTimes`) * `kValue`.
const int kExpectedValue = 18;
EXPECT_EQ(kExpectedValue, filter.filtered_current_level());
// Update filter again, now with non-zero value for packet length.
// Set the current filtered value to be the input, in order to isolate the
// impact of `kTimeStretchedSamples`.
filter.Update(filter.filtered_current_level(), kTimeStretchedSamples);
EXPECT_EQ(kExpectedValue - kTimeStretchedSamples,
filter.filtered_current_level());
// Try negative value and verify that we come back to the previous result.
filter.Update(filter.filtered_current_level(), -kTimeStretchedSamples);
EXPECT_EQ(kExpectedValue, filter.filtered_current_level());
}
} // namespace webrtc

View file

@ -0,0 +1,129 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/comfort_noise.h"
#include <cstdint>
#include <memory>
#include "api/array_view.h"
#include "modules/audio_coding/codecs/cng/webrtc_cng.h"
#include "modules/audio_coding/neteq/audio_multi_vector.h"
#include "modules/audio_coding/neteq/audio_vector.h"
#include "modules/audio_coding/neteq/decoder_database.h"
#include "modules/audio_coding/neteq/dsp_helper.h"
#include "modules/audio_coding/neteq/sync_buffer.h"
#include "rtc_base/buffer.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
namespace webrtc {
void ComfortNoise::Reset() {
first_call_ = true;
}
int ComfortNoise::UpdateParameters(const Packet& packet) {
// Get comfort noise decoder.
if (decoder_database_->SetActiveCngDecoder(packet.payload_type) != kOK) {
return kUnknownPayloadType;
}
ComfortNoiseDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder();
RTC_DCHECK(cng_decoder);
cng_decoder->UpdateSid(packet.payload);
return kOK;
}
int ComfortNoise::Generate(size_t requested_length, AudioMultiVector* output) {
// TODO(hlundin): Change to an enumerator and skip assert.
RTC_DCHECK(fs_hz_ == 8000 || fs_hz_ == 16000 || fs_hz_ == 32000 ||
fs_hz_ == 48000);
// Not adapted for multi-channel yet.
if (output->Channels() != 1) {
RTC_LOG(LS_ERROR) << "No multi-channel support";
return kMultiChannelNotSupported;
}
size_t number_of_samples = requested_length;
bool new_period = false;
if (first_call_) {
// Generate noise and overlap slightly with old data.
number_of_samples = requested_length + overlap_length_;
new_period = true;
}
output->AssertSize(number_of_samples);
// Get the decoder from the database.
ComfortNoiseDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder();
if (!cng_decoder) {
RTC_LOG(LS_ERROR) << "Unknwown payload type";
return kUnknownPayloadType;
}
std::unique_ptr<int16_t[]> temp(new int16_t[number_of_samples]);
if (!cng_decoder->Generate(
rtc::ArrayView<int16_t>(temp.get(), number_of_samples), new_period)) {
// Error returned.
output->Zeros(requested_length);
RTC_LOG(LS_ERROR)
<< "ComfortNoiseDecoder::Genererate failed to generate comfort noise";
return kInternalError;
}
(*output)[0].OverwriteAt(temp.get(), number_of_samples, 0);
if (first_call_) {
// Set tapering window parameters. Values are in Q15.
int16_t muting_window; // Mixing factor for overlap data.
int16_t muting_window_increment; // Mixing factor increment (negative).
int16_t unmuting_window; // Mixing factor for comfort noise.
int16_t unmuting_window_increment; // Mixing factor increment.
if (fs_hz_ == 8000) {
muting_window = DspHelper::kMuteFactorStart8kHz;
muting_window_increment = DspHelper::kMuteFactorIncrement8kHz;
unmuting_window = DspHelper::kUnmuteFactorStart8kHz;
unmuting_window_increment = DspHelper::kUnmuteFactorIncrement8kHz;
} else if (fs_hz_ == 16000) {
muting_window = DspHelper::kMuteFactorStart16kHz;
muting_window_increment = DspHelper::kMuteFactorIncrement16kHz;
unmuting_window = DspHelper::kUnmuteFactorStart16kHz;
unmuting_window_increment = DspHelper::kUnmuteFactorIncrement16kHz;
} else if (fs_hz_ == 32000) {
muting_window = DspHelper::kMuteFactorStart32kHz;
muting_window_increment = DspHelper::kMuteFactorIncrement32kHz;
unmuting_window = DspHelper::kUnmuteFactorStart32kHz;
unmuting_window_increment = DspHelper::kUnmuteFactorIncrement32kHz;
} else { // fs_hz_ == 48000
muting_window = DspHelper::kMuteFactorStart48kHz;
muting_window_increment = DspHelper::kMuteFactorIncrement48kHz;
unmuting_window = DspHelper::kUnmuteFactorStart48kHz;
unmuting_window_increment = DspHelper::kUnmuteFactorIncrement48kHz;
}
// Do overlap-add between new vector and overlap.
size_t start_ix = sync_buffer_->Size() - overlap_length_;
for (size_t i = 0; i < overlap_length_; i++) {
/* overlapVec[i] = WinMute * overlapVec[i] + WinUnMute * outData[i] */
// The expression (*output)[0][i] is the i-th element in the first
// channel.
(*sync_buffer_)[0][start_ix + i] =
(((*sync_buffer_)[0][start_ix + i] * muting_window) +
((*output)[0][i] * unmuting_window) + 16384) >>
15;
muting_window += muting_window_increment;
unmuting_window += unmuting_window_increment;
}
// Remove `overlap_length_` samples from the front of `output` since they
// were mixed into `sync_buffer_` above.
output->PopFront(overlap_length_);
}
first_call_ = false;
return kOK;
}
} // namespace webrtc
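A standalone sketch of the Q15 overlap-add performed on the first call above. The window start values and increments below are made up for the illustration; the real constants live in DspHelper and depend on the sample rate. The two windows are complementary, summing to 32768 (1.0 in Q15).
#include <cstdint>
#include <cstdio>
int main() {
  const int overlap_length = 5;    // Hypothetical: 5 samples at 8 kHz.
  int16_t muting_window = 27307;   // Fades out the old signal.
  const int16_t muting_increment = -5461;
  int16_t unmuting_window = 5461;  // Fades in the comfort noise.
  const int16_t unmuting_increment = 5461;
  const int16_t old_samples[] = {1000, 1000, 1000, 1000, 1000};
  const int16_t cng_samples[] = {-500, -500, -500, -500, -500};
  for (int i = 0; i < overlap_length; ++i) {
    // Rounded Q15 mix; prints 750 500 250 0 -250, ramping from the old
    // signal toward the comfort noise.
    const int mixed = (old_samples[i] * muting_window +
                       cng_samples[i] * unmuting_window + 16384) >> 15;
    printf("%d ", mixed);
    muting_window += muting_increment;
    unmuting_window += unmuting_increment;
  }
  printf("\n");
  return 0;
}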

View file

@ -0,0 +1,72 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_COMFORT_NOISE_H_
#define MODULES_AUDIO_CODING_NETEQ_COMFORT_NOISE_H_
#include <stddef.h>
namespace webrtc {
// Forward declarations.
class AudioMultiVector;
class DecoderDatabase;
class SyncBuffer;
struct Packet;
// This class acts as an interface to the CNG generator.
class ComfortNoise {
public:
enum ReturnCodes {
kOK = 0,
kUnknownPayloadType,
kInternalError,
kMultiChannelNotSupported
};
ComfortNoise(int fs_hz,
DecoderDatabase* decoder_database,
SyncBuffer* sync_buffer)
: fs_hz_(fs_hz),
first_call_(true),
overlap_length_(5 * fs_hz_ / 8000),
decoder_database_(decoder_database),
sync_buffer_(sync_buffer) {}
ComfortNoise(const ComfortNoise&) = delete;
ComfortNoise& operator=(const ComfortNoise&) = delete;
// Resets the state. Should be called before each new comfort noise period.
void Reset();
// Update the comfort noise generator with the parameters in `packet`.
int UpdateParameters(const Packet& packet);
// Generates `requested_length` samples of comfort noise and writes to
// `output`. If this is the first call after Reset (or the first after creating
// the object), it will also mix in comfort noise at the end of the
// SyncBuffer object provided in the constructor.
int Generate(size_t requested_length, AudioMultiVector* output);
// Returns the last error code that was produced by the comfort noise
// decoder. Returns 0 if no error has been encountered since the last reset.
int internal_error_code() { return internal_error_code_; }
private:
int fs_hz_;
bool first_call_;
size_t overlap_length_;
DecoderDatabase* decoder_database_;
SyncBuffer* sync_buffer_;
int internal_error_code_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_COMFORT_NOISE_H_

View file

@ -0,0 +1,31 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for ComfortNoise class.
#include "modules/audio_coding/neteq/comfort_noise.h"
#include "modules/audio_coding/neteq/mock/mock_decoder_database.h"
#include "modules/audio_coding/neteq/sync_buffer.h"
#include "test/gtest.h"
namespace webrtc {
TEST(ComfortNoise, CreateAndDestroy) {
int fs = 8000;
MockDecoderDatabase db;
SyncBuffer sync_buffer(1, 1000);
ComfortNoise cn(fs, &db, &sync_buffer);
EXPECT_CALL(db, Die()); // Called when `db` goes out of scope.
}
// TODO(hlundin): Write more tests.
} // namespace webrtc

View file

@ -0,0 +1,55 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/cross_correlation.h"
#include <cstdlib>
#include <limits>
#include "common_audio/signal_processing/include/signal_processing_library.h"
namespace webrtc {
// This function decides the overflow-protecting scaling and calls
// WebRtcSpl_CrossCorrelation.
int CrossCorrelationWithAutoShift(const int16_t* sequence_1,
const int16_t* sequence_2,
size_t sequence_1_length,
size_t cross_correlation_length,
int cross_correlation_step,
int32_t* cross_correlation) {
// Find the elements with the maximum absolute values in sequence_1 and
// sequence_2. Note that these values may be negative.
const int16_t max_1 =
WebRtcSpl_MaxAbsElementW16(sequence_1, sequence_1_length);
const int sequence_2_shift =
cross_correlation_step * (static_cast<int>(cross_correlation_length) - 1);
const int16_t* sequence_2_start =
sequence_2_shift >= 0 ? sequence_2 : sequence_2 + sequence_2_shift;
const size_t sequence_2_length =
sequence_1_length + std::abs(sequence_2_shift);
const int16_t max_2 =
WebRtcSpl_MaxAbsElementW16(sequence_2_start, sequence_2_length);
// In order to avoid overflow when computing the sum we should scale the
// samples so that (sequence_1_length * max_1 * max_2) will not overflow.
const int64_t max_value =
abs(max_1 * max_2) * static_cast<int64_t>(sequence_1_length);
const int32_t factor = max_value >> 31;
const int scaling = factor == 0 ? 0 : 31 - WebRtcSpl_NormW32(factor);
WebRtcSpl_CrossCorrelation(cross_correlation, sequence_1, sequence_2,
sequence_1_length, cross_correlation_length,
scaling, cross_correlation_step);
return scaling;
}
} // namespace webrtc
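A standalone sketch of how the scaling above is derived, with a small stand-in for WebRtcSpl_NormW32 (valid for positive inputs); the maxima and length are illustrative.
#include <cstdint>
#include <cstdio>
// Number of left shifts that normalizes a positive 32-bit value so that
// bit 30 is its highest set bit; stand-in for WebRtcSpl_NormW32.
int NormW32Sketch(int32_t x) {
  int norm = 0;
  while ((x << norm) < int32_t{0x40000000}) ++norm;
  return norm;
}
int main() {
  const int16_t max_1 = 20000;
  const int16_t max_2 = 18000;
  const size_t length = 256;
  const int64_t max_value =
      int64_t{max_1} * max_2 * static_cast<int64_t>(length);
  const int32_t factor = static_cast<int32_t>(max_value >> 31);
  const int scaling = factor == 0 ? 0 : 31 - NormW32Sketch(factor);
  // Prints scaling=6: each product term is right-shifted 6 bits so the
  // accumulated sum stays within 32 bits.
  printf("scaling=%d\n", scaling);
  return 0;
}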

View file

@ -0,0 +1,51 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_CROSS_CORRELATION_H_
#define MODULES_AUDIO_CODING_NETEQ_CROSS_CORRELATION_H_
#include <stddef.h>
#include <stdint.h>
namespace webrtc {
// The function calculates the cross-correlation between two sequences
// `sequence_1` and `sequence_2`. `sequence_1` is taken as reference, with
// `sequence_1_length` as its length. `sequence_2` slides for the calculation of
// cross-correlation. The result will be saved in `cross_correlation`.
// `cross_correlation_length` correlation points are calculated.
// The corresponding lag starts from 0, and increases with a step of
// `cross_correlation_step`. The result is without normalization. To avoid
// overflow, the result will be right shifted. The amount of shifts will be
// returned.
//
// Input:
// - sequence_1 : First sequence (reference).
// - sequence_2 : Second sequence (sliding during calculation).
// - sequence_1_length : Length of `sequence_1`.
// - cross_correlation_length : Number of cross-correlations to calculate.
// - cross_correlation_step : Step in the lag for the cross-correlation.
//
// Output:
// - cross_correlation : The cross-correlation in Q(-right_shifts)
//
// Return:
// Number of right shifts in cross_correlation.
int CrossCorrelationWithAutoShift(const int16_t* sequence_1,
const int16_t* sequence_2,
size_t sequence_1_length,
size_t cross_correlation_length,
int cross_correlation_step,
int32_t* cross_correlation);
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_CROSS_CORRELATION_H_
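A minimal usage sketch, assuming a WebRTC build where this header and its implementation are available; the sequences and the lag count are illustrative.
#include <cstdint>
#include <cstdio>
#include "modules/audio_coding/neteq/cross_correlation.h"
int main() {
  // `sequence_2` must cover every lag: 64 samples plus (10 - 1) extra steps.
  int16_t sequence_1[64];
  int16_t sequence_2[64 + 9];
  for (int i = 0; i < 64; ++i)
    sequence_1[i] = static_cast<int16_t>(100 * (i % 8));
  for (int i = 0; i < 73; ++i)
    sequence_2[i] = static_cast<int16_t>(100 * (i % 8));
  int32_t correlation[10];
  // correlation[k] compares sequence_1 against sequence_2 shifted by k.
  const int shifts = webrtc::CrossCorrelationWithAutoShift(
      sequence_1, sequence_2, 64, 10, 1, correlation);
  // The unnormalized correlation is approximately correlation[k] << shifts.
  printf("lag0=%d right_shifts=%d\n", correlation[0], shifts);
  return 0;
}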

View file

@ -0,0 +1,469 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/decision_logic.h"
#include <stdio.h>
#include <cstdint>
#include <memory>
#include "absl/types/optional.h"
#include "api/neteq/neteq.h"
#include "api/neteq/neteq_controller.h"
#include "modules/audio_coding/neteq/packet_arrival_history.h"
#include "modules/audio_coding/neteq/packet_buffer.h"
#include "rtc_base/checks.h"
#include "rtc_base/experiments/struct_parameters_parser.h"
#include "rtc_base/logging.h"
#include "rtc_base/numerics/safe_conversions.h"
#include "system_wrappers/include/field_trial.h"
namespace webrtc {
namespace {
constexpr int kPostponeDecodingLevel = 50;
constexpr int kTargetLevelWindowMs = 100;
constexpr int kMaxWaitForPacketMs = 100;
// The granularity of delay adjustments (accelerate/preemptive expand) is
// 15 ms, but we round up to 20 ms since the clock has a granularity of 10 ms.
constexpr int kDelayAdjustmentGranularityMs = 20;
constexpr int kReinitAfterExpandsMs = 1000;
std::unique_ptr<DelayManager> CreateDelayManager(
const NetEqController::Config& neteq_config) {
DelayManager::Config config;
config.max_packets_in_buffer = neteq_config.max_packets_in_buffer;
config.base_minimum_delay_ms = neteq_config.base_min_delay_ms;
config.Log();
return std::make_unique<DelayManager>(config, neteq_config.tick_timer);
}
bool IsTimestretch(NetEq::Mode mode) {
return mode == NetEq::Mode::kAccelerateSuccess ||
mode == NetEq::Mode::kAccelerateLowEnergy ||
mode == NetEq::Mode::kPreemptiveExpandSuccess ||
mode == NetEq::Mode::kPreemptiveExpandLowEnergy;
}
bool IsCng(NetEq::Mode mode) {
return mode == NetEq::Mode::kRfc3389Cng ||
mode == NetEq::Mode::kCodecInternalCng;
}
bool IsExpand(NetEq::Mode mode) {
return mode == NetEq::Mode::kExpand || mode == NetEq::Mode::kCodecPlc;
}
} // namespace
DecisionLogic::Config::Config() {
StructParametersParser::Create(
"enable_stable_delay_mode", &enable_stable_delay_mode, //
"combine_concealment_decision", &combine_concealment_decision, //
"packet_history_size_ms", &packet_history_size_ms, //
"cng_timeout_ms", &cng_timeout_ms, //
"deceleration_target_level_offset_ms",
&deceleration_target_level_offset_ms)
->Parse(webrtc::field_trial::FindFullName(
"WebRTC-Audio-NetEqDecisionLogicConfig"));
RTC_LOG(LS_INFO) << "NetEq decision logic config:"
<< " enable_stable_delay_mode=" << enable_stable_delay_mode
<< " combine_concealment_decision="
<< combine_concealment_decision
<< " packet_history_size_ms=" << packet_history_size_ms
<< " cng_timeout_ms=" << cng_timeout_ms.value_or(-1)
<< " deceleration_target_level_offset_ms="
<< deceleration_target_level_offset_ms;
}
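// As a hedged example (the key:value format is assumed from
// StructParametersParser; the values are arbitrary), running with the field
// trial string
//   "WebRTC-Audio-NetEqDecisionLogicConfig/cng_timeout_ms:2000,"
//   "enable_stable_delay_mode:false/"
// would raise the CNG timeout to 2 seconds and disable stable delay mode.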
DecisionLogic::DecisionLogic(NetEqController::Config config)
: DecisionLogic(config,
CreateDelayManager(config),
std::make_unique<BufferLevelFilter>()) {}
DecisionLogic::DecisionLogic(
NetEqController::Config config,
std::unique_ptr<DelayManager> delay_manager,
std::unique_ptr<BufferLevelFilter> buffer_level_filter,
std::unique_ptr<PacketArrivalHistory> packet_arrival_history)
: delay_manager_(std::move(delay_manager)),
buffer_level_filter_(std::move(buffer_level_filter)),
packet_arrival_history_(packet_arrival_history
? std::move(packet_arrival_history)
: std::make_unique<PacketArrivalHistory>(
config.tick_timer,
config_.packet_history_size_ms)),
tick_timer_(config.tick_timer),
disallow_time_stretching_(!config.allow_time_stretching),
timescale_countdown_(
tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1)) {}
DecisionLogic::~DecisionLogic() = default;
void DecisionLogic::SoftReset() {
packet_length_samples_ = 0;
sample_memory_ = 0;
prev_time_scale_ = false;
timescale_countdown_ =
tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1);
time_stretched_cn_samples_ = 0;
delay_manager_->Reset();
buffer_level_filter_->Reset();
packet_arrival_history_->Reset();
}
void DecisionLogic::SetSampleRate(int fs_hz, size_t output_size_samples) {
// TODO(hlundin): Change to an enumerator and skip assert.
RTC_DCHECK(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 ||
fs_hz == 48000);
sample_rate_khz_ = fs_hz / 1000;
output_size_samples_ = output_size_samples;
packet_arrival_history_->set_sample_rate(fs_hz);
}
NetEq::Operation DecisionLogic::GetDecision(const NetEqStatus& status,
bool* reset_decoder) {
prev_time_scale_ = prev_time_scale_ && IsTimestretch(status.last_mode);
if (prev_time_scale_) {
timescale_countdown_ = tick_timer_->GetNewCountdown(kMinTimescaleInterval);
}
if (!IsCng(status.last_mode) &&
!(config_.combine_concealment_decision && IsExpand(status.last_mode))) {
FilterBufferLevel(status.packet_buffer_info.span_samples);
}
// Guard for errors, to avoid getting stuck in error mode.
if (status.last_mode == NetEq::Mode::kError) {
if (!status.next_packet) {
return NetEq::Operation::kExpand;
} else {
// Use kUndefined to flag for a reset.
return NetEq::Operation::kUndefined;
}
}
if (status.next_packet && status.next_packet->is_cng) {
return CngOperation(status);
}
// Handle the case with no packet at all available (except maybe DTMF).
if (!status.next_packet) {
return NoPacket(status);
}
// If the expand period was very long, reset NetEQ since it is likely that the
// sender was restarted.
if (!config_.combine_concealment_decision && IsExpand(status.last_mode) &&
status.generated_noise_samples >
static_cast<size_t>(kReinitAfterExpandsMs * sample_rate_khz_)) {
*reset_decoder = true;
return NetEq::Operation::kNormal;
}
if (PostponeDecode(status)) {
return NoPacket(status);
}
const uint32_t five_seconds_samples =
static_cast<uint32_t>(5000 * sample_rate_khz_);
// Check if the required packet is available.
if (status.target_timestamp == status.next_packet->timestamp) {
return ExpectedPacketAvailable(status);
}
if (!PacketBuffer::IsObsoleteTimestamp(status.next_packet->timestamp,
status.target_timestamp,
five_seconds_samples)) {
return FuturePacketAvailable(status);
}
// This implies that `status.next_packet->timestamp` is older than
// `status.target_timestamp`, which can happen when a new stream or codec is
// received. Signal for a reset.
return NetEq::Operation::kUndefined;
}
int DecisionLogic::TargetLevelMs() const {
int target_delay_ms = delay_manager_->TargetDelayMs();
if (!config_.enable_stable_delay_mode) {
target_delay_ms =
std::max(target_delay_ms,
static_cast<int>(packet_length_samples_ / sample_rate_khz_));
}
return target_delay_ms;
}
int DecisionLogic::UnlimitedTargetLevelMs() const {
return delay_manager_->UnlimitedTargetLevelMs();
}
int DecisionLogic::GetFilteredBufferLevel() const {
return buffer_level_filter_->filtered_current_level();
}
absl::optional<int> DecisionLogic::PacketArrived(
int fs_hz,
bool should_update_stats,
const PacketArrivedInfo& info) {
buffer_flush_ = buffer_flush_ || info.buffer_flush;
if (!should_update_stats || info.is_cng_or_dtmf) {
return absl::nullopt;
}
if (info.packet_length_samples > 0 && fs_hz > 0 &&
info.packet_length_samples != packet_length_samples_) {
packet_length_samples_ = info.packet_length_samples;
delay_manager_->SetPacketAudioLength(packet_length_samples_ * 1000 / fs_hz);
}
bool inserted = packet_arrival_history_->Insert(info.main_timestamp,
info.packet_length_samples);
if (!inserted || packet_arrival_history_->size() < 2) {
// No meaningful delay estimate unless at least 2 packets have arrived.
return absl::nullopt;
}
int arrival_delay_ms =
packet_arrival_history_->GetDelayMs(info.main_timestamp);
bool reordered =
!packet_arrival_history_->IsNewestRtpTimestamp(info.main_timestamp);
delay_manager_->Update(arrival_delay_ms, reordered);
return arrival_delay_ms;
}
void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples) {
buffer_level_filter_->SetTargetBufferLevel(TargetLevelMs());
int time_stretched_samples = time_stretched_cn_samples_;
if (prev_time_scale_) {
time_stretched_samples += sample_memory_;
}
if (buffer_flush_) {
buffer_level_filter_->SetFilteredBufferLevel(buffer_size_samples);
buffer_flush_ = false;
} else {
buffer_level_filter_->Update(buffer_size_samples, time_stretched_samples);
}
prev_time_scale_ = false;
time_stretched_cn_samples_ = 0;
}
NetEq::Operation DecisionLogic::CngOperation(
NetEqController::NetEqStatus status) {
// Signed difference between target and available timestamp.
int32_t timestamp_diff = static_cast<int32_t>(
static_cast<uint32_t>(status.generated_noise_samples +
status.target_timestamp) -
status.next_packet->timestamp);
int optimal_level_samp = TargetLevelMs() * sample_rate_khz_;
const int64_t excess_waiting_time_samp =
-static_cast<int64_t>(timestamp_diff) - optimal_level_samp;
if (excess_waiting_time_samp > optimal_level_samp / 2) {
// The waiting time for this packet will be longer than 1.5
// times the wanted buffer delay. Apply fast-forward to cut the
// waiting time down to the optimal.
noise_fast_forward_ = rtc::saturated_cast<size_t>(noise_fast_forward_ +
excess_waiting_time_samp);
timestamp_diff =
rtc::saturated_cast<int32_t>(timestamp_diff + excess_waiting_time_samp);
}
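  // Worked example (illustrative numbers): with TargetLevelMs() = 80 at
  // 8 kHz, optimal_level_samp = 640. If the CNG packet is due 1300 samples
  // into the future (timestamp_diff = -1300), then excess_waiting_time_samp
  // = 1300 - 640 = 660 > 320, so playback is fast-forwarded by 660 samples
  // and timestamp_diff becomes -640: the wait is cut down to the optimal
  // level.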
if (timestamp_diff < 0 && status.last_mode == NetEq::Mode::kRfc3389Cng) {
// Not time to play this packet yet. Wait another round before using this
// packet. Keep on playing CNG from previous CNG parameters.
return NetEq::Operation::kRfc3389CngNoPacket;
} else {
// Otherwise, go for the CNG packet now.
noise_fast_forward_ = 0;
return NetEq::Operation::kRfc3389Cng;
}
}
NetEq::Operation DecisionLogic::NoPacket(NetEqController::NetEqStatus status) {
switch (status.last_mode) {
case NetEq::Mode::kRfc3389Cng:
return NetEq::Operation::kRfc3389CngNoPacket;
case NetEq::Mode::kCodecInternalCng: {
// Stop CNG after a timeout.
if (config_.cng_timeout_ms &&
status.generated_noise_samples >
static_cast<size_t>(*config_.cng_timeout_ms * sample_rate_khz_)) {
return NetEq::Operation::kExpand;
}
return NetEq::Operation::kCodecInternalCng;
}
default:
return status.play_dtmf ? NetEq::Operation::kDtmf
: NetEq::Operation::kExpand;
}
}
NetEq::Operation DecisionLogic::ExpectedPacketAvailable(
NetEqController::NetEqStatus status) {
if (!disallow_time_stretching_ && status.last_mode != NetEq::Mode::kExpand &&
!status.play_dtmf) {
if (config_.enable_stable_delay_mode) {
const int playout_delay_ms = GetPlayoutDelayMs(status);
const int64_t low_limit = TargetLevelMs();
const int64_t high_limit = low_limit +
packet_arrival_history_->GetMaxDelayMs() +
kDelayAdjustmentGranularityMs;
if (playout_delay_ms >= high_limit * 4) {
return NetEq::Operation::kFastAccelerate;
}
if (TimescaleAllowed()) {
if (playout_delay_ms >= high_limit) {
return NetEq::Operation::kAccelerate;
}
if (playout_delay_ms < low_limit) {
return NetEq::Operation::kPreemptiveExpand;
}
}
} else {
const int target_level_samples = TargetLevelMs() * sample_rate_khz_;
const int low_limit = std::max(
target_level_samples * 3 / 4,
target_level_samples -
config_.deceleration_target_level_offset_ms * sample_rate_khz_);
const int high_limit = std::max(
target_level_samples,
low_limit + kDelayAdjustmentGranularityMs * sample_rate_khz_);
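      // Worked example (illustrative, using the default 85 ms deceleration
      // offset): with a 100 ms target at 8 kHz, target_level_samples = 800,
      // low_limit = max(600, 800 - 85 * 8) = 600 and high_limit =
      // max(800, 600 + 160) = 800. Accelerate then triggers at a filtered
      // level of 100 ms, fast-accelerate at 400 ms, and preemptive expand
      // below 75 ms.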
const int buffer_level_samples =
buffer_level_filter_->filtered_current_level();
if (buffer_level_samples >= high_limit * 4)
return NetEq::Operation::kFastAccelerate;
if (TimescaleAllowed()) {
if (buffer_level_samples >= high_limit)
return NetEq::Operation::kAccelerate;
if (buffer_level_samples < low_limit)
return NetEq::Operation::kPreemptiveExpand;
}
}
}
return NetEq::Operation::kNormal;
}
NetEq::Operation DecisionLogic::FuturePacketAvailable(
NetEqController::NetEqStatus status) {
// Required packet is not available, but a future packet is.
// Check if we should continue with an ongoing concealment because the new
// packet is too far into the future.
if (config_.combine_concealment_decision || IsCng(status.last_mode)) {
const int buffer_delay_samples =
config_.combine_concealment_decision
? status.packet_buffer_info.span_samples_wait_time
: status.packet_buffer_info.span_samples;
const int buffer_delay_ms = buffer_delay_samples / sample_rate_khz_;
const int high_limit = TargetLevelMs() + kTargetLevelWindowMs / 2;
const int low_limit =
std::max(0, TargetLevelMs() - kTargetLevelWindowMs / 2);
const bool above_target_delay = buffer_delay_ms > high_limit;
const bool below_target_delay = buffer_delay_ms < low_limit;
if ((PacketTooEarly(status) && !above_target_delay) ||
(below_target_delay && !config_.combine_concealment_decision)) {
return NoPacket(status);
}
uint32_t timestamp_leap =
status.next_packet->timestamp - status.target_timestamp;
if (config_.combine_concealment_decision) {
if (timestamp_leap != status.generated_noise_samples) {
// The delay was adjusted, reinitialize the buffer level filter.
buffer_level_filter_->SetFilteredBufferLevel(buffer_delay_samples);
}
} else {
time_stretched_cn_samples_ =
timestamp_leap - status.generated_noise_samples;
}
} else if (IsExpand(status.last_mode) && ShouldContinueExpand(status)) {
return NoPacket(status);
}
// Time to play the next packet.
switch (status.last_mode) {
case NetEq::Mode::kExpand:
return NetEq::Operation::kMerge;
case NetEq::Mode::kCodecPlc:
case NetEq::Mode::kRfc3389Cng:
case NetEq::Mode::kCodecInternalCng:
return NetEq::Operation::kNormal;
default:
return status.play_dtmf ? NetEq::Operation::kDtmf
: NetEq::Operation::kExpand;
}
}
bool DecisionLogic::UnderTargetLevel() const {
return buffer_level_filter_->filtered_current_level() <
TargetLevelMs() * sample_rate_khz_;
}
bool DecisionLogic::PostponeDecode(NetEqController::NetEqStatus status) const {
// Make sure we don't restart audio too soon after CNG or expand to avoid
// running out of data right away again.
const size_t min_buffer_level_samples =
TargetLevelMs() * sample_rate_khz_ * kPostponeDecodingLevel / 100;
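  // E.g. (illustrative) a 500 ms target at 8 kHz gives a threshold of
  // 2000 samples (250 ms); this is the boundary exercised by the
  // PostponeDecodeAfterExpand unit test.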
const size_t buffer_level_samples =
config_.combine_concealment_decision
? status.packet_buffer_info.span_samples_wait_time
: status.packet_buffer_info.span_samples;
if (buffer_level_samples >= min_buffer_level_samples) {
return false;
}
// Don't postpone decoding if there is a future DTX packet in the packet
// buffer.
if (status.packet_buffer_info.dtx_or_cng) {
return false;
}
// Continue CNG until the buffer is at least at the minimum level.
if (config_.combine_concealment_decision && IsCng(status.last_mode)) {
return true;
}
// Only continue expand if the mute factor is low enough (otherwise the
// expansion was short enough to not be noticeable). Note that the MuteFactor
// is in Q14, so a value of 16384 corresponds to 1 and the threshold
// 16384 / 2 = 8192 corresponds to 0.5.
if (IsExpand(status.last_mode) && status.expand_mutefactor < 16384 / 2) {
return true;
}
return false;
}
bool DecisionLogic::ReinitAfterExpands(
NetEqController::NetEqStatus status) const {
const uint32_t timestamp_leap =
status.next_packet->timestamp - status.target_timestamp;
return timestamp_leap >=
static_cast<uint32_t>(kReinitAfterExpandsMs * sample_rate_khz_);
}
bool DecisionLogic::PacketTooEarly(NetEqController::NetEqStatus status) const {
const uint32_t timestamp_leap =
status.next_packet->timestamp - status.target_timestamp;
return timestamp_leap > status.generated_noise_samples;
}
bool DecisionLogic::MaxWaitForPacket(
NetEqController::NetEqStatus status) const {
return status.generated_noise_samples >=
static_cast<size_t>(kMaxWaitForPacketMs * sample_rate_khz_);
}
bool DecisionLogic::ShouldContinueExpand(
NetEqController::NetEqStatus status) const {
return !ReinitAfterExpands(status) && !MaxWaitForPacket(status) &&
PacketTooEarly(status) && UnderTargetLevel();
}
int DecisionLogic::GetPlayoutDelayMs(
NetEqController::NetEqStatus status) const {
uint32_t playout_timestamp =
status.target_timestamp - status.sync_buffer_samples;
return packet_arrival_history_->GetDelayMs(playout_timestamp);
}
} // namespace webrtc

View file

@ -0,0 +1,183 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_H_
#define MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_H_
#include <memory>
#include "api/neteq/neteq.h"
#include "api/neteq/neteq_controller.h"
#include "api/neteq/tick_timer.h"
#include "modules/audio_coding/neteq/buffer_level_filter.h"
#include "modules/audio_coding/neteq/delay_manager.h"
#include "modules/audio_coding/neteq/packet_arrival_history.h"
#include "rtc_base/experiments/field_trial_parser.h"
namespace webrtc {
// This is the class for the decision tree implementation.
class DecisionLogic : public NetEqController {
public:
DecisionLogic(NetEqController::Config config);
DecisionLogic(
NetEqController::Config config,
std::unique_ptr<DelayManager> delay_manager,
std::unique_ptr<BufferLevelFilter> buffer_level_filter,
std::unique_ptr<PacketArrivalHistory> packet_arrival_history = nullptr);
~DecisionLogic() override;
DecisionLogic(const DecisionLogic&) = delete;
DecisionLogic& operator=(const DecisionLogic&) = delete;
// Not used.
void Reset() override {}
// Resets parts of the state. Typically done when switching codecs.
void SoftReset() override;
// Sets the sample rate and the output block size.
void SetSampleRate(int fs_hz, size_t output_size_samples) override;
// Given info about the latest received packet and the current jitter buffer
// status (both supplied in `status`), returns the operation to perform.
// `status.target_timestamp` and `status.expand_mutefactor` are provided for
// reference. If there is a packet available, it is supplied in
// `status.next_packet`; otherwise `status.next_packet` is unset. The mode
// resulting from the last call to NetEqImpl::GetAudio is supplied in
// `status.last_mode`, and `status.play_dtmf` should be true if there is a
// DTMF event to play. The output variable `reset_decoder` will be set to
// true if a reset is required; otherwise it is left unchanged (i.e., it can
// remain true if it was true before the call).
NetEq::Operation GetDecision(const NetEqController::NetEqStatus& status,
bool* reset_decoder) override;
void ExpandDecision(NetEq::Operation operation) override {}
// Adds `value` to `sample_memory_`.
void AddSampleMemory(int32_t value) override { sample_memory_ += value; }
int TargetLevelMs() const override;
int UnlimitedTargetLevelMs() const override;
absl::optional<int> PacketArrived(int fs_hz,
bool should_update_stats,
const PacketArrivedInfo& info) override;
void RegisterEmptyPacket() override {}
bool SetMaximumDelay(int delay_ms) override {
return delay_manager_->SetMaximumDelay(delay_ms);
}
bool SetMinimumDelay(int delay_ms) override {
return delay_manager_->SetMinimumDelay(delay_ms);
}
bool SetBaseMinimumDelay(int delay_ms) override {
return delay_manager_->SetBaseMinimumDelay(delay_ms);
}
int GetBaseMinimumDelay() const override {
return delay_manager_->GetBaseMinimumDelay();
}
bool PeakFound() const override { return false; }
int GetFilteredBufferLevel() const override;
// Accessors and mutators.
void set_sample_memory(int32_t value) override { sample_memory_ = value; }
size_t noise_fast_forward() const override { return noise_fast_forward_; }
size_t packet_length_samples() const override {
return packet_length_samples_;
}
void set_packet_length_samples(size_t value) override {
packet_length_samples_ = value;
}
void set_prev_time_scale(bool value) override { prev_time_scale_ = value; }
private:
// The value 5 sets maximum time-stretch rate to about 100 ms/s.
static const int kMinTimescaleInterval = 5;
// Updates the `buffer_level_filter_` with the current buffer level
// `buffer_size_samples`.
void FilterBufferLevel(size_t buffer_size_samples);
// Returns the operation given that the next available packet is a comfort
// noise payload (RFC 3389 only, not codec-internal).
virtual NetEq::Operation CngOperation(NetEqController::NetEqStatus status);
// Returns the operation given that no packets are available (except maybe
// a DTMF event, flagged by setting `play_dtmf` true).
virtual NetEq::Operation NoPacket(NetEqController::NetEqStatus status);
// Returns the operation to do given that the expected packet is available.
virtual NetEq::Operation ExpectedPacketAvailable(
NetEqController::NetEqStatus status);
// Returns the operation to do given that the expected packet is not
// available, but a packet further into the future is at hand.
virtual NetEq::Operation FuturePacketAvailable(
NetEqController::NetEqStatus status);
// Checks if enough time has elapsed since the last successful timescale
// operation was done (i.e., accelerate or preemptive expand).
bool TimescaleAllowed() const {
return !timescale_countdown_ || timescale_countdown_->Finished();
}
// Checks if the current (filtered) buffer level is under the target level.
bool UnderTargetLevel() const;
// Checks if an ongoing concealment should be continued due to low buffer
// level, even though the next packet is available.
bool PostponeDecode(NetEqController::NetEqStatus status) const;
// Checks if the timestamp leap is so far into the future that a reset will
// be triggered because the expand limit has been exceeded.
bool ReinitAfterExpands(NetEqController::NetEqStatus status) const;
// Checks if we still have not done enough expands to cover the distance from
// the last decoded packet to the next available packet.
bool PacketTooEarly(NetEqController::NetEqStatus status) const;
bool MaxWaitForPacket(NetEqController::NetEqStatus status) const;
bool ShouldContinueExpand(NetEqController::NetEqStatus status) const;
int GetPlayoutDelayMs(NetEqController::NetEqStatus status) const;
// Runtime configurable options through field trial
// WebRTC-Audio-NetEqDecisionLogicConfig.
struct Config {
Config();
bool enable_stable_delay_mode = true;
bool combine_concealment_decision = true;
int deceleration_target_level_offset_ms = 85;
int packet_history_size_ms = 2000;
absl::optional<int> cng_timeout_ms = 1000;
};
Config config_;
std::unique_ptr<DelayManager> delay_manager_;
std::unique_ptr<BufferLevelFilter> buffer_level_filter_;
std::unique_ptr<PacketArrivalHistory> packet_arrival_history_;
const TickTimer* tick_timer_;
int sample_rate_khz_;
size_t output_size_samples_;
size_t noise_fast_forward_ = 0;
size_t packet_length_samples_ = 0;
int sample_memory_ = 0;
bool prev_time_scale_ = false;
bool disallow_time_stretching_;
std::unique_ptr<TickTimer::Countdown> timescale_countdown_;
int time_stretched_cn_samples_ = 0;
bool buffer_flush_ = false;
};
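// Example construction (an illustrative sketch mirroring the unit tests;
// `tick_timer` must outlive the controller):
//
//   TickTimer tick_timer;
//   NetEqController::Config config;
//   config.tick_timer = &tick_timer;
//   config.allow_time_stretching = true;
//   DecisionLogic logic(config);
//   logic.SetSampleRate(16000, 160);  // 10 ms output blocks at 16 kHz.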
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_H_

View file

@ -0,0 +1,204 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for DecisionLogic class and derived classes.
#include "modules/audio_coding/neteq/decision_logic.h"
#include "api/neteq/neteq_controller.h"
#include "api/neteq/tick_timer.h"
#include "modules/audio_coding/neteq/delay_manager.h"
#include "modules/audio_coding/neteq/mock/mock_buffer_level_filter.h"
#include "modules/audio_coding/neteq/mock/mock_delay_manager.h"
#include "modules/audio_coding/neteq/mock/mock_packet_arrival_history.h"
#include "test/gtest.h"
namespace webrtc {
namespace {
constexpr int kSampleRate = 8000;
constexpr int kSamplesPerMs = kSampleRate / 1000;
constexpr int kOutputSizeSamples = kSamplesPerMs * 10;
constexpr int kMinTimescaleInterval = 5;
NetEqController::NetEqStatus CreateNetEqStatus(NetEq::Mode last_mode,
int current_delay_ms) {
NetEqController::NetEqStatus status;
status.play_dtmf = false;
status.last_mode = last_mode;
status.target_timestamp = 1234;
status.generated_noise_samples = 0;
status.expand_mutefactor = 0;
status.packet_buffer_info.num_samples = current_delay_ms * kSamplesPerMs;
status.packet_buffer_info.span_samples = current_delay_ms * kSamplesPerMs;
status.packet_buffer_info.span_samples_wait_time =
current_delay_ms * kSamplesPerMs;
status.packet_buffer_info.dtx_or_cng = false;
status.next_packet = {status.target_timestamp, false, false};
return status;
}
using ::testing::_;
using ::testing::Return;
} // namespace
class DecisionLogicTest : public ::testing::Test {
protected:
DecisionLogicTest() {
NetEqController::Config config;
config.tick_timer = &tick_timer_;
config.allow_time_stretching = true;
auto delay_manager = std::make_unique<MockDelayManager>(
DelayManager::Config(), config.tick_timer);
mock_delay_manager_ = delay_manager.get();
auto buffer_level_filter = std::make_unique<MockBufferLevelFilter>();
mock_buffer_level_filter_ = buffer_level_filter.get();
auto packet_arrival_history =
std::make_unique<MockPacketArrivalHistory>(&tick_timer_);
mock_packet_arrival_history_ = packet_arrival_history.get();
decision_logic_ = std::make_unique<DecisionLogic>(
config, std::move(delay_manager), std::move(buffer_level_filter),
std::move(packet_arrival_history));
decision_logic_->SetSampleRate(kSampleRate, kOutputSizeSamples);
}
TickTimer tick_timer_;
std::unique_ptr<DecisionLogic> decision_logic_;
MockDelayManager* mock_delay_manager_;
MockBufferLevelFilter* mock_buffer_level_filter_;
MockPacketArrivalHistory* mock_packet_arrival_history_;
};
TEST_F(DecisionLogicTest, NormalOperation) {
EXPECT_CALL(*mock_delay_manager_, TargetDelayMs())
.WillRepeatedly(Return(100));
EXPECT_CALL(*mock_packet_arrival_history_, GetDelayMs(_))
.WillRepeatedly(Return(100));
EXPECT_CALL(*mock_packet_arrival_history_, GetMaxDelayMs())
.WillRepeatedly(Return(0));
bool reset_decoder = false;
tick_timer_.Increment(kMinTimescaleInterval + 1);
EXPECT_EQ(decision_logic_->GetDecision(
CreateNetEqStatus(NetEq::Mode::kNormal, 100), &reset_decoder),
NetEq::Operation::kNormal);
EXPECT_FALSE(reset_decoder);
}
TEST_F(DecisionLogicTest, Accelerate) {
EXPECT_CALL(*mock_delay_manager_, TargetDelayMs())
.WillRepeatedly(Return(100));
EXPECT_CALL(*mock_packet_arrival_history_, GetDelayMs(_))
.WillRepeatedly(Return(150));
EXPECT_CALL(*mock_packet_arrival_history_, GetMaxDelayMs())
.WillRepeatedly(Return(0));
bool reset_decoder = false;
tick_timer_.Increment(kMinTimescaleInterval + 1);
EXPECT_EQ(decision_logic_->GetDecision(
CreateNetEqStatus(NetEq::Mode::kNormal, 100), &reset_decoder),
NetEq::Operation::kAccelerate);
EXPECT_FALSE(reset_decoder);
}
TEST_F(DecisionLogicTest, FastAccelerate) {
EXPECT_CALL(*mock_delay_manager_, TargetDelayMs())
.WillRepeatedly(Return(100));
EXPECT_CALL(*mock_packet_arrival_history_, GetDelayMs(_))
.WillRepeatedly(Return(500));
EXPECT_CALL(*mock_packet_arrival_history_, GetMaxDelayMs())
.WillRepeatedly(Return(0));
bool reset_decoder = false;
tick_timer_.Increment(kMinTimescaleInterval + 1);
EXPECT_EQ(decision_logic_->GetDecision(
CreateNetEqStatus(NetEq::Mode::kNormal, 100), &reset_decoder),
NetEq::Operation::kFastAccelerate);
EXPECT_FALSE(reset_decoder);
}
TEST_F(DecisionLogicTest, PreemptiveExpand) {
EXPECT_CALL(*mock_delay_manager_, TargetDelayMs())
.WillRepeatedly(Return(100));
EXPECT_CALL(*mock_packet_arrival_history_, GetDelayMs(_))
.WillRepeatedly(Return(50));
EXPECT_CALL(*mock_packet_arrival_history_, GetMaxDelayMs())
.WillRepeatedly(Return(0));
bool reset_decoder = false;
tick_timer_.Increment(kMinTimescaleInterval + 1);
EXPECT_EQ(decision_logic_->GetDecision(
CreateNetEqStatus(NetEq::Mode::kNormal, 100), &reset_decoder),
NetEq::Operation::kPreemptiveExpand);
EXPECT_FALSE(reset_decoder);
}
TEST_F(DecisionLogicTest, PostponeDecodeAfterExpand) {
EXPECT_CALL(*mock_delay_manager_, TargetDelayMs())
.WillRepeatedly(Return(500));
// Below 50% target delay threshold.
bool reset_decoder = false;
EXPECT_EQ(decision_logic_->GetDecision(
CreateNetEqStatus(NetEq::Mode::kExpand, 200), &reset_decoder),
NetEq::Operation::kExpand);
EXPECT_FALSE(reset_decoder);
// Above 50% target delay threshold.
EXPECT_EQ(decision_logic_->GetDecision(
CreateNetEqStatus(NetEq::Mode::kExpand, 250), &reset_decoder),
NetEq::Operation::kNormal);
EXPECT_FALSE(reset_decoder);
}
TEST_F(DecisionLogicTest, TimeStretchComfortNoise) {
EXPECT_CALL(*mock_delay_manager_, TargetDelayMs())
.WillRepeatedly(Return(500));
{
bool reset_decoder = false;
// Below target window.
auto status = CreateNetEqStatus(NetEq::Mode::kCodecInternalCng, 200);
status.generated_noise_samples = 400 * kSamplesPerMs;
status.next_packet->timestamp =
status.target_timestamp + 400 * kSamplesPerMs;
EXPECT_EQ(decision_logic_->GetDecision(status, &reset_decoder),
NetEq::Operation::kCodecInternalCng);
EXPECT_FALSE(reset_decoder);
}
{
bool reset_decoder = false;
// Above target window.
auto status = CreateNetEqStatus(NetEq::Mode::kCodecInternalCng, 600);
status.generated_noise_samples = 200 * kSamplesPerMs;
status.next_packet->timestamp =
status.target_timestamp + 400 * kSamplesPerMs;
EXPECT_EQ(decision_logic_->GetDecision(status, &reset_decoder),
NetEq::Operation::kNormal);
EXPECT_FALSE(reset_decoder);
}
}
TEST_F(DecisionLogicTest, CngTimeout) {
auto status = CreateNetEqStatus(NetEq::Mode::kCodecInternalCng, 0);
status.next_packet = absl::nullopt;
status.generated_noise_samples = kSamplesPerMs * 500;
bool reset_decoder = false;
EXPECT_EQ(decision_logic_->GetDecision(status, &reset_decoder),
NetEq::Operation::kCodecInternalCng);
status.generated_noise_samples = kSamplesPerMs * 1010;
EXPECT_EQ(decision_logic_->GetDecision(status, &reset_decoder),
NetEq::Operation::kExpand);
}
} // namespace webrtc

View file

@ -0,0 +1,285 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/decoder_database.h"
#include <stddef.h>
#include <cstdint>
#include <list>
#include <type_traits>
#include <utility>
#include "absl/strings/match.h"
#include "absl/strings/string_view.h"
#include "api/audio_codecs/audio_decoder.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/strings/audio_format_to_string.h"
namespace webrtc {
DecoderDatabase::DecoderDatabase(
const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory,
absl::optional<AudioCodecPairId> codec_pair_id)
: active_decoder_type_(-1),
active_cng_decoder_type_(-1),
decoder_factory_(decoder_factory),
codec_pair_id_(codec_pair_id) {}
DecoderDatabase::~DecoderDatabase() = default;
DecoderDatabase::DecoderInfo::DecoderInfo(
const SdpAudioFormat& audio_format,
absl::optional<AudioCodecPairId> codec_pair_id,
AudioDecoderFactory* factory,
absl::string_view codec_name)
: name_(codec_name),
audio_format_(audio_format),
codec_pair_id_(codec_pair_id),
factory_(factory),
cng_decoder_(CngDecoder::Create(audio_format)),
subtype_(SubtypeFromFormat(audio_format)) {}
DecoderDatabase::DecoderInfo::DecoderInfo(
const SdpAudioFormat& audio_format,
absl::optional<AudioCodecPairId> codec_pair_id,
AudioDecoderFactory* factory)
: DecoderInfo(audio_format, codec_pair_id, factory, audio_format.name) {}
DecoderDatabase::DecoderInfo::DecoderInfo(DecoderInfo&&) = default;
DecoderDatabase::DecoderInfo::~DecoderInfo() = default;
AudioDecoder* DecoderDatabase::DecoderInfo::GetDecoder() const {
if (subtype_ != Subtype::kNormal) {
// These are handled internally, so they have no AudioDecoder objects.
return nullptr;
}
if (!decoder_) {
// TODO(ossu): Keep a check here for now, since a number of tests create
// DecoderInfos without factories.
RTC_DCHECK(factory_);
decoder_ = factory_->MakeAudioDecoder(audio_format_, codec_pair_id_);
}
RTC_DCHECK(decoder_) << "Failed to create: " << rtc::ToString(audio_format_);
return decoder_.get();
}
bool DecoderDatabase::DecoderInfo::IsType(absl::string_view name) const {
return absl::EqualsIgnoreCase(audio_format_.name, name);
}
absl::optional<DecoderDatabase::DecoderInfo::CngDecoder>
DecoderDatabase::DecoderInfo::CngDecoder::Create(const SdpAudioFormat& format) {
if (absl::EqualsIgnoreCase(format.name, "CN")) {
// CN has a 1:1 RTP clock rate to sample rate ratio.
const int sample_rate_hz = format.clockrate_hz;
RTC_DCHECK(sample_rate_hz == 8000 || sample_rate_hz == 16000 ||
sample_rate_hz == 32000 || sample_rate_hz == 48000);
return DecoderDatabase::DecoderInfo::CngDecoder{sample_rate_hz};
} else {
return absl::nullopt;
}
}
DecoderDatabase::DecoderInfo::Subtype
DecoderDatabase::DecoderInfo::SubtypeFromFormat(const SdpAudioFormat& format) {
if (absl::EqualsIgnoreCase(format.name, "CN")) {
return Subtype::kComfortNoise;
} else if (absl::EqualsIgnoreCase(format.name, "telephone-event")) {
return Subtype::kDtmf;
} else if (absl::EqualsIgnoreCase(format.name, "red")) {
return Subtype::kRed;
}
return Subtype::kNormal;
}
bool DecoderDatabase::Empty() const {
return decoders_.empty();
}
int DecoderDatabase::Size() const {
return static_cast<int>(decoders_.size());
}
std::vector<int> DecoderDatabase::SetCodecs(
const std::map<int, SdpAudioFormat>& codecs) {
// First collect all payload types that we'll remove or reassign, then remove
// them from the database.
std::vector<int> changed_payload_types;
for (const std::pair<uint8_t, const DecoderInfo&> kv : decoders_) {
auto i = codecs.find(kv.first);
if (i == codecs.end() || i->second != kv.second.GetFormat()) {
changed_payload_types.push_back(kv.first);
}
}
for (int pl_type : changed_payload_types) {
Remove(pl_type);
}
// Enter the new and changed payload type mappings into the database.
for (const auto& kv : codecs) {
const int& rtp_payload_type = kv.first;
const SdpAudioFormat& audio_format = kv.second;
RTC_DCHECK_GE(rtp_payload_type, 0);
RTC_DCHECK_LE(rtp_payload_type, 0x7f);
if (decoders_.count(rtp_payload_type) == 0) {
decoders_.insert(std::make_pair(
rtp_payload_type,
DecoderInfo(audio_format, codec_pair_id_, decoder_factory_.get())));
} else {
// The mapping for this payload type hasn't changed.
}
}
return changed_payload_types;
}
int DecoderDatabase::RegisterPayload(int rtp_payload_type,
const SdpAudioFormat& audio_format) {
if (rtp_payload_type < 0 || rtp_payload_type > 0x7f) {
return kInvalidRtpPayloadType;
}
const auto ret = decoders_.insert(std::make_pair(
rtp_payload_type,
DecoderInfo(audio_format, codec_pair_id_, decoder_factory_.get())));
if (ret.second == false) {
// Database already contains a decoder with type `rtp_payload_type`.
return kDecoderExists;
}
return kOK;
}
int DecoderDatabase::Remove(uint8_t rtp_payload_type) {
if (decoders_.erase(rtp_payload_type) == 0) {
// No decoder with that `rtp_payload_type`.
return kDecoderNotFound;
}
if (active_decoder_type_ == rtp_payload_type) {
active_decoder_type_ = -1; // No active decoder.
}
if (active_cng_decoder_type_ == rtp_payload_type) {
active_cng_decoder_type_ = -1; // No active CNG decoder.
}
return kOK;
}
void DecoderDatabase::RemoveAll() {
decoders_.clear();
active_decoder_type_ = -1; // No active decoder.
active_cng_decoder_type_ = -1; // No active CNG decoder.
}
const DecoderDatabase::DecoderInfo* DecoderDatabase::GetDecoderInfo(
uint8_t rtp_payload_type) const {
DecoderMap::const_iterator it = decoders_.find(rtp_payload_type);
if (it == decoders_.end()) {
// Decoder not found.
return NULL;
}
return &it->second;
}
int DecoderDatabase::SetActiveDecoder(uint8_t rtp_payload_type,
bool* new_decoder) {
// Check that `rtp_payload_type` exists in the database.
const DecoderInfo* info = GetDecoderInfo(rtp_payload_type);
if (!info) {
// Decoder not found.
return kDecoderNotFound;
}
RTC_CHECK(!info->IsComfortNoise());
RTC_DCHECK(new_decoder);
*new_decoder = false;
if (active_decoder_type_ < 0) {
// This is the first active decoder.
*new_decoder = true;
} else if (active_decoder_type_ != rtp_payload_type) {
// Moving from one active decoder to another. Delete the first one.
const DecoderInfo* old_info = GetDecoderInfo(active_decoder_type_);
RTC_DCHECK(old_info);
old_info->DropDecoder();
*new_decoder = true;
}
active_decoder_type_ = rtp_payload_type;
return kOK;
}
AudioDecoder* DecoderDatabase::GetActiveDecoder() const {
if (active_decoder_type_ < 0) {
// No active decoder.
return NULL;
}
return GetDecoder(active_decoder_type_);
}
int DecoderDatabase::SetActiveCngDecoder(uint8_t rtp_payload_type) {
// Check that `rtp_payload_type` exists in the database.
const DecoderInfo* info = GetDecoderInfo(rtp_payload_type);
if (!info) {
// Decoder not found.
return kDecoderNotFound;
}
if (active_cng_decoder_type_ >= 0 &&
active_cng_decoder_type_ != rtp_payload_type) {
// Moving from one active CNG decoder to another. Delete the first one.
RTC_DCHECK(active_cng_decoder_);
active_cng_decoder_.reset();
}
active_cng_decoder_type_ = rtp_payload_type;
return kOK;
}
ComfortNoiseDecoder* DecoderDatabase::GetActiveCngDecoder() const {
if (active_cng_decoder_type_ < 0) {
// No active CNG decoder.
return NULL;
}
if (!active_cng_decoder_) {
active_cng_decoder_.reset(new ComfortNoiseDecoder);
}
return active_cng_decoder_.get();
}
AudioDecoder* DecoderDatabase::GetDecoder(uint8_t rtp_payload_type) const {
const DecoderInfo* info = GetDecoderInfo(rtp_payload_type);
return info ? info->GetDecoder() : nullptr;
}
bool DecoderDatabase::IsComfortNoise(uint8_t rtp_payload_type) const {
const DecoderInfo* info = GetDecoderInfo(rtp_payload_type);
return info && info->IsComfortNoise();
}
bool DecoderDatabase::IsDtmf(uint8_t rtp_payload_type) const {
const DecoderInfo* info = GetDecoderInfo(rtp_payload_type);
return info && info->IsDtmf();
}
bool DecoderDatabase::IsRed(uint8_t rtp_payload_type) const {
const DecoderInfo* info = GetDecoderInfo(rtp_payload_type);
return info && info->IsRed();
}
int DecoderDatabase::CheckPayloadTypes(const PacketList& packet_list) const {
PacketList::const_iterator it;
for (it = packet_list.begin(); it != packet_list.end(); ++it) {
if (!GetDecoderInfo(it->payload_type)) {
// Payload type is not found.
RTC_LOG(LS_WARNING) << "CheckPayloadTypes: unknown RTP payload type "
<< static_cast<int>(it->payload_type);
return kDecoderNotFound;
}
}
return kOK;
}
} // namespace webrtc

View file

@ -0,0 +1,204 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_DECODER_DATABASE_H_
#define MODULES_AUDIO_CODING_NETEQ_DECODER_DATABASE_H_
#include <map>
#include <memory>
#include <string>
#include "absl/strings/string_view.h"
#include "api/audio_codecs/audio_decoder_factory.h"
#include "api/audio_codecs/audio_format.h"
#include "api/scoped_refptr.h"
#include "modules/audio_coding/codecs/cng/webrtc_cng.h"
#include "modules/audio_coding/neteq/packet.h"
namespace webrtc {
class DecoderDatabase {
public:
enum DatabaseReturnCodes {
kOK = 0,
kInvalidRtpPayloadType = -1,
kCodecNotSupported = -2,
kInvalidSampleRate = -3,
kDecoderExists = -4,
kDecoderNotFound = -5,
kInvalidPointer = -6
};
// Class that stores decoder info in the database.
class DecoderInfo {
public:
DecoderInfo(const SdpAudioFormat& audio_format,
absl::optional<AudioCodecPairId> codec_pair_id,
AudioDecoderFactory* factory,
absl::string_view codec_name);
explicit DecoderInfo(const SdpAudioFormat& audio_format,
absl::optional<AudioCodecPairId> codec_pair_id,
AudioDecoderFactory* factory = nullptr);
DecoderInfo(DecoderInfo&&);
~DecoderInfo();
// Get the AudioDecoder object, creating it first if necessary.
AudioDecoder* GetDecoder() const;
// Deletes the AudioDecoder object. (It can always be recreated later via
// GetDecoder() if needed.)
void DropDecoder() const { decoder_.reset(); }
int SampleRateHz() const {
if (IsDtmf()) {
// DTMF has a 1:1 mapping between clock rate and sample rate.
return audio_format_.clockrate_hz;
}
const AudioDecoder* decoder = GetDecoder();
RTC_DCHECK_EQ(1, !!decoder + !!cng_decoder_);
return decoder ? decoder->SampleRateHz() : cng_decoder_->sample_rate_hz;
}
const SdpAudioFormat& GetFormat() const { return audio_format_; }
// Returns true if the decoder's format is comfort noise.
bool IsComfortNoise() const {
RTC_DCHECK_EQ(!!cng_decoder_, subtype_ == Subtype::kComfortNoise);
return subtype_ == Subtype::kComfortNoise;
}
// Returns true if the decoder's format is DTMF.
bool IsDtmf() const { return subtype_ == Subtype::kDtmf; }
// Returns true if the decoder's format is RED.
bool IsRed() const { return subtype_ == Subtype::kRed; }
// Returns true if the decoder's format is named `name`.
bool IsType(absl::string_view name) const;
const std::string& get_name() const { return name_; }
private:
// TODO(ossu): `name_` is kept here while we retain the old external
// decoder interface. Remove this once using an
// AudioDecoderFactory has supplanted the old functionality.
const std::string name_;
const SdpAudioFormat audio_format_;
const absl::optional<AudioCodecPairId> codec_pair_id_;
AudioDecoderFactory* const factory_;
mutable std::unique_ptr<AudioDecoder> decoder_;
// Set iff this is a comfort noise decoder.
struct CngDecoder {
static absl::optional<CngDecoder> Create(const SdpAudioFormat& format);
int sample_rate_hz;
};
const absl::optional<CngDecoder> cng_decoder_;
enum class Subtype : int8_t { kNormal, kComfortNoise, kDtmf, kRed };
static Subtype SubtypeFromFormat(const SdpAudioFormat& format);
const Subtype subtype_;
};
// Maximum value for 8 bits, and an invalid RTP payload type (since it is
// only 7 bits).
static const uint8_t kRtpPayloadTypeError = 0xFF;
DecoderDatabase(
const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory,
absl::optional<AudioCodecPairId> codec_pair_id);
virtual ~DecoderDatabase();
DecoderDatabase(const DecoderDatabase&) = delete;
DecoderDatabase& operator=(const DecoderDatabase&) = delete;
// Returns true if the database is empty.
virtual bool Empty() const;
// Returns the number of decoders registered in the database.
virtual int Size() const;
// Replaces the existing set of decoders with the given set. Returns the
// payload types that were reassigned or removed while doing so.
virtual std::vector<int> SetCodecs(
const std::map<int, SdpAudioFormat>& codecs);
// Registers a decoder for the given payload type. Returns kOK on success;
// otherwise an error code.
virtual int RegisterPayload(int rtp_payload_type,
const SdpAudioFormat& audio_format);
// Removes the entry for `rtp_payload_type` from the database.
// Returns kDecoderNotFound or kOK depending on the outcome of the operation.
virtual int Remove(uint8_t rtp_payload_type);
// Remove all entries.
virtual void RemoveAll();
// Returns a pointer to the DecoderInfo struct for `rtp_payload_type`. If
// no decoder is registered with that `rtp_payload_type`, NULL is returned.
virtual const DecoderInfo* GetDecoderInfo(uint8_t rtp_payload_type) const;
// Sets the active decoder to be `rtp_payload_type`. If this call results in a
// change of active decoder, `new_decoder` is set to true. The previous active
// decoder's AudioDecoder object is deleted.
virtual int SetActiveDecoder(uint8_t rtp_payload_type, bool* new_decoder);
// Returns the current active decoder, or NULL if no active decoder exists.
virtual AudioDecoder* GetActiveDecoder() const;
// Sets the active comfort noise decoder to be `rtp_payload_type`. If this
// call results in a change of active comfort noise decoder, the previous
// active decoder's AudioDecoder object is deleted.
virtual int SetActiveCngDecoder(uint8_t rtp_payload_type);
// Returns the current active comfort noise decoder, or NULL if no active
// comfort noise decoder exists.
virtual ComfortNoiseDecoder* GetActiveCngDecoder() const;
// The following are utility methods: they will look up DecoderInfo through
// GetDecoderInfo and call the respective method on that info object, if it
// exists.
// Returns a pointer to the AudioDecoder object associated with
// `rtp_payload_type`, or NULL if none is registered. If the AudioDecoder
// object does not exist for that decoder, the object is created.
AudioDecoder* GetDecoder(uint8_t rtp_payload_type) const;
// Returns true if `rtp_payload_type` is registered as comfort noise.
bool IsComfortNoise(uint8_t rtp_payload_type) const;
// Returns true if `rtp_payload_type` is registered as DTMF.
bool IsDtmf(uint8_t rtp_payload_type) const;
// Returns true if `rtp_payload_type` is registered as RED.
bool IsRed(uint8_t rtp_payload_type) const;
// Returns kOK if all packets in `packet_list` carry payload types that are
// registered in the database. Otherwise, returns kDecoderNotFound.
int CheckPayloadTypes(const PacketList& packet_list) const;
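  // Example usage (an illustrative sketch mirroring the unit tests; assumes
  // api/audio_codecs/builtin_audio_decoder_factory.h is available):
  //
  //   DecoderDatabase db(CreateBuiltinAudioDecoderFactory(), absl::nullopt);
  //   db.RegisterPayload(0, SdpAudioFormat("pcmu", 8000, 1));
  //   bool new_decoder = false;
  //   db.SetActiveDecoder(0, &new_decoder);
  //   AudioDecoder* decoder = db.GetActiveDecoder();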
private:
typedef std::map<uint8_t, DecoderInfo> DecoderMap;
DecoderMap decoders_;
int active_decoder_type_;
int active_cng_decoder_type_;
mutable std::unique_ptr<ComfortNoiseDecoder> active_cng_decoder_;
rtc::scoped_refptr<AudioDecoderFactory> decoder_factory_;
const absl::optional<AudioCodecPairId> codec_pair_id_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_DECODER_DATABASE_H_

View file

@ -0,0 +1,227 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/decoder_database.h"
#include <stdlib.h>
#include <string>
#include "api/audio_codecs/builtin_audio_decoder_factory.h"
#include "test/gmock.h"
#include "test/gtest.h"
#include "test/mock_audio_decoder.h"
#include "test/mock_audio_decoder_factory.h"
using ::testing::_;
using ::testing::Invoke;
namespace webrtc {
TEST(DecoderDatabase, CreateAndDestroy) {
DecoderDatabase db(rtc::make_ref_counted<MockAudioDecoderFactory>(),
absl::nullopt);
EXPECT_EQ(0, db.Size());
EXPECT_TRUE(db.Empty());
}
TEST(DecoderDatabase, InsertAndRemove) {
auto factory = rtc::make_ref_counted<MockAudioDecoderFactory>();
DecoderDatabase db(factory, absl::nullopt);
const uint8_t kPayloadType = 0;
const std::string kCodecName = "Robert\'); DROP TABLE Students;";
EXPECT_EQ(
DecoderDatabase::kOK,
db.RegisterPayload(kPayloadType, SdpAudioFormat(kCodecName, 8000, 1)));
EXPECT_EQ(1, db.Size());
EXPECT_FALSE(db.Empty());
EXPECT_EQ(DecoderDatabase::kOK, db.Remove(kPayloadType));
EXPECT_EQ(0, db.Size());
EXPECT_TRUE(db.Empty());
}
TEST(DecoderDatabase, InsertAndRemoveAll) {
auto factory = rtc::make_ref_counted<MockAudioDecoderFactory>();
DecoderDatabase db(factory, absl::nullopt);
const std::string kCodecName1 = "Robert\'); DROP TABLE Students;";
const std::string kCodecName2 = "https://xkcd.com/327/";
EXPECT_EQ(DecoderDatabase::kOK,
db.RegisterPayload(0, SdpAudioFormat(kCodecName1, 8000, 1)));
EXPECT_EQ(DecoderDatabase::kOK,
db.RegisterPayload(1, SdpAudioFormat(kCodecName2, 8000, 1)));
EXPECT_EQ(2, db.Size());
EXPECT_FALSE(db.Empty());
db.RemoveAll();
EXPECT_EQ(0, db.Size());
EXPECT_TRUE(db.Empty());
}
TEST(DecoderDatabase, GetDecoderInfo) {
auto factory = rtc::make_ref_counted<MockAudioDecoderFactory>();
auto* decoder = new MockAudioDecoder;
EXPECT_CALL(*factory, MakeAudioDecoderMock(_, _, _))
.WillOnce(Invoke([decoder](const SdpAudioFormat& format,
absl::optional<AudioCodecPairId> codec_pair_id,
std::unique_ptr<AudioDecoder>* dec) {
EXPECT_EQ("pcmu", format.name);
dec->reset(decoder);
}));
DecoderDatabase db(factory, absl::nullopt);
const uint8_t kPayloadType = 0;
const std::string kCodecName = "pcmu";
EXPECT_EQ(
DecoderDatabase::kOK,
db.RegisterPayload(kPayloadType, SdpAudioFormat(kCodecName, 8000, 1)));
const DecoderDatabase::DecoderInfo* info;
info = db.GetDecoderInfo(kPayloadType);
ASSERT_TRUE(info != NULL);
EXPECT_TRUE(info->IsType("pcmu"));
EXPECT_EQ(kCodecName, info->get_name());
EXPECT_EQ(decoder, db.GetDecoder(kPayloadType));
info = db.GetDecoderInfo(kPayloadType + 1); // Other payload type.
EXPECT_TRUE(info == NULL); // Should not be found.
}
TEST(DecoderDatabase, GetDecoder) {
DecoderDatabase db(CreateBuiltinAudioDecoderFactory(), absl::nullopt);
const uint8_t kPayloadType = 0;
EXPECT_EQ(DecoderDatabase::kOK,
db.RegisterPayload(kPayloadType, SdpAudioFormat("l16", 8000, 1)));
AudioDecoder* dec = db.GetDecoder(kPayloadType);
ASSERT_TRUE(dec != NULL);
}
TEST(DecoderDatabase, TypeTests) {
auto factory = rtc::make_ref_counted<MockAudioDecoderFactory>();
DecoderDatabase db(factory, absl::nullopt);
const uint8_t kPayloadTypePcmU = 0;
const uint8_t kPayloadTypeCng = 13;
const uint8_t kPayloadTypeDtmf = 100;
const uint8_t kPayloadTypeRed = 101;
const uint8_t kPayloadNotUsed = 102;
// Load into database.
EXPECT_EQ(
DecoderDatabase::kOK,
db.RegisterPayload(kPayloadTypePcmU, SdpAudioFormat("pcmu", 8000, 1)));
EXPECT_EQ(DecoderDatabase::kOK,
db.RegisterPayload(kPayloadTypeCng, SdpAudioFormat("cn", 8000, 1)));
EXPECT_EQ(DecoderDatabase::kOK,
db.RegisterPayload(kPayloadTypeDtmf,
SdpAudioFormat("telephone-event", 8000, 1)));
EXPECT_EQ(
DecoderDatabase::kOK,
db.RegisterPayload(kPayloadTypeRed, SdpAudioFormat("red", 8000, 1)));
EXPECT_EQ(4, db.Size());
// Test.
EXPECT_FALSE(db.IsComfortNoise(kPayloadNotUsed));
EXPECT_FALSE(db.IsDtmf(kPayloadNotUsed));
EXPECT_FALSE(db.IsRed(kPayloadNotUsed));
EXPECT_FALSE(db.IsComfortNoise(kPayloadTypePcmU));
EXPECT_FALSE(db.IsDtmf(kPayloadTypePcmU));
EXPECT_FALSE(db.IsRed(kPayloadTypePcmU));
EXPECT_TRUE(db.IsComfortNoise(kPayloadTypeCng));
EXPECT_TRUE(db.IsDtmf(kPayloadTypeDtmf));
EXPECT_TRUE(db.IsRed(kPayloadTypeRed));
}
TEST(DecoderDatabase, CheckPayloadTypes) {
constexpr int kNumPayloads = 10;
auto factory = rtc::make_ref_counted<MockAudioDecoderFactory>();
DecoderDatabase db(factory, absl::nullopt);
// Load a number of payloads into the database. Payload types are 0, 1, ...,
// while the decoder type is the same for all payload types (this does not
// matter for the test).
for (uint8_t payload_type = 0; payload_type < kNumPayloads; ++payload_type) {
EXPECT_EQ(
DecoderDatabase::kOK,
db.RegisterPayload(payload_type, SdpAudioFormat("pcmu", 8000, 1)));
}
PacketList packet_list;
for (int i = 0; i < kNumPayloads + 1; ++i) {
// Create packet with payload type `i`. The last packet will have a payload
// type that is not registered in the decoder database.
Packet packet;
packet.payload_type = i;
packet_list.push_back(std::move(packet));
}
// Expect to return false, since the last packet is of an unknown type.
EXPECT_EQ(DecoderDatabase::kDecoderNotFound,
db.CheckPayloadTypes(packet_list));
packet_list.pop_back(); // Remove the unknown one.
EXPECT_EQ(DecoderDatabase::kOK, db.CheckPayloadTypes(packet_list));
// Delete all packets.
PacketList::iterator it = packet_list.begin();
while (it != packet_list.end()) {
it = packet_list.erase(it);
}
}
#if defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX)
#define IF_ISAC(x) x
#else
#define IF_ISAC(x) DISABLED_##x
#endif
// Test the methods for setting and getting active speech and CNG decoders.
TEST(DecoderDatabase, IF_ISAC(ActiveDecoders)) {
DecoderDatabase db(CreateBuiltinAudioDecoderFactory(), absl::nullopt);
// Load payload types.
ASSERT_EQ(DecoderDatabase::kOK,
db.RegisterPayload(0, SdpAudioFormat("pcmu", 8000, 1)));
ASSERT_EQ(DecoderDatabase::kOK,
db.RegisterPayload(103, SdpAudioFormat("isac", 16000, 1)));
ASSERT_EQ(DecoderDatabase::kOK,
db.RegisterPayload(13, SdpAudioFormat("cn", 8000, 1)));
// Verify that no decoders are active from the start.
EXPECT_EQ(NULL, db.GetActiveDecoder());
EXPECT_EQ(NULL, db.GetActiveCngDecoder());
// Set active speech codec.
bool changed; // Should be true when the active decoder changed.
EXPECT_EQ(DecoderDatabase::kOK, db.SetActiveDecoder(0, &changed));
EXPECT_TRUE(changed);
AudioDecoder* decoder = db.GetActiveDecoder();
ASSERT_FALSE(decoder == NULL); // Should get a decoder here.
// Set the same again. Expect no change.
EXPECT_EQ(DecoderDatabase::kOK, db.SetActiveDecoder(0, &changed));
EXPECT_FALSE(changed);
decoder = db.GetActiveDecoder();
ASSERT_FALSE(decoder == NULL); // Should get a decoder here.
// Change active decoder.
EXPECT_EQ(DecoderDatabase::kOK, db.SetActiveDecoder(103, &changed));
EXPECT_TRUE(changed);
decoder = db.GetActiveDecoder();
ASSERT_FALSE(decoder == NULL); // Should get a decoder here.
// Remove the active decoder, and verify that the active becomes NULL.
EXPECT_EQ(DecoderDatabase::kOK, db.Remove(103));
EXPECT_EQ(NULL, db.GetActiveDecoder());
// Set active CNG codec.
EXPECT_EQ(DecoderDatabase::kOK, db.SetActiveCngDecoder(13));
ComfortNoiseDecoder* cng = db.GetActiveCngDecoder();
ASSERT_FALSE(cng == NULL); // Should get a decoder here.
// Remove the active CNG decoder, and verify that the active becomes NULL.
EXPECT_EQ(DecoderDatabase::kOK, db.Remove(13));
EXPECT_EQ(NULL, db.GetActiveCngDecoder());
// Try to set non-existing codecs as active.
EXPECT_EQ(DecoderDatabase::kDecoderNotFound,
db.SetActiveDecoder(17, &changed));
EXPECT_EQ(DecoderDatabase::kDecoderNotFound, db.SetActiveCngDecoder(17));
}
} // namespace webrtc

View file

@ -0,0 +1,31 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/default_neteq_factory.h"
#include <utility>
#include "modules/audio_coding/neteq/neteq_impl.h"
namespace webrtc {
DefaultNetEqFactory::DefaultNetEqFactory() = default;
DefaultNetEqFactory::~DefaultNetEqFactory() = default;
std::unique_ptr<NetEq> DefaultNetEqFactory::CreateNetEq(
const NetEq::Config& config,
const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory,
Clock* clock) const {
return std::make_unique<NetEqImpl>(
config, NetEqImpl::Dependencies(config, clock, decoder_factory,
controller_factory_));
}
} // namespace webrtc

View file

@ -0,0 +1,41 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_DEFAULT_NETEQ_FACTORY_H_
#define MODULES_AUDIO_CODING_NETEQ_DEFAULT_NETEQ_FACTORY_H_
#include <memory>
#include "api/audio_codecs/audio_decoder_factory.h"
#include "api/neteq/default_neteq_controller_factory.h"
#include "api/neteq/neteq_factory.h"
#include "api/scoped_refptr.h"
#include "system_wrappers/include/clock.h"
namespace webrtc {
class DefaultNetEqFactory : public NetEqFactory {
public:
DefaultNetEqFactory();
~DefaultNetEqFactory() override;
DefaultNetEqFactory(const DefaultNetEqFactory&) = delete;
DefaultNetEqFactory& operator=(const DefaultNetEqFactory&) = delete;
std::unique_ptr<NetEq> CreateNetEq(
const NetEq::Config& config,
const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory,
Clock* clock) const override;
private:
const DefaultNetEqControllerFactory controller_factory_;
};
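// Example usage (an illustrative sketch; assumes the built-in decoder
// factory and the real-time clock are acceptable for the application):
//
//   DefaultNetEqFactory factory;
//   NetEq::Config config;
//   std::unique_ptr<NetEq> neteq = factory.CreateNetEq(
//       config, CreateBuiltinAudioDecoderFactory(),
//       Clock::GetRealTimeClock());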
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_DEFAULT_NETEQ_FACTORY_H_

View file

@ -0,0 +1,207 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/delay_manager.h"
#include <stdio.h>
#include <stdlib.h>
#include <algorithm>
#include <memory>
#include <numeric>
#include <string>
#include "modules/include/module_common_types_public.h"
#include "rtc_base/checks.h"
#include "rtc_base/experiments/struct_parameters_parser.h"
#include "rtc_base/logging.h"
#include "rtc_base/numerics/safe_conversions.h"
#include "rtc_base/numerics/safe_minmax.h"
#include "system_wrappers/include/field_trial.h"
namespace webrtc {
namespace {
constexpr int kMinBaseMinimumDelayMs = 0;
constexpr int kMaxBaseMinimumDelayMs = 10000;
constexpr int kStartDelayMs = 80;
std::unique_ptr<ReorderOptimizer> MaybeCreateReorderOptimizer(
const DelayManager::Config& config) {
if (!config.use_reorder_optimizer) {
return nullptr;
}
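  // The optimizer takes the forget factor in Q15 fixed point; e.g.
  // (illustrative) a reorder_forget_factor of 0.999 maps to
  // (1 << 15) * 0.999 ~= 32735.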
return std::make_unique<ReorderOptimizer>(
(1 << 15) * config.reorder_forget_factor, config.ms_per_loss_percent,
config.start_forget_weight);
}
} // namespace
DelayManager::Config::Config() {
StructParametersParser::Create( //
"quantile", &quantile, //
"forget_factor", &forget_factor, //
"start_forget_weight", &start_forget_weight, //
"resample_interval_ms", &resample_interval_ms, //
"use_reorder_optimizer", &use_reorder_optimizer, //
"reorder_forget_factor", &reorder_forget_factor, //
"ms_per_loss_percent", &ms_per_loss_percent)
->Parse(webrtc::field_trial::FindFullName(
"WebRTC-Audio-NetEqDelayManagerConfig"));
}
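// As a hedged example (the key:value format is assumed from
// StructParametersParser; the values are arbitrary), the field trial string
//   "WebRTC-Audio-NetEqDelayManagerConfig/quantile:0.97,forget_factor:0.99/"
// would raise the delay quantile to 0.97 and make the histogram forget more
// slowly.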
void DelayManager::Config::Log() {
RTC_LOG(LS_INFO) << "Delay manager config:"
" quantile="
<< quantile << " forget_factor=" << forget_factor
<< " start_forget_weight=" << start_forget_weight.value_or(0)
<< " resample_interval_ms="
<< resample_interval_ms.value_or(0)
<< " use_reorder_optimizer=" << use_reorder_optimizer
<< " reorder_forget_factor=" << reorder_forget_factor
<< " ms_per_loss_percent=" << ms_per_loss_percent;
}
DelayManager::DelayManager(const Config& config, const TickTimer* tick_timer)
: max_packets_in_buffer_(config.max_packets_in_buffer),
underrun_optimizer_(tick_timer,
(1 << 30) * config.quantile,
(1 << 15) * config.forget_factor,
config.start_forget_weight,
config.resample_interval_ms),
reorder_optimizer_(MaybeCreateReorderOptimizer(config)),
base_minimum_delay_ms_(config.base_minimum_delay_ms),
effective_minimum_delay_ms_(config.base_minimum_delay_ms),
minimum_delay_ms_(0),
maximum_delay_ms_(0),
target_level_ms_(kStartDelayMs) {
RTC_DCHECK_GE(base_minimum_delay_ms_, 0);
Reset();
}
DelayManager::~DelayManager() {}
void DelayManager::Update(int arrival_delay_ms, bool reordered) {
if (!reorder_optimizer_ || !reordered) {
underrun_optimizer_.Update(arrival_delay_ms);
}
target_level_ms_ =
underrun_optimizer_.GetOptimalDelayMs().value_or(kStartDelayMs);
if (reorder_optimizer_) {
reorder_optimizer_->Update(arrival_delay_ms, reordered, target_level_ms_);
target_level_ms_ = std::max(
target_level_ms_, reorder_optimizer_->GetOptimalDelayMs().value_or(0));
}
unlimited_target_level_ms_ = target_level_ms_;
target_level_ms_ = std::max(target_level_ms_, effective_minimum_delay_ms_);
if (maximum_delay_ms_ > 0) {
target_level_ms_ = std::min(target_level_ms_, maximum_delay_ms_);
}
if (packet_len_ms_ > 0) {
    // Limit to 75% of the maximum buffer size; e.g. a buffer of 50 packets
    // of 20 ms each caps the target at 750 ms.
target_level_ms_ = std::min(
target_level_ms_, 3 * max_packets_in_buffer_ * packet_len_ms_ / 4);
}
}
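// Worked example of the clamping above, using the default from the header
// (max_packets_in_buffer = 200) and a 20 ms packet length: the buffer holds
// 4000 ms of audio, so the target level is capped at
// 3 * 200 * 20 / 4 = 3000 ms. An effective minimum delay of, say, 120 ms
// first raises a lower optimizer output to 120 ms; a configured maximum delay
// of 100 ms would then cap it back down to 100 ms.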
int DelayManager::SetPacketAudioLength(int length_ms) {
if (length_ms <= 0) {
RTC_LOG_F(LS_ERROR) << "length_ms = " << length_ms;
return -1;
}
packet_len_ms_ = length_ms;
return 0;
}
void DelayManager::Reset() {
packet_len_ms_ = 0;
underrun_optimizer_.Reset();
target_level_ms_ = kStartDelayMs;
if (reorder_optimizer_) {
reorder_optimizer_->Reset();
}
}
int DelayManager::TargetDelayMs() const {
return target_level_ms_;
}
int DelayManager::UnlimitedTargetLevelMs() const {
return unlimited_target_level_ms_;
}
bool DelayManager::IsValidMinimumDelay(int delay_ms) const {
return 0 <= delay_ms && delay_ms <= MinimumDelayUpperBound();
}
bool DelayManager::IsValidBaseMinimumDelay(int delay_ms) const {
return kMinBaseMinimumDelayMs <= delay_ms &&
delay_ms <= kMaxBaseMinimumDelayMs;
}
bool DelayManager::SetMinimumDelay(int delay_ms) {
if (!IsValidMinimumDelay(delay_ms)) {
return false;
}
minimum_delay_ms_ = delay_ms;
UpdateEffectiveMinimumDelay();
return true;
}
bool DelayManager::SetMaximumDelay(int delay_ms) {
  // If `delay_ms` is zero, the maximum delay is unset and the target level is
  // unconstrained by any maximum delay.
  if (delay_ms != 0 && delay_ms < minimum_delay_ms_) {
    // The maximum delay shouldn't be less than the minimum delay.
return false;
}
maximum_delay_ms_ = delay_ms;
UpdateEffectiveMinimumDelay();
return true;
}
bool DelayManager::SetBaseMinimumDelay(int delay_ms) {
if (!IsValidBaseMinimumDelay(delay_ms)) {
return false;
}
base_minimum_delay_ms_ = delay_ms;
UpdateEffectiveMinimumDelay();
return true;
}
int DelayManager::GetBaseMinimumDelay() const {
return base_minimum_delay_ms_;
}
void DelayManager::UpdateEffectiveMinimumDelay() {
// Clamp `base_minimum_delay_ms_` into the range which can be effectively
// used.
const int base_minimum_delay_ms =
rtc::SafeClamp(base_minimum_delay_ms_, 0, MinimumDelayUpperBound());
effective_minimum_delay_ms_ =
std::max(minimum_delay_ms_, base_minimum_delay_ms);
}
int DelayManager::MinimumDelayUpperBound() const {
  // Choose the lowest possible bound, discarding zero values, since a zero
  // means the corresponding limit is not set and leaves the delay
  // unconstrained.
int q75 = max_packets_in_buffer_ * packet_len_ms_ * 3 / 4;
q75 = q75 > 0 ? q75 : kMaxBaseMinimumDelayMs;
const int maximum_delay_ms =
maximum_delay_ms_ > 0 ? maximum_delay_ms_ : kMaxBaseMinimumDelayMs;
return std::min(maximum_delay_ms, q75);
}
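// Worked example: with max_packets_in_buffer_ = 200 and packet_len_ms_ = 20,
// q75 = 200 * 20 * 3 / 4 = 3000 ms, so with no maximum delay set the upper
// bound is min(10000, 3000) = 3000 ms. Before the first packet arrives
// (packet_len_ms_ == 0), q75 falls back to kMaxBaseMinimumDelayMs and the
// bound is 10000 ms.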
} // namespace webrtc

View file

@ -0,0 +1,121 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_DELAY_MANAGER_H_
#define MODULES_AUDIO_CODING_NETEQ_DELAY_MANAGER_H_
#include <string.h> // Provide access to size_t.
#include <deque>
#include <memory>
#include "absl/types/optional.h"
#include "api/neteq/tick_timer.h"
#include "modules/audio_coding/neteq/histogram.h"
#include "modules/audio_coding/neteq/reorder_optimizer.h"
#include "modules/audio_coding/neteq/underrun_optimizer.h"
namespace webrtc {
class DelayManager {
public:
struct Config {
Config();
void Log();
// Options that can be configured via field trial.
double quantile = 0.95;
double forget_factor = 0.983;
absl::optional<double> start_forget_weight = 2;
absl::optional<int> resample_interval_ms = 500;
bool use_reorder_optimizer = true;
double reorder_forget_factor = 0.9993;
int ms_per_loss_percent = 20;
// Options that are externally populated.
int max_packets_in_buffer = 200;
int base_minimum_delay_ms = 0;
};
DelayManager(const Config& config, const TickTimer* tick_timer);
virtual ~DelayManager();
DelayManager(const DelayManager&) = delete;
DelayManager& operator=(const DelayManager&) = delete;
  // Notifies the delay manager that a new packet arrived with delay
  // `arrival_delay_ms`. This updates the statistics, and a new target buffer
  // level is calculated. The `reordered` flag indicates whether the packet
  // was reordered.
virtual void Update(int arrival_delay_ms, bool reordered);
// Resets all state.
virtual void Reset();
// Gets the target buffer level in milliseconds. If a minimum or maximum delay
// has been set, the target delay reported here also respects the configured
// min/max delay.
virtual int TargetDelayMs() const;
// Reports the target delay that would be used if no minimum/maximum delay
// would be set.
virtual int UnlimitedTargetLevelMs() const;
// Notifies the DelayManager of how much audio data is carried in each packet.
virtual int SetPacketAudioLength(int length_ms);
// Accessors and mutators.
  // Assumes `delay_ms` is in the valid range.
virtual bool SetMinimumDelay(int delay_ms);
virtual bool SetMaximumDelay(int delay_ms);
virtual bool SetBaseMinimumDelay(int delay_ms);
virtual int GetBaseMinimumDelay() const;
// These accessors are only intended for testing purposes.
int effective_minimum_delay_ms_for_test() const {
return effective_minimum_delay_ms_;
}
private:
  // Provides the value that the minimum delay can't exceed, based on the
  // current buffer size and the given `maximum_delay_ms_`. The lower bound is
  // a constant 0.
int MinimumDelayUpperBound() const;
// Updates `effective_minimum_delay_ms_` delay based on current
// `minimum_delay_ms_`, `base_minimum_delay_ms_` and `maximum_delay_ms_`
// and buffer size.
void UpdateEffectiveMinimumDelay();
  // Makes sure that `delay_ms` is less than the maximum delay, if one is set.
  // Also, when possible, checks that `delay_ms` is less than 75% of the
  // maximum buffer size implied by `max_packets_in_buffer_`.
bool IsValidMinimumDelay(int delay_ms) const;
bool IsValidBaseMinimumDelay(int delay_ms) const;
// TODO(jakobi): set maximum buffer delay instead of number of packets.
const int max_packets_in_buffer_;
UnderrunOptimizer underrun_optimizer_;
std::unique_ptr<ReorderOptimizer> reorder_optimizer_;
int base_minimum_delay_ms_;
int effective_minimum_delay_ms_; // Used as lower bound for target delay.
int minimum_delay_ms_; // Externally set minimum delay.
int maximum_delay_ms_; // Externally set maximum allowed delay.
int packet_len_ms_ = 0;
int target_level_ms_ = 0; // Currently preferred buffer level.
int unlimited_target_level_ms_ = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_DELAY_MANAGER_H_

View file

@ -0,0 +1,246 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for DelayManager class.
#include "modules/audio_coding/neteq/delay_manager.h"
#include <math.h>
#include <memory>
#include "absl/types/optional.h"
#include "modules/audio_coding/neteq/histogram.h"
#include "modules/audio_coding/neteq/mock/mock_histogram.h"
#include "modules/audio_coding/neteq/mock/mock_statistics_calculator.h"
#include "rtc_base/checks.h"
#include "test/field_trial.h"
#include "test/gmock.h"
#include "test/gtest.h"
namespace webrtc {
namespace {
constexpr int kMaxNumberOfPackets = 200;
constexpr int kTimeStepMs = 10;
constexpr int kFrameSizeMs = 20;
constexpr int kMaxBufferSizeMs = kMaxNumberOfPackets * kFrameSizeMs;
} // namespace
class DelayManagerTest : public ::testing::Test {
protected:
DelayManagerTest();
virtual void SetUp();
void Update(int delay);
void IncreaseTime(int inc_ms);
TickTimer tick_timer_;
DelayManager dm_;
};
DelayManagerTest::DelayManagerTest()
: dm_(DelayManager::Config(), &tick_timer_) {}
void DelayManagerTest::SetUp() {
dm_.SetPacketAudioLength(kFrameSizeMs);
}
void DelayManagerTest::Update(int delay) {
dm_.Update(delay, false);
}
void DelayManagerTest::IncreaseTime(int inc_ms) {
for (int t = 0; t < inc_ms; t += kTimeStepMs) {
tick_timer_.Increment();
}
}
TEST_F(DelayManagerTest, CreateAndDestroy) {
// Nothing to do here. The test fixture creates and destroys the DelayManager
// object.
}
TEST_F(DelayManagerTest, UpdateNormal) {
for (int i = 0; i < 50; ++i) {
Update(0);
IncreaseTime(kFrameSizeMs);
}
EXPECT_EQ(20, dm_.TargetDelayMs());
}
TEST_F(DelayManagerTest, MaxDelay) {
Update(0);
const int kMaxDelayMs = 60;
EXPECT_GT(dm_.TargetDelayMs(), kMaxDelayMs);
EXPECT_TRUE(dm_.SetMaximumDelay(kMaxDelayMs));
Update(0);
EXPECT_EQ(kMaxDelayMs, dm_.TargetDelayMs());
}
TEST_F(DelayManagerTest, MinDelay) {
Update(0);
int kMinDelayMs = 7 * kFrameSizeMs;
EXPECT_LT(dm_.TargetDelayMs(), kMinDelayMs);
dm_.SetMinimumDelay(kMinDelayMs);
IncreaseTime(kFrameSizeMs);
Update(0);
EXPECT_EQ(kMinDelayMs, dm_.TargetDelayMs());
}
TEST_F(DelayManagerTest, BaseMinimumDelayCheckValidRange) {
  // Base minimum delay must be in the range [0, 10000] milliseconds.
EXPECT_FALSE(dm_.SetBaseMinimumDelay(-1));
EXPECT_FALSE(dm_.SetBaseMinimumDelay(10001));
EXPECT_EQ(dm_.GetBaseMinimumDelay(), 0);
EXPECT_TRUE(dm_.SetBaseMinimumDelay(7999));
EXPECT_EQ(dm_.GetBaseMinimumDelay(), 7999);
}
TEST_F(DelayManagerTest, BaseMinimumDelayLowerThanMinimumDelay) {
constexpr int kBaseMinimumDelayMs = 100;
constexpr int kMinimumDelayMs = 200;
  // The base minimum delay sets a lower bound on the minimum delay. Hence,
  // when the base minimum delay is lower than the minimum delay, the minimum
  // delay is used.
RTC_DCHECK_LT(kBaseMinimumDelayMs, kMinimumDelayMs);
EXPECT_TRUE(dm_.SetBaseMinimumDelay(kBaseMinimumDelayMs));
EXPECT_TRUE(dm_.SetMinimumDelay(kMinimumDelayMs));
EXPECT_EQ(dm_.effective_minimum_delay_ms_for_test(), kMinimumDelayMs);
}
TEST_F(DelayManagerTest, BaseMinimumDelayGreaterThanMinimumDelay) {
constexpr int kBaseMinimumDelayMs = 70;
constexpr int kMinimumDelayMs = 30;
  // The base minimum delay sets a lower bound on the minimum delay. Hence,
  // when the base minimum delay is greater than the minimum delay, the base
  // minimum delay is used.
RTC_DCHECK_GT(kBaseMinimumDelayMs, kMinimumDelayMs);
EXPECT_TRUE(dm_.SetBaseMinimumDelay(kBaseMinimumDelayMs));
EXPECT_TRUE(dm_.SetMinimumDelay(kMinimumDelayMs));
EXPECT_EQ(dm_.effective_minimum_delay_ms_for_test(), kBaseMinimumDelayMs);
}
TEST_F(DelayManagerTest, BaseMinimumDelayGreaterThanBufferSize) {
constexpr int kBaseMinimumDelayMs = kMaxBufferSizeMs + 1;
constexpr int kMinimumDelayMs = 12;
constexpr int kMaximumDelayMs = 20;
constexpr int kMaxBufferSizeMsQ75 = 3 * kMaxBufferSizeMs / 4;
EXPECT_TRUE(dm_.SetMaximumDelay(kMaximumDelayMs));
  // The base minimum delay is greater than the minimum delay, so it is
  // clamped to the highest currently possible value, which is the maximum
  // delay.
RTC_DCHECK_GT(kBaseMinimumDelayMs, kMinimumDelayMs);
RTC_DCHECK_GT(kBaseMinimumDelayMs, kMaxBufferSizeMs);
RTC_DCHECK_GT(kBaseMinimumDelayMs, kMaximumDelayMs);
RTC_DCHECK_LT(kMaximumDelayMs, kMaxBufferSizeMsQ75);
EXPECT_TRUE(dm_.SetMinimumDelay(kMinimumDelayMs));
EXPECT_TRUE(dm_.SetBaseMinimumDelay(kBaseMinimumDelayMs));
// Unset maximum value.
EXPECT_TRUE(dm_.SetMaximumDelay(0));
  // With the maximum delay unset, the highest possible value is now 75% of
  // the maximum buffer size.
EXPECT_EQ(dm_.effective_minimum_delay_ms_for_test(), kMaxBufferSizeMsQ75);
}
TEST_F(DelayManagerTest, BaseMinimumDelayGreaterThanMaximumDelay) {
constexpr int kMaximumDelayMs = 400;
constexpr int kBaseMinimumDelayMs = kMaximumDelayMs + 1;
constexpr int kMinimumDelayMs = 20;
  // The base minimum delay is greater than the minimum delay, so it is
  // clamped to the highest currently possible value, which is kMaximumDelayMs.
RTC_DCHECK_GT(kBaseMinimumDelayMs, kMinimumDelayMs);
RTC_DCHECK_GT(kBaseMinimumDelayMs, kMaximumDelayMs);
RTC_DCHECK_LT(kMaximumDelayMs, kMaxBufferSizeMs);
EXPECT_TRUE(dm_.SetMaximumDelay(kMaximumDelayMs));
EXPECT_TRUE(dm_.SetMinimumDelay(kMinimumDelayMs));
EXPECT_TRUE(dm_.SetBaseMinimumDelay(kBaseMinimumDelayMs));
EXPECT_EQ(dm_.effective_minimum_delay_ms_for_test(), kMaximumDelayMs);
}
TEST_F(DelayManagerTest, BaseMinimumDelayLowerThanMaxSize) {
constexpr int kMaximumDelayMs = 400;
constexpr int kBaseMinimumDelayMs = kMaximumDelayMs - 1;
constexpr int kMinimumDelayMs = 20;
  // The base minimum delay is greater than the minimum delay and lower than
  // the maximum delay, so it is the one used.
RTC_DCHECK_GT(kBaseMinimumDelayMs, kMinimumDelayMs);
RTC_DCHECK_LT(kBaseMinimumDelayMs, kMaximumDelayMs);
EXPECT_TRUE(dm_.SetMaximumDelay(kMaximumDelayMs));
EXPECT_TRUE(dm_.SetMinimumDelay(kMinimumDelayMs));
EXPECT_TRUE(dm_.SetBaseMinimumDelay(kBaseMinimumDelayMs));
EXPECT_EQ(dm_.effective_minimum_delay_ms_for_test(), kBaseMinimumDelayMs);
}
TEST_F(DelayManagerTest, MinimumDelayMemorization) {
  // Check that when the base minimum delay is increased to a value higher
  // than the minimum delay, the minimum delay is still memorized. This allows
  // the effective minimum delay to be restored to the memorized minimum delay
  // when the base minimum delay is decreased again.
constexpr int kBaseMinimumDelayMsLow = 10;
constexpr int kMinimumDelayMs = 20;
constexpr int kBaseMinimumDelayMsHigh = 30;
EXPECT_TRUE(dm_.SetBaseMinimumDelay(kBaseMinimumDelayMsLow));
EXPECT_TRUE(dm_.SetMinimumDelay(kMinimumDelayMs));
// Minimum delay is used as it is higher than base minimum delay.
EXPECT_EQ(dm_.effective_minimum_delay_ms_for_test(), kMinimumDelayMs);
EXPECT_TRUE(dm_.SetBaseMinimumDelay(kBaseMinimumDelayMsHigh));
// Base minimum delay is used as it is now higher than minimum delay.
EXPECT_EQ(dm_.effective_minimum_delay_ms_for_test(), kBaseMinimumDelayMsHigh);
EXPECT_TRUE(dm_.SetBaseMinimumDelay(kBaseMinimumDelayMsLow));
// Check that minimum delay is memorized and is used again.
EXPECT_EQ(dm_.effective_minimum_delay_ms_for_test(), kMinimumDelayMs);
}
TEST_F(DelayManagerTest, BaseMinimumDelay) {
// First packet arrival.
Update(0);
constexpr int kBaseMinimumDelayMs = 7 * kFrameSizeMs;
EXPECT_LT(dm_.TargetDelayMs(), kBaseMinimumDelayMs);
EXPECT_TRUE(dm_.SetBaseMinimumDelay(kBaseMinimumDelayMs));
EXPECT_EQ(dm_.GetBaseMinimumDelay(), kBaseMinimumDelayMs);
IncreaseTime(kFrameSizeMs);
Update(0);
EXPECT_EQ(dm_.GetBaseMinimumDelay(), kBaseMinimumDelayMs);
EXPECT_EQ(kBaseMinimumDelayMs, dm_.TargetDelayMs());
}
TEST_F(DelayManagerTest, Failures) {
// Wrong packet size.
EXPECT_EQ(-1, dm_.SetPacketAudioLength(0));
EXPECT_EQ(-1, dm_.SetPacketAudioLength(-1));
// Minimum delay higher than a maximum delay is not accepted.
EXPECT_TRUE(dm_.SetMaximumDelay(20));
EXPECT_FALSE(dm_.SetMinimumDelay(40));
// Maximum delay less than minimum delay is not accepted.
EXPECT_TRUE(dm_.SetMaximumDelay(100));
EXPECT_TRUE(dm_.SetMinimumDelay(80));
EXPECT_FALSE(dm_.SetMaximumDelay(60));
}
} // namespace webrtc

View file

@ -0,0 +1,373 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/dsp_helper.h"
#include <string.h> // Access to memset.
#include <algorithm> // Access to min, max.
#include "common_audio/signal_processing/include/signal_processing_library.h"
namespace webrtc {
// Table of constants used in method DspHelper::ParabolicFit().
const int16_t DspHelper::kParabolaCoefficients[17][3] = {
{120, 32, 64}, {140, 44, 75}, {150, 50, 80}, {160, 57, 85},
{180, 72, 96}, {200, 89, 107}, {210, 98, 112}, {220, 108, 117},
{240, 128, 128}, {260, 150, 139}, {270, 162, 144}, {280, 174, 149},
{300, 200, 160}, {320, 228, 171}, {330, 242, 176}, {340, 257, 181},
{360, 288, 192}};
// Filter coefficients used when downsampling from the indicated sample rates
// (8, 16, 32, 48 kHz) to 4 kHz. Coefficients are in Q12. The corresponding Q0
// values are provided in the comments before each array.
// Q0 values: {0.3, 0.4, 0.3}.
const int16_t DspHelper::kDownsample8kHzTbl[3] = {1229, 1638, 1229};
// Q0 values: {0.15, 0.2, 0.3, 0.2, 0.15}.
const int16_t DspHelper::kDownsample16kHzTbl[5] = {614, 819, 1229, 819, 614};
// Q0 values: {0.1425, 0.1251, 0.1525, 0.1628, 0.1525, 0.1251, 0.1425}.
const int16_t DspHelper::kDownsample32kHzTbl[7] = {584, 512, 625, 667,
625, 512, 584};
// Q0 values: {0.2487, 0.0952, 0.1042, 0.1074, 0.1042, 0.0952, 0.2487}.
const int16_t DspHelper::kDownsample48kHzTbl[7] = {1019, 390, 427, 440,
427, 390, 1019};
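// The Q12 tables above follow directly from the Q0 values: a Q12 coefficient
// is round(q0 * 4096). For instance, 0.3 * 4096 = 1228.8 rounds to 1229, and
// 0.2 * 4096 = 819.2 rounds to 819.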
int DspHelper::RampSignal(const int16_t* input,
size_t length,
int factor,
int increment,
int16_t* output) {
int factor_q20 = (factor << 6) + 32;
// TODO(hlundin): Add 32 to factor_q20 when converting back to Q14?
for (size_t i = 0; i < length; ++i) {
output[i] = (factor * input[i] + 8192) >> 14;
factor_q20 += increment;
factor_q20 = std::max(factor_q20, 0); // Never go negative.
factor = std::min(factor_q20 >> 6, 16384);
}
return factor;
}
int DspHelper::RampSignal(int16_t* signal,
size_t length,
int factor,
int increment) {
return RampSignal(signal, length, factor, increment, signal);
}
int DspHelper::RampSignal(AudioVector* signal,
size_t start_index,
size_t length,
int factor,
int increment) {
int factor_q20 = (factor << 6) + 32;
// TODO(hlundin): Add 32 to factor_q20 when converting back to Q14?
for (size_t i = start_index; i < start_index + length; ++i) {
(*signal)[i] = (factor * (*signal)[i] + 8192) >> 14;
factor_q20 += increment;
factor_q20 = std::max(factor_q20, 0); // Never go negative.
factor = std::min(factor_q20 >> 6, 16384);
}
return factor;
}
int DspHelper::RampSignal(AudioMultiVector* signal,
size_t start_index,
size_t length,
int factor,
int increment) {
RTC_DCHECK_LE(start_index + length, signal->Size());
if (start_index + length > signal->Size()) {
// Wrong parameters. Do nothing and return the scale factor unaltered.
return factor;
}
int end_factor = 0;
// Loop over the channels, starting at the same `factor` each time.
for (size_t channel = 0; channel < signal->Channels(); ++channel) {
end_factor =
RampSignal(&(*signal)[channel], start_index, length, factor, increment);
}
return end_factor;
}
void DspHelper::PeakDetection(int16_t* data,
size_t data_length,
size_t num_peaks,
int fs_mult,
size_t* peak_index,
int16_t* peak_value) {
size_t min_index = 0;
size_t max_index = 0;
for (size_t i = 0; i <= num_peaks - 1; i++) {
if (num_peaks == 1) {
// Single peak. The parabola fit assumes that an extra point is
// available; worst case it gets a zero on the high end of the signal.
// TODO(hlundin): This can potentially get much worse. It breaks the
      // API contract that the length of `data` is `data_length`.
data_length++;
}
peak_index[i] = WebRtcSpl_MaxIndexW16(data, data_length - 1);
if (i != num_peaks - 1) {
min_index = (peak_index[i] > 2) ? (peak_index[i] - 2) : 0;
max_index = std::min(data_length - 1, peak_index[i] + 2);
}
if ((peak_index[i] != 0) && (peak_index[i] != (data_length - 2))) {
ParabolicFit(&data[peak_index[i] - 1], fs_mult, &peak_index[i],
&peak_value[i]);
} else {
if (peak_index[i] == data_length - 2) {
if (data[peak_index[i]] > data[peak_index[i] + 1]) {
ParabolicFit(&data[peak_index[i] - 1], fs_mult, &peak_index[i],
&peak_value[i]);
} else if (data[peak_index[i]] <= data[peak_index[i] + 1]) {
// Linear approximation.
peak_value[i] = (data[peak_index[i]] + data[peak_index[i] + 1]) >> 1;
peak_index[i] = (peak_index[i] * 2 + 1) * fs_mult;
}
} else {
peak_value[i] = data[peak_index[i]];
peak_index[i] = peak_index[i] * 2 * fs_mult;
}
}
if (i != num_peaks - 1) {
memset(&data[min_index], 0,
sizeof(data[0]) * (max_index - min_index + 1));
}
}
}
void DspHelper::ParabolicFit(int16_t* signal_points,
int fs_mult,
size_t* peak_index,
int16_t* peak_value) {
uint16_t fit_index[13];
if (fs_mult == 1) {
fit_index[0] = 0;
fit_index[1] = 8;
fit_index[2] = 16;
} else if (fs_mult == 2) {
fit_index[0] = 0;
fit_index[1] = 4;
fit_index[2] = 8;
fit_index[3] = 12;
fit_index[4] = 16;
} else if (fs_mult == 4) {
fit_index[0] = 0;
fit_index[1] = 2;
fit_index[2] = 4;
fit_index[3] = 6;
fit_index[4] = 8;
fit_index[5] = 10;
fit_index[6] = 12;
fit_index[7] = 14;
fit_index[8] = 16;
} else {
fit_index[0] = 0;
fit_index[1] = 1;
fit_index[2] = 3;
fit_index[3] = 4;
fit_index[4] = 5;
fit_index[5] = 7;
fit_index[6] = 8;
fit_index[7] = 9;
fit_index[8] = 11;
fit_index[9] = 12;
fit_index[10] = 13;
fit_index[11] = 15;
fit_index[12] = 16;
}
// num = -3 * signal_points[0] + 4 * signal_points[1] - signal_points[2];
// den = signal_points[0] - 2 * signal_points[1] + signal_points[2];
int32_t num =
(signal_points[0] * -3) + (signal_points[1] * 4) - signal_points[2];
int32_t den = signal_points[0] + (signal_points[1] * -2) + signal_points[2];
int32_t temp = num * 120;
int flag = 1;
int16_t stp = kParabolaCoefficients[fit_index[fs_mult]][0] -
kParabolaCoefficients[fit_index[fs_mult - 1]][0];
int16_t strt = (kParabolaCoefficients[fit_index[fs_mult]][0] +
kParabolaCoefficients[fit_index[fs_mult - 1]][0]) /
2;
int16_t lmt;
if (temp < -den * strt) {
lmt = strt - stp;
while (flag) {
if ((flag == fs_mult) || (temp > -den * lmt)) {
*peak_value =
(den * kParabolaCoefficients[fit_index[fs_mult - flag]][1] +
num * kParabolaCoefficients[fit_index[fs_mult - flag]][2] +
signal_points[0] * 256) /
256;
*peak_index = *peak_index * 2 * fs_mult - flag;
flag = 0;
} else {
flag++;
lmt -= stp;
}
}
} else if (temp > -den * (strt + stp)) {
lmt = strt + 2 * stp;
while (flag) {
if ((flag == fs_mult) || (temp < -den * lmt)) {
int32_t temp_term_1 =
den * kParabolaCoefficients[fit_index[fs_mult + flag]][1];
int32_t temp_term_2 =
num * kParabolaCoefficients[fit_index[fs_mult + flag]][2];
int32_t temp_term_3 = signal_points[0] * 256;
*peak_value = (temp_term_1 + temp_term_2 + temp_term_3) / 256;
*peak_index = *peak_index * 2 * fs_mult + flag;
flag = 0;
} else {
flag++;
lmt += stp;
}
}
} else {
*peak_value = signal_points[1];
*peak_index = *peak_index * 2 * fs_mult;
}
}
size_t DspHelper::MinDistortion(const int16_t* signal,
size_t min_lag,
size_t max_lag,
size_t length,
int32_t* distortion_value) {
size_t best_index = 0;
int32_t min_distortion = WEBRTC_SPL_WORD32_MAX;
for (size_t i = min_lag; i <= max_lag; i++) {
int32_t sum_diff = 0;
const int16_t* data1 = signal;
const int16_t* data2 = signal - i;
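    // Note that `data2` points `i` samples *before* `signal`, so the caller
    // must guarantee that at least `max_lag` valid samples precede `signal`
    // in memory.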
for (size_t j = 0; j < length; j++) {
sum_diff += WEBRTC_SPL_ABS_W32(data1[j] - data2[j]);
}
// Compare with previous minimum.
if (sum_diff < min_distortion) {
min_distortion = sum_diff;
best_index = i;
}
}
*distortion_value = min_distortion;
return best_index;
}
void DspHelper::CrossFade(const int16_t* input1,
const int16_t* input2,
size_t length,
int16_t* mix_factor,
int16_t factor_decrement,
int16_t* output) {
int16_t factor = *mix_factor;
int16_t complement_factor = 16384 - factor;
for (size_t i = 0; i < length; i++) {
output[i] =
(factor * input1[i] + complement_factor * input2[i] + 8192) >> 14;
factor -= factor_decrement;
complement_factor += factor_decrement;
}
*mix_factor = factor;
}
void DspHelper::UnmuteSignal(const int16_t* input,
size_t length,
int16_t* factor,
int increment,
int16_t* output) {
uint16_t factor_16b = *factor;
int32_t factor_32b = (static_cast<int32_t>(factor_16b) << 6) + 32;
for (size_t i = 0; i < length; i++) {
output[i] = (factor_16b * input[i] + 8192) >> 14;
factor_32b = std::max(factor_32b + increment, 0);
factor_16b = std::min(16384, factor_32b >> 6);
}
*factor = factor_16b;
}
void DspHelper::MuteSignal(int16_t* signal, int mute_slope, size_t length) {
int32_t factor = (16384 << 6) + 32;
for (size_t i = 0; i < length; i++) {
signal[i] = ((factor >> 6) * signal[i] + 8192) >> 14;
factor -= mute_slope;
}
}
int DspHelper::DownsampleTo4kHz(const int16_t* input,
size_t input_length,
size_t output_length,
int input_rate_hz,
bool compensate_delay,
int16_t* output) {
// Set filter parameters depending on input frequency.
  // NOTE: The phase delay values are wrong compared to the true phase delay
  // of the filters. However, the error is preserved (through the +1 term) for
  // consistency. For example, the true group delay of the symmetric 3-tap
  // 8 kHz filter is (3 - 1) / 2 = 1 sample, while `filter_delay` is set to 2.
const int16_t* filter_coefficients; // Filter coefficients.
size_t filter_length; // Number of coefficients.
size_t filter_delay; // Phase delay in samples.
int16_t factor; // Conversion rate (inFsHz / 8000).
switch (input_rate_hz) {
case 8000: {
filter_length = 3;
factor = 2;
filter_coefficients = kDownsample8kHzTbl;
filter_delay = 1 + 1;
break;
}
case 16000: {
filter_length = 5;
factor = 4;
filter_coefficients = kDownsample16kHzTbl;
filter_delay = 2 + 1;
break;
}
case 32000: {
filter_length = 7;
factor = 8;
filter_coefficients = kDownsample32kHzTbl;
filter_delay = 3 + 1;
break;
}
case 48000: {
filter_length = 7;
factor = 12;
filter_coefficients = kDownsample48kHzTbl;
filter_delay = 3 + 1;
break;
}
default: {
RTC_DCHECK_NOTREACHED();
return -1;
}
}
if (!compensate_delay) {
// Disregard delay compensation.
filter_delay = 0;
}
// Returns -1 if input signal is too short; 0 otherwise.
return WebRtcSpl_DownsampleFast(
&input[filter_length - 1], input_length - filter_length + 1, output,
output_length, filter_coefficients, filter_length, factor, filter_delay);
}
} // namespace webrtc

View file

@ -0,0 +1,161 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_DSP_HELPER_H_
#define MODULES_AUDIO_CODING_NETEQ_DSP_HELPER_H_
#include <stdint.h>
#include <string.h>
#include "modules/audio_coding/neteq/audio_multi_vector.h"
#include "modules/audio_coding/neteq/audio_vector.h"
namespace webrtc {
// This class contains various signal processing functions, all implemented as
// static methods.
class DspHelper {
public:
// Filter coefficients used when downsampling from the indicated sample rates
// (8, 16, 32, 48 kHz) to 4 kHz. Coefficients are in Q12.
static const int16_t kDownsample8kHzTbl[3];
static const int16_t kDownsample16kHzTbl[5];
static const int16_t kDownsample32kHzTbl[7];
static const int16_t kDownsample48kHzTbl[7];
// Constants used to mute and unmute over 5 samples. The coefficients are
// in Q15.
static const int kMuteFactorStart8kHz = 27307;
static const int kMuteFactorIncrement8kHz = -5461;
static const int kUnmuteFactorStart8kHz = 5461;
static const int kUnmuteFactorIncrement8kHz = 5461;
static const int kMuteFactorStart16kHz = 29789;
static const int kMuteFactorIncrement16kHz = -2979;
static const int kUnmuteFactorStart16kHz = 2979;
static const int kUnmuteFactorIncrement16kHz = 2979;
static const int kMuteFactorStart32kHz = 31208;
static const int kMuteFactorIncrement32kHz = -1560;
static const int kUnmuteFactorStart32kHz = 1560;
static const int kUnmuteFactorIncrement32kHz = 1560;
static const int kMuteFactorStart48kHz = 31711;
static const int kMuteFactorIncrement48kHz = -1057;
static const int kUnmuteFactorStart48kHz = 1057;
static const int kUnmuteFactorIncrement48kHz = 1057;
// Multiplies the signal with a gradually changing factor.
// The first sample is multiplied with `factor` (in Q14). For each sample,
// `factor` is increased (additive) by the `increment` (in Q20), which can
// be negative. Returns the scale factor after the last increment.
static int RampSignal(const int16_t* input,
size_t length,
int factor,
int increment,
int16_t* output);
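  // Worked example (illustrative numbers): ramping from silence to unity gain
  // over 160 samples uses factor = 0 and increment = (16384 << 6) / 160 =
  // 6553, since unity gain is 16384 in Q14 and Q20 == Q14 << 6.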
// Same as above, but with the samples of `signal` being modified in-place.
static int RampSignal(int16_t* signal,
size_t length,
int factor,
int increment);
// Same as above, but processes `length` samples from `signal`, starting at
// `start_index`.
static int RampSignal(AudioVector* signal,
size_t start_index,
size_t length,
int factor,
int increment);
// Same as above, but for an AudioMultiVector.
static int RampSignal(AudioMultiVector* signal,
size_t start_index,
size_t length,
int factor,
int increment);
// Peak detection with parabolic fit. Looks for `num_peaks` maxima in `data`,
// having length `data_length` and sample rate multiplier `fs_mult`. The peak
// locations and values are written to the arrays `peak_index` and
// `peak_value`, respectively. Both arrays must hold at least `num_peaks`
// elements.
static void PeakDetection(int16_t* data,
size_t data_length,
size_t num_peaks,
int fs_mult,
size_t* peak_index,
int16_t* peak_value);
// Estimates the height and location of a maximum. The three values in the
// array `signal_points` are used as basis for a parabolic fit, which is then
// used to find the maximum in an interpolated signal. The `signal_points` are
// assumed to be from a 4 kHz signal, while the maximum, written to
  // `peak_index` and `peak_value`, is given in the full sample rate, as
// indicated by the sample rate multiplier `fs_mult`.
static void ParabolicFit(int16_t* signal_points,
int fs_mult,
size_t* peak_index,
int16_t* peak_value);
// Calculates the sum-abs-diff for `signal` when compared to a displaced
// version of itself. Returns the displacement lag that results in the minimum
// distortion. The resulting distortion is written to `distortion_value`.
// The values of `min_lag` and `max_lag` are boundaries for the search.
static size_t MinDistortion(const int16_t* signal,
size_t min_lag,
size_t max_lag,
size_t length,
int32_t* distortion_value);
// Mixes `length` samples from `input1` and `input2` together and writes the
// result to `output`. The gain for `input1` starts at `mix_factor` (Q14) and
// is decreased by `factor_decrement` (Q14) for each sample. The gain for
// `input2` is the complement 16384 - mix_factor.
static void CrossFade(const int16_t* input1,
const int16_t* input2,
size_t length,
int16_t* mix_factor,
int16_t factor_decrement,
int16_t* output);
// Scales `input` with an increasing gain. Applies `factor` (Q14) to the first
// sample and increases the gain by `increment` (Q20) for each sample. The
// result is written to `output`. `length` samples are processed.
static void UnmuteSignal(const int16_t* input,
size_t length,
int16_t* factor,
int increment,
int16_t* output);
// Starts at unity gain and gradually fades out `signal`. For each sample,
// the gain is reduced by `mute_slope` (Q14). `length` samples are processed.
static void MuteSignal(int16_t* signal, int mute_slope, size_t length);
  // Downsamples `input` from `input_rate_hz` to a 4 kHz sample rate. The input
// has `input_length` samples, and the method will write `output_length`
// samples to `output`. Compensates for the phase delay of the downsampling
// filters if `compensate_delay` is true. Returns -1 if the input is too short
// to produce `output_length` samples, otherwise 0.
static int DownsampleTo4kHz(const int16_t* input,
size_t input_length,
size_t output_length,
int input_rate_hz,
bool compensate_delay,
int16_t* output);
DspHelper(const DspHelper&) = delete;
DspHelper& operator=(const DspHelper&) = delete;
private:
// Table of constants used in method DspHelper::ParabolicFit().
static const int16_t kParabolaCoefficients[17][3];
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_DSP_HELPER_H_

View file

@ -0,0 +1,88 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/dsp_helper.h"
#include "modules/audio_coding/neteq/audio_multi_vector.h"
#include "test/gtest.h"
namespace webrtc {
TEST(DspHelper, RampSignalArray) {
static const int kLen = 100;
int16_t input[kLen];
int16_t output[kLen];
// Fill input with 1000.
for (int i = 0; i < kLen; ++i) {
input[i] = 1000;
}
int start_factor = 0;
// Ramp from 0 to 1 (in Q14) over the array. Note that `increment` is in Q20,
// while the factor is in Q14, hence the shift by 6.
int increment = (16384 << 6) / kLen;
// Test first method.
int stop_factor =
DspHelper::RampSignal(input, kLen, start_factor, increment, output);
EXPECT_EQ(16383, stop_factor); // Almost reach 1 in Q14.
for (int i = 0; i < kLen; ++i) {
EXPECT_EQ(1000 * i / kLen, output[i]);
}
// Test second method. (Note that this modifies `input`.)
stop_factor = DspHelper::RampSignal(input, kLen, start_factor, increment);
EXPECT_EQ(16383, stop_factor); // Almost reach 1 in Q14.
for (int i = 0; i < kLen; ++i) {
EXPECT_EQ(1000 * i / kLen, input[i]);
}
}
TEST(DspHelper, RampSignalAudioMultiVector) {
static const int kLen = 100;
static const int kChannels = 5;
AudioMultiVector input(kChannels, kLen * 3);
// Fill input with 1000.
for (int i = 0; i < kLen * 3; ++i) {
for (int channel = 0; channel < kChannels; ++channel) {
input[channel][i] = 1000;
}
}
// We want to start ramping at `start_index` and keep ramping for `kLen`
// samples.
int start_index = kLen;
int start_factor = 0;
// Ramp from 0 to 1 (in Q14) in `kLen` samples. Note that `increment` is in
// Q20, while the factor is in Q14, hence the shift by 6.
int increment = (16384 << 6) / kLen;
int stop_factor =
DspHelper::RampSignal(&input, start_index, kLen, start_factor, increment);
EXPECT_EQ(16383, stop_factor); // Almost reach 1 in Q14.
// Verify that the first `kLen` samples are left untouched.
int i;
for (i = 0; i < kLen; ++i) {
for (int channel = 0; channel < kChannels; ++channel) {
EXPECT_EQ(1000, input[channel][i]);
}
}
// Verify that the next block of `kLen` samples are ramped.
for (; i < 2 * kLen; ++i) {
for (int channel = 0; channel < kChannels; ++channel) {
EXPECT_EQ(1000 * (i - kLen) / kLen, input[channel][i]);
}
}
// Verify the last `kLen` samples are left untouched.
for (; i < 3 * kLen; ++i) {
for (int channel = 0; channel < kChannels; ++channel) {
EXPECT_EQ(1000, input[channel][i]);
}
}
}
} // namespace webrtc

View file

@ -0,0 +1,242 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/dtmf_buffer.h"
#include <algorithm> // max
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
// Modify the code to obtain backwards bit-exactness. Once bit-exactness is no
// longer required, this #define should be removed (and the code that it
// enables).
#define LEGACY_BITEXACT
namespace webrtc {
DtmfBuffer::DtmfBuffer(int fs_hz) {
SetSampleRate(fs_hz);
}
DtmfBuffer::~DtmfBuffer() = default;
void DtmfBuffer::Flush() {
buffer_.clear();
}
// The ParseEvent method parses 4 bytes from `payload` according to this format
// from RFC 4733:
//
// 0 1 2 3
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | event |E|R| volume | duration |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//
// Legend (adapted from RFC 4733)
// - event: The event field is a number between 0 and 255 identifying a
// specific telephony event. The buffer will not accept any event
// numbers larger than 15.
// - E: If set to a value of one, the "end" bit indicates that this
// packet contains the end of the event. For long-lasting events
// that have to be split into segments, only the final packet for
// the final segment will have the E bit set.
// - R: Reserved.
// - volume: For DTMF digits and other events representable as tones, this
// field describes the power level of the tone, expressed in dBm0
// after dropping the sign. Power levels range from 0 to -63 dBm0.
// Thus, larger values denote lower volume. The buffer discards
// values larger than 36 (i.e., lower than -36 dBm0).
// - duration: The duration field indicates the duration of the event or segment
// being reported, in timestamp units, expressed as an unsigned
// integer in network byte order. For a non-zero value, the event
// or segment began at the instant identified by the RTP timestamp
// and has so far lasted as long as indicated by this parameter.
// The event may or may not have ended. If the event duration
// exceeds the maximum representable by the duration field, the
// event is split into several contiguous segments. The buffer will
// discard zero-duration events.
//
int DtmfBuffer::ParseEvent(uint32_t rtp_timestamp,
const uint8_t* payload,
size_t payload_length_bytes,
DtmfEvent* event) {
RTC_CHECK(payload);
RTC_CHECK(event);
if (payload_length_bytes < 4) {
RTC_LOG(LS_WARNING) << "ParseEvent payload too short";
return kPayloadTooShort;
}
event->event_no = payload[0];
event->end_bit = ((payload[1] & 0x80) != 0);
event->volume = (payload[1] & 0x3F);
event->duration = payload[2] << 8 | payload[3];
event->timestamp = rtp_timestamp;
return kOK;
}
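// Worked example (illustrative payload): the bytes {0x07, 0x8A, 0x12, 0x67}
// parse to event_no = 7, end_bit = true (0x8A & 0x80), volume = 10
// (0x8A & 0x3F) and duration = 0x1267 = 4711 timestamp units.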
// Inserts a DTMF event into the buffer. The event should be parsed from the
// bit stream using the ParseEvent method above before inserting it in the
// buffer.
// DTMF events can be quite long, and in most cases the duration of the event
// is not known when the first packet describing it is sent. To deal with that,
// the RFC 4733 specifies that multiple packets are sent for one and the same
// event as it is being created (typically, as the user is pressing the key).
// These packets will all share the same start timestamp and event number,
// while the duration will be the cumulative duration from the start. When
// inserting a new event, the InsertEvent method tries to find a matching event
// already in the buffer. If so, the new event is simply merged with the
// existing one.
int DtmfBuffer::InsertEvent(const DtmfEvent& event) {
if (event.event_no < 0 || event.event_no > 15 || event.volume < 0 ||
event.volume > 63 || event.duration <= 0 || event.duration > 65535) {
RTC_LOG(LS_WARNING) << "InsertEvent invalid parameters";
return kInvalidEventParameters;
}
DtmfList::iterator it = buffer_.begin();
while (it != buffer_.end()) {
if (MergeEvents(it, event)) {
// A matching event was found and the new event was merged.
return kOK;
}
++it;
}
buffer_.push_back(event);
// Sort the buffer using CompareEvents to rank the events.
buffer_.sort(CompareEvents);
return kOK;
}
bool DtmfBuffer::GetEvent(uint32_t current_timestamp, DtmfEvent* event) {
DtmfList::iterator it = buffer_.begin();
while (it != buffer_.end()) {
// `event_end` is an estimate of where the current event ends. If the end
// bit is set, we know that the event ends at `timestamp` + `duration`.
uint32_t event_end = it->timestamp + it->duration;
#ifdef LEGACY_BITEXACT
bool next_available = false;
#endif
if (!it->end_bit) {
// If the end bit is not set, we allow extrapolation of the event for
// some time.
event_end += max_extrapolation_samples_;
DtmfList::iterator next = it;
++next;
if (next != buffer_.end()) {
// If there is a next event in the buffer, we will not extrapolate over
// the start of that new event.
event_end = std::min(event_end, next->timestamp);
#ifdef LEGACY_BITEXACT
next_available = true;
#endif
}
}
if (current_timestamp >= it->timestamp &&
current_timestamp <= event_end) { // TODO(hlundin): Change to <.
// Found a matching event.
if (event) {
event->event_no = it->event_no;
event->end_bit = it->end_bit;
event->volume = it->volume;
event->duration = it->duration;
event->timestamp = it->timestamp;
}
#ifdef LEGACY_BITEXACT
if (it->end_bit && current_timestamp + frame_len_samples_ >= event_end) {
// We are done playing this. Erase the event.
buffer_.erase(it);
}
#endif
return true;
} else if (current_timestamp > event_end) { // TODO(hlundin): Change to >=.
// Erase old event. Operation returns a valid pointer to the next element
// in the list.
#ifdef LEGACY_BITEXACT
if (!next_available) {
if (event) {
event->event_no = it->event_no;
event->end_bit = it->end_bit;
event->volume = it->volume;
event->duration = it->duration;
event->timestamp = it->timestamp;
}
it = buffer_.erase(it);
return true;
} else {
it = buffer_.erase(it);
}
#else
it = buffer_.erase(it);
#endif
} else {
++it;
}
}
return false;
}
size_t DtmfBuffer::Length() const {
return buffer_.size();
}
bool DtmfBuffer::Empty() const {
return buffer_.empty();
}
int DtmfBuffer::SetSampleRate(int fs_hz) {
if (fs_hz != 8000 && fs_hz != 16000 && fs_hz != 32000 && fs_hz != 48000) {
return kInvalidSampleRate;
}
max_extrapolation_samples_ = 7 * fs_hz / 100;
frame_len_samples_ = fs_hz / 100;
return kOK;
}
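// For example, at fs_hz = 8000 this gives max_extrapolation_samples_ =
// 7 * 8000 / 100 = 560 samples (70 ms) and frame_len_samples_ = 80 samples
// (one 10 ms frame).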
// The method returns true if the two events are considered to be the same.
// They are defined as equal if they share the same timestamp and event number.
// The special case with long-lasting events that have to be split into segments
// is not handled in this method. These will be treated as separate events in
// the buffer.
bool DtmfBuffer::SameEvent(const DtmfEvent& a, const DtmfEvent& b) {
return (a.event_no == b.event_no) && (a.timestamp == b.timestamp);
}
bool DtmfBuffer::MergeEvents(DtmfList::iterator it, const DtmfEvent& event) {
if (SameEvent(*it, event)) {
if (!it->end_bit) {
// Do not extend the duration of an event for which the end bit was
// already received.
it->duration = std::max(event.duration, it->duration);
}
if (event.end_bit) {
it->end_bit = true;
}
return true;
} else {
return false;
}
}
// Returns true if `a` goes before `b` in the sorting order ("`a` < `b`").
// The events are ranked using their start timestamp (taking wrap-around into
// account). In the unlikely situation that two events share the same start
// timestamp, the event number is used to rank the two. Note that packets
// that belong to the same event, and therefore share the same start
// timestamp, have already been merged before the sort method is called.
bool DtmfBuffer::CompareEvents(const DtmfEvent& a, const DtmfEvent& b) {
if (a.timestamp == b.timestamp) {
return a.event_no < b.event_no;
}
// Take wrap-around into account.
return (static_cast<uint32_t>(b.timestamp - a.timestamp) < 0xFFFFFFFF / 2);
}
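// Worked example of the wrap-around handling: with a.timestamp = 0xFFFFFF00
// and b.timestamp = 0x00000010, the unsigned difference b - a wraps to 0x110,
// which is less than half the timestamp range, so `a` is correctly ranked
// before `b`.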
} // namespace webrtc

View file

@ -0,0 +1,104 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_DTMF_BUFFER_H_
#define MODULES_AUDIO_CODING_NETEQ_DTMF_BUFFER_H_
#include <stddef.h>
#include <stdint.h>
#include <list>
namespace webrtc {
struct DtmfEvent {
uint32_t timestamp;
int event_no;
int volume;
int duration;
bool end_bit;
// Constructors
DtmfEvent()
: timestamp(0), event_no(0), volume(0), duration(0), end_bit(false) {}
DtmfEvent(uint32_t ts, int ev, int vol, int dur, bool end)
: timestamp(ts), event_no(ev), volume(vol), duration(dur), end_bit(end) {}
};
// This is the buffer holding DTMF events while waiting for them to be played.
class DtmfBuffer {
public:
enum BufferReturnCodes {
kOK = 0,
kInvalidPointer,
kPayloadTooShort,
kInvalidEventParameters,
kInvalidSampleRate
};
// Set up the buffer for use at sample rate `fs_hz`.
explicit DtmfBuffer(int fs_hz);
virtual ~DtmfBuffer();
DtmfBuffer(const DtmfBuffer&) = delete;
DtmfBuffer& operator=(const DtmfBuffer&) = delete;
// Flushes the buffer.
virtual void Flush();
// Static method to parse 4 bytes from `payload` as a DTMF event (RFC 4733)
// and write the parsed information into the struct `event`. Input variable
// `rtp_timestamp` is simply copied into the struct.
static int ParseEvent(uint32_t rtp_timestamp,
const uint8_t* payload,
size_t payload_length_bytes,
DtmfEvent* event);
// Inserts `event` into the buffer. The method looks for a matching event and
// merges the two if a match is found.
virtual int InsertEvent(const DtmfEvent& event);
// Checks if a DTMF event should be played at time `current_timestamp`. If so,
// the method returns true; otherwise false. The parameters of the event to
// play will be written to `event`.
virtual bool GetEvent(uint32_t current_timestamp, DtmfEvent* event);
// Number of events in the buffer.
virtual size_t Length() const;
virtual bool Empty() const;
// Set a new sample rate.
virtual int SetSampleRate(int fs_hz);
private:
typedef std::list<DtmfEvent> DtmfList;
int max_extrapolation_samples_;
int frame_len_samples_; // TODO(hlundin): Remove this later.
// Compares two events and returns true if they are the same.
static bool SameEvent(const DtmfEvent& a, const DtmfEvent& b);
// Merges `event` to the event pointed out by `it`. The method checks that
// the two events are the same (using the SameEvent method), and merges them
// if that was the case, returning true. If the events are not the same, false
// is returned.
bool MergeEvents(DtmfList::iterator it, const DtmfEvent& event);
// Method used by the sort algorithm to rank events in the buffer.
static bool CompareEvents(const DtmfEvent& a, const DtmfEvent& b);
DtmfList buffer_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_DTMF_BUFFER_H_

View file

@ -0,0 +1,297 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/dtmf_buffer.h"
#ifdef WIN32
#include <winsock2.h> // ntohl()
#else
#include <arpa/inet.h> // ntohl()
#endif
#include <iostream>
#include "test/gtest.h"
// Modify the tests so that they pass with the modifications done to DtmfBuffer
// for backwards bit-exactness. Once bit-exactness is no longer required, this
// #define should be removed (and the code that it enables).
#define LEGACY_BITEXACT
namespace webrtc {
static int sample_rate_hz = 8000;
static uint32_t MakeDtmfPayload(int event, bool end, int volume, int duration) {
uint32_t payload = 0;
// 0 1 2 3
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | event |E|R| volume | duration |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
payload |= (event & 0x00FF) << 24;
payload |= (end ? 0x00800000 : 0x00000000);
payload |= (volume & 0x003F) << 16;
payload |= (duration & 0xFFFF);
payload = ntohl(payload);
return payload;
}
static bool EqualEvents(const DtmfEvent& a, const DtmfEvent& b) {
return (a.duration == b.duration && a.end_bit == b.end_bit &&
a.event_no == b.event_no && a.timestamp == b.timestamp &&
a.volume == b.volume);
}
TEST(DtmfBuffer, CreateAndDestroy) {
DtmfBuffer* buffer = new DtmfBuffer(sample_rate_hz);
delete buffer;
}
// Test the event parser.
TEST(DtmfBuffer, ParseEvent) {
int event_no = 7;
bool end_bit = true;
int volume = 17;
int duration = 4711;
uint32_t timestamp = 0x12345678;
uint32_t payload = MakeDtmfPayload(event_no, end_bit, volume, duration);
uint8_t* payload_ptr = reinterpret_cast<uint8_t*>(&payload);
DtmfEvent event;
EXPECT_EQ(DtmfBuffer::kOK, DtmfBuffer::ParseEvent(timestamp, payload_ptr,
sizeof(payload), &event));
EXPECT_EQ(duration, event.duration);
EXPECT_EQ(end_bit, event.end_bit);
EXPECT_EQ(event_no, event.event_no);
EXPECT_EQ(timestamp, event.timestamp);
EXPECT_EQ(volume, event.volume);
EXPECT_EQ(DtmfBuffer::kPayloadTooShort,
DtmfBuffer::ParseEvent(timestamp, payload_ptr, 3, &event));
}
TEST(DtmfBuffer, SimpleInsertAndGet) {
int event_no = 7;
bool end_bit = true;
int volume = 17;
int duration = 4711;
uint32_t timestamp = 0x12345678;
DtmfEvent event(timestamp, event_no, volume, duration, end_bit);
DtmfBuffer buffer(sample_rate_hz);
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event));
EXPECT_EQ(1u, buffer.Length());
EXPECT_FALSE(buffer.Empty());
DtmfEvent out_event;
// Too early to get event.
EXPECT_FALSE(buffer.GetEvent(timestamp - 10, &out_event));
EXPECT_EQ(1u, buffer.Length());
EXPECT_FALSE(buffer.Empty());
// Get the event at its starting timestamp.
EXPECT_TRUE(buffer.GetEvent(timestamp, &out_event));
EXPECT_TRUE(EqualEvents(event, out_event));
EXPECT_EQ(1u, buffer.Length());
EXPECT_FALSE(buffer.Empty());
// Get the event some time into the event.
EXPECT_TRUE(buffer.GetEvent(timestamp + duration / 2, &out_event));
EXPECT_TRUE(EqualEvents(event, out_event));
EXPECT_EQ(1u, buffer.Length());
EXPECT_FALSE(buffer.Empty());
// Give a "current" timestamp after the event has ended.
#ifdef LEGACY_BITEXACT
EXPECT_TRUE(buffer.GetEvent(timestamp + duration + 10, &out_event));
#endif
EXPECT_FALSE(buffer.GetEvent(timestamp + duration + 10, &out_event));
EXPECT_EQ(0u, buffer.Length());
EXPECT_TRUE(buffer.Empty());
}
TEST(DtmfBuffer, MergingPackets) {
int event_no = 0;
bool end_bit = false;
int volume = 17;
int duration = 80;
uint32_t timestamp = 0x12345678;
DtmfEvent event(timestamp, event_no, volume, duration, end_bit);
DtmfBuffer buffer(sample_rate_hz);
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event));
event.duration += 80;
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event));
event.duration += 80;
event.end_bit = true;
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event));
EXPECT_EQ(1u, buffer.Length());
DtmfEvent out_event;
EXPECT_TRUE(buffer.GetEvent(timestamp, &out_event));
EXPECT_TRUE(EqualEvents(event, out_event));
}
// This test case inserts one shorter event completely overlapped by one longer
// event. The expected outcome is that only the longer event is played.
TEST(DtmfBuffer, OverlappingEvents) {
int event_no = 0;
bool end_bit = true;
int volume = 1;
int duration = 80;
uint32_t timestamp = 0x12345678 + 80;
DtmfEvent short_event(timestamp, event_no, volume, duration, end_bit);
DtmfBuffer buffer(sample_rate_hz);
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(short_event));
event_no = 10;
end_bit = false;
timestamp = 0x12345678;
DtmfEvent long_event(timestamp, event_no, volume, duration, end_bit);
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(long_event));
long_event.duration += 80;
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(long_event));
long_event.duration += 80;
long_event.end_bit = true;
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(long_event));
EXPECT_EQ(2u, buffer.Length());
DtmfEvent out_event;
// Expect to get the long event.
EXPECT_TRUE(buffer.GetEvent(timestamp, &out_event));
EXPECT_TRUE(EqualEvents(long_event, out_event));
// Expect no more events.
#ifdef LEGACY_BITEXACT
EXPECT_TRUE(
buffer.GetEvent(timestamp + long_event.duration + 10, &out_event));
EXPECT_TRUE(EqualEvents(long_event, out_event));
EXPECT_TRUE(
buffer.GetEvent(timestamp + long_event.duration + 10, &out_event));
EXPECT_TRUE(EqualEvents(short_event, out_event));
#else
EXPECT_FALSE(
buffer.GetEvent(timestamp + long_event.duration + 10, &out_event));
#endif
EXPECT_TRUE(buffer.Empty());
}
TEST(DtmfBuffer, ExtrapolationTime) {
int event_no = 0;
bool end_bit = false;
int volume = 1;
int duration = 80;
uint32_t timestamp = 0x12345678;
DtmfEvent event1(timestamp, event_no, volume, duration, end_bit);
DtmfBuffer buffer(sample_rate_hz);
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event1));
EXPECT_EQ(1u, buffer.Length());
DtmfEvent out_event;
// Get the event at the start.
EXPECT_TRUE(buffer.GetEvent(timestamp, &out_event));
EXPECT_TRUE(EqualEvents(event1, out_event));
// Also get the event 100 samples after the end of the event (since we're
// missing the end bit).
uint32_t timestamp_now = timestamp + duration + 100;
EXPECT_TRUE(buffer.GetEvent(timestamp_now, &out_event));
EXPECT_TRUE(EqualEvents(event1, out_event));
// Insert another event starting back-to-back with the previous event.
timestamp += duration;
event_no = 1;
DtmfEvent event2(timestamp, event_no, volume, duration, end_bit);
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event2));
EXPECT_EQ(2u, buffer.Length());
// Now we expect to get the new event when supplying `timestamp_now`.
EXPECT_TRUE(buffer.GetEvent(timestamp_now, &out_event));
EXPECT_TRUE(EqualEvents(event2, out_event));
  // Expect the first event to be erased now.
EXPECT_EQ(1u, buffer.Length());
// Move `timestamp_now` to more than 560 samples after the end of the second
// event. Expect that event to be erased.
timestamp_now = timestamp + duration + 600;
#ifdef LEGACY_BITEXACT
EXPECT_TRUE(buffer.GetEvent(timestamp_now, &out_event));
#endif
EXPECT_FALSE(buffer.GetEvent(timestamp_now, &out_event));
EXPECT_TRUE(buffer.Empty());
}
TEST(DtmfBuffer, TimestampWraparound) {
int event_no = 0;
bool end_bit = true;
int volume = 1;
int duration = 80;
uint32_t timestamp1 = 0xFFFFFFFF - duration;
DtmfEvent event1(timestamp1, event_no, volume, duration, end_bit);
uint32_t timestamp2 = 0;
DtmfEvent event2(timestamp2, event_no, volume, duration, end_bit);
DtmfBuffer buffer(sample_rate_hz);
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event1));
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event2));
EXPECT_EQ(2u, buffer.Length());
DtmfEvent out_event;
EXPECT_TRUE(buffer.GetEvent(timestamp1, &out_event));
EXPECT_TRUE(EqualEvents(event1, out_event));
#ifdef LEGACY_BITEXACT
EXPECT_EQ(1u, buffer.Length());
#else
EXPECT_EQ(2u, buffer.Length());
#endif
buffer.Flush();
// Reverse the insert order. Expect same results.
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event2));
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event1));
EXPECT_EQ(2u, buffer.Length());
EXPECT_TRUE(buffer.GetEvent(timestamp1, &out_event));
EXPECT_TRUE(EqualEvents(event1, out_event));
#ifdef LEGACY_BITEXACT
EXPECT_EQ(1u, buffer.Length());
#else
EXPECT_EQ(2u, buffer.Length());
#endif
}
TEST(DtmfBuffer, InvalidEvents) {
int event_no = 0;
bool end_bit = true;
int volume = 1;
int duration = 80;
uint32_t timestamp = 0x12345678;
DtmfEvent event(timestamp, event_no, volume, duration, end_bit);
DtmfBuffer buffer(sample_rate_hz);
// Invalid event number.
event.event_no = -1;
EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
event.event_no = 16;
EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
  event.event_no = 0;  // Valid value.
// Invalid volume.
event.volume = -1;
EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
event.volume = 64;
EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
  event.volume = 0;  // Valid value.
// Invalid duration.
event.duration = -1;
EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
event.duration = 0;
EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
event.duration = 0xFFFF + 1;
EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
  event.duration = 1;  // Valid value.
// Finish with a valid event, just to verify that all is ok.
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event));
}
} // namespace webrtc

View file

@ -0,0 +1,215 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// This class provides a generator for DTMF tones. The tone generation is based
// on a sinusoid recursion. Each sinusoid is generated using a recursion
// formula; x[n] = a * x[n-1] - x[n-2], where the coefficient
// a = 2*cos(2*pi*f/fs). The recursion is started with x[-1] = 0 and
// x[-2] = sin(2*pi*f/fs). (Note that with this initialization, the resulting
// sinusoid gets a "negative" rotation; x[n] = sin(-2*pi*f/fs * n + phi), but
// it is kept this way for historical reasons.)
// TODO(hlundin): Change to positive rotation?
//
// Each key on the telephone keypad corresponds to an "event", 0-15. Each event
// is mapped to a tone pair, with a low and a high frequency. There are four
// low and four high frequencies, each corresponding to a row and column,
// respectively, on the keypad as illustrated below.
//
// 1209 Hz 1336 Hz 1477 Hz 1633 Hz
// 697 Hz 1 2 3 12
// 770 Hz 4 5 6 13
// 852 Hz 7 8 9 14
// 941 Hz 10 0 11 15
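//
// Worked example of how the tables below are derived: for event 1 (697 Hz low
// tone) at fs = 8000 Hz, the coefficient is a = 2 * cos(2 * pi * 697 / 8000)
// ~= 1.7077, i.e. round(1.7077 * 16384) = 27980 in Q14 (kCoeff1[0][1]), and
// the start value is sin(2 * pi * 697 / 8000) ~= 0.5205, i.e.
// round(0.5205 * 16384) = 8528 (kInitValue1[0][1]).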
#include "modules/audio_coding/neteq/dtmf_tone_generator.h"
#include "modules/audio_coding/neteq/audio_vector.h"
#include "rtc_base/arraysize.h"
#include "rtc_base/checks.h"
namespace webrtc {
// The filter coefficient a = 2*cos(2*pi*f/fs) for the low frequency tone, for
// sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0 through 15.
// Values are in Q14.
const int DtmfToneGenerator::kCoeff1[4][16] = {
{24219, 27980, 27980, 27980, 26956, 26956, 26956, 25701, 25701, 25701,
24219, 24219, 27980, 26956, 25701, 24219},
{30556, 31548, 31548, 31548, 31281, 31281, 31281, 30951, 30951, 30951,
30556, 30556, 31548, 31281, 30951, 30556},
{32210, 32462, 32462, 32462, 32394, 32394, 32394, 32311, 32311, 32311,
32210, 32210, 32462, 32394, 32311, 32210},
{32520, 32632, 32632, 32632, 32602, 32602, 32602, 32564, 32564, 32564,
32520, 32520, 32632, 32602, 32564, 32520}};
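// For example, event 1 has low tone 697 Hz; at fs = 8000 Hz the coefficient
// is a = 2 * cos(2 * pi * 697 / 8000) = 1.70774, which rounds to 27980 in
// Q14 (i.e., times 16384).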
// The filter coefficient a = 2*cos(2*pi*f/fs) for the high frequency tone, for
// sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0 through 15.
// Values are in Q14.
const int DtmfToneGenerator::kCoeff2[4][16] = {
{16325, 19073, 16325, 13085, 19073, 16325, 13085, 19073, 16325, 13085,
19073, 13085, 9315, 9315, 9315, 9315},
{28361, 29144, 28361, 27409, 29144, 28361, 27409, 29144, 28361, 27409,
29144, 27409, 26258, 26258, 26258, 26258},
{31647, 31849, 31647, 31400, 31849, 31647, 31400, 31849, 31647, 31400,
31849, 31400, 31098, 31098, 31098, 31098},
{32268, 32359, 32268, 32157, 32359, 32268, 32157, 32359, 32268, 32157,
32359, 32157, 32022, 32022, 32022, 32022}};
// The initialization value x[-2] = sin(2*pi*f/fs) for the low frequency tone,
// for sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0-15.
// Values are in Q14.
const int DtmfToneGenerator::kInitValue1[4][16] = {
{11036, 8528, 8528, 8528, 9315, 9315, 9315, 10163, 10163, 10163, 11036,
11036, 8528, 9315, 10163, 11036},
{5918, 4429, 4429, 4429, 4879, 4879, 4879, 5380, 5380, 5380, 5918, 5918,
4429, 4879, 5380, 5918},
{3010, 2235, 2235, 2235, 2468, 2468, 2468, 2728, 2728, 2728, 3010, 3010,
2235, 2468, 2728, 3010},
{2013, 1493, 1493, 1493, 1649, 1649, 1649, 1823, 1823, 1823, 2013, 2013,
1493, 1649, 1823, 2013}};
// The initialization value x[-2] = sin(2*pi*f/fs) for the high frequency tone,
// for sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0-15.
// Values are in Q14.
const int DtmfToneGenerator::kInitValue2[4][16] = {
{14206, 13323, 14206, 15021, 13323, 14206, 15021, 13323, 14206, 15021,
13323, 15021, 15708, 15708, 15708, 15708},
{8207, 7490, 8207, 8979, 7490, 8207, 8979, 7490, 8207, 8979, 7490, 8979,
9801, 9801, 9801, 9801},
{4249, 3853, 4249, 4685, 3853, 4249, 4685, 3853, 4249, 4685, 3853, 4685,
5164, 5164, 5164, 5164},
{2851, 2582, 2851, 3148, 2582, 2851, 3148, 2582, 2851, 3148, 2582, 3148,
3476, 3476, 3476, 3476}};
// Amplitude multipliers for volume values 0 through 63, corresponding to
// 0 dBm0 through -63 dBm0. Values are in Q14.
// for a in range(0, 64):
// print round(16141.0 * 10**(-float(a)/20))
const int DtmfToneGenerator::kAmplitude[64] = {
16141, 14386, 12821, 11427, 10184, 9077, 8090, 7210, 6426, 5727, 5104,
4549, 4054, 3614, 3221, 2870, 2558, 2280, 2032, 1811, 1614, 1439,
1282, 1143, 1018, 908, 809, 721, 643, 573, 510, 455, 405,
361, 322, 287, 256, 228, 203, 181, 161, 144, 128, 114,
102, 91, 81, 72, 64, 57, 51, 45, 41, 36, 32,
29, 26, 23, 20, 18, 16, 14, 13, 11};
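// For example, attenuation 6 dB gives round(16141.0 * 10**(-6.0/20)) = 8090,
// the seventh entry above.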
// Constructor.
DtmfToneGenerator::DtmfToneGenerator()
: initialized_(false), coeff1_(0), coeff2_(0), amplitude_(0) {}
// Initialize the DTMF generator with sample rate fs Hz (8000, 16000, 32000,
// 48000), event (0-15) and attenuation (0-36 dB).
// Returns 0 on success, otherwise an error code.
int DtmfToneGenerator::Init(int fs, int event, int attenuation) {
initialized_ = false;
size_t fs_index;
if (fs == 8000) {
fs_index = 0;
} else if (fs == 16000) {
fs_index = 1;
} else if (fs == 32000) {
fs_index = 2;
} else if (fs == 48000) {
fs_index = 3;
} else {
RTC_DCHECK_NOTREACHED();
fs_index = 1; // Default to 16000 Hz.
}
if (event < 0 || event > 15) {
return kParameterError; // Invalid event number.
}
if (attenuation < 0 || attenuation > 63) {
return kParameterError; // Invalid attenuation.
}
// Look up oscillator coefficient for low and high frequencies.
RTC_DCHECK_LE(0, fs_index);
RTC_DCHECK_GT(arraysize(kCoeff1), fs_index);
RTC_DCHECK_GT(arraysize(kCoeff2), fs_index);
RTC_DCHECK_LE(0, event);
RTC_DCHECK_GT(arraysize(kCoeff1[fs_index]), event);
RTC_DCHECK_GT(arraysize(kCoeff2[fs_index]), event);
coeff1_ = kCoeff1[fs_index][event];
coeff2_ = kCoeff2[fs_index][event];
// Look up amplitude multiplier.
RTC_DCHECK_LE(0, attenuation);
RTC_DCHECK_GT(arraysize(kAmplitude), attenuation);
amplitude_ = kAmplitude[attenuation];
// Initialize sample history.
RTC_DCHECK_LE(0, fs_index);
RTC_DCHECK_GT(arraysize(kInitValue1), fs_index);
RTC_DCHECK_GT(arraysize(kInitValue2), fs_index);
RTC_DCHECK_LE(0, event);
RTC_DCHECK_GT(arraysize(kInitValue1[fs_index]), event);
RTC_DCHECK_GT(arraysize(kInitValue2[fs_index]), event);
sample_history1_[0] = kInitValue1[fs_index][event];
sample_history1_[1] = 0;
sample_history2_[0] = kInitValue2[fs_index][event];
sample_history2_[1] = 0;
initialized_ = true;
return 0;
}
// Reset tone generator to uninitialized state.
void DtmfToneGenerator::Reset() {
initialized_ = false;
}
// Generate num_samples of DTMF signal and write to `output`.
int DtmfToneGenerator::Generate(size_t num_samples, AudioMultiVector* output) {
if (!initialized_) {
return kNotInitialized;
}
if (!output) {
return kParameterError;
}
output->AssertSize(num_samples);
for (size_t i = 0; i < num_samples; ++i) {
// Use recursion formula y[n] = a * y[n - 1] - y[n - 2].
int16_t temp_val_low =
((coeff1_ * sample_history1_[1] + 8192) >> 14) - sample_history1_[0];
int16_t temp_val_high =
((coeff2_ * sample_history2_[1] + 8192) >> 14) - sample_history2_[0];
// Update recursion memory.
sample_history1_[0] = sample_history1_[1];
sample_history1_[1] = temp_val_low;
sample_history2_[0] = sample_history2_[1];
sample_history2_[1] = temp_val_high;
// Attenuate the low frequency tone 3 dB.
int32_t temp_val =
kAmpMultiplier * temp_val_low + temp_val_high * (1 << 15);
// Normalize the signal to Q14 with proper rounding.
temp_val = (temp_val + 16384) >> 15;
// Scale the signal to correct volume.
(*output)[0][i] =
static_cast<int16_t>((temp_val * amplitude_ + 8192) >> 14);
}
// Copy first channel to all other channels.
for (size_t channel = 1; channel < output->Channels(); ++channel) {
output->CopyChannel(0, channel);
}
return static_cast<int>(num_samples);
}
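// A minimal usage sketch (illustration only): generate 10 ms of event 5 at
// 16 kHz, 6 dB below full scale, into a mono output.
//
//   DtmfToneGenerator gen;
//   if (gen.Init(16000, 5, 6) == 0) {
//     AudioMultiVector out(1);
//     gen.Generate(160, &out);  // 160 samples = 10 ms at 16 kHz.
//   }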
bool DtmfToneGenerator::initialized() const {
return initialized_;
}
} // namespace webrtc

View file

@ -0,0 +1,57 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_DTMF_TONE_GENERATOR_H_
#define MODULES_AUDIO_CODING_NETEQ_DTMF_TONE_GENERATOR_H_
#include <stddef.h>
#include <stdint.h>
#include "modules/audio_coding/neteq/audio_multi_vector.h"
namespace webrtc {
// This class provides a generator for DTMF tones.
class DtmfToneGenerator {
public:
enum ReturnCodes {
kNotInitialized = -1,
kParameterError = -2,
};
DtmfToneGenerator();
virtual ~DtmfToneGenerator() {}
DtmfToneGenerator(const DtmfToneGenerator&) = delete;
DtmfToneGenerator& operator=(const DtmfToneGenerator&) = delete;
virtual int Init(int fs, int event, int attenuation);
virtual void Reset();
virtual int Generate(size_t num_samples, AudioMultiVector* output);
virtual bool initialized() const;
private:
static const int kCoeff1[4][16]; // 1st oscillator model coefficient table.
static const int kCoeff2[4][16]; // 2nd oscillator model coefficient table.
static const int kInitValue1[4][16]; // Initialization for 1st oscillator.
static const int kInitValue2[4][16]; // Initialization for 2nd oscillator.
static const int kAmplitude[64]; // Amplitude for 0 through -63 dBm0.
static const int16_t kAmpMultiplier = 23171; // 3 dB attenuation (in Q15).
bool initialized_; // True if generator is initialized properly.
int coeff1_; // 1st oscillator coefficient for this event.
int coeff2_; // 2nd oscillator coefficient for this event.
int amplitude_; // Amplitude for this event.
int16_t sample_history1_[2]; // Last 2 samples for the 1st oscillator.
int16_t sample_history2_[2]; // Last 2 samples for the 2nd oscillator.
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_DTMF_TONE_GENERATOR_H_

View file

@ -0,0 +1,180 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for DtmfToneGenerator class.
#include "modules/audio_coding/neteq/dtmf_tone_generator.h"
#include <math.h>
#include "common_audio/include/audio_util.h"
#include "modules/audio_coding/neteq/audio_multi_vector.h"
#include "rtc_base/strings/string_builder.h"
#include "test/gtest.h"
namespace webrtc {
class DtmfToneGeneratorTest : public ::testing::Test {
protected:
static const double kLowFreqHz[16];
static const double kHighFreqHz[16];
// This is the attenuation applied to all cases.
const double kBaseAttenuation = 16141.0 / 16384.0;
const double k3dbAttenuation = 23171.0 / 32768.0;
const int kNumSamples = 10;
void TestAllTones(int fs_hz, int channels) {
AudioMultiVector signal(channels);
for (int event = 0; event <= 15; ++event) {
rtc::StringBuilder ss;
ss << "Checking event " << event << " at sample rate " << fs_hz;
SCOPED_TRACE(ss.str());
const int kAttenuation = 0;
ASSERT_EQ(0, tone_gen_.Init(fs_hz, event, kAttenuation));
EXPECT_TRUE(tone_gen_.initialized());
EXPECT_EQ(kNumSamples, tone_gen_.Generate(kNumSamples, &signal));
double f1 = kLowFreqHz[event];
double f2 = kHighFreqHz[event];
const double pi = 3.14159265358979323846;
for (int n = 0; n < kNumSamples; ++n) {
double x = k3dbAttenuation * sin(2.0 * pi * f1 / fs_hz * (-n - 1)) +
sin(2.0 * pi * f2 / fs_hz * (-n - 1));
x *= kBaseAttenuation;
x = ldexp(x, 14); // Scale to Q14.
for (int channel = 0; channel < channels; ++channel) {
EXPECT_NEAR(x, static_cast<double>(signal[channel][n]), 25);
}
}
tone_gen_.Reset();
EXPECT_FALSE(tone_gen_.initialized());
}
}
void TestAmplitudes(int fs_hz, int channels) {
AudioMultiVector signal(channels);
AudioMultiVector ref_signal(channels);
const int event_vec[] = {0, 4, 9, 13}; // Test a few events.
for (int e = 0; e < 4; ++e) {
int event = event_vec[e];
// Create full-scale reference.
ASSERT_EQ(0, tone_gen_.Init(fs_hz, event, 0)); // 0 attenuation.
EXPECT_EQ(kNumSamples, tone_gen_.Generate(kNumSamples, &ref_signal));
// Test every 5 steps (to save time).
for (int attenuation = 1; attenuation <= 63; attenuation += 5) {
rtc::StringBuilder ss;
ss << "Checking event " << event << " at sample rate " << fs_hz;
ss << "; attenuation " << attenuation;
SCOPED_TRACE(ss.str());
ASSERT_EQ(0, tone_gen_.Init(fs_hz, event, attenuation));
EXPECT_EQ(kNumSamples, tone_gen_.Generate(kNumSamples, &signal));
for (int n = 0; n < kNumSamples; ++n) {
double attenuation_factor =
DbToRatio(-static_cast<float>(attenuation));
// Verify that the attenuation is correct.
for (int channel = 0; channel < channels; ++channel) {
EXPECT_NEAR(attenuation_factor * ref_signal[channel][n],
signal[channel][n], 2);
}
}
tone_gen_.Reset();
}
}
}
DtmfToneGenerator tone_gen_;
};
// Low and high frequencies for events 0 through 15.
const double DtmfToneGeneratorTest::kLowFreqHz[16] = {
941.0, 697.0, 697.0, 697.0, 770.0, 770.0, 770.0, 852.0,
852.0, 852.0, 941.0, 941.0, 697.0, 770.0, 852.0, 941.0};
const double DtmfToneGeneratorTest::kHighFreqHz[16] = {
1336.0, 1209.0, 1336.0, 1477.0, 1209.0, 1336.0, 1477.0, 1209.0,
1336.0, 1477.0, 1209.0, 1477.0, 1633.0, 1633.0, 1633.0, 1633.0};
TEST_F(DtmfToneGeneratorTest, Test8000Mono) {
TestAllTones(8000, 1);
TestAmplitudes(8000, 1);
}
TEST_F(DtmfToneGeneratorTest, Test16000Mono) {
TestAllTones(16000, 1);
TestAmplitudes(16000, 1);
}
TEST_F(DtmfToneGeneratorTest, Test32000Mono) {
TestAllTones(32000, 1);
TestAmplitudes(32000, 1);
}
TEST_F(DtmfToneGeneratorTest, Test48000Mono) {
TestAllTones(48000, 1);
TestAmplitudes(48000, 1);
}
TEST_F(DtmfToneGeneratorTest, Test8000Stereo) {
TestAllTones(8000, 2);
TestAmplitudes(8000, 2);
}
TEST_F(DtmfToneGeneratorTest, Test16000Stereo) {
TestAllTones(16000, 2);
TestAmplitudes(16000, 2);
}
TEST_F(DtmfToneGeneratorTest, Test32000Stereo) {
TestAllTones(32000, 2);
TestAmplitudes(32000, 2);
}
TEST_F(DtmfToneGeneratorTest, Test48000Stereo) {
TestAllTones(48000, 2);
TestAmplitudes(48000, 2);
}
TEST(DtmfToneGenerator, TestErrors) {
DtmfToneGenerator tone_gen;
const int kNumSamples = 10;
AudioMultiVector signal(1); // One channel.
// Try to generate tones without initializing.
EXPECT_EQ(DtmfToneGenerator::kNotInitialized,
tone_gen.Generate(kNumSamples, &signal));
const int fs = 16000; // Valid sample rate.
const int event = 7; // Valid event.
const int attenuation = 0; // Valid attenuation.
// Initialize with invalid event -1.
EXPECT_EQ(DtmfToneGenerator::kParameterError,
tone_gen.Init(fs, -1, attenuation));
// Initialize with invalid event 16.
EXPECT_EQ(DtmfToneGenerator::kParameterError,
tone_gen.Init(fs, 16, attenuation));
// Initialize with invalid attenuation -1.
EXPECT_EQ(DtmfToneGenerator::kParameterError, tone_gen.Init(fs, event, -1));
// Initialize with invalid attenuation 64.
EXPECT_EQ(DtmfToneGenerator::kParameterError, tone_gen.Init(fs, event, 64));
EXPECT_FALSE(tone_gen.initialized()); // Should still be uninitialized.
// Initialize with valid parameters.
ASSERT_EQ(0, tone_gen.Init(fs, event, attenuation));
EXPECT_TRUE(tone_gen.initialized());
// NULL pointer to destination.
EXPECT_EQ(DtmfToneGenerator::kParameterError,
tone_gen.Generate(kNumSamples, NULL));
}
} // namespace webrtc

View file

@ -0,0 +1,888 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/expand.h"
#include <string.h> // memset
#include <algorithm> // min, max
#include <limits> // numeric_limits<T>
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "modules/audio_coding/neteq/audio_multi_vector.h"
#include "modules/audio_coding/neteq/background_noise.h"
#include "modules/audio_coding/neteq/cross_correlation.h"
#include "modules/audio_coding/neteq/dsp_helper.h"
#include "modules/audio_coding/neteq/random_vector.h"
#include "modules/audio_coding/neteq/statistics_calculator.h"
#include "modules/audio_coding/neteq/sync_buffer.h"
#include "rtc_base/numerics/safe_conversions.h"
namespace webrtc {
Expand::Expand(BackgroundNoise* background_noise,
SyncBuffer* sync_buffer,
RandomVector* random_vector,
StatisticsCalculator* statistics,
int fs,
size_t num_channels)
: random_vector_(random_vector),
sync_buffer_(sync_buffer),
first_expand_(true),
fs_hz_(fs),
num_channels_(num_channels),
consecutive_expands_(0),
background_noise_(background_noise),
statistics_(statistics),
overlap_length_(5 * fs / 8000),
lag_index_direction_(0),
current_lag_index_(0),
stop_muting_(false),
expand_duration_samples_(0),
channel_parameters_(new ChannelParameters[num_channels_]) {
RTC_DCHECK(fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000);
RTC_DCHECK_LE(fs,
static_cast<int>(kMaxSampleRate)); // Should not be possible.
RTC_DCHECK_GT(num_channels_, 0);
memset(expand_lags_, 0, sizeof(expand_lags_));
Reset();
}
Expand::~Expand() = default;
void Expand::Reset() {
first_expand_ = true;
consecutive_expands_ = 0;
max_lag_ = 0;
for (size_t ix = 0; ix < num_channels_; ++ix) {
channel_parameters_[ix].expand_vector0.Clear();
channel_parameters_[ix].expand_vector1.Clear();
}
}
int Expand::Process(AudioMultiVector* output) {
int16_t random_vector[kMaxSampleRate / 8000 * 120 + 30];
int16_t scaled_random_vector[kMaxSampleRate / 8000 * 125];
static const int kTempDataSize = 3600;
int16_t temp_data[kTempDataSize]; // TODO(hlundin) Remove this.
int16_t* voiced_vector_storage = temp_data;
int16_t* voiced_vector = &voiced_vector_storage[overlap_length_];
static const size_t kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder;
int16_t unvoiced_array_memory[kNoiseLpcOrder + kMaxSampleRate / 8000 * 125];
int16_t* unvoiced_vector = unvoiced_array_memory + kUnvoicedLpcOrder;
int16_t* noise_vector = unvoiced_array_memory + kNoiseLpcOrder;
int fs_mult = fs_hz_ / 8000;
if (first_expand_) {
// Perform initial setup if this is the first expansion since last reset.
AnalyzeSignal(random_vector);
first_expand_ = false;
expand_duration_samples_ = 0;
} else {
// This is not the first expansion, parameters are already estimated.
// Extract a noise segment.
size_t rand_length = max_lag_;
// This only applies to SWB where length could be larger than 256.
RTC_DCHECK_LE(rand_length, kMaxSampleRate / 8000 * 120 + 30);
GenerateRandomVector(2, rand_length, random_vector);
}
// Generate signal.
UpdateLagIndex();
// Voiced part.
// Generate a weighted vector with the current lag.
size_t expansion_vector_length = max_lag_ + overlap_length_;
size_t current_lag = expand_lags_[current_lag_index_];
// Copy lag+overlap data.
size_t expansion_vector_position =
expansion_vector_length - current_lag - overlap_length_;
size_t temp_length = current_lag + overlap_length_;
for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {
ChannelParameters& parameters = channel_parameters_[channel_ix];
if (current_lag_index_ == 0) {
// Use only expand_vector0.
RTC_DCHECK_LE(expansion_vector_position + temp_length,
parameters.expand_vector0.Size());
parameters.expand_vector0.CopyTo(temp_length, expansion_vector_position,
voiced_vector_storage);
} else if (current_lag_index_ == 1) {
std::unique_ptr<int16_t[]> temp_0(new int16_t[temp_length]);
parameters.expand_vector0.CopyTo(temp_length, expansion_vector_position,
temp_0.get());
std::unique_ptr<int16_t[]> temp_1(new int16_t[temp_length]);
parameters.expand_vector1.CopyTo(temp_length, expansion_vector_position,
temp_1.get());
// Mix 3/4 of expand_vector0 with 1/4 of expand_vector1.
WebRtcSpl_ScaleAndAddVectorsWithRound(temp_0.get(), 3, temp_1.get(), 1, 2,
voiced_vector_storage, temp_length);
} else if (current_lag_index_ == 2) {
// Mix 1/2 of expand_vector0 with 1/2 of expand_vector1.
RTC_DCHECK_LE(expansion_vector_position + temp_length,
parameters.expand_vector0.Size());
RTC_DCHECK_LE(expansion_vector_position + temp_length,
parameters.expand_vector1.Size());
std::unique_ptr<int16_t[]> temp_0(new int16_t[temp_length]);
parameters.expand_vector0.CopyTo(temp_length, expansion_vector_position,
temp_0.get());
std::unique_ptr<int16_t[]> temp_1(new int16_t[temp_length]);
parameters.expand_vector1.CopyTo(temp_length, expansion_vector_position,
temp_1.get());
WebRtcSpl_ScaleAndAddVectorsWithRound(temp_0.get(), 1, temp_1.get(), 1, 1,
voiced_vector_storage, temp_length);
}
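// In the calls above, WebRtcSpl_ScaleAndAddVectorsWithRound computes
// out[i] = (in1[i] * scale1 + in2[i] * scale2 + (1 << (shift - 1))) >> shift,
// so (scale1, scale2, shift) = (3, 1, 2) yields the 3/4 + 1/4 mix and
// (1, 1, 1) yields the 1/2 + 1/2 mix.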
// Get tapering window parameters. Values are in Q15.
int16_t muting_window, muting_window_increment;
int16_t unmuting_window, unmuting_window_increment;
if (fs_hz_ == 8000) {
muting_window = DspHelper::kMuteFactorStart8kHz;
muting_window_increment = DspHelper::kMuteFactorIncrement8kHz;
unmuting_window = DspHelper::kUnmuteFactorStart8kHz;
unmuting_window_increment = DspHelper::kUnmuteFactorIncrement8kHz;
} else if (fs_hz_ == 16000) {
muting_window = DspHelper::kMuteFactorStart16kHz;
muting_window_increment = DspHelper::kMuteFactorIncrement16kHz;
unmuting_window = DspHelper::kUnmuteFactorStart16kHz;
unmuting_window_increment = DspHelper::kUnmuteFactorIncrement16kHz;
} else if (fs_hz_ == 32000) {
muting_window = DspHelper::kMuteFactorStart32kHz;
muting_window_increment = DspHelper::kMuteFactorIncrement32kHz;
unmuting_window = DspHelper::kUnmuteFactorStart32kHz;
unmuting_window_increment = DspHelper::kUnmuteFactorIncrement32kHz;
} else { // fs_ == 48000
muting_window = DspHelper::kMuteFactorStart48kHz;
muting_window_increment = DspHelper::kMuteFactorIncrement48kHz;
unmuting_window = DspHelper::kUnmuteFactorStart48kHz;
unmuting_window_increment = DspHelper::kUnmuteFactorIncrement48kHz;
}
// Smooth the expanded signal if it has not been muted to a low amplitude
// and `current_voice_mix_factor` is larger than 0.5.
if ((parameters.mute_factor > 819) &&
(parameters.current_voice_mix_factor > 8192)) {
size_t start_ix = sync_buffer_->Size() - overlap_length_;
for (size_t i = 0; i < overlap_length_; i++) {
// Do overlap add between new vector and overlap.
(*sync_buffer_)[channel_ix][start_ix + i] =
(((*sync_buffer_)[channel_ix][start_ix + i] * muting_window) +
(((parameters.mute_factor * voiced_vector_storage[i]) >> 14) *
unmuting_window) +
16384) >>
15;
muting_window += muting_window_increment;
unmuting_window += unmuting_window_increment;
}
} else if (parameters.mute_factor == 0) {
// The expanded signal will consist of only comfort noise if
// mute_factor = 0. Set the output length to 15 ms for best noise
// production.
// TODO(hlundin): This has been disabled since the length of
// parameters.expand_vector0 and parameters.expand_vector1 no longer
// match with expand_lags_, causing invalid reads and writes. Is it a good
// idea to enable this again, and solve the vector size problem?
// max_lag_ = fs_mult * 120;
// expand_lags_[0] = fs_mult * 120;
// expand_lags_[1] = fs_mult * 120;
// expand_lags_[2] = fs_mult * 120;
}
// Unvoiced part.
// Filter `scaled_random_vector` through `parameters.ar_filter`.
memcpy(unvoiced_vector - kUnvoicedLpcOrder, parameters.ar_filter_state,
sizeof(int16_t) * kUnvoicedLpcOrder);
int32_t add_constant = 0;
if (parameters.ar_gain_scale > 0) {
add_constant = 1 << (parameters.ar_gain_scale - 1);
}
WebRtcSpl_AffineTransformVector(scaled_random_vector, random_vector,
parameters.ar_gain, add_constant,
parameters.ar_gain_scale, current_lag);
WebRtcSpl_FilterARFastQ12(scaled_random_vector, unvoiced_vector,
parameters.ar_filter, kUnvoicedLpcOrder + 1,
current_lag);
memcpy(parameters.ar_filter_state,
&(unvoiced_vector[current_lag - kUnvoicedLpcOrder]),
sizeof(int16_t) * kUnvoicedLpcOrder);
// Combine voiced and unvoiced contributions.
// Set a suitable cross-fading slope.
// For lag =
//   <= 31 * fs_mult            => go from 1 to 0 in about 8 ms;
//   (>= 32 .. <= 63) * fs_mult => go from 1 to 0 in about 16 ms;
//   >= 64 * fs_mult            => go from 1 to 0 in about 32 ms.
// temp_shift = getbits(max_lag_) - 5.
int temp_shift =
(31 - WebRtcSpl_NormW32(rtc::dchecked_cast<int32_t>(max_lag_))) - 5;
int16_t mix_factor_increment = 256 >> temp_shift;
if (stop_muting_) {
mix_factor_increment = 0;
}
// Create combined signal by shifting in more and more of unvoiced part.
temp_shift = 8 - temp_shift; // = getbits(mix_factor_increment).
size_t temp_length =
(parameters.current_voice_mix_factor - parameters.voice_mix_factor) >>
temp_shift;
temp_length = std::min(temp_length, current_lag);
DspHelper::CrossFade(voiced_vector, unvoiced_vector, temp_length,
&parameters.current_voice_mix_factor,
mix_factor_increment, temp_data);
// End of cross-fading period was reached before end of expanded signal
// path. Mix the rest with a fixed mixing factor.
if (temp_length < current_lag) {
if (mix_factor_increment != 0) {
parameters.current_voice_mix_factor = parameters.voice_mix_factor;
}
int16_t temp_scale = 16384 - parameters.current_voice_mix_factor;
WebRtcSpl_ScaleAndAddVectorsWithRound(
voiced_vector + temp_length, parameters.current_voice_mix_factor,
unvoiced_vector + temp_length, temp_scale, 14,
temp_data + temp_length, current_lag - temp_length);
}
// Select muting slope depending on how many consecutive expands we have
// done.
if (consecutive_expands_ == 3) {
// Let the mute factor decrease from 1.0 to 0.95 in 6.25 ms.
// mute_slope = 0.0010 / fs_mult in Q20.
parameters.mute_slope = std::max(parameters.mute_slope, 1049 / fs_mult);
}
if (consecutive_expands_ == 7) {
// Let the mute factor decrease from 1.0 to 0.90 in 6.25 ms.
// mute_slope = 0.0020 / fs_mult in Q20.
parameters.mute_slope = std::max(parameters.mute_slope, 2097 / fs_mult);
}
// Mute segment according to slope value.
if ((consecutive_expands_ != 0) || !parameters.onset) {
// Mute to the previous level, then continue with the muting.
WebRtcSpl_AffineTransformVector(
temp_data, temp_data, parameters.mute_factor, 8192, 14, current_lag);
if (!stop_muting_) {
DspHelper::MuteSignal(temp_data, parameters.mute_slope, current_lag);
// Shift by 6 to go from Q20 to Q14.
// TODO(hlundin): Adding 8192 before shifting 6 steps seems wrong.
// Legacy.
int16_t gain = static_cast<int16_t>(
16384 - (((current_lag * parameters.mute_slope) + 8192) >> 6));
gain = ((gain * parameters.mute_factor) + 8192) >> 14;
// Guard against getting stuck with very small (but sometimes audible)
// gain.
if ((consecutive_expands_ > 3) && (gain >= parameters.mute_factor)) {
parameters.mute_factor = 0;
} else {
parameters.mute_factor = gain;
}
}
}
// Background noise part.
background_noise_->GenerateBackgroundNoise(
random_vector, channel_ix, channel_parameters_[channel_ix].mute_slope,
TooManyExpands(), current_lag, unvoiced_array_memory);
// Add background noise to the combined voiced-unvoiced signal.
for (size_t i = 0; i < current_lag; i++) {
temp_data[i] = temp_data[i] + noise_vector[i];
}
if (channel_ix == 0) {
output->AssertSize(current_lag);
} else {
RTC_DCHECK_EQ(output->Size(), current_lag);
}
(*output)[channel_ix].OverwriteAt(temp_data, current_lag, 0);
}
// Increase call number and cap it.
consecutive_expands_ = consecutive_expands_ >= kMaxConsecutiveExpands
? kMaxConsecutiveExpands
: consecutive_expands_ + 1;
expand_duration_samples_ += output->Size();
// Clamp the duration counter at 2 seconds.
expand_duration_samples_ = std::min(expand_duration_samples_,
rtc::dchecked_cast<size_t>(fs_hz_ * 2));
return 0;
}
void Expand::SetParametersForNormalAfterExpand() {
current_lag_index_ = 0;
lag_index_direction_ = 0;
stop_muting_ = true; // Do not mute signal any more.
statistics_->LogDelayedPacketOutageEvent(expand_duration_samples_, fs_hz_);
statistics_->EndExpandEvent(fs_hz_);
}
void Expand::SetParametersForMergeAfterExpand() {
current_lag_index_ = -1; /* out of the 3 possible ones */
lag_index_direction_ = 1; /* make sure we get the "optimal" lag */
stop_muting_ = true;
statistics_->EndExpandEvent(fs_hz_);
}
bool Expand::Muted() const {
if (first_expand_ || stop_muting_)
return false;
RTC_DCHECK(channel_parameters_);
for (size_t ch = 0; ch < num_channels_; ++ch) {
if (channel_parameters_[ch].mute_factor != 0)
return false;
}
return true;
}
size_t Expand::overlap_length() const {
return overlap_length_;
}
void Expand::InitializeForAnExpandPeriod() {
lag_index_direction_ = 1;
current_lag_index_ = -1;
stop_muting_ = false;
random_vector_->set_seed_increment(1);
consecutive_expands_ = 0;
for (size_t ix = 0; ix < num_channels_; ++ix) {
channel_parameters_[ix].current_voice_mix_factor = 16384; // 1.0 in Q14.
channel_parameters_[ix].mute_factor = 16384; // 1.0 in Q14.
// Start with 0 gain for background noise.
background_noise_->SetMuteFactor(ix, 0);
}
}
bool Expand::TooManyExpands() {
return consecutive_expands_ >= kMaxConsecutiveExpands;
}
void Expand::AnalyzeSignal(int16_t* random_vector) {
int32_t auto_correlation[kUnvoicedLpcOrder + 1];
int16_t reflection_coeff[kUnvoicedLpcOrder];
int16_t correlation_vector[kMaxSampleRate / 8000 * 102];
size_t best_correlation_index[kNumCorrelationCandidates];
int16_t best_correlation[kNumCorrelationCandidates];
size_t best_distortion_index[kNumCorrelationCandidates];
int16_t best_distortion[kNumCorrelationCandidates];
int32_t correlation_vector2[(99 * kMaxSampleRate / 8000) + 1];
int32_t best_distortion_w32[kNumCorrelationCandidates];
static const size_t kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder;
int16_t unvoiced_array_memory[kNoiseLpcOrder + kMaxSampleRate / 8000 * 125];
int16_t* unvoiced_vector = unvoiced_array_memory + kUnvoicedLpcOrder;
int fs_mult = fs_hz_ / 8000;
// Pre-calculate common multiplications with fs_mult.
size_t fs_mult_4 = static_cast<size_t>(fs_mult * 4);
size_t fs_mult_20 = static_cast<size_t>(fs_mult * 20);
size_t fs_mult_120 = static_cast<size_t>(fs_mult * 120);
size_t fs_mult_dist_len = fs_mult * kDistortionLength;
size_t fs_mult_lpc_analysis_len = fs_mult * kLpcAnalysisLength;
const size_t signal_length = static_cast<size_t>(256 * fs_mult);
const size_t audio_history_position = sync_buffer_->Size() - signal_length;
std::unique_ptr<int16_t[]> audio_history(new int16_t[signal_length]);
(*sync_buffer_)[0].CopyTo(signal_length, audio_history_position,
audio_history.get());
// Initialize.
InitializeForAnExpandPeriod();
// Calculate correlation in downsampled domain (4 kHz sample rate).
size_t correlation_length = 51; // TODO(hlundin): Legacy bit-exactness.
// If it is decided to break bit-exactness `correlation_length` should be
// initialized to the return value of Correlation().
Correlation(audio_history.get(), signal_length, correlation_vector);
// Find peaks in correlation vector.
DspHelper::PeakDetection(correlation_vector, correlation_length,
kNumCorrelationCandidates, fs_mult,
best_correlation_index, best_correlation);
// Adjust peak locations; cross-correlation lags start at 2.5 ms
// (20 * fs_mult samples).
best_correlation_index[0] += fs_mult_20;
best_correlation_index[1] += fs_mult_20;
best_correlation_index[2] += fs_mult_20;
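// (A lag of 10 in the 4 kHz downsampled domain equals 2.5 ms, which is
// 20 * fs_mult samples at the full rate; see Correlation() below.)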
// Calculate distortion around the `kNumCorrelationCandidates` best lags.
int distortion_scale = 0;
for (size_t i = 0; i < kNumCorrelationCandidates; i++) {
size_t min_index =
std::max(fs_mult_20, best_correlation_index[i] - fs_mult_4);
size_t max_index =
std::min(fs_mult_120 - 1, best_correlation_index[i] + fs_mult_4);
best_distortion_index[i] = DspHelper::MinDistortion(
&(audio_history[signal_length - fs_mult_dist_len]), min_index,
max_index, fs_mult_dist_len, &best_distortion_w32[i]);
distortion_scale = std::max(16 - WebRtcSpl_NormW32(best_distortion_w32[i]),
distortion_scale);
}
// Shift the distortion values to fit in 16 bits.
WebRtcSpl_VectorBitShiftW32ToW16(best_distortion, kNumCorrelationCandidates,
best_distortion_w32, distortion_scale);
// Find the maximizing index `i` of the cost function
// f[i] = best_correlation[i] / best_distortion[i].
int32_t best_ratio = std::numeric_limits<int32_t>::min();
size_t best_index = std::numeric_limits<size_t>::max();
for (size_t i = 0; i < kNumCorrelationCandidates; ++i) {
int32_t ratio;
if (best_distortion[i] > 0) {
ratio = (best_correlation[i] * (1 << 16)) / best_distortion[i];
} else if (best_correlation[i] == 0) {
ratio = 0; // No correlation; set result to zero.
} else {
ratio = std::numeric_limits<int32_t>::max(); // Denominator is zero.
}
if (ratio > best_ratio) {
best_index = i;
best_ratio = ratio;
}
}
size_t distortion_lag = best_distortion_index[best_index];
size_t correlation_lag = best_correlation_index[best_index];
max_lag_ = std::max(distortion_lag, correlation_lag);
// Calculate the exact best correlation in the range between
// `correlation_lag` and `distortion_lag`.
correlation_length = std::max(std::min(distortion_lag + 10, fs_mult_120),
static_cast<size_t>(60 * fs_mult));
size_t start_index = std::min(distortion_lag, correlation_lag);
size_t correlation_lags = static_cast<size_t>(
WEBRTC_SPL_ABS_W16((distortion_lag - correlation_lag)) + 1);
RTC_DCHECK_LE(correlation_lags, static_cast<size_t>(99 * fs_mult + 1));
for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {
ChannelParameters& parameters = channel_parameters_[channel_ix];
if (channel_ix > 0) {
// When channel_ix == 0, audio_history contains the correct audio. For the
// other cases, we will have to copy the correct channel into
// audio_history.
(*sync_buffer_)[channel_ix].CopyTo(signal_length, audio_history_position,
audio_history.get());
}
// Calculate suitable scaling.
int16_t signal_max = WebRtcSpl_MaxAbsValueW16(
&audio_history[signal_length - correlation_length - start_index -
correlation_lags],
correlation_length + start_index + correlation_lags - 1);
int correlation_scale =
(31 - WebRtcSpl_NormW32(signal_max * signal_max)) +
(31 - WebRtcSpl_NormW32(static_cast<int32_t>(correlation_length))) - 31;
correlation_scale = std::max(0, correlation_scale);
// Calculate the correlation, store in `correlation_vector2`.
WebRtcSpl_CrossCorrelation(
correlation_vector2,
&(audio_history[signal_length - correlation_length]),
&(audio_history[signal_length - correlation_length - start_index]),
correlation_length, correlation_lags, correlation_scale, -1);
// Find maximizing index.
best_index = WebRtcSpl_MaxIndexW32(correlation_vector2, correlation_lags);
int32_t max_correlation = correlation_vector2[best_index];
// Compensate index with start offset.
best_index = best_index + start_index;
// Calculate energies.
int32_t energy1 = WebRtcSpl_DotProductWithScale(
&(audio_history[signal_length - correlation_length]),
&(audio_history[signal_length - correlation_length]),
correlation_length, correlation_scale);
int32_t energy2 = WebRtcSpl_DotProductWithScale(
&(audio_history[signal_length - correlation_length - best_index]),
&(audio_history[signal_length - correlation_length - best_index]),
correlation_length, correlation_scale);
// Calculate the correlation coefficient between the two portions of the
// signal.
int32_t corr_coefficient;
if ((energy1 > 0) && (energy2 > 0)) {
int energy1_scale = std::max(16 - WebRtcSpl_NormW32(energy1), 0);
int energy2_scale = std::max(16 - WebRtcSpl_NormW32(energy2), 0);
// Make sure total scaling is even (to simplify scale factor after sqrt).
if ((energy1_scale + energy2_scale) & 1) {
// If sum is odd, add 1 to make it even.
energy1_scale += 1;
}
int32_t scaled_energy1 = energy1 >> energy1_scale;
int32_t scaled_energy2 = energy2 >> energy2_scale;
int16_t sqrt_energy_product = static_cast<int16_t>(
WebRtcSpl_SqrtFloor(scaled_energy1 * scaled_energy2));
// Calculate max_correlation / sqrt(energy1 * energy2) in Q14.
int cc_shift = 14 - (energy1_scale + energy2_scale) / 2;
max_correlation = WEBRTC_SPL_SHIFT_W32(max_correlation, cc_shift);
corr_coefficient =
WebRtcSpl_DivW32W16(max_correlation, sqrt_energy_product);
// Cap at 1.0 in Q14.
corr_coefficient = std::min(16384, corr_coefficient);
} else {
corr_coefficient = 0;
}
// Extract the two vectors expand_vector0 and expand_vector1 from
// `audio_history`.
size_t expansion_length = max_lag_ + overlap_length_;
const int16_t* vector1 = &(audio_history[signal_length - expansion_length]);
const int16_t* vector2 = vector1 - distortion_lag;
// Normalize the second vector to the same energy as the first.
energy1 = WebRtcSpl_DotProductWithScale(vector1, vector1, expansion_length,
correlation_scale);
energy2 = WebRtcSpl_DotProductWithScale(vector2, vector2, expansion_length,
correlation_scale);
// Confirm that amplitude ratio sqrt(energy1 / energy2) is within 0.5 - 2.0,
// i.e., energy1 / energy2 is within 0.25 - 4.
int16_t amplitude_ratio;
if ((energy1 / 4 < energy2) && (energy1 > energy2 / 4)) {
// Energy constraint fulfilled. Use both vectors and scale them
// accordingly.
int32_t scaled_energy2 = std::max(16 - WebRtcSpl_NormW32(energy2), 0);
int32_t scaled_energy1 = scaled_energy2 - 13;
// Calculate scaled_energy1 / scaled_energy2 in Q13.
int32_t energy_ratio =
WebRtcSpl_DivW32W16(WEBRTC_SPL_SHIFT_W32(energy1, -scaled_energy1),
static_cast<int16_t>(energy2 >> scaled_energy2));
// Calculate sqrt ratio in Q13 (sqrt of en1/en2 in Q26).
amplitude_ratio =
static_cast<int16_t>(WebRtcSpl_SqrtFloor(energy_ratio << 13));
// Copy the two vectors and give them the same energy.
parameters.expand_vector0.Clear();
parameters.expand_vector0.PushBack(vector1, expansion_length);
parameters.expand_vector1.Clear();
if (parameters.expand_vector1.Size() < expansion_length) {
parameters.expand_vector1.Extend(expansion_length -
parameters.expand_vector1.Size());
}
std::unique_ptr<int16_t[]> temp_1(new int16_t[expansion_length]);
WebRtcSpl_AffineTransformVector(
temp_1.get(), const_cast<int16_t*>(vector2), amplitude_ratio, 4096,
13, expansion_length);
parameters.expand_vector1.OverwriteAt(temp_1.get(), expansion_length, 0);
} else {
// Energy change constraint not fulfilled. Only use last vector.
parameters.expand_vector0.Clear();
parameters.expand_vector0.PushBack(vector1, expansion_length);
// Copy from expand_vector0 to expand_vector1.
parameters.expand_vector0.CopyTo(&parameters.expand_vector1);
// Set `amplitude_ratio` since it is used when computing the muting slope.
if ((energy1 / 4 < energy2) || (energy2 == 0)) {
amplitude_ratio = 4096; // 0.5 in Q13.
} else {
amplitude_ratio = 16384; // 2.0 in Q13.
}
}
// Set the 3 lag values.
if (distortion_lag == correlation_lag) {
expand_lags_[0] = distortion_lag;
expand_lags_[1] = distortion_lag;
expand_lags_[2] = distortion_lag;
} else {
// `distortion_lag` and `correlation_lag` are not equal; use different
// combinations of the two.
// First lag is `distortion_lag` only.
expand_lags_[0] = distortion_lag;
// Second lag is the average of the two.
expand_lags_[1] = (distortion_lag + correlation_lag) / 2;
// Third lag is the average again, but rounding towards `correlation_lag`.
if (distortion_lag > correlation_lag) {
expand_lags_[2] = (distortion_lag + correlation_lag - 1) / 2;
} else {
expand_lags_[2] = (distortion_lag + correlation_lag + 1) / 2;
}
}
// Calculate the LPC and the gain of the filters.
// Calculate kUnvoicedLpcOrder + 1 lags of the auto-correlation function.
size_t temp_index =
signal_length - fs_mult_lpc_analysis_len - kUnvoicedLpcOrder;
// Copy signal to temporary vector to be able to pad with leading zeros.
int16_t* temp_signal =
new int16_t[fs_mult_lpc_analysis_len + kUnvoicedLpcOrder];
memset(temp_signal, 0,
sizeof(int16_t) * (fs_mult_lpc_analysis_len + kUnvoicedLpcOrder));
memcpy(&temp_signal[kUnvoicedLpcOrder],
&audio_history[temp_index + kUnvoicedLpcOrder],
sizeof(int16_t) * fs_mult_lpc_analysis_len);
CrossCorrelationWithAutoShift(
&temp_signal[kUnvoicedLpcOrder], &temp_signal[kUnvoicedLpcOrder],
fs_mult_lpc_analysis_len, kUnvoicedLpcOrder + 1, -1, auto_correlation);
delete[] temp_signal;
// Verify that variance is positive.
if (auto_correlation[0] > 0) {
// Estimate AR filter parameters using Levinson-Durbin algorithm;
// kUnvoicedLpcOrder + 1 filter coefficients.
int16_t stability =
WebRtcSpl_LevinsonDurbin(auto_correlation, parameters.ar_filter,
reflection_coeff, kUnvoicedLpcOrder);
// Keep filter parameters only if filter is stable.
if (stability != 1) {
// Set first coefficient to 4096 (1.0 in Q12).
parameters.ar_filter[0] = 4096;
// Set remaining `kUnvoicedLpcOrder` coefficients to zero.
WebRtcSpl_MemSetW16(parameters.ar_filter + 1, 0, kUnvoicedLpcOrder);
}
}
if (channel_ix == 0) {
// Extract a noise segment.
size_t noise_length;
if (distortion_lag < 40) {
noise_length = 2 * distortion_lag + 30;
} else {
noise_length = distortion_lag + 30;
}
if (noise_length <= RandomVector::kRandomTableSize) {
memcpy(random_vector, RandomVector::kRandomTable,
sizeof(int16_t) * noise_length);
} else {
// Only applies to SWB where length could be larger than
// `kRandomTableSize`.
memcpy(random_vector, RandomVector::kRandomTable,
sizeof(int16_t) * RandomVector::kRandomTableSize);
RTC_DCHECK_LE(noise_length, kMaxSampleRate / 8000 * 120 + 30);
random_vector_->IncreaseSeedIncrement(2);
random_vector_->Generate(
noise_length - RandomVector::kRandomTableSize,
&random_vector[RandomVector::kRandomTableSize]);
}
}
// Set up state vector and calculate scale factor for unvoiced filtering.
memcpy(parameters.ar_filter_state,
&(audio_history[signal_length - kUnvoicedLpcOrder]),
sizeof(int16_t) * kUnvoicedLpcOrder);
memcpy(unvoiced_vector - kUnvoicedLpcOrder,
&(audio_history[signal_length - 128 - kUnvoicedLpcOrder]),
sizeof(int16_t) * kUnvoicedLpcOrder);
WebRtcSpl_FilterMAFastQ12(&audio_history[signal_length - 128],
unvoiced_vector, parameters.ar_filter,
kUnvoicedLpcOrder + 1, 128);
const int unvoiced_max_abs = [&] {
const int16_t max_abs = WebRtcSpl_MaxAbsValueW16(unvoiced_vector, 128);
// Since WebRtcSpl_MaxAbsValueW16 returns 2^15 - 1 when the input contains
// -2^15, we have to conservatively bump the return value by 1
// if it is 2^15 - 1.
return max_abs == WEBRTC_SPL_WORD16_MAX ? max_abs + 1 : max_abs;
}();
// Pick the smallest n such that 2^n > unvoiced_max_abs; then the maximum
// value of the dot product is less than 2^7 * 2^(2*n) = 2^(2*n + 7), so to
// prevent overflows we want 2n + 7 <= 31, which means we should shift by
// 2n + 7 - 31 bits, if this value is greater than zero.
int unvoiced_prescale =
std::max(0, 2 * WebRtcSpl_GetSizeInBits(unvoiced_max_abs) - 24);
int32_t unvoiced_energy = WebRtcSpl_DotProductWithScale(
unvoiced_vector, unvoiced_vector, 128, unvoiced_prescale);
// Normalize `unvoiced_energy` to 28 or 29 bits to preserve sqrt() accuracy.
int16_t unvoiced_scale = WebRtcSpl_NormW32(unvoiced_energy) - 3;
// Make sure we do an odd number of shifts since we already have 7 shifts
// from dividing with 128 earlier. This will make the total scale factor
// even, which is suitable for the sqrt.
unvoiced_scale += ((unvoiced_scale & 0x1) ^ 0x1);
unvoiced_energy = WEBRTC_SPL_SHIFT_W32(unvoiced_energy, unvoiced_scale);
int16_t unvoiced_gain =
static_cast<int16_t>(WebRtcSpl_SqrtFloor(unvoiced_energy));
parameters.ar_gain_scale =
13 + (unvoiced_scale + 7 - unvoiced_prescale) / 2;
parameters.ar_gain = unvoiced_gain;
// Calculate voice_mix_factor from corr_coefficient.
// Let x = corr_coefficient. Then, we compute:
// if (x > 0.48)
// voice_mix_factor = (-5179 + 19931x - 16422x^2 + 5776x^3) / 4096;
// else
// voice_mix_factor = 0;
if (corr_coefficient > 7875) {
int16_t x1, x2, x3;
// `corr_coefficient` is in Q14.
x1 = static_cast<int16_t>(corr_coefficient);
x2 = (x1 * x1) >> 14; // Shift 14 to keep result in Q14.
x3 = (x1 * x2) >> 14;
static const int kCoefficients[4] = {-5179, 19931, -16422, 5776};
int32_t temp_sum = kCoefficients[0] * 16384;
temp_sum += kCoefficients[1] * x1;
temp_sum += kCoefficients[2] * x2;
temp_sum += kCoefficients[3] * x3;
parameters.voice_mix_factor =
static_cast<int16_t>(std::min(temp_sum / 4096, 16384));
parameters.voice_mix_factor =
std::max(parameters.voice_mix_factor, static_cast<int16_t>(0));
} else {
parameters.voice_mix_factor = 0;
}
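// For example, at full correlation (x = 1.0, i.e., 16384 in Q14) the
// polynomial gives (-5179 + 19931 - 16422 + 5776) * 16384 / 4096 = 16424,
// which is capped to 16384, i.e., a fully voiced mix.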
// Calculate muting slope. Reuse value from earlier scaling of
// `expand_vector0` and `expand_vector1`.
int16_t slope = amplitude_ratio;
if (slope > 12288) {
// slope > 1.5.
// Calculate (1 - (1 / slope)) / distortion_lag =
// (slope - 1) / (distortion_lag * slope).
// `slope` is in Q13, so 1 corresponds to 8192. Shift up to Q25 before
// the division.
// Shift the denominator from Q13 to Q5 before the division. The result of
// the division will then be in Q20.
int16_t denom =
rtc::saturated_cast<int16_t>((distortion_lag * slope) >> 8);
int temp_ratio = WebRtcSpl_DivW32W16((slope - 8192) << 12, denom);
if (slope > 14746) {
// slope > 1.8.
// Divide by 2, with proper rounding.
parameters.mute_slope = (temp_ratio + 1) / 2;
} else {
// Divide by 8, with proper rounding.
parameters.mute_slope = (temp_ratio + 4) / 8;
}
parameters.onset = true;
} else {
// Calculate (1 - slope) / distortion_lag.
// Shift `slope` by 7 to Q20 before the division. The result is in Q20.
parameters.mute_slope = WebRtcSpl_DivW32W16(
(8192 - slope) * 128, static_cast<int16_t>(distortion_lag));
if (parameters.voice_mix_factor <= 13107) {
// Make sure the mute factor decreases from 1.0 to 0.9 in no more than
// 6.25 ms.
// mute_slope >= 0.005 / fs_mult in Q20.
parameters.mute_slope = std::max(5243 / fs_mult, parameters.mute_slope);
} else if (slope > 8028) {
parameters.mute_slope = 0;
}
parameters.onset = false;
}
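// For example, with slope = 16384 (2.0 in Q13) and distortion_lag = 100:
// denom = (100 * 16384) >> 8 = 6400 and
// temp_ratio = ((16384 - 8192) << 12) / 6400 = 5242; since slope > 14746,
// mute_slope = (5242 + 1) / 2 = 2621, i.e., about 0.0025 in Q20, which
// matches (1 - 1/2.0) / 100 = 0.005 halved.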
}
}
Expand::ChannelParameters::ChannelParameters()
: mute_factor(16384),
ar_gain(0),
ar_gain_scale(0),
voice_mix_factor(0),
current_voice_mix_factor(0),
onset(false),
mute_slope(0) {
memset(ar_filter, 0, sizeof(ar_filter));
memset(ar_filter_state, 0, sizeof(ar_filter_state));
}
void Expand::Correlation(const int16_t* input,
size_t input_length,
int16_t* output) const {
// Set parameters depending on sample rate.
const int16_t* filter_coefficients;
size_t num_coefficients;
int16_t downsampling_factor;
if (fs_hz_ == 8000) {
num_coefficients = 3;
downsampling_factor = 2;
filter_coefficients = DspHelper::kDownsample8kHzTbl;
} else if (fs_hz_ == 16000) {
num_coefficients = 5;
downsampling_factor = 4;
filter_coefficients = DspHelper::kDownsample16kHzTbl;
} else if (fs_hz_ == 32000) {
num_coefficients = 7;
downsampling_factor = 8;
filter_coefficients = DspHelper::kDownsample32kHzTbl;
} else { // fs_hz_ == 48000.
num_coefficients = 7;
downsampling_factor = 12;
filter_coefficients = DspHelper::kDownsample48kHzTbl;
}
// Correlate from lag 10 to lag 60 in downsampled domain.
// (Corresponds to 20-120 for narrow-band, 40-240 for wide-band, and so on.)
static const size_t kCorrelationStartLag = 10;
static const size_t kNumCorrelationLags = 54;
static const size_t kCorrelationLength = 60;
// Downsample to 4 kHz sample rate.
static const size_t kDownsampledLength =
kCorrelationStartLag + kNumCorrelationLags + kCorrelationLength;
int16_t downsampled_input[kDownsampledLength];
static const size_t kFilterDelay = 0;
WebRtcSpl_DownsampleFast(
input + input_length - kDownsampledLength * downsampling_factor,
kDownsampledLength * downsampling_factor, downsampled_input,
kDownsampledLength, filter_coefficients, num_coefficients,
downsampling_factor, kFilterDelay);
// Normalize `downsampled_input` to use all 16 bits.
int16_t max_value =
WebRtcSpl_MaxAbsValueW16(downsampled_input, kDownsampledLength);
int16_t norm_shift = 16 - WebRtcSpl_NormW32(max_value);
WebRtcSpl_VectorBitShiftW16(downsampled_input, kDownsampledLength,
downsampled_input, norm_shift);
int32_t correlation[kNumCorrelationLags];
CrossCorrelationWithAutoShift(
&downsampled_input[kDownsampledLength - kCorrelationLength],
&downsampled_input[kDownsampledLength - kCorrelationLength -
kCorrelationStartLag],
kCorrelationLength, kNumCorrelationLags, -1, correlation);
// Normalize and move data from 32-bit to 16-bit vector.
int32_t max_correlation =
WebRtcSpl_MaxAbsValueW32(correlation, kNumCorrelationLags);
int16_t norm_shift2 = static_cast<int16_t>(
std::max(18 - WebRtcSpl_NormW32(max_correlation), 0));
WebRtcSpl_VectorBitShiftW32ToW16(output, kNumCorrelationLags, correlation,
norm_shift2);
}
void Expand::UpdateLagIndex() {
current_lag_index_ = current_lag_index_ + lag_index_direction_;
// Change direction if needed.
if (current_lag_index_ <= 0) {
lag_index_direction_ = 1;
}
if (current_lag_index_ >= kNumLags - 1) {
lag_index_direction_ = -1;
}
}
Expand* ExpandFactory::Create(BackgroundNoise* background_noise,
SyncBuffer* sync_buffer,
RandomVector* random_vector,
StatisticsCalculator* statistics,
int fs,
size_t num_channels) const {
return new Expand(background_noise, sync_buffer, random_vector, statistics,
fs, num_channels);
}
void Expand::GenerateRandomVector(int16_t seed_increment,
size_t length,
int16_t* random_vector) {
// TODO(turajs): According to hlundin, the loop should not be needed. It should
// be just as good to generate the whole vector in one call.
size_t samples_generated = 0;
const size_t kMaxRandSamples = RandomVector::kRandomTableSize;
while (samples_generated < length) {
size_t rand_length = std::min(length - samples_generated, kMaxRandSamples);
random_vector_->IncreaseSeedIncrement(seed_increment);
random_vector_->Generate(rand_length, &random_vector[samples_generated]);
samples_generated += rand_length;
}
}
} // namespace webrtc

View file

@ -0,0 +1,153 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_EXPAND_H_
#define MODULES_AUDIO_CODING_NETEQ_EXPAND_H_
#include <memory>
#include "modules/audio_coding/neteq/audio_vector.h"
namespace webrtc {
// Forward declarations.
class AudioMultiVector;
class BackgroundNoise;
class RandomVector;
class StatisticsCalculator;
class SyncBuffer;
// This class handles extrapolation of audio data from the sync_buffer to
// produce packet-loss concealment.
// TODO(hlundin): Refactor this class to divide the long methods into shorter
// ones.
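//
// A minimal usage sketch (illustration only; see expand_unittest.cc for a
// complete setup of the collaborating objects):
//
//   Expand expand(&background_noise, &sync_buffer, &random_vector,
//                 &statistics, 16000, 1);
//   AudioMultiVector output(1);
//   expand.Process(&output);  // Produces one frame of concealment audio.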
class Expand {
public:
Expand(BackgroundNoise* background_noise,
SyncBuffer* sync_buffer,
RandomVector* random_vector,
StatisticsCalculator* statistics,
int fs,
size_t num_channels);
virtual ~Expand();
Expand(const Expand&) = delete;
Expand& operator=(const Expand&) = delete;
// Resets the object.
virtual void Reset();
// The main method to produce concealment data. The data is appended to the
// end of `output`.
virtual int Process(AudioMultiVector* output);
// Prepare the object to do extra expansion during normal operation following
// a period of expands.
virtual void SetParametersForNormalAfterExpand();
// Prepare the object to do extra expansion during merge operation following
// a period of expands.
virtual void SetParametersForMergeAfterExpand();
// Returns the mute factor for `channel`.
int16_t MuteFactor(size_t channel) const {
RTC_DCHECK_LT(channel, num_channels_);
return channel_parameters_[channel].mute_factor;
}
// Returns true if expansion has been faded down to zero amplitude (for all
// channels); false otherwise.
bool Muted() const;
// Accessors and mutators.
virtual size_t overlap_length() const;
size_t max_lag() const { return max_lag_; }
protected:
static const int kMaxConsecutiveExpands = 200;
void GenerateRandomVector(int16_t seed_increment,
size_t length,
int16_t* random_vector);
// Initializes member variables at the beginning of an expand period.
void InitializeForAnExpandPeriod();
bool TooManyExpands();
// Analyzes the signal history in `sync_buffer_`, and set up all parameters
// necessary to produce concealment data.
void AnalyzeSignal(int16_t* random_vector);
RandomVector* const random_vector_;
SyncBuffer* const sync_buffer_;
bool first_expand_;
const int fs_hz_;
const size_t num_channels_;
int consecutive_expands_;
private:
static const size_t kUnvoicedLpcOrder = 6;
static const size_t kNumCorrelationCandidates = 3;
static const size_t kDistortionLength = 20;
static const size_t kLpcAnalysisLength = 160;
static const size_t kMaxSampleRate = 48000;
static const int kNumLags = 3;
struct ChannelParameters {
ChannelParameters();
int16_t mute_factor;
int16_t ar_filter[kUnvoicedLpcOrder + 1];
int16_t ar_filter_state[kUnvoicedLpcOrder];
int16_t ar_gain;
int16_t ar_gain_scale;
int16_t voice_mix_factor; /* Q14 */
int16_t current_voice_mix_factor; /* Q14 */
AudioVector expand_vector0;
AudioVector expand_vector1;
bool onset;
int mute_slope; /* Q20 */
};
// Calculate the auto-correlation of `input`, with length `input_length`
// samples. The correlation is calculated from a downsampled version of
// `input`, and is written to `output`.
void Correlation(const int16_t* input,
size_t input_length,
int16_t* output) const;
void UpdateLagIndex();
BackgroundNoise* const background_noise_;
StatisticsCalculator* const statistics_;
const size_t overlap_length_;
size_t max_lag_;
size_t expand_lags_[kNumLags];
int lag_index_direction_;
int current_lag_index_;
bool stop_muting_;
size_t expand_duration_samples_;
std::unique_ptr<ChannelParameters[]> channel_parameters_;
};
struct ExpandFactory {
ExpandFactory() {}
virtual ~ExpandFactory() {}
virtual Expand* Create(BackgroundNoise* background_noise,
SyncBuffer* sync_buffer,
RandomVector* random_vector,
StatisticsCalculator* statistics,
int fs,
size_t num_channels) const;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_EXPAND_H_

View file

@ -0,0 +1,71 @@
/* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/expand_uma_logger.h"
#include "absl/strings/string_view.h"
#include "rtc_base/checks.h"
#include "system_wrappers/include/metrics.h"
namespace webrtc {
namespace {
std::unique_ptr<TickTimer::Countdown> GetNewCountdown(
const TickTimer& tick_timer,
int logging_period_s) {
return tick_timer.GetNewCountdown((logging_period_s * 1000) /
tick_timer.ms_per_tick());
}
} // namespace
ExpandUmaLogger::ExpandUmaLogger(absl::string_view uma_name,
int logging_period_s,
const TickTimer* tick_timer)
: uma_name_(uma_name),
logging_period_s_(logging_period_s),
tick_timer_(*tick_timer),
timer_(GetNewCountdown(tick_timer_, logging_period_s_)) {
RTC_DCHECK(tick_timer);
RTC_DCHECK_GT(logging_period_s_, 0);
}
ExpandUmaLogger::~ExpandUmaLogger() = default;
void ExpandUmaLogger::UpdateSampleCounter(uint64_t samples,
int sample_rate_hz) {
if ((last_logged_value_ && *last_logged_value_ > samples) ||
sample_rate_hz_ != sample_rate_hz) {
// Sanity checks. The incremental counter moved backwards, or sample rate
// changed.
last_logged_value_.reset();
}
last_value_ = samples;
sample_rate_hz_ = sample_rate_hz;
if (!last_logged_value_) {
last_logged_value_ = absl::optional<uint64_t>(samples);
}
if (!timer_->Finished()) {
// Not yet time to log.
return;
}
RTC_DCHECK(last_logged_value_);
RTC_DCHECK_GE(last_value_, *last_logged_value_);
const uint64_t diff = last_value_ - *last_logged_value_;
last_logged_value_ = absl::optional<uint64_t>(last_value_);
// Calculate rate in percent.
RTC_DCHECK_GT(sample_rate_hz, 0);
const int rate = (100 * diff) / (sample_rate_hz * logging_period_s_);
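// For example, with a 10 s logging period at 16 kHz, 4800 expanded samples
// in one period give 100 * 4800 / 160000 = 3 (percent).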
RTC_DCHECK_GE(rate, 0);
RTC_DCHECK_LE(rate, 100);
RTC_HISTOGRAM_PERCENTAGE_SPARSE(uma_name_, rate);
timer_ = GetNewCountdown(tick_timer_, logging_period_s_);
}
} // namespace webrtc

View file

@ -0,0 +1,57 @@
/* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_EXPAND_UMA_LOGGER_H_
#define MODULES_AUDIO_CODING_NETEQ_EXPAND_UMA_LOGGER_H_
#include <stdint.h>
#include <memory>
#include <string>
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "api/neteq/tick_timer.h"
namespace webrtc {
// This class is used to periodically log values to a UMA histogram. The caller
// is expected to update this class with an incremental sample counter which
// counts expand samples. At the end of each logging period, the class will
// calculate the fraction of samples that were expand samples during that period
// and report that in percent. The logging period must be strictly positive.
// Does not take ownership of `tick_timer`; the pointer must refer to a valid
// object that outlives this logger.
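//
// A minimal usage sketch (the histogram name is illustrative only):
//
//   TickTimer tick_timer;
//   ExpandUmaLogger logger("WebRTC.Audio.ExpandRatePercent", 10, &tick_timer);
//   // Then, once per output frame:
//   logger.UpdateSampleCounter(total_expand_samples_so_far, 48000);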
class ExpandUmaLogger {
public:
ExpandUmaLogger(absl::string_view uma_name,
int logging_period_s,
const TickTimer* tick_timer);
~ExpandUmaLogger();
ExpandUmaLogger(const ExpandUmaLogger&) = delete;
ExpandUmaLogger& operator=(const ExpandUmaLogger&) = delete;
// In this call, value should be an incremental sample counter. The sample
// rate must be strictly positive.
void UpdateSampleCounter(uint64_t value, int sample_rate_hz);
private:
const std::string uma_name_;
const int logging_period_s_;
const TickTimer& tick_timer_;
std::unique_ptr<TickTimer::Countdown> timer_;
absl::optional<uint64_t> last_logged_value_;
uint64_t last_value_ = 0;
int sample_rate_hz_ = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_EXPAND_UMA_LOGGER_H_

View file

@ -0,0 +1,203 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for Expand class.
#include "modules/audio_coding/neteq/expand.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "modules/audio_coding/neteq/background_noise.h"
#include "modules/audio_coding/neteq/random_vector.h"
#include "modules/audio_coding/neteq/statistics_calculator.h"
#include "modules/audio_coding/neteq/sync_buffer.h"
#include "modules/audio_coding/neteq/tools/resample_input_audio_file.h"
#include "rtc_base/numerics/safe_conversions.h"
#include "test/gtest.h"
#include "test/testsupport/file_utils.h"
namespace webrtc {
TEST(Expand, CreateAndDestroy) {
int fs = 8000;
size_t channels = 1;
BackgroundNoise bgn(channels);
SyncBuffer sync_buffer(1, 1000);
RandomVector random_vector;
StatisticsCalculator statistics;
Expand expand(&bgn, &sync_buffer, &random_vector, &statistics, fs, channels);
}
TEST(Expand, CreateUsingFactory) {
int fs = 8000;
size_t channels = 1;
BackgroundNoise bgn(channels);
SyncBuffer sync_buffer(1, 1000);
RandomVector random_vector;
StatisticsCalculator statistics;
ExpandFactory expand_factory;
Expand* expand = expand_factory.Create(&bgn, &sync_buffer, &random_vector,
&statistics, fs, channels);
EXPECT_TRUE(expand != NULL);
delete expand;
}
namespace {
class FakeStatisticsCalculator : public StatisticsCalculator {
public:
void LogDelayedPacketOutageEvent(int num_samples, int fs_hz) override {
last_outage_duration_samples_ = num_samples;
}
int last_outage_duration_samples() const {
return last_outage_duration_samples_;
}
private:
int last_outage_duration_samples_ = 0;
};
// This is the same size that is given to the SyncBuffer object in NetEq.
const size_t kNetEqSyncBufferLengthMs = 720;
} // namespace
class ExpandTest : public ::testing::Test {
protected:
ExpandTest()
: input_file_(test::ResourcePath("audio_coding/testfile32kHz", "pcm"),
32000),
test_sample_rate_hz_(32000),
num_channels_(1),
background_noise_(num_channels_),
sync_buffer_(num_channels_,
kNetEqSyncBufferLengthMs * test_sample_rate_hz_ / 1000),
expand_(&background_noise_,
&sync_buffer_,
&random_vector_,
&statistics_,
test_sample_rate_hz_,
num_channels_) {
input_file_.set_output_rate_hz(test_sample_rate_hz_);
}
void SetUp() override {
    // Fast-forward the input file until there is speech (about 1.1 seconds into
// the file).
const int speech_start_samples =
static_cast<int>(test_sample_rate_hz_ * 1.1f);
ASSERT_TRUE(input_file_.Seek(speech_start_samples));
// Pre-load the sync buffer with speech data.
std::unique_ptr<int16_t[]> temp(new int16_t[sync_buffer_.Size()]);
ASSERT_TRUE(input_file_.Read(sync_buffer_.Size(), temp.get()));
sync_buffer_.Channel(0).OverwriteAt(temp.get(), sync_buffer_.Size(), 0);
ASSERT_EQ(1u, num_channels_) << "Fix: Must populate all channels.";
}
test::ResampleInputAudioFile input_file_;
int test_sample_rate_hz_;
size_t num_channels_;
BackgroundNoise background_noise_;
SyncBuffer sync_buffer_;
RandomVector random_vector_;
FakeStatisticsCalculator statistics_;
Expand expand_;
};
// This test calls the expand object to produce concealment data a few times,
// and then ends by calling SetParametersForNormalAfterExpand. This simulates
// the situation where the packet next up for decoding was just delayed, not
// lost.
TEST_F(ExpandTest, DelayedPacketOutage) {
AudioMultiVector output(num_channels_);
size_t sum_output_len_samples = 0;
for (int i = 0; i < 10; ++i) {
EXPECT_EQ(0, expand_.Process(&output));
EXPECT_GT(output.Size(), 0u);
sum_output_len_samples += output.Size();
EXPECT_EQ(0, statistics_.last_outage_duration_samples());
}
expand_.SetParametersForNormalAfterExpand();
  // The reported outage duration should equal the total expand output, in
  // samples.
EXPECT_EQ(rtc::checked_cast<int>(sum_output_len_samples),
statistics_.last_outage_duration_samples());
}
// This test is similar to DelayedPacketOutage, but ends by calling
// SetParametersForMergeAfterExpand. This simulates the situation where the
// packet next up for decoding was actually lost (or at least a later packet
// arrived before it).
TEST_F(ExpandTest, LostPacketOutage) {
AudioMultiVector output(num_channels_);
for (int i = 0; i < 10; ++i) {
EXPECT_EQ(0, expand_.Process(&output));
EXPECT_GT(output.Size(), 0u);
EXPECT_EQ(0, statistics_.last_outage_duration_samples());
}
expand_.SetParametersForMergeAfterExpand();
EXPECT_EQ(0, statistics_.last_outage_duration_samples());
}
// This test is similar to the DelayedPacketOutage test above, but with the
// difference that Expand::Reset() is called during the sixth call to
// Expand::Process() (at i == 5). This should reset the statistics, so that the
// reported outage in the end only covers the four calls made after the reset.
TEST_F(ExpandTest, CheckOutageStatsAfterReset) {
AudioMultiVector output(num_channels_);
size_t sum_output_len_samples = 0;
for (int i = 0; i < 10; ++i) {
EXPECT_EQ(0, expand_.Process(&output));
EXPECT_GT(output.Size(), 0u);
sum_output_len_samples += output.Size();
if (i == 5) {
expand_.Reset();
sum_output_len_samples = 0;
}
EXPECT_EQ(0, statistics_.last_outage_duration_samples());
}
expand_.SetParametersForNormalAfterExpand();
  // The reported outage duration should equal the total expand output, in
  // samples.
EXPECT_EQ(rtc::checked_cast<int>(sum_output_len_samples),
statistics_.last_outage_duration_samples());
}
namespace {
// Runs expand until Muted() returns true. Times out after 1000 calls.
void ExpandUntilMuted(size_t num_channels, Expand* expand) {
EXPECT_FALSE(expand->Muted()) << "Instance is muted from the start";
AudioMultiVector output(num_channels);
int num_calls = 0;
while (!expand->Muted()) {
ASSERT_LT(num_calls++, 1000) << "Test timed out";
EXPECT_EQ(0, expand->Process(&output));
}
}
} // namespace
// Verifies that Muted() returns true after a long expand period. Also verifies
// that Muted() is reset to false after calling Reset(),
// SetParametersForMergeAfterExpand() and SetParametersForNormalAfterExpand().
TEST_F(ExpandTest, Muted) {
ExpandUntilMuted(num_channels_, &expand_);
expand_.Reset();
EXPECT_FALSE(expand_.Muted()); // Should be back to unmuted.
ExpandUntilMuted(num_channels_, &expand_);
expand_.SetParametersForMergeAfterExpand();
EXPECT_FALSE(expand_.Muted()); // Should be back to unmuted.
expand_.Reset(); // Must reset in order to start a new expand period.
ExpandUntilMuted(num_channels_, &expand_);
expand_.SetParametersForNormalAfterExpand();
EXPECT_FALSE(expand_.Muted()); // Should be back to unmuted.
}
// TODO(hlundin): Write more tests.
} // namespace webrtc

View file

@ -0,0 +1,102 @@
<!-- go/cmark -->
<!--* freshness: {owner: 'jakobi' reviewed: '2021-04-13'} *-->
# NetEq
NetEq is the audio jitter buffer and packet loss concealer. The jitter buffer
is adaptive, meaning that the buffering delay is continuously optimized based
on the network conditions. Its main goal is to ensure smooth playout of
incoming audio packets from the network with a low amount of audio artifacts
(alterations to the original content of the packets) while at the same time
keeping the delay as low as possible.
## API
At a high level, the NetEq API has two main functions:
[`InsertPacket`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/api/neteq/neteq.h;l=198;drc=4461f059d180fe8c2886d422ebd1cb55b5c83e72)
and
[`GetAudio`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/api/neteq/neteq.h;l=219;drc=4461f059d180fe8c2886d422ebd1cb55b5c83e72).
### InsertPacket
[`InsertPacket`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/api/neteq/neteq.h;l=198;drc=4461f059d180fe8c2886d422ebd1cb55b5c83e72)
delivers an RTP packet from the network to NetEq where the following happens:
1. The packet is discarded if it is too late for playout (for example if it was
reordered). Otherwise it is put into the packet buffer where it is stored
until it is time for playout. If the buffer is full, all existing packets are
discarded (this should be rare).
2. The interarrival time between packets is analyzed and statistics are
updated, which are then used to derive a new target playout delay. The
interarrival time is measured in the number of GetAudio ticks, so clock drift
between the sender and receiver can be accounted for.
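The sketch below shows roughly how a NetEq instance is created and fed with
packets. Treat it as illustrative only: the factory helper and `InsertPacket`
signature match the WebRTC source tree around this snapshot (newer versions may
also take a receive time), and the payload type mapping is a made-up example.
```c++
#include <memory>

#include "api/array_view.h"
#include "api/audio_codecs/audio_format.h"
#include "api/audio_codecs/builtin_audio_decoder_factory.h"
#include "api/neteq/default_neteq_factory.h"
#include "api/neteq/neteq.h"
#include "api/rtp_headers.h"
#include "system_wrappers/include/clock.h"

std::unique_ptr<webrtc::NetEq> CreateNetEqForOpus() {
  webrtc::NetEq::Config config;
  config.sample_rate_hz = 48000;
  webrtc::DefaultNetEqFactory factory;
  std::unique_ptr<webrtc::NetEq> neteq = factory.CreateNetEq(
      config, webrtc::CreateBuiltinAudioDecoderFactory(),
      webrtc::Clock::GetRealTimeClock());
  // Map RTP payload type 111 to Opus (the payload type number is arbitrary).
  neteq->RegisterPayloadType(111, webrtc::SdpAudioFormat("opus", 48000, 2));
  return neteq;
}

// Called for every RTP packet received from the network.
void OnRtpPacket(webrtc::NetEq& neteq,
                 const webrtc::RTPHeader& rtp_header,
                 rtc::ArrayView<const uint8_t> payload) {
  if (neteq.InsertPacket(rtp_header, payload) != 0) {
    // Insertion failed; the packet is dropped.
  }
}
```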
### GetAudio
[`GetAudio`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/api/neteq/neteq.h;l=219;drc=4461f059d180fe8c2886d422ebd1cb55b5c83e72)
pulls 10 ms of audio from NetEq for playout. A much-simplified version of the
decision logic is as follows:
1. If there is 10 ms audio in the sync buffer then return that.
2. If the next packet is available (based on RTP timestamp) in the packet
buffer then decode it and append the result to the sync buffer.
1. Compare the current delay estimate (filtered buffer level) with the
target delay and time stretch (accelerate or decelerate) the contents of
the sync buffer if the buffer level is too high or too low.
2. Return 10 ms of audio from the sync buffer.
3. If the last decoded packet was a discontinuous transmission (DTX) packet
then generate comfort noise.
4. If no packet is available for decoding, because the next packet has not
arrived or was lost, then generate packet loss concealment by extrapolating
the remaining audio in the sync buffer or by asking the decoder to produce
it.
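A sketch of the corresponding playout side, pulling 10 ms at a time (the
two-argument `GetAudio` form is shown; newer versions add optional output
parameters):
```c++
#include "api/audio/audio_frame.h"
#include "api/neteq/neteq.h"

// Called by the audio device thread every 10 ms.
void PullAudio(webrtc::NetEq& neteq) {
  webrtc::AudioFrame frame;
  bool muted = false;
  if (neteq.GetAudio(&frame, &muted) != 0) {
    return;  // Error; an audible glitch is likely unavoidable here.
  }
  // `frame` now holds 10 ms of audio. frame.speech_type_ indicates whether it
  // is normal decoded audio, concealment (expand) or comfort noise.
}
```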
In summary, the output is the result of one of the following operations:
* Normal: audio decoded from a packet.
* Acceleration: accelerated playout of a decoded packet.
* Preemptive expand: decelerated playout of a decoded packet.
* Expand: packet loss concealment generated by NetEq or the decoder.
* Merge: audio stitched together from packet loss concealment to decoded data
in case of a loss.
* Comfort noise (CNG): comfort noise generated by NetEq or the decoder between
talk spurts due to discontinuous transmission of packets (DTX).
## Statistics
There are a number of functions that can be used to query the internal state of
NetEq, statistics about the type of audio output, and latency metrics such as
how long packets have waited in the buffer.
* [`NetworkStatistics`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/api/neteq/neteq.h;l=273;drc=4461f059d180fe8c2886d422ebd1cb55b5c83e72):
instantaneous values or stats averaged over the duration since the last call
to this function.
* [`GetLifetimeStatistics`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/api/neteq/neteq.h;l=280;drc=4461f059d180fe8c2886d422ebd1cb55b5c83e72):
cumulative stats that persist over the lifetime of the class.
* [`GetOperationsAndState`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/api/neteq/neteq.h;l=284;drc=4461f059d180fe8c2886d422ebd1cb55b5c83e72):
information about the internal state of NetEq (only intended to be used
for testing and debugging).
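As an example, the lifetime counters can be combined into a concealment ratio
for a whole call. A minimal sketch, assuming the `NetEqLifetimeStatistics`
field names `concealed_samples` and `total_samples_received`:
```c++
#include "api/neteq/neteq.h"

// Fraction of received samples that were concealed (0.0 - 1.0).
double ConcealmentFraction(const webrtc::NetEq& neteq) {
  const webrtc::NetEqLifetimeStatistics stats = neteq.GetLifetimeStatistics();
  if (stats.total_samples_received == 0) {
    return 0.0;
  }
  return static_cast<double>(stats.concealed_samples) /
         static_cast<double>(stats.total_samples_received);
}
```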
## Tests and tools
* [`neteq_rtpplay`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_coding/neteq/tools/neteq_rtpplay.cc;drc=cee751abff598fc19506f77de08bea7c61b9dcca):
Simulate NetEq behavior based on either an RTP dump, a PCAP file or an RTC
event log. A replacement audio file can also be used instead of the original
payload. Outputs aggregated statistics and optionally an audio file to
listen to (see the invocation example after this list).
* [`neteq_speed_test`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_coding/neteq/test/neteq_speed_test.cc;drc=2ab97f6f8e27b47c0d9beeb8b6ca5387bda9f55c):
Measure performance of NetEq, used on perf bots.
* Unit tests, including bit-exactness tests in which an RTP file is used as
input to NetEq; the output is concatenated, and a checksum is calculated and
compared against a reference.
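As a minimal illustration of `neteq_rtpplay` (assuming the tool's default
positional arguments), `./neteq_rtpplay input.rtp output.wav` decodes the RTP
dump in `input.rtp` through NetEq and writes the resulting audio to
`output.wav`.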
## Other responsibilities
* Dual-tone multi-frequency signaling (DTMF): receive telephone events and
produce dual tone waveforms.
* Forward error correction (RED or codec inband FEC): split inserted packets
and prioritize the payloads.
* NACK (negative acknowledgement): keep track of lost packets and generate a
list of packets to NACK.
* Audio/video sync: NetEq can be instructed to increase the latency in order
to keep audio and video in sync.

View file

@ -0,0 +1,149 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/histogram.h"
#include <algorithm>
#include <cstdlib>
#include <numeric>
#include "absl/types/optional.h"
#include "rtc_base/checks.h"
#include "rtc_base/numerics/safe_conversions.h"
namespace webrtc {
Histogram::Histogram(size_t num_buckets,
int forget_factor,
absl::optional<double> start_forget_weight)
: buckets_(num_buckets, 0),
forget_factor_(0),
base_forget_factor_(forget_factor),
add_count_(0),
start_forget_weight_(start_forget_weight) {
RTC_DCHECK_LT(base_forget_factor_, 1 << 15);
}
Histogram::~Histogram() {}
// Each element in the vector is first multiplied by the forgetting factor
// `forget_factor_`. The vector element indicated by `value` is then increased
// (additively) by 1 - `forget_factor_`. This way, the probability of `value`
// is slightly increased, while the sum of the histogram remains constant
// (= 1).
// Due to inaccuracies in the fixed-point arithmetic, the histogram may no
// longer sum up to 1 (in Q30) after the update. To correct this, a correction
// term is added or subtracted from the first element (or elements) of the
// vector.
// The forgetting factor `forget_factor_` is also updated. When the DelayManager
// is reset, the factor is set to 0 to facilitate rapid convergence in the
// beginning. With each update of the histogram, the factor is increased towards
// the steady-state value `base_forget_factor_`.
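// Worked example: with forget_factor_ = 32440 (in Q15), each bucket keeps
// 32440 / 32768 ~= 99% of its probability mass on every update, and the
// observed bucket gains the remaining ~1% ((32768 - 32440) << 15 in Q30), so
// the histogram still sums to 1 << 30.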
void Histogram::Add(int value) {
  RTC_DCHECK_GE(value, 0);
  RTC_DCHECK_LT(value, static_cast<int>(buckets_.size()));
int vector_sum = 0; // Sum up the vector elements as they are processed.
// Multiply each element in `buckets_` with `forget_factor_`.
for (int& bucket : buckets_) {
bucket = (static_cast<int64_t>(bucket) * forget_factor_) >> 15;
vector_sum += bucket;
}
// Increase the probability for the currently observed inter-arrival time
// by 1 - `forget_factor_`. The factor is in Q15, `buckets_` in Q30.
// Thus, left-shift 15 steps to obtain result in Q30.
buckets_[value] += (32768 - forget_factor_) << 15;
vector_sum += (32768 - forget_factor_) << 15; // Add to vector sum.
// `buckets_` should sum up to 1 (in Q30), but it may not due to
// fixed-point rounding errors.
vector_sum -= 1 << 30; // Should be zero. Compensate if not.
if (vector_sum != 0) {
// Modify a few values early in `buckets_`.
int flip_sign = vector_sum > 0 ? -1 : 1;
for (int& bucket : buckets_) {
// Add/subtract 1/16 of the element, but not more than `vector_sum`.
int correction = flip_sign * std::min(std::abs(vector_sum), bucket >> 4);
bucket += correction;
vector_sum += correction;
      if (vector_sum == 0) {
break;
}
}
}
  RTC_DCHECK_EQ(vector_sum, 0);  // Verify that the above is correct.
++add_count_;
// Update `forget_factor_` (changes only during the first seconds after a
// reset). The factor converges to `base_forget_factor_`.
if (start_forget_weight_) {
if (forget_factor_ != base_forget_factor_) {
int old_forget_factor = forget_factor_;
int forget_factor =
(1 << 15) * (1 - start_forget_weight_.value() / (add_count_ + 1));
forget_factor_ =
std::max(0, std::min(base_forget_factor_, forget_factor));
// The histogram is updated recursively by forgetting the old histogram
// with `forget_factor_` and adding a new sample multiplied by |1 -
// forget_factor_|. We need to make sure that the effective weight on the
// new sample is no smaller than those on the old samples, i.e., to
// satisfy the following DCHECK.
RTC_DCHECK_GE((1 << 15) - forget_factor_,
((1 << 15) - old_forget_factor) * forget_factor_ >> 15);
}
} else {
forget_factor_ += (base_forget_factor_ - forget_factor_ + 3) >> 2;
}
}
int Histogram::Quantile(int probability) {
  // Find the bucket for which the probability of observing an
  // inter-arrival time larger than or equal to `index` is larger than or
  // equal to `probability`. The sought probability is estimated using the
  // histogram as a reverse cumulative distribution, i.e., the sum of elements
  // from the end up until `index`. Now, since the sum of all elements is 1
  // (in Q30) by definition, and since the solution is often a low value for
  // `index`, it is more efficient to start with `sum` = 1 and subtract
  // elements from the start of the histogram.
int inverse_probability = (1 << 30) - probability;
size_t index = 0; // Start from the beginning of `buckets_`.
int sum = 1 << 30; // Assign to 1 in Q30.
sum -= buckets_[index];
while ((sum > inverse_probability) && (index < buckets_.size() - 1)) {
// Subtract the probabilities one by one until the sum is no longer greater
// than `inverse_probability`.
++index;
sum -= buckets_[index];
}
return static_cast<int>(index);
}
// Set the histogram vector to an exponentially decaying distribution
// buckets_[i] = 0.5^(i+1), i = 0, 1, 2, ...
// buckets_ is in Q30.
void Histogram::Reset() {
// Set temp_prob to (slightly more than) 1 in Q14. This ensures that the sum
// of buckets_ is 1.
uint16_t temp_prob = 0x4002; // 16384 + 2 = 100000000000010 binary.
for (int& bucket : buckets_) {
temp_prob >>= 1;
bucket = temp_prob << 16;
}
forget_factor_ = 0; // Adapt the histogram faster for the first few packets.
add_count_ = 0;
}
int Histogram::NumBuckets() const {
return buckets_.size();
}
} // namespace webrtc

View file

@ -0,0 +1,64 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_HISTOGRAM_H_
#define MODULES_AUDIO_CODING_NETEQ_HISTOGRAM_H_
#include <string.h> // Provide access to size_t.
#include <vector>
#include "absl/types/optional.h"
namespace webrtc {
class Histogram {
public:
// Creates histogram with capacity `num_buckets` and `forget_factor` in Q15.
Histogram(size_t num_buckets,
int forget_factor,
absl::optional<double> start_forget_weight = absl::nullopt);
virtual ~Histogram();
// Resets the histogram to the default start distribution.
virtual void Reset();
// Add entry in bucket `index`.
virtual void Add(int index);
// Calculates the quantile at `probability` (in Q30) of the histogram
// distribution.
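  // For example, Quantile(static_cast<int>(0.95 * (1 << 30))) returns the
  // lowest bucket index at which the cumulative probability reaches 95%.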
virtual int Quantile(int probability);
// Returns the number of buckets in the histogram.
virtual int NumBuckets() const;
// Returns the probability for each bucket in Q30.
const std::vector<int>& buckets() const { return buckets_; }
// Accessors only intended for testing purposes.
int base_forget_factor_for_testing() const { return base_forget_factor_; }
int forget_factor_for_testing() const { return forget_factor_; }
absl::optional<double> start_forget_weight_for_testing() const {
return start_forget_weight_;
}
private:
std::vector<int> buckets_;
int forget_factor_; // Q15
const int base_forget_factor_;
int add_count_;
const absl::optional<double> start_forget_weight_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_HISTOGRAM_H_

View file

@ -0,0 +1,73 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/histogram.h"
#include <cmath>
#include "test/gtest.h"
namespace webrtc {
TEST(HistogramTest, Initialization) {
Histogram histogram(65, 32440);
histogram.Reset();
const auto& buckets = histogram.buckets();
double sum = 0.0;
for (size_t i = 0; i < buckets.size(); i++) {
EXPECT_NEAR(ldexp(std::pow(0.5, static_cast<int>(i + 1)), 30), buckets[i],
65537);
// Tolerance 65537 in Q30 corresponds to a delta of approximately 0.00006.
sum += buckets[i];
}
EXPECT_EQ(1 << 30, static_cast<int>(sum)); // Should be 1 in Q30.
}
TEST(HistogramTest, Add) {
Histogram histogram(10, 32440);
histogram.Reset();
const std::vector<int> before = histogram.buckets();
const int index = 5;
histogram.Add(index);
const std::vector<int> after = histogram.buckets();
EXPECT_GT(after[index], before[index]);
int sum = 0;
for (int bucket : after) {
sum += bucket;
}
EXPECT_EQ(1 << 30, sum);
}
TEST(HistogramTest, ForgetFactor) {
Histogram histogram(10, 32440);
histogram.Reset();
const std::vector<int> before = histogram.buckets();
const int index = 4;
histogram.Add(index);
const std::vector<int> after = histogram.buckets();
for (int i = 0; i < histogram.NumBuckets(); ++i) {
if (i != index) {
EXPECT_LT(after[i], before[i]);
}
}
}
TEST(HistogramTest, ReachSteadyStateForgetFactor) {
static constexpr int kSteadyStateForgetFactor = (1 << 15) * 0.9993;
Histogram histogram(100, kSteadyStateForgetFactor, 1.0);
histogram.Reset();
int n = (1 << 15) / ((1 << 15) - kSteadyStateForgetFactor);
for (int i = 0; i < n; ++i) {
histogram.Add(0);
}
EXPECT_EQ(histogram.forget_factor_for_testing(), kSteadyStateForgetFactor);
}
} // namespace webrtc

View file

@ -0,0 +1,385 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/merge.h"
#include <string.h> // memmove, memcpy, memset, size_t
#include <algorithm> // min, max
#include <memory>
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "modules/audio_coding/neteq/audio_multi_vector.h"
#include "modules/audio_coding/neteq/cross_correlation.h"
#include "modules/audio_coding/neteq/dsp_helper.h"
#include "modules/audio_coding/neteq/expand.h"
#include "modules/audio_coding/neteq/sync_buffer.h"
#include "rtc_base/numerics/safe_conversions.h"
#include "rtc_base/numerics/safe_minmax.h"
namespace webrtc {
Merge::Merge(int fs_hz,
size_t num_channels,
Expand* expand,
SyncBuffer* sync_buffer)
: fs_hz_(fs_hz),
num_channels_(num_channels),
fs_mult_(fs_hz_ / 8000),
timestamps_per_call_(static_cast<size_t>(fs_hz_ / 100)),
expand_(expand),
sync_buffer_(sync_buffer),
expanded_(num_channels_) {
RTC_DCHECK_GT(num_channels_, 0);
}
Merge::~Merge() = default;
size_t Merge::Process(int16_t* input,
size_t input_length,
AudioMultiVector* output) {
// TODO(hlundin): Change to an enumerator and skip assert.
RTC_DCHECK(fs_hz_ == 8000 || fs_hz_ == 16000 || fs_hz_ == 32000 ||
fs_hz_ == 48000);
RTC_DCHECK_LE(fs_hz_, kMaxSampleRate); // Should not be possible.
if (input_length == 0) {
return 0;
}
size_t old_length;
size_t expand_period;
// Get expansion data to overlap and mix with.
size_t expanded_length = GetExpandedSignal(&old_length, &expand_period);
// Transfer input signal to an AudioMultiVector.
AudioMultiVector input_vector(num_channels_);
input_vector.PushBackInterleaved(
rtc::ArrayView<const int16_t>(input, input_length));
size_t input_length_per_channel = input_vector.Size();
RTC_DCHECK_EQ(input_length_per_channel, input_length / num_channels_);
size_t best_correlation_index = 0;
size_t output_length = 0;
std::unique_ptr<int16_t[]> input_channel(
new int16_t[input_length_per_channel]);
std::unique_ptr<int16_t[]> expanded_channel(new int16_t[expanded_length]);
for (size_t channel = 0; channel < num_channels_; ++channel) {
input_vector[channel].CopyTo(input_length_per_channel, 0,
input_channel.get());
expanded_[channel].CopyTo(expanded_length, 0, expanded_channel.get());
const int16_t new_mute_factor = std::min<int16_t>(
16384, SignalScaling(input_channel.get(), input_length_per_channel,
expanded_channel.get()));
if (channel == 0) {
// Downsample, correlate, and find strongest correlation period for the
// reference (i.e., first) channel only.
// Downsample to 4kHz sample rate.
Downsample(input_channel.get(), input_length_per_channel,
expanded_channel.get(), expanded_length);
// Calculate the lag of the strongest correlation period.
best_correlation_index = CorrelateAndPeakSearch(
old_length, input_length_per_channel, expand_period);
}
temp_data_.resize(input_length_per_channel + best_correlation_index);
int16_t* decoded_output = temp_data_.data() + best_correlation_index;
// Mute the new decoded data if needed (and unmute it linearly).
// This is the overlapping part of expanded_signal.
size_t interpolation_length =
std::min(kMaxCorrelationLength * fs_mult_,
expanded_length - best_correlation_index);
interpolation_length =
std::min(interpolation_length, input_length_per_channel);
RTC_DCHECK_LE(new_mute_factor, 16384);
int16_t mute_factor =
std::max(expand_->MuteFactor(channel), new_mute_factor);
RTC_DCHECK_GE(mute_factor, 0);
if (mute_factor < 16384) {
// Set a suitable muting slope (Q20). 0.004 for NB, 0.002 for WB,
// and so on, or as fast as it takes to come back to full gain within the
// frame length.
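      // (4194 in Q20 is ~0.004, and dividing by `fs_mult_` scales the slope
      // inversely with the sample rate.)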
const int back_to_fullscale_inc = static_cast<int>(
((16384 - mute_factor) << 6) / input_length_per_channel);
const int increment = std::max(4194 / fs_mult_, back_to_fullscale_inc);
mute_factor = static_cast<int16_t>(DspHelper::RampSignal(
input_channel.get(), interpolation_length, mute_factor, increment));
DspHelper::UnmuteSignal(&input_channel[interpolation_length],
input_length_per_channel - interpolation_length,
&mute_factor, increment,
&decoded_output[interpolation_length]);
} else {
// No muting needed.
memmove(
&decoded_output[interpolation_length],
&input_channel[interpolation_length],
sizeof(int16_t) * (input_length_per_channel - interpolation_length));
}
// Do overlap and mix linearly.
int16_t increment =
static_cast<int16_t>(16384 / (interpolation_length + 1)); // In Q14.
int16_t local_mute_factor = 16384 - increment;
memmove(temp_data_.data(), expanded_channel.get(),
sizeof(int16_t) * best_correlation_index);
DspHelper::CrossFade(&expanded_channel[best_correlation_index],
input_channel.get(), interpolation_length,
&local_mute_factor, increment, decoded_output);
output_length = best_correlation_index + input_length_per_channel;
if (channel == 0) {
RTC_DCHECK(output->Empty()); // Output should be empty at this point.
output->AssertSize(output_length);
} else {
RTC_DCHECK_EQ(output->Size(), output_length);
}
(*output)[channel].OverwriteAt(temp_data_.data(), output_length, 0);
}
// Copy back the first part of the data to `sync_buffer_` and remove it from
// `output`.
sync_buffer_->ReplaceAtIndex(*output, old_length, sync_buffer_->next_index());
output->PopFront(old_length);
// Return new added length. `old_length` samples were borrowed from
// `sync_buffer_`.
RTC_DCHECK_GE(output_length, old_length);
return output_length - old_length;
}
size_t Merge::GetExpandedSignal(size_t* old_length, size_t* expand_period) {
  // Check how much data is left over from earlier calls.
*old_length = sync_buffer_->FutureLength();
// Should never be less than overlap_length.
RTC_DCHECK_GE(*old_length, expand_->overlap_length());
// Generate data to merge the overlap with using expand.
expand_->SetParametersForMergeAfterExpand();
if (*old_length >= 210 * kMaxSampleRate / 8000) {
// TODO(hlundin): Write test case for this.
// The number of samples available in the sync buffer is more than what fits
// in expanded_signal. Keep the first 210 * kMaxSampleRate / 8000 samples,
// but shift them towards the end of the buffer. This is ok, since all of
// the buffer will be expand data anyway, so as long as the beginning is
// left untouched, we're fine.
size_t length_diff = *old_length - 210 * kMaxSampleRate / 8000;
sync_buffer_->InsertZerosAtIndex(length_diff, sync_buffer_->next_index());
*old_length = 210 * kMaxSampleRate / 8000;
// This is the truncated length.
}
// This assert should always be true thanks to the if statement above.
RTC_DCHECK_GE(210 * kMaxSampleRate / 8000, *old_length);
AudioMultiVector expanded_temp(num_channels_);
expand_->Process(&expanded_temp);
*expand_period = expanded_temp.Size(); // Samples per channel.
expanded_.Clear();
// Copy what is left since earlier into the expanded vector.
expanded_.PushBackFromIndex(*sync_buffer_, sync_buffer_->next_index());
RTC_DCHECK_EQ(expanded_.Size(), *old_length);
RTC_DCHECK_GT(expanded_temp.Size(), 0);
  // Do an "ugly" copy and paste from the expanded signal in order to generate
  // more data to correlate (but not interpolate) with.
const size_t required_length = static_cast<size_t>((120 + 80 + 2) * fs_mult_);
if (expanded_.Size() < required_length) {
while (expanded_.Size() < required_length) {
// Append one more pitch period each time.
expanded_.PushBack(expanded_temp);
}
// Trim the length to exactly `required_length`.
expanded_.PopBack(expanded_.Size() - required_length);
}
RTC_DCHECK_GE(expanded_.Size(), required_length);
return required_length;
}
int16_t Merge::SignalScaling(const int16_t* input,
size_t input_length,
const int16_t* expanded_signal) const {
  // Adjust the muting factor depending on whether the new vector has more or
  // less energy than the background (expanded) signal.
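  // The shifts computed below (`expanded_shift`, `input_shift`) are derived
  // from the maximum absolute sample value so that the energy accumulation in
  // WebRtcSpl_DotProductWithScale() cannot overflow 32 bits.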
const auto mod_input_length = rtc::SafeMin<size_t>(
64 * rtc::dchecked_cast<size_t>(fs_mult_), input_length);
const int16_t expanded_max =
WebRtcSpl_MaxAbsValueW16(expanded_signal, mod_input_length);
int32_t factor =
(expanded_max * expanded_max) / (std::numeric_limits<int32_t>::max() /
static_cast<int32_t>(mod_input_length));
const int expanded_shift = factor == 0 ? 0 : 31 - WebRtcSpl_NormW32(factor);
int32_t energy_expanded = WebRtcSpl_DotProductWithScale(
expanded_signal, expanded_signal, mod_input_length, expanded_shift);
// Calculate energy of input signal.
const int16_t input_max = WebRtcSpl_MaxAbsValueW16(input, mod_input_length);
factor = (input_max * input_max) / (std::numeric_limits<int32_t>::max() /
static_cast<int32_t>(mod_input_length));
const int input_shift = factor == 0 ? 0 : 31 - WebRtcSpl_NormW32(factor);
int32_t energy_input = WebRtcSpl_DotProductWithScale(
input, input, mod_input_length, input_shift);
// Align to the same Q-domain.
if (input_shift > expanded_shift) {
energy_expanded = energy_expanded >> (input_shift - expanded_shift);
} else {
energy_input = energy_input >> (expanded_shift - input_shift);
}
// Calculate muting factor to use for new frame.
int16_t mute_factor;
if (energy_input > energy_expanded) {
// Normalize `energy_input` to 14 bits.
int16_t temp_shift = WebRtcSpl_NormW32(energy_input) - 17;
energy_input = WEBRTC_SPL_SHIFT_W32(energy_input, temp_shift);
// Put `energy_expanded` in a domain 14 higher, so that
// energy_expanded / energy_input is in Q14.
energy_expanded = WEBRTC_SPL_SHIFT_W32(energy_expanded, temp_shift + 14);
// Calculate sqrt(energy_expanded / energy_input) in Q14.
mute_factor = static_cast<int16_t>(
WebRtcSpl_SqrtFloor((energy_expanded / energy_input) << 14));
} else {
// Set to 1 (in Q14) when `expanded` has higher energy than `input`.
mute_factor = 16384;
}
return mute_factor;
}
// TODO(hlundin): There are some parameter values in this method that seem
// strange. Compare with Expand::Correlation.
void Merge::Downsample(const int16_t* input,
size_t input_length,
const int16_t* expanded_signal,
size_t expanded_length) {
const int16_t* filter_coefficients;
size_t num_coefficients;
int decimation_factor = fs_hz_ / 4000;
static const size_t kCompensateDelay = 0;
size_t length_limit = static_cast<size_t>(fs_hz_ / 100); // 10 ms in samples.
if (fs_hz_ == 8000) {
filter_coefficients = DspHelper::kDownsample8kHzTbl;
num_coefficients = 3;
} else if (fs_hz_ == 16000) {
filter_coefficients = DspHelper::kDownsample16kHzTbl;
num_coefficients = 5;
} else if (fs_hz_ == 32000) {
filter_coefficients = DspHelper::kDownsample32kHzTbl;
num_coefficients = 7;
} else { // fs_hz_ == 48000
filter_coefficients = DspHelper::kDownsample48kHzTbl;
num_coefficients = 7;
}
size_t signal_offset = num_coefficients - 1;
WebRtcSpl_DownsampleFast(
&expanded_signal[signal_offset], expanded_length - signal_offset,
expanded_downsampled_, kExpandDownsampLength, filter_coefficients,
num_coefficients, decimation_factor, kCompensateDelay);
if (input_length <= length_limit) {
// Not quite long enough, so we have to cheat a bit.
// If the input is shorter than the offset, we consider the input to be 0
// length. This will cause us to skip the downsampling since it makes no
// sense anyway, and input_downsampled_ will be filled with zeros. This is
// clearly a pathological case, and the signal quality will suffer, but
// there is not much we can do.
const size_t temp_len =
input_length > signal_offset ? input_length - signal_offset : 0;
// TODO(hlundin): Should `downsamp_temp_len` be corrected for round-off
// errors? I.e., (temp_len + decimation_factor - 1) / decimation_factor?
size_t downsamp_temp_len = temp_len / decimation_factor;
if (downsamp_temp_len > 0) {
WebRtcSpl_DownsampleFast(&input[signal_offset], temp_len,
input_downsampled_, downsamp_temp_len,
filter_coefficients, num_coefficients,
decimation_factor, kCompensateDelay);
}
memset(&input_downsampled_[downsamp_temp_len], 0,
sizeof(int16_t) * (kInputDownsampLength - downsamp_temp_len));
} else {
WebRtcSpl_DownsampleFast(
&input[signal_offset], input_length - signal_offset, input_downsampled_,
kInputDownsampLength, filter_coefficients, num_coefficients,
decimation_factor, kCompensateDelay);
}
}
size_t Merge::CorrelateAndPeakSearch(size_t start_position,
size_t input_length,
size_t expand_period) const {
// Calculate correlation without any normalization.
const size_t max_corr_length = kMaxCorrelationLength;
size_t stop_position_downsamp =
std::min(max_corr_length, expand_->max_lag() / (fs_mult_ * 2) + 1);
int32_t correlation[kMaxCorrelationLength];
CrossCorrelationWithAutoShift(input_downsampled_, expanded_downsampled_,
kInputDownsampLength, stop_position_downsamp, 1,
correlation);
// Normalize correlation to 14 bits and copy to a 16-bit array.
const size_t pad_length = expand_->overlap_length() - 1;
const size_t correlation_buffer_size = 2 * pad_length + kMaxCorrelationLength;
std::unique_ptr<int16_t[]> correlation16(
new int16_t[correlation_buffer_size]);
memset(correlation16.get(), 0, correlation_buffer_size * sizeof(int16_t));
int16_t* correlation_ptr = &correlation16[pad_length];
int32_t max_correlation =
WebRtcSpl_MaxAbsValueW32(correlation, stop_position_downsamp);
int norm_shift = std::max(0, 17 - WebRtcSpl_NormW32(max_correlation));
WebRtcSpl_VectorBitShiftW32ToW16(correlation_ptr, stop_position_downsamp,
correlation, norm_shift);
// Calculate allowed starting point for peak finding.
  // The peak location `best_correlation_index` must fulfill two criteria:
  // (1) best_correlation_index + input_length >=
  //     timestamps_per_call_ + expand_->overlap_length();
  // (2) best_correlation_index + input_length >= start_position.
size_t start_index = timestamps_per_call_ + expand_->overlap_length();
start_index = std::max(start_position, start_index);
start_index = (input_length > start_index) ? 0 : (start_index - input_length);
// Downscale starting index to 4kHz domain. (fs_mult_ * 2 = fs_hz_ / 4000.)
size_t start_index_downsamp = start_index / (fs_mult_ * 2);
// Calculate a modified `stop_position_downsamp` to account for the increased
// start index `start_index_downsamp` and the effective array length.
size_t modified_stop_pos =
std::min(stop_position_downsamp,
kMaxCorrelationLength + pad_length - start_index_downsamp);
size_t best_correlation_index;
int16_t best_correlation;
static const size_t kNumCorrelationCandidates = 1;
DspHelper::PeakDetection(&correlation_ptr[start_index_downsamp],
modified_stop_pos, kNumCorrelationCandidates,
fs_mult_, &best_correlation_index,
&best_correlation);
// Compensate for modified start index.
best_correlation_index += start_index;
  // Ensure that an underrun does not occur in the 10 ms case; we must get at
  // least 10 ms + overlap. (This should never happen, thanks to the above
  // modification of the peak-finding starting point.)
while (((best_correlation_index + input_length) <
(timestamps_per_call_ + expand_->overlap_length())) ||
((best_correlation_index + input_length) < start_position)) {
RTC_DCHECK_NOTREACHED(); // Should never happen.
best_correlation_index += expand_period; // Jump one lag ahead.
}
return best_correlation_index;
}
size_t Merge::RequiredFutureSamples() {
return fs_hz_ / 100 * num_channels_; // 10 ms.
}
} // namespace webrtc

View file

@ -0,0 +1,101 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_MERGE_H_
#define MODULES_AUDIO_CODING_NETEQ_MERGE_H_
#include "modules/audio_coding/neteq/audio_multi_vector.h"
namespace webrtc {
// Forward declarations.
class Expand;
class SyncBuffer;
// This class handles the transition from expansion to normal operation.
// When a packet is not available for decoding when needed, the expand operation
// is called to generate extrapolation data. If the missing packet arrives,
// i.e., it was just delayed, it can be decoded and appended directly to the
// end of the expanded data (thanks to how the Expand class operates). However,
// if a later packet arrives instead, the loss is a fact, and the new data must
// be stitched together with the end of the expanded data. This stitching is
// what the Merge class does.
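//
// In brief, Process() obtains expand data that overlaps with the new decoded
// data, downsamples both signals to 4 kHz, finds the best correlation lag
// between them, and cross-fades the decoded data onto the expanded signal at
// that lag.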
class Merge {
public:
Merge(int fs_hz,
size_t num_channels,
Expand* expand,
SyncBuffer* sync_buffer);
virtual ~Merge();
Merge(const Merge&) = delete;
Merge& operator=(const Merge&) = delete;
// The main method to produce the audio data. The decoded data is supplied in
// `input`, having `input_length` samples in total for all channels
// (interleaved). The result is written to `output`. The number of channels
// allocated in `output` defines the number of channels that will be used when
// de-interleaving `input`.
virtual size_t Process(int16_t* input,
size_t input_length,
AudioMultiVector* output);
virtual size_t RequiredFutureSamples();
protected:
const int fs_hz_;
const size_t num_channels_;
private:
static const int kMaxSampleRate = 48000;
static const size_t kExpandDownsampLength = 100;
static const size_t kInputDownsampLength = 40;
static const size_t kMaxCorrelationLength = 60;
// Calls `expand_` to get more expansion data to merge with. The data is
// written to `expanded_signal_`. Returns the length of the expanded data,
// while `expand_period` will be the number of samples in one expansion period
// (typically one pitch period). The value of `old_length` will be the number
// of samples that were taken from the `sync_buffer_`.
size_t GetExpandedSignal(size_t* old_length, size_t* expand_period);
// Analyzes `input` and `expanded_signal` and returns muting factor (Q14) to
// be used on the new data.
int16_t SignalScaling(const int16_t* input,
size_t input_length,
const int16_t* expanded_signal) const;
// Downsamples `input` (`input_length` samples) and `expanded_signal` to
// 4 kHz sample rate. The downsampled signals are written to
// `input_downsampled_` and `expanded_downsampled_`, respectively.
void Downsample(const int16_t* input,
size_t input_length,
const int16_t* expanded_signal,
size_t expanded_length);
// Calculates cross-correlation between `input_downsampled_` and
// `expanded_downsampled_`, and finds the correlation maximum. The maximizing
// lag is returned.
size_t CorrelateAndPeakSearch(size_t start_position,
size_t input_length,
size_t expand_period) const;
const int fs_mult_; // fs_hz_ / 8000.
const size_t timestamps_per_call_;
Expand* expand_;
SyncBuffer* sync_buffer_;
int16_t expanded_downsampled_[kExpandDownsampLength];
int16_t input_downsampled_[kInputDownsampLength];
AudioMultiVector expanded_;
std::vector<int16_t> temp_data_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_MERGE_H_

View file

@ -0,0 +1,121 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for Merge class.
#include "modules/audio_coding/neteq/merge.h"
#include <algorithm>
#include <vector>
#include "modules/audio_coding/neteq/background_noise.h"
#include "modules/audio_coding/neteq/expand.h"
#include "modules/audio_coding/neteq/random_vector.h"
#include "modules/audio_coding/neteq/statistics_calculator.h"
#include "modules/audio_coding/neteq/sync_buffer.h"
#include "modules/audio_coding/neteq/tools/resample_input_audio_file.h"
#include "test/gtest.h"
#include "test/testsupport/file_utils.h"
namespace webrtc {
TEST(Merge, CreateAndDestroy) {
int fs = 8000;
size_t channels = 1;
BackgroundNoise bgn(channels);
SyncBuffer sync_buffer(1, 1000);
RandomVector random_vector;
StatisticsCalculator statistics;
Expand expand(&bgn, &sync_buffer, &random_vector, &statistics, fs, channels);
Merge merge(fs, channels, &expand, &sync_buffer);
}
namespace {
// This is the same size that is given to the SyncBuffer object in NetEq.
const size_t kNetEqSyncBufferLengthMs = 720;
} // namespace
class MergeTest : public testing::TestWithParam<size_t> {
protected:
MergeTest()
: input_file_(test::ResourcePath("audio_coding/testfile32kHz", "pcm"),
32000),
test_sample_rate_hz_(8000),
num_channels_(1),
background_noise_(num_channels_),
sync_buffer_(num_channels_,
kNetEqSyncBufferLengthMs * test_sample_rate_hz_ / 1000),
expand_(&background_noise_,
&sync_buffer_,
&random_vector_,
&statistics_,
test_sample_rate_hz_,
num_channels_),
merge_(test_sample_rate_hz_, num_channels_, &expand_, &sync_buffer_) {
input_file_.set_output_rate_hz(test_sample_rate_hz_);
}
void SetUp() override {
    // Fast-forward the input file until there is speech (about 1.1 seconds into
// the file).
const int speech_start_samples =
static_cast<int>(test_sample_rate_hz_ * 1.1f);
ASSERT_TRUE(input_file_.Seek(speech_start_samples));
// Pre-load the sync buffer with speech data.
std::unique_ptr<int16_t[]> temp(new int16_t[sync_buffer_.Size()]);
ASSERT_TRUE(input_file_.Read(sync_buffer_.Size(), temp.get()));
sync_buffer_.Channel(0).OverwriteAt(temp.get(), sync_buffer_.Size(), 0);
// Move index such that the sync buffer appears to have 5 ms left to play.
sync_buffer_.set_next_index(sync_buffer_.next_index() -
test_sample_rate_hz_ * 5 / 1000);
ASSERT_EQ(1u, num_channels_) << "Fix: Must populate all channels.";
ASSERT_GT(sync_buffer_.FutureLength(), 0u);
}
test::ResampleInputAudioFile input_file_;
int test_sample_rate_hz_;
size_t num_channels_;
BackgroundNoise background_noise_;
SyncBuffer sync_buffer_;
RandomVector random_vector_;
StatisticsCalculator statistics_;
Expand expand_;
Merge merge_;
};
TEST_P(MergeTest, Process) {
AudioMultiVector output(num_channels_);
// Start by calling Expand once, to prime the state.
EXPECT_EQ(0, expand_.Process(&output));
EXPECT_GT(output.Size(), 0u);
output.Clear();
  // Now call Merge, but with a very short decoded input. Try different lengths
  // of the input.
const size_t input_len = GetParam();
std::vector<int16_t> input(input_len, 17);
merge_.Process(input.data(), input_len, &output);
EXPECT_GT(output.Size(), 0u);
}
// Instantiate with values for the input length that are interesting in
// Merge::Downsample. Why are these values interesting?
// - In 8000 Hz sample rate, signal_offset in Merge::Downsample will be 2, so
// the values 1, 2, 3 are just around that value.
// - Also in 8000 Hz, the variable length_limit in the same method will be 80,
// so values 80 and 81 will be on either side of the branch point
// "input_length <= length_limit".
// - Finally, 160 is simply 20 ms in 8000 Hz, which is a common packet size.
INSTANTIATE_TEST_SUITE_P(DifferentInputLengths,
MergeTest,
testing::Values(1, 2, 3, 80, 81, 160));
// TODO(hlundin): Write more tests.
} // namespace webrtc

View file

@ -0,0 +1,28 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_BUFFER_LEVEL_FILTER_H_
#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_BUFFER_LEVEL_FILTER_H_
#include "modules/audio_coding/neteq/buffer_level_filter.h"
#include "test/gmock.h"
namespace webrtc {
class MockBufferLevelFilter : public BufferLevelFilter {
public:
MOCK_METHOD(void,
Update,
(size_t buffer_size_samples, int time_stretched_samples));
MOCK_METHOD(int, filtered_current_level, (), (const));
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_BUFFER_LEVEL_FILTER_H_

View file

@ -0,0 +1,50 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DECODER_DATABASE_H_
#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DECODER_DATABASE_H_
#include <string>
#include "modules/audio_coding/neteq/decoder_database.h"
#include "test/gmock.h"
namespace webrtc {
class MockDecoderDatabase : public DecoderDatabase {
public:
explicit MockDecoderDatabase(
rtc::scoped_refptr<AudioDecoderFactory> factory = nullptr)
: DecoderDatabase(factory, absl::nullopt) {}
~MockDecoderDatabase() override { Die(); }
MOCK_METHOD(void, Die, ());
MOCK_METHOD(bool, Empty, (), (const, override));
MOCK_METHOD(int, Size, (), (const, override));
MOCK_METHOD(int,
RegisterPayload,
(int rtp_payload_type, const SdpAudioFormat& audio_format),
(override));
MOCK_METHOD(int, Remove, (uint8_t rtp_payload_type), (override));
MOCK_METHOD(void, RemoveAll, (), (override));
MOCK_METHOD(const DecoderInfo*,
GetDecoderInfo,
(uint8_t rtp_payload_type),
(const, override));
MOCK_METHOD(int,
SetActiveDecoder,
(uint8_t rtp_payload_type, bool* new_decoder),
(override));
MOCK_METHOD(AudioDecoder*, GetActiveDecoder, (), (const, override));
MOCK_METHOD(int, SetActiveCngDecoder, (uint8_t rtp_payload_type), (override));
MOCK_METHOD(ComfortNoiseDecoder*, GetActiveCngDecoder, (), (const, override));
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DECODER_DATABASE_H_

View file

@ -0,0 +1,29 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DELAY_MANAGER_H_
#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DELAY_MANAGER_H_
#include "api/neteq/tick_timer.h"
#include "modules/audio_coding/neteq/delay_manager.h"
#include "test/gmock.h"
namespace webrtc {
class MockDelayManager : public DelayManager {
public:
MockDelayManager(const MockDelayManager::Config& config,
const TickTimer* tick_timer)
: DelayManager(config, tick_timer) {}
MOCK_METHOD(int, TargetDelayMs, (), (const));
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DELAY_MANAGER_H_

View file

@ -0,0 +1,35 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_BUFFER_H_
#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_BUFFER_H_
#include "modules/audio_coding/neteq/dtmf_buffer.h"
#include "test/gmock.h"
namespace webrtc {
class MockDtmfBuffer : public DtmfBuffer {
public:
MockDtmfBuffer(int fs) : DtmfBuffer(fs) {}
~MockDtmfBuffer() override { Die(); }
MOCK_METHOD(void, Die, ());
MOCK_METHOD(void, Flush, (), (override));
MOCK_METHOD(int, InsertEvent, (const DtmfEvent& event), (override));
MOCK_METHOD(bool,
GetEvent,
(uint32_t current_timestamp, DtmfEvent* event),
(override));
MOCK_METHOD(size_t, Length, (), (const, override));
MOCK_METHOD(bool, Empty, (), (const, override));
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_BUFFER_H_

View file

@ -0,0 +1,33 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_TONE_GENERATOR_H_
#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_TONE_GENERATOR_H_
#include "modules/audio_coding/neteq/dtmf_tone_generator.h"
#include "test/gmock.h"
namespace webrtc {
class MockDtmfToneGenerator : public DtmfToneGenerator {
public:
~MockDtmfToneGenerator() override { Die(); }
MOCK_METHOD(void, Die, ());
MOCK_METHOD(int, Init, (int fs, int event, int attenuation), (override));
MOCK_METHOD(void, Reset, (), (override));
MOCK_METHOD(int,
Generate,
(size_t num_samples, AudioMultiVector* output),
(override));
MOCK_METHOD(bool, initialized, (), (const, override));
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_TONE_GENERATOR_H_

View file

@ -0,0 +1,60 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_EXPAND_H_
#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_EXPAND_H_
#include "modules/audio_coding/neteq/expand.h"
#include "test/gmock.h"
namespace webrtc {
class MockExpand : public Expand {
public:
MockExpand(BackgroundNoise* background_noise,
SyncBuffer* sync_buffer,
RandomVector* random_vector,
StatisticsCalculator* statistics,
int fs,
size_t num_channels)
: Expand(background_noise,
sync_buffer,
random_vector,
statistics,
fs,
num_channels) {}
~MockExpand() override { Die(); }
MOCK_METHOD(void, Die, ());
MOCK_METHOD(void, Reset, (), (override));
MOCK_METHOD(int, Process, (AudioMultiVector * output), (override));
MOCK_METHOD(void, SetParametersForNormalAfterExpand, (), (override));
MOCK_METHOD(void, SetParametersForMergeAfterExpand, (), (override));
MOCK_METHOD(size_t, overlap_length, (), (const, override));
};
} // namespace webrtc
namespace webrtc {
class MockExpandFactory : public ExpandFactory {
public:
MOCK_METHOD(Expand*,
Create,
(BackgroundNoise * background_noise,
SyncBuffer* sync_buffer,
RandomVector* random_vector,
StatisticsCalculator* statistics,
int fs,
size_t num_channels),
(const, override));
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_EXPAND_H_

View file

@ -0,0 +1,30 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_HISTOGRAM_H_
#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_HISTOGRAM_H_
#include "modules/audio_coding/neteq/histogram.h"
#include "test/gmock.h"
namespace webrtc {
class MockHistogram : public Histogram {
public:
MockHistogram(size_t num_buckets, int forget_factor)
: Histogram(num_buckets, forget_factor) {}
virtual ~MockHistogram() {}
MOCK_METHOD(void, Add, (int), (override));
MOCK_METHOD(int, Quantile, (int), (override));
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_HISTOGRAM_H_

View file

@ -0,0 +1,59 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_NETEQ_CONTROLLER_H_
#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_NETEQ_CONTROLLER_H_
#include "api/neteq/neteq_controller.h"
#include "test/gmock.h"
namespace webrtc {
class MockNetEqController : public NetEqController {
public:
MockNetEqController() = default;
~MockNetEqController() override { Die(); }
MOCK_METHOD(void, Die, ());
MOCK_METHOD(void, Reset, (), (override));
MOCK_METHOD(void, SoftReset, (), (override));
MOCK_METHOD(NetEq::Operation,
GetDecision,
(const NetEqStatus& neteq_status, bool* reset_decoder),
(override));
MOCK_METHOD(void, RegisterEmptyPacket, (), (override));
MOCK_METHOD(void,
SetSampleRate,
(int fs_hz, size_t output_size_samples),
(override));
MOCK_METHOD(bool, SetMaximumDelay, (int delay_ms), (override));
MOCK_METHOD(bool, SetMinimumDelay, (int delay_ms), (override));
MOCK_METHOD(bool, SetBaseMinimumDelay, (int delay_ms), (override));
MOCK_METHOD(int, GetBaseMinimumDelay, (), (const, override));
MOCK_METHOD(void, ExpandDecision, (NetEq::Operation operation), (override));
MOCK_METHOD(void, AddSampleMemory, (int32_t value), (override));
MOCK_METHOD(int, TargetLevelMs, (), (const, override));
MOCK_METHOD(absl::optional<int>,
PacketArrived,
(int fs_hz,
bool should_update_stats,
const PacketArrivedInfo& info),
(override));
MOCK_METHOD(void, NotifyMutedState, (), (override));
MOCK_METHOD(bool, PeakFound, (), (const, override));
MOCK_METHOD(int, GetFilteredBufferLevel, (), (const, override));
MOCK_METHOD(void, set_sample_memory, (int32_t value), (override));
MOCK_METHOD(size_t, noise_fast_forward, (), (const, override));
MOCK_METHOD(size_t, packet_length_samples, (), (const, override));
MOCK_METHOD(void, set_packet_length_samples, (size_t value), (override));
MOCK_METHOD(void, set_prev_time_scale, (bool value), (override));
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_NETEQ_CONTROLLER_H_

View file

@ -0,0 +1,31 @@
/*
* Copyright 2023 The WebRTC Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_PACKET_ARRIVAL_HISTORY_H_
#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_PACKET_ARRIVAL_HISTORY_H_
#include "api/neteq/tick_timer.h"
#include "modules/audio_coding/neteq/packet_arrival_history.h"
#include "test/gmock.h"
namespace webrtc {
class MockPacketArrivalHistory : public PacketArrivalHistory {
public:
MockPacketArrivalHistory(const TickTimer* tick_timer)
: PacketArrivalHistory(tick_timer, 0) {}
  MOCK_METHOD(int, GetDelayMs, (uint32_t rtp_timestamp), (const, override));
  MOCK_METHOD(int, GetMaxDelayMs, (), (const, override));
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_PACKET_ARRIVAL_HISTORY_H_

View file

@ -0,0 +1,53 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_PACKET_BUFFER_H_
#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_PACKET_BUFFER_H_
#include "modules/audio_coding/neteq/packet_buffer.h"
#include "test/gmock.h"
namespace webrtc {
class MockPacketBuffer : public PacketBuffer {
public:
MockPacketBuffer(size_t max_number_of_packets,
const TickTimer* tick_timer,
StatisticsCalculator* stats)
: PacketBuffer(max_number_of_packets, tick_timer, stats) {}
~MockPacketBuffer() override { Die(); }
MOCK_METHOD(void, Die, ());
MOCK_METHOD(void, Flush, (), (override));
MOCK_METHOD(bool, Empty, (), (const, override));
MOCK_METHOD(int, InsertPacket, (Packet && packet), (override));
MOCK_METHOD(int,
NextTimestamp,
(uint32_t * next_timestamp),
(const, override));
MOCK_METHOD(int,
NextHigherTimestamp,
(uint32_t timestamp, uint32_t* next_timestamp),
(const, override));
MOCK_METHOD(const Packet*, PeekNextPacket, (), (const, override));
MOCK_METHOD(absl::optional<Packet>, GetNextPacket, (), (override));
MOCK_METHOD(int, DiscardNextPacket, (), (override));
MOCK_METHOD(void,
DiscardOldPackets,
(uint32_t timestamp_limit, uint32_t horizon_samples),
(override));
MOCK_METHOD(void,
DiscardAllOldPackets,
(uint32_t timestamp_limit),
(override));
MOCK_METHOD(size_t, NumPacketsInBuffer, (), (const, override));
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_PACKET_BUFFER_H_

View file

@ -0,0 +1,30 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_RED_PAYLOAD_SPLITTER_H_
#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_RED_PAYLOAD_SPLITTER_H_
#include "modules/audio_coding/neteq/red_payload_splitter.h"
#include "test/gmock.h"
namespace webrtc {
class MockRedPayloadSplitter : public RedPayloadSplitter {
public:
MOCK_METHOD(bool, SplitRed, (PacketList * packet_list), (override));
MOCK_METHOD(void,
CheckRedPayloads,
(PacketList * packet_list,
const DecoderDatabase& decoder_database),
(override));
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_RED_PAYLOAD_SPLITTER_H_

View file

@ -0,0 +1,30 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_STATISTICS_CALCULATOR_H_
#define MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_STATISTICS_CALCULATOR_H_
#include "modules/audio_coding/neteq/statistics_calculator.h"
#include "test/gmock.h"
namespace webrtc {
class MockStatisticsCalculator : public StatisticsCalculator {
public:
MOCK_METHOD(void, PacketsDiscarded, (size_t num_packets), (override));
MOCK_METHOD(void,
SecondaryPacketsDiscarded,
(size_t num_packets),
(override));
MOCK_METHOD(void, RelativePacketArrivalDelay, (size_t delay_ms), (override));
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_STATISTICS_CALCULATOR_H_

View file

@ -0,0 +1,246 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/nack_tracker.h"
#include <cstdint>
#include <utility>
#include "rtc_base/checks.h"
#include "rtc_base/experiments/struct_parameters_parser.h"
#include "rtc_base/logging.h"
#include "system_wrappers/include/field_trial.h"
namespace webrtc {
namespace {
const int kDefaultSampleRateKhz = 48;
const int kMaxPacketSizeMs = 120;
constexpr char kNackTrackerConfigFieldTrial[] =
"WebRTC-Audio-NetEqNackTrackerConfig";
} // namespace
NackTracker::Config::Config() {
auto parser = StructParametersParser::Create(
"packet_loss_forget_factor", &packet_loss_forget_factor,
"ms_per_loss_percent", &ms_per_loss_percent, "never_nack_multiple_times",
&never_nack_multiple_times, "require_valid_rtt", &require_valid_rtt,
"max_loss_rate", &max_loss_rate);
parser->Parse(
webrtc::field_trial::FindFullName(kNackTrackerConfigFieldTrial));
RTC_LOG(LS_INFO) << "Nack tracker config:"
" packet_loss_forget_factor="
<< packet_loss_forget_factor
<< " ms_per_loss_percent=" << ms_per_loss_percent
<< " never_nack_multiple_times=" << never_nack_multiple_times
<< " require_valid_rtt=" << require_valid_rtt
<< " max_loss_rate=" << max_loss_rate;
}
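// Example field trial string (illustrative values, in the format parsed
// above):
//   "WebRTC-Audio-NetEqNackTrackerConfig/"
//   "packet_loss_forget_factor:0.996,ms_per_loss_percent:20,"
//   "never_nack_multiple_times:true/"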
NackTracker::NackTracker()
: sequence_num_last_received_rtp_(0),
timestamp_last_received_rtp_(0),
any_rtp_received_(false),
sequence_num_last_decoded_rtp_(0),
timestamp_last_decoded_rtp_(0),
any_rtp_decoded_(false),
sample_rate_khz_(kDefaultSampleRateKhz),
max_nack_list_size_(kNackListSizeLimit) {}
NackTracker::~NackTracker() = default;
void NackTracker::UpdateSampleRate(int sample_rate_hz) {
RTC_DCHECK_GT(sample_rate_hz, 0);
sample_rate_khz_ = sample_rate_hz / 1000;
}
void NackTracker::UpdateLastReceivedPacket(uint16_t sequence_number,
uint32_t timestamp) {
// Just record the value of sequence number and timestamp if this is the
// first packet.
if (!any_rtp_received_) {
sequence_num_last_received_rtp_ = sequence_number;
timestamp_last_received_rtp_ = timestamp;
any_rtp_received_ = true;
// If no packet has been decoded yet, use the given values to get a
// reasonable estimate of the time-to-play.
if (!any_rtp_decoded_) {
sequence_num_last_decoded_rtp_ = sequence_number;
timestamp_last_decoded_rtp_ = timestamp;
}
return;
}
if (sequence_number == sequence_num_last_received_rtp_)
return;
// Received RTP should not be in the list.
nack_list_.erase(sequence_number);
// If this is an old sequence number, no further action is required; return.
if (IsNewerSequenceNumber(sequence_num_last_received_rtp_, sequence_number))
return;
UpdatePacketLossRate(sequence_number - sequence_num_last_received_rtp_ - 1);
UpdateList(sequence_number, timestamp);
sequence_num_last_received_rtp_ = sequence_number;
timestamp_last_received_rtp_ = timestamp;
LimitNackListSize();
}
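// For example, with 20 ms packets at 48 kHz, a timestamp increase of 960 per
// sequence number step yields 960 samples per packet. A result of zero, or
// of more than `kMaxPacketSizeMs` worth of samples, is rejected as invalid.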
absl::optional<int> NackTracker::GetSamplesPerPacket(
uint16_t sequence_number_current_received_rtp,
uint32_t timestamp_current_received_rtp) const {
uint32_t timestamp_increase =
timestamp_current_received_rtp - timestamp_last_received_rtp_;
uint16_t sequence_num_increase =
sequence_number_current_received_rtp - sequence_num_last_received_rtp_;
int samples_per_packet = timestamp_increase / sequence_num_increase;
if (samples_per_packet == 0 ||
samples_per_packet > kMaxPacketSizeMs * sample_rate_khz_) {
// Not a valid samples per packet.
return absl::nullopt;
}
return samples_per_packet;
}
void NackTracker::UpdateList(uint16_t sequence_number_current_received_rtp,
uint32_t timestamp_current_received_rtp) {
if (!IsNewerSequenceNumber(sequence_number_current_received_rtp,
sequence_num_last_received_rtp_ + 1)) {
return;
}
RTC_DCHECK(!any_rtp_decoded_ ||
IsNewerSequenceNumber(sequence_number_current_received_rtp,
sequence_num_last_decoded_rtp_));
absl::optional<int> samples_per_packet = GetSamplesPerPacket(
sequence_number_current_received_rtp, timestamp_current_received_rtp);
if (!samples_per_packet) {
return;
}
for (uint16_t n = sequence_num_last_received_rtp_ + 1;
IsNewerSequenceNumber(sequence_number_current_received_rtp, n); ++n) {
uint32_t timestamp = EstimateTimestamp(n, *samples_per_packet);
NackElement nack_element(TimeToPlay(timestamp), timestamp);
nack_list_.insert(nack_list_.end(), std::make_pair(n, nack_element));
}
}
uint32_t NackTracker::EstimateTimestamp(uint16_t sequence_num,
int samples_per_packet) {
uint16_t sequence_num_diff = sequence_num - sequence_num_last_received_rtp_;
return sequence_num_diff * samples_per_packet + timestamp_last_received_rtp_;
}
void NackTracker::UpdateLastDecodedPacket(uint16_t sequence_number,
uint32_t timestamp) {
any_rtp_decoded_ = true;
sequence_num_last_decoded_rtp_ = sequence_number;
timestamp_last_decoded_rtp_ = timestamp;
// Packets in the list with sequence numbers less than the
// sequence number of the decoded RTP should be removed from the list.
// They will be discarded by the jitter buffer if they arrive.
nack_list_.erase(nack_list_.begin(),
nack_list_.upper_bound(sequence_num_last_decoded_rtp_));
// Update estimated time-to-play.
for (NackList::iterator it = nack_list_.begin(); it != nack_list_.end();
++it) {
it->second.time_to_play_ms = TimeToPlay(it->second.estimated_timestamp);
}
}
NackTracker::NackList NackTracker::GetNackList() const {
return nack_list_;
}
void NackTracker::Reset() {
nack_list_.clear();
sequence_num_last_received_rtp_ = 0;
timestamp_last_received_rtp_ = 0;
any_rtp_received_ = false;
sequence_num_last_decoded_rtp_ = 0;
timestamp_last_decoded_rtp_ = 0;
any_rtp_decoded_ = false;
sample_rate_khz_ = kDefaultSampleRateKhz;
}
void NackTracker::SetMaxNackListSize(size_t max_nack_list_size) {
RTC_CHECK_GT(max_nack_list_size, 0);
// Ugly hack to get around the problem of passing static consts by reference.
const size_t kNackListSizeLimitLocal = NackTracker::kNackListSizeLimit;
RTC_CHECK_LE(max_nack_list_size, kNackListSizeLimitLocal);
max_nack_list_size_ = max_nack_list_size;
LimitNackListSize();
}
void NackTracker::LimitNackListSize() {
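  // `limit` is computed with wrap-around (mod 2^16) arithmetic; combined
  // with the wrap-around aware ordering of `nack_list_`, upper_bound() then
  // drops every element at or before the limit.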
uint16_t limit = sequence_num_last_received_rtp_ -
static_cast<uint16_t>(max_nack_list_size_) - 1;
nack_list_.erase(nack_list_.begin(), nack_list_.upper_bound(limit));
}
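// There are `sample_rate_khz_` timestamp ticks per millisecond, so dividing
// the timestamp difference by the rate in kHz converts it to milliseconds.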
int64_t NackTracker::TimeToPlay(uint32_t timestamp) const {
uint32_t timestamp_increase = timestamp - timestamp_last_decoded_rtp_;
return timestamp_increase / sample_rate_khz_;
}
// Elements with a time-to-play shorter than the round-trip time are
// (normally) not returned, but they are kept in the list rather than erased.
std::vector<uint16_t> NackTracker::GetNackList(int64_t round_trip_time_ms) {
RTC_DCHECK_GE(round_trip_time_ms, 0);
std::vector<uint16_t> sequence_numbers;
if (round_trip_time_ms == 0) {
if (config_.require_valid_rtt) {
return sequence_numbers;
} else {
round_trip_time_ms = config_.default_rtt_ms;
}
}
if (packet_loss_rate_ >
static_cast<uint32_t>(config_.max_loss_rate * (1 << 30))) {
return sequence_numbers;
}
// The estimated packet loss rate is a fraction in [0, 1] (in Q30), so
// multiply by 100 to convert it to a percentage.
int max_wait_ms =
100.0 * config_.ms_per_loss_percent * packet_loss_rate_ / (1 << 30);
for (NackList::const_iterator it = nack_list_.begin(); it != nack_list_.end();
++it) {
int64_t time_since_packet_ms =
(timestamp_last_received_rtp_ - it->second.estimated_timestamp) /
sample_rate_khz_;
if (it->second.time_to_play_ms > round_trip_time_ms ||
time_since_packet_ms + round_trip_time_ms < max_wait_ms)
sequence_numbers.push_back(it->first);
}
if (config_.never_nack_multiple_times) {
nack_list_.clear();
}
return sequence_numbers;
}
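// The loss rate is tracked with a first-order exponential filter in Q30
// fixed point, where (1 << 30) represents 1.0. One filter update
// rate <- alpha * rate is applied for the received packet, and
// rate <- alpha * rate + (1 - alpha) for each lost packet, with
// alpha = packet_loss_forget_factor.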
void NackTracker::UpdatePacketLossRate(int packets_lost) {
const uint64_t alpha_q30 = (1 << 30) * config_.packet_loss_forget_factor;
// Exponential filter.
packet_loss_rate_ = (alpha_q30 * packet_loss_rate_) >> 30;
for (int i = 0; i < packets_lost; ++i) {
packet_loss_rate_ =
((alpha_q30 * packet_loss_rate_) >> 30) + ((1 << 30) - alpha_q30);
}
}
} // namespace webrtc

View file

@ -0,0 +1,206 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_NACK_TRACKER_H_
#define MODULES_AUDIO_CODING_NETEQ_NACK_TRACKER_H_
#include <stddef.h>
#include <stdint.h>
#include <map>
#include <vector>
#include "absl/types/optional.h"
#include "modules/include/module_common_types_public.h"
#include "rtc_base/gtest_prod_util.h"
//
// The NackTracker class keeps track of lost packets; an estimate of the
// time-to-play is also given for each packet.
//
// Every time a packet is pushed into NetEq, UpdateLastReceivedPacket() has to
// be called to update the NACK list.
//
// Every time 10 ms of audio is pulled from NetEq, UpdateLastDecodedPacket()
// should be called, and time-to-play is updated at that moment.
//
// If packet N is received, any packet prior to N which has not arrived is
// considered lost, and should be labeled as "missing" (the size of
// the list might be limited and older packets eliminated from the list).
//
// The NackTracker class has to know the sample rate of the packets to
// compute time-to-play, so the sample rate should be set as soon as the
// first packet is received. If the receive codec changes (the sender changes
// codec), NackTracker should be reset, because NetEq would flush its buffer
// and retransmission is meaningless for old packets. In that case, the
// sample rate has to be set again after the reset.
//
// Thread Safety
// =============
// Please note that this class is not thread safe. The class must be
// protected if different APIs are called from different threads.
//
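//
// Usage sketch (illustrative only; `rtp` and `rtt_ms` are assumed to be
// provided by the surrounding receiver code):
//
//   NackTracker nack;
//   nack.UpdateSampleRate(48000);
//   // For every packet inserted into NetEq:
//   nack.UpdateLastReceivedPacket(rtp.sequence_number, rtp.timestamp);
//   // For every packet decoded by NetEq:
//   nack.UpdateLastDecodedPacket(rtp.sequence_number, rtp.timestamp);
//   // When composing a NACK request:
//   std::vector<uint16_t> nack_list = nack.GetNackList(rtt_ms);
//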
namespace webrtc {
class NackTracker {
public:
// A limit for the size of the NACK list.
static const size_t kNackListSizeLimit = 500; // 10 seconds for 20 ms frame
// packets.
NackTracker();
~NackTracker();
// Set a maximum for the size of the NACK list. If the last received packet
// has sequence number N, then the NACK list will not contain any element
// with a sequence number earlier than N - `max_nack_list_size`.
//
// The largest allowed maximum size is defined by `kNackListSizeLimit`.
void SetMaxNackListSize(size_t max_nack_list_size);
// Set the sampling rate.
//
// If the sampling rate of the received packets changes, call this function
// to update it. Note that if the received codec changes, NetEq will flush
// its buffer and NACK has to be reset; after Reset() is called, the
// sampling rate has to be set again.
void UpdateSampleRate(int sample_rate_hz);
// Update the sequence number and the timestamp of the last decoded RTP.
void UpdateLastDecodedPacket(uint16_t sequence_number, uint32_t timestamp);
// Update the sequence number and the timestamp of the last received RTP.
// This API should be called every time a packet is pushed into ACM.
void UpdateLastReceivedPacket(uint16_t sequence_number, uint32_t timestamp);
// Get a list of "missing" packets which have expected time-to-play larger
// than the given round-trip-time (in milliseconds).
// Note: Late packets are not included.
// Calling this method multiple times may give different results, since the
// internal nack list may get flushed if never_nack_multiple_times_ is true.
std::vector<uint16_t> GetNackList(int64_t round_trip_time_ms);
// Reset to default values. The NACK list is cleared.
// `max_nack_list_size_` preserves its value.
void Reset();
// Returns the estimated packet loss rate in Q30, for testing only.
uint32_t GetPacketLossRateForTest() { return packet_loss_rate_; }
private:
// This test needs to access the private method GetNackList().
FRIEND_TEST_ALL_PREFIXES(NackTrackerTest, EstimateTimestampAndTimeToPlay);
// Options that can be configured via field trial.
struct Config {
Config();
// The exponential decay factor used to estimate the packet loss rate.
double packet_loss_forget_factor = 0.996;
// How many additional ms we are willing to wait (at most) for nacked
// packets for each additional percentage of packet loss.
int ms_per_loss_percent = 20;
// If true, never nack packets more than once.
bool never_nack_multiple_times = false;
// Only nack if the RTT is valid.
bool require_valid_rtt = false;
// Default RTT to use unless `require_valid_rtt` is set.
int default_rtt_ms = 100;
// Do not nack if the loss rate is above this value.
double max_loss_rate = 1.0;
};
struct NackElement {
NackElement(int64_t initial_time_to_play_ms, uint32_t initial_timestamp)
: time_to_play_ms(initial_time_to_play_ms),
estimated_timestamp(initial_timestamp) {}
// Estimated time (ms) left for this packet to be decoded. This estimate is
// updated every time the jitter buffer decodes a packet.
int64_t time_to_play_ms;
// A guess about the timestamp of the missing packet; it is used for the
// estimation of `time_to_play_ms`. The estimate might be slightly wrong if
// there has been a frame-size change between the last received packet and
// the missing packet. However, the risk of this is low, and such errors only
// cause a minor misestimation of the time-to-play of missing packets, with a
// very minor effect on NACK performance.
uint32_t estimated_timestamp;
};
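  // Orders the NACK list by RTP sequence number with wrap-around taken into
  // account, so that iteration goes from the oldest to the newest missing
  // packet.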
class NackListCompare {
public:
bool operator()(uint16_t sequence_number_old,
uint16_t sequence_number_new) const {
return IsNewerSequenceNumber(sequence_number_new, sequence_number_old);
}
};
typedef std::map<uint16_t, NackElement, NackListCompare> NackList;
// This API is used only for testing to assess whether time-to-play is
// computed correctly.
NackList GetNackList() const;
// Returns a valid number of samples per packet given the current received
// sequence number and timestamp, or nullopt if none could be computed.
absl::optional<int> GetSamplesPerPacket(
uint16_t sequence_number_current_received_rtp,
uint32_t timestamp_current_received_rtp) const;
// Given the `sequence_number_current_received_rtp` of the currently received
// RTP packet, update the list. Missing packets older than the received
// packet are added to the NACK list.
void UpdateList(uint16_t sequence_number_current_received_rtp,
uint32_t timestamp_current_received_rtp);
// Packets which have a sequence number older than
// `sequence_num_last_received_rtp_` - `max_nack_list_size_` are removed
// from the NACK list.
void LimitNackListSize();
// Estimate timestamp of a missing packet given its sequence number.
uint32_t EstimateTimestamp(uint16_t sequence_number, int samples_per_packet);
// Compute time-to-play given a timestamp.
int64_t TimeToPlay(uint32_t timestamp) const;
// Updates the estimated packet loss rate.
void UpdatePacketLossRate(int packets_lost);
const Config config_;
// Valid if a packet is received.
uint16_t sequence_num_last_received_rtp_;
uint32_t timestamp_last_received_rtp_;
bool any_rtp_received_; // If any packet received.
// Valid if a packet is decoded.
uint16_t sequence_num_last_decoded_rtp_;
uint32_t timestamp_last_decoded_rtp_;
bool any_rtp_decoded_; // If any packet decoded.
int sample_rate_khz_; // Sample rate in kHz.
// A list of missing packets to be retransmitted. Components of the list
// contain the sequence number of missing packets and the estimated time that
// each packet is going to be played out.
NackList nack_list_;
// NACK list will not keep track of missing packets prior to
// `sequence_num_last_received_rtp_` - `max_nack_list_size_`.
size_t max_nack_list_size_;
// Current estimate of the packet loss rate in Q30.
uint32_t packet_loss_rate_ = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_NACK_TRACKER_H_

View file

@ -0,0 +1,552 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/nack_tracker.h"
#include <stdint.h>
#include <algorithm>
#include <memory>
#include "modules/audio_coding/include/audio_coding_module_typedefs.h"
#include "test/field_trial.h"
#include "test/gtest.h"
namespace webrtc {
namespace {
const int kSampleRateHz = 16000;
const int kPacketSizeMs = 30;
const uint32_t kTimestampIncrement = 480; // 30 ms.
const int64_t kShortRoundTripTimeMs = 1;
bool IsNackListCorrect(const std::vector<uint16_t>& nack_list,
const uint16_t* lost_sequence_numbers,
size_t num_lost_packets) {
if (nack_list.size() != num_lost_packets)
return false;
if (num_lost_packets == 0)
return true;
for (size_t k = 0; k < nack_list.size(); ++k) {
int seq_num = nack_list[k];
bool seq_num_matched = false;
for (size_t n = 0; n < num_lost_packets; ++n) {
if (seq_num == lost_sequence_numbers[n]) {
seq_num_matched = true;
break;
}
}
if (!seq_num_matched)
return false;
}
return true;
}
} // namespace
TEST(NackTrackerTest, EmptyListWhenNoPacketLoss) {
NackTracker nack;
nack.UpdateSampleRate(kSampleRateHz);
int seq_num = 1;
uint32_t timestamp = 0;
std::vector<uint16_t> nack_list;
for (int n = 0; n < 100; n++) {
nack.UpdateLastReceivedPacket(seq_num, timestamp);
nack_list = nack.GetNackList(kShortRoundTripTimeMs);
seq_num++;
timestamp += kTimestampIncrement;
nack_list = nack.GetNackList(kShortRoundTripTimeMs);
EXPECT_TRUE(nack_list.empty());
}
}
TEST(NackTrackerTest, LatePacketsMovedToNackThenNackListDoesNotChange) {
const uint16_t kSequenceNumberLostPackets[] = {2, 3, 4, 5, 6, 7, 8, 9};
static const int kNumAllLostPackets = sizeof(kSequenceNumberLostPackets) /
sizeof(kSequenceNumberLostPackets[0]);
for (int k = 0; k < 2; k++) { // Two iterations: with and without wrap-around.
NackTracker nack;
nack.UpdateSampleRate(kSampleRateHz);
uint16_t sequence_num_lost_packets[kNumAllLostPackets];
for (int n = 0; n < kNumAllLostPackets; n++) {
sequence_num_lost_packets[n] =
kSequenceNumberLostPackets[n] +
k * 65531; // Have wrap around in sequence numbers for |k == 1|.
}
uint16_t seq_num = sequence_num_lost_packets[0] - 1;
uint32_t timestamp = 0;
std::vector<uint16_t> nack_list;
nack.UpdateLastReceivedPacket(seq_num, timestamp);
nack_list = nack.GetNackList(kShortRoundTripTimeMs);
EXPECT_TRUE(nack_list.empty());
seq_num = sequence_num_lost_packets[kNumAllLostPackets - 1] + 1;
timestamp += kTimestampIncrement * (kNumAllLostPackets + 1);
int num_lost_packets = std::max(0, kNumAllLostPackets);
nack.UpdateLastReceivedPacket(seq_num, timestamp);
nack_list = nack.GetNackList(kShortRoundTripTimeMs);
EXPECT_TRUE(IsNackListCorrect(nack_list, sequence_num_lost_packets,
num_lost_packets));
seq_num++;
timestamp += kTimestampIncrement;
num_lost_packets++;
for (int n = 0; n < 100; ++n) {
nack.UpdateLastReceivedPacket(seq_num, timestamp);
nack_list = nack.GetNackList(kShortRoundTripTimeMs);
EXPECT_TRUE(IsNackListCorrect(nack_list, sequence_num_lost_packets,
kNumAllLostPackets));
seq_num++;
timestamp += kTimestampIncrement;
}
}
}
TEST(NackTrackerTest, ArrivedPacketsAreRemovedFromNackList) {
const uint16_t kSequenceNumberLostPackets[] = {2, 3, 4, 5, 6, 7, 8, 9};
static const int kNumAllLostPackets = sizeof(kSequenceNumberLostPackets) /
sizeof(kSequenceNumberLostPackets[0]);
for (int k = 0; k < 2; ++k) { // Two iterations: with and without wrap-around.
NackTracker nack;
nack.UpdateSampleRate(kSampleRateHz);
uint16_t sequence_num_lost_packets[kNumAllLostPackets];
for (int n = 0; n < kNumAllLostPackets; ++n) {
sequence_num_lost_packets[n] = kSequenceNumberLostPackets[n] +
k * 65531; // Wrap around for |k == 1|.
}
uint16_t seq_num = sequence_num_lost_packets[0] - 1;
uint32_t timestamp = 0;
nack.UpdateLastReceivedPacket(seq_num, timestamp);
std::vector<uint16_t> nack_list = nack.GetNackList(kShortRoundTripTimeMs);
EXPECT_TRUE(nack_list.empty());
size_t index_retransmitted_rtp = 0;
uint32_t timestamp_retransmitted_rtp = timestamp + kTimestampIncrement;
seq_num = sequence_num_lost_packets[kNumAllLostPackets - 1] + 1;
timestamp += kTimestampIncrement * (kNumAllLostPackets + 1);
size_t num_lost_packets = kNumAllLostPackets;
for (int n = 0; n < kNumAllLostPackets; ++n) {
// The number of lost packets does not change in the first iteration: one
// packet is added to the list and one is removed. Thereafter, the list
// shrinks by one every iteration, as a retransmitted packet arrives each time.
if (n >= 1)
num_lost_packets--;
nack.UpdateLastReceivedPacket(seq_num, timestamp);
nack_list = nack.GetNackList(kShortRoundTripTimeMs);
EXPECT_TRUE(IsNackListCorrect(
nack_list, &sequence_num_lost_packets[index_retransmitted_rtp],
num_lost_packets));
seq_num++;
timestamp += kTimestampIncrement;
// Retransmission of a lost RTP.
nack.UpdateLastReceivedPacket(
sequence_num_lost_packets[index_retransmitted_rtp],
timestamp_retransmitted_rtp);
index_retransmitted_rtp++;
timestamp_retransmitted_rtp += kTimestampIncrement;
nack_list = nack.GetNackList(kShortRoundTripTimeMs);
EXPECT_TRUE(IsNackListCorrect(
nack_list, &sequence_num_lost_packets[index_retransmitted_rtp],
num_lost_packets - 1)); // One less lost packet in the list.
}
ASSERT_TRUE(nack_list.empty());
}
}
// Assess whether the estimation of timestamps and time-to-play is correct.
// Cover all combinations of timestamp and sequence number wrap-around.
TEST(NackTrackerTest, EstimateTimestampAndTimeToPlay) {
const uint16_t kLostPackets[] = {2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15};
static const int kNumAllLostPackets =
sizeof(kLostPackets) / sizeof(kLostPackets[0]);
for (int k = 0; k < 4; ++k) {
NackTracker nack;
nack.UpdateSampleRate(kSampleRateHz);
// Sequence number wrap-around if `k` is 2 or 3.
int seq_num_offset = (k < 2) ? 0 : 65531;
// Timestamp wrap around if `k` is 1 or 3.
uint32_t timestamp_offset =
(k & 0x1) ? static_cast<uint32_t>(0xffffffff) - 6 : 0;
uint32_t timestamp_lost_packets[kNumAllLostPackets];
uint16_t seq_num_lost_packets[kNumAllLostPackets];
for (int n = 0; n < kNumAllLostPackets; ++n) {
timestamp_lost_packets[n] =
timestamp_offset + kLostPackets[n] * kTimestampIncrement;
seq_num_lost_packets[n] = seq_num_offset + kLostPackets[n];
}
// We want to push two packets before the lost burst starts.
uint16_t seq_num = seq_num_lost_packets[0] - 2;
uint32_t timestamp = timestamp_lost_packets[0] - 2 * kTimestampIncrement;
const uint16_t first_seq_num = seq_num;
const uint32_t first_timestamp = timestamp;
// Two consecutive packets to have a correct estimate of timestamp increase.
nack.UpdateLastReceivedPacket(seq_num, timestamp);
seq_num++;
timestamp += kTimestampIncrement;
nack.UpdateLastReceivedPacket(seq_num, timestamp);
// A packet after the last one which is supposed to be lost.
seq_num = seq_num_lost_packets[kNumAllLostPackets - 1] + 1;
timestamp =
timestamp_lost_packets[kNumAllLostPackets - 1] + kTimestampIncrement;
nack.UpdateLastReceivedPacket(seq_num, timestamp);
NackTracker::NackList nack_list = nack.GetNackList();
EXPECT_EQ(static_cast<size_t>(kNumAllLostPackets), nack_list.size());
// Pretend the first packet is decoded.
nack.UpdateLastDecodedPacket(first_seq_num, first_timestamp);
nack_list = nack.GetNackList();
NackTracker::NackList::iterator it = nack_list.begin();
while (it != nack_list.end()) {
seq_num = it->first - seq_num_offset;
int index = seq_num - kLostPackets[0];
EXPECT_EQ(timestamp_lost_packets[index], it->second.estimated_timestamp);
EXPECT_EQ((index + 2) * kPacketSizeMs, it->second.time_to_play_ms);
++it;
}
}
}
TEST(NackTrackerTest,
MissingPacketsPriorToLastDecodedRtpShouldNotBeInNackList) {
for (int m = 0; m < 2; ++m) {
uint16_t seq_num_offset = (m == 0) ? 0 : 65531; // Wrap around if `m` is 1.
NackTracker nack;
nack.UpdateSampleRate(kSampleRateHz);
// Two consecutive packets to have a correct estimate of timestamp increase.
uint16_t seq_num = 0;
nack.UpdateLastReceivedPacket(seq_num_offset + seq_num,
seq_num * kTimestampIncrement);
seq_num++;
nack.UpdateLastReceivedPacket(seq_num_offset + seq_num,
seq_num * kTimestampIncrement);
// Skip 10 packets (larger than NACK threshold).
const int kNumLostPackets = 10;
seq_num += kNumLostPackets + 1;
nack.UpdateLastReceivedPacket(seq_num_offset + seq_num,
seq_num * kTimestampIncrement);
const size_t kExpectedListSize = kNumLostPackets;
std::vector<uint16_t> nack_list = nack.GetNackList(kShortRoundTripTimeMs);
EXPECT_EQ(kExpectedListSize, nack_list.size());
for (int k = 0; k < 2; ++k) {
// Decoding of the first and the second arrived packets.
for (int n = 0; n < kPacketSizeMs / 10; ++n) {
nack.UpdateLastDecodedPacket(seq_num_offset + k,
k * kTimestampIncrement);
nack_list = nack.GetNackList(kShortRoundTripTimeMs);
EXPECT_EQ(kExpectedListSize, nack_list.size());
}
}
// Decoding of the last received packet.
nack.UpdateLastDecodedPacket(seq_num + seq_num_offset,
seq_num * kTimestampIncrement);
nack_list = nack.GetNackList(kShortRoundTripTimeMs);
EXPECT_TRUE(nack_list.empty());
// Make sure the list of late packets is also empty. To check that, push a
// few packets; if the late list is not empty, its contents will pop up in
// the NACK list.
for (int n = 0; n < 10; ++n) {
seq_num++;
nack.UpdateLastReceivedPacket(seq_num_offset + seq_num,
seq_num * kTimestampIncrement);
nack_list = nack.GetNackList(kShortRoundTripTimeMs);
EXPECT_TRUE(nack_list.empty());
}
}
}
TEST(NackTrackerTest, Reset) {
NackTracker nack;
nack.UpdateSampleRate(kSampleRateHz);
// Two consecutive packets to have a correct estimate of timestamp increase.
uint16_t seq_num = 0;
nack.UpdateLastReceivedPacket(seq_num, seq_num * kTimestampIncrement);
seq_num++;
nack.UpdateLastReceivedPacket(seq_num, seq_num * kTimestampIncrement);
// Skip 10 packets (larger than NACK threshold).
const int kNumLostPackets = 10;
seq_num += kNumLostPackets + 1;
nack.UpdateLastReceivedPacket(seq_num, seq_num * kTimestampIncrement);
const size_t kExpectedListSize = kNumLostPackets;
std::vector<uint16_t> nack_list = nack.GetNackList(kShortRoundTripTimeMs);
EXPECT_EQ(kExpectedListSize, nack_list.size());
nack.Reset();
nack_list = nack.GetNackList(kShortRoundTripTimeMs);
EXPECT_TRUE(nack_list.empty());
}
TEST(NackTrackerTest, ListSizeAppliedFromBeginning) {
const size_t kNackListSize = 10;
for (int m = 0; m < 2; ++m) {
uint16_t seq_num_offset = (m == 0) ? 0 : 65525; // Wrap around if `m` is 1.
NackTracker nack;
nack.UpdateSampleRate(kSampleRateHz);
nack.SetMaxNackListSize(kNackListSize);
uint16_t seq_num = seq_num_offset;
uint32_t timestamp = 0x12345678;
nack.UpdateLastReceivedPacket(seq_num, timestamp);
// More packets are lost than the NACK-list size limit.
uint16_t num_lost_packets = kNackListSize + 5;
seq_num += num_lost_packets + 1;
timestamp += (num_lost_packets + 1) * kTimestampIncrement;
nack.UpdateLastReceivedPacket(seq_num, timestamp);
std::vector<uint16_t> nack_list = nack.GetNackList(kShortRoundTripTimeMs);
EXPECT_EQ(kNackListSize, nack_list.size());
}
}
TEST(NackTrackerTest, ChangeOfListSizeAppliedAndOldElementsRemoved) {
const size_t kNackListSize = 10;
for (int m = 0; m < 2; ++m) {
uint16_t seq_num_offset = (m == 0) ? 0 : 65525; // Wrap around if `m` is 1.
NackTracker nack;
nack.UpdateSampleRate(kSampleRateHz);
uint16_t seq_num = seq_num_offset;
uint32_t timestamp = 0x87654321;
nack.UpdateLastReceivedPacket(seq_num, timestamp);
// More packets are lost than the NACK-list size limit.
uint16_t num_lost_packets = kNackListSize + 5;
std::unique_ptr<uint16_t[]> seq_num_lost(new uint16_t[num_lost_packets]);
for (int n = 0; n < num_lost_packets; ++n) {
seq_num_lost[n] = ++seq_num;
}
++seq_num;
timestamp += (num_lost_packets + 1) * kTimestampIncrement;
nack.UpdateLastReceivedPacket(seq_num, timestamp);
size_t expected_size = num_lost_packets;
std::vector<uint16_t> nack_list = nack.GetNackList(kShortRoundTripTimeMs);
EXPECT_EQ(expected_size, nack_list.size());
nack.SetMaxNackListSize(kNackListSize);
expected_size = kNackListSize;
nack_list = nack.GetNackList(kShortRoundTripTimeMs);
EXPECT_TRUE(IsNackListCorrect(
nack_list, &seq_num_lost[num_lost_packets - kNackListSize],
expected_size));
// NACK list should shrink.
for (size_t n = 1; n < kNackListSize; ++n) {
++seq_num;
timestamp += kTimestampIncrement;
nack.UpdateLastReceivedPacket(seq_num, timestamp);
--expected_size;
nack_list = nack.GetNackList(kShortRoundTripTimeMs);
EXPECT_TRUE(IsNackListCorrect(
nack_list, &seq_num_lost[num_lost_packets - kNackListSize + n],
expected_size));
}
// After this packet, NACK list should be empty.
++seq_num;
timestamp += kTimestampIncrement;
nack.UpdateLastReceivedPacket(seq_num, timestamp);
nack_list = nack.GetNackList(kShortRoundTripTimeMs);
EXPECT_TRUE(nack_list.empty());
}
}
TEST(NackTrackerTest, RoundTripTimeIsApplied) {
const int kNackListSize = 200;
NackTracker nack;
nack.UpdateSampleRate(kSampleRateHz);
nack.SetMaxNackListSize(kNackListSize);
uint16_t seq_num = 0;
uint32_t timestamp = 0x87654321;
nack.UpdateLastReceivedPacket(seq_num, timestamp);
// A handful of packets are lost.
uint16_t kNumLostPackets = 5;
seq_num += (1 + kNumLostPackets);
timestamp += (1 + kNumLostPackets) * kTimestampIncrement;
nack.UpdateLastReceivedPacket(seq_num, timestamp);
// Expected times-to-play are:
// kPacketSizeMs - 10, 2*kPacketSizeMs - 10, 3*kPacketSizeMs - 10, ...
//
// sequence number: 1, 2, 3, 4, 5
// time-to-play: 20, 50, 80, 110, 140
//
std::vector<uint16_t> nack_list = nack.GetNackList(100);
ASSERT_EQ(2u, nack_list.size());
EXPECT_EQ(4, nack_list[0]);
EXPECT_EQ(5, nack_list[1]);
}
// Set never_nack_multiple_times to true with a field trial and verify that
// packets are not nacked multiple times.
TEST(NackTrackerTest, DoNotNackMultipleTimes) {
test::ScopedFieldTrials field_trials(
"WebRTC-Audio-NetEqNackTrackerConfig/"
"packet_loss_forget_factor:0.996,ms_per_loss_percent:20,"
"never_nack_multiple_times:true/");
const int kNackListSize = 200;
NackTracker nack;
nack.UpdateSampleRate(kSampleRateHz);
nack.SetMaxNackListSize(kNackListSize);
uint16_t seq_num = 0;
uint32_t timestamp = 0x87654321;
nack.UpdateLastReceivedPacket(seq_num, timestamp);
uint16_t kNumLostPackets = 3;
seq_num += (1 + kNumLostPackets);
timestamp += (1 + kNumLostPackets) * kTimestampIncrement;
nack.UpdateLastReceivedPacket(seq_num, timestamp);
std::vector<uint16_t> nack_list = nack.GetNackList(10);
ASSERT_EQ(3u, nack_list.size());
EXPECT_EQ(1, nack_list[0]);
EXPECT_EQ(2, nack_list[1]);
EXPECT_EQ(3, nack_list[2]);
// When we get the nack list again, it should be empty.
std::vector<uint16_t> nack_list2 = nack.GetNackList(10);
EXPECT_TRUE(nack_list2.empty());
}
// Test if estimated packet loss rate is correct.
TEST(NackTrackerTest, PacketLossRateCorrect) {
const int kNackListSize = 200;
NackTracker nack;
nack.UpdateSampleRate(kSampleRateHz);
nack.SetMaxNackListSize(kNackListSize);
uint16_t seq_num = 0;
uint32_t timestamp = 0x87654321;
auto add_packet = [&nack, &seq_num, &timestamp](bool received) {
if (received) {
nack.UpdateLastReceivedPacket(seq_num, timestamp);
}
seq_num++;
timestamp += kTimestampIncrement;
};
// Add some packets, but every fourth packet is lost.
for (int i = 0; i < 300; i++) {
add_packet(true);
add_packet(true);
add_packet(true);
add_packet(false);
}
// 1 << 28 is 0.25 in Q30. We expect the packet loss estimate to be within
// 0.01 of that.
EXPECT_NEAR(nack.GetPacketLossRateForTest(), 1 << 28, (1 << 30) / 100);
}
TEST(NackTrackerTest, DoNotNackAfterDtx) {
const int kNackListSize = 200;
NackTracker nack;
nack.UpdateSampleRate(kSampleRateHz);
nack.SetMaxNackListSize(kNackListSize);
uint16_t seq_num = 0;
uint32_t timestamp = 0x87654321;
nack.UpdateLastReceivedPacket(seq_num, timestamp);
EXPECT_TRUE(nack.GetNackList(0).empty());
constexpr int kDtxPeriod = 400;
nack.UpdateLastReceivedPacket(seq_num + 2,
timestamp + kDtxPeriod * kSampleRateHz / 1000);
EXPECT_TRUE(nack.GetNackList(0).empty());
}
TEST(NackTrackerTest, DoNotNackIfLossRateIsTooHigh) {
test::ScopedFieldTrials field_trials(
"WebRTC-Audio-NetEqNackTrackerConfig/max_loss_rate:0.4/");
const int kNackListSize = 200;
NackTracker nack;
nack.UpdateSampleRate(kSampleRateHz);
nack.SetMaxNackListSize(kNackListSize);
uint16_t seq_num = 0;
uint32_t timestamp = 0x87654321;
auto add_packet = [&nack, &seq_num, &timestamp](bool received) {
if (received) {
nack.UpdateLastReceivedPacket(seq_num, timestamp);
}
seq_num++;
timestamp += kTimestampIncrement;
};
for (int i = 0; i < 500; i++) {
add_packet(true);
add_packet(false);
}
// Expect a 50% loss rate, which is higher than the configured maximum of 40%.
EXPECT_NEAR(nack.GetPacketLossRateForTest(), 1 << 29, (1 << 30) / 100);
EXPECT_TRUE(nack.GetNackList(0).empty());
}
TEST(NackTrackerTest, OnlyNackIfRttIsValid) {
test::ScopedFieldTrials field_trials(
"WebRTC-Audio-NetEqNackTrackerConfig/require_valid_rtt:true/");
const int kNackListSize = 200;
NackTracker nack;
nack.UpdateSampleRate(kSampleRateHz);
nack.SetMaxNackListSize(kNackListSize);
uint16_t seq_num = 0;
uint32_t timestamp = 0x87654321;
auto add_packet = [&nack, &seq_num, &timestamp](bool received) {
if (received) {
nack.UpdateLastReceivedPacket(seq_num, timestamp);
}
seq_num++;
timestamp += kTimestampIncrement;
};
add_packet(true);
add_packet(false);
add_packet(true);
EXPECT_TRUE(nack.GetNackList(0).empty());
EXPECT_FALSE(nack.GetNackList(10).empty());
}
} // namespace webrtc

View file

@ -0,0 +1,321 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Test to verify correct operation when using the decoder-internal PLC.
#include <memory>
#include <utility>
#include <vector>
#include "absl/types/optional.h"
#include "modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.h"
#include "modules/audio_coding/neteq/tools/audio_checksum.h"
#include "modules/audio_coding/neteq/tools/audio_sink.h"
#include "modules/audio_coding/neteq/tools/encode_neteq_input.h"
#include "modules/audio_coding/neteq/tools/fake_decode_from_file.h"
#include "modules/audio_coding/neteq/tools/input_audio_file.h"
#include "modules/audio_coding/neteq/tools/neteq_test.h"
#include "rtc_base/numerics/safe_conversions.h"
#include "test/audio_decoder_proxy_factory.h"
#include "test/gtest.h"
#include "test/testsupport/file_utils.h"
namespace webrtc {
namespace test {
namespace {
constexpr int kSampleRateHz = 32000;
constexpr int kRunTimeMs = 10000;
// This class implements a fake decoder. The decoder reads audio from a file
// and presents it as output, both for regular decoding and for PLC.
class AudioDecoderPlc : public AudioDecoder {
public:
AudioDecoderPlc(std::unique_ptr<InputAudioFile> input, int sample_rate_hz)
: input_(std::move(input)), sample_rate_hz_(sample_rate_hz) {}
void Reset() override {}
int SampleRateHz() const override { return sample_rate_hz_; }
size_t Channels() const override { return 1; }
int DecodeInternal(const uint8_t* /*encoded*/,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) override {
RTC_CHECK_GE(encoded_len / 2, 10 * sample_rate_hz_ / 1000);
RTC_CHECK_LE(encoded_len / 2, 2 * 10 * sample_rate_hz_ / 1000);
RTC_CHECK_EQ(sample_rate_hz, sample_rate_hz_);
RTC_CHECK(decoded);
RTC_CHECK(speech_type);
RTC_CHECK(input_->Read(encoded_len / 2, decoded));
*speech_type = kSpeech;
last_was_plc_ = false;
return encoded_len / 2;
}
void GeneratePlc(size_t requested_samples_per_channel,
rtc::BufferT<int16_t>* concealment_audio) override {
// Instead of generating random data for GeneratePlc we use the same data as
// the input, so we can check that we produce the same result independently
// of the losses.
RTC_DCHECK_EQ(requested_samples_per_channel, 10 * sample_rate_hz_ / 1000);
// Must keep a local copy of this since DecodeInternal sets it to false.
const bool last_was_plc = last_was_plc_;
std::vector<int16_t> decoded(5760);
SpeechType speech_type;
int dec_len = DecodeInternal(nullptr, 2 * 10 * sample_rate_hz_ / 1000,
sample_rate_hz_, decoded.data(), &speech_type);
concealment_audio->AppendData(decoded.data(), dec_len);
concealed_samples_ += rtc::checked_cast<size_t>(dec_len);
if (!last_was_plc) {
++concealment_events_;
}
last_was_plc_ = true;
}
size_t concealed_samples() { return concealed_samples_; }
size_t concealment_events() { return concealment_events_; }
private:
const std::unique_ptr<InputAudioFile> input_;
const int sample_rate_hz_;
size_t concealed_samples_ = 0;
size_t concealment_events_ = 0;
bool last_was_plc_ = false;
};
// An input sample generator which generates only zero-samples.
class ZeroSampleGenerator : public EncodeNetEqInput::Generator {
public:
rtc::ArrayView<const int16_t> Generate(size_t num_samples) override {
vec.resize(num_samples, 0);
rtc::ArrayView<const int16_t> view(vec);
RTC_DCHECK_EQ(view.size(), num_samples);
return view;
}
private:
std::vector<int16_t> vec;
};
// A NetEqInput which connects to another NetEqInput, but drops a number of
// consecutive packets on the way.
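// On every `loss_cadence_`-th pop, the packet is delivered as usual, but the
// `burst_length_` packets that follow it are silently dropped.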
class LossyInput : public NetEqInput {
public:
LossyInput(int loss_cadence,
int burst_length,
std::unique_ptr<NetEqInput> input)
: loss_cadence_(loss_cadence),
burst_length_(burst_length),
input_(std::move(input)) {}
absl::optional<int64_t> NextPacketTime() const override {
return input_->NextPacketTime();
}
absl::optional<int64_t> NextOutputEventTime() const override {
return input_->NextOutputEventTime();
}
absl::optional<SetMinimumDelayInfo> NextSetMinimumDelayInfo() const override {
return input_->NextSetMinimumDelayInfo();
}
std::unique_ptr<PacketData> PopPacket() override {
if (loss_cadence_ != 0 && (++count_ % loss_cadence_) == 0) {
// Pop `burst_length_` packets to create the loss.
auto packet_to_return = input_->PopPacket();
for (int i = 0; i < burst_length_; i++) {
input_->PopPacket();
}
return packet_to_return;
}
return input_->PopPacket();
}
void AdvanceOutputEvent() override { return input_->AdvanceOutputEvent(); }
void AdvanceSetMinimumDelay() override {
return input_->AdvanceSetMinimumDelay();
}
bool ended() const override { return input_->ended(); }
absl::optional<RTPHeader> NextHeader() const override {
return input_->NextHeader();
}
private:
const int loss_cadence_;
const int burst_length_;
int count_ = 0;
const std::unique_ptr<NetEqInput> input_;
};
class AudioChecksumWithOutput : public AudioChecksum {
public:
explicit AudioChecksumWithOutput(std::string* output_str)
: output_str_(*output_str) {}
~AudioChecksumWithOutput() { output_str_ = Finish(); }
private:
std::string& output_str_;
};
struct TestStatistics {
NetEqNetworkStatistics network;
NetEqLifetimeStatistics lifetime;
};
TestStatistics RunTest(int loss_cadence,
int burst_length,
std::string* checksum) {
NetEq::Config config;
config.for_test_no_time_stretching = true;
// The input is mostly useless. It sends zero-samples to a PCM16b encoder,
// but the actual encoded samples will never be used by the decoder in the
// test. See below about the decoder.
auto generator = std::make_unique<ZeroSampleGenerator>();
constexpr int kPayloadType = 100;
AudioEncoderPcm16B::Config encoder_config;
encoder_config.sample_rate_hz = kSampleRateHz;
encoder_config.payload_type = kPayloadType;
auto encoder = std::make_unique<AudioEncoderPcm16B>(encoder_config);
auto input = std::make_unique<EncodeNetEqInput>(
std::move(generator), std::move(encoder), kRunTimeMs);
// Wrap the input in a loss function.
auto lossy_input = std::make_unique<LossyInput>(loss_cadence, burst_length,
std::move(input));
// Setting up decoders.
NetEqTest::DecoderMap decoders;
// Using a fake decoder which simply reads the output audio from a file.
auto input_file = std::make_unique<InputAudioFile>(
webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"));
AudioDecoderPlc dec(std::move(input_file), kSampleRateHz);
// Masquerading as a PCM16b decoder.
decoders.emplace(kPayloadType, SdpAudioFormat("l16", 32000, 1));
// Output is simply a checksum calculator.
auto output = std::make_unique<AudioChecksumWithOutput>(checksum);
// No callback objects.
NetEqTest::Callbacks callbacks;
NetEqTest neteq_test(
config, /*decoder_factory=*/
rtc::make_ref_counted<test::AudioDecoderProxyFactory>(&dec),
/*codecs=*/decoders, /*text_log=*/nullptr, /*neteq_factory=*/nullptr,
/*input=*/std::move(lossy_input), std::move(output), callbacks);
EXPECT_LE(kRunTimeMs, neteq_test.Run());
auto lifetime_stats = neteq_test.LifetimeStats();
EXPECT_EQ(dec.concealed_samples(), lifetime_stats.concealed_samples);
EXPECT_EQ(dec.concealment_events(), lifetime_stats.concealment_events);
return {neteq_test.SimulationStats(), neteq_test.LifetimeStats()};
}
} // namespace
// Check that some basic metrics are produced in the right direction. In
// particular, expand_rate should only increase if there are losses present.
// Our dummy decoder is designed such that the checksum is always the same
// regardless of the losses, given that calls are executed in the right order.
TEST(NetEqDecoderPlc, BasicMetrics) {
std::string checksum;
// Drop 1 packet every 10 packets.
auto stats = RunTest(10, 1, &checksum);
std::string checksum_no_loss;
auto stats_no_loss = RunTest(0, 0, &checksum_no_loss);
EXPECT_EQ(checksum, checksum_no_loss);
EXPECT_EQ(stats.network.preemptive_rate,
stats_no_loss.network.preemptive_rate);
EXPECT_EQ(stats.network.accelerate_rate,
stats_no_loss.network.accelerate_rate);
EXPECT_EQ(0, stats_no_loss.network.expand_rate);
EXPECT_GT(stats.network.expand_rate, 0);
}
// Checks that short losses are not counted as interruptions, while long
// interruptions are counted correctly.
TEST(NetEqDecoderPlc, CountInterruptions) {
std::string checksum;
std::string checksum_2;
std::string checksum_3;
// Half of the packets are lost, but in short bursts.
auto stats_no_interruptions = RunTest(1, 1, &checksum);
// One loss of 5000 ms (250 packets of 20 ms each).
auto stats_one_interruption = RunTest(200, 250, &checksum_2);
// Two losses of 2500 ms each (125 packets each).
auto stats_two_interruptions = RunTest(125, 125, &checksum_3);
EXPECT_EQ(checksum, checksum_2);
EXPECT_EQ(checksum, checksum_3);
EXPECT_GT(stats_no_interruptions.network.expand_rate, 0);
EXPECT_EQ(stats_no_interruptions.lifetime.total_interruption_duration_ms, 0);
EXPECT_EQ(stats_no_interruptions.lifetime.interruption_count, 0);
EXPECT_GT(stats_one_interruption.network.expand_rate, 0);
EXPECT_EQ(stats_one_interruption.lifetime.total_interruption_duration_ms,
5000);
EXPECT_EQ(stats_one_interruption.lifetime.interruption_count, 1);
EXPECT_GT(stats_two_interruptions.network.expand_rate, 0);
EXPECT_EQ(stats_two_interruptions.lifetime.total_interruption_duration_ms,
5000);
EXPECT_EQ(stats_two_interruptions.lifetime.interruption_count, 2);
}
// Checks that small losses do not produce interruptions.
TEST(NetEqDecoderPlc, NoInterruptionsInSmallLosses) {
std::string checksum_1;
std::string checksum_4;
auto stats_1 = RunTest(300, 1, &checksum_1);
auto stats_4 = RunTest(300, 4, &checksum_4);
EXPECT_EQ(checksum_1, checksum_4);
EXPECT_EQ(stats_1.lifetime.interruption_count, 0);
EXPECT_EQ(stats_1.lifetime.total_interruption_duration_ms, 0);
EXPECT_EQ(stats_1.lifetime.concealed_samples, 640u); // 20ms of concealment.
EXPECT_EQ(stats_1.lifetime.concealment_events, 1u); // in just one event.
EXPECT_EQ(stats_4.lifetime.interruption_count, 0);
EXPECT_EQ(stats_4.lifetime.total_interruption_duration_ms, 0);
EXPECT_EQ(stats_4.lifetime.concealed_samples, 2560u); // 80ms of concealment.
EXPECT_EQ(stats_4.lifetime.concealment_events, 1u); // in just one event.
}
// Checks that interruptions of different sizes report correct duration.
TEST(NetEqDecoderPlc, InterruptionsReportCorrectSize) {
std::string checksum;
for (int burst_length = 5; burst_length < 10; burst_length++) {
auto stats = RunTest(300, burst_length, &checksum);
auto duration = stats.lifetime.total_interruption_duration_ms;
if (burst_length < 8) {
EXPECT_EQ(duration, 0);
} else {
EXPECT_EQ(duration, burst_length * 20);
}
}
}
} // namespace test
} // namespace webrtc

File diff suppressed because it is too large

View file

@ -0,0 +1,405 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_
#define MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "absl/types/optional.h"
#include "api/audio/audio_frame.h"
#include "api/neteq/neteq.h"
#include "api/neteq/neteq_controller.h"
#include "api/neteq/neteq_controller_factory.h"
#include "api/neteq/tick_timer.h"
#include "api/rtp_packet_info.h"
#include "modules/audio_coding/neteq/audio_multi_vector.h"
#include "modules/audio_coding/neteq/expand_uma_logger.h"
#include "modules/audio_coding/neteq/packet.h"
#include "modules/audio_coding/neteq/packet_buffer.h"
#include "modules/audio_coding/neteq/random_vector.h"
#include "modules/audio_coding/neteq/statistics_calculator.h"
#include "rtc_base/synchronization/mutex.h"
#include "rtc_base/thread_annotations.h"
namespace webrtc {
// Forward declarations.
class Accelerate;
class BackgroundNoise;
class Clock;
class ComfortNoise;
class DecoderDatabase;
class DtmfBuffer;
class DtmfToneGenerator;
class Expand;
class Merge;
class NackTracker;
class Normal;
class RedPayloadSplitter;
class PreemptiveExpand;
class RandomVector;
class SyncBuffer;
class TimestampScaler;
struct AccelerateFactory;
struct DtmfEvent;
struct ExpandFactory;
struct PreemptiveExpandFactory;
class NetEqImpl : public webrtc::NetEq {
public:
enum class OutputType {
kNormalSpeech,
kPLC,
kCNG,
kPLCCNG,
kVadPassive,
kCodecPLC
};
enum ErrorCodes {
kNoError = 0,
kOtherError,
kUnknownRtpPayloadType,
kDecoderNotFound,
kInvalidPointer,
kAccelerateError,
kPreemptiveExpandError,
kComfortNoiseErrorCode,
kDecoderErrorCode,
kOtherDecoderError,
kInvalidOperation,
kDtmfParsingError,
kDtmfInsertError,
kSampleUnderrun,
kDecodedTooMuch,
kRedundancySplitError,
kPacketBufferCorruption
};
struct Dependencies {
// The constructor populates the Dependencies struct with the default
// implementations of the objects. They can all be replaced by the user
// before sending the struct to the NetEqImpl constructor. However, there
// are dependencies between some of the classes inside the struct, so
// swapping out one may make it necessary to re-create another one.
Dependencies(const NetEq::Config& config,
Clock* clock,
const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory,
const NetEqControllerFactory& controller_factory);
~Dependencies();
Clock* const clock;
std::unique_ptr<TickTimer> tick_timer;
std::unique_ptr<StatisticsCalculator> stats;
std::unique_ptr<DecoderDatabase> decoder_database;
std::unique_ptr<DtmfBuffer> dtmf_buffer;
std::unique_ptr<DtmfToneGenerator> dtmf_tone_generator;
std::unique_ptr<PacketBuffer> packet_buffer;
std::unique_ptr<NetEqController> neteq_controller;
std::unique_ptr<RedPayloadSplitter> red_payload_splitter;
std::unique_ptr<TimestampScaler> timestamp_scaler;
std::unique_ptr<AccelerateFactory> accelerate_factory;
std::unique_ptr<ExpandFactory> expand_factory;
std::unique_ptr<PreemptiveExpandFactory> preemptive_expand_factory;
};
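  // A construction sketch (illustrative only; `clock`, `decoder_factory` and
  // `controller_factory` are assumed to be provided by the embedder):
  //
  //   NetEq::Config config;
  //   NetEqImpl::Dependencies deps(config, clock, decoder_factory,
  //                                controller_factory);
  //   // Individual members of `deps` (e.g. `deps.tick_timer`) may be
  //   // replaced before construction.
  //   NetEqImpl neteq(config, std::move(deps));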
// Creates a new NetEqImpl object.
NetEqImpl(const NetEq::Config& config,
Dependencies&& deps,
bool create_components = true);
~NetEqImpl() override;
NetEqImpl(const NetEqImpl&) = delete;
NetEqImpl& operator=(const NetEqImpl&) = delete;
// Inserts a new packet into NetEq. Returns 0 on success, -1 on failure.
int InsertPacket(const RTPHeader& rtp_header,
rtc::ArrayView<const uint8_t> payload) override;
void InsertEmptyPacket(const RTPHeader& rtp_header) override;
int GetAudio(
AudioFrame* audio_frame,
bool* muted,
int* current_sample_rate_hz = nullptr,
absl::optional<Operation> action_override = absl::nullopt) override;
void SetCodecs(const std::map<int, SdpAudioFormat>& codecs) override;
bool RegisterPayloadType(int rtp_payload_type,
const SdpAudioFormat& audio_format) override;
// Removes `rtp_payload_type` from the codec database. Returns 0 on success,
// -1 on failure.
int RemovePayloadType(uint8_t rtp_payload_type) override;
void RemoveAllPayloadTypes() override;
bool SetMinimumDelay(int delay_ms) override;
bool SetMaximumDelay(int delay_ms) override;
bool SetBaseMinimumDelayMs(int delay_ms) override;
int GetBaseMinimumDelayMs() const override;
int TargetDelayMs() const override;
int FilteredCurrentDelayMs() const override;
// Writes the current network statistics to `stats`. The statistics are reset
// after the call.
int NetworkStatistics(NetEqNetworkStatistics* stats) override;
NetEqNetworkStatistics CurrentNetworkStatistics() const override;
NetEqLifetimeStatistics GetLifetimeStatistics() const override;
NetEqOperationsAndState GetOperationsAndState() const override;
absl::optional<uint32_t> GetPlayoutTimestamp() const override;
int last_output_sample_rate_hz() const override;
absl::optional<DecoderFormat> GetDecoderFormat(
int payload_type) const override;
// Flushes both the packet buffer and the sync buffer.
void FlushBuffers() override;
void EnableNack(size_t max_nack_list_size) override;
void DisableNack() override;
std::vector<uint16_t> GetNackList(int64_t round_trip_time_ms) const override;
int SyncBufferSizeMs() const override;
// This accessor method is only intended for testing purposes.
const SyncBuffer* sync_buffer_for_test() const;
Operation last_operation_for_test() const;
protected:
static const int kOutputSizeMs = 10;
static const size_t kMaxFrameSize = 5760; // 120 ms @ 48 kHz.
// TODO(hlundin): Provide a better value for kSyncBufferSize.
// Current value is kMaxFrameSize + 60 ms * 48 kHz, which is enough for
// calculating correlations of current frame against history.
static const size_t kSyncBufferSize = kMaxFrameSize + 60 * 48;
// Inserts a new packet into NetEq. This is used by the InsertPacket method
// above. Returns 0 on success, otherwise an error code.
// TODO(hlundin): Merge this with InsertPacket above?
int InsertPacketInternal(const RTPHeader& rtp_header,
rtc::ArrayView<const uint8_t> payload)
RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Returns true if the payload type changed (this should be followed by
// resetting various state). Returns false if the current payload type is
// unknown or equal to `payload_type`.
bool MaybeChangePayloadType(uint8_t payload_type)
RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Delivers 10 ms of audio data. The data is written to `audio_frame`.
// Returns 0 on success, otherwise an error code.
int GetAudioInternal(AudioFrame* audio_frame,
bool* muted,
absl::optional<Operation> action_override)
RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Provides a decision to the GetAudioInternal method. The decision what to
// do is written to `operation`. Packets to decode are written to
// `packet_list`, and a DTMF event to play is written to `dtmf_event`. When
// DTMF should be played, `play_dtmf` is set to true by the method.
// Returns 0 on success, otherwise an error code.
int GetDecision(Operation* operation,
PacketList* packet_list,
DtmfEvent* dtmf_event,
bool* play_dtmf,
absl::optional<Operation> action_override)
RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Decodes the speech packets in `packet_list`, and writes the results to
// `decoded_buffer`, which is allocated to hold `decoded_buffer_length`
// elements. The length of the decoded data is written to `decoded_length`.
// The speech type -- speech or (codec-internal) comfort noise -- is written
// to `speech_type`. If `packet_list` contains any SID frames for RFC 3389
// comfort noise, those are not decoded.
int Decode(PacketList* packet_list,
Operation* operation,
int* decoded_length,
AudioDecoder::SpeechType* speech_type)
RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Sub-method to Decode(). Performs codec internal CNG.
int DecodeCng(AudioDecoder* decoder,
int* decoded_length,
AudioDecoder::SpeechType* speech_type)
RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Sub-method to Decode(). Performs the actual decoding.
int DecodeLoop(PacketList* packet_list,
const Operation& operation,
AudioDecoder* decoder,
int* decoded_length,
AudioDecoder::SpeechType* speech_type)
RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Sub-method which calls the Normal class to perform the normal operation.
void DoNormal(const int16_t* decoded_buffer,
size_t decoded_length,
AudioDecoder::SpeechType speech_type,
bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Sub-method which calls the Merge class to perform the merge operation.
void DoMerge(int16_t* decoded_buffer,
size_t decoded_length,
AudioDecoder::SpeechType speech_type,
bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
bool DoCodecPlc() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Sub-method which calls the Expand class to perform the expand operation.
int DoExpand(bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Sub-method which calls the Accelerate class to perform the accelerate
// operation.
int DoAccelerate(int16_t* decoded_buffer,
size_t decoded_length,
AudioDecoder::SpeechType speech_type,
bool play_dtmf,
bool fast_accelerate) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Sub-method which calls the PreemptiveExpand class to perform the
// preemptive expand operation.
int DoPreemptiveExpand(int16_t* decoded_buffer,
size_t decoded_length,
AudioDecoder::SpeechType speech_type,
bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Sub-method which calls the ComfortNoise class to generate RFC 3389 comfort
// noise. `packet_list` can either contain one SID frame to update the
// noise parameters, or no payload at all, in which case the previously
// received parameters are used.
int DoRfc3389Cng(PacketList* packet_list, bool play_dtmf)
RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Calls the audio decoder to generate codec-internal comfort noise when
// no packet was received.
void DoCodecInternalCng(const int16_t* decoded_buffer, size_t decoded_length)
RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Calls the DtmfToneGenerator class to generate DTMF tones.
int DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf)
RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Overdub DTMF on top of `output`.
int DtmfOverdub(const DtmfEvent& dtmf_event,
size_t num_channels,
int16_t* output) const RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Extracts packets from `packet_buffer_` to produce at least
// `required_samples` samples. The packets are inserted into `packet_list`.
// Returns the number of samples that the packets in the list will produce, or
// -1 in case of an error.
int ExtractPackets(size_t required_samples, PacketList* packet_list)
RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Resets various variables and objects to new values based on the sample
// rate `fs_hz` and the number of audio channels `channels`.
void SetSampleRateAndChannels(int fs_hz, size_t channels)
RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Returns the output type for the audio produced by the latest call to
// GetAudio().
OutputType LastOutputType() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Updates Expand and Merge.
virtual void UpdatePlcComponents(int fs_hz, size_t channels)
RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
NetEqNetworkStatistics CurrentNetworkStatisticsInternal() const
RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
NetEqController::PacketArrivedInfo ToPacketArrivedInfo(
const Packet& packet) const RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
Clock* const clock_;
mutable Mutex mutex_;
const std::unique_ptr<TickTimer> tick_timer_ RTC_GUARDED_BY(mutex_);
const std::unique_ptr<DecoderDatabase> decoder_database_
RTC_GUARDED_BY(mutex_);
const std::unique_ptr<DtmfBuffer> dtmf_buffer_ RTC_GUARDED_BY(mutex_);
const std::unique_ptr<DtmfToneGenerator> dtmf_tone_generator_
RTC_GUARDED_BY(mutex_);
const std::unique_ptr<PacketBuffer> packet_buffer_ RTC_GUARDED_BY(mutex_);
const std::unique_ptr<RedPayloadSplitter> red_payload_splitter_
RTC_GUARDED_BY(mutex_);
const std::unique_ptr<TimestampScaler> timestamp_scaler_
RTC_GUARDED_BY(mutex_);
const std::unique_ptr<ExpandFactory> expand_factory_ RTC_GUARDED_BY(mutex_);
const std::unique_ptr<AccelerateFactory> accelerate_factory_
RTC_GUARDED_BY(mutex_);
const std::unique_ptr<PreemptiveExpandFactory> preemptive_expand_factory_
RTC_GUARDED_BY(mutex_);
const std::unique_ptr<StatisticsCalculator> stats_ RTC_GUARDED_BY(mutex_);
const bool enable_fec_delay_adaptation_ RTC_GUARDED_BY(mutex_);
std::unique_ptr<BackgroundNoise> background_noise_ RTC_GUARDED_BY(mutex_);
std::unique_ptr<NetEqController> controller_ RTC_GUARDED_BY(mutex_);
std::unique_ptr<AudioMultiVector> algorithm_buffer_ RTC_GUARDED_BY(mutex_);
std::unique_ptr<SyncBuffer> sync_buffer_ RTC_GUARDED_BY(mutex_);
std::unique_ptr<Expand> expand_ RTC_GUARDED_BY(mutex_);
std::unique_ptr<Normal> normal_ RTC_GUARDED_BY(mutex_);
std::unique_ptr<Merge> merge_ RTC_GUARDED_BY(mutex_);
std::unique_ptr<Accelerate> accelerate_ RTC_GUARDED_BY(mutex_);
std::unique_ptr<PreemptiveExpand> preemptive_expand_ RTC_GUARDED_BY(mutex_);
RandomVector random_vector_ RTC_GUARDED_BY(mutex_);
std::unique_ptr<ComfortNoise> comfort_noise_ RTC_GUARDED_BY(mutex_);
int fs_hz_ RTC_GUARDED_BY(mutex_);
int fs_mult_ RTC_GUARDED_BY(mutex_);
int last_output_sample_rate_hz_ RTC_GUARDED_BY(mutex_);
size_t output_size_samples_ RTC_GUARDED_BY(mutex_);
size_t decoder_frame_length_ RTC_GUARDED_BY(mutex_);
Mode last_mode_ RTC_GUARDED_BY(mutex_);
Operation last_operation_ RTC_GUARDED_BY(mutex_);
absl::optional<AudioDecoder::SpeechType> last_decoded_type_
RTC_GUARDED_BY(mutex_);
size_t decoded_buffer_length_ RTC_GUARDED_BY(mutex_);
std::unique_ptr<int16_t[]> decoded_buffer_ RTC_GUARDED_BY(mutex_);
uint32_t playout_timestamp_ RTC_GUARDED_BY(mutex_);
bool new_codec_ RTC_GUARDED_BY(mutex_);
uint32_t timestamp_ RTC_GUARDED_BY(mutex_);
bool reset_decoder_ RTC_GUARDED_BY(mutex_);
absl::optional<uint8_t> current_rtp_payload_type_ RTC_GUARDED_BY(mutex_);
absl::optional<uint8_t> current_cng_rtp_payload_type_ RTC_GUARDED_BY(mutex_);
bool first_packet_ RTC_GUARDED_BY(mutex_);
bool enable_fast_accelerate_ RTC_GUARDED_BY(mutex_);
std::unique_ptr<NackTracker> nack_ RTC_GUARDED_BY(mutex_);
bool nack_enabled_ RTC_GUARDED_BY(mutex_);
const bool enable_muted_state_ RTC_GUARDED_BY(mutex_);
std::unique_ptr<TickTimer::Stopwatch> generated_noise_stopwatch_
RTC_GUARDED_BY(mutex_);
std::vector<RtpPacketInfo> last_decoded_packet_infos_ RTC_GUARDED_BY(mutex_);
ExpandUmaLogger expand_uma_logger_ RTC_GUARDED_BY(mutex_);
ExpandUmaLogger speech_expand_uma_logger_ RTC_GUARDED_BY(mutex_);
bool no_time_stretching_ RTC_GUARDED_BY(mutex_); // Only used for test.
rtc::BufferT<int16_t> concealment_audio_ RTC_GUARDED_BY(mutex_);
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_
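For orientation, the control flow implied by the declarations above is roughly the following. This is a simplified sketch based only on the method declarations and their comments; the authoritative implementation is in neteq_impl.cc, whose diff is suppressed below, and the Operation value names are assumptions:

// Sketch of the per-10-ms GetAudioInternal() flow (simplified):
//
//   GetDecision(&operation, &packet_list, &dtmf_event, &play_dtmf, ...);
//   Decode(&packet_list, &operation, &decoded_length, &speech_type);
//   switch (operation) {
//     case Operation::kNormal:           DoNormal(...); break;
//     case Operation::kMerge:            DoMerge(...); break;
//     case Operation::kExpand:           DoExpand(play_dtmf); break;
//     case Operation::kAccelerate:       DoAccelerate(...); break;
//     case Operation::kPreemptiveExpand: DoPreemptiveExpand(...); break;
//     case Operation::kRfc3389Cng:       DoRfc3389Cng(&packet_list, ...); break;
//     case Operation::kDtmf:             DoDtmf(dtmf_event, &play_dtmf); break;
//     ...
//   }
//   if (play_dtmf) DtmfOverdub(dtmf_event, ..., output);
//   // 10 ms (kOutputSizeMs) of audio is then read from sync_buffer_ into
//   // the caller's AudioFrame.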

File diff suppressed because it is too large.

@ -0,0 +1,346 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <memory>
#include "absl/memory/memory.h"
#include "api/audio/audio_frame.h"
#include "api/audio_codecs/audio_decoder.h"
#include "api/audio_codecs/builtin_audio_decoder_factory.h"
#include "api/neteq/neteq.h"
#include "modules/audio_coding/neteq/default_neteq_factory.h"
#include "modules/audio_coding/neteq/tools/rtp_generator.h"
#include "system_wrappers/include/clock.h"
#include "test/audio_decoder_proxy_factory.h"
#include "test/gmock.h"
namespace webrtc {
namespace test {
namespace {
std::unique_ptr<NetEq> CreateNetEq(
const NetEq::Config& config,
Clock* clock,
const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory) {
return DefaultNetEqFactory().CreateNetEq(config, decoder_factory, clock);
}
} // namespace
using ::testing::_;
using ::testing::Return;
using ::testing::SetArgPointee;
class MockAudioDecoder final : public AudioDecoder {
public:
static const int kPacketDuration = 960; // 48 kHz * 20 ms
MockAudioDecoder(int sample_rate_hz, size_t num_channels)
: sample_rate_hz_(sample_rate_hz),
num_channels_(num_channels),
fec_enabled_(false) {}
~MockAudioDecoder() override { Die(); }
MOCK_METHOD(void, Die, ());
MOCK_METHOD(void, Reset, (), (override));
class MockFrame : public AudioDecoder::EncodedAudioFrame {
public:
MockFrame(size_t num_channels) : num_channels_(num_channels) {}
size_t Duration() const override { return kPacketDuration; }
absl::optional<DecodeResult> Decode(
rtc::ArrayView<int16_t> decoded) const override {
// `decoded` is counted in samples, so compare sample counts, not bytes.
const size_t output_size = kPacketDuration * num_channels_;
if (decoded.size() >= output_size) {
memset(decoded.data(), 0, output_size * sizeof(int16_t));
return DecodeResult{kPacketDuration * num_channels_, kSpeech};
} else {
ADD_FAILURE() << "Expected decoded.size() to be >= output_size ("
<< decoded.size() << " vs. " << output_size << ")";
return absl::nullopt;
}
}
private:
const size_t num_channels_;
};
std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
uint32_t timestamp) override {
std::vector<ParseResult> results;
if (fec_enabled_) {
std::unique_ptr<MockFrame> fec_frame(new MockFrame(num_channels_));
results.emplace_back(timestamp - kPacketDuration, 1,
std::move(fec_frame));
}
std::unique_ptr<MockFrame> frame(new MockFrame(num_channels_));
results.emplace_back(timestamp, 0, std::move(frame));
return results;
}
int PacketDuration(const uint8_t* encoded,
size_t encoded_len) const override {
ADD_FAILURE() << "Since going through ParsePayload, PacketDuration should "
"never get called.";
return kPacketDuration;
}
bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const override {
ADD_FAILURE() << "Since going through ParsePayload, PacketHasFec should "
"never get called.";
return fec_enabled_;
}
int SampleRateHz() const override { return sample_rate_hz_; }
size_t Channels() const override { return num_channels_; }
void set_fec_enabled(bool enable_fec) { fec_enabled_ = enable_fec; }
bool fec_enabled() const { return fec_enabled_; }
protected:
int DecodeInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) override {
ADD_FAILURE() << "Since going through ParsePayload, DecodeInternal should "
"never get called.";
return -1;
}
private:
const int sample_rate_hz_;
const size_t num_channels_;
bool fec_enabled_;
};
class NetEqNetworkStatsTest {
public:
static const int kPayloadSizeByte = 30;
static const int kFrameSizeMs = 20;
static const uint8_t kPayloadType = 95;
static const int kOutputLengthMs = 10;
enum logic {
kIgnore,
kEqual,
kSmallerThan,
kLargerThan,
};
struct NetEqNetworkStatsCheck {
logic current_buffer_size_ms;
logic preferred_buffer_size_ms;
logic jitter_peaks_found;
logic packet_loss_rate;
logic expand_rate;
logic speech_expand_rate;
logic preemptive_rate;
logic accelerate_rate;
logic secondary_decoded_rate;
logic secondary_discarded_rate;
logic added_zero_samples;
NetEqNetworkStatistics stats_ref;
};
NetEqNetworkStatsTest(const SdpAudioFormat& format, MockAudioDecoder* decoder)
: decoder_(decoder),
decoder_factory_(
rtc::make_ref_counted<AudioDecoderProxyFactory>(decoder)),
samples_per_ms_(format.clockrate_hz / 1000),
frame_size_samples_(kFrameSizeMs * samples_per_ms_),
rtp_generator_(new RtpGenerator(samples_per_ms_)),
last_lost_time_(0),
packet_loss_interval_(0xffffffff) {
NetEq::Config config;
config.sample_rate_hz = format.clockrate_hz;
neteq_ = CreateNetEq(config, Clock::GetRealTimeClock(), decoder_factory_);
neteq_->RegisterPayloadType(kPayloadType, format);
}
bool Lost(uint32_t send_time) {
if (send_time - last_lost_time_ >= packet_loss_interval_) {
last_lost_time_ = send_time;
return true;
}
return false;
}
void SetPacketLossRate(double loss_rate) {
packet_loss_interval_ =
(loss_rate >= 1e-3 ? static_cast<double>(kFrameSizeMs) / loss_rate
: 0xffffffff);
}
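// Example: a loss rate of 0.1 with 20 ms frames gives a loss interval of
// 20 / 0.1 = 200 ms, i.e. Lost() drops every tenth packet.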
// Checks current network statistics against the expectations in `expects`:
//   kIgnore      - do not check this field,
//   kEqual       - current stats must equal `stats_ref`,
//   kSmallerThan - current stats must be smaller than `stats_ref`,
//   kLargerThan  - current stats must be larger than `stats_ref`.
void CheckNetworkStatistics(NetEqNetworkStatsCheck expects) {
NetEqNetworkStatistics stats;
neteq_->NetworkStatistics(&stats);
#define CHECK_NETEQ_NETWORK_STATS(x) \
switch (expects.x) { \
case kEqual: \
EXPECT_EQ(stats.x, expects.stats_ref.x); \
break; \
case kSmallerThan: \
EXPECT_LT(stats.x, expects.stats_ref.x); \
break; \
case kLargerThan: \
EXPECT_GT(stats.x, expects.stats_ref.x); \
break; \
default: \
break; \
}
CHECK_NETEQ_NETWORK_STATS(current_buffer_size_ms);
CHECK_NETEQ_NETWORK_STATS(preferred_buffer_size_ms);
CHECK_NETEQ_NETWORK_STATS(jitter_peaks_found);
CHECK_NETEQ_NETWORK_STATS(expand_rate);
CHECK_NETEQ_NETWORK_STATS(speech_expand_rate);
CHECK_NETEQ_NETWORK_STATS(preemptive_rate);
CHECK_NETEQ_NETWORK_STATS(accelerate_rate);
CHECK_NETEQ_NETWORK_STATS(secondary_decoded_rate);
CHECK_NETEQ_NETWORK_STATS(secondary_discarded_rate);
#undef CHECK_NETEQ_NETWORK_STATS
}
void RunTest(int num_loops, NetEqNetworkStatsCheck expects) {
uint32_t time_now;
uint32_t next_send_time;
// Initiate `last_lost_time_`.
time_now = next_send_time = last_lost_time_ = rtp_generator_->GetRtpHeader(
kPayloadType, frame_size_samples_, &rtp_header_);
for (int k = 0; k < num_loops; ++k) {
// Delay by one frame such that the FEC can come in.
while (time_now + kFrameSizeMs >= next_send_time) {
next_send_time = rtp_generator_->GetRtpHeader(
kPayloadType, frame_size_samples_, &rtp_header_);
if (!Lost(next_send_time)) {
static const uint8_t payload[kPayloadSizeByte] = {0};
ASSERT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header_, payload));
}
}
bool muted = true;
EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_frame_, &muted));
ASSERT_FALSE(muted);
EXPECT_EQ(decoder_->Channels(), output_frame_.num_channels_);
EXPECT_EQ(static_cast<size_t>(kOutputLengthMs * samples_per_ms_),
output_frame_.samples_per_channel_);
EXPECT_EQ(48000, neteq_->last_output_sample_rate_hz());
time_now += kOutputLengthMs;
}
CheckNetworkStatistics(expects);
neteq_->FlushBuffers();
}
void DecodeFecTest() {
decoder_->set_fec_enabled(false);
NetEqNetworkStatsCheck expects = {kIgnore, // current_buffer_size_ms
kIgnore, // preferred_buffer_size_ms
kIgnore, // jitter_peaks_found
kEqual, // packet_loss_rate
kEqual, // expand_rate
kEqual, // speech_expand_rate
kIgnore, // preemptive_rate
kEqual, // accelerate_rate
kEqual, // secondary_decoded_rate
kEqual, // secondary_discarded_rate
kEqual, // added_zero_samples
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}};
RunTest(50, expects);
// Next we introduce packet losses.
SetPacketLossRate(0.1);
expects.expand_rate = expects.speech_expand_rate = kLargerThan;
RunTest(50, expects);
// Next we enable FEC.
decoder_->set_fec_enabled(true);
// If FEC fills in the lost packets, no packet loss will be counted.
expects.expand_rate = expects.speech_expand_rate = kEqual;
expects.stats_ref.expand_rate = expects.stats_ref.speech_expand_rate = 0;
expects.secondary_decoded_rate = kLargerThan;
expects.secondary_discarded_rate = kLargerThan;
RunTest(50, expects);
}
void NoiseExpansionTest() {
NetEqNetworkStatsCheck expects = {kIgnore, // current_buffer_size_ms
kIgnore, // preferred_buffer_size_ms
kIgnore, // jitter_peaks_found
kEqual, // packet_loss_rate
kEqual, // expand_rate
kEqual, // speech_expand_rate
kIgnore, // preemptive_rate
kEqual, // accelerate_rate
kEqual, // secondary_decoded_rate
kEqual, // secondary_discarded_rate
kEqual, // added_zero_samples
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}};
RunTest(50, expects);
SetPacketLossRate(1);
expects.stats_ref.expand_rate = 16384;
expects.stats_ref.speech_expand_rate = 5324;
RunTest(10, expects); // Lost 10 * 20ms in a row.
}
private:
MockAudioDecoder* decoder_;
rtc::scoped_refptr<AudioDecoderProxyFactory> decoder_factory_;
std::unique_ptr<NetEq> neteq_;
const int samples_per_ms_;
const size_t frame_size_samples_;
std::unique_ptr<RtpGenerator> rtp_generator_;
RTPHeader rtp_header_;
uint32_t last_lost_time_;
uint32_t packet_loss_interval_;
AudioFrame output_frame_;
};
TEST(NetEqNetworkStatsTest, DecodeFec) {
MockAudioDecoder decoder(48000, 1);
NetEqNetworkStatsTest test(SdpAudioFormat("opus", 48000, 2), &decoder);
test.DecodeFecTest();
EXPECT_CALL(decoder, Die()).Times(1);
}
TEST(NetEqNetworkStatsTest, StereoDecodeFec) {
MockAudioDecoder decoder(48000, 2);
NetEqNetworkStatsTest test(SdpAudioFormat("opus", 48000, 2), &decoder);
test.DecodeFecTest();
EXPECT_CALL(decoder, Die()).Times(1);
}
TEST(NetEqNetworkStatsTest, NoiseExpansionTest) {
MockAudioDecoder decoder(48000, 1);
NetEqNetworkStatsTest test(SdpAudioFormat("opus", 48000, 2), &decoder);
test.NoiseExpansionTest();
EXPECT_CALL(decoder, Die()).Times(1);
}
} // namespace test
} // namespace webrtc


@ -0,0 +1,424 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Test to verify correct stereo and multi-channel operation.
#include <algorithm>
#include <list>
#include <memory>
#include <string>
#include "api/audio/audio_frame.h"
#include "api/audio_codecs/builtin_audio_decoder_factory.h"
#include "api/neteq/neteq.h"
#include "modules/audio_coding/codecs/pcm16b/pcm16b.h"
#include "modules/audio_coding/neteq/default_neteq_factory.h"
#include "modules/audio_coding/neteq/tools/input_audio_file.h"
#include "modules/audio_coding/neteq/tools/rtp_generator.h"
#include "rtc_base/strings/string_builder.h"
#include "system_wrappers/include/clock.h"
#include "test/gtest.h"
#include "test/testsupport/file_utils.h"
namespace webrtc {
struct TestParameters {
int frame_size;
int sample_rate;
size_t num_channels;
};
// This is a parameterized test. The test parameters are supplied through a
// TestParameters struct, which is obtained through the GetParam() method.
//
// The objective of the test is to create a mono input signal and a
// multi-channel input signal, where each channel is identical to the mono
// input channel. The two input signals are processed through their respective
// NetEq instances. After that, the output signals are compared. The expected
// result is that each channel in the multi-channel output is identical to the
// mono output.
class NetEqStereoTest : public ::testing::TestWithParam<TestParameters> {
protected:
static const int kTimeStepMs = 10;
static const size_t kMaxBlockSize = 480; // 10 ms @ 48 kHz.
static const uint8_t kPayloadTypeMono = 95;
static const uint8_t kPayloadTypeMulti = 96;
NetEqStereoTest()
: num_channels_(GetParam().num_channels),
sample_rate_hz_(GetParam().sample_rate),
samples_per_ms_(sample_rate_hz_ / 1000),
frame_size_ms_(GetParam().frame_size),
frame_size_samples_(
static_cast<size_t>(frame_size_ms_ * samples_per_ms_)),
output_size_samples_(10 * samples_per_ms_),
clock_(0),
rtp_generator_mono_(samples_per_ms_),
rtp_generator_(samples_per_ms_),
payload_size_bytes_(0),
multi_payload_size_bytes_(0),
last_send_time_(0),
last_arrival_time_(0) {
NetEq::Config config;
config.sample_rate_hz = sample_rate_hz_;
DefaultNetEqFactory neteq_factory;
auto decoder_factory = CreateBuiltinAudioDecoderFactory();
neteq_mono_ = neteq_factory.CreateNetEq(config, decoder_factory, &clock_);
neteq_ = neteq_factory.CreateNetEq(config, decoder_factory, &clock_);
input_ = new int16_t[frame_size_samples_];
encoded_ = new uint8_t[2 * frame_size_samples_];
input_multi_channel_ = new int16_t[frame_size_samples_ * num_channels_];
encoded_multi_channel_ =
new uint8_t[frame_size_samples_ * 2 * num_channels_];
}
~NetEqStereoTest() {
delete[] input_;
delete[] encoded_;
delete[] input_multi_channel_;
delete[] encoded_multi_channel_;
}
virtual void SetUp() {
const std::string file_name =
webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm");
input_file_.reset(new test::InputAudioFile(file_name));
RTC_CHECK_GE(num_channels_, 2);
ASSERT_TRUE(neteq_mono_->RegisterPayloadType(
kPayloadTypeMono, SdpAudioFormat("l16", sample_rate_hz_, 1)));
ASSERT_TRUE(neteq_->RegisterPayloadType(
kPayloadTypeMulti,
SdpAudioFormat("l16", sample_rate_hz_, num_channels_)));
}
virtual void TearDown() {}
int GetNewPackets() {
if (!input_file_->Read(frame_size_samples_, input_)) {
return -1;
}
payload_size_bytes_ =
WebRtcPcm16b_Encode(input_, frame_size_samples_, encoded_);
if (frame_size_samples_ * 2 != payload_size_bytes_) {
return -1;
}
int next_send_time = rtp_generator_mono_.GetRtpHeader(
kPayloadTypeMono, frame_size_samples_, &rtp_header_mono_);
MakeMultiChannelInput();
multi_payload_size_bytes_ = WebRtcPcm16b_Encode(
input_multi_channel_, frame_size_samples_ * num_channels_,
encoded_multi_channel_);
if (frame_size_samples_ * 2 * num_channels_ != multi_payload_size_bytes_) {
return -1;
}
rtp_generator_.GetRtpHeader(kPayloadTypeMulti, frame_size_samples_,
&rtp_header_);
return next_send_time;
}
virtual void MakeMultiChannelInput() {
test::InputAudioFile::DuplicateInterleaved(
input_, frame_size_samples_, num_channels_, input_multi_channel_);
}
virtual void VerifyOutput(size_t num_samples) {
const int16_t* output_data = output_.data();
const int16_t* output_multi_channel_data = output_multi_channel_.data();
for (size_t i = 0; i < num_samples; ++i) {
for (size_t j = 0; j < num_channels_; ++j) {
ASSERT_EQ(output_data[i],
output_multi_channel_data[i * num_channels_ + j])
<< "Diff in sample " << i << ", channel " << j << ".";
}
}
}
virtual int GetArrivalTime(int send_time) {
int arrival_time = last_arrival_time_ + (send_time - last_send_time_);
last_send_time_ = send_time;
last_arrival_time_ = arrival_time;
return arrival_time;
}
virtual bool Lost() { return false; }
void RunTest(int num_loops) {
// Get next input packets (mono and multi-channel).
int next_send_time;
int next_arrival_time;
do {
next_send_time = GetNewPackets();
ASSERT_NE(-1, next_send_time);
next_arrival_time = GetArrivalTime(next_send_time);
} while (Lost()); // If lost, immediately read the next packet.
int time_now = 0;
for (int k = 0; k < num_loops; ++k) {
while (time_now >= next_arrival_time) {
// Insert packet in mono instance.
ASSERT_EQ(NetEq::kOK,
neteq_mono_->InsertPacket(
rtp_header_mono_, rtc::ArrayView<const uint8_t>(
encoded_, payload_size_bytes_)));
// Insert packet in multi-channel instance.
ASSERT_EQ(NetEq::kOK, neteq_->InsertPacket(
rtp_header_, rtc::ArrayView<const uint8_t>(
encoded_multi_channel_,
multi_payload_size_bytes_)));
// Get next input packets (mono and multi-channel).
do {
next_send_time = GetNewPackets();
ASSERT_NE(-1, next_send_time);
next_arrival_time = GetArrivalTime(next_send_time);
} while (Lost()); // If lost, immediately read the next packet.
}
// Get audio from mono instance.
bool muted;
EXPECT_EQ(NetEq::kOK, neteq_mono_->GetAudio(&output_, &muted));
ASSERT_FALSE(muted);
EXPECT_EQ(1u, output_.num_channels_);
EXPECT_EQ(output_size_samples_, output_.samples_per_channel_);
// Get audio from multi-channel instance.
ASSERT_EQ(NetEq::kOK, neteq_->GetAudio(&output_multi_channel_, &muted));
ASSERT_FALSE(muted);
EXPECT_EQ(num_channels_, output_multi_channel_.num_channels_);
EXPECT_EQ(output_size_samples_,
output_multi_channel_.samples_per_channel_);
rtc::StringBuilder ss;
ss << "Lap number " << k << ".";
SCOPED_TRACE(ss.str()); // Print out the parameter values on failure.
// Compare mono and multi-channel.
ASSERT_NO_FATAL_FAILURE(VerifyOutput(output_size_samples_));
time_now += kTimeStepMs;
clock_.AdvanceTimeMilliseconds(kTimeStepMs);
}
}
const size_t num_channels_;
const int sample_rate_hz_;
const int samples_per_ms_;
const int frame_size_ms_;
const size_t frame_size_samples_;
const size_t output_size_samples_;
SimulatedClock clock_;
std::unique_ptr<NetEq> neteq_mono_;
std::unique_ptr<NetEq> neteq_;
test::RtpGenerator rtp_generator_mono_;
test::RtpGenerator rtp_generator_;
int16_t* input_;
int16_t* input_multi_channel_;
uint8_t* encoded_;
uint8_t* encoded_multi_channel_;
AudioFrame output_;
AudioFrame output_multi_channel_;
RTPHeader rtp_header_mono_;
RTPHeader rtp_header_;
size_t payload_size_bytes_;
size_t multi_payload_size_bytes_;
int last_send_time_;
int last_arrival_time_;
std::unique_ptr<test::InputAudioFile> input_file_;
};
class NetEqStereoTestNoJitter : public NetEqStereoTest {
protected:
NetEqStereoTestNoJitter() : NetEqStereoTest() {
// Start the sender 100 ms before the receiver to pre-fill the buffer.
// This is to avoid doing preemptive expand early in the test.
// TODO(hlundin): Mock the decision making instead to control the modes.
last_arrival_time_ = -100;
}
};
TEST_P(NetEqStereoTestNoJitter, RunTest) {
RunTest(8);
}
class NetEqStereoTestPositiveDrift : public NetEqStereoTest {
protected:
NetEqStereoTestPositiveDrift() : NetEqStereoTest(), drift_factor(0.9) {
// Start the sender 100 ms before the receiver to pre-fill the buffer.
// This is to avoid doing preemptive expand early in the test.
// TODO(hlundin): Mock the decision making instead to control the modes.
last_arrival_time_ = -100;
}
virtual int GetArrivalTime(int send_time) {
int arrival_time =
last_arrival_time_ + drift_factor * (send_time - last_send_time_);
last_send_time_ = send_time;
last_arrival_time_ = arrival_time;
return arrival_time;
}
double drift_factor;
};
TEST_P(NetEqStereoTestPositiveDrift, RunTest) {
RunTest(100);
}
class NetEqStereoTestNegativeDrift : public NetEqStereoTestPositiveDrift {
protected:
NetEqStereoTestNegativeDrift() : NetEqStereoTestPositiveDrift() {
drift_factor = 1.1;
last_arrival_time_ = 0;
}
};
TEST_P(NetEqStereoTestNegativeDrift, RunTest) {
RunTest(100);
}
class NetEqStereoTestDelays : public NetEqStereoTest {
protected:
static const int kDelayInterval = 10;
static const int kDelay = 1000;
NetEqStereoTestDelays() : NetEqStereoTest(), frame_index_(0) {}
virtual int GetArrivalTime(int send_time) {
// Deliver immediately, unless we have a back-log.
int arrival_time = std::min(last_arrival_time_, send_time);
if (++frame_index_ % kDelayInterval == 0) {
// Delay this packet.
arrival_time += kDelay;
}
last_send_time_ = send_time;
last_arrival_time_ = arrival_time;
return arrival_time;
}
int frame_index_;
};
TEST_P(NetEqStereoTestDelays, RunTest) {
RunTest(1000);
}
class NetEqStereoTestLosses : public NetEqStereoTest {
protected:
static const int kLossInterval = 10;
NetEqStereoTestLosses() : NetEqStereoTest(), frame_index_(0) {}
virtual bool Lost() { return (++frame_index_) % kLossInterval == 0; }
// TODO(hlundin): NetEq is not giving bitexact results for these cases.
virtual void VerifyOutput(size_t num_samples) {
for (size_t i = 0; i < num_samples; ++i) {
const int16_t* output_data = output_.data();
const int16_t* output_multi_channel_data = output_multi_channel_.data();
auto first_channel_sample = output_multi_channel_data[i * num_channels_];
for (size_t j = 0; j < num_channels_; ++j) {
const int kErrorMargin = 200;
EXPECT_NEAR(output_data[i],
output_multi_channel_data[i * num_channels_ + j],
kErrorMargin)
<< "Diff in sample " << i << ", channel " << j << ".";
EXPECT_EQ(first_channel_sample,
output_multi_channel_data[i * num_channels_ + j]);
}
}
}
int frame_index_;
};
TEST_P(NetEqStereoTestLosses, RunTest) {
RunTest(100);
}
class NetEqStereoTestSingleActiveChannelPlc : public NetEqStereoTestLosses {
protected:
NetEqStereoTestSingleActiveChannelPlc() : NetEqStereoTestLosses() {}
void MakeMultiChannelInput() override {
// Create a multi-channel input by copying the mono channel from file to the
// first channel, and setting the others to zero.
memset(input_multi_channel_, 0,
frame_size_samples_ * num_channels_ * sizeof(int16_t));
for (size_t i = 0; i < frame_size_samples_; ++i) {
input_multi_channel_[i * num_channels_] = input_[i];
}
}
void VerifyOutput(size_t num_samples) override {
// Simply verify that all samples in channels other than the first are zero.
const int16_t* output_multi_channel_data = output_multi_channel_.data();
for (size_t i = 0; i < num_samples; ++i) {
for (size_t j = 1; j < num_channels_; ++j) {
EXPECT_EQ(0, output_multi_channel_data[i * num_channels_ + j])
<< "Sample " << i << ", channel " << j << " is non-zero.";
}
}
}
};
TEST_P(NetEqStereoTestSingleActiveChannelPlc, RunTest) {
RunTest(100);
}
// Creates a list of parameter sets.
std::list<TestParameters> GetTestParameters() {
std::list<TestParameters> l;
const int sample_rates[] = {8000, 16000, 32000};
const int num_rates = sizeof(sample_rates) / sizeof(sample_rates[0]);
// Loop through sample rates.
for (int rate_index = 0; rate_index < num_rates; ++rate_index) {
int sample_rate = sample_rates[rate_index];
// Loop through all frame sizes between 10 and 60 ms.
for (int frame_size = 10; frame_size <= 60; frame_size += 10) {
TestParameters p;
p.frame_size = frame_size;
p.sample_rate = sample_rate;
p.num_channels = 2;
l.push_back(p);
if (sample_rate == 8000) {
// Add a five-channel test for 8000 Hz.
p.num_channels = 5;
l.push_back(p);
}
}
}
return l;
}
// Pretty-printing the test parameters in case of an error.
void PrintTo(const TestParameters& p, ::std::ostream* os) {
*os << "{frame_size = " << p.frame_size
<< ", num_channels = " << p.num_channels
<< ", sample_rate = " << p.sample_rate << "}";
}
// Instantiate the tests. Each test is instantiated using the function above,
// so that all different parameter combinations are tested.
INSTANTIATE_TEST_SUITE_P(MultiChannel,
NetEqStereoTestNoJitter,
::testing::ValuesIn(GetTestParameters()));
INSTANTIATE_TEST_SUITE_P(MultiChannel,
NetEqStereoTestPositiveDrift,
::testing::ValuesIn(GetTestParameters()));
INSTANTIATE_TEST_SUITE_P(MultiChannel,
NetEqStereoTestNegativeDrift,
::testing::ValuesIn(GetTestParameters()));
INSTANTIATE_TEST_SUITE_P(MultiChannel,
NetEqStereoTestDelays,
::testing::ValuesIn(GetTestParameters()));
INSTANTIATE_TEST_SUITE_P(MultiChannel,
NetEqStereoTestLosses,
::testing::ValuesIn(GetTestParameters()));
INSTANTIATE_TEST_SUITE_P(MultiChannel,
NetEqStereoTestSingleActiveChannelPlc,
::testing::ValuesIn(GetTestParameters()));
} // namespace webrtc

File diff suppressed because it is too large.

@ -0,0 +1,31 @@
syntax = "proto2";
option optimize_for = LITE_RUNTIME;
package webrtc.neteq_unittest;
message NetEqNetworkStatistics {
// Next field number 18.
optional uint32 current_buffer_size_ms = 1;
optional uint32 preferred_buffer_size_ms = 2;
optional uint32 jitter_peaks_found = 3;
reserved 4; // Was packet_loss_rate.
reserved 5; // Was packet_discard_rate.
optional uint32 expand_rate = 6;
optional uint32 speech_expand_rate = 7;
optional uint32 preemptive_rate = 8;
optional uint32 accelerate_rate = 9;
optional uint32 secondary_decoded_rate = 10;
optional uint32 secondary_discarded_rate = 17;
optional int32 clockdrift_ppm = 11;
reserved 12; // Was added_zero_samples.
optional int32 mean_waiting_time_ms = 13;
optional int32 median_waiting_time_ms = 14;
optional int32 min_waiting_time_ms = 15;
optional int32 max_waiting_time_ms = 16;
}
message RtcpStatistics {
optional uint32 fraction_lost = 1;
optional uint32 cumulative_lost = 2;
optional uint32 extended_max_sequence_number = 3;
optional uint32 jitter = 4;
}


@ -0,0 +1,194 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/normal.h"
#include <string.h> // memset, memcpy
#include <algorithm> // min
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "modules/audio_coding/neteq/audio_multi_vector.h"
#include "modules/audio_coding/neteq/background_noise.h"
#include "modules/audio_coding/neteq/decoder_database.h"
#include "modules/audio_coding/neteq/expand.h"
#include "rtc_base/checks.h"
namespace webrtc {
int Normal::Process(const int16_t* input,
size_t length,
NetEq::Mode last_mode,
AudioMultiVector* output) {
if (length == 0) {
// Nothing to process.
output->Clear();
return static_cast<int>(length);
}
// Output should be empty at this point.
RTC_DCHECK(output->Empty());
if (length % output->Channels() != 0) {
// The length does not match the number of channels.
output->Clear();
return 0;
}
output->PushBackInterleaved(rtc::ArrayView<const int16_t>(input, length));
const int fs_mult = fs_hz_ / 8000;
RTC_DCHECK_GT(fs_mult, 0);
// fs_shift = log2(fs_mult), rounded down.
// Note that `fs_shift` is not "exact" for 48 kHz.
// TODO(hlundin): Investigate this further.
const int fs_shift = 30 - WebRtcSpl_NormW32(fs_mult);
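// Example values: fs_mult = 1, 2, 4, 6 (8/16/32/48 kHz) gives
// WebRtcSpl_NormW32(fs_mult) = 30, 29, 28, 28, hence fs_shift = 0, 1, 2, 2.
// The exact value for 48 kHz would be log2(6) ~= 2.58, hence the note above.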
// If the last call resulted in codec-internal PLC (kCodecPlc), we don't need
// to do cross-fading, but we need to report the end of the interruption once
// we are back to normal operation.
if (last_mode == NetEq::Mode::kCodecPlc) {
statistics_->EndExpandEvent(fs_hz_);
}
// Check if last RecOut call resulted in an Expand. If so, we have to take
// care of some cross-fading and unmuting.
if (last_mode == NetEq::Mode::kExpand) {
// Generate interpolation data using Expand.
// First, set Expand parameters to appropriate values.
expand_->SetParametersForNormalAfterExpand();
// Call Expand.
AudioMultiVector expanded(output->Channels());
expand_->Process(&expanded);
expand_->Reset();
size_t length_per_channel = length / output->Channels();
std::unique_ptr<int16_t[]> signal(new int16_t[length_per_channel]);
for (size_t channel_ix = 0; channel_ix < output->Channels(); ++channel_ix) {
// Set muting factor to the same as expand muting factor.
int16_t mute_factor = expand_->MuteFactor(channel_ix);
(*output)[channel_ix].CopyTo(length_per_channel, 0, signal.get());
// Find largest absolute value in new data.
int16_t decoded_max =
WebRtcSpl_MaxAbsValueW16(signal.get(), length_per_channel);
// Adjust muting factor if needed (to BGN level).
size_t energy_length =
std::min(static_cast<size_t>(fs_mult * 64), length_per_channel);
int scaling = 6 + fs_shift - WebRtcSpl_NormW32(decoded_max * decoded_max);
scaling = std::max(scaling, 0); // `scaling` should always be >= 0.
int32_t energy = WebRtcSpl_DotProductWithScale(signal.get(), signal.get(),
energy_length, scaling);
int32_t scaled_energy_length =
static_cast<int32_t>(energy_length >> scaling);
if (scaled_energy_length > 0) {
energy = energy / scaled_energy_length;
} else {
energy = 0;
}
int local_mute_factor = 16384; // 1.0 in Q14.
if ((energy != 0) && (energy > background_noise_.Energy(channel_ix))) {
// Normalize new frame energy to 15 bits.
scaling = WebRtcSpl_NormW32(energy) - 16;
// We want background_noise_.energy() / energy in Q14.
int32_t bgn_energy = WEBRTC_SPL_SHIFT_W32(
background_noise_.Energy(channel_ix), scaling + 14);
int16_t energy_scaled =
static_cast<int16_t>(WEBRTC_SPL_SHIFT_W32(energy, scaling));
int32_t ratio = WebRtcSpl_DivW32W16(bgn_energy, energy_scaled);
local_mute_factor =
std::min(local_mute_factor, WebRtcSpl_SqrtFloor(ratio << 14));
}
mute_factor = std::max<int16_t>(mute_factor, local_mute_factor);
RTC_DCHECK_LE(mute_factor, 16384);
RTC_DCHECK_GE(mute_factor, 0);
// If muted increase by 0.64 for every 20 ms (NB/WB 0.0040/0.0020 in Q14),
// or as fast as it takes to come back to full gain within the frame
// length.
const int back_to_fullscale_inc =
static_cast<int>((16384 - mute_factor) / length_per_channel);
const int increment = std::max(64 / fs_mult, back_to_fullscale_inc);
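// Example: for NB (fs_mult = 1) the increment is 64 in Q14, i.e.
// 64 / 16384 ~= 0.0039 per sample, or about 0.63 of full scale over a
// 20 ms (160-sample) frame, matching the 0.64-per-20-ms figure above.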
for (size_t i = 0; i < length_per_channel; i++) {
// Scale with mute factor.
RTC_DCHECK_LT(channel_ix, output->Channels());
RTC_DCHECK_LT(i, output->Size());
int32_t scaled_signal = (*output)[channel_ix][i] * mute_factor;
// Shift 14 with proper rounding.
(*output)[channel_ix][i] =
static_cast<int16_t>((scaled_signal + 8192) >> 14);
// Increase mute_factor towards 16384.
mute_factor =
static_cast<int16_t>(std::min(mute_factor + increment, 16384));
}
// Interpolate the expanded data into the new vector.
// (NB/WB/SWB32/SWB48 8/16/32/48 samples.)
size_t win_length = samples_per_ms_;
int16_t win_slope_Q14 = default_win_slope_Q14_;
RTC_DCHECK_LT(channel_ix, output->Channels());
if (win_length > output->Size()) {
win_length = output->Size();
win_slope_Q14 = (1 << 14) / static_cast<int16_t>(win_length);
}
int16_t win_up_Q14 = 0;
for (size_t i = 0; i < win_length; i++) {
win_up_Q14 += win_slope_Q14;
(*output)[channel_ix][i] =
(win_up_Q14 * (*output)[channel_ix][i] +
((1 << 14) - win_up_Q14) * expanded[channel_ix][i] + (1 << 13)) >>
14;
}
RTC_DCHECK_GT(win_up_Q14,
(1 << 14) - 32); // Worst-case rounding is a length of 34.
}
} else if (last_mode == NetEq::Mode::kRfc3389Cng) {
RTC_DCHECK_EQ(output->Channels(), 1); // Not adapted for multi-channel yet.
static const size_t kCngLength = 48;
RTC_DCHECK_LE(8 * fs_mult, kCngLength);
int16_t cng_output[kCngLength];
ComfortNoiseDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder();
if (cng_decoder) {
// Generate long enough for 48kHz.
if (!cng_decoder->Generate(cng_output, false)) {
// Error returned; set return vector to all zeros.
memset(cng_output, 0, sizeof(cng_output));
}
} else {
// If no CNG instance is defined, just copy from the decoded data.
// (This will result in interpolating the decoded with itself.)
(*output)[0].CopyTo(fs_mult * 8, 0, cng_output);
}
// Interpolate the CNG into the new vector.
// (NB/WB/SWB32/SWB48 8/16/32/48 samples.)
size_t win_length = samples_per_ms_;
int16_t win_slope_Q14 = default_win_slope_Q14_;
if (win_length > kCngLength) {
win_length = kCngLength;
win_slope_Q14 = (1 << 14) / static_cast<int16_t>(win_length);
}
int16_t win_up_Q14 = 0;
for (size_t i = 0; i < win_length; i++) {
win_up_Q14 += win_slope_Q14;
(*output)[0][i] =
(win_up_Q14 * (*output)[0][i] +
((1 << 14) - win_up_Q14) * cng_output[i] + (1 << 13)) >>
14;
}
RTC_DCHECK_GT(win_up_Q14,
(1 << 14) - 32); // Worst-case rounding is a length of 34.
}
return static_cast<int>(length);
}
} // namespace webrtc


@ -0,0 +1,76 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_NORMAL_H_
#define MODULES_AUDIO_CODING_NETEQ_NORMAL_H_
#include <stdint.h>
#include <string.h> // Access to size_t.
#include "api/neteq/neteq.h"
#include "modules/audio_coding/neteq/statistics_calculator.h"
#include "rtc_base/checks.h"
#include "rtc_base/numerics/safe_conversions.h"
namespace webrtc {
// Forward declarations.
class AudioMultiVector;
class BackgroundNoise;
class DecoderDatabase;
class Expand;
// This class provides the "Normal" DSP operation, that is performed when
// there is no data loss, no need to stretch the timing of the signal, and
// no other "special circumstances" are at hand.
class Normal {
public:
Normal(int fs_hz,
DecoderDatabase* decoder_database,
const BackgroundNoise& background_noise,
Expand* expand,
StatisticsCalculator* statistics)
: fs_hz_(fs_hz),
decoder_database_(decoder_database),
background_noise_(background_noise),
expand_(expand),
samples_per_ms_(rtc::CheckedDivExact(fs_hz_, 1000)),
default_win_slope_Q14_(
rtc::dchecked_cast<uint16_t>((1 << 14) / samples_per_ms_)),
statistics_(statistics) {}
virtual ~Normal() {}
Normal(const Normal&) = delete;
Normal& operator=(const Normal&) = delete;
// Performs the "Normal" operation. The decoder data is supplied in `input`,
// having `length` samples in total for all channels (interleaved). The
// result is written to `output`. The number of channels allocated in
// `output` defines the number of channels that will be used when
// de-interleaving `input`. `last_mode` contains the mode used in the previous
// GetAudio call (i.e., not the current one).
int Process(const int16_t* input,
size_t length,
NetEq::Mode last_mode,
AudioMultiVector* output);
private:
int fs_hz_;
DecoderDatabase* decoder_database_;
const BackgroundNoise& background_noise_;
Expand* expand_;
const size_t samples_per_ms_;
const int16_t default_win_slope_Q14_;
StatisticsCalculator* const statistics_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_NORMAL_H_
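As a quick orientation before the unit tests further below, here is a minimal usage sketch for this class. It mirrors the object wiring used in normal_unittest.cc; `db` is assumed to be a DecoderDatabase the caller already owns, and the frame size is an arbitrary illustration value:

// Sketch only: construct Normal and run one decoded frame through it.
void NormalUsageSketch(DecoderDatabase* db) {
  const int kFsHz = 8000;
  const size_t kChannels = 1;
  BackgroundNoise bgn(kChannels);
  SyncBuffer sync_buffer(kChannels, 1000);
  RandomVector random_vector;
  StatisticsCalculator statistics;
  Expand expand(&bgn, &sync_buffer, &random_vector, &statistics, kFsHz,
                kChannels);
  Normal normal(kFsHz, db, bgn, &expand, &statistics);

  int16_t decoded[80] = {0};  // 10 ms of decoded mono audio at 8 kHz.
  AudioMultiVector output(kChannels);
  // Returns the number of consumed input samples (80 here); with
  // last_mode == kNormal the input is copied straight to `output`.
  int samples = normal.Process(decoded, 80, NetEq::Mode::kNormal, &output);
  (void)samples;
}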


@ -0,0 +1,147 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for Normal class.
#include "modules/audio_coding/neteq/normal.h"
#include <memory>
#include <vector>
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "modules/audio_coding/neteq/audio_multi_vector.h"
#include "modules/audio_coding/neteq/background_noise.h"
#include "modules/audio_coding/neteq/expand.h"
#include "modules/audio_coding/neteq/mock/mock_decoder_database.h"
#include "modules/audio_coding/neteq/mock/mock_expand.h"
#include "modules/audio_coding/neteq/random_vector.h"
#include "modules/audio_coding/neteq/statistics_calculator.h"
#include "modules/audio_coding/neteq/sync_buffer.h"
#include "test/gtest.h"
using ::testing::_;
using ::testing::Invoke;
namespace webrtc {
namespace {
int ExpandProcess120ms(AudioMultiVector* output) {
AudioMultiVector dummy_audio(1, 11520u);
dummy_audio.CopyTo(output);
return 0;
}
} // namespace
TEST(Normal, CreateAndDestroy) {
MockDecoderDatabase db;
int fs = 8000;
size_t channels = 1;
BackgroundNoise bgn(channels);
SyncBuffer sync_buffer(1, 1000);
RandomVector random_vector;
StatisticsCalculator statistics;
Expand expand(&bgn, &sync_buffer, &random_vector, &statistics, fs, channels);
Normal normal(fs, &db, bgn, &expand, &statistics);
EXPECT_CALL(db, Die()); // Called when `db` goes out of scope.
}
TEST(Normal, AvoidDivideByZero) {
MockDecoderDatabase db;
int fs = 8000;
size_t channels = 1;
BackgroundNoise bgn(channels);
SyncBuffer sync_buffer(1, 1000);
RandomVector random_vector;
StatisticsCalculator statistics;
MockExpand expand(&bgn, &sync_buffer, &random_vector, &statistics, fs,
channels);
Normal normal(fs, &db, bgn, &expand, &statistics);
int16_t input[1000] = {0};
AudioMultiVector output(channels);
// Zero input length.
EXPECT_EQ(0, normal.Process(input, 0, NetEq::Mode::kExpand, &output));
EXPECT_EQ(0u, output.Size());
// Try to make (energy_length >> scaling) == 0.
EXPECT_CALL(expand, SetParametersForNormalAfterExpand());
EXPECT_CALL(expand, Process(_));
EXPECT_CALL(expand, Reset());
// If input_size_samples < 64, then energy_length in Normal::Process() will
// be equal to input_size_samples. Since the input is all zeros, decoded_max
// will be zero, and scaling will be >= 6. Thus, energy_length >> scaling = 0,
// and using this as a denominator would lead to problems.
int input_size_samples = 63;
EXPECT_EQ(input_size_samples, normal.Process(input, input_size_samples,
NetEq::Mode::kExpand, &output));
EXPECT_CALL(db, Die()); // Called when `db` goes out of scope.
EXPECT_CALL(expand, Die()); // Called when `expand` goes out of scope.
}
TEST(Normal, InputLengthAndChannelsDoNotMatch) {
MockDecoderDatabase db;
int fs = 8000;
size_t channels = 2;
BackgroundNoise bgn(channels);
SyncBuffer sync_buffer(channels, 1000);
RandomVector random_vector;
StatisticsCalculator statistics;
MockExpand expand(&bgn, &sync_buffer, &random_vector, &statistics, fs,
channels);
Normal normal(fs, &db, bgn, &expand, &statistics);
int16_t input[1000] = {0};
AudioMultiVector output(channels);
// Let the number of samples be one sample less than 80 samples per channel.
size_t input_len = 80 * channels - 1;
EXPECT_EQ(0, normal.Process(input, input_len, NetEq::Mode::kExpand, &output));
EXPECT_EQ(0u, output.Size());
EXPECT_CALL(db, Die()); // Called when `db` goes out of scope.
EXPECT_CALL(expand, Die()); // Called when `expand` goes out of scope.
}
TEST(Normal, LastModeExpand120msPacket) {
MockDecoderDatabase db;
const int kFs = 48000;
const size_t kPacketsizeBytes = 11520u;
const size_t kChannels = 1;
BackgroundNoise bgn(kChannels);
SyncBuffer sync_buffer(kChannels, 1000);
RandomVector random_vector;
StatisticsCalculator statistics;
MockExpand expand(&bgn, &sync_buffer, &random_vector, &statistics, kFs,
kChannels);
Normal normal(kFs, &db, bgn, &expand, &statistics);
int16_t input[kPacketsizeBytes] = {0};
AudioMultiVector output(kChannels);
EXPECT_CALL(expand, SetParametersForNormalAfterExpand());
EXPECT_CALL(expand, Process(_)).WillOnce(Invoke(ExpandProcess120ms));
EXPECT_CALL(expand, Reset());
EXPECT_EQ(
static_cast<int>(kPacketsizeBytes),
normal.Process(input, kPacketsizeBytes, NetEq::Mode::kExpand, &output));
EXPECT_EQ(kPacketsizeBytes, output.Size());
EXPECT_CALL(db, Die()); // Called when `db` goes out of scope.
EXPECT_CALL(expand, Die()); // Called when `expand` goes out of scope.
}
// TODO(hlundin): Write more tests.
} // namespace webrtc


@ -0,0 +1,36 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/packet.h"
namespace webrtc {
Packet::Packet() = default;
Packet::Packet(Packet&& b) = default;
Packet::~Packet() = default;
Packet& Packet::operator=(Packet&& b) = default;
Packet Packet::Clone() const {
RTC_CHECK(!frame);
Packet clone;
clone.timestamp = timestamp;
clone.sequence_number = sequence_number;
clone.payload_type = payload_type;
clone.payload.SetData(payload.data(), payload.size());
clone.priority = priority;
clone.packet_info = packet_info;
return clone;
}
} // namespace webrtc


@ -0,0 +1,128 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_PACKET_H_
#define MODULES_AUDIO_CODING_NETEQ_PACKET_H_
#include <stdint.h>
#include <list>
#include <memory>
#include "api/audio_codecs/audio_decoder.h"
#include "api/neteq/tick_timer.h"
#include "api/rtp_packet_info.h"
#include "rtc_base/buffer.h"
#include "rtc_base/checks.h"
namespace webrtc {
// Struct for holding RTP packets.
struct Packet {
struct Priority {
Priority() : codec_level(0), red_level(0) {}
Priority(int codec_level, int red_level)
: codec_level(codec_level), red_level(red_level) {
CheckInvariant();
}
int codec_level;
int red_level;
// Priorities are sorted low-to-high, first on the level the codec
// prioritizes it, then on the level of RED packet it is; i.e. if it is a
// primary or secondary payload of a RED packet. For example: with Opus, an
// FEC packet (which the decoder prioritizes lower than a regular packet)
// will not be used if there is _any_ RED payload for the same
// timeframe. The highest priority packet will have levels {0, 0}. Negative
// priorities are not allowed.
bool operator<(const Priority& b) const {
CheckInvariant();
b.CheckInvariant();
if (codec_level == b.codec_level)
return red_level < b.red_level;
return codec_level < b.codec_level;
}
bool operator==(const Priority& b) const {
CheckInvariant();
b.CheckInvariant();
return codec_level == b.codec_level && red_level == b.red_level;
}
bool operator!=(const Priority& b) const { return !(*this == b); }
bool operator>(const Priority& b) const { return b < *this; }
bool operator<=(const Priority& b) const { return !(b > *this); }
bool operator>=(const Priority& b) const { return !(b < *this); }
private:
void CheckInvariant() const {
RTC_DCHECK_GE(codec_level, 0);
RTC_DCHECK_GE(red_level, 0);
}
};
uint32_t timestamp;
uint16_t sequence_number;
uint8_t payload_type;
// Datagram excluding RTP header and header extension.
rtc::Buffer payload;
Priority priority;
absl::optional<RtpPacketInfo> packet_info;
std::unique_ptr<TickTimer::Stopwatch> waiting_time;
std::unique_ptr<AudioDecoder::EncodedAudioFrame> frame;
Packet();
Packet(Packet&& b);
~Packet();
// Packets should generally be moved around but sometimes it's useful to make
// a copy, for example for testing purposes. NOTE: Will only work for
// un-parsed packets, i.e. `frame` must be unset. The payload will, however,
// be copied. `waiting_time` will also not be copied.
Packet Clone() const;
Packet& operator=(Packet&& b);
// Comparison operators. Establish a packet ordering based on (1) timestamp,
// (2) sequence number and (3) redundancy.
// Timestamp and sequence numbers are compared taking wrap-around into
// account. For two packets with the same sequence number and timestamp a
// primary payload is considered "smaller" than a secondary.
bool operator==(const Packet& rhs) const {
return (this->timestamp == rhs.timestamp &&
this->sequence_number == rhs.sequence_number &&
this->priority == rhs.priority);
}
bool operator!=(const Packet& rhs) const { return !operator==(rhs); }
bool operator<(const Packet& rhs) const {
if (this->timestamp == rhs.timestamp) {
if (this->sequence_number == rhs.sequence_number) {
// Timestamp and sequence numbers are identical - deem the left hand
// side to be "smaller" (i.e., "earlier") if it has higher priority.
return this->priority < rhs.priority;
}
return (static_cast<uint16_t>(rhs.sequence_number -
this->sequence_number) < 0xFFFF / 2);
}
return (static_cast<uint32_t>(rhs.timestamp - this->timestamp) <
0xFFFFFFFF / 2);
}
bool operator>(const Packet& rhs) const { return rhs.operator<(*this); }
bool operator<=(const Packet& rhs) const { return !operator>(rhs); }
bool operator>=(const Packet& rhs) const { return !operator<(rhs); }
bool empty() const { return !frame && payload.empty(); }
};
// A list of packets.
typedef std::list<Packet> PacketList;
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_PACKET_H_
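The ordering rules above are compact, so here is a test-style sketch with hypothetical values, spelling out what they imply for timestamp wrap-around and for RED/FEC priorities:

// Sketch only: concrete consequences of the comparison operators above.
void PacketOrderingSketch() {
  Packet a;
  a.timestamp = 0xFFFFFF00;  // Close to the 32-bit wrap.
  a.sequence_number = 100;
  a.priority = Packet::Priority(0, 0);  // Primary payload.

  Packet b;
  b.timestamp = 0x00000100;  // Logically later: the timestamp has wrapped.
  b.sequence_number = 101;
  b.priority = Packet::Priority(0, 0);

  // Wrap-around aware: static_cast<uint32_t>(b.timestamp - a.timestamp) is
  // 0x200, well below 0xFFFFFFFF / 2, so `a` sorts before `b`.
  RTC_DCHECK(a < b);

  // Same timestamp and sequence number: lower priority levels sort first,
  // so a primary payload is preferred over e.g. an Opus FEC payload.
  Packet c;
  c.timestamp = a.timestamp;
  c.sequence_number = a.sequence_number;
  c.priority = Packet::Priority(1, 0);
  RTC_DCHECK(a < c);
  RTC_DCHECK(a != c);  // operator== also compares priority.
}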


@ -0,0 +1,132 @@
/*
* Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/packet_arrival_history.h"
#include <algorithm>
#include <cstdint>
#include "api/neteq/tick_timer.h"
#include "rtc_base/checks.h"
namespace webrtc {
PacketArrivalHistory::PacketArrivalHistory(const TickTimer* tick_timer,
int window_size_ms)
: tick_timer_(tick_timer), window_size_ms_(window_size_ms) {}
bool PacketArrivalHistory::Insert(uint32_t rtp_timestamp,
int packet_length_samples) {
int64_t arrival_timestamp =
tick_timer_->ticks() * tick_timer_->ms_per_tick() * sample_rate_khz_;
PacketArrival packet(timestamp_unwrapper_.Unwrap(rtp_timestamp),
arrival_timestamp, packet_length_samples);
if (IsObsolete(packet)) {
return false;
}
if (Contains(packet)) {
return false;
}
history_.emplace(packet.rtp_timestamp, packet);
if (packet != history_.rbegin()->second) {
// Packet was reordered.
return true;
}
// Remove old packets.
while (IsObsolete(history_.begin()->second)) {
if (history_.begin()->second == min_packet_arrivals_.front()) {
min_packet_arrivals_.pop_front();
}
if (history_.begin()->second == max_packet_arrivals_.front()) {
max_packet_arrivals_.pop_front();
}
history_.erase(history_.begin());
}
// Maintain the monotonic-deque invariants: `min_packet_arrivals_` stays
// sorted by increasing delay and `max_packet_arrivals_` by decreasing delay,
// so that front() is always the current window minimum/maximum.
while (!min_packet_arrivals_.empty() &&
packet <= min_packet_arrivals_.back()) {
min_packet_arrivals_.pop_back();
}
while (!max_packet_arrivals_.empty() &&
packet >= max_packet_arrivals_.back()) {
max_packet_arrivals_.pop_back();
}
min_packet_arrivals_.push_back(packet);
max_packet_arrivals_.push_back(packet);
return true;
}
void PacketArrivalHistory::Reset() {
history_.clear();
min_packet_arrivals_.clear();
max_packet_arrivals_.clear();
timestamp_unwrapper_.Reset();
}
int PacketArrivalHistory::GetDelayMs(uint32_t rtp_timestamp) const {
int64_t unwrapped_rtp_timestamp =
timestamp_unwrapper_.PeekUnwrap(rtp_timestamp);
int64_t current_timestamp =
tick_timer_->ticks() * tick_timer_->ms_per_tick() * sample_rate_khz_;
PacketArrival packet(unwrapped_rtp_timestamp, current_timestamp,
/*duration_ms=*/0);
return GetPacketArrivalDelayMs(packet);
}
int PacketArrivalHistory::GetMaxDelayMs() const {
if (max_packet_arrivals_.empty()) {
return 0;
}
return GetPacketArrivalDelayMs(max_packet_arrivals_.front());
}
bool PacketArrivalHistory::IsNewestRtpTimestamp(uint32_t rtp_timestamp) const {
if (history_.empty()) {
return true;
}
int64_t unwrapped_rtp_timestamp =
timestamp_unwrapper_.PeekUnwrap(rtp_timestamp);
return unwrapped_rtp_timestamp == history_.rbegin()->second.rtp_timestamp;
}
int PacketArrivalHistory::GetPacketArrivalDelayMs(
const PacketArrival& packet_arrival) const {
if (min_packet_arrivals_.empty()) {
return 0;
}
RTC_DCHECK_NE(sample_rate_khz_, 0);
// TODO(jakobi): Timestamps are first converted to millis for bit-exactness.
return std::max<int>(
packet_arrival.arrival_timestamp / sample_rate_khz_ -
min_packet_arrivals_.front().arrival_timestamp / sample_rate_khz_ -
(packet_arrival.rtp_timestamp / sample_rate_khz_ -
min_packet_arrivals_.front().rtp_timestamp / sample_rate_khz_),
0);
}
bool PacketArrivalHistory::IsObsolete(
const PacketArrival& packet_arrival) const {
if (history_.empty()) {
return false;
}
return packet_arrival.rtp_timestamp + window_size_ms_ * sample_rate_khz_ <
history_.rbegin()->second.rtp_timestamp;
}
bool PacketArrivalHistory::Contains(const PacketArrival& packet_arrival) const {
auto it = history_.upper_bound(packet_arrival.rtp_timestamp);
if (it == history_.begin()) {
return false;
}
--it;
return it->second.contains(packet_arrival);
}
} // namespace webrtc
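The min/max bookkeeping in Insert() above is the standard monotonic-deque technique. Below is a stripped-down sketch of the same invariant, with plain ints standing in for per-packet delays (illustration only, not part of NetEq):

#include <deque>

// Running minimum over a stream: each new value evicts every queued value
// that can no longer be the minimum, so the deque stays sorted in increasing
// order and front() is always the current minimum. (PacketArrivalHistory
// additionally prunes obsolete entries from the front, as Insert() shows.)
class RunningMin {
 public:
  void Insert(int value) {
    while (!deque_.empty() && value <= deque_.back()) {
      deque_.pop_back();  // `back` is dominated by the new value.
    }
    deque_.push_back(value);
  }
  int Min() const { return deque_.front(); }

 private:
  std::deque<int> deque_;
};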


@ -0,0 +1,110 @@
/*
* Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_PACKET_ARRIVAL_HISTORY_H_
#define MODULES_AUDIO_CODING_NETEQ_PACKET_ARRIVAL_HISTORY_H_
#include <cstddef>
#include <cstdint>
#include <deque>
#include <map>
#include "api/neteq/tick_timer.h"
#include "rtc_base/numerics/sequence_number_unwrapper.h"
namespace webrtc {
// Stores timing information about previously received packets.
// The history has a fixed window size beyond which old data is automatically
// pruned.
class PacketArrivalHistory {
public:
explicit PacketArrivalHistory(const TickTimer* tick_timer,
int window_size_ms);
virtual ~PacketArrivalHistory() = default;
// Insert packet with `rtp_timestamp` into the history. Returns true if the
// packet was inserted, false if the timestamp is too old or if the timestamp
// already exists.
bool Insert(uint32_t rtp_timestamp, int packet_length_samples);
// The delay for `rtp_timestamp` at time `now` is calculated as
// `(now - p.arrival_timestamp) - (rtp_timestamp - p.rtp_timestamp)` where `p`
// is chosen as the packet arrival in the history that maximizes the delay.
virtual int GetDelayMs(uint32_t rtp_timestamp) const;
// Get the maximum packet arrival delay observed in the history, excluding
// reordered packets.
virtual int GetMaxDelayMs() const;
bool IsNewestRtpTimestamp(uint32_t rtp_timestamp) const;
void Reset();
void set_sample_rate(int sample_rate) {
sample_rate_khz_ = sample_rate / 1000;
}
size_t size() const { return history_.size(); }
private:
struct PacketArrival {
PacketArrival(int64_t rtp_timestamp,
int64_t arrival_timestamp,
int length_samples)
: rtp_timestamp(rtp_timestamp),
arrival_timestamp(arrival_timestamp),
length_samples(length_samples) {}
PacketArrival() = default;
int64_t rtp_timestamp;
int64_t arrival_timestamp;
int length_samples;
bool operator==(const PacketArrival& other) const {
return rtp_timestamp == other.rtp_timestamp &&
arrival_timestamp == other.arrival_timestamp &&
length_samples == other.length_samples;
}
bool operator!=(const PacketArrival& other) const {
return !(*this == other);
}
bool operator<=(const PacketArrival& other) const {
return arrival_timestamp - rtp_timestamp <=
other.arrival_timestamp - other.rtp_timestamp;
}
bool operator>=(const PacketArrival& other) const {
return arrival_timestamp - rtp_timestamp >=
other.arrival_timestamp - other.rtp_timestamp;
}
bool contains(const PacketArrival& other) const {
return rtp_timestamp <= other.rtp_timestamp &&
rtp_timestamp + length_samples >=
other.rtp_timestamp + other.length_samples;
}
};
int GetPacketArrivalDelayMs(const PacketArrival& packet_arrival) const;
// Checks if the packet is older than the window size.
bool IsObsolete(const PacketArrival& packet_arrival) const;
// Check if the packet exists or fully overlaps with a packet in the history.
bool Contains(const PacketArrival& packet_arrival) const;
const TickTimer* tick_timer_;
const int window_size_ms_;
int sample_rate_khz_ = 0;
RtpTimestampUnwrapper timestamp_unwrapper_;
// Packet history ordered by rtp timestamp.
std::map<int64_t, PacketArrival> history_;
// Tracks min/max packet arrivals in `history_` in ascending/descending order.
// Reordered packets are excluded.
std::deque<PacketArrival> min_packet_arrivals_;
std::deque<PacketArrival> max_packet_arrivals_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_PACKET_ARRIVAL_HISTORY_H_
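The delay formula documented for GetDelayMs() above can be sketched as standalone arithmetic. This is a minimal illustration, not part of the header; the helper name and parameters are hypothetical.
#include <cstdint>
// Delay of a packet relative to a reference arrival `p`, following the
// documented formula
// `(now - p.arrival_timestamp) - (rtp_timestamp - p.rtp_timestamp)`.
// Timestamps are unwrapped (64-bit) and converted to ms via the rate in kHz.
int64_t PacketDelayMs(int64_t now_ms,
                      int64_t p_arrival_timestamp_ms,
                      int64_t rtp_timestamp,
                      int64_t p_rtp_timestamp,
                      int sample_rate_khz) {
  const int64_t elapsed_ms = now_ms - p_arrival_timestamp_ms;
  const int64_t media_ms = (rtp_timestamp - p_rtp_timestamp) / sample_rate_khz;
  return elapsed_ms - media_ms;
}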

View file

@ -0,0 +1,171 @@
/*
* Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/packet_arrival_history.h"
#include <cstdint>
#include <limits>
#include "test/gtest.h"
namespace webrtc {
namespace {
constexpr int kFs = 8000;
constexpr int kFsKhz = kFs / 1000;
constexpr int kFrameSizeMs = 20;
constexpr int kFrameSizeSamples = kFrameSizeMs * kFsKhz;
constexpr int kWindowSizeMs = 1000;
class PacketArrivalHistoryTest : public testing::Test {
public:
PacketArrivalHistoryTest() : history_(&tick_timer_, kWindowSizeMs) {
history_.set_sample_rate(kFs);
}
void IncrementTime(int delta_ms) {
tick_timer_.Increment(delta_ms / tick_timer_.ms_per_tick());
}
int InsertPacketAndGetDelay(int timestamp_delta_ms) {
uint32_t timestamp = timestamp_ + timestamp_delta_ms * kFsKhz;
if (timestamp_delta_ms > 0) {
timestamp_ = timestamp;
}
EXPECT_TRUE(history_.Insert(timestamp, kFrameSizeSamples));
EXPECT_EQ(history_.IsNewestRtpTimestamp(timestamp),
timestamp_delta_ms >= 0);
return history_.GetDelayMs(timestamp);
}
protected:
TickTimer tick_timer_;
PacketArrivalHistory history_;
uint32_t timestamp_ = 0x12345678;
};
TEST_F(PacketArrivalHistoryTest, RelativeArrivalDelay) {
// Insert first packet.
EXPECT_EQ(InsertPacketAndGetDelay(0), 0);
IncrementTime(kFrameSizeMs);
EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 0);
IncrementTime(2 * kFrameSizeMs);
EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 20);
// Reordered packet.
EXPECT_EQ(InsertPacketAndGetDelay(-3 * kFrameSizeMs), 80);
IncrementTime(2 * kFrameSizeMs);
EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 40);
// Move reference packet forward.
EXPECT_EQ(InsertPacketAndGetDelay(4 * kFrameSizeMs), 0);
IncrementTime(2 * kFrameSizeMs);
EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 20);
// Earlier packet is now more delayed due to the new reference packet.
EXPECT_EQ(history_.GetMaxDelayMs(), 80);
}
TEST_F(PacketArrivalHistoryTest, ReorderedPackets) {
// Insert first packet.
EXPECT_EQ(InsertPacketAndGetDelay(0), 0);
// Insert reordered packet.
EXPECT_EQ(InsertPacketAndGetDelay(-80), 80);
// Insert another reordered packet.
EXPECT_EQ(InsertPacketAndGetDelay(-kFrameSizeMs), 20);
// Insert the next packet in order and verify that the relative delay is
// estimated based on the first inserted packet.
IncrementTime(4 * kFrameSizeMs);
EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 60);
EXPECT_EQ(history_.GetMaxDelayMs(), 60);
}
TEST_F(PacketArrivalHistoryTest, MaxHistorySize) {
EXPECT_EQ(InsertPacketAndGetDelay(0), 0);
IncrementTime(2 * kFrameSizeMs);
EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 20);
EXPECT_EQ(history_.GetMaxDelayMs(), 20);
// Insert next packet with a timestamp difference larger than maximum history
// size. This removes the previously inserted packet from the history.
IncrementTime(kWindowSizeMs + kFrameSizeMs);
EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs + kWindowSizeMs), 0);
EXPECT_EQ(history_.GetMaxDelayMs(), 0);
}
TEST_F(PacketArrivalHistoryTest, TimestampWraparound) {
timestamp_ = std::numeric_limits<uint32_t>::max();
EXPECT_EQ(InsertPacketAndGetDelay(0), 0);
IncrementTime(2 * kFrameSizeMs);
// Insert timestamp that will wrap around.
EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), kFrameSizeMs);
// Insert reordered packet before the wraparound.
EXPECT_EQ(InsertPacketAndGetDelay(-2 * kFrameSizeMs), 3 * kFrameSizeMs);
// Insert another in-order packet after the wraparound.
EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 0);
EXPECT_EQ(history_.GetMaxDelayMs(), kFrameSizeMs);
}
TEST_F(PacketArrivalHistoryTest, TimestampWraparoundBackwards) {
timestamp_ = 0;
EXPECT_EQ(InsertPacketAndGetDelay(0), 0);
IncrementTime(2 * kFrameSizeMs);
// Insert timestamp that will wrap around.
EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), kFrameSizeMs);
// Insert reordered packet before the wraparound.
EXPECT_EQ(InsertPacketAndGetDelay(-2 * kFrameSizeMs), 3 * kFrameSizeMs);
// Insert another in-order packet after the wraparound.
EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 0);
EXPECT_EQ(history_.GetMaxDelayMs(), kFrameSizeMs);
}
TEST_F(PacketArrivalHistoryTest, OldPacketShouldNotBeInserted) {
// Insert first packet as reference.
EXPECT_EQ(InsertPacketAndGetDelay(0), 0);
// Insert packet with timestamp older than the window size compared to the
// first packet.
EXPECT_FALSE(history_.Insert(timestamp_ - kWindowSizeMs * kFsKhz - 1,
kFrameSizeSamples));
}
TEST_F(PacketArrivalHistoryTest, DuplicatePacketShouldNotBeInserted) {
// Insert first packet as reference.
uint32_t first_timestamp = timestamp_;
EXPECT_EQ(InsertPacketAndGetDelay(0), 0);
EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 0);
// Same timestamp as the first packet.
EXPECT_FALSE(history_.Insert(first_timestamp, kFrameSizeSamples));
}
TEST_F(PacketArrivalHistoryTest, OverlappingPacketShouldNotBeInserted) {
// Insert first packet as reference.
EXPECT_EQ(InsertPacketAndGetDelay(0), 0);
// 10 ms overlap with the previous packet.
EXPECT_FALSE(history_.Insert(timestamp_ + kFrameSizeSamples / 2,
kFrameSizeSamples / 2));
}
} // namespace
} // namespace webrtc

View file

@ -0,0 +1,275 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// This is the implementation of the PacketBuffer class. It is mostly based on
// an STL list. The list is kept sorted at all times so that the next packet to
// decode is at the beginning of the list.
#include "modules/audio_coding/neteq/packet_buffer.h"
#include <algorithm>
#include <list>
#include <memory>
#include <type_traits>
#include <utility>
#include "api/audio_codecs/audio_decoder.h"
#include "api/neteq/tick_timer.h"
#include "modules/audio_coding/neteq/decoder_database.h"
#include "modules/audio_coding/neteq/statistics_calculator.h"
#include "rtc_base/checks.h"
#include "rtc_base/experiments/struct_parameters_parser.h"
#include "rtc_base/logging.h"
#include "rtc_base/numerics/safe_conversions.h"
#include "system_wrappers/include/field_trial.h"
namespace webrtc {
namespace {
// Predicate used when inserting packets in the buffer list.
// Operator() returns true when `packet` goes before `new_packet`.
class NewTimestampIsLarger {
public:
explicit NewTimestampIsLarger(const Packet& new_packet)
: new_packet_(new_packet) {}
bool operator()(const Packet& packet) { return (new_packet_ >= packet); }
private:
const Packet& new_packet_;
};
} // namespace
PacketBuffer::PacketBuffer(size_t max_number_of_packets,
const TickTimer* tick_timer,
StatisticsCalculator* stats)
: max_number_of_packets_(max_number_of_packets),
tick_timer_(tick_timer),
stats_(stats) {}
// Destructor. All packets in the buffer will be destroyed.
PacketBuffer::~PacketBuffer() {
buffer_.clear();
}
// Flush the buffer. All packets in the buffer will be destroyed.
void PacketBuffer::Flush() {
for (auto& p : buffer_) {
LogPacketDiscarded(p.priority.codec_level);
}
buffer_.clear();
stats_->FlushedPacketBuffer();
}
bool PacketBuffer::Empty() const {
return buffer_.empty();
}
int PacketBuffer::InsertPacket(Packet&& packet) {
if (packet.empty()) {
RTC_LOG(LS_WARNING) << "InsertPacket invalid packet";
return kInvalidPacket;
}
RTC_DCHECK_GE(packet.priority.codec_level, 0);
RTC_DCHECK_GE(packet.priority.red_level, 0);
int return_val = kOK;
packet.waiting_time = tick_timer_->GetNewStopwatch();
if (buffer_.size() >= max_number_of_packets_) {
// Buffer is full.
Flush();
return_val = kFlushed;
RTC_LOG(LS_WARNING) << "Packet buffer flushed.";
}
// Get an iterator pointing to the place in the buffer where the new packet
// should be inserted. The list is searched from the back, since the most
// likely case is that the new packet should be near the end of the list.
PacketList::reverse_iterator rit = std::find_if(
buffer_.rbegin(), buffer_.rend(), NewTimestampIsLarger(packet));
// The new packet is to be inserted to the right of `rit`. If it has the
// same timestamp as `rit`, which has a higher priority, do not insert the
// new packet into the list.
if (rit != buffer_.rend() && packet.timestamp == rit->timestamp) {
LogPacketDiscarded(packet.priority.codec_level);
return return_val;
}
// The new packet is to be inserted to the left of `it`. If it has the same
// timestamp as `it`, which has a lower priority, replace `it` with the new
// packet.
PacketList::iterator it = rit.base();
if (it != buffer_.end() && packet.timestamp == it->timestamp) {
LogPacketDiscarded(it->priority.codec_level);
it = buffer_.erase(it);
}
buffer_.insert(it, std::move(packet)); // Insert the packet at that position.
return return_val;
}
int PacketBuffer::NextTimestamp(uint32_t* next_timestamp) const {
if (Empty()) {
return kBufferEmpty;
}
if (!next_timestamp) {
return kInvalidPointer;
}
*next_timestamp = buffer_.front().timestamp;
return kOK;
}
int PacketBuffer::NextHigherTimestamp(uint32_t timestamp,
uint32_t* next_timestamp) const {
if (Empty()) {
return kBufferEmpty;
}
if (!next_timestamp) {
return kInvalidPointer;
}
PacketList::const_iterator it;
for (it = buffer_.begin(); it != buffer_.end(); ++it) {
if (it->timestamp >= timestamp) {
// Found a packet matching the search.
*next_timestamp = it->timestamp;
return kOK;
}
}
return kNotFound;
}
const Packet* PacketBuffer::PeekNextPacket() const {
return buffer_.empty() ? nullptr : &buffer_.front();
}
absl::optional<Packet> PacketBuffer::GetNextPacket() {
if (Empty()) {
// Buffer is empty.
return absl::nullopt;
}
absl::optional<Packet> packet(std::move(buffer_.front()));
// Assert that the packet sanity checks in the InsertPacket method work.
RTC_DCHECK(!packet->empty());
buffer_.pop_front();
return packet;
}
int PacketBuffer::DiscardNextPacket() {
if (Empty()) {
return kBufferEmpty;
}
// Assert that the packet sanity checks in the InsertPacket method work.
const Packet& packet = buffer_.front();
RTC_DCHECK(!packet.empty());
LogPacketDiscarded(packet.priority.codec_level);
buffer_.pop_front();
return kOK;
}
void PacketBuffer::DiscardOldPackets(uint32_t timestamp_limit,
uint32_t horizon_samples) {
buffer_.remove_if([this, timestamp_limit, horizon_samples](const Packet& p) {
if (timestamp_limit == p.timestamp ||
!IsObsoleteTimestamp(p.timestamp, timestamp_limit, horizon_samples)) {
return false;
}
LogPacketDiscarded(p.priority.codec_level);
return true;
});
}
void PacketBuffer::DiscardAllOldPackets(uint32_t timestamp_limit) {
DiscardOldPackets(timestamp_limit, 0);
}
void PacketBuffer::DiscardPacketsWithPayloadType(uint8_t payload_type) {
buffer_.remove_if([this, payload_type](const Packet& p) {
if (p.payload_type != payload_type) {
return false;
}
LogPacketDiscarded(p.priority.codec_level);
return true;
});
}
size_t PacketBuffer::NumPacketsInBuffer() const {
return buffer_.size();
}
size_t PacketBuffer::NumSamplesInBuffer(size_t last_decoded_length) const {
size_t num_samples = 0;
size_t last_duration = last_decoded_length;
for (const Packet& packet : buffer_) {
if (packet.frame) {
// TODO(hlundin): Verify that it's fine to count all packets and remove
// this check.
if (packet.priority != Packet::Priority(0, 0)) {
continue;
}
size_t duration = packet.frame->Duration();
if (duration > 0) {
last_duration = duration; // Save the most up-to-date (valid) duration.
}
}
num_samples += last_duration;
}
return num_samples;
}
size_t PacketBuffer::GetSpanSamples(size_t last_decoded_length,
size_t sample_rate,
bool count_waiting_time) const {
if (buffer_.size() == 0) {
return 0;
}
size_t span = buffer_.back().timestamp - buffer_.front().timestamp;
size_t waiting_time_samples = rtc::dchecked_cast<size_t>(
buffer_.back().waiting_time->ElapsedMs() * (sample_rate / 1000));
if (count_waiting_time) {
span += waiting_time_samples;
} else if (buffer_.back().frame && buffer_.back().frame->Duration() > 0) {
size_t duration = buffer_.back().frame->Duration();
if (buffer_.back().frame->IsDtxPacket()) {
duration = std::max(duration, waiting_time_samples);
}
span += duration;
} else {
span += last_decoded_length;
}
return span;
}
bool PacketBuffer::ContainsDtxOrCngPacket(
const DecoderDatabase* decoder_database) const {
RTC_DCHECK(decoder_database);
for (const Packet& packet : buffer_) {
if ((packet.frame && packet.frame->IsDtxPacket()) ||
decoder_database->IsComfortNoise(packet.payload_type)) {
return true;
}
}
return false;
}
void PacketBuffer::LogPacketDiscarded(int codec_level) {
if (codec_level > 0) {
stats_->SecondaryPacketsDiscarded(1);
} else {
stats_->PacketsDiscarded(1);
}
}
} // namespace webrtc
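A minimal usage sketch for the buffer implemented above, assuming the TickTimer and StatisticsCalculator instances are owned by the caller (as in the unit tests further down); the packet contents are illustrative.
#include "modules/audio_coding/neteq/packet_buffer.h"
void PacketBufferUsageSketch(webrtc::TickTimer* tick_timer,
                             webrtc::StatisticsCalculator* stats) {
  webrtc::PacketBuffer buffer(/*max_number_of_packets=*/240, tick_timer,
                              stats);
  webrtc::Packet packet;
  packet.timestamp = 4711;
  packet.payload.SetSize(10);  // InsertPacket rejects empty payloads.
  if (buffer.InsertPacket(std::move(packet)) == webrtc::PacketBuffer::kOK) {
    uint32_t next_ts;
    if (buffer.NextTimestamp(&next_ts) == webrtc::PacketBuffer::kOK) {
      // Extracts the packet with timestamp `next_ts` (4711 here).
      absl::optional<webrtc::Packet> next = buffer.GetNextPacket();
    }
  }
}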

View file

@ -0,0 +1,144 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_PACKET_BUFFER_H_
#define MODULES_AUDIO_CODING_NETEQ_PACKET_BUFFER_H_
#include "absl/types/optional.h"
#include "modules/audio_coding/neteq/decoder_database.h"
#include "modules/audio_coding/neteq/packet.h"
#include "modules/include/module_common_types_public.h" // IsNewerTimestamp
namespace webrtc {
class DecoderDatabase;
class StatisticsCalculator;
class TickTimer;
// This is the actual buffer holding the packets before decoding.
class PacketBuffer {
public:
enum BufferReturnCodes {
kOK = 0,
kFlushed,
kNotFound,
kBufferEmpty,
kInvalidPacket,
kInvalidPointer
};
// Constructor creates a buffer which can hold a maximum of
// `max_number_of_packets` packets.
PacketBuffer(size_t max_number_of_packets,
const TickTimer* tick_timer,
StatisticsCalculator* stats);
// Deletes all packets in the buffer before destroying the buffer.
virtual ~PacketBuffer();
PacketBuffer(const PacketBuffer&) = delete;
PacketBuffer& operator=(const PacketBuffer&) = delete;
// Flushes the buffer and deletes all packets in it.
virtual void Flush();
// Returns true for an empty buffer.
virtual bool Empty() const;
// Inserts `packet` into the buffer. The buffer will take over ownership of
// the packet object.
// Returns PacketBuffer::kOK on success, PacketBuffer::kFlushed if the buffer
// was flushed due to overfilling.
virtual int InsertPacket(Packet&& packet);
// Gets the timestamp for the first packet in the buffer and writes it to the
// output variable `next_timestamp`.
// Returns PacketBuffer::kBufferEmpty if the buffer is empty,
// PacketBuffer::kOK otherwise.
virtual int NextTimestamp(uint32_t* next_timestamp) const;
// Gets the timestamp for the first packet in the buffer with a timestamp no
// lower than the input limit `timestamp`. The result is written to the output
// variable `next_timestamp`.
// Returns PacketBuffer::kBufferEmpty if the buffer is empty,
// PacketBuffer::kOK otherwise.
virtual int NextHigherTimestamp(uint32_t timestamp,
uint32_t* next_timestamp) const;
// Returns a (constant) pointer to the first packet in the buffer. Returns
// nullptr if the buffer is empty.
virtual const Packet* PeekNextPacket() const;
// Extracts the first packet in the buffer and returns it.
// Returns an empty optional if the buffer is empty.
virtual absl::optional<Packet> GetNextPacket();
// Discards the first packet in the buffer. The packet is deleted.
// Returns PacketBuffer::kBufferEmpty if the buffer is empty,
// PacketBuffer::kOK otherwise.
virtual int DiscardNextPacket();
// Discards all packets that are (strictly) older than timestamp_limit,
// but newer than timestamp_limit - horizon_samples. Setting horizon_samples
// to zero implies that the horizon is set to half the timestamp range. That
// is, if a packet is more than 2^31 timestamps into the future compared with
// timestamp_limit (including wrap-around), it is considered old.
virtual void DiscardOldPackets(uint32_t timestamp_limit,
uint32_t horizon_samples);
// Discards all packets that are (strictly) older than timestamp_limit.
virtual void DiscardAllOldPackets(uint32_t timestamp_limit);
// Removes all packets with a specific payload type from the buffer.
virtual void DiscardPacketsWithPayloadType(uint8_t payload_type);
// Returns the number of packets in the buffer, including duplicates and
// redundant packets.
virtual size_t NumPacketsInBuffer() const;
// Returns the number of samples in the buffer, including samples carried in
// duplicate and redundant packets.
virtual size_t NumSamplesInBuffer(size_t last_decoded_length) const;
// Returns the total duration in samples that the packets in the buffer
// span.
virtual size_t GetSpanSamples(size_t last_decoded_length,
size_t sample_rate,
bool count_waiting_time) const;
// Returns true if the packet buffer contains any DTX or CNG packets.
virtual bool ContainsDtxOrCngPacket(
const DecoderDatabase* decoder_database) const;
// Static method returning true if `timestamp` is older than `timestamp_limit`
// but less than `horizon_samples` behind `timestamp_limit`. For instance,
// with timestamp_limit = 100 and horizon_samples = 10, a timestamp in the
// range (90, 100) is considered obsolete, and will yield true.
// Setting `horizon_samples` to 0 is the same as setting it to 2^31, i.e.,
// half the 32-bit timestamp range.
static bool IsObsoleteTimestamp(uint32_t timestamp,
uint32_t timestamp_limit,
uint32_t horizon_samples) {
return IsNewerTimestamp(timestamp_limit, timestamp) &&
(horizon_samples == 0 ||
IsNewerTimestamp(timestamp, timestamp_limit - horizon_samples));
}
private:
void LogPacketDiscarded(int codec_level);
size_t max_number_of_packets_;
PacketList buffer_;
const TickTimer* tick_timer_;
StatisticsCalculator* stats_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_PACKET_BUFFER_H_
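IsObsoleteTimestamp() above builds on IsNewerTimestamp() from module_common_types_public.h. A minimal equivalent is shown here for illustration only; the sketch mirrors the usual modulo-2^32 comparison.
#include <cstdint>
// `timestamp` is considered newer than `prev_timestamp` when the forward
// distance, computed with unsigned wrap-around, is less than half the
// 32-bit timestamp range.
inline bool IsNewerTimestampSketch(uint32_t timestamp,
                                   uint32_t prev_timestamp) {
  return timestamp != prev_timestamp &&
         static_cast<uint32_t>(timestamp - prev_timestamp) < 0x80000000u;
}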

View file

@ -0,0 +1,644 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for PacketBuffer class.
#include "modules/audio_coding/neteq/packet_buffer.h"
#include <memory>
#include "api/audio_codecs/builtin_audio_decoder_factory.h"
#include "api/neteq/tick_timer.h"
#include "modules/audio_coding/neteq/mock/mock_decoder_database.h"
#include "modules/audio_coding/neteq/mock/mock_statistics_calculator.h"
#include "modules/audio_coding/neteq/packet.h"
#include "test/field_trial.h"
#include "test/gmock.h"
#include "test/gtest.h"
using ::testing::_;
using ::testing::InSequence;
using ::testing::MockFunction;
using ::testing::Return;
using ::testing::StrictMock;
namespace {
class MockEncodedAudioFrame : public webrtc::AudioDecoder::EncodedAudioFrame {
public:
MOCK_METHOD(size_t, Duration, (), (const, override));
MOCK_METHOD(bool, IsDtxPacket, (), (const, override));
MOCK_METHOD(absl::optional<DecodeResult>,
Decode,
(rtc::ArrayView<int16_t> decoded),
(const, override));
};
// Helper class to generate packets. Packets must be deleted by the user.
class PacketGenerator {
public:
PacketGenerator(uint16_t seq_no, uint32_t ts, uint8_t pt, int frame_size);
virtual ~PacketGenerator() {}
void Reset(uint16_t seq_no, uint32_t ts, uint8_t pt, int frame_size);
webrtc::Packet NextPacket(
int payload_size_bytes,
std::unique_ptr<webrtc::AudioDecoder::EncodedAudioFrame> audio_frame);
uint16_t seq_no_;
uint32_t ts_;
uint8_t pt_;
int frame_size_;
};
PacketGenerator::PacketGenerator(uint16_t seq_no,
uint32_t ts,
uint8_t pt,
int frame_size) {
Reset(seq_no, ts, pt, frame_size);
}
void PacketGenerator::Reset(uint16_t seq_no,
uint32_t ts,
uint8_t pt,
int frame_size) {
seq_no_ = seq_no;
ts_ = ts;
pt_ = pt;
frame_size_ = frame_size;
}
webrtc::Packet PacketGenerator::NextPacket(
int payload_size_bytes,
std::unique_ptr<webrtc::AudioDecoder::EncodedAudioFrame> audio_frame) {
webrtc::Packet packet;
packet.sequence_number = seq_no_;
packet.timestamp = ts_;
packet.payload_type = pt_;
packet.payload.SetSize(payload_size_bytes);
++seq_no_;
ts_ += frame_size_;
packet.frame = std::move(audio_frame);
return packet;
}
struct PacketsToInsert {
uint16_t sequence_number;
uint32_t timestamp;
uint8_t payload_type;
bool primary;
// Order of this packet to appear upon extraction, after inserting a series
// of packets. A negative number means that it should have been discarded
// before extraction.
int extract_order;
};
} // namespace
namespace webrtc {
// Start of test definitions.
TEST(PacketBuffer, CreateAndDestroy) {
TickTimer tick_timer;
StrictMock<MockStatisticsCalculator> mock_stats;
PacketBuffer* buffer =
new PacketBuffer(10, &tick_timer, &mock_stats); // 10 packets.
EXPECT_TRUE(buffer->Empty());
delete buffer;
}
TEST(PacketBuffer, InsertPacket) {
TickTimer tick_timer;
StrictMock<MockStatisticsCalculator> mock_stats;
PacketBuffer buffer(10, &tick_timer, &mock_stats); // 10 packets.
PacketGenerator gen(17u, 4711u, 0, 10);
MockDecoderDatabase decoder_database;
const int payload_len = 100;
const Packet packet = gen.NextPacket(payload_len, nullptr);
EXPECT_EQ(PacketBuffer::kOK, buffer.InsertPacket(/*packet=*/packet.Clone()));
uint32_t next_ts;
EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&next_ts));
EXPECT_EQ(4711u, next_ts);
EXPECT_FALSE(buffer.Empty());
EXPECT_EQ(1u, buffer.NumPacketsInBuffer());
const Packet* next_packet = buffer.PeekNextPacket();
EXPECT_EQ(packet, *next_packet); // Compare contents.
EXPECT_CALL(decoder_database, Die()); // Called when object is deleted.
// Do not explicitly flush buffer or delete packet to test that it is deleted
// with the buffer. (Tested with Valgrind or similar tool.)
}
// Test to flush buffer.
TEST(PacketBuffer, FlushBuffer) {
TickTimer tick_timer;
StrictMock<MockStatisticsCalculator> mock_stats;
PacketBuffer buffer(10, &tick_timer, &mock_stats); // 10 packets.
PacketGenerator gen(0, 0, 0, 10);
const int payload_len = 10;
MockDecoderDatabase decoder_database;
// Insert 10 small packets; should be ok.
for (int i = 0; i < 10; ++i) {
EXPECT_EQ(PacketBuffer::kOK, buffer.InsertPacket(/*packet=*/gen.NextPacket(
payload_len, nullptr)));
}
EXPECT_EQ(10u, buffer.NumPacketsInBuffer());
EXPECT_FALSE(buffer.Empty());
EXPECT_CALL(mock_stats, PacketsDiscarded(1)).Times(10);
buffer.Flush();
// Buffer should delete the payloads itself.
EXPECT_EQ(0u, buffer.NumPacketsInBuffer());
EXPECT_TRUE(buffer.Empty());
EXPECT_CALL(decoder_database, Die()); // Called when object is deleted.
}
// Test to fill the buffer over the limits, and verify that it flushes.
TEST(PacketBuffer, OverfillBuffer) {
TickTimer tick_timer;
StrictMock<MockStatisticsCalculator> mock_stats;
PacketBuffer buffer(10, &tick_timer, &mock_stats); // 10 packets.
PacketGenerator gen(0, 0, 0, 10);
MockDecoderDatabase decoder_database;
// Insert 10 small packets; should be ok.
const int payload_len = 10;
int i;
for (i = 0; i < 10; ++i) {
EXPECT_EQ(PacketBuffer::kOK, buffer.InsertPacket(/*packet=*/gen.NextPacket(
payload_len, nullptr)));
}
EXPECT_EQ(10u, buffer.NumPacketsInBuffer());
uint32_t next_ts;
EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&next_ts));
EXPECT_EQ(0u, next_ts); // Expect first inserted packet to be first in line.
EXPECT_CALL(mock_stats, PacketsDiscarded(1)).Times(10);
const Packet packet = gen.NextPacket(payload_len, nullptr);
// Insert 11th packet; should flush the buffer and insert it after flushing.
EXPECT_EQ(PacketBuffer::kFlushed,
buffer.InsertPacket(/*packet=*/packet.Clone()));
EXPECT_EQ(1u, buffer.NumPacketsInBuffer());
EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&next_ts));
// Expect last inserted packet to be first in line.
EXPECT_EQ(packet.timestamp, next_ts);
EXPECT_CALL(decoder_database, Die()); // Called when object is deleted.
}
TEST(PacketBuffer, ExtractOrderRedundancy) {
TickTimer tick_timer;
StrictMock<MockStatisticsCalculator> mock_stats;
PacketBuffer buffer(100, &tick_timer, &mock_stats); // 100 packets.
const int kPackets = 18;
const int kFrameSize = 10;
const int kPayloadLength = 10;
PacketsToInsert packet_facts[kPackets] = {
{0xFFFD, 0xFFFFFFD7, 0, true, 0}, {0xFFFE, 0xFFFFFFE1, 0, true, 1},
{0xFFFE, 0xFFFFFFD7, 1, false, -1}, {0xFFFF, 0xFFFFFFEB, 0, true, 2},
{0xFFFF, 0xFFFFFFE1, 1, false, -1}, {0x0000, 0xFFFFFFF5, 0, true, 3},
{0x0000, 0xFFFFFFEB, 1, false, -1}, {0x0001, 0xFFFFFFFF, 0, true, 4},
{0x0001, 0xFFFFFFF5, 1, false, -1}, {0x0002, 0x0000000A, 0, true, 5},
{0x0002, 0xFFFFFFFF, 1, false, -1}, {0x0003, 0x0000000A, 1, false, -1},
{0x0004, 0x0000001E, 0, true, 7}, {0x0004, 0x00000014, 1, false, 6},
{0x0005, 0x0000001E, 0, true, -1}, {0x0005, 0x00000014, 1, false, -1},
{0x0006, 0x00000028, 0, true, 8}, {0x0006, 0x0000001E, 1, false, -1},
};
MockDecoderDatabase decoder_database;
const size_t kExpectPacketsInBuffer = 9;
std::vector<Packet> expect_order(kExpectPacketsInBuffer);
PacketGenerator gen(0, 0, 0, kFrameSize);
// Interleaving the EXPECT_CALL sequence with expectations on the
// MockFunction check ensures that exactly one call to PacketsDiscarded or
// SecondaryPacketsDiscarded happens in each InsertPacket call that discards
// a packet.
InSequence s;
MockFunction<void(int check_point_id)> check;
for (int i = 0; i < kPackets; ++i) {
gen.Reset(packet_facts[i].sequence_number, packet_facts[i].timestamp,
packet_facts[i].payload_type, kFrameSize);
Packet packet = gen.NextPacket(kPayloadLength, nullptr);
packet.priority.codec_level = packet_facts[i].primary ? 0 : 1;
if (packet_facts[i].extract_order < 0) {
if (packet.priority.codec_level > 0) {
EXPECT_CALL(mock_stats, SecondaryPacketsDiscarded(1));
} else {
EXPECT_CALL(mock_stats, PacketsDiscarded(1));
}
}
EXPECT_CALL(check, Call(i));
EXPECT_EQ(PacketBuffer::kOK,
buffer.InsertPacket(/*packet=*/packet.Clone()));
if (packet_facts[i].extract_order >= 0) {
expect_order[packet_facts[i].extract_order] = std::move(packet);
}
check.Call(i);
}
EXPECT_EQ(kExpectPacketsInBuffer, buffer.NumPacketsInBuffer());
for (size_t i = 0; i < kExpectPacketsInBuffer; ++i) {
const absl::optional<Packet> packet = buffer.GetNextPacket();
EXPECT_EQ(packet, expect_order[i]); // Compare contents.
}
EXPECT_TRUE(buffer.Empty());
EXPECT_CALL(decoder_database, Die()); // Called when object is deleted.
}
TEST(PacketBuffer, DiscardPackets) {
TickTimer tick_timer;
StrictMock<MockStatisticsCalculator> mock_stats;
PacketBuffer buffer(100, &tick_timer, &mock_stats); // 100 packets.
const uint16_t start_seq_no = 17;
const uint32_t start_ts = 4711;
const uint32_t ts_increment = 10;
PacketGenerator gen(start_seq_no, start_ts, 0, ts_increment);
PacketList list;
const int payload_len = 10;
MockDecoderDatabase decoder_database;
constexpr int kTotalPackets = 10;
// Insert 10 small packets.
for (int i = 0; i < kTotalPackets; ++i) {
buffer.InsertPacket(/*packet=*/gen.NextPacket(payload_len, nullptr));
}
EXPECT_EQ(10u, buffer.NumPacketsInBuffer());
uint32_t current_ts = start_ts;
// Discard them one by one and make sure that the right packets are at the
// front of the buffer.
constexpr int kDiscardPackets = 5;
// Interleaving the EXPECT_CALL sequence with expectations on the MockFunction
// check ensures that exactly one call to PacketsDiscarded happens in each
// DiscardNextPacket call.
InSequence s;
MockFunction<void(int check_point_id)> check;
for (int i = 0; i < kDiscardPackets; ++i) {
uint32_t ts;
EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&ts));
EXPECT_EQ(current_ts, ts);
EXPECT_CALL(mock_stats, PacketsDiscarded(1));
EXPECT_CALL(check, Call(i));
EXPECT_EQ(PacketBuffer::kOK, buffer.DiscardNextPacket());
current_ts += ts_increment;
check.Call(i);
}
constexpr int kRemainingPackets = kTotalPackets - kDiscardPackets;
// This will discard all remaining packets but one. The oldest packet is older
// than the indicated horizon_samples, and will thus be left in the buffer.
constexpr size_t kSkipPackets = 1;
EXPECT_CALL(mock_stats, PacketsDiscarded(1))
.Times(kRemainingPackets - kSkipPackets);
EXPECT_CALL(check, Call(17)); // Arbitrary id number.
buffer.DiscardOldPackets(start_ts + kTotalPackets * ts_increment,
kRemainingPackets * ts_increment);
check.Call(17); // Same arbitrary id number.
EXPECT_EQ(kSkipPackets, buffer.NumPacketsInBuffer());
uint32_t ts;
EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&ts));
EXPECT_EQ(current_ts, ts);
// Discard all remaining packets.
EXPECT_CALL(mock_stats, PacketsDiscarded(kSkipPackets));
buffer.DiscardAllOldPackets(start_ts + kTotalPackets * ts_increment);
EXPECT_TRUE(buffer.Empty());
EXPECT_CALL(decoder_database, Die()); // Called when object is deleted.
}
TEST(PacketBuffer, Reordering) {
TickTimer tick_timer;
StrictMock<MockStatisticsCalculator> mock_stats;
PacketBuffer buffer(100, &tick_timer, &mock_stats); // 100 packets.
const uint16_t start_seq_no = 17;
const uint32_t start_ts = 4711;
const uint32_t ts_increment = 10;
PacketGenerator gen(start_seq_no, start_ts, 0, ts_increment);
const int payload_len = 10;
// Generate 10 small packets and insert them into a PacketList. Insert every
// odd packet to the front, and every even packet to the back, thus creating
// a (rather strange) reordering.
PacketList list;
for (int i = 0; i < 10; ++i) {
Packet packet = gen.NextPacket(payload_len, nullptr);
if (i % 2) {
list.push_front(std::move(packet));
} else {
list.push_back(std::move(packet));
}
}
for (Packet& packet : list) {
EXPECT_EQ(PacketBuffer::kOK, buffer.InsertPacket(std::move(packet)));
}
EXPECT_EQ(10u, buffer.NumPacketsInBuffer());
// Extract them and make sure that they come out in the right order.
uint32_t current_ts = start_ts;
for (int i = 0; i < 10; ++i) {
const absl::optional<Packet> packet = buffer.GetNextPacket();
ASSERT_TRUE(packet);
EXPECT_EQ(current_ts, packet->timestamp);
current_ts += ts_increment;
}
EXPECT_TRUE(buffer.Empty());
}
TEST(PacketBuffer, Failures) {
const uint16_t start_seq_no = 17;
const uint32_t start_ts = 4711;
const uint32_t ts_increment = 10;
int payload_len = 100;
PacketGenerator gen(start_seq_no, start_ts, 0, ts_increment);
TickTimer tick_timer;
StrictMock<MockStatisticsCalculator> mock_stats;
PacketBuffer buffer(100, &tick_timer, &mock_stats); // 100 packets.
{
Packet packet = gen.NextPacket(payload_len, nullptr);
packet.payload.Clear();
EXPECT_EQ(PacketBuffer::kInvalidPacket,
buffer.InsertPacket(/*packet=*/std::move(packet)));
}
// Buffer should still be empty. Test all empty-checks.
uint32_t temp_ts;
EXPECT_EQ(PacketBuffer::kBufferEmpty, buffer.NextTimestamp(&temp_ts));
EXPECT_EQ(PacketBuffer::kBufferEmpty,
buffer.NextHigherTimestamp(0, &temp_ts));
EXPECT_EQ(NULL, buffer.PeekNextPacket());
EXPECT_FALSE(buffer.GetNextPacket());
// Discarding packets will not invoke mock_stats.PacketDiscarded() because the
// packet buffer is empty.
EXPECT_EQ(PacketBuffer::kBufferEmpty, buffer.DiscardNextPacket());
buffer.DiscardAllOldPackets(0);
}
// Test packet comparison function.
// The function should return true if the first packet "goes before" the second.
TEST(PacketBuffer, ComparePackets) {
PacketGenerator gen(0, 0, 0, 10);
Packet a(gen.NextPacket(10, nullptr)); // SN = 0, TS = 0.
Packet b(gen.NextPacket(10, nullptr)); // SN = 1, TS = 10.
EXPECT_FALSE(a == b);
EXPECT_TRUE(a != b);
EXPECT_TRUE(a < b);
EXPECT_FALSE(a > b);
EXPECT_TRUE(a <= b);
EXPECT_FALSE(a >= b);
// Testing wrap-around case; 'a' is earlier but has a larger timestamp value.
a.timestamp = 0xFFFFFFFF - 10;
EXPECT_FALSE(a == b);
EXPECT_TRUE(a != b);
EXPECT_TRUE(a < b);
EXPECT_FALSE(a > b);
EXPECT_TRUE(a <= b);
EXPECT_FALSE(a >= b);
// Test equal packets.
EXPECT_TRUE(a == a);
EXPECT_FALSE(a != a);
EXPECT_FALSE(a < a);
EXPECT_FALSE(a > a);
EXPECT_TRUE(a <= a);
EXPECT_TRUE(a >= a);
// Test equal timestamps but different sequence numbers (0 and 1).
a.timestamp = b.timestamp;
EXPECT_FALSE(a == b);
EXPECT_TRUE(a != b);
EXPECT_TRUE(a < b);
EXPECT_FALSE(a > b);
EXPECT_TRUE(a <= b);
EXPECT_FALSE(a >= b);
// Test equal timestamps but different sequence numbers (0xFFFF and 1).
a.sequence_number = 0xFFFF;
EXPECT_FALSE(a == b);
EXPECT_TRUE(a != b);
EXPECT_TRUE(a < b);
EXPECT_FALSE(a > b);
EXPECT_TRUE(a <= b);
EXPECT_FALSE(a >= b);
// Test equal timestamps and sequence numbers, but differing priorities.
a.sequence_number = b.sequence_number;
a.priority = {1, 0};
b.priority = {0, 0};
// a after b
EXPECT_FALSE(a == b);
EXPECT_TRUE(a != b);
EXPECT_FALSE(a < b);
EXPECT_TRUE(a > b);
EXPECT_FALSE(a <= b);
EXPECT_TRUE(a >= b);
Packet c(gen.NextPacket(0, nullptr)); // SN = 2, TS = 20.
Packet d(gen.NextPacket(0, nullptr)); // SN = 3, TS = 20.
c.timestamp = b.timestamp;
d.timestamp = b.timestamp;
c.sequence_number = b.sequence_number;
d.sequence_number = b.sequence_number;
c.priority = {1, 1};
d.priority = {0, 1};
// c after d
EXPECT_FALSE(c == d);
EXPECT_TRUE(c != d);
EXPECT_FALSE(c < d);
EXPECT_TRUE(c > d);
EXPECT_FALSE(c <= d);
EXPECT_TRUE(c >= d);
// c after a
EXPECT_FALSE(c == a);
EXPECT_TRUE(c != a);
EXPECT_FALSE(c < a);
EXPECT_TRUE(c > a);
EXPECT_FALSE(c <= a);
EXPECT_TRUE(c >= a);
// c after b
EXPECT_FALSE(c == b);
EXPECT_TRUE(c != b);
EXPECT_FALSE(c < b);
EXPECT_TRUE(c > b);
EXPECT_FALSE(c <= b);
EXPECT_TRUE(c >= b);
// a after d
EXPECT_FALSE(a == d);
EXPECT_TRUE(a != d);
EXPECT_FALSE(a < d);
EXPECT_TRUE(a > d);
EXPECT_FALSE(a <= d);
EXPECT_TRUE(a >= d);
// d after b
EXPECT_FALSE(d == b);
EXPECT_TRUE(d != b);
EXPECT_FALSE(d < b);
EXPECT_TRUE(d > b);
EXPECT_FALSE(d <= b);
EXPECT_TRUE(d >= b);
}
TEST(PacketBuffer, GetSpanSamples) {
constexpr size_t kFrameSizeSamples = 10;
constexpr int kPayloadSizeBytes = 1;  // Does not matter to this test.
constexpr uint32_t kStartTimeStamp = 0xFFFFFFFE; // Close to wrap around.
constexpr int kSampleRateHz = 48000;
constexpr bool kCountWaitingTime = false;
TickTimer tick_timer;
StrictMock<MockStatisticsCalculator> mock_stats;
PacketBuffer buffer(3, &tick_timer, &mock_stats);
PacketGenerator gen(0, kStartTimeStamp, 0, kFrameSizeSamples);
MockDecoderDatabase decoder_database;
Packet packet_1 = gen.NextPacket(kPayloadSizeBytes, nullptr);
std::unique_ptr<MockEncodedAudioFrame> mock_audio_frame =
std::make_unique<MockEncodedAudioFrame>();
EXPECT_CALL(*mock_audio_frame, Duration())
.WillRepeatedly(Return(kFrameSizeSamples));
Packet packet_2 =
gen.NextPacket(kPayloadSizeBytes, std::move(mock_audio_frame));
RTC_DCHECK_GT(packet_1.timestamp,
              packet_2.timestamp);  // Timestamp wrapped around.
EXPECT_EQ(PacketBuffer::kOK,
buffer.InsertPacket(/*packet=*/std::move(packet_1)));
constexpr size_t kLastDecodedSizeSamples = 2;
// packet_1 has no access to its duration, and relies on the last decoded
// duration as input.
EXPECT_EQ(kLastDecodedSizeSamples,
buffer.GetSpanSamples(kLastDecodedSizeSamples, kSampleRateHz,
kCountWaitingTime));
EXPECT_EQ(PacketBuffer::kOK,
buffer.InsertPacket(/*packet=*/std::move(packet_2)));
EXPECT_EQ(kFrameSizeSamples * 2,
buffer.GetSpanSamples(0, kSampleRateHz, kCountWaitingTime));
// packet_2 has access to its duration, and ignores the last decoded
// duration input.
EXPECT_EQ(kFrameSizeSamples * 2,
buffer.GetSpanSamples(kLastDecodedSizeSamples, kSampleRateHz,
kCountWaitingTime));
}
TEST(PacketBuffer, GetSpanSamplesCountWaitingTime) {
constexpr size_t kFrameSizeSamples = 10;
constexpr int kPayloadSizeBytes = 1;  // Does not matter to this test.
constexpr uint32_t kStartTimeStamp = 0xFFFFFFFE; // Close to wrap around.
constexpr int kSampleRateHz = 48000;
constexpr bool kCountWaitingTime = true;
constexpr size_t kLastDecodedSizeSamples = 0;
TickTimer tick_timer;
StrictMock<MockStatisticsCalculator> mock_stats;
PacketBuffer buffer(3, &tick_timer, &mock_stats);
PacketGenerator gen(0, kStartTimeStamp, 0, kFrameSizeSamples);
MockDecoderDatabase decoder_database;
Packet packet = gen.NextPacket(kPayloadSizeBytes, nullptr);
EXPECT_EQ(PacketBuffer::kOK,
buffer.InsertPacket(/*packet=*/std::move(packet)));
EXPECT_EQ(0u, buffer.GetSpanSamples(kLastDecodedSizeSamples, kSampleRateHz,
kCountWaitingTime));
tick_timer.Increment();
EXPECT_EQ(480u, buffer.GetSpanSamples(0, kSampleRateHz, kCountWaitingTime));
tick_timer.Increment();
EXPECT_EQ(960u, buffer.GetSpanSamples(0, kSampleRateHz, kCountWaitingTime));
}
namespace {
void TestIsObsoleteTimestamp(uint32_t limit_timestamp) {
// Check with zero horizon, which implies that the horizon is at 2^31, i.e.,
// half the timestamp range.
static const uint32_t kZeroHorizon = 0;
static const uint32_t k2Pow31Minus1 = 0x7FFFFFFF;
// Timestamp on the limit is not old.
EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp(
limit_timestamp, limit_timestamp, kZeroHorizon));
// 1 sample behind is old.
EXPECT_TRUE(PacketBuffer::IsObsoleteTimestamp(limit_timestamp - 1,
limit_timestamp, kZeroHorizon));
// 2^31 - 1 samples behind is old.
EXPECT_TRUE(PacketBuffer::IsObsoleteTimestamp(limit_timestamp - k2Pow31Minus1,
limit_timestamp, kZeroHorizon));
// 1 sample ahead is not old.
EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp(
limit_timestamp + 1, limit_timestamp, kZeroHorizon));
// If |t1-t2|=2^31 and t1>t2, t2 is older than t1 but not the opposite.
uint32_t other_timestamp = limit_timestamp + (1 << 31);
uint32_t lowest_timestamp = std::min(limit_timestamp, other_timestamp);
uint32_t highest_timestamp = std::max(limit_timestamp, other_timestamp);
EXPECT_TRUE(PacketBuffer::IsObsoleteTimestamp(
lowest_timestamp, highest_timestamp, kZeroHorizon));
EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp(
highest_timestamp, lowest_timestamp, kZeroHorizon));
// Fixed horizon at 10 samples.
static const uint32_t kHorizon = 10;
// Timestamp on the limit is not old.
EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp(limit_timestamp,
limit_timestamp, kHorizon));
// 1 sample behind is old.
EXPECT_TRUE(PacketBuffer::IsObsoleteTimestamp(limit_timestamp - 1,
limit_timestamp, kHorizon));
// 9 samples behind is old.
EXPECT_TRUE(PacketBuffer::IsObsoleteTimestamp(limit_timestamp - 9,
limit_timestamp, kHorizon));
// 10 samples behind is not old.
EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp(limit_timestamp - 10,
limit_timestamp, kHorizon));
// 2^31 - 1 samples behind is not old.
EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp(
limit_timestamp - k2Pow31Minus1, limit_timestamp, kHorizon));
// 1 sample ahead is not old.
EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp(limit_timestamp + 1,
limit_timestamp, kHorizon));
// 2^31 samples ahead is not old.
EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp(limit_timestamp + (1 << 31),
limit_timestamp, kHorizon));
}
} // namespace
// Test the IsObsoleteTimestamp method with different limit timestamps.
TEST(PacketBuffer, IsObsoleteTimestamp) {
TestIsObsoleteTimestamp(0);
TestIsObsoleteTimestamp(1);
TestIsObsoleteTimestamp(0xFFFFFFFF); // -1 in uint32_t.
TestIsObsoleteTimestamp(0x80000000); // 2^31.
TestIsObsoleteTimestamp(0x80000001); // 2^31 + 1.
TestIsObsoleteTimestamp(0x7FFFFFFF); // 2^31 - 1.
}
} // namespace webrtc

View file

@ -0,0 +1,117 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/preemptive_expand.h"
#include <algorithm>
#include "api/array_view.h"
#include "modules/audio_coding/neteq/audio_multi_vector.h"
#include "modules/audio_coding/neteq/time_stretch.h"
namespace webrtc {
PreemptiveExpand::ReturnCodes PreemptiveExpand::Process(
const int16_t* input,
size_t input_length,
size_t old_data_length,
AudioMultiVector* output,
size_t* length_change_samples) {
old_data_length_per_channel_ = old_data_length;
// Input length must be (almost) 30 ms.
// Also, the new part must be at least `overlap_samples_` elements.
static const size_t k15ms = 120; // 15 ms = 120 samples at 8 kHz sample rate.
if (num_channels_ == 0 ||
input_length / num_channels_ < (2 * k15ms - 1) * fs_mult_ ||
old_data_length >= input_length / num_channels_ - overlap_samples_) {
// Length of input data too short to do preemptive expand. Simply move all
// data from input to output.
output->PushBackInterleaved(
rtc::ArrayView<const int16_t>(input, input_length));
return kError;
}
const bool kFastMode = false; // Fast mode is not available for PE Expand.
return TimeStretch::Process(input, input_length, kFastMode, output,
length_change_samples);
}
void PreemptiveExpand::SetParametersForPassiveSpeech(size_t len,
int16_t* best_correlation,
size_t* peak_index) const {
// When the signal does not contain any active speech, the correlation does
// not matter. Simply set it to zero.
*best_correlation = 0;
// For low energy expansion, the new data can be less than 15 ms,
// but we must ensure that best_correlation is not larger than the length
// of the new data.
*peak_index = std::min(*peak_index, len - old_data_length_per_channel_);
}
PreemptiveExpand::ReturnCodes PreemptiveExpand::CheckCriteriaAndStretch(
const int16_t* input,
size_t input_length,
size_t peak_index,
int16_t best_correlation,
bool active_speech,
bool /*fast_mode*/,
AudioMultiVector* output) const {
// Pre-calculate common multiplication with `fs_mult_`.
// 120 corresponds to 15 ms.
size_t fs_mult_120 = static_cast<size_t>(fs_mult_ * 120);
// Check for strong correlation (>0.9 in Q14) and at least 15 ms new data,
// or passive speech.
if (((best_correlation > kCorrelationThreshold) &&
(old_data_length_per_channel_ <= fs_mult_120)) ||
!active_speech) {
// Do the preemptive expand operation by overlap add.
// Set length of the first part, not to be modified.
size_t unmodified_length =
std::max(old_data_length_per_channel_, fs_mult_120);
// Copy first part, including cross-fade region.
output->PushBackInterleaved(rtc::ArrayView<const int16_t>(
input, (unmodified_length + peak_index) * num_channels_));
// Copy the last `peak_index` samples up to 15 ms to `temp_vector`.
AudioMultiVector temp_vector(num_channels_);
temp_vector.PushBackInterleaved(rtc::ArrayView<const int16_t>(
&input[(unmodified_length - peak_index) * num_channels_],
peak_index * num_channels_));
// Cross-fade `temp_vector` onto the end of `output`.
output->CrossFade(temp_vector, peak_index);
// Copy the last unmodified part, 15 ms + pitch period until the end.
output->PushBackInterleaved(rtc::ArrayView<const int16_t>(
&input[unmodified_length * num_channels_],
input_length - unmodified_length * num_channels_));
if (active_speech) {
return kSuccess;
} else {
return kSuccessLowEnergy;
}
} else {
// Preemptive expand not allowed. Simply move all data from input to output.
output->PushBackInterleaved(
rtc::ArrayView<const int16_t>(input, input_length));
return kNoStretch;
}
}
PreemptiveExpand* PreemptiveExpandFactory::Create(
int sample_rate_hz,
size_t num_channels,
const BackgroundNoise& background_noise,
size_t overlap_samples) const {
return new PreemptiveExpand(sample_rate_hz, num_channels, background_noise,
overlap_samples);
}
} // namespace webrtc
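The CrossFade() call in CheckCriteriaAndStretch() blends the overlap region of the two signals. A single-channel linear cross-fade sketch is shown below for illustration; the actual blending is implemented by AudioMultiVector::CrossFade.
#include <cstddef>
#include <cstdint>
// Linearly fades `fade_in` onto `output_tail` over `length` samples: the
// old signal's weight ramps down from 1 to 0 while the new signal's ramps
// up from 0 to 1.
void LinearCrossFadeSketch(int16_t* output_tail,
                           const int16_t* fade_in,
                           size_t length) {
  for (size_t i = 0; i < length; ++i) {
    const float w = static_cast<float>(i) / static_cast<float>(length);
    output_tail[i] = static_cast<int16_t>((1.0f - w) * output_tail[i] +
                                          w * fade_in[i]);
  }
}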

View file

@ -0,0 +1,85 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_PREEMPTIVE_EXPAND_H_
#define MODULES_AUDIO_CODING_NETEQ_PREEMPTIVE_EXPAND_H_
#include <stddef.h>
#include <stdint.h>
#include "modules/audio_coding/neteq/time_stretch.h"
namespace webrtc {
class AudioMultiVector;
class BackgroundNoise;
// This class implements the PreemptiveExpand operation. Most of the work is
// done in the base class TimeStretch, which is shared with the Accelerate
// operation. In the PreemptiveExpand class, the operations that are specific to
// PreemptiveExpand are implemented.
class PreemptiveExpand : public TimeStretch {
public:
PreemptiveExpand(int sample_rate_hz,
size_t num_channels,
const BackgroundNoise& background_noise,
size_t overlap_samples)
: TimeStretch(sample_rate_hz, num_channels, background_noise),
old_data_length_per_channel_(0),
overlap_samples_(overlap_samples) {}
PreemptiveExpand(const PreemptiveExpand&) = delete;
PreemptiveExpand& operator=(const PreemptiveExpand&) = delete;
// This method performs the actual PreemptiveExpand operation. The samples
// are read from `input`, of length `input_length` elements, and are written
// to `output`. The number of samples added through time-stretching is
// provided in the output `length_change_samples`. The method returns the
// outcome of the operation as an enumerator value.
ReturnCodes Process(const int16_t* input,
size_t input_length,
size_t old_data_length,
AudioMultiVector* output,
size_t* length_change_samples);
protected:
// Sets the parameters `best_correlation` and `peak_index` to suitable
// values when the signal contains no active speech.
void SetParametersForPassiveSpeech(size_t input_length,
int16_t* best_correlation,
size_t* peak_index) const override;
// Checks the criteria for performing the time-stretching operation and,
// if possible, performs the time-stretching.
ReturnCodes CheckCriteriaAndStretch(const int16_t* input,
size_t input_length,
size_t peak_index,
int16_t best_correlation,
bool active_speech,
bool /*fast_mode*/,
AudioMultiVector* output) const override;
private:
size_t old_data_length_per_channel_;
size_t overlap_samples_;
};
struct PreemptiveExpandFactory {
PreemptiveExpandFactory() {}
virtual ~PreemptiveExpandFactory() {}
virtual PreemptiveExpand* Create(int sample_rate_hz,
size_t num_channels,
const BackgroundNoise& background_noise,
size_t overlap_samples) const;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_PREEMPTIVE_EXPAND_H_
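A short usage sketch for the factory declared above; the sample rate, channel count, and overlap length are illustrative values.
#include <memory>
std::unique_ptr<webrtc::PreemptiveExpand> MakePreemptiveExpandSketch(
    const webrtc::BackgroundNoise& background_noise) {
  webrtc::PreemptiveExpandFactory factory;
  // 16 kHz mono with a 30-sample overlap region.
  return std::unique_ptr<webrtc::PreemptiveExpand>(
      factory.Create(/*sample_rate_hz=*/16000, /*num_channels=*/1,
                     background_noise, /*overlap_samples=*/30));
}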

View file

@ -0,0 +1,63 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/random_vector.h"
namespace webrtc {
const int16_t RandomVector::kRandomTable[RandomVector::kRandomTableSize] = {
2680, 5532, 441, 5520, 16170, -5146, -1024, -8733, 3115,
9598, -10380, -4959, -1280, -21716, 7133, -1522, 13458, -3902,
2789, -675, 3441, 5016, -13599, -4003, -2739, 3922, -7209,
13352, -11617, -7241, 12905, -2314, 5426, 10121, -9702, 11207,
-13542, 1373, 816, -5934, -12504, 4798, 1811, 4112, -613,
201, -10367, -2960, -2419, 3442, 4299, -6116, -6092, 1552,
-1650, -480, -1237, 18720, -11858, -8303, -8212, 865, -2890,
-16968, 12052, -5845, -5912, 9777, -5665, -6294, 5426, -4737,
-6335, 1652, 761, 3832, 641, -8552, -9084, -5753, 8146,
12156, -4915, 15086, -1231, -1869, 11749, -9319, -6403, 11407,
6232, -1683, 24340, -11166, 4017, -10448, 3153, -2936, 6212,
2891, -866, -404, -4807, -2324, -1917, -2388, -6470, -3895,
-10300, 5323, -5403, 2205, 4640, 7022, -21186, -6244, -882,
-10031, -3395, -12885, 7155, -5339, 5079, -2645, -9515, 6622,
14651, 15852, 359, 122, 8246, -3502, -6696, -3679, -13535,
-1409, -704, -7403, -4007, 1798, 279, -420, -12796, -14219,
1141, 3359, 11434, 7049, -6684, -7473, 14283, -4115, -9123,
-8969, 4152, 4117, 13792, 5742, 16168, 8661, -1609, -6095,
1881, 14380, -5588, 6758, -6425, -22969, -7269, 7031, 1119,
-1611, -5850, -11281, 3559, -8952, -10146, -4667, -16251, -1538,
2062, -1012, -13073, 227, -3142, -5265, 20, 5770, -7559,
4740, -4819, 992, -8208, -7130, -4652, 6725, 7369, -1036,
13144, -1588, -5304, -2344, -449, -5705, -8894, 5205, -17904,
-11188, -1022, 4852, 10101, -5255, -4200, -752, 7941, -1543,
5959, 14719, 13346, 17045, -15605, -1678, -1600, -9230, 68,
23348, 1172, 7750, 11212, -18227, 9956, 4161, 883, 3947,
4341, 1014, -4889, -2603, 1246, -5630, -3596, -870, -1298,
2784, -3317, -6612, -20541, 4166, 4181, -8625, 3562, 12890,
4761, 3205, -12259, -8579};
void RandomVector::Reset() {
seed_ = 777;
seed_increment_ = 1;
}
void RandomVector::Generate(size_t length, int16_t* output) {
for (size_t i = 0; i < length; i++) {
seed_ += seed_increment_;
size_t position = seed_ & (kRandomTableSize - 1);
output[i] = kRandomTable[position];
}
}
void RandomVector::IncreaseSeedIncrement(int16_t increase_by) {
seed_increment_ += increase_by;
seed_increment_ &= kRandomTableSize - 1;
}
} // namespace webrtc
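Generate() above indexes the table with `seed_ & (kRandomTableSize - 1)`, which is a cheap modulo only because kRandomTableSize (256) is a power of two. A short usage sketch follows; the buffer handling is illustrative.
#include "modules/audio_coding/neteq/random_vector.h"
void FillNoiseSketch(int16_t* noise, size_t length) {
  webrtc::RandomVector random_vector;
  random_vector.Generate(length, noise);
  // Increasing the seed increment decorrelates subsequent calls.
  random_vector.IncreaseSeedIncrement(3);
}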

View file

@ -0,0 +1,46 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_RANDOM_VECTOR_H_
#define MODULES_AUDIO_CODING_NETEQ_RANDOM_VECTOR_H_
#include <stddef.h>
#include <stdint.h>
namespace webrtc {
// This class generates pseudo-random samples.
class RandomVector {
public:
static const size_t kRandomTableSize = 256;
static const int16_t kRandomTable[kRandomTableSize];
RandomVector() : seed_(777), seed_increment_(1) {}
RandomVector(const RandomVector&) = delete;
RandomVector& operator=(const RandomVector&) = delete;
void Reset();
void Generate(size_t length, int16_t* output);
void IncreaseSeedIncrement(int16_t increase_by);
// Accessors and mutators.
int16_t seed_increment() { return seed_increment_; }
void set_seed_increment(int16_t value) { seed_increment_ = value; }
private:
uint32_t seed_;
int16_t seed_increment_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_RANDOM_VECTOR_H_

View file

@ -0,0 +1,25 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for RandomVector class.
#include "modules/audio_coding/neteq/random_vector.h"
#include "test/gtest.h"
namespace webrtc {
TEST(RandomVector, CreateAndDestroy) {
RandomVector random_vector;
}
// TODO(hlundin): Write more tests.
} // namespace webrtc

View file

@ -0,0 +1,183 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/red_payload_splitter.h"
#include <stddef.h>
#include <cstdint>
#include <list>
#include <utility>
#include <vector>
#include "modules/audio_coding/neteq/decoder_database.h"
#include "modules/audio_coding/neteq/packet.h"
#include "rtc_base/buffer.h"
#include "rtc_base/logging.h"
#include "rtc_base/numerics/safe_conversions.h"
namespace webrtc {
// The method loops through a list of packets {A, B, C, ...}. Each packet is
// split into its corresponding RED payloads, {A1, A2, ...}, which is
// temporarily held in the list `new_packets`.
// When the first packet in `packet_list` has been processed, the original
// packet is replaced by the new ones in `new_packets`, so that `packet_list`
// becomes: {A1, A2, ..., B, C, ...}. The method then continues with B, and C,
// until all the original packets have been replaced by their split payloads.
bool RedPayloadSplitter::SplitRed(PacketList* packet_list) {
// Too many RED blocks indicates that something is wrong. Clamp it at some
// reasonable value.
const size_t kMaxRedBlocks = 32;
bool ret = true;
PacketList::iterator it = packet_list->begin();
while (it != packet_list->end()) {
Packet& red_packet = *it;
RTC_DCHECK(!red_packet.payload.empty());
const uint8_t* payload_ptr = red_packet.payload.data();
size_t payload_length = red_packet.payload.size();
// Read RED headers (according to RFC 2198):
//
// 0 1 2 3
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |F| block PT | timestamp offset | block length |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// Last RED header:
// 0 1 2 3 4 5 6 7
// +-+-+-+-+-+-+-+-+
// |0| Block PT |
// +-+-+-+-+-+-+-+-+
struct RedHeader {
uint8_t payload_type;
uint32_t timestamp;
size_t payload_length;
};
std::vector<RedHeader> new_headers;
bool last_block = false;
size_t sum_length = 0;
while (!last_block) {
if (payload_length == 0) {
RTC_LOG(LS_WARNING) << "SplitRed header too short";
return false;
}
RedHeader new_header;
// Check the F bit. If F == 0, this was the last block.
last_block = ((*payload_ptr & 0x80) == 0);
// Bits 1 through 7 are payload type.
new_header.payload_type = payload_ptr[0] & 0x7F;
if (last_block) {
// No more header data to read.
sum_length += kRedLastHeaderLength; // Account for RED header size.
new_header.timestamp = red_packet.timestamp;
new_header.payload_length = red_packet.payload.size() - sum_length;
payload_ptr += kRedLastHeaderLength; // Advance to first payload byte.
payload_length -= kRedLastHeaderLength;
} else {
if (payload_length < kRedHeaderLength) {
RTC_LOG(LS_WARNING) << "SplitRed header too short";
return false;
}
// Bits 8 through 21 are timestamp offset.
int timestamp_offset =
(payload_ptr[1] << 6) + ((payload_ptr[2] & 0xFC) >> 2);
new_header.timestamp = red_packet.timestamp - timestamp_offset;
// Bits 22 through 31 are payload length.
new_header.payload_length =
((payload_ptr[2] & 0x03) << 8) + payload_ptr[3];
sum_length += new_header.payload_length;
sum_length += kRedHeaderLength; // Account for RED header size.
payload_ptr += kRedHeaderLength; // Advance to next RED header.
payload_length -= kRedHeaderLength;
}
// Store in new list of packets.
if (new_header.payload_length > 0) {
new_headers.push_back(new_header);
}
}
if (new_headers.size() <= kMaxRedBlocks) {
// Populate the new packets with payload data.
// `payload_ptr` now points at the first payload byte.
PacketList new_packets; // An empty list to store the split packets in.
for (size_t i = 0; i != new_headers.size(); ++i) {
const auto& new_header = new_headers[i];
size_t payload_length = new_header.payload_length;
if (payload_ptr + payload_length >
red_packet.payload.data() + red_packet.payload.size()) {
// The block lengths in the RED headers do not match the overall
// packet length. Something is corrupt. Discard this and the remaining
// payloads from this packet.
RTC_LOG(LS_WARNING) << "SplitRed length mismatch";
ret = false;
break;
}
Packet new_packet;
new_packet.timestamp = new_header.timestamp;
new_packet.payload_type = new_header.payload_type;
new_packet.sequence_number = red_packet.sequence_number;
new_packet.priority.red_level =
rtc::dchecked_cast<int>((new_headers.size() - 1) - i);
new_packet.payload.SetData(payload_ptr, payload_length);
new_packets.push_front(std::move(new_packet));
payload_ptr += payload_length;
}
// Insert new packets into original list, before the element pointed to by
// iterator `it`.
packet_list->splice(it, std::move(new_packets));
} else {
RTC_LOG(LS_WARNING) << "SplitRed too many blocks: " << new_headers.size();
ret = false;
}
// Remove `it` from the packet list. This operation effectively moves the
// iterator `it` to the next packet in the list. Thus, we do not have to
// increment it manually.
it = packet_list->erase(it);
}
return ret;
}
void RedPayloadSplitter::CheckRedPayloads(
PacketList* packet_list,
const DecoderDatabase& decoder_database) {
int main_payload_type = -1;
for (auto it = packet_list->begin(); it != packet_list->end(); /* */) {
uint8_t this_payload_type = it->payload_type;
if (decoder_database.IsRed(this_payload_type)) {
it = packet_list->erase(it);
continue;
}
if (!decoder_database.IsDtmf(this_payload_type) &&
!decoder_database.IsComfortNoise(this_payload_type)) {
if (main_payload_type == -1) {
// This is the first packet in the list which is non-DTMF non-CNG.
main_payload_type = this_payload_type;
} else {
if (this_payload_type != main_payload_type) {
// We do not allow redundant payloads of a different type.
// Remove `it` from the packet list. This operation effectively
// moves the iterator `it` to the next packet in the list. Thus, we
// do not have to increment it manually.
it = packet_list->erase(it);
continue;
}
}
}
++it;
}
}
} // namespace webrtc

View file

@ -0,0 +1,51 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_RED_PAYLOAD_SPLITTER_H_
#define MODULES_AUDIO_CODING_NETEQ_RED_PAYLOAD_SPLITTER_H_
#include "modules/audio_coding/neteq/packet.h"
namespace webrtc {
class DecoderDatabase;
static const size_t kRedHeaderLength = 4; // 4 bytes RED header.
static const size_t kRedLastHeaderLength =
1; // reduced size for last RED header.
// This class handles splitting of RED payloads into smaller parts.
// Codec-specific packet splitting can be performed by
// AudioDecoder::ParsePayload.
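// A minimal usage sketch (assuming `packet_list` holds RED packets and
// `decoder_database` has the relevant payload types registered):
//
//   RedPayloadSplitter splitter;
//   if (splitter.SplitRed(&packet_list)) {
//     splitter.CheckRedPayloads(&packet_list, decoder_database);
//   }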
class RedPayloadSplitter {
public:
RedPayloadSplitter() = default;
virtual ~RedPayloadSplitter() = default;
RedPayloadSplitter(const RedPayloadSplitter&) = delete;
RedPayloadSplitter& operator=(const RedPayloadSplitter&) = delete;
// Splits each packet in `packet_list` into its separate RED payloads. Each
// RED payload is packetized into a Packet. The original elements in
// `packet_list` are properly deleted, and replaced by the new packets.
// Note that all packets in `packet_list` must be RED payloads, i.e., have
// RED headers according to RFC 2198 at the very beginning of the payload.
// Returns true on success, or false on an error.
virtual bool SplitRed(PacketList* packet_list);
// Checks all packets in `packet_list`. Packets that are DTMF events or
// comfort noise payloads are kept. Apart from those, only a single payload
// type is accepted; any packet with a different payload type is discarded.
virtual void CheckRedPayloads(PacketList* packet_list,
const DecoderDatabase& decoder_database);
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_RED_PAYLOAD_SPLITTER_H_

View file

@ -0,0 +1,389 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for RedPayloadSplitter class.
#include "modules/audio_coding/neteq/red_payload_splitter.h"
#include <memory>
#include <utility> // pair
#include "api/audio_codecs/builtin_audio_decoder_factory.h"
#include "modules/audio_coding/neteq/decoder_database.h"
#include "modules/audio_coding/neteq/packet.h"
#include "rtc_base/numerics/safe_conversions.h"
#include "test/gtest.h"
#include "test/mock_audio_decoder_factory.h"
using ::testing::Return;
using ::testing::ReturnNull;
namespace webrtc {
static const int kRedPayloadType = 100;
static const size_t kPayloadLength = 10;
static const uint16_t kSequenceNumber = 0;
static const uint32_t kBaseTimestamp = 0x12345678;
// A possible Opus packet that contains FEC is the following.
// The frame is 20 ms in duration.
//
// 0 1 2 3
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |0|0|0|0|1|0|0|0|x|1|x|x|x|x|x|x|x| |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
// | Compressed frame 1 (N-2 bytes)... :
// : |
// | |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
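// Per the diagram above, the helper below writes 0x08 as the first (TOC)
// byte and 0x40 as the second byte, whose set bit signals the embedded FEC
// frame; the remaining bytes are filler.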
void CreateOpusFecPayload(uint8_t* payload,
size_t payload_length,
uint8_t payload_value) {
if (payload_length < 2) {
return;
}
payload[0] = 0x08;
payload[1] = 0x40;
memset(&payload[2], payload_value, payload_length - 2);
}
// RED headers (according to RFC 2198):
//
// 0 1 2 3
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |F| block PT | timestamp offset | block length |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//
// Last RED header:
// 0 1 2 3 4 5 6 7
// +-+-+-+-+-+-+-+-+
// |0| Block PT |
// +-+-+-+-+-+-+-+-+
// Creates a RED packet, with `num_payloads` payloads, with payload types given
// by the values in array `payload_types` (which must be of length
// `num_payloads`). Each redundant payload is `timestamp_offset` samples
// "behind" the the previous payload.
Packet CreateRedPayload(size_t num_payloads,
uint8_t* payload_types,
int timestamp_offset,
bool embed_opus_fec = false) {
Packet packet;
packet.payload_type = kRedPayloadType;
packet.timestamp = kBaseTimestamp;
packet.sequence_number = kSequenceNumber;
packet.payload.SetSize((kPayloadLength + 1) +
(num_payloads - 1) *
(kPayloadLength + kRedHeaderLength));
uint8_t* payload_ptr = packet.payload.data();
for (size_t i = 0; i < num_payloads; ++i) {
// Write the RED headers.
if (i == num_payloads - 1) {
// Special case for last payload.
*payload_ptr = payload_types[i] & 0x7F; // F = 0;
++payload_ptr;
break;
}
*payload_ptr = payload_types[i] & 0x7F;
// Not the last block; set F = 1.
*payload_ptr |= 0x80;
++payload_ptr;
int this_offset =
rtc::checked_cast<int>((num_payloads - i - 1) * timestamp_offset);
*payload_ptr = this_offset >> 6;
++payload_ptr;
RTC_DCHECK_LE(kPayloadLength, 1023); // Max length described by 10 bits.
*payload_ptr = ((this_offset & 0x3F) << 2) | (kPayloadLength >> 8);
++payload_ptr;
*payload_ptr = kPayloadLength & 0xFF;
++payload_ptr;
}
for (size_t i = 0; i < num_payloads; ++i) {
// Write `i` to all bytes in each payload.
if (embed_opus_fec) {
CreateOpusFecPayload(payload_ptr, kPayloadLength,
static_cast<uint8_t>(i));
} else {
memset(payload_ptr, static_cast<int>(i), kPayloadLength);
}
payload_ptr += kPayloadLength;
}
return packet;
}
// Create a packet with all payload bytes set to `payload_value`.
Packet CreatePacket(uint8_t payload_type,
size_t payload_length,
uint8_t payload_value,
bool opus_fec = false) {
Packet packet;
packet.payload_type = payload_type;
packet.timestamp = kBaseTimestamp;
packet.sequence_number = kSequenceNumber;
packet.payload.SetSize(payload_length);
if (opus_fec) {
CreateOpusFecPayload(packet.payload.data(), packet.payload.size(),
payload_value);
} else {
memset(packet.payload.data(), payload_value, packet.payload.size());
}
return packet;
}
// Checks that `packet` has the attributes given in the remaining parameters.
void VerifyPacket(const Packet& packet,
size_t payload_length,
uint8_t payload_type,
uint16_t sequence_number,
uint32_t timestamp,
uint8_t payload_value,
Packet::Priority priority) {
EXPECT_EQ(payload_length, packet.payload.size());
EXPECT_EQ(payload_type, packet.payload_type);
EXPECT_EQ(sequence_number, packet.sequence_number);
EXPECT_EQ(timestamp, packet.timestamp);
EXPECT_EQ(priority, packet.priority);
ASSERT_FALSE(packet.payload.empty());
for (size_t i = 0; i < packet.payload.size(); ++i) {
ASSERT_EQ(payload_value, packet.payload.data()[i]);
}
}
void VerifyPacket(const Packet& packet,
size_t payload_length,
uint8_t payload_type,
uint16_t sequence_number,
uint32_t timestamp,
uint8_t payload_value,
bool primary) {
return VerifyPacket(packet, payload_length, payload_type, sequence_number,
timestamp, payload_value,
Packet::Priority{0, primary ? 0 : 1});
}
// Start of test definitions.
TEST(RedPayloadSplitter, CreateAndDestroy) {
RedPayloadSplitter* splitter = new RedPayloadSplitter;
delete splitter;
}
// Packet A is split into A1 and A2.
TEST(RedPayloadSplitter, OnePacketTwoPayloads) {
uint8_t payload_types[] = {0, 0};
const int kTimestampOffset = 160;
PacketList packet_list;
packet_list.push_back(CreateRedPayload(2, payload_types, kTimestampOffset));
RedPayloadSplitter splitter;
EXPECT_TRUE(splitter.SplitRed(&packet_list));
ASSERT_EQ(2u, packet_list.size());
// Check the first packet. The first in the list should always be the primary
// payload.
VerifyPacket(packet_list.front(), kPayloadLength, payload_types[1],
kSequenceNumber, kBaseTimestamp, 1, true);
packet_list.pop_front();
// Check second packet.
VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0],
kSequenceNumber, kBaseTimestamp - kTimestampOffset, 0, false);
}
// Packets A and B are not split at all. Only the RED header in each packet is
// removed.
TEST(RedPayloadSplitter, TwoPacketsOnePayload) {
uint8_t payload_types[] = {0};
const int kTimestampOffset = 160;
// Create first packet, with a single RED payload.
PacketList packet_list;
packet_list.push_back(CreateRedPayload(1, payload_types, kTimestampOffset));
// Create second packet, with a single RED payload.
{
Packet packet = CreateRedPayload(1, payload_types, kTimestampOffset);
// Manually change timestamp and sequence number of second packet.
packet.timestamp += kTimestampOffset;
packet.sequence_number++;
packet_list.push_back(std::move(packet));
}
RedPayloadSplitter splitter;
EXPECT_TRUE(splitter.SplitRed(&packet_list));
ASSERT_EQ(2u, packet_list.size());
// Check first packet.
VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0],
kSequenceNumber, kBaseTimestamp, 0, true);
packet_list.pop_front();
// Check second packet.
VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0],
kSequenceNumber + 1, kBaseTimestamp + kTimestampOffset, 0, true);
}
// Packets A and B are split into packets A1, A2, A3, B1, B2, B3, with
// attributes as follows:
//
// A1* A2 A3 B1* B2 B3
// Payload type 0 1 2 0 1 2
// Timestamp b b-o b-2o b+o b b-o
// Sequence number 0 0 0 1 1 1
//
// b = kBaseTimestamp, o = kTimestampOffset, * = primary.
TEST(RedPayloadSplitter, TwoPacketsThreePayloads) {
uint8_t payload_types[] = {2, 1, 0}; // Primary is the last one.
const int kTimestampOffset = 160;
// Create first packet, with 3 RED payloads.
PacketList packet_list;
packet_list.push_back(CreateRedPayload(3, payload_types, kTimestampOffset));
// Create second packet, with 3 RED payloads.
{
Packet packet = CreateRedPayload(3, payload_types, kTimestampOffset);
// Manually change timestamp and sequence number of second packet.
packet.timestamp += kTimestampOffset;
packet.sequence_number++;
packet_list.push_back(std::move(packet));
}
RedPayloadSplitter splitter;
EXPECT_TRUE(splitter.SplitRed(&packet_list));
ASSERT_EQ(6u, packet_list.size());
// Check first packet, A1.
VerifyPacket(packet_list.front(), kPayloadLength, payload_types[2],
kSequenceNumber, kBaseTimestamp, 2, {0, 0});
packet_list.pop_front();
// Check second packet, A2.
VerifyPacket(packet_list.front(), kPayloadLength, payload_types[1],
kSequenceNumber, kBaseTimestamp - kTimestampOffset, 1, {0, 1});
packet_list.pop_front();
// Check third packet, A3.
VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0],
kSequenceNumber, kBaseTimestamp - 2 * kTimestampOffset, 0,
{0, 2});
packet_list.pop_front();
// Check fourth packet, B1.
VerifyPacket(packet_list.front(), kPayloadLength, payload_types[2],
kSequenceNumber + 1, kBaseTimestamp + kTimestampOffset, 2,
{0, 0});
packet_list.pop_front();
// Check fifth packet, B2.
VerifyPacket(packet_list.front(), kPayloadLength, payload_types[1],
kSequenceNumber + 1, kBaseTimestamp, 1, {0, 1});
packet_list.pop_front();
// Check sixth packet, B3.
VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0],
kSequenceNumber + 1, kBaseTimestamp - kTimestampOffset, 0,
{0, 2});
}
// Creates a list with 4 packets with these payload types:
// 0 = CNGnb
// 1 = PCMu
// 2 = DTMF (AVT)
// 3 = iLBC
// We expect the method CheckRedPayloads to discard the iLBC packet, since it
// is a non-CNG, non-DTMF payload of a different type from the first speech
// payload found in the list (which is PCMu).
TEST(RedPayloadSplitter, CheckRedPayloads) {
PacketList packet_list;
for (uint8_t i = 0; i <= 3; ++i) {
// Create packet with payload type `i`, payload length 10 bytes, all 0.
packet_list.push_back(CreatePacket(i, 10, 0));
}
// Use a real DecoderDatabase object here instead of a mock, since it is
// easier to just register the payload types and let the actual implementation
// do its job.
DecoderDatabase decoder_database(
rtc::make_ref_counted<MockAudioDecoderFactory>(), absl::nullopt);
decoder_database.RegisterPayload(0, SdpAudioFormat("cn", 8000, 1));
decoder_database.RegisterPayload(1, SdpAudioFormat("pcmu", 8000, 1));
decoder_database.RegisterPayload(2,
SdpAudioFormat("telephone-event", 8000, 1));
decoder_database.RegisterPayload(3, SdpAudioFormat("ilbc", 8000, 1));
RedPayloadSplitter splitter;
splitter.CheckRedPayloads(&packet_list, decoder_database);
ASSERT_EQ(3u, packet_list.size()); // Should have dropped the last packet.
// Verify packets. The loop verifies that payload types 0, 1, and 2 are in the
// list.
for (int i = 0; i <= 2; ++i) {
VerifyPacket(packet_list.front(), 10, i, kSequenceNumber, kBaseTimestamp, 0,
true);
packet_list.pop_front();
}
EXPECT_TRUE(packet_list.empty());
}
// This test creates a RED packet where the payloads also have the payload type
// for RED. That is, some kind of weird nested RED packet. This is not supported
// and the splitter should discard all packets.
TEST(RedPayloadSplitter, CheckRedPayloadsRecursiveRed) {
PacketList packet_list;
for (uint8_t i = 0; i <= 3; ++i) {
// Create packet with RED payload type, payload length 10 bytes, all 0.
packet_list.push_back(CreatePacket(kRedPayloadType, 10, 0));
}
// Use a real DecoderDatabase object here instead of a mock, since it is
// easier to just register the payload types and let the actual implementation
// do its job.
DecoderDatabase decoder_database(
rtc::make_ref_counted<MockAudioDecoderFactory>(), absl::nullopt);
decoder_database.RegisterPayload(kRedPayloadType,
SdpAudioFormat("red", 8000, 1));
RedPayloadSplitter splitter;
splitter.CheckRedPayloads(&packet_list, decoder_database);
EXPECT_TRUE(packet_list.empty()); // Should have dropped all packets.
}
// Packet A is split into A1, A2 and A3. But the length parameter is off, so
// the last payloads should be discarded.
TEST(RedPayloadSplitter, WrongPayloadLength) {
uint8_t payload_types[] = {0, 0, 0};
const int kTimestampOffset = 160;
PacketList packet_list;
{
Packet packet = CreateRedPayload(3, payload_types, kTimestampOffset);
// Manually tamper with the payload length of the packet.
// This is one byte too short for the second payload (out of three).
// We expect only the first payload to be returned.
packet.payload.SetSize(packet.payload.size() - (kPayloadLength + 1));
packet_list.push_back(std::move(packet));
}
RedPayloadSplitter splitter;
EXPECT_FALSE(splitter.SplitRed(&packet_list));
ASSERT_EQ(1u, packet_list.size());
// Check first packet.
VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0],
kSequenceNumber, kBaseTimestamp - 2 * kTimestampOffset, 0,
{0, 2});
packet_list.pop_front();
}
// Test that we reject packets too short to contain a RED header.
TEST(RedPayloadSplitter, RejectsIncompleteHeaders) {
RedPayloadSplitter splitter;
uint8_t payload_types[] = {0, 0};
const int kTimestampOffset = 160;
PacketList packet_list;
// Truncate the packet so that the last block's header cannot be parsed.
packet_list.push_back(CreateRedPayload(2, payload_types, kTimestampOffset));
packet_list.front().payload.SetSize(4);
EXPECT_FALSE(splitter.SplitRed(&packet_list));
EXPECT_FALSE(packet_list.empty());
// Truncate further so that not even the first block's header can be parsed.
packet_list.front().payload.SetSize(3);
EXPECT_FALSE(splitter.SplitRed(&packet_list));
EXPECT_FALSE(packet_list.empty());
}
} // namespace webrtc

View file

@ -0,0 +1,75 @@
/*
* Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/reorder_optimizer.h"
#include <algorithm>
#include <limits>
#include <vector>
namespace webrtc {
namespace {
constexpr int kDelayBuckets = 100;
constexpr int kBucketSizeMs = 20;
} // namespace
ReorderOptimizer::ReorderOptimizer(int forget_factor,
int ms_per_loss_percent,
absl::optional<int> start_forget_weight)
: histogram_(kDelayBuckets, forget_factor, start_forget_weight),
ms_per_loss_percent_(ms_per_loss_percent) {}
void ReorderOptimizer::Update(int relative_delay_ms,
bool reordered,
int base_delay_ms) {
const int index = reordered ? relative_delay_ms / kBucketSizeMs : 0;
if (index < histogram_.NumBuckets()) {
// Maximum delay to register is 2000 ms.
histogram_.Add(index);
}
int bucket_index = MinimizeCostFunction(base_delay_ms);
optimal_delay_ms_ = (1 + bucket_index) * kBucketSizeMs;
}
void ReorderOptimizer::Reset() {
histogram_.Reset();
optimal_delay_ms_.reset();
}
int ReorderOptimizer::MinimizeCostFunction(int base_delay_ms) const {
const std::vector<int>& buckets = histogram_.buckets();
// Values are calculated in Q30.
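// The loop below evaluates, for each bucket i, a cost of the form
//   cost(i) = max(0, i * kBucketSizeMs - base_delay_ms)
//             + ms_per_loss_percent_ * (100 * P(delay > i)),
// where P(delay > i) is the histogram's tail probability (in Q30), and picks
// the bucket that minimizes the cost.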
int64_t loss_probability = 1 << 30;
int64_t min_cost = std::numeric_limits<int64_t>::max();
int min_bucket = 0;
for (int i = 0; i < static_cast<int>(buckets.size()); ++i) {
loss_probability -= buckets[i];
int64_t delay_ms =
static_cast<int64_t>(std::max(0, i * kBucketSizeMs - base_delay_ms))
<< 30;
int64_t cost = delay_ms + 100 * ms_per_loss_percent_ * loss_probability;
if (cost < min_cost) {
min_cost = cost;
min_bucket = i;
}
if (loss_probability == 0) {
break;
}
}
return min_bucket;
}
} // namespace webrtc

View file

@ -0,0 +1,43 @@
/*
* Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_REORDER_OPTIMIZER_H_
#define MODULES_AUDIO_CODING_NETEQ_REORDER_OPTIMIZER_H_
#include "absl/types/optional.h"
#include "modules/audio_coding/neteq/histogram.h"
namespace webrtc {
// Calculates an optimal delay to reduce the chance of missing reordered
// packets. The delay/loss trade-off can be tuned using the
// `ms_per_loss_percent` parameter.
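// A minimal usage sketch (parameter values borrowed from the unit tests):
//
//   ReorderOptimizer optimizer(/*forget_factor=*/32745,
//                              /*ms_per_loss_percent=*/20,
//                              /*start_forget_weight=*/1);
//   optimizer.Update(/*relative_delay_ms=*/80, /*reordered=*/true,
//                    /*base_delay_ms=*/0);
//   absl::optional<int> delay_ms = optimizer.GetOptimalDelayMs();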
class ReorderOptimizer {
public:
ReorderOptimizer(int forget_factor,
int ms_per_loss_percent,
absl::optional<int> start_forget_weight);
void Update(int relative_delay_ms, bool reordered, int base_delay_ms);
absl::optional<int> GetOptimalDelayMs() const { return optimal_delay_ms_; }
void Reset();
private:
int MinimizeCostFunction(int base_delay_ms) const;
Histogram histogram_;
const int ms_per_loss_percent_;
absl::optional<int> optimal_delay_ms_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_REORDER_OPTIMIZER_H_

View file

@ -0,0 +1,70 @@
/*
* Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/reorder_optimizer.h"
#include "test/gtest.h"
namespace webrtc {
namespace {
constexpr int kForgetFactor = 32745; // 0.9993 in Q15.
constexpr int kMsPerLossPercent = 20;
constexpr int kStartForgetWeight = 1;
} // namespace
TEST(ReorderOptimizerTest, OnlyIncreaseDelayForReorderedPackets) {
ReorderOptimizer reorder_optimizer(kForgetFactor, kMsPerLossPercent,
kStartForgetWeight);
EXPECT_FALSE(reorder_optimizer.GetOptimalDelayMs());
// Delay should not increase for in-order packets.
reorder_optimizer.Update(60, /*reordered=*/false, 0);
EXPECT_EQ(reorder_optimizer.GetOptimalDelayMs(), 20);
reorder_optimizer.Update(100, /*reordered=*/false, 0);
EXPECT_EQ(reorder_optimizer.GetOptimalDelayMs(), 20);
reorder_optimizer.Update(80, /*reordered=*/true, 0);
EXPECT_EQ(reorder_optimizer.GetOptimalDelayMs(), 100);
}
TEST(ReorderOptimizerTest, AvoidIncreasingDelayWhenProbabilityIsLow) {
ReorderOptimizer reorder_optimizer(kForgetFactor, kMsPerLossPercent,
kStartForgetWeight);
reorder_optimizer.Update(40, /*reordered=*/true, 0);
reorder_optimizer.Update(40, /*reordered=*/true, 0);
reorder_optimizer.Update(40, /*reordered=*/true, 0);
EXPECT_EQ(reorder_optimizer.GetOptimalDelayMs(), 60);
// The cost of the delay is too high relative to the probability.
reorder_optimizer.Update(600, /*reordered=*/true, 0);
EXPECT_EQ(reorder_optimizer.GetOptimalDelayMs(), 60);
}
TEST(ReorderOptimizerTest, BaseDelayIsSubtractedFromCost) {
constexpr int kBaseDelayMs = 200;
ReorderOptimizer reorder_optimizer(kForgetFactor, kMsPerLossPercent,
kStartForgetWeight);
reorder_optimizer.Update(40, /*reordered=*/true, kBaseDelayMs);
reorder_optimizer.Update(40, /*reordered=*/true, kBaseDelayMs);
reorder_optimizer.Update(40, /*reordered=*/true, kBaseDelayMs);
EXPECT_EQ(reorder_optimizer.GetOptimalDelayMs(), 60);
// The cost of the delay is too high relative to the probability.
reorder_optimizer.Update(600, /*reordered=*/true, kBaseDelayMs);
EXPECT_EQ(reorder_optimizer.GetOptimalDelayMs(), 620);
}
} // namespace webrtc

View file

@ -0,0 +1,395 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/statistics_calculator.h"
#include <string.h> // memset
#include <algorithm>
#include "absl/strings/string_view.h"
#include "modules/audio_coding/neteq/delay_manager.h"
#include "rtc_base/checks.h"
#include "rtc_base/numerics/safe_conversions.h"
#include "system_wrappers/include/metrics.h"
namespace webrtc {
namespace {
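// Example of the capping below: AddIntToSizeTWithLowerCap(-5, 3) would wrap
// around to a huge size_t value; the wrap is detected (a < 0 but ret > b) and
// the result is capped to 0 instead.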
size_t AddIntToSizeTWithLowerCap(int a, size_t b) {
const size_t ret = b + a;
// If a + b is negative, resulting in a negative wrap, cap it to zero instead.
static_assert(sizeof(size_t) >= sizeof(int),
"int must not be wider than size_t for this to work");
return (a < 0 && ret > b) ? 0 : ret;
}
constexpr int kInterruptionLenMs = 150;
} // namespace
// Allocating the static const so that it can be passed by reference to
// RTC_DCHECK.
const size_t StatisticsCalculator::kLenWaitingTimes;
StatisticsCalculator::PeriodicUmaLogger::PeriodicUmaLogger(
absl::string_view uma_name,
int report_interval_ms,
int max_value)
: uma_name_(uma_name),
report_interval_ms_(report_interval_ms),
max_value_(max_value),
timer_(0) {}
StatisticsCalculator::PeriodicUmaLogger::~PeriodicUmaLogger() = default;
void StatisticsCalculator::PeriodicUmaLogger::AdvanceClock(int step_ms) {
timer_ += step_ms;
if (timer_ < report_interval_ms_) {
return;
}
LogToUma(Metric());
Reset();
timer_ -= report_interval_ms_;
RTC_DCHECK_GE(timer_, 0);
}
void StatisticsCalculator::PeriodicUmaLogger::LogToUma(int value) const {
RTC_HISTOGRAM_COUNTS_SPARSE(uma_name_, value, 1, max_value_, 50);
}
StatisticsCalculator::PeriodicUmaCount::PeriodicUmaCount(
absl::string_view uma_name,
int report_interval_ms,
int max_value)
: PeriodicUmaLogger(uma_name, report_interval_ms, max_value) {}
StatisticsCalculator::PeriodicUmaCount::~PeriodicUmaCount() {
// Log the count for the current (incomplete) interval.
LogToUma(Metric());
}
void StatisticsCalculator::PeriodicUmaCount::RegisterSample() {
++counter_;
}
int StatisticsCalculator::PeriodicUmaCount::Metric() const {
return counter_;
}
void StatisticsCalculator::PeriodicUmaCount::Reset() {
counter_ = 0;
}
StatisticsCalculator::PeriodicUmaAverage::PeriodicUmaAverage(
absl::string_view uma_name,
int report_interval_ms,
int max_value)
: PeriodicUmaLogger(uma_name, report_interval_ms, max_value) {}
StatisticsCalculator::PeriodicUmaAverage::~PeriodicUmaAverage() {
// Log the average for the current (incomplete) interval.
LogToUma(Metric());
}
void StatisticsCalculator::PeriodicUmaAverage::RegisterSample(int value) {
sum_ += value;
++counter_;
}
int StatisticsCalculator::PeriodicUmaAverage::Metric() const {
return counter_ == 0 ? 0 : static_cast<int>(sum_ / counter_);
}
void StatisticsCalculator::PeriodicUmaAverage::Reset() {
sum_ = 0.0;
counter_ = 0;
}
StatisticsCalculator::StatisticsCalculator()
: preemptive_samples_(0),
accelerate_samples_(0),
expanded_speech_samples_(0),
expanded_noise_samples_(0),
timestamps_since_last_report_(0),
secondary_decoded_samples_(0),
discarded_secondary_packets_(0),
delayed_packet_outage_counter_(
"WebRTC.Audio.DelayedPacketOutageEventsPerMinute",
60000, // 60 seconds report interval.
100),
excess_buffer_delay_("WebRTC.Audio.AverageExcessBufferDelayMs",
60000, // 60 seconds report interval.
1000),
buffer_full_counter_("WebRTC.Audio.JitterBufferFullPerMinute",
60000, // 60 seconds report interval.
100) {}
StatisticsCalculator::~StatisticsCalculator() = default;
void StatisticsCalculator::Reset() {
preemptive_samples_ = 0;
accelerate_samples_ = 0;
expanded_speech_samples_ = 0;
expanded_noise_samples_ = 0;
secondary_decoded_samples_ = 0;
discarded_secondary_packets_ = 0;
waiting_times_.clear();
}
void StatisticsCalculator::ResetMcu() {
timestamps_since_last_report_ = 0;
}
void StatisticsCalculator::ExpandedVoiceSamples(size_t num_samples,
bool is_new_concealment_event) {
expanded_speech_samples_ += num_samples;
ConcealedSamplesCorrection(rtc::dchecked_cast<int>(num_samples), true);
lifetime_stats_.concealment_events += is_new_concealment_event;
}
void StatisticsCalculator::ExpandedNoiseSamples(size_t num_samples,
bool is_new_concealment_event) {
expanded_noise_samples_ += num_samples;
ConcealedSamplesCorrection(rtc::dchecked_cast<int>(num_samples), false);
lifetime_stats_.concealment_events += is_new_concealment_event;
}
void StatisticsCalculator::ExpandedVoiceSamplesCorrection(int num_samples) {
expanded_speech_samples_ =
AddIntToSizeTWithLowerCap(num_samples, expanded_speech_samples_);
ConcealedSamplesCorrection(num_samples, true);
}
void StatisticsCalculator::ExpandedNoiseSamplesCorrection(int num_samples) {
expanded_noise_samples_ =
AddIntToSizeTWithLowerCap(num_samples, expanded_noise_samples_);
ConcealedSamplesCorrection(num_samples, false);
}
void StatisticsCalculator::DecodedOutputPlayed() {
decoded_output_played_ = true;
}
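// Example of the thresholding below: at fs_hz = 48000, an expand event
// covering 7200 concealed samples lasts 1000 * 7200 / 48000 = 150 ms, which
// meets kInterruptionLenMs and is therefore logged as an interruption.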
void StatisticsCalculator::EndExpandEvent(int fs_hz) {
RTC_DCHECK_GE(lifetime_stats_.concealed_samples,
concealed_samples_at_event_end_);
const int event_duration_ms =
1000 *
(lifetime_stats_.concealed_samples - concealed_samples_at_event_end_) /
fs_hz;
if (event_duration_ms >= kInterruptionLenMs && decoded_output_played_) {
lifetime_stats_.interruption_count++;
lifetime_stats_.total_interruption_duration_ms += event_duration_ms;
RTC_HISTOGRAM_COUNTS("WebRTC.Audio.AudioInterruptionMs", event_duration_ms,
/*min=*/150, /*max=*/5000, /*bucket_count=*/50);
}
concealed_samples_at_event_end_ = lifetime_stats_.concealed_samples;
}
void StatisticsCalculator::ConcealedSamplesCorrection(int num_samples,
bool is_voice) {
if (num_samples < 0) {
// Store negative correction to subtract from future positive additions.
// See also the function comment in the header file.
concealed_samples_correction_ -= num_samples;
if (!is_voice) {
silent_concealed_samples_correction_ -= num_samples;
}
return;
}
const size_t canceled_out =
std::min(static_cast<size_t>(num_samples), concealed_samples_correction_);
concealed_samples_correction_ -= canceled_out;
lifetime_stats_.concealed_samples += num_samples - canceled_out;
if (!is_voice) {
const size_t silent_canceled_out = std::min(
static_cast<size_t>(num_samples), silent_concealed_samples_correction_);
silent_concealed_samples_correction_ -= silent_canceled_out;
lifetime_stats_.silent_concealed_samples +=
num_samples - silent_canceled_out;
}
}
void StatisticsCalculator::PreemptiveExpandedSamples(size_t num_samples) {
preemptive_samples_ += num_samples;
operations_and_state_.preemptive_samples += num_samples;
lifetime_stats_.inserted_samples_for_deceleration += num_samples;
}
void StatisticsCalculator::AcceleratedSamples(size_t num_samples) {
accelerate_samples_ += num_samples;
operations_and_state_.accelerate_samples += num_samples;
lifetime_stats_.removed_samples_for_acceleration += num_samples;
}
void StatisticsCalculator::GeneratedNoiseSamples(size_t num_samples) {
lifetime_stats_.generated_noise_samples += num_samples;
}
void StatisticsCalculator::PacketsDiscarded(size_t num_packets) {
lifetime_stats_.packets_discarded += num_packets;
}
void StatisticsCalculator::SecondaryPacketsDiscarded(size_t num_packets) {
discarded_secondary_packets_ += num_packets;
lifetime_stats_.fec_packets_discarded += num_packets;
}
void StatisticsCalculator::SecondaryPacketsReceived(size_t num_packets) {
lifetime_stats_.fec_packets_received += num_packets;
}
void StatisticsCalculator::IncreaseCounter(size_t num_samples, int fs_hz) {
const int time_step_ms =
rtc::CheckedDivExact(static_cast<int>(1000 * num_samples), fs_hz);
delayed_packet_outage_counter_.AdvanceClock(time_step_ms);
excess_buffer_delay_.AdvanceClock(time_step_ms);
buffer_full_counter_.AdvanceClock(time_step_ms);
timestamps_since_last_report_ += static_cast<uint32_t>(num_samples);
if (timestamps_since_last_report_ >
static_cast<uint32_t>(fs_hz * kMaxReportPeriod)) {
timestamps_since_last_report_ = 0;
}
lifetime_stats_.total_samples_received += num_samples;
}
void StatisticsCalculator::JitterBufferDelay(
size_t num_samples,
uint64_t waiting_time_ms,
uint64_t target_delay_ms,
uint64_t unlimited_target_delay_ms) {
lifetime_stats_.jitter_buffer_delay_ms += waiting_time_ms * num_samples;
lifetime_stats_.jitter_buffer_target_delay_ms +=
target_delay_ms * num_samples;
lifetime_stats_.jitter_buffer_minimum_delay_ms +=
unlimited_target_delay_ms * num_samples;
lifetime_stats_.jitter_buffer_emitted_count += num_samples;
}
void StatisticsCalculator::SecondaryDecodedSamples(int num_samples) {
secondary_decoded_samples_ += num_samples;
}
void StatisticsCalculator::FlushedPacketBuffer() {
operations_and_state_.packet_buffer_flushes++;
buffer_full_counter_.RegisterSample();
}
void StatisticsCalculator::ReceivedPacket() {
++lifetime_stats_.jitter_buffer_packets_received;
}
void StatisticsCalculator::RelativePacketArrivalDelay(size_t delay_ms) {
lifetime_stats_.relative_packet_arrival_delay_ms += delay_ms;
}
void StatisticsCalculator::LogDelayedPacketOutageEvent(int num_samples,
int fs_hz) {
int outage_duration_ms = num_samples / (fs_hz / 1000);
RTC_HISTOGRAM_COUNTS("WebRTC.Audio.DelayedPacketOutageEventMs",
outage_duration_ms, 1 /* min */, 2000 /* max */,
100 /* bucket count */);
delayed_packet_outage_counter_.RegisterSample();
lifetime_stats_.delayed_packet_outage_samples += num_samples;
++lifetime_stats_.delayed_packet_outage_events;
}
void StatisticsCalculator::StoreWaitingTime(int waiting_time_ms) {
excess_buffer_delay_.RegisterSample(waiting_time_ms);
RTC_DCHECK_LE(waiting_times_.size(), kLenWaitingTimes);
if (waiting_times_.size() == kLenWaitingTimes) {
// Erase first value.
waiting_times_.pop_front();
}
waiting_times_.push_back(waiting_time_ms);
operations_and_state_.last_waiting_time_ms = waiting_time_ms;
}
void StatisticsCalculator::GetNetworkStatistics(size_t samples_per_packet,
NetEqNetworkStatistics* stats) {
RTC_DCHECK(stats);
stats->accelerate_rate =
CalculateQ14Ratio(accelerate_samples_, timestamps_since_last_report_);
stats->preemptive_rate =
CalculateQ14Ratio(preemptive_samples_, timestamps_since_last_report_);
stats->expand_rate =
CalculateQ14Ratio(expanded_speech_samples_ + expanded_noise_samples_,
timestamps_since_last_report_);
stats->speech_expand_rate = CalculateQ14Ratio(expanded_speech_samples_,
timestamps_since_last_report_);
stats->secondary_decoded_rate = CalculateQ14Ratio(
secondary_decoded_samples_, timestamps_since_last_report_);
const size_t discarded_secondary_samples =
discarded_secondary_packets_ * samples_per_packet;
stats->secondary_discarded_rate =
CalculateQ14Ratio(discarded_secondary_samples,
static_cast<uint32_t>(discarded_secondary_samples +
secondary_decoded_samples_));
if (waiting_times_.size() == 0) {
stats->mean_waiting_time_ms = -1;
stats->median_waiting_time_ms = -1;
stats->min_waiting_time_ms = -1;
stats->max_waiting_time_ms = -1;
} else {
std::sort(waiting_times_.begin(), waiting_times_.end());
// Find mid-point elements. If the size is odd, the two values
// `middle_left` and `middle_right` will both be the one middle element; if
// the size is even, they will be the two neighboring elements at the
// middle of the list.
const int middle_left = waiting_times_[(waiting_times_.size() - 1) / 2];
const int middle_right = waiting_times_[waiting_times_.size() / 2];
// Calculate the average of the two. (Works also for odd sizes.)
stats->median_waiting_time_ms = (middle_left + middle_right) / 2;
stats->min_waiting_time_ms = waiting_times_.front();
stats->max_waiting_time_ms = waiting_times_.back();
double sum = 0;
for (auto time : waiting_times_) {
sum += time;
}
stats->mean_waiting_time_ms = static_cast<int>(sum / waiting_times_.size());
}
// Reset counters.
ResetMcu();
Reset();
}
NetEqLifetimeStatistics StatisticsCalculator::GetLifetimeStatistics() const {
return lifetime_stats_;
}
NetEqOperationsAndState StatisticsCalculator::GetOperationsAndState() const {
return operations_and_state_;
}
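// Example: CalculateQ14Ratio(120, 480) returns (120 << 14) / 480 = 4096,
// i.e., 0.25 in Q14.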
uint16_t StatisticsCalculator::CalculateQ14Ratio(size_t numerator,
uint32_t denominator) {
if (numerator == 0) {
return 0;
} else if (numerator < denominator) {
// Ratio must be smaller than 1 in Q14.
RTC_DCHECK_LT((numerator << 14) / denominator, (1 << 14));
return static_cast<uint16_t>((numerator << 14) / denominator);
} else {
// Cap the ratio at 1 in Q14, since a numerator larger than the denominator
// probably indicates an error.
return 1 << 14;
}
}
} // namespace webrtc

View file

@ -0,0 +1,210 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_STATISTICS_CALCULATOR_H_
#define MODULES_AUDIO_CODING_NETEQ_STATISTICS_CALCULATOR_H_
#include <deque>
#include <string>
#include "absl/strings/string_view.h"
#include "api/neteq/neteq.h"
namespace webrtc {
class DelayManager;
// This class handles various network statistics in NetEq.
class StatisticsCalculator {
public:
StatisticsCalculator();
virtual ~StatisticsCalculator();
StatisticsCalculator(const StatisticsCalculator&) = delete;
StatisticsCalculator& operator=(const StatisticsCalculator&) = delete;
// Resets most of the counters.
void Reset();
// Resets the counters that are not handled by Reset().
void ResetMcu();
// Reports that `num_samples` samples were produced through expansion, and
// that the expansion produced other than just noise samples.
void ExpandedVoiceSamples(size_t num_samples, bool is_new_concealment_event);
// Reports that `num_samples` samples were produced through expansion, and
// that the expansion produced only noise samples.
void ExpandedNoiseSamples(size_t num_samples, bool is_new_concealment_event);
// Corrects the statistics for number of samples produced through non-noise
// expansion by adding `num_samples` (negative or positive) to the current
// value. The result is capped to zero to avoid negative values.
void ExpandedVoiceSamplesCorrection(int num_samples);
// Same as ExpandedVoiceSamplesCorrection but for noise samples.
void ExpandedNoiseSamplesCorrection(int num_samples);
void DecodedOutputPlayed();
// Mark end of expand event; triggers some stats to be reported.
void EndExpandEvent(int fs_hz);
// Reports that `num_samples` samples were produced through preemptive
// expansion.
void PreemptiveExpandedSamples(size_t num_samples);
// Reports that `num_samples` samples were removed through accelerate.
void AcceleratedSamples(size_t num_samples);
// Reports that `num_samples` comfort noise samples were generated.
void GeneratedNoiseSamples(size_t num_samples);
// Reports that `num_packets` packets were discarded.
virtual void PacketsDiscarded(size_t num_packets);
// Reports that `num_packets` secondary (FEC) packets were discarded.
virtual void SecondaryPacketsDiscarded(size_t num_packets);
// Reports that `num_packets` secondary (FEC) packets were received.
virtual void SecondaryPacketsReceived(size_t num_packets);
// Increases the report interval counter by `num_samples` at a sample rate
// of `fs_hz`. This is how the StatisticsCalculator gets notified that current
// time is increasing.
void IncreaseCounter(size_t num_samples, int fs_hz);
// Update jitter buffer delay counter.
void JitterBufferDelay(size_t num_samples,
uint64_t waiting_time_ms,
uint64_t target_delay_ms,
uint64_t unlimited_target_delay_ms);
// Stores new packet waiting time in waiting time statistics.
void StoreWaitingTime(int waiting_time_ms);
// Reports that `num_samples` samples were decoded from secondary packets.
void SecondaryDecodedSamples(int num_samples);
// Reports that the packet buffer was flushed.
void FlushedPacketBuffer();
// Reports that the jitter buffer received a packet.
void ReceivedPacket();
// Reports that a received packet was delayed by `delay_ms` milliseconds.
virtual void RelativePacketArrivalDelay(size_t delay_ms);
// Logs a delayed packet outage event of `num_samples` expanded at a sample
// rate of `fs_hz`. A delayed packet outage event is defined as an expand
// period caused not by an actual packet loss, but by a delayed packet.
virtual void LogDelayedPacketOutageEvent(int num_samples, int fs_hz);
// Returns the current network statistics in `stats`. The number of samples
// per packet is `samples_per_packet`. The method does not populate
// `preferred_buffer_size_ms`, `jitter_peaks_found` or `clockdrift_ppm`; use
// the PopulateDelayManagerStats method for those.
void GetNetworkStatistics(size_t samples_per_packet,
NetEqNetworkStatistics* stats);
// Returns a copy of this class's lifetime statistics. These statistics are
// never reset.
NetEqLifetimeStatistics GetLifetimeStatistics() const;
NetEqOperationsAndState GetOperationsAndState() const;
private:
static const int kMaxReportPeriod = 60; // Seconds before auto-reset.
static const size_t kLenWaitingTimes = 100;
class PeriodicUmaLogger {
public:
PeriodicUmaLogger(absl::string_view uma_name,
int report_interval_ms,
int max_value);
virtual ~PeriodicUmaLogger();
void AdvanceClock(int step_ms);
protected:
void LogToUma(int value) const;
virtual int Metric() const = 0;
virtual void Reset() = 0;
const std::string uma_name_;
const int report_interval_ms_;
const int max_value_;
int timer_ = 0;
};
class PeriodicUmaCount final : public PeriodicUmaLogger {
public:
PeriodicUmaCount(absl::string_view uma_name,
int report_interval_ms,
int max_value);
~PeriodicUmaCount() override;
void RegisterSample();
protected:
int Metric() const override;
void Reset() override;
private:
int counter_ = 0;
};
class PeriodicUmaAverage final : public PeriodicUmaLogger {
public:
PeriodicUmaAverage(absl::string_view uma_name,
int report_interval_ms,
int max_value);
~PeriodicUmaAverage() override;
void RegisterSample(int value);
protected:
int Metric() const override;
void Reset() override;
private:
double sum_ = 0.0;
int counter_ = 0;
};
// Corrects the concealed samples counter in lifetime_stats_. The value of
// num_samples is added directly to the stat if the correction is positive.
// If the correction is negative, it is cached and will be subtracted against
// future additions to the counter. This is meant to be called from
// Expanded{Voice,Noise}Samples{Correction}.
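// Example: a correction of -10 followed by an addition of 25 samples
// increases the stat by only 15.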
void ConcealedSamplesCorrection(int num_samples, bool is_voice);
// Calculates numerator / denominator, and returns the value in Q14.
static uint16_t CalculateQ14Ratio(size_t numerator, uint32_t denominator);
NetEqLifetimeStatistics lifetime_stats_;
NetEqOperationsAndState operations_and_state_;
size_t concealed_samples_correction_ = 0;
size_t silent_concealed_samples_correction_ = 0;
size_t preemptive_samples_;
size_t accelerate_samples_;
size_t expanded_speech_samples_;
size_t expanded_noise_samples_;
size_t concealed_samples_at_event_end_ = 0;
uint32_t timestamps_since_last_report_;
std::deque<int> waiting_times_;
uint32_t secondary_decoded_samples_;
size_t discarded_secondary_packets_;
PeriodicUmaCount delayed_packet_outage_counter_;
PeriodicUmaAverage excess_buffer_delay_;
PeriodicUmaCount buffer_full_counter_;
bool decoded_output_played_ = false;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_STATISTICS_CALCULATOR_H_

View file

@ -0,0 +1,206 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/statistics_calculator.h"
#include "test/gtest.h"
namespace webrtc {
TEST(LifetimeStatistics, TotalSamplesReceived) {
StatisticsCalculator stats;
for (int i = 0; i < 10; ++i) {
stats.IncreaseCounter(480, 48000); // 10 ms at 48 kHz.
}
EXPECT_EQ(10 * 480u, stats.GetLifetimeStatistics().total_samples_received);
}
TEST(LifetimeStatistics, SamplesConcealed) {
StatisticsCalculator stats;
stats.ExpandedVoiceSamples(100, false);
stats.ExpandedNoiseSamples(17, false);
EXPECT_EQ(100u + 17u, stats.GetLifetimeStatistics().concealed_samples);
}
// This test verifies that a negative correction of concealed_samples does not
// result in a decrease in the stats value (because stats-consuming applications
// would not expect the value to decrease). Instead, the correction should be
// made to future increments to the stat.
TEST(LifetimeStatistics, SamplesConcealedCorrection) {
StatisticsCalculator stats;
stats.ExpandedVoiceSamples(100, false);
EXPECT_EQ(100u, stats.GetLifetimeStatistics().concealed_samples);
stats.ExpandedVoiceSamplesCorrection(-10);
// Do not subtract directly, but keep the correction for later.
EXPECT_EQ(100u, stats.GetLifetimeStatistics().concealed_samples);
stats.ExpandedVoiceSamplesCorrection(20);
// The total correction is 20 - 10.
EXPECT_EQ(110u, stats.GetLifetimeStatistics().concealed_samples);
// Also test correction done to the next ExpandedVoiceSamples call.
stats.ExpandedVoiceSamplesCorrection(-17);
EXPECT_EQ(110u, stats.GetLifetimeStatistics().concealed_samples);
stats.ExpandedVoiceSamples(100, false);
EXPECT_EQ(110u + 100u - 17u, stats.GetLifetimeStatistics().concealed_samples);
}
// This test verifies that neither "accelerate" nor "pre-emptive expand" results
// in a modification to concealed_samples stats. Only PLC operations (i.e.,
// "expand" and "merge") should affect the stat.
TEST(LifetimeStatistics, NoUpdateOnTimeStretch) {
StatisticsCalculator stats;
stats.ExpandedVoiceSamples(100, false);
stats.AcceleratedSamples(4711);
stats.PreemptiveExpandedSamples(17);
stats.ExpandedVoiceSamples(100, false);
EXPECT_EQ(200u, stats.GetLifetimeStatistics().concealed_samples);
}
TEST(StatisticsCalculator, ExpandedSamplesCorrection) {
StatisticsCalculator stats;
NetEqNetworkStatistics stats_output;
constexpr int kSampleRateHz = 48000;
constexpr int k10MsSamples = kSampleRateHz / 100;
constexpr int kPacketSizeMs = 20;
constexpr size_t kSamplesPerPacket = kPacketSizeMs * kSampleRateHz / 1000;
// Advance time by 10 ms.
stats.IncreaseCounter(k10MsSamples, kSampleRateHz);
stats.GetNetworkStatistics(kSamplesPerPacket, &stats_output);
EXPECT_EQ(0u, stats_output.expand_rate);
EXPECT_EQ(0u, stats_output.speech_expand_rate);
// Correct with a negative value.
stats.ExpandedVoiceSamplesCorrection(-100);
stats.ExpandedNoiseSamplesCorrection(-100);
stats.IncreaseCounter(k10MsSamples, kSampleRateHz);
stats.GetNetworkStatistics(kSamplesPerPacket, &stats_output);
// Expect no change, since negative values are disallowed.
EXPECT_EQ(0u, stats_output.expand_rate);
EXPECT_EQ(0u, stats_output.speech_expand_rate);
// Correct with a positive value.
stats.ExpandedVoiceSamplesCorrection(50);
stats.ExpandedNoiseSamplesCorrection(200);
stats.IncreaseCounter(k10MsSamples, kSampleRateHz);
stats.GetNetworkStatistics(kSamplesPerPacket, &stats_output);
// Calculate expected rates in Q14. Expand rate is noise + voice, while
// speech expand rate is only voice.
EXPECT_EQ(((50u + 200u) << 14) / k10MsSamples, stats_output.expand_rate);
EXPECT_EQ((50u << 14) / k10MsSamples, stats_output.speech_expand_rate);
}
TEST(StatisticsCalculator, RelativePacketArrivalDelay) {
StatisticsCalculator stats;
stats.RelativePacketArrivalDelay(50);
NetEqLifetimeStatistics stats_output = stats.GetLifetimeStatistics();
EXPECT_EQ(50u, stats_output.relative_packet_arrival_delay_ms);
stats.RelativePacketArrivalDelay(20);
stats_output = stats.GetLifetimeStatistics();
EXPECT_EQ(70u, stats_output.relative_packet_arrival_delay_ms);
}
TEST(StatisticsCalculator, ReceivedPacket) {
StatisticsCalculator stats;
stats.ReceivedPacket();
NetEqLifetimeStatistics stats_output = stats.GetLifetimeStatistics();
EXPECT_EQ(1u, stats_output.jitter_buffer_packets_received);
stats.ReceivedPacket();
stats_output = stats.GetLifetimeStatistics();
EXPECT_EQ(2u, stats_output.jitter_buffer_packets_received);
}
TEST(StatisticsCalculator, InterruptionCounter) {
constexpr int fs_khz = 48;
constexpr int fs_hz = fs_khz * 1000;
StatisticsCalculator stats;
stats.DecodedOutputPlayed();
stats.EndExpandEvent(fs_hz);
auto lts = stats.GetLifetimeStatistics();
EXPECT_EQ(0, lts.interruption_count);
EXPECT_EQ(0, lts.total_interruption_duration_ms);
// Add an event that is shorter than 150 ms. Should not be logged.
stats.ExpandedVoiceSamples(10 * fs_khz, false); // 10 ms.
stats.ExpandedNoiseSamples(139 * fs_khz, false); // 139 ms.
stats.EndExpandEvent(fs_hz);
lts = stats.GetLifetimeStatistics();
EXPECT_EQ(0, lts.interruption_count);
// Add an event that is longer than 150 ms. Should be logged.
stats.ExpandedVoiceSamples(140 * fs_khz, false); // 140 ms.
stats.ExpandedNoiseSamples(11 * fs_khz, false); // 11 ms.
stats.EndExpandEvent(fs_hz);
lts = stats.GetLifetimeStatistics();
EXPECT_EQ(1, lts.interruption_count);
EXPECT_EQ(151, lts.total_interruption_duration_ms);
// Add one more long event.
stats.ExpandedVoiceSamples(100 * fs_khz, false); // 100 ms.
stats.ExpandedNoiseSamples(5000 * fs_khz, false); // 5000 ms.
stats.EndExpandEvent(fs_hz);
lts = stats.GetLifetimeStatistics();
EXPECT_EQ(2, lts.interruption_count);
EXPECT_EQ(5100 + 151, lts.total_interruption_duration_ms);
}
TEST(StatisticsCalculator, InterruptionCounterDoNotLogBeforeDecoding) {
constexpr int fs_khz = 48;
constexpr int fs_hz = fs_khz * 1000;
StatisticsCalculator stats;
// Add an event that is longer than 150 ms. Should normally be logged, but we
// have not called DecodedOutputPlayed() yet, so it shouldn't this time.
stats.ExpandedVoiceSamples(151 * fs_khz, false); // 151 ms.
stats.EndExpandEvent(fs_hz);
auto lts = stats.GetLifetimeStatistics();
EXPECT_EQ(0, lts.interruption_count);
// Call DecodedOutputPlayed(). Logging should happen after this.
stats.DecodedOutputPlayed();
// Add one more long event.
stats.ExpandedVoiceSamples(151 * fs_khz, false); // 151 ms.
stats.EndExpandEvent(fs_hz);
lts = stats.GetLifetimeStatistics();
EXPECT_EQ(1, lts.interruption_count);
}
TEST(StatisticsCalculator, DiscardedPackets) {
StatisticsCalculator statistics_calculator;
EXPECT_EQ(0u,
statistics_calculator.GetLifetimeStatistics().packets_discarded);
statistics_calculator.PacketsDiscarded(1);
EXPECT_EQ(1u,
statistics_calculator.GetLifetimeStatistics().packets_discarded);
statistics_calculator.PacketsDiscarded(10);
EXPECT_EQ(11u,
statistics_calculator.GetLifetimeStatistics().packets_discarded);
// Calling `SecondaryPacketsDiscarded` does not modify `packets_discarded`.
statistics_calculator.SecondaryPacketsDiscarded(1);
EXPECT_EQ(11u,
statistics_calculator.GetLifetimeStatistics().packets_discarded);
// Calling `FlushedPacketBuffer` does not modify `packets_discarded`.
statistics_calculator.FlushedPacketBuffer();
EXPECT_EQ(11u,
statistics_calculator.GetLifetimeStatistics().packets_discarded);
}
} // namespace webrtc

Some files were not shown because too many files have changed in this diff