Repo created
This commit is contained in:
parent 81b91f4139
commit f8c34fa5ee
22732 changed files with 4815320 additions and 2 deletions
@@ -0,0 +1,322 @@
/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_coding/codecs/cng/audio_encoder_cng.h"

#include <cstdint>
#include <memory>
#include <utility>

#include "absl/types/optional.h"
#include "api/units/time_delta.h"
#include "modules/audio_coding/codecs/cng/webrtc_cng.h"
#include "rtc_base/checks.h"

namespace webrtc {

namespace {

const int kMaxFrameSizeMs = 60;

class AudioEncoderCng final : public AudioEncoder {
 public:
  explicit AudioEncoderCng(AudioEncoderCngConfig&& config);
  ~AudioEncoderCng() override;

  // Not copyable or moveable.
  AudioEncoderCng(const AudioEncoderCng&) = delete;
  AudioEncoderCng(AudioEncoderCng&&) = delete;
  AudioEncoderCng& operator=(const AudioEncoderCng&) = delete;
  AudioEncoderCng& operator=(AudioEncoderCng&&) = delete;

  int SampleRateHz() const override;
  size_t NumChannels() const override;
  int RtpTimestampRateHz() const override;
  size_t Num10MsFramesInNextPacket() const override;
  size_t Max10MsFramesInAPacket() const override;
  int GetTargetBitrate() const override;
  EncodedInfo EncodeImpl(uint32_t rtp_timestamp,
                         rtc::ArrayView<const int16_t> audio,
                         rtc::Buffer* encoded) override;
  void Reset() override;
  bool SetFec(bool enable) override;
  bool SetDtx(bool enable) override;
  bool SetApplication(Application application) override;
  void SetMaxPlaybackRate(int frequency_hz) override;
  rtc::ArrayView<std::unique_ptr<AudioEncoder>> ReclaimContainedEncoders()
      override;
  void OnReceivedUplinkPacketLossFraction(
      float uplink_packet_loss_fraction) override;
  void OnReceivedUplinkBandwidth(
      int target_audio_bitrate_bps,
      absl::optional<int64_t> bwe_period_ms) override;
  absl::optional<std::pair<TimeDelta, TimeDelta>> GetFrameLengthRange()
      const override;

 private:
  EncodedInfo EncodePassive(size_t frames_to_encode, rtc::Buffer* encoded);
  EncodedInfo EncodeActive(size_t frames_to_encode, rtc::Buffer* encoded);
  size_t SamplesPer10msFrame() const;

  std::unique_ptr<AudioEncoder> speech_encoder_;
  const int cng_payload_type_;
  const int num_cng_coefficients_;
  const int sid_frame_interval_ms_;
  std::vector<int16_t> speech_buffer_;
  std::vector<uint32_t> rtp_timestamps_;
  bool last_frame_active_;
  std::unique_ptr<Vad> vad_;
  std::unique_ptr<ComfortNoiseEncoder> cng_encoder_;
};

AudioEncoderCng::AudioEncoderCng(AudioEncoderCngConfig&& config)
    : speech_encoder_((static_cast<void>([&] {
                         RTC_CHECK(config.IsOk()) << "Invalid configuration.";
                       }()),
                       std::move(config.speech_encoder))),
      cng_payload_type_(config.payload_type),
      num_cng_coefficients_(config.num_cng_coefficients),
      sid_frame_interval_ms_(config.sid_frame_interval_ms),
      last_frame_active_(true),
      vad_(config.vad ? std::unique_ptr<Vad>(config.vad)
                      : CreateVad(config.vad_mode)),
      cng_encoder_(new ComfortNoiseEncoder(SampleRateHz(),
                                           sid_frame_interval_ms_,
                                           num_cng_coefficients_)) {}

AudioEncoderCng::~AudioEncoderCng() = default;

int AudioEncoderCng::SampleRateHz() const {
  return speech_encoder_->SampleRateHz();
}

size_t AudioEncoderCng::NumChannels() const {
  return 1;
}

int AudioEncoderCng::RtpTimestampRateHz() const {
  return speech_encoder_->RtpTimestampRateHz();
}

size_t AudioEncoderCng::Num10MsFramesInNextPacket() const {
  return speech_encoder_->Num10MsFramesInNextPacket();
}

size_t AudioEncoderCng::Max10MsFramesInAPacket() const {
  return speech_encoder_->Max10MsFramesInAPacket();
}

int AudioEncoderCng::GetTargetBitrate() const {
  return speech_encoder_->GetTargetBitrate();
}

AudioEncoder::EncodedInfo AudioEncoderCng::EncodeImpl(
    uint32_t rtp_timestamp,
    rtc::ArrayView<const int16_t> audio,
    rtc::Buffer* encoded) {
  const size_t samples_per_10ms_frame = SamplesPer10msFrame();
  RTC_CHECK_EQ(speech_buffer_.size(),
               rtp_timestamps_.size() * samples_per_10ms_frame);
  rtp_timestamps_.push_back(rtp_timestamp);
  RTC_DCHECK_EQ(samples_per_10ms_frame, audio.size());
  speech_buffer_.insert(speech_buffer_.end(), audio.cbegin(), audio.cend());
  const size_t frames_to_encode = speech_encoder_->Num10MsFramesInNextPacket();
  if (rtp_timestamps_.size() < frames_to_encode) {
    return EncodedInfo();
  }
  RTC_CHECK_LE(frames_to_encode * 10, kMaxFrameSizeMs)
      << "Frame size cannot be larger than " << kMaxFrameSizeMs
      << " ms when using VAD/CNG.";

  // Group several 10 ms blocks per VAD call. Call VAD once or twice using the
  // following split sizes:
  // 10 ms = 10 + 0 ms; 20 ms = 20 + 0 ms; 30 ms = 30 + 0 ms;
  // 40 ms = 20 + 20 ms; 50 ms = 30 + 20 ms; 60 ms = 30 + 30 ms.
  size_t blocks_in_first_vad_call =
      (frames_to_encode > 3 ? 3 : frames_to_encode);
  if (frames_to_encode == 4)
    blocks_in_first_vad_call = 2;
  RTC_CHECK_GE(frames_to_encode, blocks_in_first_vad_call);
  const size_t blocks_in_second_vad_call =
      frames_to_encode - blocks_in_first_vad_call;

  // Check if all of the buffer is passive speech. Start with checking the
  // first block.
  Vad::Activity activity = vad_->VoiceActivity(
      &speech_buffer_[0], samples_per_10ms_frame * blocks_in_first_vad_call,
      SampleRateHz());
  if (activity == Vad::kPassive && blocks_in_second_vad_call > 0) {
    // Only check the second block if the first was passive.
    activity = vad_->VoiceActivity(
        &speech_buffer_[samples_per_10ms_frame * blocks_in_first_vad_call],
        samples_per_10ms_frame * blocks_in_second_vad_call, SampleRateHz());
  }

  EncodedInfo info;
  switch (activity) {
    case Vad::kPassive: {
      info = EncodePassive(frames_to_encode, encoded);
      last_frame_active_ = false;
      break;
    }
    case Vad::kActive: {
      info = EncodeActive(frames_to_encode, encoded);
      last_frame_active_ = true;
      break;
    }
    default: {
      RTC_CHECK_NOTREACHED();
    }
  }

  speech_buffer_.erase(
      speech_buffer_.begin(),
      speech_buffer_.begin() + frames_to_encode * samples_per_10ms_frame);
  rtp_timestamps_.erase(rtp_timestamps_.begin(),
                        rtp_timestamps_.begin() + frames_to_encode);
  return info;
}

void AudioEncoderCng::Reset() {
  speech_encoder_->Reset();
  speech_buffer_.clear();
  rtp_timestamps_.clear();
  last_frame_active_ = true;
  vad_->Reset();
  cng_encoder_.reset(new ComfortNoiseEncoder(
      SampleRateHz(), sid_frame_interval_ms_, num_cng_coefficients_));
}

bool AudioEncoderCng::SetFec(bool enable) {
  return speech_encoder_->SetFec(enable);
}

bool AudioEncoderCng::SetDtx(bool enable) {
  return speech_encoder_->SetDtx(enable);
}

bool AudioEncoderCng::SetApplication(Application application) {
  return speech_encoder_->SetApplication(application);
}

void AudioEncoderCng::SetMaxPlaybackRate(int frequency_hz) {
  speech_encoder_->SetMaxPlaybackRate(frequency_hz);
}

rtc::ArrayView<std::unique_ptr<AudioEncoder>>
AudioEncoderCng::ReclaimContainedEncoders() {
  return rtc::ArrayView<std::unique_ptr<AudioEncoder>>(&speech_encoder_, 1);
}

void AudioEncoderCng::OnReceivedUplinkPacketLossFraction(
    float uplink_packet_loss_fraction) {
  speech_encoder_->OnReceivedUplinkPacketLossFraction(
      uplink_packet_loss_fraction);
}

void AudioEncoderCng::OnReceivedUplinkBandwidth(
    int target_audio_bitrate_bps,
    absl::optional<int64_t> bwe_period_ms) {
  speech_encoder_->OnReceivedUplinkBandwidth(target_audio_bitrate_bps,
                                             bwe_period_ms);
}

absl::optional<std::pair<TimeDelta, TimeDelta>>
AudioEncoderCng::GetFrameLengthRange() const {
  return speech_encoder_->GetFrameLengthRange();
}

AudioEncoder::EncodedInfo AudioEncoderCng::EncodePassive(
    size_t frames_to_encode,
    rtc::Buffer* encoded) {
  bool force_sid = last_frame_active_;
  bool output_produced = false;
  const size_t samples_per_10ms_frame = SamplesPer10msFrame();
  AudioEncoder::EncodedInfo info;

  for (size_t i = 0; i < frames_to_encode; ++i) {
    // It's important not to pass &info.encoded_bytes directly to
    // WebRtcCng_Encode(), since later loop iterations may return zero in
    // that value, in which case we don't want to overwrite any value from
    // an earlier iteration.
    size_t encoded_bytes_tmp =
        cng_encoder_->Encode(rtc::ArrayView<const int16_t>(
                                 &speech_buffer_[i * samples_per_10ms_frame],
                                 samples_per_10ms_frame),
                             force_sid, encoded);

    if (encoded_bytes_tmp > 0) {
      RTC_CHECK(!output_produced);
      info.encoded_bytes = encoded_bytes_tmp;
      output_produced = true;
      force_sid = false;
    }
  }

  info.encoded_timestamp = rtp_timestamps_.front();
  info.payload_type = cng_payload_type_;
  info.send_even_if_empty = true;
  info.speech = false;
  return info;
}

AudioEncoder::EncodedInfo AudioEncoderCng::EncodeActive(size_t frames_to_encode,
                                                        rtc::Buffer* encoded) {
  const size_t samples_per_10ms_frame = SamplesPer10msFrame();
  AudioEncoder::EncodedInfo info;
  for (size_t i = 0; i < frames_to_encode; ++i) {
    info =
        speech_encoder_->Encode(rtp_timestamps_.front(),
                                rtc::ArrayView<const int16_t>(
                                    &speech_buffer_[i * samples_per_10ms_frame],
                                    samples_per_10ms_frame),
                                encoded);
    if (i + 1 == frames_to_encode) {
      RTC_CHECK_GT(info.encoded_bytes, 0) << "Encoder didn't deliver data.";
    } else {
      RTC_CHECK_EQ(info.encoded_bytes, 0)
          << "Encoder delivered data too early.";
    }
  }
  return info;
}

size_t AudioEncoderCng::SamplesPer10msFrame() const {
  return rtc::CheckedDivExact(10 * SampleRateHz(), 1000);
}

}  // namespace

AudioEncoderCngConfig::AudioEncoderCngConfig() = default;
AudioEncoderCngConfig::AudioEncoderCngConfig(AudioEncoderCngConfig&&) = default;
AudioEncoderCngConfig::~AudioEncoderCngConfig() = default;

bool AudioEncoderCngConfig::IsOk() const {
  if (num_channels != 1)
    return false;
  if (!speech_encoder)
    return false;
  if (num_channels != speech_encoder->NumChannels())
    return false;
  if (sid_frame_interval_ms <
      static_cast<int>(speech_encoder->Max10MsFramesInAPacket() * 10))
    return false;
  if (num_cng_coefficients > WEBRTC_CNG_MAX_LPC_ORDER ||
      num_cng_coefficients <= 0)
    return false;
  return true;
}

std::unique_ptr<AudioEncoder> CreateComfortNoiseEncoder(
    AudioEncoderCngConfig&& config) {
  return std::make_unique<AudioEncoderCng>(std::move(config));
}

}  // namespace webrtc
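The VAD block-splitting rule documented inside EncodeImpl() above (10/20/30 ms are analyzed in one VAD call, 40 ms splits as 20 + 20 ms, 50 ms as 30 + 20 ms, 60 ms as 30 + 30 ms) can be summarized in a small standalone helper. This is an illustrative sketch only, not part of the commit; the function name is hypothetical.

#include <cstddef>
#include <utility>

// Returns {blocks_in_first_vad_call, blocks_in_second_vad_call} for a frame of
// `frames_to_encode` 10 ms blocks, mirroring the logic in EncodeImpl().
std::pair<size_t, size_t> SplitVadBlocks(size_t frames_to_encode) {
  // Up to 30 ms (three blocks) goes into a single VAD call.
  size_t first = frames_to_encode > 3 ? 3 : frames_to_encode;
  // A 40 ms frame is split evenly as 20 + 20 ms rather than 30 + 10 ms.
  if (frames_to_encode == 4)
    first = 2;
  return {first, frames_to_encode - first};
}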
@@ -0,0 +1,49 @@
/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef MODULES_AUDIO_CODING_CODECS_CNG_AUDIO_ENCODER_CNG_H_
#define MODULES_AUDIO_CODING_CODECS_CNG_AUDIO_ENCODER_CNG_H_

#include <stddef.h>

#include <memory>

#include "api/audio_codecs/audio_encoder.h"
#include "common_audio/vad/include/vad.h"

namespace webrtc {

struct AudioEncoderCngConfig {
  // Moveable, not copyable.
  AudioEncoderCngConfig();
  AudioEncoderCngConfig(AudioEncoderCngConfig&&);
  ~AudioEncoderCngConfig();

  bool IsOk() const;

  size_t num_channels = 1;
  int payload_type = 13;
  std::unique_ptr<AudioEncoder> speech_encoder;
  Vad::Aggressiveness vad_mode = Vad::kVadNormal;
  int sid_frame_interval_ms = 100;
  int num_cng_coefficients = 8;
  // The Vad pointer is mainly for testing. If a NULL pointer is passed, the
  // AudioEncoderCng creates (and destroys) a Vad object internally. If an
  // object is passed, the AudioEncoderCng assumes ownership of the Vad
  // object.
  Vad* vad = nullptr;
};

std::unique_ptr<AudioEncoder> CreateComfortNoiseEncoder(
    AudioEncoderCngConfig&& config);

}  // namespace webrtc

#endif  // MODULES_AUDIO_CODING_CODECS_CNG_AUDIO_ENCODER_CNG_H_
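As a usage sketch (not part of the commit), wrapping an existing AudioEncoder with CNG/DTX support via the config above looks roughly like this; the incoming speech encoder is whatever mono encoder the application already has.

#include <memory>
#include <utility>

#include "modules/audio_coding/codecs/cng/audio_encoder_cng.h"

std::unique_ptr<webrtc::AudioEncoder> WrapWithComfortNoise(
    std::unique_ptr<webrtc::AudioEncoder> speech_encoder) {
  webrtc::AudioEncoderCngConfig config;
  config.speech_encoder = std::move(speech_encoder);  // Must be mono.
  config.payload_type = 13;            // RFC 3389 CN payload type.
  config.sid_frame_interval_ms = 100;  // Must cover the encoder's max frame.
  config.num_cng_coefficients = 8;
  // CreateComfortNoiseEncoder() CHECKs config.IsOk() internally.
  return webrtc::CreateComfortNoiseEncoder(std::move(config));
}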
@@ -0,0 +1,520 @@
/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_coding/codecs/cng/audio_encoder_cng.h"

#include <memory>
#include <vector>

#include "common_audio/vad/mock/mock_vad.h"
#include "rtc_base/numerics/safe_conversions.h"
#include "test/gtest.h"
#include "test/mock_audio_encoder.h"
#include "test/testsupport/rtc_expect_death.h"

using ::testing::_;
using ::testing::Eq;
using ::testing::InSequence;
using ::testing::Invoke;
using ::testing::Not;
using ::testing::Optional;
using ::testing::Return;
using ::testing::SetArgPointee;

namespace webrtc {

namespace {
static const size_t kMaxNumSamples = 48 * 10 * 2;  // 10 ms @ 48 kHz stereo.
static const size_t kMockReturnEncodedBytes = 17;
static const int kCngPayloadType = 18;
}  // namespace

class AudioEncoderCngTest : public ::testing::Test {
 protected:
  AudioEncoderCngTest()
      : mock_encoder_owner_(new MockAudioEncoder),
        mock_encoder_(mock_encoder_owner_.get()),
        mock_vad_(new MockVad),
        timestamp_(4711),
        num_audio_samples_10ms_(0),
        sample_rate_hz_(8000) {
    memset(audio_, 0, kMaxNumSamples * 2);
    EXPECT_CALL(*mock_encoder_, NumChannels()).WillRepeatedly(Return(1));
  }

  AudioEncoderCngTest(const AudioEncoderCngTest&) = delete;
  AudioEncoderCngTest& operator=(const AudioEncoderCngTest&) = delete;

  void TearDown() override {
    EXPECT_CALL(*mock_vad_, Die()).Times(1);
    cng_.reset();
  }

  AudioEncoderCngConfig MakeCngConfig() {
    AudioEncoderCngConfig config;
    config.speech_encoder = std::move(mock_encoder_owner_);
    EXPECT_TRUE(config.speech_encoder);

    // Let the AudioEncoderCng object use a MockVad instead of its internally
    // created Vad object.
    config.vad = mock_vad_;
    config.payload_type = kCngPayloadType;

    return config;
  }

  void CreateCng(AudioEncoderCngConfig&& config) {
    num_audio_samples_10ms_ = static_cast<size_t>(10 * sample_rate_hz_ / 1000);
    ASSERT_LE(num_audio_samples_10ms_, kMaxNumSamples);
    if (config.speech_encoder) {
      EXPECT_CALL(*mock_encoder_, SampleRateHz())
          .WillRepeatedly(Return(sample_rate_hz_));
      // Max10MsFramesInAPacket() is just used to verify that the SID frame
      // period is not too small. The return value does not matter that much,
      // as long as it is smaller than 10.
      EXPECT_CALL(*mock_encoder_, Max10MsFramesInAPacket())
          .WillOnce(Return(1u));
    }
    cng_ = CreateComfortNoiseEncoder(std::move(config));
  }

  void Encode() {
    ASSERT_TRUE(cng_) << "Must call CreateCng() first.";
    encoded_info_ = cng_->Encode(
        timestamp_,
        rtc::ArrayView<const int16_t>(audio_, num_audio_samples_10ms_),
        &encoded_);
    timestamp_ += static_cast<uint32_t>(num_audio_samples_10ms_);
  }

  // Expect `num_calls` calls to the encoder, all successful. The last call
  // claims to have encoded `kMockReturnEncodedBytes` bytes, and all the
  // preceding ones 0 bytes.
  void ExpectEncodeCalls(size_t num_calls) {
    InSequence s;
    AudioEncoder::EncodedInfo info;
    for (size_t j = 0; j < num_calls - 1; ++j) {
      EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)).WillOnce(Return(info));
    }
    info.encoded_bytes = kMockReturnEncodedBytes;
    EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _))
        .WillOnce(
            Invoke(MockAudioEncoder::FakeEncoding(kMockReturnEncodedBytes)));
  }

  // Verifies that the cng_ object waits until it has collected
  // `blocks_per_frame` blocks of audio, and then dispatches all of them to
  // the underlying codec (speech or cng).
  void CheckBlockGrouping(size_t blocks_per_frame, bool active_speech) {
    EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket())
        .WillRepeatedly(Return(blocks_per_frame));
    auto config = MakeCngConfig();
    const int num_cng_coefficients = config.num_cng_coefficients;
    CreateCng(std::move(config));
    EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
        .WillRepeatedly(Return(active_speech ? Vad::kActive : Vad::kPassive));

    // Don't expect any calls to the encoder yet.
    EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)).Times(0);
    for (size_t i = 0; i < blocks_per_frame - 1; ++i) {
      Encode();
      EXPECT_EQ(0u, encoded_info_.encoded_bytes);
    }
    if (active_speech)
      ExpectEncodeCalls(blocks_per_frame);
    Encode();
    if (active_speech) {
      EXPECT_EQ(kMockReturnEncodedBytes, encoded_info_.encoded_bytes);
    } else {
      EXPECT_EQ(static_cast<size_t>(num_cng_coefficients + 1),
                encoded_info_.encoded_bytes);
    }
  }

  // Verifies that the audio is partitioned into larger blocks before calling
  // the VAD.
  void CheckVadInputSize(int input_frame_size_ms,
                         int expected_first_block_size_ms,
                         int expected_second_block_size_ms) {
    const size_t blocks_per_frame =
        static_cast<size_t>(input_frame_size_ms / 10);

    EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket())
        .WillRepeatedly(Return(blocks_per_frame));

    // Expect nothing to happen before the last block is sent to cng_.
    EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)).Times(0);
    for (size_t i = 0; i < blocks_per_frame - 1; ++i) {
      Encode();
    }

    // Let the VAD decision be passive, since an active decision may lead to
    // early termination of the decision loop.
    InSequence s;
    EXPECT_CALL(
        *mock_vad_,
        VoiceActivity(_, expected_first_block_size_ms * sample_rate_hz_ / 1000,
                      sample_rate_hz_))
        .WillOnce(Return(Vad::kPassive));
    if (expected_second_block_size_ms > 0) {
      EXPECT_CALL(*mock_vad_,
                  VoiceActivity(
                      _, expected_second_block_size_ms * sample_rate_hz_ / 1000,
                      sample_rate_hz_))
          .WillOnce(Return(Vad::kPassive));
    }

    // With this call to Encode(), `mock_vad_` should be called according to
    // the above expectations.
    Encode();
  }

  // Tests a frame with both active and passive speech. Returns true if the
  // decision was active speech, false if it was passive.
  bool CheckMixedActivePassive(Vad::Activity first_type,
                               Vad::Activity second_type) {
    // Set the speech encoder frame size to 60 ms, to ensure that the VAD will
    // be called twice.
    const size_t blocks_per_frame = 6;
    EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket())
        .WillRepeatedly(Return(blocks_per_frame));
    InSequence s;
    EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
        .WillOnce(Return(first_type));
    if (first_type == Vad::kPassive) {
      // Expect a second call to the VAD only if the first frame was passive.
      EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
          .WillOnce(Return(second_type));
    }
    encoded_info_.payload_type = 0;
    for (size_t i = 0; i < blocks_per_frame; ++i) {
      Encode();
    }
    return encoded_info_.payload_type != kCngPayloadType;
  }

  std::unique_ptr<AudioEncoder> cng_;
  std::unique_ptr<MockAudioEncoder> mock_encoder_owner_;
  MockAudioEncoder* mock_encoder_;
  MockVad* mock_vad_;  // Ownership is transferred to `cng_`.
  uint32_t timestamp_;
  int16_t audio_[kMaxNumSamples];
  size_t num_audio_samples_10ms_;
  rtc::Buffer encoded_;
  AudioEncoder::EncodedInfo encoded_info_;
  int sample_rate_hz_;
};

TEST_F(AudioEncoderCngTest, CreateAndDestroy) {
  CreateCng(MakeCngConfig());
}

TEST_F(AudioEncoderCngTest, CheckFrameSizePropagation) {
  CreateCng(MakeCngConfig());
  EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket())
      .WillOnce(Return(17U));
  EXPECT_EQ(17U, cng_->Num10MsFramesInNextPacket());
}

TEST_F(AudioEncoderCngTest, CheckTargetAudioBitratePropagation) {
  CreateCng(MakeCngConfig());
  EXPECT_CALL(*mock_encoder_,
              OnReceivedUplinkBandwidth(4711, absl::optional<int64_t>()));
  cng_->OnReceivedUplinkBandwidth(4711, absl::nullopt);
}

TEST_F(AudioEncoderCngTest, CheckPacketLossFractionPropagation) {
  CreateCng(MakeCngConfig());
  EXPECT_CALL(*mock_encoder_, OnReceivedUplinkPacketLossFraction(0.5));
  cng_->OnReceivedUplinkPacketLossFraction(0.5);
}

TEST_F(AudioEncoderCngTest, CheckGetFrameLengthRangePropagation) {
  CreateCng(MakeCngConfig());
  auto expected_range =
      std::make_pair(TimeDelta::Millis(20), TimeDelta::Millis(20));
  EXPECT_CALL(*mock_encoder_, GetFrameLengthRange())
      .WillRepeatedly(Return(absl::make_optional(expected_range)));
  EXPECT_THAT(cng_->GetFrameLengthRange(), Optional(Eq(expected_range)));
}

TEST_F(AudioEncoderCngTest, EncodeCallsVad) {
  EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket())
      .WillRepeatedly(Return(1U));
  CreateCng(MakeCngConfig());
  EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
      .WillOnce(Return(Vad::kPassive));
  Encode();
}

TEST_F(AudioEncoderCngTest, EncodeCollects1BlockPassiveSpeech) {
  CheckBlockGrouping(1, false);
}

TEST_F(AudioEncoderCngTest, EncodeCollects2BlocksPassiveSpeech) {
  CheckBlockGrouping(2, false);
}

TEST_F(AudioEncoderCngTest, EncodeCollects3BlocksPassiveSpeech) {
  CheckBlockGrouping(3, false);
}

TEST_F(AudioEncoderCngTest, EncodeCollects1BlockActiveSpeech) {
  CheckBlockGrouping(1, true);
}

TEST_F(AudioEncoderCngTest, EncodeCollects2BlocksActiveSpeech) {
  CheckBlockGrouping(2, true);
}

TEST_F(AudioEncoderCngTest, EncodeCollects3BlocksActiveSpeech) {
  CheckBlockGrouping(3, true);
}

TEST_F(AudioEncoderCngTest, EncodePassive) {
  const size_t kBlocksPerFrame = 3;
  EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket())
      .WillRepeatedly(Return(kBlocksPerFrame));
  auto config = MakeCngConfig();
  const auto sid_frame_interval_ms = config.sid_frame_interval_ms;
  const auto num_cng_coefficients = config.num_cng_coefficients;
  CreateCng(std::move(config));
  EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
      .WillRepeatedly(Return(Vad::kPassive));
  // Expect no calls at all to the speech encoder mock.
  EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)).Times(0);
  uint32_t expected_timestamp = timestamp_;
  for (size_t i = 0; i < 100; ++i) {
    Encode();
    // Check if it was time to call the cng encoder. This is done once every
    // `kBlocksPerFrame` calls.
    if ((i + 1) % kBlocksPerFrame == 0) {
      // Now check if a SID interval has elapsed.
      if ((i % (sid_frame_interval_ms / 10)) < kBlocksPerFrame) {
        // If so, verify that we got a CNG encoding.
        EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type);
        EXPECT_FALSE(encoded_info_.speech);
        EXPECT_EQ(static_cast<size_t>(num_cng_coefficients) + 1,
                  encoded_info_.encoded_bytes);
        EXPECT_EQ(expected_timestamp, encoded_info_.encoded_timestamp);
      }
      expected_timestamp += rtc::checked_cast<uint32_t>(
          kBlocksPerFrame * num_audio_samples_10ms_);
    } else {
      // Otherwise, expect no output.
      EXPECT_EQ(0u, encoded_info_.encoded_bytes);
    }
  }
}

// Verifies that the correct action is taken for frames with both active and
// passive speech.
TEST_F(AudioEncoderCngTest, MixedActivePassive) {
  CreateCng(MakeCngConfig());

  // All of the frame is active speech.
  ExpectEncodeCalls(6);
  EXPECT_TRUE(CheckMixedActivePassive(Vad::kActive, Vad::kActive));
  EXPECT_TRUE(encoded_info_.speech);

  // First half of the frame is active speech.
  ExpectEncodeCalls(6);
  EXPECT_TRUE(CheckMixedActivePassive(Vad::kActive, Vad::kPassive));
  EXPECT_TRUE(encoded_info_.speech);

  // Second half of the frame is active speech.
  ExpectEncodeCalls(6);
  EXPECT_TRUE(CheckMixedActivePassive(Vad::kPassive, Vad::kActive));
  EXPECT_TRUE(encoded_info_.speech);

  // All of the frame is passive speech. Expect no calls to `mock_encoder_`.
  EXPECT_FALSE(CheckMixedActivePassive(Vad::kPassive, Vad::kPassive));
  EXPECT_FALSE(encoded_info_.speech);
}

// These tests verify that the audio is partitioned into larger blocks before
// calling the VAD.
// The parameters for CheckVadInputSize are:
// CheckVadInputSize(frame_size, expected_first_block_size,
//                   expected_second_block_size);
TEST_F(AudioEncoderCngTest, VadInputSize10Ms) {
  CreateCng(MakeCngConfig());
  CheckVadInputSize(10, 10, 0);
}
TEST_F(AudioEncoderCngTest, VadInputSize20Ms) {
  CreateCng(MakeCngConfig());
  CheckVadInputSize(20, 20, 0);
}
TEST_F(AudioEncoderCngTest, VadInputSize30Ms) {
  CreateCng(MakeCngConfig());
  CheckVadInputSize(30, 30, 0);
}
TEST_F(AudioEncoderCngTest, VadInputSize40Ms) {
  CreateCng(MakeCngConfig());
  CheckVadInputSize(40, 20, 20);
}
TEST_F(AudioEncoderCngTest, VadInputSize50Ms) {
  CreateCng(MakeCngConfig());
  CheckVadInputSize(50, 30, 20);
}
TEST_F(AudioEncoderCngTest, VadInputSize60Ms) {
  CreateCng(MakeCngConfig());
  CheckVadInputSize(60, 30, 30);
}

// Verifies that the correct payload type is set when CNG is encoded.
TEST_F(AudioEncoderCngTest, VerifyCngPayloadType) {
  CreateCng(MakeCngConfig());
  EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _)).Times(0);
  EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket()).WillOnce(Return(1U));
  EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
      .WillOnce(Return(Vad::kPassive));
  encoded_info_.payload_type = 0;
  Encode();
  EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type);
}

// Verifies that a SID frame is encoded immediately as the signal changes from
// active speech to passive.
TEST_F(AudioEncoderCngTest, VerifySidFrameAfterSpeech) {
  auto config = MakeCngConfig();
  const auto num_cng_coefficients = config.num_cng_coefficients;
  CreateCng(std::move(config));
  EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket())
      .WillRepeatedly(Return(1U));
  // Start with encoding noise.
  EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
      .Times(2)
      .WillRepeatedly(Return(Vad::kPassive));
  Encode();
  EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type);
  EXPECT_EQ(static_cast<size_t>(num_cng_coefficients) + 1,
            encoded_info_.encoded_bytes);
  // Encode again, and make sure we got no frame at all (since the SID frame
  // period is 100 ms by default).
  Encode();
  EXPECT_EQ(0u, encoded_info_.encoded_bytes);

  // Now encode active speech.
  encoded_info_.payload_type = 0;
  EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
      .WillOnce(Return(Vad::kActive));
  EXPECT_CALL(*mock_encoder_, EncodeImpl(_, _, _))
      .WillOnce(
          Invoke(MockAudioEncoder::FakeEncoding(kMockReturnEncodedBytes)));
  Encode();
  EXPECT_EQ(kMockReturnEncodedBytes, encoded_info_.encoded_bytes);

  // Go back to noise again, and verify that a SID frame is emitted.
  EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
      .WillOnce(Return(Vad::kPassive));
  Encode();
  EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type);
  EXPECT_EQ(static_cast<size_t>(num_cng_coefficients) + 1,
            encoded_info_.encoded_bytes);
}

// Resetting the CNG should reset both the VAD and the encoder.
TEST_F(AudioEncoderCngTest, Reset) {
  CreateCng(MakeCngConfig());
  EXPECT_CALL(*mock_encoder_, Reset()).Times(1);
  EXPECT_CALL(*mock_vad_, Reset()).Times(1);
  cng_->Reset();
}

#if GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)

// This test fixture tests various error conditions that make the
// AudioEncoderCng die via CHECKs.
class AudioEncoderCngDeathTest : public AudioEncoderCngTest {
 protected:
  AudioEncoderCngDeathTest() : AudioEncoderCngTest() {
    EXPECT_CALL(*mock_vad_, Die()).Times(1);
    delete mock_vad_;
    mock_vad_ = nullptr;
  }

  // Override AudioEncoderCngTest::TearDown, since that one expects a call to
  // the destructor of `mock_vad_`. In this case, that object is already
  // deleted.
  void TearDown() override { cng_.reset(); }

  AudioEncoderCngConfig MakeCngConfig() {
    // Don't provide a Vad mock object, since it would leak when the test dies.
    auto config = AudioEncoderCngTest::MakeCngConfig();
    config.vad = nullptr;
    return config;
  }

  void TryWrongNumCoefficients(int num) {
    RTC_EXPECT_DEATH(
        [&] {
          auto config = MakeCngConfig();
          config.num_cng_coefficients = num;
          CreateCng(std::move(config));
        }(),
        "Invalid configuration");
  }
};

TEST_F(AudioEncoderCngDeathTest, WrongFrameSize) {
  CreateCng(MakeCngConfig());
  num_audio_samples_10ms_ *= 2;  // 20 ms frame.
  RTC_EXPECT_DEATH(Encode(), "");
  num_audio_samples_10ms_ = 0;  // Zero samples.
  RTC_EXPECT_DEATH(Encode(), "");
}

TEST_F(AudioEncoderCngDeathTest, WrongNumCoefficientsA) {
  TryWrongNumCoefficients(-1);
}

TEST_F(AudioEncoderCngDeathTest, WrongNumCoefficientsB) {
  TryWrongNumCoefficients(0);
}

TEST_F(AudioEncoderCngDeathTest, WrongNumCoefficientsC) {
  TryWrongNumCoefficients(13);
}

TEST_F(AudioEncoderCngDeathTest, NullSpeechEncoder) {
  auto config = MakeCngConfig();
  config.speech_encoder = nullptr;
  RTC_EXPECT_DEATH(CreateCng(std::move(config)), "");
}

TEST_F(AudioEncoderCngDeathTest, StereoEncoder) {
  EXPECT_CALL(*mock_encoder_, NumChannels()).WillRepeatedly(Return(2));
  RTC_EXPECT_DEATH(CreateCng(MakeCngConfig()), "Invalid configuration");
}

TEST_F(AudioEncoderCngDeathTest, StereoConfig) {
  RTC_EXPECT_DEATH(
      [&] {
        auto config = MakeCngConfig();
        config.num_channels = 2;
        CreateCng(std::move(config));
      }(),
      "Invalid configuration");
}

TEST_F(AudioEncoderCngDeathTest, EncoderFrameSizeTooLarge) {
  CreateCng(MakeCngConfig());
  EXPECT_CALL(*mock_encoder_, Num10MsFramesInNextPacket())
      .WillRepeatedly(Return(7U));
  for (int i = 0; i < 6; ++i)
    Encode();
  RTC_EXPECT_DEATH(
      Encode(), "Frame size cannot be larger than 60 ms when using VAD/CNG.");
}

#endif  // GTEST_HAS_DEATH_TEST

}  // namespace webrtc
@@ -0,0 +1,252 @@
/*
 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include <memory>
#include <string>

#include "modules/audio_coding/codecs/cng/webrtc_cng.h"
#include "test/gtest.h"
#include "test/testsupport/file_utils.h"

namespace webrtc {

enum {
  kSidShortIntervalUpdate = 1,
  kSidNormalIntervalUpdate = 100,
  kSidLongIntervalUpdate = 10000
};

enum : size_t {
  kCNGNumParamsLow = 0,
  kCNGNumParamsNormal = 8,
  kCNGNumParamsHigh = WEBRTC_CNG_MAX_LPC_ORDER,
  kCNGNumParamsTooHigh = WEBRTC_CNG_MAX_LPC_ORDER + 1
};

enum { kNoSid, kForceSid };

class CngTest : public ::testing::Test {
 protected:
  virtual void SetUp();

  void TestCngEncode(int sample_rate_hz, int quality);

  int16_t speech_data_[640];  // Max size of CNG internal buffers.
};

class CngDeathTest : public CngTest {};

void CngTest::SetUp() {
  FILE* input_file;
  const std::string file_name =
      webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm");
  input_file = fopen(file_name.c_str(), "rb");
  ASSERT_TRUE(input_file != NULL);
  ASSERT_EQ(640, static_cast<int32_t>(
                     fread(speech_data_, sizeof(int16_t), 640, input_file)));
  fclose(input_file);
  input_file = NULL;
}

void CngTest::TestCngEncode(int sample_rate_hz, int quality) {
  const size_t num_samples_10ms = rtc::CheckedDivExact(sample_rate_hz, 100);
  rtc::Buffer sid_data;

  ComfortNoiseEncoder cng_encoder(sample_rate_hz, kSidNormalIntervalUpdate,
                                  quality);
  EXPECT_EQ(0U, cng_encoder.Encode(rtc::ArrayView<const int16_t>(
                                       speech_data_, num_samples_10ms),
                                   kNoSid, &sid_data));
  EXPECT_EQ(static_cast<size_t>(quality + 1),
            cng_encoder.Encode(
                rtc::ArrayView<const int16_t>(speech_data_, num_samples_10ms),
                kForceSid, &sid_data));
}

#if GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
// Create CNG encoder, init with faulty values, free CNG encoder.
TEST_F(CngDeathTest, CngInitFail) {
  // Call with too few parameters.
  EXPECT_DEATH(
      {
        ComfortNoiseEncoder(8000, kSidNormalIntervalUpdate, kCNGNumParamsLow);
      },
      "");
  // Call with too many parameters.
  EXPECT_DEATH(
      {
        ComfortNoiseEncoder(8000, kSidNormalIntervalUpdate,
                            kCNGNumParamsTooHigh);
      },
      "");
}

// Encode CNG with a too-long input vector.
TEST_F(CngDeathTest, CngEncodeTooLong) {
  rtc::Buffer sid_data;

  // Create encoder.
  ComfortNoiseEncoder cng_encoder(8000, kSidNormalIntervalUpdate,
                                  kCNGNumParamsNormal);
  // Run encoder with too much data.
  EXPECT_DEATH(
      cng_encoder.Encode(rtc::ArrayView<const int16_t>(speech_data_, 641),
                         kNoSid, &sid_data),
      "");
}
#endif  // GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)

TEST_F(CngTest, CngEncode8000) {
  TestCngEncode(8000, kCNGNumParamsNormal);
}

TEST_F(CngTest, CngEncode16000) {
  TestCngEncode(16000, kCNGNumParamsNormal);
}

TEST_F(CngTest, CngEncode32000) {
  TestCngEncode(32000, kCNGNumParamsHigh);
}

TEST_F(CngTest, CngEncode48000) {
  TestCngEncode(48000, kCNGNumParamsNormal);
}

TEST_F(CngTest, CngEncode64000) {
  TestCngEncode(64000, kCNGNumParamsNormal);
}

// Update SID parameters, for both 9 and 16 parameters.
TEST_F(CngTest, CngUpdateSid) {
  rtc::Buffer sid_data;

  // Create and initialize encoder and decoder.
  ComfortNoiseEncoder cng_encoder(16000, kSidNormalIntervalUpdate,
                                  kCNGNumParamsNormal);
  ComfortNoiseDecoder cng_decoder;

  // Run normal Encode and UpdateSid.
  EXPECT_EQ(kCNGNumParamsNormal + 1,
            cng_encoder.Encode(rtc::ArrayView<const int16_t>(speech_data_, 160),
                               kForceSid, &sid_data));
  cng_decoder.UpdateSid(sid_data);

  // Reinit with new length.
  cng_encoder.Reset(16000, kSidNormalIntervalUpdate, kCNGNumParamsHigh);
  cng_decoder.Reset();

  // Expect 0 because of unstable parameters after switching length.
  EXPECT_EQ(0U,
            cng_encoder.Encode(rtc::ArrayView<const int16_t>(speech_data_, 160),
                               kForceSid, &sid_data));
  EXPECT_EQ(
      kCNGNumParamsHigh + 1,
      cng_encoder.Encode(rtc::ArrayView<const int16_t>(speech_data_ + 160, 160),
                         kForceSid, &sid_data));
  cng_decoder.UpdateSid(
      rtc::ArrayView<const uint8_t>(sid_data.data(), kCNGNumParamsNormal + 1));
}

// Update SID parameters, with wrong parameters or without calling decode.
TEST_F(CngTest, CngUpdateSidErroneous) {
  rtc::Buffer sid_data;

  // Encode.
  ComfortNoiseEncoder cng_encoder(16000, kSidNormalIntervalUpdate,
                                  kCNGNumParamsNormal);
  ComfortNoiseDecoder cng_decoder;
  EXPECT_EQ(kCNGNumParamsNormal + 1,
            cng_encoder.Encode(rtc::ArrayView<const int16_t>(speech_data_, 160),
                               kForceSid, &sid_data));

  // First run with valid parameters, then with too many CNG parameters.
  // The function will operate correctly by only reading the maximum number of
  // parameters, skipping the extra.
  EXPECT_EQ(kCNGNumParamsNormal + 1, sid_data.size());
  cng_decoder.UpdateSid(sid_data);

  // Make sure the input buffer is large enough. Since Encode() appends data,
  // we need to set the size manually only afterwards, or the buffer will be
  // bigger than anticipated.
  sid_data.SetSize(kCNGNumParamsTooHigh + 1);
  cng_decoder.UpdateSid(sid_data);
}

// Test to generate cng data, by forcing SID. Both normal and faulty condition.
TEST_F(CngTest, CngGenerate) {
  rtc::Buffer sid_data;
  int16_t out_data[640];

  // Create and initialize encoder and decoder.
  ComfortNoiseEncoder cng_encoder(16000, kSidNormalIntervalUpdate,
                                  kCNGNumParamsNormal);
  ComfortNoiseDecoder cng_decoder;

  // Normal Encode.
  EXPECT_EQ(kCNGNumParamsNormal + 1,
            cng_encoder.Encode(rtc::ArrayView<const int16_t>(speech_data_, 160),
                               kForceSid, &sid_data));

  // Normal UpdateSid.
  cng_decoder.UpdateSid(sid_data);

  // Two normal Generate, one with new_period.
  EXPECT_TRUE(cng_decoder.Generate(rtc::ArrayView<int16_t>(out_data, 640), 1));
  EXPECT_TRUE(cng_decoder.Generate(rtc::ArrayView<int16_t>(out_data, 640), 0));

  // Call Generate with too much data.
  EXPECT_FALSE(cng_decoder.Generate(rtc::ArrayView<int16_t>(out_data, 641), 0));
}

// Test automatic SID.
TEST_F(CngTest, CngAutoSid) {
  rtc::Buffer sid_data;

  // Create and initialize encoder and decoder.
  ComfortNoiseEncoder cng_encoder(16000, kSidNormalIntervalUpdate,
                                  kCNGNumParamsNormal);
  ComfortNoiseDecoder cng_decoder;

  // Normal Encode, 100 msec, where no SID data should be generated.
  for (int i = 0; i < 10; i++) {
    EXPECT_EQ(
        0U, cng_encoder.Encode(rtc::ArrayView<const int16_t>(speech_data_, 160),
                               kNoSid, &sid_data));
  }

  // We have reached 100 msec, and SID data should be generated.
  EXPECT_EQ(kCNGNumParamsNormal + 1,
            cng_encoder.Encode(rtc::ArrayView<const int16_t>(speech_data_, 160),
                               kNoSid, &sid_data));
}

// Test automatic SID, with very short interval.
TEST_F(CngTest, CngAutoSidShort) {
  rtc::Buffer sid_data;

  // Create and initialize encoder and decoder.
  ComfortNoiseEncoder cng_encoder(16000, kSidShortIntervalUpdate,
                                  kCNGNumParamsNormal);
  ComfortNoiseDecoder cng_decoder;

  // First call will never generate SID, unless forced to.
  EXPECT_EQ(0U,
            cng_encoder.Encode(rtc::ArrayView<const int16_t>(speech_data_, 160),
                               kNoSid, &sid_data));

  // Normal Encode, 100 msec, SID data should be generated all the time.
  for (int i = 0; i < 10; i++) {
    EXPECT_EQ(
        kCNGNumParamsNormal + 1,
        cng_encoder.Encode(rtc::ArrayView<const int16_t>(speech_data_, 160),
                           kNoSid, &sid_data));
  }
}

}  // namespace webrtc
@@ -0,0 +1,436 @@
/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_coding/codecs/cng/webrtc_cng.h"

#include <algorithm>

#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "rtc_base/checks.h"
#include "rtc_base/numerics/safe_conversions.h"

namespace webrtc {

namespace {

const size_t kCngMaxOutsizeOrder = 640;

// TODO(ossu): Rename the left-over WebRtcCng according to style guide.
void WebRtcCng_K2a16(int16_t* k, int useOrder, int16_t* a);

const int32_t WebRtcCng_kDbov[94] = {
    1081109975, 858756178, 682134279, 541838517, 430397633, 341876992,
    271562548,  215709799, 171344384, 136103682, 108110997, 85875618,
    68213428,   54183852,  43039763,  34187699,  27156255,  21570980,
    17134438,   13610368,  10811100,  8587562,   6821343,   5418385,
    4303976,    3418770,   2715625,   2157098,   1713444,   1361037,
    1081110,    858756,    682134,    541839,    430398,    341877,
    271563,     215710,    171344,    136104,    108111,    85876,
    68213,      54184,     43040,     34188,     27156,     21571,
    17134,      13610,     10811,     8588,      6821,      5418,
    4304,       3419,      2716,      2157,      1713,      1361,
    1081,       859,       682,       542,       430,       342,
    272,        216,       171,       136,       108,       86,
    68,         54,        43,        34,        27,        22,
    17,         14,        11,        9,         7,         5,
    4,          3,         3,         2,         2,         1,
    1,          1,         1,         1};

const int16_t WebRtcCng_kCorrWindow[WEBRTC_CNG_MAX_LPC_ORDER] = {
    32702, 32636, 32570, 32505, 32439, 32374,
    32309, 32244, 32179, 32114, 32049, 31985};

}  // namespace

ComfortNoiseDecoder::ComfortNoiseDecoder() {
  /* Needed to get the right function pointers in SPLIB. */
  Reset();
}

void ComfortNoiseDecoder::Reset() {
  dec_seed_ = 7777; /* For debugging only. */
  dec_target_energy_ = 0;
  dec_used_energy_ = 0;
  for (auto& c : dec_target_reflCoefs_)
    c = 0;
  for (auto& c : dec_used_reflCoefs_)
    c = 0;
  for (auto& c : dec_filtstate_)
    c = 0;
  for (auto& c : dec_filtstateLow_)
    c = 0;
  dec_order_ = 5;
  dec_target_scale_factor_ = 0;
  dec_used_scale_factor_ = 0;
}

void ComfortNoiseDecoder::UpdateSid(rtc::ArrayView<const uint8_t> sid) {
  int16_t refCs[WEBRTC_CNG_MAX_LPC_ORDER];
  int32_t targetEnergy;
  size_t length = sid.size();
  /* Throw away reflection coefficients of higher order than we can handle. */
  if (length > (WEBRTC_CNG_MAX_LPC_ORDER + 1))
    length = WEBRTC_CNG_MAX_LPC_ORDER + 1;

  dec_order_ = static_cast<uint16_t>(length - 1);

  uint8_t sid0 = std::min<uint8_t>(sid[0], 93);
  targetEnergy = WebRtcCng_kDbov[sid0];
  /* Take down target energy to 75%. */
  targetEnergy = targetEnergy >> 1;
  targetEnergy += targetEnergy >> 2;

  dec_target_energy_ = targetEnergy;

  /* Reconstruct coeffs with tweak for WebRtc implementation of RFC3389. */
  if (dec_order_ == WEBRTC_CNG_MAX_LPC_ORDER) {
    for (size_t i = 0; i < (dec_order_); i++) {
      refCs[i] = sid[i + 1] << 8; /* Q7 to Q15. */
      dec_target_reflCoefs_[i] = refCs[i];
    }
  } else {
    for (size_t i = 0; i < (dec_order_); i++) {
      refCs[i] = (sid[i + 1] - 127) * (1 << 8); /* Q7 to Q15. */
      dec_target_reflCoefs_[i] = refCs[i];
    }
  }

  for (size_t i = (dec_order_); i < WEBRTC_CNG_MAX_LPC_ORDER; i++) {
    refCs[i] = 0;
    dec_target_reflCoefs_[i] = refCs[i];
  }
}

bool ComfortNoiseDecoder::Generate(rtc::ArrayView<int16_t> out_data,
                                   bool new_period) {
  int16_t excitation[kCngMaxOutsizeOrder];
  int16_t low[kCngMaxOutsizeOrder];
  int16_t lpPoly[WEBRTC_CNG_MAX_LPC_ORDER + 1];
  int16_t ReflBetaStd = 26214;      /* 0.8 in q15. */
  int16_t ReflBetaCompStd = 6553;   /* 0.2 in q15. */
  int16_t ReflBetaNewP = 19661;     /* 0.6 in q15. */
  int16_t ReflBetaCompNewP = 13107; /* 0.4 in q15. */
  int16_t Beta, BetaC;              /* These are in Q15. */
  int32_t targetEnergy;
  int16_t En;
  int16_t temp16;
  const size_t num_samples = out_data.size();

  if (num_samples > kCngMaxOutsizeOrder) {
    return false;
  }

  if (new_period) {
    dec_used_scale_factor_ = dec_target_scale_factor_;
    Beta = ReflBetaNewP;
    BetaC = ReflBetaCompNewP;
  } else {
    Beta = ReflBetaStd;
    BetaC = ReflBetaCompStd;
  }

  /* Calculate new scale factor in Q13 */
  dec_used_scale_factor_ = rtc::checked_cast<int16_t>(
      WEBRTC_SPL_MUL_16_16_RSFT(dec_used_scale_factor_, Beta >> 2, 13) +
      WEBRTC_SPL_MUL_16_16_RSFT(dec_target_scale_factor_, BetaC >> 2, 13));

  dec_used_energy_ = dec_used_energy_ >> 1;
  dec_used_energy_ += dec_target_energy_ >> 1;

  /* Do the same for the reflection coeffs, albeit in Q15. */
  for (size_t i = 0; i < WEBRTC_CNG_MAX_LPC_ORDER; i++) {
    dec_used_reflCoefs_[i] =
        (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(dec_used_reflCoefs_[i], Beta, 15);
    dec_used_reflCoefs_[i] +=
        (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(dec_target_reflCoefs_[i], BetaC, 15);
  }

  /* Compute the polynomial coefficients. */
  WebRtcCng_K2a16(dec_used_reflCoefs_, WEBRTC_CNG_MAX_LPC_ORDER, lpPoly);

  targetEnergy = dec_used_energy_;

  /* Calculate scaling factor based on filter energy. */
  En = 8192; /* 1.0 in Q13. */
  for (size_t i = 0; i < (WEBRTC_CNG_MAX_LPC_ORDER); i++) {
    /* Floating point value for reference.
       E *= 1.0 - (dec_used_reflCoefs_[i] / 32768.0) *
       (dec_used_reflCoefs_[i] / 32768.0);
     */

    /* Same in fixed point. */
    /* K(i).^2 in Q15. */
    temp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(dec_used_reflCoefs_[i],
                                                dec_used_reflCoefs_[i], 15);
    /* 1 - K(i).^2 in Q15. */
    temp16 = 0x7fff - temp16;
    En = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(En, temp16, 15);
  }

  /* float scaling= sqrt(E * dec_target_energy_ / (1 << 24)); */

  /* Calculate sqrt(En * target_energy / excitation energy) */
  targetEnergy = WebRtcSpl_Sqrt(dec_used_energy_);

  En = (int16_t)WebRtcSpl_Sqrt(En) << 6;
  En = (En * 3) >> 1; /* 1.5 estimates sqrt(2). */
  dec_used_scale_factor_ = (int16_t)((En * targetEnergy) >> 12);

  /* Generate excitation. */
  /* Excitation energy per sample is 2.^24 - Q13 N(0,1). */
  for (size_t i = 0; i < num_samples; i++) {
    excitation[i] = WebRtcSpl_RandN(&dec_seed_) >> 1;
  }

  /* Scale to correct energy. */
  WebRtcSpl_ScaleVector(excitation, excitation, dec_used_scale_factor_,
                        num_samples, 13);

  /* `lpPoly` - Coefficients in Q12.
   * `excitation` - Speech samples.
   * `nst->dec_filtstate` - State preservation.
   * `out_data` - Filtered speech samples. */
  WebRtcSpl_FilterAR(lpPoly, WEBRTC_CNG_MAX_LPC_ORDER + 1, excitation,
                     num_samples, dec_filtstate_, WEBRTC_CNG_MAX_LPC_ORDER,
                     dec_filtstateLow_, WEBRTC_CNG_MAX_LPC_ORDER,
                     out_data.data(), low, num_samples);

  return true;
}

ComfortNoiseEncoder::ComfortNoiseEncoder(int fs, int interval, int quality)
    : enc_nrOfCoefs_(quality),
      enc_sampfreq_(fs),
      enc_interval_(interval),
      enc_msSinceSid_(0),
      enc_Energy_(0),
      enc_reflCoefs_{0},
      enc_corrVector_{0},
      enc_seed_(7777) /* For debugging only. */ {
  RTC_CHECK_GT(quality, 0);
  RTC_CHECK_LE(quality, WEBRTC_CNG_MAX_LPC_ORDER);
}

void ComfortNoiseEncoder::Reset(int fs, int interval, int quality) {
  RTC_CHECK_GT(quality, 0);
  RTC_CHECK_LE(quality, WEBRTC_CNG_MAX_LPC_ORDER);
  enc_nrOfCoefs_ = quality;
  enc_sampfreq_ = fs;
  enc_interval_ = interval;
  enc_msSinceSid_ = 0;
  enc_Energy_ = 0;
  for (auto& c : enc_reflCoefs_)
    c = 0;
  for (auto& c : enc_corrVector_)
    c = 0;
  enc_seed_ = 7777; /* For debugging only. */
}

size_t ComfortNoiseEncoder::Encode(rtc::ArrayView<const int16_t> speech,
                                   bool force_sid,
                                   rtc::Buffer* output) {
  int16_t arCoefs[WEBRTC_CNG_MAX_LPC_ORDER + 1];
  int32_t corrVector[WEBRTC_CNG_MAX_LPC_ORDER + 1];
  int16_t refCs[WEBRTC_CNG_MAX_LPC_ORDER + 1];
  int16_t hanningW[kCngMaxOutsizeOrder];
  int16_t ReflBeta = 19661;     /* 0.6 in q15. */
  int16_t ReflBetaComp = 13107; /* 0.4 in q15. */
  int32_t outEnergy;
  int outShifts;
  size_t i;
  int stab;
  int acorrScale;
  size_t index;
  size_t ind, factor;
  int32_t* bptr;
  int32_t blo, bhi;
  int16_t negate;
  const int16_t* aptr;
  int16_t speechBuf[kCngMaxOutsizeOrder];

  const size_t num_samples = speech.size();
  RTC_CHECK_LE(num_samples, kCngMaxOutsizeOrder);

  for (i = 0; i < num_samples; i++) {
    speechBuf[i] = speech[i];
  }

  factor = num_samples;

  /* Calculate energy and a coefficients. */
  outEnergy = WebRtcSpl_Energy(speechBuf, num_samples, &outShifts);
  while (outShifts > 0) {
    /* We can only do 5 shifts without destroying accuracy in
     * division factor. */
    if (outShifts > 5) {
      outEnergy <<= (outShifts - 5);
      outShifts = 5;
    } else {
      factor /= 2;
      outShifts--;
    }
  }
  outEnergy = WebRtcSpl_DivW32W16(outEnergy, (int16_t)factor);

  if (outEnergy > 1) {
    /* Create Hanning Window. */
    WebRtcSpl_GetHanningWindow(hanningW, num_samples / 2);
    for (i = 0; i < (num_samples / 2); i++)
      hanningW[num_samples - i - 1] = hanningW[i];

    WebRtcSpl_ElementwiseVectorMult(speechBuf, hanningW, speechBuf, num_samples,
                                    14);

    WebRtcSpl_AutoCorrelation(speechBuf, num_samples, enc_nrOfCoefs_,
                              corrVector, &acorrScale);

    if (*corrVector == 0)
      *corrVector = WEBRTC_SPL_WORD16_MAX;

    /* Adds the bandwidth expansion. */
    aptr = WebRtcCng_kCorrWindow;
    bptr = corrVector;

    /* (zzz) lpc16_1 = 17+1+820+2+2 = 842 (ordo2=700). */
    for (ind = 0; ind < enc_nrOfCoefs_; ind++) {
      /* The below code multiplies the 16 b corrWindow values (Q15) with
       * the 32 b corrvector (Q0) and shifts the result down 15 steps. */
      negate = *bptr < 0;
      if (negate)
        *bptr = -*bptr;

      blo = (int32_t)*aptr * (*bptr & 0xffff);
      bhi = ((blo >> 16) & 0xffff) +
            ((int32_t)(*aptr++) * ((*bptr >> 16) & 0xffff));
      blo = (blo & 0xffff) | ((bhi & 0xffff) << 16);

      *bptr = (((bhi >> 16) & 0x7fff) << 17) | ((uint32_t)blo >> 15);
      if (negate)
        *bptr = -*bptr;
      bptr++;
    }
    /* End of bandwidth expansion. */

    stab = WebRtcSpl_LevinsonDurbin(corrVector, arCoefs, refCs, enc_nrOfCoefs_);

    if (!stab) {
      /* Disregard from this frame */
      return 0;
    }

  } else {
    for (i = 0; i < enc_nrOfCoefs_; i++)
      refCs[i] = 0;
  }

  if (force_sid) {
    /* Read instantaneous values instead of averaged. */
    for (i = 0; i < enc_nrOfCoefs_; i++)
      enc_reflCoefs_[i] = refCs[i];
    enc_Energy_ = outEnergy;
  } else {
    /* Average history with new values. */
    for (i = 0; i < enc_nrOfCoefs_; i++) {
      enc_reflCoefs_[i] =
          (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(enc_reflCoefs_[i], ReflBeta, 15);
      enc_reflCoefs_[i] +=
          (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(refCs[i], ReflBetaComp, 15);
    }
    enc_Energy_ = (outEnergy >> 2) + (enc_Energy_ >> 1) + (enc_Energy_ >> 2);
  }

  if (enc_Energy_ < 1) {
    enc_Energy_ = 1;
  }

  if ((enc_msSinceSid_ > (enc_interval_ - 1)) || force_sid) {
    /* Search for best dbov value. */
    index = 0;
    for (i = 1; i < 93; i++) {
      /* Always round downwards. */
      if ((enc_Energy_ - WebRtcCng_kDbov[i]) > 0) {
        index = i;
        break;
      }
    }
    if ((i == 93) && (index == 0))
      index = 94;

    const size_t output_coefs = enc_nrOfCoefs_ + 1;
    output->AppendData(output_coefs, [&](rtc::ArrayView<uint8_t> output) {
      output[0] = (uint8_t)index;

      /* Quantize coefficients with tweak for WebRtc implementation of
       * RFC3389. */
      if (enc_nrOfCoefs_ == WEBRTC_CNG_MAX_LPC_ORDER) {
        for (i = 0; i < enc_nrOfCoefs_; i++) {
          /* Q15 to Q7 with rounding. */
          output[i + 1] = ((enc_reflCoefs_[i] + 128) >> 8);
        }
      } else {
        for (i = 0; i < enc_nrOfCoefs_; i++) {
          /* Q15 to Q7 with rounding. */
          output[i + 1] = (127 + ((enc_reflCoefs_[i] + 128) >> 8));
        }
      }

      return output_coefs;
    });

    enc_msSinceSid_ =
        static_cast<int16_t>((1000 * num_samples) / enc_sampfreq_);
    return output_coefs;
  } else {
    enc_msSinceSid_ +=
        static_cast<int16_t>((1000 * num_samples) / enc_sampfreq_);
    return 0;
  }
}

namespace {
/* Values in `k` are Q15, and `a` Q12. */
void WebRtcCng_K2a16(int16_t* k, int useOrder, int16_t* a) {
  int16_t any[WEBRTC_SPL_MAX_LPC_ORDER + 1];
  int16_t* aptr;
  int16_t* aptr2;
  int16_t* anyptr;
  const int16_t* kptr;
  int m, i;

  kptr = k;
  *a = 4096; /* i.e., (Word16_MAX >> 3) + 1 */
  *any = *a;
  a[1] = (*k + 4) >> 3;
  for (m = 1; m < useOrder; m++) {
    kptr++;
    aptr = a;
    aptr++;
    aptr2 = &a[m];
    anyptr = any;
    anyptr++;

    any[m + 1] = (*kptr + 4) >> 3;
    for (i = 0; i < m; i++) {
      *anyptr++ =
          (*aptr++) +
          (int16_t)((((int32_t)(*aptr2--) * (int32_t)*kptr) + 16384) >> 15);
    }

    aptr = a;
    anyptr = any;
    for (i = 0; i < (m + 2); i++) {
      *aptr++ = *anyptr++;
    }
  }
}

}  // namespace

}  // namespace webrtc
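The Q15 constants used in the encoder's averaging above (ReflBeta = 19661, ReflBetaComp = 13107) implement an exponential moving average of the reflection coefficients. The following floating-point reference, which is illustrative only and not part of the commit, shows what the fixed-point update computes when force_sid is false.

// Floating-point reference for the Q15 smoothing in
// ComfortNoiseEncoder::Encode(): 19661/32768 ~= 0.6 and 13107/32768 ~= 0.4.
// The energy update in the same branch is the shift-based equivalent of
// 0.25 * outEnergy + 0.75 * enc_Energy_.
double SmoothReflectionCoefficient(double previous, double instantaneous) {
  const double kReflBeta = 19661.0 / 32768.0;      // ~0.6
  const double kReflBetaComp = 13107.0 / 32768.0;  // ~0.4
  return kReflBeta * previous + kReflBetaComp * instantaneous;
}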
@@ -0,0 +1,99 @@
/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef MODULES_AUDIO_CODING_CODECS_CNG_WEBRTC_CNG_H_
#define MODULES_AUDIO_CODING_CODECS_CNG_WEBRTC_CNG_H_

#include <stdint.h>

#include <cstddef>

#include "api/array_view.h"
#include "rtc_base/buffer.h"

#define WEBRTC_CNG_MAX_LPC_ORDER 12

namespace webrtc {

class ComfortNoiseDecoder {
 public:
  ComfortNoiseDecoder();
  ~ComfortNoiseDecoder() = default;

  ComfortNoiseDecoder(const ComfortNoiseDecoder&) = delete;
  ComfortNoiseDecoder& operator=(const ComfortNoiseDecoder&) = delete;

  void Reset();

  // Updates the CN state when a new SID packet arrives.
  // `sid` is a view of the SID packet without the headers.
  void UpdateSid(rtc::ArrayView<const uint8_t> sid);

  // Generates comfort noise.
  // `out_data` will be filled with samples - its size determines the number of
  // samples generated. When `new_period` is true, CNG history will be reset
  // before any audio is generated. Returns `false` if `out_data` is too large -
  // currently more than 640 samples (equalling 10 ms at 64 kHz).
  // TODO(ossu): Specify better limits for the size of out_data. Either let it
  // be unbounded or limit to 10 ms in the current sample rate.
  bool Generate(rtc::ArrayView<int16_t> out_data, bool new_period);

 private:
  uint32_t dec_seed_;
  int32_t dec_target_energy_;
  int32_t dec_used_energy_;
  int16_t dec_target_reflCoefs_[WEBRTC_CNG_MAX_LPC_ORDER + 1];
  int16_t dec_used_reflCoefs_[WEBRTC_CNG_MAX_LPC_ORDER + 1];
  int16_t dec_filtstate_[WEBRTC_CNG_MAX_LPC_ORDER + 1];
  int16_t dec_filtstateLow_[WEBRTC_CNG_MAX_LPC_ORDER + 1];
  uint16_t dec_order_;
  int16_t dec_target_scale_factor_; /* Q29 */
  int16_t dec_used_scale_factor_;   /* Q29 */
};

class ComfortNoiseEncoder {
 public:
  // Creates a comfort noise encoder.
  // `fs` selects sample rate: 8000 for narrowband or 16000 for wideband.
  // `interval` sets the interval at which to generate SID data (in ms).
  // `quality` selects the number of refl. coeffs. Maximum allowed is 12.
  ComfortNoiseEncoder(int fs, int interval, int quality);
  ~ComfortNoiseEncoder() = default;

  ComfortNoiseEncoder(const ComfortNoiseEncoder&) = delete;
  ComfortNoiseEncoder& operator=(const ComfortNoiseEncoder&) = delete;

  // Resets the comfort noise encoder to its initial state.
  // Parameters are set as during construction.
  void Reset(int fs, int interval, int quality);

  // Analyzes background noise from `speech` and appends coefficients to
  // `output`. Returns the number of coefficients generated. If `force_sid` is
  // true, a SID frame is forced and the internal sid interval counter is
  // reset. Will fail if the input size is too large (> 640 samples, see
  // ComfortNoiseDecoder::Generate).
  size_t Encode(rtc::ArrayView<const int16_t> speech,
                bool force_sid,
                rtc::Buffer* output);

 private:
  size_t enc_nrOfCoefs_;
  int enc_sampfreq_;
  int16_t enc_interval_;
  int16_t enc_msSinceSid_;
  int32_t enc_Energy_;
  int16_t enc_reflCoefs_[WEBRTC_CNG_MAX_LPC_ORDER + 1];
  int32_t enc_corrVector_[WEBRTC_CNG_MAX_LPC_ORDER + 1];
  uint32_t enc_seed_;
};

}  // namespace webrtc

#endif  // MODULES_AUDIO_CODING_CODECS_CNG_WEBRTC_CNG_H_
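A minimal round-trip sketch (not part of the commit) using the two classes declared above, assuming the caller provides 160 samples of 16 kHz background noise:

#include <cstdint>

#include "api/array_view.h"
#include "modules/audio_coding/codecs/cng/webrtc_cng.h"
#include "rtc_base/buffer.h"

void CngRoundTrip(rtc::ArrayView<const int16_t> noise /* 160 samples @ 16 kHz */) {
  webrtc::ComfortNoiseEncoder encoder(/*fs=*/16000, /*interval=*/100,
                                      /*quality=*/8);
  rtc::Buffer sid;
  // Forcing a SID frame yields `quality` + 1 bytes: the energy index followed
  // by the quantized reflection coefficients.
  encoder.Encode(noise, /*force_sid=*/true, &sid);

  webrtc::ComfortNoiseDecoder decoder;
  decoder.UpdateSid(sid);
  int16_t out[160];
  decoder.Generate(rtc::ArrayView<int16_t>(out, 160), /*new_period=*/true);
}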