Repo created

commit f8c34fa5ee (parent 81b91f4139)
Fr4nz D13trich, 2025-11-22 14:04:28 +01:00
22732 changed files with 4815320 additions and 2 deletions

api/audio/BUILD.gn (new file)
@@ -0,0 +1,93 @@
# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
import("../../webrtc.gni")
rtc_library("audio_frame_api") {
visibility = [ "*" ]
sources = [
"audio_frame.cc",
"audio_frame.h",
"channel_layout.cc",
"channel_layout.h",
]
deps = [
"..:rtp_packet_info",
"../../rtc_base:checks",
"../../rtc_base:logging",
"../../rtc_base:macromagic",
"../../rtc_base:timeutils",
]
}
rtc_source_set("audio_frame_processor") {
visibility = [ "*" ]
sources = [ "audio_frame_processor.h" ]
}
rtc_source_set("audio_mixer_api") {
visibility = [ "*" ]
sources = [ "audio_mixer.h" ]
deps = [
":audio_frame_api",
"..:make_ref_counted",
"../../rtc_base:refcount",
]
}
rtc_library("aec3_config") {
visibility = [ "*" ]
sources = [
"echo_canceller3_config.cc",
"echo_canceller3_config.h",
]
deps = [
"../../rtc_base:checks",
"../../rtc_base:safe_minmax",
"../../rtc_base/system:rtc_export",
]
}
rtc_library("aec3_factory") {
visibility = [ "*" ]
configs += [ "../../modules/audio_processing:apm_debug_dump" ]
sources = [
"echo_canceller3_factory.cc",
"echo_canceller3_factory.h",
]
deps = [
":aec3_config",
":echo_control",
"../../modules/audio_processing/aec3",
"../../rtc_base/system:rtc_export",
]
}
rtc_source_set("echo_control") {
visibility = [ "*" ]
sources = [ "echo_control.h" ]
deps = [ "../../rtc_base:checks" ]
}
rtc_source_set("echo_detector_creator") {
visibility = [ "*" ]
allow_poison = [ "default_echo_detector" ]
sources = [
"echo_detector_creator.cc",
"echo_detector_creator.h",
]
deps = [
"..:make_ref_counted",
"../../api:scoped_refptr",
"../../modules/audio_processing:api",
"../../modules/audio_processing:residual_echo_detector",
]
}

api/audio/OWNERS (new file)
@@ -0,0 +1,2 @@
gustaf@webrtc.org
peah@webrtc.org

api/audio/audio_frame.cc (new file)
@@ -0,0 +1,140 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/audio/audio_frame.h"
#include <string.h>
#include "rtc_base/checks.h"
#include "rtc_base/time_utils.h"
namespace webrtc {
AudioFrame::AudioFrame() {
// Visual Studio doesn't like this in the class definition.
static_assert(sizeof(data_) == kMaxDataSizeBytes, "kMaxDataSizeBytes");
}
void AudioFrame::Reset() {
ResetWithoutMuting();
muted_ = true;
}
void AudioFrame::ResetWithoutMuting() {
// TODO(wu): Zero is a valid value for `timestamp_`. We should initialize
// to an invalid value, or add a new member to indicate invalidity.
timestamp_ = 0;
elapsed_time_ms_ = -1;
ntp_time_ms_ = -1;
samples_per_channel_ = 0;
sample_rate_hz_ = 0;
num_channels_ = 0;
channel_layout_ = CHANNEL_LAYOUT_NONE;
speech_type_ = kUndefined;
vad_activity_ = kVadUnknown;
profile_timestamp_ms_ = 0;
packet_infos_ = RtpPacketInfos();
absolute_capture_timestamp_ms_ = absl::nullopt;
}
void AudioFrame::UpdateFrame(uint32_t timestamp,
const int16_t* data,
size_t samples_per_channel,
int sample_rate_hz,
SpeechType speech_type,
VADActivity vad_activity,
size_t num_channels) {
timestamp_ = timestamp;
samples_per_channel_ = samples_per_channel;
sample_rate_hz_ = sample_rate_hz;
speech_type_ = speech_type;
vad_activity_ = vad_activity;
num_channels_ = num_channels;
channel_layout_ = GuessChannelLayout(num_channels);
if (channel_layout_ != CHANNEL_LAYOUT_UNSUPPORTED) {
RTC_DCHECK_EQ(num_channels, ChannelLayoutToChannelCount(channel_layout_));
}
const size_t length = samples_per_channel * num_channels;
RTC_CHECK_LE(length, kMaxDataSizeSamples);
if (data != nullptr) {
memcpy(data_, data, sizeof(int16_t) * length);
muted_ = false;
} else {
muted_ = true;
}
}
void AudioFrame::CopyFrom(const AudioFrame& src) {
if (this == &src)
return;
timestamp_ = src.timestamp_;
elapsed_time_ms_ = src.elapsed_time_ms_;
ntp_time_ms_ = src.ntp_time_ms_;
packet_infos_ = src.packet_infos_;
muted_ = src.muted();
samples_per_channel_ = src.samples_per_channel_;
sample_rate_hz_ = src.sample_rate_hz_;
speech_type_ = src.speech_type_;
vad_activity_ = src.vad_activity_;
num_channels_ = src.num_channels_;
channel_layout_ = src.channel_layout_;
absolute_capture_timestamp_ms_ = src.absolute_capture_timestamp_ms();
const size_t length = samples_per_channel_ * num_channels_;
RTC_CHECK_LE(length, kMaxDataSizeSamples);
if (!src.muted()) {
memcpy(data_, src.data(), sizeof(int16_t) * length);
muted_ = false;
}
}
void AudioFrame::UpdateProfileTimeStamp() {
profile_timestamp_ms_ = rtc::TimeMillis();
}
int64_t AudioFrame::ElapsedProfileTimeMs() const {
if (profile_timestamp_ms_ == 0) {
// Profiling has not been activated.
return -1;
}
return rtc::TimeSince(profile_timestamp_ms_);
}
const int16_t* AudioFrame::data() const {
return muted_ ? empty_data() : data_;
}
// TODO(henrik.lundin) Can we skip zeroing the buffer?
// See https://bugs.chromium.org/p/webrtc/issues/detail?id=5647.
int16_t* AudioFrame::mutable_data() {
if (muted_) {
memset(data_, 0, kMaxDataSizeBytes);
muted_ = false;
}
return data_;
}
void AudioFrame::Mute() {
muted_ = true;
}
bool AudioFrame::muted() const {
return muted_;
}
// static
const int16_t* AudioFrame::empty_data() {
static int16_t* null_data = new int16_t[kMaxDataSizeSamples]();
return &null_data[0];
}
} // namespace webrtc

api/audio/audio_frame.h (new file)
@@ -0,0 +1,173 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_AUDIO_AUDIO_FRAME_H_
#define API_AUDIO_AUDIO_FRAME_H_
#include <stddef.h>
#include <stdint.h>
#include "api/audio/channel_layout.h"
#include "api/rtp_packet_infos.h"
namespace webrtc {
/* This class holds up to 120 ms of super-wideband (32 kHz) stereo audio. It
* allows for adding and subtracting frames while keeping track of the resulting
* states.
*
* Notes
* - This is a de-facto API, not designed for external use. The AudioFrame class
* is in need of overhaul or even replacement, and anyone depending on it
* should be prepared for that.
* - The total number of samples is samples_per_channel_ * num_channels_.
* - Stereo data is interleaved starting with the left channel.
*/
class AudioFrame {
public:
// Using constexpr here causes linker errors unless the variable also has an
// out-of-class definition, which is impractical in this header-only class.
// (This makes no sense because it compiles as an enum value, which we most
// certainly cannot take the address of, just fine.) C++17 introduces inline
// variables which should allow us to switch to constexpr and keep this a
// header-only class.
enum : size_t {
// Stereo, 32 kHz, 120 ms (2 * 32 * 120)
// Stereo, 192 kHz, 20 ms (2 * 192 * 20)
kMaxDataSizeSamples = 7680,
kMaxDataSizeBytes = kMaxDataSizeSamples * sizeof(int16_t),
};
enum VADActivity { kVadActive = 0, kVadPassive = 1, kVadUnknown = 2 };
enum SpeechType {
kNormalSpeech = 0,
kPLC = 1,
kCNG = 2,
kPLCCNG = 3,
kCodecPLC = 5,
kUndefined = 4
};
AudioFrame();
AudioFrame(const AudioFrame&) = delete;
AudioFrame& operator=(const AudioFrame&) = delete;
// Resets all members to their default state.
void Reset();
// Same as Reset(), but leaves mute state unchanged. Muting a frame requires
// the buffer to be zeroed on the next call to mutable_data(). Callers
// intending to write to the buffer immediately after Reset() can instead use
// ResetWithoutMuting() to skip this wasteful zeroing.
void ResetWithoutMuting();
void UpdateFrame(uint32_t timestamp,
const int16_t* data,
size_t samples_per_channel,
int sample_rate_hz,
SpeechType speech_type,
VADActivity vad_activity,
size_t num_channels = 1);
void CopyFrom(const AudioFrame& src);
// Sets a wall-time clock timestamp in milliseconds to be used for profiling
// of time between two points in the audio chain.
// Example:
// t0: UpdateProfileTimeStamp()
// t1: ElapsedProfileTimeMs() => t1 - t0 [msec]
void UpdateProfileTimeStamp();
// Returns the time difference between now and when UpdateProfileTimeStamp()
// was last called. Returns -1 if UpdateProfileTimeStamp() has not yet been
// called.
int64_t ElapsedProfileTimeMs() const;
// data() returns a zeroed static buffer if the frame is muted.
// mutable_data() always returns a non-static buffer; the first call to
// mutable_data() zeros that buffer and marks the frame unmuted.
const int16_t* data() const;
int16_t* mutable_data();
// Prefer to mute frames using AudioFrameOperations::Mute.
void Mute();
// Frame is muted by default.
bool muted() const;
size_t max_16bit_samples() const { return kMaxDataSizeSamples; }
size_t samples_per_channel() const { return samples_per_channel_; }
size_t num_channels() const { return num_channels_; }
ChannelLayout channel_layout() const { return channel_layout_; }
int sample_rate_hz() const { return sample_rate_hz_; }
void set_absolute_capture_timestamp_ms(
int64_t absolute_capture_time_stamp_ms) {
absolute_capture_timestamp_ms_ = absolute_capture_time_stamp_ms;
}
absl::optional<int64_t> absolute_capture_timestamp_ms() const {
return absolute_capture_timestamp_ms_;
}
// RTP timestamp of the first sample in the AudioFrame.
uint32_t timestamp_ = 0;
// Time since the first frame in milliseconds.
// -1 represents an uninitialized value.
int64_t elapsed_time_ms_ = -1;
// NTP time of the estimated capture time in local timebase in milliseconds.
// -1 represents an uninitialized value.
int64_t ntp_time_ms_ = -1;
size_t samples_per_channel_ = 0;
int sample_rate_hz_ = 0;
size_t num_channels_ = 0;
ChannelLayout channel_layout_ = CHANNEL_LAYOUT_NONE;
SpeechType speech_type_ = kUndefined;
VADActivity vad_activity_ = kVadUnknown;
// Monotonically increasing timestamp intended for profiling of audio frames.
// Typically used for measuring elapsed time between two different points in
// the audio path. No lock is used to save resources and we are thread safe
// by design.
// TODO(nisse@webrtc.org): consider using absl::optional.
int64_t profile_timestamp_ms_ = 0;
// Information about packets used to assemble this audio frame. This is needed
// by `SourceTracker` when the frame is delivered to the RTCRtpReceiver's
// MediaStreamTrack, in order to implement getContributingSources(). See:
// https://w3c.github.io/webrtc-pc/#dom-rtcrtpreceiver-getcontributingsources
//
// TODO(bugs.webrtc.org/10757):
// Note that this information might not be fully accurate since we currently
// don't have a proper way to track it across the audio sync buffer. The
// sync buffer is the small sample-holding buffer located after the audio
// decoder and before where samples are assembled into output frames.
//
// `RtpPacketInfos` may also be empty if the audio samples did not come from
// RTP packets. E.g. if the audio were locally generated by packet loss
// concealment, comfort noise generation, etc.
RtpPacketInfos packet_infos_;
private:
// A permanently zeroed out buffer to represent muted frames. This is a
// header-only class, so the only way to avoid creating a separate empty
// buffer per translation unit is to wrap a static in an inline function.
static const int16_t* empty_data();
int16_t data_[kMaxDataSizeSamples];
bool muted_ = true;
// Absolute capture timestamp when this audio frame was originally captured.
// This is only valid for audio frames captured on this machine. The absolute
// capture timestamp of a received frame is found in `packet_infos_`.
// This timestamp MUST be based on the same clock as rtc::TimeMillis().
absl::optional<int64_t> absolute_capture_timestamp_ms_;
};
} // namespace webrtc
#endif // API_AUDIO_AUDIO_FRAME_H_
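
A minimal usage sketch of the AudioFrame API above; the function and constants are illustrative, not part of the tree. UpdateFrame() copies the samples and unmutes the frame, while passing nullptr data leaves it muted.

#include "api/audio/audio_frame.h"

// Illustrative only: fills a frame with 10 ms of mono audio at 16 kHz.
void FillFrameExample() {
  webrtc::AudioFrame frame;  // Frames start out muted; data() is all zeros.
  constexpr int kSampleRateHz = 16000;
  constexpr size_t kSamplesPerChannel = kSampleRateHz / 100;  // 10 ms.
  int16_t samples[kSamplesPerChannel] = {0};
  frame.UpdateFrame(/*timestamp=*/0, samples, kSamplesPerChannel,
                    kSampleRateHz, webrtc::AudioFrame::kNormalSpeech,
                    webrtc::AudioFrame::kVadUnknown, /*num_channels=*/1);
  // frame.muted() is now false and frame.data() returns the copied samples.
}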

api/audio/audio_frame_processor.h (new file)
@@ -0,0 +1,43 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_AUDIO_AUDIO_FRAME_PROCESSOR_H_
#define API_AUDIO_AUDIO_FRAME_PROCESSOR_H_
#include <functional>
#include <memory>
namespace webrtc {
class AudioFrame;
// If passed into PeerConnectionFactory, will be used for additional
// processing of captured audio frames, performed before encoding.
// Implementations must be thread-safe.
class AudioFrameProcessor {
public:
using OnAudioFrameCallback = std::function<void(std::unique_ptr<AudioFrame>)>;
virtual ~AudioFrameProcessor() = default;
// Processes the frame received from WebRTC; called by WebRTC off the
// real-time audio capturing path. AudioFrameProcessor must reply with
// processed frames by calling the `sink_callback` provided in the SetSink()
// call, if any. `sink_callback` may be invoked in the context of Process().
virtual void Process(std::unique_ptr<AudioFrame> frame) = 0;
// Atomically replaces the current sink with the new one. Before the
// first call to this function, or if the provided `sink_callback` is nullptr,
// processed frames are simply discarded.
virtual void SetSink(OnAudioFrameCallback sink_callback) = 0;
};
} // namespace webrtc
#endif // API_AUDIO_AUDIO_FRAME_PROCESSOR_H_
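
A sketch of a conforming implementation; the pass-through class below is hypothetical, not part of the tree. Per the thread-safety requirement above, a production implementation would also synchronize SetSink() against concurrent Process() calls.

#include <memory>
#include <utility>
#include "api/audio/audio_frame_processor.h"

// Hypothetical pass-through: forwards every captured frame unchanged.
class PassThroughProcessor : public webrtc::AudioFrameProcessor {
 public:
  void Process(std::unique_ptr<webrtc::AudioFrame> frame) override {
    if (sink_) sink_(std::move(frame));  // Frames are discarded if no sink.
  }
  void SetSink(OnAudioFrameCallback sink_callback) override {
    sink_ = std::move(sink_callback);  // Real code must make this atomic.
  }

 private:
  OnAudioFrameCallback sink_;
};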

api/audio/audio_mixer.h (new file)
@@ -0,0 +1,80 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_AUDIO_AUDIO_MIXER_H_
#define API_AUDIO_AUDIO_MIXER_H_
#include <memory>
#include "api/audio/audio_frame.h"
#include "rtc_base/ref_count.h"
namespace webrtc {
// WORK IN PROGRESS
// This class is under development and is not yet intended for use outside
// of WebRTC/Libjingle.
class AudioMixer : public rtc::RefCountInterface {
public:
// A callback class that all mixer participants must inherit from/implement.
class Source {
public:
enum class AudioFrameInfo {
kNormal, // The samples in audio_frame are valid and should be used.
kMuted, // The samples in audio_frame should not be used, but
// should be implicitly interpreted as zero. Other
// fields in audio_frame may be read and should
// contain meaningful values.
kError, // The audio_frame will not be used.
};
// Overwrites `audio_frame`. The data_ field is overwritten with
// 10 ms of new audio (either 1 or 2 interleaved channels) at
// `sample_rate_hz`. All fields in `audio_frame` must be updated.
virtual AudioFrameInfo GetAudioFrameWithInfo(int sample_rate_hz,
AudioFrame* audio_frame) = 0;
// A way for a mixer implementation to distinguish participants.
virtual int Ssrc() const = 0;
// A way for this source to say that GetAudioFrameWithInfo called
// with this sample rate or higher will not cause quality loss.
virtual int PreferredSampleRate() const = 0;
virtual ~Source() {}
};
// Returns true if adding was successful. A source is never added
// twice. Addition and removal can happen on different threads.
virtual bool AddSource(Source* audio_source) = 0;
// Removal is never attempted if a source has not been successfully
// added to the mixer.
virtual void RemoveSource(Source* audio_source) = 0;
// Performs mixing by asking registered audio sources for audio. The
// mixed result is placed in the provided AudioFrame. This method
// will only be called from a single thread. The channels argument
// specifies the number of channels of the mix result. The mixer
// should mix at a rate that doesn't cause quality loss of the
// sources' audio. The mixing rate is one of the rates listed in
// AudioProcessing::NativeRate. All fields in
// `audio_frame_for_mixing` must be updated.
virtual void Mix(size_t number_of_channels,
AudioFrame* audio_frame_for_mixing) = 0;
protected:
// Since the mixer is reference counted, the destructor may be
// called from any thread.
~AudioMixer() override {}
};
} // namespace webrtc
#endif // API_AUDIO_AUDIO_MIXER_H_
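
A sketch of a minimal Source implementation (hypothetical, not part of the tree) that always reports muted audio; note how UpdateFrame() with nullptr data matches the kMuted contract.

#include "api/audio/audio_mixer.h"

// Hypothetical source delivering 10 ms of muted audio at any requested rate.
class SilentSource : public webrtc::AudioMixer::Source {
 public:
  AudioFrameInfo GetAudioFrameWithInfo(
      int sample_rate_hz, webrtc::AudioFrame* audio_frame) override {
    audio_frame->UpdateFrame(/*timestamp=*/0, /*data=*/nullptr,
                             sample_rate_hz / 100, sample_rate_hz,
                             webrtc::AudioFrame::kNormalSpeech,
                             webrtc::AudioFrame::kVadPassive,
                             /*num_channels=*/1);
    return AudioFrameInfo::kMuted;
  }
  int Ssrc() const override { return 42; }  // Arbitrary participant ID.
  int PreferredSampleRate() const override { return 16000; }
};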

api/audio/channel_layout.cc (new file)
@@ -0,0 +1,282 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/audio/channel_layout.h"
#include <stddef.h>
#include "rtc_base/arraysize.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
namespace webrtc {
static const int kLayoutToChannels[] = {
0, // CHANNEL_LAYOUT_NONE
0, // CHANNEL_LAYOUT_UNSUPPORTED
1, // CHANNEL_LAYOUT_MONO
2, // CHANNEL_LAYOUT_STEREO
3, // CHANNEL_LAYOUT_2_1
3, // CHANNEL_LAYOUT_SURROUND
4, // CHANNEL_LAYOUT_4_0
4, // CHANNEL_LAYOUT_2_2
4, // CHANNEL_LAYOUT_QUAD
5, // CHANNEL_LAYOUT_5_0
6, // CHANNEL_LAYOUT_5_1
5, // CHANNEL_LAYOUT_5_0_BACK
6, // CHANNEL_LAYOUT_5_1_BACK
7, // CHANNEL_LAYOUT_7_0
8, // CHANNEL_LAYOUT_7_1
8, // CHANNEL_LAYOUT_7_1_WIDE
2, // CHANNEL_LAYOUT_STEREO_DOWNMIX
3, // CHANNEL_LAYOUT_2POINT1
4, // CHANNEL_LAYOUT_3_1
5, // CHANNEL_LAYOUT_4_1
6, // CHANNEL_LAYOUT_6_0
6, // CHANNEL_LAYOUT_6_0_FRONT
6, // CHANNEL_LAYOUT_HEXAGONAL
7, // CHANNEL_LAYOUT_6_1
7, // CHANNEL_LAYOUT_6_1_BACK
7, // CHANNEL_LAYOUT_6_1_FRONT
7, // CHANNEL_LAYOUT_7_0_FRONT
8, // CHANNEL_LAYOUT_7_1_WIDE_BACK
8, // CHANNEL_LAYOUT_OCTAGONAL
0, // CHANNEL_LAYOUT_DISCRETE
3, // CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC
5, // CHANNEL_LAYOUT_4_1_QUAD_SIDE
0, // CHANNEL_LAYOUT_BITSTREAM
};
// The channel orderings for each layout as specified by FFmpeg. Each value
// represents the index of each channel in each layout. Values of -1 mean the
// channel at that index is not used for that layout. For example, the left side
// surround sound channel in FFmpeg's 5.1 layout is in the 5th position (because
// the order is L, R, C, LFE, LS, RS), so
// kChannelOrderings[CHANNEL_LAYOUT_5_1][SIDE_LEFT] = 4;
static const int kChannelOrderings[CHANNEL_LAYOUT_MAX + 1][CHANNELS_MAX + 1] = {
// FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR
// CHANNEL_LAYOUT_NONE
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_UNSUPPORTED
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_MONO
{-1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_STEREO
{0, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_2_1
{0, 1, -1, -1, -1, -1, -1, -1, 2, -1, -1},
// CHANNEL_LAYOUT_SURROUND
{0, 1, 2, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_4_0
{0, 1, 2, -1, -1, -1, -1, -1, 3, -1, -1},
// CHANNEL_LAYOUT_2_2
{0, 1, -1, -1, -1, -1, -1, -1, -1, 2, 3},
// CHANNEL_LAYOUT_QUAD
{0, 1, -1, -1, 2, 3, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_5_0
{0, 1, 2, -1, -1, -1, -1, -1, -1, 3, 4},
// CHANNEL_LAYOUT_5_1
{0, 1, 2, 3, -1, -1, -1, -1, -1, 4, 5},
// FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR
// CHANNEL_LAYOUT_5_0_BACK
{0, 1, 2, -1, 3, 4, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_5_1_BACK
{0, 1, 2, 3, 4, 5, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_7_0
{0, 1, 2, -1, 5, 6, -1, -1, -1, 3, 4},
// CHANNEL_LAYOUT_7_1
{0, 1, 2, 3, 6, 7, -1, -1, -1, 4, 5},
// CHANNEL_LAYOUT_7_1_WIDE
{0, 1, 2, 3, -1, -1, 6, 7, -1, 4, 5},
// CHANNEL_LAYOUT_STEREO_DOWNMIX
{0, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_2POINT1
{0, 1, -1, 2, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_3_1
{0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_4_1
{0, 1, 2, 4, -1, -1, -1, -1, 3, -1, -1},
// CHANNEL_LAYOUT_6_0
{0, 1, 2, -1, -1, -1, -1, -1, 5, 3, 4},
// CHANNEL_LAYOUT_6_0_FRONT
{0, 1, -1, -1, -1, -1, 4, 5, -1, 2, 3},
// FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR
// CHANNEL_LAYOUT_HEXAGONAL
{0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1},
// CHANNEL_LAYOUT_6_1
{0, 1, 2, 3, -1, -1, -1, -1, 6, 4, 5},
// CHANNEL_LAYOUT_6_1_BACK
{0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1},
// CHANNEL_LAYOUT_6_1_FRONT
{0, 1, -1, 6, -1, -1, 4, 5, -1, 2, 3},
// CHANNEL_LAYOUT_7_0_FRONT
{0, 1, 2, -1, -1, -1, 5, 6, -1, 3, 4},
// CHANNEL_LAYOUT_7_1_WIDE_BACK
{0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1},
// CHANNEL_LAYOUT_OCTAGONAL
{0, 1, 2, -1, 5, 6, -1, -1, 7, 3, 4},
// CHANNEL_LAYOUT_DISCRETE
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC
{0, 1, 2, -1, -1, -1, -1, -1, -1, -1, -1},
// CHANNEL_LAYOUT_4_1_QUAD_SIDE
{0, 1, -1, 4, -1, -1, -1, -1, -1, 2, 3},
// CHANNEL_LAYOUT_BITSTREAM
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
// FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR
};
int ChannelLayoutToChannelCount(ChannelLayout layout) {
RTC_DCHECK_LT(static_cast<size_t>(layout), arraysize(kLayoutToChannels));
RTC_DCHECK_LE(kLayoutToChannels[layout], kMaxConcurrentChannels);
return kLayoutToChannels[layout];
}
// Converts a channel count into a channel layout.
ChannelLayout GuessChannelLayout(int channels) {
switch (channels) {
case 1:
return CHANNEL_LAYOUT_MONO;
case 2:
return CHANNEL_LAYOUT_STEREO;
case 3:
return CHANNEL_LAYOUT_SURROUND;
case 4:
return CHANNEL_LAYOUT_QUAD;
case 5:
return CHANNEL_LAYOUT_5_0;
case 6:
return CHANNEL_LAYOUT_5_1;
case 7:
return CHANNEL_LAYOUT_6_1;
case 8:
return CHANNEL_LAYOUT_7_1;
default:
RTC_DLOG(LS_WARNING) << "Unsupported channel count: " << channels;
}
return CHANNEL_LAYOUT_UNSUPPORTED;
}
int ChannelOrder(ChannelLayout layout, Channels channel) {
RTC_DCHECK_LT(static_cast<size_t>(layout), arraysize(kChannelOrderings));
RTC_DCHECK_LT(static_cast<size_t>(channel), arraysize(kChannelOrderings[0]));
return kChannelOrderings[layout][channel];
}
const char* ChannelLayoutToString(ChannelLayout layout) {
switch (layout) {
case CHANNEL_LAYOUT_NONE:
return "NONE";
case CHANNEL_LAYOUT_UNSUPPORTED:
return "UNSUPPORTED";
case CHANNEL_LAYOUT_MONO:
return "MONO";
case CHANNEL_LAYOUT_STEREO:
return "STEREO";
case CHANNEL_LAYOUT_2_1:
return "2.1";
case CHANNEL_LAYOUT_SURROUND:
return "SURROUND";
case CHANNEL_LAYOUT_4_0:
return "4.0";
case CHANNEL_LAYOUT_2_2:
return "QUAD_SIDE";
case CHANNEL_LAYOUT_QUAD:
return "QUAD";
case CHANNEL_LAYOUT_5_0:
return "5.0";
case CHANNEL_LAYOUT_5_1:
return "5.1";
case CHANNEL_LAYOUT_5_0_BACK:
return "5.0_BACK";
case CHANNEL_LAYOUT_5_1_BACK:
return "5.1_BACK";
case CHANNEL_LAYOUT_7_0:
return "7.0";
case CHANNEL_LAYOUT_7_1:
return "7.1";
case CHANNEL_LAYOUT_7_1_WIDE:
return "7.1_WIDE";
case CHANNEL_LAYOUT_STEREO_DOWNMIX:
return "STEREO_DOWNMIX";
case CHANNEL_LAYOUT_2POINT1:
return "2POINT1";
case CHANNEL_LAYOUT_3_1:
return "3.1";
case CHANNEL_LAYOUT_4_1:
return "4.1";
case CHANNEL_LAYOUT_6_0:
return "6.0";
case CHANNEL_LAYOUT_6_0_FRONT:
return "6.0_FRONT";
case CHANNEL_LAYOUT_HEXAGONAL:
return "HEXAGONAL";
case CHANNEL_LAYOUT_6_1:
return "6.1";
case CHANNEL_LAYOUT_6_1_BACK:
return "6.1_BACK";
case CHANNEL_LAYOUT_6_1_FRONT:
return "6.1_FRONT";
case CHANNEL_LAYOUT_7_0_FRONT:
return "7.0_FRONT";
case CHANNEL_LAYOUT_7_1_WIDE_BACK:
return "7.1_WIDE_BACK";
case CHANNEL_LAYOUT_OCTAGONAL:
return "OCTAGONAL";
case CHANNEL_LAYOUT_DISCRETE:
return "DISCRETE";
case CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC:
return "STEREO_AND_KEYBOARD_MIC";
case CHANNEL_LAYOUT_4_1_QUAD_SIDE:
return "4.1_QUAD_SIDE";
case CHANNEL_LAYOUT_BITSTREAM:
return "BITSTREAM";
}
RTC_DCHECK_NOTREACHED() << "Invalid channel layout provided: " << layout;
return "";
}
} // namespace webrtc

api/audio/channel_layout.h (new file)
@@ -0,0 +1,165 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_AUDIO_CHANNEL_LAYOUT_H_
#define API_AUDIO_CHANNEL_LAYOUT_H_
namespace webrtc {
// This file is derived from Chromium's base/channel_layout.h.
// Enumerates the various representations of the ordering of audio channels.
// Logged to UMA, so never reuse a value, always add new/greater ones!
enum ChannelLayout {
CHANNEL_LAYOUT_NONE = 0,
CHANNEL_LAYOUT_UNSUPPORTED = 1,
// Front C
CHANNEL_LAYOUT_MONO = 2,
// Front L, Front R
CHANNEL_LAYOUT_STEREO = 3,
// Front L, Front R, Back C
CHANNEL_LAYOUT_2_1 = 4,
// Front L, Front R, Front C
CHANNEL_LAYOUT_SURROUND = 5,
// Front L, Front R, Front C, Back C
CHANNEL_LAYOUT_4_0 = 6,
// Front L, Front R, Side L, Side R
CHANNEL_LAYOUT_2_2 = 7,
// Front L, Front R, Back L, Back R
CHANNEL_LAYOUT_QUAD = 8,
// Front L, Front R, Front C, Side L, Side R
CHANNEL_LAYOUT_5_0 = 9,
// Front L, Front R, Front C, LFE, Side L, Side R
CHANNEL_LAYOUT_5_1 = 10,
// Front L, Front R, Front C, Back L, Back R
CHANNEL_LAYOUT_5_0_BACK = 11,
// Front L, Front R, Front C, LFE, Back L, Back R
CHANNEL_LAYOUT_5_1_BACK = 12,
// Front L, Front R, Front C, Side L, Side R, Back L, Back R
CHANNEL_LAYOUT_7_0 = 13,
// Front L, Front R, Front C, LFE, Side L, Side R, Back L, Back R
CHANNEL_LAYOUT_7_1 = 14,
// Front L, Front R, Front C, LFE, Side L, Side R, Front LofC, Front RofC
CHANNEL_LAYOUT_7_1_WIDE = 15,
// Stereo L, Stereo R
CHANNEL_LAYOUT_STEREO_DOWNMIX = 16,
// Stereo L, Stereo R, LFE
CHANNEL_LAYOUT_2POINT1 = 17,
// Stereo L, Stereo R, Front C, LFE
CHANNEL_LAYOUT_3_1 = 18,
// Stereo L, Stereo R, Front C, Rear C, LFE
CHANNEL_LAYOUT_4_1 = 19,
// Stereo L, Stereo R, Front C, Side L, Side R, Back C
CHANNEL_LAYOUT_6_0 = 20,
// Stereo L, Stereo R, Side L, Side R, Front LofC, Front RofC
CHANNEL_LAYOUT_6_0_FRONT = 21,
// Stereo L, Stereo R, Front C, Rear L, Rear R, Rear C
CHANNEL_LAYOUT_HEXAGONAL = 22,
// Stereo L, Stereo R, Front C, LFE, Side L, Side R, Rear Center
CHANNEL_LAYOUT_6_1 = 23,
// Stereo L, Stereo R, Front C, LFE, Back L, Back R, Rear Center
CHANNEL_LAYOUT_6_1_BACK = 24,
// Stereo L, Stereo R, Side L, Side R, Front LofC, Front RofC, LFE
CHANNEL_LAYOUT_6_1_FRONT = 25,
// Front L, Front R, Front C, Side L, Side R, Front LofC, Front RofC
CHANNEL_LAYOUT_7_0_FRONT = 26,
// Front L, Front R, Front C, LFE, Back L, Back R, Front LofC, Front RofC
CHANNEL_LAYOUT_7_1_WIDE_BACK = 27,
// Front L, Front R, Front C, Side L, Side R, Back L, Back R, Back C.
CHANNEL_LAYOUT_OCTAGONAL = 28,
// Channels are not explicitly mapped to speakers.
CHANNEL_LAYOUT_DISCRETE = 29,
// Front L, Front R, Front C. Front C contains the keyboard mic audio. This
// layout is only intended for input for WebRTC. The Front C channel
// is stripped away in the WebRTC audio input pipeline and never seen outside
// of that.
CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC = 30,
// Front L, Front R, Side L, Side R, LFE
CHANNEL_LAYOUT_4_1_QUAD_SIDE = 31,
// Actual channel layout is specified in the bitstream and the actual channel
// count is unknown at Chromium media pipeline level (useful for audio
// pass-through mode).
CHANNEL_LAYOUT_BITSTREAM = 32,
// Max value, must always equal the largest entry ever logged.
CHANNEL_LAYOUT_MAX = CHANNEL_LAYOUT_BITSTREAM
};
// Note: Do not reorder or reassign these values; other code depends on their
// ordering to operate correctly. E.g., CoreAudio channel layout computations.
enum Channels {
LEFT = 0,
RIGHT,
CENTER,
LFE,
BACK_LEFT,
BACK_RIGHT,
LEFT_OF_CENTER,
RIGHT_OF_CENTER,
BACK_CENTER,
SIDE_LEFT,
SIDE_RIGHT,
CHANNELS_MAX =
SIDE_RIGHT, // Must always equal the largest value ever logged.
};
// The maximum number of concurrently active channels for all possible layouts.
// ChannelLayoutToChannelCount() will never return a value higher than this.
constexpr int kMaxConcurrentChannels = 8;
// Returns the expected channel position in an interleaved stream. Values of -1
// mean the channel at that index is not used for that layout. Values range
// from 0 to ChannelLayoutToChannelCount(layout) - 1.
int ChannelOrder(ChannelLayout layout, Channels channel);
// Returns the number of channels in a given ChannelLayout.
int ChannelLayoutToChannelCount(ChannelLayout layout);
// Given the number of channels, return the best layout,
// or return CHANNEL_LAYOUT_UNSUPPORTED if there is no good match.
ChannelLayout GuessChannelLayout(int channels);
// Returns a string representation of the channel layout.
const char* ChannelLayoutToString(ChannelLayout layout);
} // namespace webrtc
#endif // API_AUDIO_CHANNEL_LAYOUT_H_
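
A sketch of how ChannelOrder() is typically used; the example function is illustrative only. It picks one speaker's sample out of an interleaved frame.

#include <stdint.h>
#include "api/audio/channel_layout.h"
#include "rtc_base/checks.h"

// Illustrative only: returns the LFE sample of one interleaved 5.1 frame.
int16_t LfeSampleExample(const int16_t* interleaved_frame) {
  const webrtc::ChannelLayout layout = webrtc::GuessChannelLayout(6);
  RTC_DCHECK_EQ(webrtc::CHANNEL_LAYOUT_5_1, layout);
  const int index = webrtc::ChannelOrder(layout, webrtc::LFE);  // 3 for 5.1.
  return interleaved_frame[index];
}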

api/audio/echo_canceller3_config.cc (new file)
@@ -0,0 +1,278 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/audio/echo_canceller3_config.h"
#include <algorithm>
#include <cmath>
#include "rtc_base/checks.h"
#include "rtc_base/numerics/safe_minmax.h"
namespace webrtc {
namespace {
bool Limit(float* value, float min, float max) {
float clamped = rtc::SafeClamp(*value, min, max);
clamped = std::isfinite(clamped) ? clamped : min;
bool res = *value == clamped;
*value = clamped;
return res;
}
bool Limit(size_t* value, size_t min, size_t max) {
size_t clamped = rtc::SafeClamp(*value, min, max);
bool res = *value == clamped;
*value = clamped;
return res;
}
bool Limit(int* value, int min, int max) {
int clamped = rtc::SafeClamp(*value, min, max);
bool res = *value == clamped;
*value = clamped;
return res;
}
bool FloorLimit(size_t* value, size_t min) {
size_t clamped = *value >= min ? *value : min;
bool res = *value == clamped;
*value = clamped;
return res;
}
} // namespace
EchoCanceller3Config::EchoCanceller3Config() = default;
EchoCanceller3Config::EchoCanceller3Config(const EchoCanceller3Config& e) =
default;
EchoCanceller3Config& EchoCanceller3Config::operator=(
const EchoCanceller3Config& e) = default;
EchoCanceller3Config::Delay::Delay() = default;
EchoCanceller3Config::Delay::Delay(const EchoCanceller3Config::Delay& e) =
default;
EchoCanceller3Config::Delay& EchoCanceller3Config::Delay::operator=(
const Delay& e) = default;
EchoCanceller3Config::EchoModel::EchoModel() = default;
EchoCanceller3Config::EchoModel::EchoModel(
const EchoCanceller3Config::EchoModel& e) = default;
EchoCanceller3Config::EchoModel& EchoCanceller3Config::EchoModel::operator=(
const EchoModel& e) = default;
EchoCanceller3Config::Suppressor::Suppressor() = default;
EchoCanceller3Config::Suppressor::Suppressor(
const EchoCanceller3Config::Suppressor& e) = default;
EchoCanceller3Config::Suppressor& EchoCanceller3Config::Suppressor::operator=(
const Suppressor& e) = default;
EchoCanceller3Config::Suppressor::MaskingThresholds::MaskingThresholds(
float enr_transparent,
float enr_suppress,
float emr_transparent)
: enr_transparent(enr_transparent),
enr_suppress(enr_suppress),
emr_transparent(emr_transparent) {}
EchoCanceller3Config::Suppressor::MaskingThresholds::MaskingThresholds(
const EchoCanceller3Config::Suppressor::MaskingThresholds& e) = default;
EchoCanceller3Config::Suppressor::MaskingThresholds&
EchoCanceller3Config::Suppressor::MaskingThresholds::operator=(
const MaskingThresholds& e) = default;
EchoCanceller3Config::Suppressor::Tuning::Tuning(MaskingThresholds mask_lf,
MaskingThresholds mask_hf,
float max_inc_factor,
float max_dec_factor_lf)
: mask_lf(mask_lf),
mask_hf(mask_hf),
max_inc_factor(max_inc_factor),
max_dec_factor_lf(max_dec_factor_lf) {}
EchoCanceller3Config::Suppressor::Tuning::Tuning(
const EchoCanceller3Config::Suppressor::Tuning& e) = default;
EchoCanceller3Config::Suppressor::Tuning&
EchoCanceller3Config::Suppressor::Tuning::operator=(const Tuning& e) = default;
bool EchoCanceller3Config::Validate(EchoCanceller3Config* config) {
RTC_DCHECK(config);
EchoCanceller3Config* c = config;
bool res = true;
if (c->delay.down_sampling_factor != 4 &&
c->delay.down_sampling_factor != 8) {
c->delay.down_sampling_factor = 4;
res = false;
}
res = res & Limit(&c->delay.default_delay, 0, 5000);
res = res & Limit(&c->delay.num_filters, 0, 5000);
res = res & Limit(&c->delay.delay_headroom_samples, 0, 5000);
res = res & Limit(&c->delay.hysteresis_limit_blocks, 0, 5000);
res = res & Limit(&c->delay.fixed_capture_delay_samples, 0, 5000);
res = res & Limit(&c->delay.delay_estimate_smoothing, 0.f, 1.f);
res = res & Limit(&c->delay.delay_candidate_detection_threshold, 0.f, 1.f);
res = res & Limit(&c->delay.delay_selection_thresholds.initial, 1, 250);
res = res & Limit(&c->delay.delay_selection_thresholds.converged, 1, 250);
res = res & FloorLimit(&c->filter.refined.length_blocks, 1);
res = res & Limit(&c->filter.refined.leakage_converged, 0.f, 1000.f);
res = res & Limit(&c->filter.refined.leakage_diverged, 0.f, 1000.f);
res = res & Limit(&c->filter.refined.error_floor, 0.f, 1000.f);
res = res & Limit(&c->filter.refined.error_ceil, 0.f, 100000000.f);
res = res & Limit(&c->filter.refined.noise_gate, 0.f, 100000000.f);
res = res & FloorLimit(&c->filter.refined_initial.length_blocks, 1);
res = res & Limit(&c->filter.refined_initial.leakage_converged, 0.f, 1000.f);
res = res & Limit(&c->filter.refined_initial.leakage_diverged, 0.f, 1000.f);
res = res & Limit(&c->filter.refined_initial.error_floor, 0.f, 1000.f);
res = res & Limit(&c->filter.refined_initial.error_ceil, 0.f, 100000000.f);
res = res & Limit(&c->filter.refined_initial.noise_gate, 0.f, 100000000.f);
if (c->filter.refined.length_blocks <
c->filter.refined_initial.length_blocks) {
c->filter.refined_initial.length_blocks = c->filter.refined.length_blocks;
res = false;
}
res = res & FloorLimit(&c->filter.coarse.length_blocks, 1);
res = res & Limit(&c->filter.coarse.rate, 0.f, 1.f);
res = res & Limit(&c->filter.coarse.noise_gate, 0.f, 100000000.f);
res = res & FloorLimit(&c->filter.coarse_initial.length_blocks, 1);
res = res & Limit(&c->filter.coarse_initial.rate, 0.f, 1.f);
res = res & Limit(&c->filter.coarse_initial.noise_gate, 0.f, 100000000.f);
if (c->filter.coarse.length_blocks < c->filter.coarse_initial.length_blocks) {
c->filter.coarse_initial.length_blocks = c->filter.coarse.length_blocks;
res = false;
}
res = res & Limit(&c->filter.config_change_duration_blocks, 0, 100000);
res = res & Limit(&c->filter.initial_state_seconds, 0.f, 100.f);
res = res & Limit(&c->filter.coarse_reset_hangover_blocks, 0, 250000);
res = res & Limit(&c->erle.min, 1.f, 100000.f);
res = res & Limit(&c->erle.max_l, 1.f, 100000.f);
res = res & Limit(&c->erle.max_h, 1.f, 100000.f);
if (c->erle.min > c->erle.max_l || c->erle.min > c->erle.max_h) {
c->erle.min = std::min(c->erle.max_l, c->erle.max_h);
res = false;
}
res = res & Limit(&c->erle.num_sections, 1, c->filter.refined.length_blocks);
res = res & Limit(&c->ep_strength.default_gain, 0.f, 1000000.f);
res = res & Limit(&c->ep_strength.default_len, -1.f, 1.f);
res = res & Limit(&c->ep_strength.nearend_len, -1.0f, 1.0f);
res =
res & Limit(&c->echo_audibility.low_render_limit, 0.f, 32768.f * 32768.f);
res = res &
Limit(&c->echo_audibility.normal_render_limit, 0.f, 32768.f * 32768.f);
res = res & Limit(&c->echo_audibility.floor_power, 0.f, 32768.f * 32768.f);
res = res & Limit(&c->echo_audibility.audibility_threshold_lf, 0.f,
32768.f * 32768.f);
res = res & Limit(&c->echo_audibility.audibility_threshold_mf, 0.f,
32768.f * 32768.f);
res = res & Limit(&c->echo_audibility.audibility_threshold_hf, 0.f,
32768.f * 32768.f);
res = res &
Limit(&c->render_levels.active_render_limit, 0.f, 32768.f * 32768.f);
res = res & Limit(&c->render_levels.poor_excitation_render_limit, 0.f,
32768.f * 32768.f);
res = res & Limit(&c->render_levels.poor_excitation_render_limit_ds8, 0.f,
32768.f * 32768.f);
res = res & Limit(&c->echo_model.noise_floor_hold, 0, 1000);
res = res & Limit(&c->echo_model.min_noise_floor_power, 0, 2000000.f);
res = res & Limit(&c->echo_model.stationary_gate_slope, 0, 1000000.f);
res = res & Limit(&c->echo_model.noise_gate_power, 0, 1000000.f);
res = res & Limit(&c->echo_model.noise_gate_slope, 0, 1000000.f);
res = res & Limit(&c->echo_model.render_pre_window_size, 0, 100);
res = res & Limit(&c->echo_model.render_post_window_size, 0, 100);
res = res & Limit(&c->comfort_noise.noise_floor_dbfs, -200.f, 0.f);
res = res & Limit(&c->suppressor.nearend_average_blocks, 1, 5000);
res = res &
Limit(&c->suppressor.normal_tuning.mask_lf.enr_transparent, 0.f, 100.f);
res = res &
Limit(&c->suppressor.normal_tuning.mask_lf.enr_suppress, 0.f, 100.f);
res = res &
Limit(&c->suppressor.normal_tuning.mask_lf.emr_transparent, 0.f, 100.f);
res = res &
Limit(&c->suppressor.normal_tuning.mask_hf.enr_transparent, 0.f, 100.f);
res = res &
Limit(&c->suppressor.normal_tuning.mask_hf.enr_suppress, 0.f, 100.f);
res = res &
Limit(&c->suppressor.normal_tuning.mask_hf.emr_transparent, 0.f, 100.f);
res = res & Limit(&c->suppressor.normal_tuning.max_inc_factor, 0.f, 100.f);
res = res & Limit(&c->suppressor.normal_tuning.max_dec_factor_lf, 0.f, 100.f);
res = res & Limit(&c->suppressor.nearend_tuning.mask_lf.enr_transparent, 0.f,
100.f);
res = res &
Limit(&c->suppressor.nearend_tuning.mask_lf.enr_suppress, 0.f, 100.f);
res = res & Limit(&c->suppressor.nearend_tuning.mask_lf.emr_transparent, 0.f,
100.f);
res = res & Limit(&c->suppressor.nearend_tuning.mask_hf.enr_transparent, 0.f,
100.f);
res = res &
Limit(&c->suppressor.nearend_tuning.mask_hf.enr_suppress, 0.f, 100.f);
res = res & Limit(&c->suppressor.nearend_tuning.mask_hf.emr_transparent, 0.f,
100.f);
res = res & Limit(&c->suppressor.nearend_tuning.max_inc_factor, 0.f, 100.f);
res =
res & Limit(&c->suppressor.nearend_tuning.max_dec_factor_lf, 0.f, 100.f);
res = res & Limit(&c->suppressor.last_permanent_lf_smoothing_band, 0, 64);
res = res & Limit(&c->suppressor.last_lf_smoothing_band, 0, 64);
res = res & Limit(&c->suppressor.last_lf_band, 0, 63);
res = res &
Limit(&c->suppressor.first_hf_band, c->suppressor.last_lf_band + 1, 64);
res = res & Limit(&c->suppressor.dominant_nearend_detection.enr_threshold,
0.f, 1000000.f);
res = res & Limit(&c->suppressor.dominant_nearend_detection.snr_threshold,
0.f, 1000000.f);
res = res & Limit(&c->suppressor.dominant_nearend_detection.hold_duration, 0,
10000);
res = res & Limit(&c->suppressor.dominant_nearend_detection.trigger_threshold,
0, 10000);
res = res &
Limit(&c->suppressor.subband_nearend_detection.nearend_average_blocks,
1, 1024);
res =
res & Limit(&c->suppressor.subband_nearend_detection.subband1.low, 0, 65);
res = res & Limit(&c->suppressor.subband_nearend_detection.subband1.high,
c->suppressor.subband_nearend_detection.subband1.low, 65);
res =
res & Limit(&c->suppressor.subband_nearend_detection.subband2.low, 0, 65);
res = res & Limit(&c->suppressor.subband_nearend_detection.subband2.high,
c->suppressor.subband_nearend_detection.subband2.low, 65);
res = res & Limit(&c->suppressor.subband_nearend_detection.nearend_threshold,
0.f, 1.e24f);
res = res & Limit(&c->suppressor.subband_nearend_detection.snr_threshold, 0.f,
1.e24f);
res = res & Limit(&c->suppressor.high_bands_suppression.enr_threshold, 0.f,
1000000.f);
res = res & Limit(&c->suppressor.high_bands_suppression.max_gain_during_echo,
0.f, 1.f);
res = res & Limit(&c->suppressor.high_bands_suppression
.anti_howling_activation_threshold,
0.f, 32768.f * 32768.f);
res = res & Limit(&c->suppressor.high_bands_suppression.anti_howling_gain,
0.f, 1.f);
res = res & Limit(&c->suppressor.floor_first_increase, 0.f, 1000000.f);
return res;
}
} // namespace webrtc

api/audio/echo_canceller3_config.h (new file)
@@ -0,0 +1,250 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_AUDIO_ECHO_CANCELLER3_CONFIG_H_
#define API_AUDIO_ECHO_CANCELLER3_CONFIG_H_
#include <stddef.h> // size_t
#include "rtc_base/system/rtc_export.h"
namespace webrtc {
// Configuration struct for EchoCanceller3
struct RTC_EXPORT EchoCanceller3Config {
// Checks and updates the config parameters to lie within (mostly) reasonable
// ranges. Returns true if and only if the config did not need to be changed.
static bool Validate(EchoCanceller3Config* config);
EchoCanceller3Config();
EchoCanceller3Config(const EchoCanceller3Config& e);
EchoCanceller3Config& operator=(const EchoCanceller3Config& other);
struct Buffering {
size_t excess_render_detection_interval_blocks = 250;
size_t max_allowed_excess_render_blocks = 8;
} buffering;
struct Delay {
Delay();
Delay(const Delay& e);
Delay& operator=(const Delay& e);
size_t default_delay = 5;
size_t down_sampling_factor = 4;
size_t num_filters = 5;
size_t delay_headroom_samples = 32;
size_t hysteresis_limit_blocks = 1;
size_t fixed_capture_delay_samples = 0;
float delay_estimate_smoothing = 0.7f;
float delay_estimate_smoothing_delay_found = 0.7f;
float delay_candidate_detection_threshold = 0.2f;
struct DelaySelectionThresholds {
int initial;
int converged;
} delay_selection_thresholds = {5, 20};
bool use_external_delay_estimator = false;
bool log_warning_on_delay_changes = false;
struct AlignmentMixing {
bool downmix;
bool adaptive_selection;
float activity_power_threshold;
bool prefer_first_two_channels;
};
AlignmentMixing render_alignment_mixing = {false, true, 10000.f, true};
AlignmentMixing capture_alignment_mixing = {false, true, 10000.f, false};
bool detect_pre_echo = true;
} delay;
struct Filter {
struct RefinedConfiguration {
size_t length_blocks;
float leakage_converged;
float leakage_diverged;
float error_floor;
float error_ceil;
float noise_gate;
};
struct CoarseConfiguration {
size_t length_blocks;
float rate;
float noise_gate;
};
RefinedConfiguration refined = {13, 0.00005f, 0.05f,
0.001f, 2.f, 20075344.f};
CoarseConfiguration coarse = {13, 0.7f, 20075344.f};
RefinedConfiguration refined_initial = {12, 0.005f, 0.5f,
0.001f, 2.f, 20075344.f};
CoarseConfiguration coarse_initial = {12, 0.9f, 20075344.f};
size_t config_change_duration_blocks = 250;
float initial_state_seconds = 2.5f;
int coarse_reset_hangover_blocks = 25;
bool conservative_initial_phase = false;
bool enable_coarse_filter_output_usage = true;
bool use_linear_filter = true;
bool high_pass_filter_echo_reference = false;
bool export_linear_aec_output = false;
} filter;
struct Erle {
float min = 1.f;
float max_l = 4.f;
float max_h = 1.5f;
bool onset_detection = true;
size_t num_sections = 1;
bool clamp_quality_estimate_to_zero = true;
bool clamp_quality_estimate_to_one = true;
} erle;
struct EpStrength {
float default_gain = 1.f;
float default_len = 0.83f;
float nearend_len = 0.83f;
bool echo_can_saturate = true;
bool bounded_erl = false;
bool erle_onset_compensation_in_dominant_nearend = false;
bool use_conservative_tail_frequency_response = true;
} ep_strength;
struct EchoAudibility {
float low_render_limit = 4 * 64.f;
float normal_render_limit = 64.f;
float floor_power = 2 * 64.f;
float audibility_threshold_lf = 10;
float audibility_threshold_mf = 10;
float audibility_threshold_hf = 10;
bool use_stationarity_properties = false;
bool use_stationarity_properties_at_init = false;
} echo_audibility;
struct RenderLevels {
float active_render_limit = 100.f;
float poor_excitation_render_limit = 150.f;
float poor_excitation_render_limit_ds8 = 20.f;
float render_power_gain_db = 0.f;
} render_levels;
struct EchoRemovalControl {
bool has_clock_drift = false;
bool linear_and_stable_echo_path = false;
} echo_removal_control;
struct EchoModel {
EchoModel();
EchoModel(const EchoModel& e);
EchoModel& operator=(const EchoModel& e);
size_t noise_floor_hold = 50;
float min_noise_floor_power = 1638400.f;
float stationary_gate_slope = 10.f;
float noise_gate_power = 27509.42f;
float noise_gate_slope = 0.3f;
size_t render_pre_window_size = 1;
size_t render_post_window_size = 1;
bool model_reverb_in_nonlinear_mode = true;
} echo_model;
struct ComfortNoise {
float noise_floor_dbfs = -96.03406f;
} comfort_noise;
struct Suppressor {
Suppressor();
Suppressor(const Suppressor& e);
Suppressor& operator=(const Suppressor& e);
size_t nearend_average_blocks = 4;
struct MaskingThresholds {
MaskingThresholds(float enr_transparent,
float enr_suppress,
float emr_transparent);
MaskingThresholds(const MaskingThresholds& e);
MaskingThresholds& operator=(const MaskingThresholds& e);
float enr_transparent;
float enr_suppress;
float emr_transparent;
};
struct Tuning {
Tuning(MaskingThresholds mask_lf,
MaskingThresholds mask_hf,
float max_inc_factor,
float max_dec_factor_lf);
Tuning(const Tuning& e);
Tuning& operator=(const Tuning& e);
MaskingThresholds mask_lf;
MaskingThresholds mask_hf;
float max_inc_factor;
float max_dec_factor_lf;
};
Tuning normal_tuning = Tuning(MaskingThresholds(.3f, .4f, .3f),
MaskingThresholds(.07f, .1f, .3f),
2.0f,
0.25f);
Tuning nearend_tuning = Tuning(MaskingThresholds(1.09f, 1.1f, .3f),
MaskingThresholds(.1f, .3f, .3f),
2.0f,
0.25f);
bool lf_smoothing_during_initial_phase = true;
int last_permanent_lf_smoothing_band = 0;
int last_lf_smoothing_band = 5;
int last_lf_band = 5;
int first_hf_band = 8;
struct DominantNearendDetection {
float enr_threshold = .25f;
float enr_exit_threshold = 10.f;
float snr_threshold = 30.f;
int hold_duration = 50;
int trigger_threshold = 12;
bool use_during_initial_phase = true;
bool use_unbounded_echo_spectrum = true;
} dominant_nearend_detection;
struct SubbandNearendDetection {
size_t nearend_average_blocks = 1;
struct SubbandRegion {
size_t low;
size_t high;
};
SubbandRegion subband1 = {1, 1};
SubbandRegion subband2 = {1, 1};
float nearend_threshold = 1.f;
float snr_threshold = 1.f;
} subband_nearend_detection;
bool use_subband_nearend_detection = false;
struct HighBandsSuppression {
float enr_threshold = 1.f;
float max_gain_during_echo = 1.f;
float anti_howling_activation_threshold = 400.f;
float anti_howling_gain = 1.f;
} high_bands_suppression;
float floor_first_increase = 0.00001f;
bool conservative_hf_suppression = false;
} suppressor;
struct MultiChannel {
bool detect_stereo_content = true;
float stereo_detection_threshold = 0.0f;
int stereo_detection_timeout_threshold_seconds = 300;
float stereo_detection_hysteresis_seconds = 2.0f;
} multi_channel;
};
} // namespace webrtc
#endif // API_AUDIO_ECHO_CANCELLER3_CONFIG_H_
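
A sketch of the intended Validate() flow (function name illustrative): tweak fields, then clamp everything into range before handing the config to the canceller.

#include "api/audio/echo_canceller3_config.h"

// Illustrative only: builds a config with one non-default field.
webrtc::EchoCanceller3Config MakeAec3ConfigExample() {
  webrtc::EchoCanceller3Config config;
  config.filter.export_linear_aec_output = true;
  // Returns false if any field had to be corrected; the config is safe to
  // use either way afterwards.
  webrtc::EchoCanceller3Config::Validate(&config);
  return config;
}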

api/audio/echo_canceller3_factory.cc (new file)
@@ -0,0 +1,32 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/audio/echo_canceller3_factory.h"
#include <memory>
#include "modules/audio_processing/aec3/echo_canceller3.h"
namespace webrtc {
EchoCanceller3Factory::EchoCanceller3Factory() {}
EchoCanceller3Factory::EchoCanceller3Factory(const EchoCanceller3Config& config)
: config_(config) {}
std::unique_ptr<EchoControl> EchoCanceller3Factory::Create(
int sample_rate_hz,
int num_render_channels,
int num_capture_channels) {
return std::make_unique<EchoCanceller3>(
config_, /*multichannel_config=*/absl::nullopt, sample_rate_hz,
num_render_channels, num_capture_channels);
}
} // namespace webrtc

api/audio/echo_canceller3_factory.h (new file)
@@ -0,0 +1,41 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_AUDIO_ECHO_CANCELLER3_FACTORY_H_
#define API_AUDIO_ECHO_CANCELLER3_FACTORY_H_
#include <memory>
#include "api/audio/echo_canceller3_config.h"
#include "api/audio/echo_control.h"
#include "rtc_base/system/rtc_export.h"
namespace webrtc {
class RTC_EXPORT EchoCanceller3Factory : public EchoControlFactory {
public:
// Factory producing EchoCanceller3 instances with the default configuration.
EchoCanceller3Factory();
// Factory producing EchoCanceller3 instances with the specified
// configuration.
explicit EchoCanceller3Factory(const EchoCanceller3Config& config);
// Creates an EchoCanceller3 with a specified channel count and sampling rate.
std::unique_ptr<EchoControl> Create(int sample_rate_hz,
int num_render_channels,
int num_capture_channels) override;
private:
const EchoCanceller3Config config_;
};
} // namespace webrtc
#endif // API_AUDIO_ECHO_CANCELLER3_FACTORY_H_
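
A sketch of factory usage (function illustrative): one factory can mint per-stream EchoControl instances.

#include <memory>
#include "api/audio/echo_canceller3_config.h"
#include "api/audio/echo_canceller3_factory.h"

// Illustrative only: creates an AEC3 instance for 48 kHz mono streams.
std::unique_ptr<webrtc::EchoControl> CreateAec3Example() {
  webrtc::EchoCanceller3Config config;
  webrtc::EchoCanceller3Factory factory(config);
  return factory.Create(/*sample_rate_hz=*/48000,
                        /*num_render_channels=*/1,
                        /*num_capture_channels=*/1);
}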

api/audio/echo_control.h (new file)
@@ -0,0 +1,75 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_AUDIO_ECHO_CONTROL_H_
#define API_AUDIO_ECHO_CONTROL_H_
#include <memory>
#include "rtc_base/checks.h"
namespace webrtc {
class AudioBuffer;
// Interface for an acoustic echo cancellation (AEC) submodule.
class EchoControl {
public:
// Analysis (not changing) of the render signal.
virtual void AnalyzeRender(AudioBuffer* render) = 0;
// Analysis (not changing) of the capture signal.
virtual void AnalyzeCapture(AudioBuffer* capture) = 0;
// Processes the capture signal in order to remove the echo.
virtual void ProcessCapture(AudioBuffer* capture, bool level_change) = 0;
// As above, but also returns the linear filter output.
virtual void ProcessCapture(AudioBuffer* capture,
AudioBuffer* linear_output,
bool level_change) = 0;
struct Metrics {
double echo_return_loss;
double echo_return_loss_enhancement;
int delay_ms;
};
// Collect current metrics from the echo controller.
virtual Metrics GetMetrics() const = 0;
// Provides an optional external estimate of the audio buffer delay.
virtual void SetAudioBufferDelay(int delay_ms) = 0;
// Specifies whether the capture output will be used. The purpose of this is
// to allow the echo controller to deactivate some of the processing when the
// resulting output is anyway not used, for instance when the endpoint is
// muted.
// TODO(b/177830919): Make pure virtual.
virtual void SetCaptureOutputUsage(bool capture_output_used) {}
// Returns whether the signal is altered.
virtual bool ActiveProcessing() const = 0;
virtual ~EchoControl() {}
};
// Interface for a factory that creates EchoControllers.
class EchoControlFactory {
public:
virtual std::unique_ptr<EchoControl> Create(int sample_rate_hz,
int num_render_channels,
int num_capture_channels) = 0;
virtual ~EchoControlFactory() = default;
};
} // namespace webrtc
#endif // API_AUDIO_ECHO_CONTROL_H_
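
A sketch of the per-frame call sequence an owner of an EchoControl would follow; the buffers are assumed to exist, and AudioBuffer setup is out of scope here.

#include "api/audio/echo_control.h"

// Illustrative only: one 10 ms processing step plus metrics readout.
void CancelEchoExample(webrtc::EchoControl& echo,
                       webrtc::AudioBuffer* render,
                       webrtc::AudioBuffer* capture) {
  echo.AnalyzeRender(render);    // Far-end (loudspeaker) reference.
  echo.AnalyzeCapture(capture);  // Near-end signal before any processing.
  echo.ProcessCapture(capture, /*level_change=*/false);  // Removes the echo.
  const webrtc::EchoControl::Metrics metrics = echo.GetMetrics();
  (void)metrics.echo_return_loss_enhancement;  // E.g., export to stats.
}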

api/audio/echo_detector_creator.cc (new file)
@@ -0,0 +1,21 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/audio/echo_detector_creator.h"
#include "api/make_ref_counted.h"
#include "modules/audio_processing/residual_echo_detector.h"
namespace webrtc {
rtc::scoped_refptr<EchoDetector> CreateEchoDetector() {
return rtc::make_ref_counted<ResidualEchoDetector>();
}
} // namespace webrtc

api/audio/echo_detector_creator.h (new file)
@@ -0,0 +1,26 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_AUDIO_ECHO_DETECTOR_CREATOR_H_
#define API_AUDIO_ECHO_DETECTOR_CREATOR_H_
#include "api/scoped_refptr.h"
#include "modules/audio_processing/include/audio_processing.h"
namespace webrtc {
// Returns an instance of the WebRTC implementation of a residual echo detector.
// It can be provided to the webrtc::AudioProcessingBuilder to obtain the
// usual residual echo metrics.
rtc::scoped_refptr<EchoDetector> CreateEchoDetector();
} // namespace webrtc
#endif // API_AUDIO_ECHO_DETECTOR_CREATOR_H_
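
A sketch of wiring the detector into audio processing, assuming the AudioProcessingBuilder::SetEchoDetector() setter declared in audio_processing.h:

#include "api/audio/echo_detector_creator.h"
#include "modules/audio_processing/include/audio_processing.h"

// Illustrative only: builds an APM that reports residual echo metrics.
rtc::scoped_refptr<webrtc::AudioProcessing> BuildApmExample() {
  return webrtc::AudioProcessingBuilder()
      .SetEchoDetector(webrtc::CreateEchoDetector())
      .Create();
}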

api/audio/test/BUILD.gn (new file)
@@ -0,0 +1,29 @@
# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
import("../../../webrtc.gni")
if (is_android) {
import("//build/config/android/config.gni")
import("//build/config/android/rules.gni")
}
if (rtc_include_tests) {
rtc_library("audio_api_unittests") {
testonly = true
sources = [
"audio_frame_unittest.cc",
"echo_canceller3_config_unittest.cc",
]
deps = [
"..:aec3_config",
"..:audio_frame_api",
"../../../modules/audio_processing:aec3_config_json",
"../../../test:test_support",
]
}
}

api/audio/test/audio_frame_unittest.cc (new file)
@@ -0,0 +1,136 @@
/*
* Copyright 2018 The WebRTC Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/audio/audio_frame.h"
#include <stdint.h>
#include <string.h> // memcmp
#include "test/gtest.h"
namespace webrtc {
namespace {
bool AllSamplesAre(int16_t sample, const AudioFrame& frame) {
const int16_t* frame_data = frame.data();
for (size_t i = 0; i < frame.max_16bit_samples(); i++) {
if (frame_data[i] != sample) {
return false;
}
}
return true;
}
constexpr uint32_t kTimestamp = 27;
constexpr int kSampleRateHz = 16000;
constexpr size_t kNumChannelsMono = 1;
constexpr size_t kNumChannelsStereo = 2;
constexpr size_t kNumChannels5_1 = 6;
constexpr size_t kSamplesPerChannel = kSampleRateHz / 100;
} // namespace
TEST(AudioFrameTest, FrameStartsMuted) {
AudioFrame frame;
EXPECT_TRUE(frame.muted());
EXPECT_TRUE(AllSamplesAre(0, frame));
}
TEST(AudioFrameTest, UnmutedFrameIsInitiallyZeroed) {
AudioFrame frame;
frame.mutable_data();
EXPECT_FALSE(frame.muted());
EXPECT_TRUE(AllSamplesAre(0, frame));
}
TEST(AudioFrameTest, MutedFrameBufferIsZeroed) {
AudioFrame frame;
int16_t* frame_data = frame.mutable_data();
for (size_t i = 0; i < frame.max_16bit_samples(); i++) {
frame_data[i] = 17;
}
ASSERT_TRUE(AllSamplesAre(17, frame));
frame.Mute();
EXPECT_TRUE(frame.muted());
EXPECT_TRUE(AllSamplesAre(0, frame));
}
TEST(AudioFrameTest, UpdateFrameMono) {
AudioFrame frame;
int16_t samples[kNumChannelsMono * kSamplesPerChannel] = {17};
frame.UpdateFrame(kTimestamp, samples, kSamplesPerChannel, kSampleRateHz,
AudioFrame::kPLC, AudioFrame::kVadActive, kNumChannelsMono);
EXPECT_EQ(kTimestamp, frame.timestamp_);
EXPECT_EQ(kSamplesPerChannel, frame.samples_per_channel());
EXPECT_EQ(kSampleRateHz, frame.sample_rate_hz());
EXPECT_EQ(AudioFrame::kPLC, frame.speech_type_);
EXPECT_EQ(AudioFrame::kVadActive, frame.vad_activity_);
EXPECT_EQ(kNumChannelsMono, frame.num_channels());
EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame.channel_layout());
EXPECT_FALSE(frame.muted());
EXPECT_EQ(0, memcmp(samples, frame.data(), sizeof(samples)));
frame.UpdateFrame(kTimestamp, nullptr /* data */, kSamplesPerChannel,
kSampleRateHz, AudioFrame::kPLC, AudioFrame::kVadActive,
kNumChannelsMono);
EXPECT_TRUE(frame.muted());
EXPECT_TRUE(AllSamplesAre(0, frame));
}
TEST(AudioFrameTest, UpdateFrameMultiChannel) {
AudioFrame frame;
frame.UpdateFrame(kTimestamp, nullptr /* data */, kSamplesPerChannel,
kSampleRateHz, AudioFrame::kPLC, AudioFrame::kVadActive,
kNumChannelsStereo);
EXPECT_EQ(kSamplesPerChannel, frame.samples_per_channel());
EXPECT_EQ(kNumChannelsStereo, frame.num_channels());
EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame.channel_layout());
frame.UpdateFrame(kTimestamp, nullptr /* data */, kSamplesPerChannel,
kSampleRateHz, AudioFrame::kPLC, AudioFrame::kVadActive,
kNumChannels5_1);
EXPECT_EQ(kSamplesPerChannel, frame.samples_per_channel());
EXPECT_EQ(kNumChannels5_1, frame.num_channels());
EXPECT_EQ(CHANNEL_LAYOUT_5_1, frame.channel_layout());
}
TEST(AudioFrameTest, CopyFrom) {
AudioFrame frame1;
AudioFrame frame2;
int16_t samples[kNumChannelsMono * kSamplesPerChannel] = {17};
frame2.UpdateFrame(kTimestamp, samples, kSamplesPerChannel, kSampleRateHz,
AudioFrame::kPLC, AudioFrame::kVadActive,
kNumChannelsMono);
frame1.CopyFrom(frame2);
EXPECT_EQ(frame2.timestamp_, frame1.timestamp_);
EXPECT_EQ(frame2.samples_per_channel_, frame1.samples_per_channel_);
EXPECT_EQ(frame2.sample_rate_hz_, frame1.sample_rate_hz_);
EXPECT_EQ(frame2.speech_type_, frame1.speech_type_);
EXPECT_EQ(frame2.vad_activity_, frame1.vad_activity_);
EXPECT_EQ(frame2.num_channels_, frame1.num_channels_);
EXPECT_EQ(frame2.muted(), frame1.muted());
EXPECT_EQ(0, memcmp(frame2.data(), frame1.data(), sizeof(samples)));
frame2.UpdateFrame(kTimestamp, nullptr /* data */, kSamplesPerChannel,
kSampleRateHz, AudioFrame::kPLC, AudioFrame::kVadActive,
kNumChannelsMono);
frame1.CopyFrom(frame2);
EXPECT_EQ(frame2.muted(), frame1.muted());
EXPECT_EQ(0, memcmp(frame2.data(), frame1.data(), sizeof(samples)));
}
} // namespace webrtc

api/audio/test/echo_canceller3_config_unittest.cc (new file)
@@ -0,0 +1,46 @@
/*
* Copyright 2018 The WebRTC Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/audio/echo_canceller3_config.h"
#include "modules/audio_processing/test/echo_canceller3_config_json.h"
#include "test/gtest.h"
namespace webrtc {
TEST(EchoCanceller3Config, ValidConfigIsNotModified) {
EchoCanceller3Config config;
EXPECT_TRUE(EchoCanceller3Config::Validate(&config));
EchoCanceller3Config default_config;
EXPECT_EQ(Aec3ConfigToJsonString(config),
Aec3ConfigToJsonString(default_config));
}
TEST(EchoCanceller3Config, InvalidConfigIsCorrected) {
// Change a parameter and validate.
EchoCanceller3Config config;
config.echo_model.min_noise_floor_power = -1600000.f;
EXPECT_FALSE(EchoCanceller3Config::Validate(&config));
EXPECT_GE(config.echo_model.min_noise_floor_power, 0.f);
// Verify remaining parameters are unchanged.
EchoCanceller3Config default_config;
config.echo_model.min_noise_floor_power =
default_config.echo_model.min_noise_floor_power;
EXPECT_EQ(Aec3ConfigToJsonString(config),
Aec3ConfigToJsonString(default_config));
}
TEST(EchoCanceller3Config, ValidatedConfigsAreValid) {
EchoCanceller3Config config;
config.delay.down_sampling_factor = 983;
EXPECT_FALSE(EchoCanceller3Config::Validate(&config));
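// A config corrected by Validate() must itself validate cleanly.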
EXPECT_TRUE(EchoCanceller3Config::Validate(&config));
}
} // namespace webrtc