Repo created

This commit is contained in:
Fr4nz D13trich 2025-11-22 14:04:28 +01:00
parent 81b91f4139
commit f8c34fa5ee
22732 changed files with 4815320 additions and 2 deletions

View file

@ -0,0 +1,294 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "audio/utility/audio_frame_operations.h"
#include <string.h>
#include <algorithm>
#include <cstdint>
#include <utility>
#include "common_audio/include/audio_util.h"
#include "rtc_base/checks.h"
#include "rtc_base/numerics/safe_conversions.h"
namespace webrtc {
namespace {
// Number of per-channel samples over which a mute fade is ramped:
// 2.7ms @ 48kHz, 4ms @ 32kHz, 8ms @ 16kHz.
constexpr size_t kMuteFadeFrames = 128;
// Per-sample gain increment used to ramp the fade between 0.0 and 1.0.
constexpr float kMuteFadeInc = 1.0f / kMuteFadeFrames;
}  // namespace
// Mixes `frame_to_add` into `result_frame` sample-by-sample with saturating
// addition, and merges the `vad_activity_` and `speech_type_` fields. If
// `result_frame` is muted (carries no audio yet), the samples are copied
// instead of added.
void AudioFrameOperations::Add(const AudioFrame& frame_to_add,
                               AudioFrame* result_frame) {
  // Sanity check.
  RTC_DCHECK(result_frame);
  RTC_DCHECK_GT(result_frame->num_channels_, 0);
  RTC_DCHECK_EQ(result_frame->num_channels_, frame_to_add.num_channels_);

  // A muted result frame has no previous data; the first add becomes a copy.
  bool no_previous_data = result_frame->muted();
  if (result_frame->samples_per_channel_ != frame_to_add.samples_per_channel_) {
    // Special case we have no data to start with.
    RTC_DCHECK_EQ(result_frame->samples_per_channel_, 0);
    result_frame->samples_per_channel_ = frame_to_add.samples_per_channel_;
    no_previous_data = true;
  }

  // VAD merge: active wins over unknown, unknown wins over passive.
  if (result_frame->vad_activity_ == AudioFrame::kVadActive ||
      frame_to_add.vad_activity_ == AudioFrame::kVadActive) {
    result_frame->vad_activity_ = AudioFrame::kVadActive;
  } else if (result_frame->vad_activity_ == AudioFrame::kVadUnknown ||
             frame_to_add.vad_activity_ == AudioFrame::kVadUnknown) {
    result_frame->vad_activity_ = AudioFrame::kVadUnknown;
  }

  // Mixing frames of different speech types yields an undefined type.
  if (result_frame->speech_type_ != frame_to_add.speech_type_)
    result_frame->speech_type_ = AudioFrame::kUndefined;

  if (!frame_to_add.muted()) {
    const int16_t* in_data = frame_to_add.data();
    int16_t* out_data = result_frame->mutable_data();
    size_t length =
        frame_to_add.samples_per_channel_ * frame_to_add.num_channels_;
    if (no_previous_data) {
      std::copy(in_data, in_data + length, out_data);
    } else {
      for (size_t i = 0; i < length; i++) {
        // Widen to 32 bits so the sum cannot wrap, then saturate back down
        // to the int16 range.
        const int32_t wrap_guard = static_cast<int32_t>(out_data[i]) +
                                   static_cast<int32_t>(in_data[i]);
        out_data[i] = rtc::saturated_cast<int16_t>(wrap_guard);
      }
    }
  }
}
// Deprecated helper: upmixes a mono `frame` to stereo in place.
// Returns 0 on success, -1 if `frame` is not mono.
int AudioFrameOperations::MonoToStereo(AudioFrame* frame) {
  if (frame->num_channels_ == 1) {
    UpmixChannels(2, frame);
    return 0;
  }
  return -1;
}
// Deprecated helper: downmixes a stereo `frame` to mono in place.
// Returns 0 on success, -1 if `frame` is not stereo (or the downmix failed
// to produce a mono frame).
int AudioFrameOperations::StereoToMono(AudioFrame* frame) {
  if (frame->num_channels_ != 2) {
    return -1;
  }
  DownmixChannels(1, frame);
  if (frame->num_channels_ == 1) {
    return 0;
  }
  return -1;
}
// Downmixes interleaved quad audio to interleaved stereo: the first two
// channels of each quad sample are averaged into the left output, the last
// two into the right output. Safe for in-place use (`src_audio` may equal
// `dst_audio`) because each output pair is written after the corresponding
// inputs have been read.
void AudioFrameOperations::QuadToStereo(const int16_t* src_audio,
                                        size_t samples_per_channel,
                                        int16_t* dst_audio) {
  for (size_t i = 0; i < samples_per_channel; i++) {
    const size_t in = 4 * i;
    const size_t out = 2 * i;
    // Sum in 32 bits, then halve with an arithmetic shift (floors).
    const int32_t left_sum =
        static_cast<int32_t>(src_audio[in]) + src_audio[in + 1];
    const int32_t right_sum =
        static_cast<int32_t>(src_audio[in + 2]) + src_audio[in + 3];
    dst_audio[out] = left_sum >> 1;
    dst_audio[out + 1] = right_sum >> 1;
  }
}
// In-place quad-to-stereo conversion of `frame`; updates `num_channels_`.
// Returns 0 on success, -1 if `frame` does not have exactly four channels.
int AudioFrameOperations::QuadToStereo(AudioFrame* frame) {
  if (frame->num_channels_ != 4) {
    return -1;
  }
  RTC_DCHECK_LE(frame->samples_per_channel_ * 4,
                AudioFrame::kMaxDataSizeSamples);
  // Muted frames carry no samples; only the channel count changes.
  const bool has_audio = !frame->muted();
  if (has_audio) {
    QuadToStereo(frame->data(), frame->samples_per_channel_,
                 frame->mutable_data());
  }
  frame->num_channels_ = 2;
  return 0;
}
void AudioFrameOperations::DownmixChannels(const int16_t* src_audio,
size_t src_channels,
size_t samples_per_channel,
size_t dst_channels,
int16_t* dst_audio) {
if (src_channels > 1 && dst_channels == 1) {
DownmixInterleavedToMono(src_audio, samples_per_channel, src_channels,
dst_audio);
return;
} else if (src_channels == 4 && dst_channels == 2) {
QuadToStereo(src_audio, samples_per_channel, dst_audio);
return;
}
RTC_DCHECK_NOTREACHED() << "src_channels: " << src_channels
<< ", dst_channels: " << dst_channels;
}
// Frame-based downmix to `dst_channels`; updates `frame->num_channels_`.
// Supported conversions are N-channels-to-mono and quad-to-stereo.
void AudioFrameOperations::DownmixChannels(size_t dst_channels,
                                           AudioFrame* frame) {
  RTC_DCHECK_LE(frame->samples_per_channel_ * frame->num_channels_,
                AudioFrame::kMaxDataSizeSamples);
  const size_t src_channels = frame->num_channels_;
  if (src_channels > 1 && dst_channels == 1) {
    // Muted frames carry no samples; only the channel count changes.
    if (!frame->muted()) {
      DownmixInterleavedToMono(frame->data(), frame->samples_per_channel_,
                               frame->num_channels_, frame->mutable_data());
    }
    frame->num_channels_ = 1;
  } else if (src_channels == 4 && dst_channels == 2) {
    int err = QuadToStereo(frame);
    RTC_DCHECK_EQ(err, 0);
  } else {
    RTC_DCHECK_NOTREACHED() << "src_channels: " << frame->num_channels_
                            << ", dst_channels: " << dst_channels;
  }
}
// Replicates the single channel of a mono `frame` into
// `target_number_of_channels` interleaved channels, in place. Silently does
// nothing when `frame` is not mono or the result would overflow the buffer.
void AudioFrameOperations::UpmixChannels(size_t target_number_of_channels,
                                         AudioFrame* frame) {
  RTC_DCHECK_EQ(frame->num_channels_, 1);
  RTC_DCHECK_LE(frame->samples_per_channel_ * target_number_of_channels,
                AudioFrame::kMaxDataSizeSamples);
  if (frame->num_channels_ != 1 ||
      frame->samples_per_channel_ * target_number_of_channels >
          AudioFrame::kMaxDataSizeSamples) {
    return;
  }
  if (!frame->muted()) {
    // Up-mixing is done in place; iterating from the last sample backwards
    // guarantees no source sample is overwritten before it is replicated.
    int16_t* frame_data = frame->mutable_data();
    for (size_t i = frame->samples_per_channel_; i-- > 0;) {
      for (size_t j = 0; j < target_number_of_channels; ++j) {
        frame_data[target_number_of_channels * i + j] = frame_data[i];
      }
    }
  }
  frame->num_channels_ = target_number_of_channels;
}
// Exchanges the left and right samples of an interleaved stereo `frame`.
// Silently does nothing for non-stereo or muted frames.
void AudioFrameOperations::SwapStereoChannels(AudioFrame* frame) {
  RTC_DCHECK(frame);
  if (frame->num_channels_ != 2 || frame->muted()) {
    return;
  }
  int16_t* samples = frame->mutable_data();
  const size_t total = frame->samples_per_channel_ * 2;
  for (size_t left = 0; left < total; left += 2) {
    std::swap(samples[left], samples[left + 1]);
  }
}
// Applies the mute transition selected by the two flags:
//   !previous && !current : leave the frame untouched.
//    previous &&  current : zero the whole frame.
//    previous && !current : fade in over the first samples.
//   !previous &&  current : fade out over the last samples.
void AudioFrameOperations::Mute(AudioFrame* frame,
                                bool previous_frame_muted,
                                bool current_frame_muted) {
  RTC_DCHECK(frame);
  if (!previous_frame_muted && !current_frame_muted) {
    // Not muted, don't touch.
  } else if (previous_frame_muted && current_frame_muted) {
    // Frame fully muted.
    size_t total_samples = frame->samples_per_channel_ * frame->num_channels_;
    RTC_DCHECK_GE(AudioFrame::kMaxDataSizeSamples, total_samples);
    frame->Mute();
  } else {
    // Fade is a no-op on a muted frame.
    if (frame->muted()) {
      return;
    }
    // Limit number of samples to fade, if frame isn't long enough.
    size_t count = kMuteFadeFrames;
    float inc = kMuteFadeInc;
    if (frame->samples_per_channel_ < kMuteFadeFrames) {
      count = frame->samples_per_channel_;
      if (count > 0) {
        inc = 1.0f / count;
      }
    }
    // [start, end) is the per-channel sample range being faded; `start_g`
    // is the gain just before the first faded sample.
    size_t start = 0;
    size_t end = count;
    float start_g = 0.0f;
    if (current_frame_muted) {
      // Fade out the last `count` samples of frame.
      RTC_DCHECK(!previous_frame_muted);
      start = frame->samples_per_channel_ - count;
      end = frame->samples_per_channel_;
      start_g = 1.0f;
      inc = -inc;  // Ramp the gain downwards instead of upwards.
    } else {
      // Fade in the first `count` samples of frame.
      RTC_DCHECK(previous_frame_muted);
    }
    // Perform fade. The gain is advanced once per faded sample, channel by
    // channel, so each channel sees the same ramp.
    int16_t* frame_data = frame->mutable_data();
    size_t channels = frame->num_channels_;
    for (size_t j = 0; j < channels; ++j) {
      float g = start_g;
      for (size_t i = start * channels; i < end * channels; i += channels) {
        g += inc;
        frame_data[i + j] *= g;
      }
    }
  }
}
// Unconditionally zeroes the contents of `frame`.
void AudioFrameOperations::Mute(AudioFrame* frame) {
  Mute(frame, true, true);
}
// Halves every sample of `frame` using an arithmetic right shift (floors
// towards negative infinity, matching the original behavior). Muted frames
// are left untouched.
void AudioFrameOperations::ApplyHalfGain(AudioFrame* frame) {
  RTC_DCHECK(frame);
  RTC_DCHECK_GT(frame->num_channels_, 0);
  if (frame->num_channels_ < 1 || frame->muted()) {
    return;
  }
  int16_t* samples = frame->mutable_data();
  const size_t total = frame->samples_per_channel_ * frame->num_channels_;
  for (size_t i = 0; i < total; i++) {
    samples[i] = samples[i] >> 1;
  }
}
// Scales the left/right channels of a stereo `frame` by `left`/`right`.
// Returns -1 for non-stereo frames; otherwise 0 (muted frames untouched).
int AudioFrameOperations::Scale(float left, float right, AudioFrame* frame) {
  if (frame->num_channels_ != 2) {
    return -1;
  }
  if (frame->muted()) {
    return 0;
  }
  int16_t* samples = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel_; i++) {
    const size_t l = 2 * i;
    const size_t r = l + 1;
    samples[l] = static_cast<int16_t>(left * samples[l]);
    samples[r] = static_cast<int16_t>(right * samples[r]);
  }
  return 0;
}
// Multiplies every sample of `frame` by `scale`, saturating to the int16
// range. Always returns 0; muted frames are left untouched.
int AudioFrameOperations::ScaleWithSat(float scale, AudioFrame* frame) {
  if (frame->muted()) {
    return 0;
  }
  int16_t* samples = frame->mutable_data();
  const size_t total = frame->samples_per_channel_ * frame->num_channels_;
  for (size_t i = 0; i < total; i++) {
    samples[i] = rtc::saturated_cast<int16_t>(scale * samples[i]);
  }
  return 0;
}
} // namespace webrtc

View file

@ -0,0 +1,107 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef AUDIO_UTILITY_AUDIO_FRAME_OPERATIONS_H_
#define AUDIO_UTILITY_AUDIO_FRAME_OPERATIONS_H_
#include <stddef.h>
#include <stdint.h>
#include "absl/base/attributes.h"
#include "api/audio/audio_frame.h"
namespace webrtc {
// TODO(andrew): consolidate this with utility.h and audio_frame_manipulator.h.
// Change reference parameters to pointers. Consider using a namespace rather
// than a class.
class AudioFrameOperations {
 public:
  // Add samples in `frame_to_add` with samples in `result_frame`
  // putting the results in `results_frame`. The fields
  // `vad_activity_` and `speech_type_` of the result frame are
  // updated. If `result_frame` is empty (`samples_per_channel_`==0),
  // the samples in `frame_to_add` are added to it. The number of
  // channels and number of samples per channel must match except when
  // `result_frame` is empty.
  static void Add(const AudioFrame& frame_to_add, AudioFrame* result_frame);

  // `frame.num_channels_` will be updated. This version checks for sufficient
  // buffer size and that `num_channels_` is mono. Use UpmixChannels
  // instead. TODO(bugs.webrtc.org/8649): remove.
  ABSL_DEPRECATED("bugs.webrtc.org/8649")
  static int MonoToStereo(AudioFrame* frame);

  // `frame.num_channels_` will be updated. This version checks that
  // `num_channels_` is stereo. Use DownmixChannels
  // instead. TODO(bugs.webrtc.org/8649): remove.
  ABSL_DEPRECATED("bugs.webrtc.org/8649")
  static int StereoToMono(AudioFrame* frame);

  // Downmixes 4 channels `src_audio` to stereo `dst_audio`. This is an in-place
  // operation, meaning `src_audio` and `dst_audio` may point to the same
  // buffer.
  static void QuadToStereo(const int16_t* src_audio,
                           size_t samples_per_channel,
                           int16_t* dst_audio);

  // `frame.num_channels_` will be updated. This version checks that
  // `num_channels_` is 4 channels.
  static int QuadToStereo(AudioFrame* frame);

  // Downmixes `src_channels` `src_audio` to `dst_channels` `dst_audio`.
  // This is an in-place operation, meaning `src_audio` and `dst_audio`
  // may point to the same buffer. Supported channel combinations are
  // Stereo to Mono, Quad to Mono, and Quad to Stereo.
  static void DownmixChannels(const int16_t* src_audio,
                              size_t src_channels,
                              size_t samples_per_channel,
                              size_t dst_channels,
                              int16_t* dst_audio);

  // `frame.num_channels_` will be updated. This version checks that
  // `num_channels_` and `dst_channels` are valid and performs relevant downmix.
  // Supported channel combinations are N channels to Mono, and Quad to Stereo.
  static void DownmixChannels(size_t dst_channels, AudioFrame* frame);

  // `frame.num_channels_` will be updated. This version checks that
  // `num_channels_` and `dst_channels` are valid and performs relevant
  // downmix. Supported channel combinations are Mono to N
  // channels. The single channel is replicated.
  static void UpmixChannels(size_t target_number_of_channels,
                            AudioFrame* frame);

  // Swap the left and right channels of `frame`. Fails silently if `frame` is
  // not stereo.
  static void SwapStereoChannels(AudioFrame* frame);

  // Conditionally zero out contents of `frame` for implementing audio mute:
  //  `previous_frame_muted` &&  `current_frame_muted` - Zero out whole frame.
  //  `previous_frame_muted` && !`current_frame_muted` - Fade-in at frame start.
  // !`previous_frame_muted` &&  `current_frame_muted` - Fade-out at frame end.
  // !`previous_frame_muted` && !`current_frame_muted` - Leave frame untouched.
  static void Mute(AudioFrame* frame,
                   bool previous_frame_muted,
                   bool current_frame_muted);

  // Zero out contents of frame.
  static void Mute(AudioFrame* frame);

  // Halve samples in `frame` (arithmetic shift right by one).
  static void ApplyHalfGain(AudioFrame* frame);

  // Scales the left channel of a stereo `frame` by `left` and the right
  // channel by `right`. Returns -1 if `frame` is not stereo, otherwise 0.
  // Muted frames are left untouched.
  static int Scale(float left, float right, AudioFrame* frame);

  // Multiplies every sample in `frame` by `scale`, saturating to the int16
  // range. Always returns 0. Muted frames are left untouched.
  static int ScaleWithSat(float scale, AudioFrame* frame);
};
} // namespace webrtc
#endif // AUDIO_UTILITY_AUDIO_FRAME_OPERATIONS_H_

View file

@ -0,0 +1,99 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "audio/utility/channel_mixer.h"
#include "audio/utility/channel_mixing_matrix.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/numerics/safe_conversions.h"
namespace webrtc {
// Builds the input->output transformation matrix once at construction so
// that each Transform() call only performs the weighted-sum (or remap) step.
ChannelMixer::ChannelMixer(ChannelLayout input_layout,
                           ChannelLayout output_layout)
    : input_layout_(input_layout),
      output_layout_(output_layout),
      input_channels_(ChannelLayoutToChannelCount(input_layout)),
      output_channels_(ChannelLayoutToChannelCount(output_layout)) {
  // Create the transformation matrix.
  ChannelMixingMatrix matrix_builder(input_layout_, input_channels_,
                                     output_layout_, output_channels_);
  // `remapping_` is true when the matrix is a pure channel remap (no mixing).
  remapping_ = matrix_builder.CreateTransformationMatrix(&matrix_);
}

ChannelMixer::~ChannelMixer() = default;
// Converts `frame` in place from `input_layout_` to `output_layout_` using
// the transformation matrix built at construction. Updates the frame's
// channel count and layout fields.
void ChannelMixer::Transform(AudioFrame* frame) {
  RTC_DCHECK(frame);
  RTC_DCHECK_EQ(matrix_[0].size(), static_cast<size_t>(input_channels_));
  RTC_DCHECK_EQ(matrix_.size(), static_cast<size_t>(output_channels_));

  // Leave the audio frame intact if the channel layouts for in and out are
  // identical.
  if (input_layout_ == output_layout_) {
    return;
  }

  // Upmixing produces more samples than the input; make sure the frame's
  // fixed-size buffer can hold the result.
  if (IsUpMixing()) {
    RTC_CHECK_LE(frame->samples_per_channel() * output_channels_,
                 frame->max_16bit_samples());
  }

  // Only change the number of output channels if the audio frame is muted.
  if (frame->muted()) {
    frame->num_channels_ = output_channels_;
    frame->channel_layout_ = output_layout_;
    return;
  }

  const int16_t* in_audio = frame->data();

  // Only allocate fresh memory at first access or if the required size has
  // increased.
  // TODO(henrika): we might be able to do downmixing in-place and thereby avoid
  // extra memory allocation and a memcpy.
  const size_t num_elements = frame->samples_per_channel() * output_channels_;
  if (audio_vector_ == nullptr || num_elements > audio_vector_size_) {
    audio_vector_.reset(new int16_t[num_elements]);
    audio_vector_size_ = num_elements;
  }
  int16_t* out_audio = audio_vector_.get();

  // Modify the number of channels by creating a weighted sum of input samples
  // where the weights (scale factors) for each output sample are given by the
  // transformation matrix.
  for (size_t i = 0; i < frame->samples_per_channel(); i++) {
    for (size_t output_ch = 0; output_ch < output_channels_; ++output_ch) {
      float acc_value = 0.0f;
      for (size_t input_ch = 0; input_ch < input_channels_; ++input_ch) {
        const float scale = matrix_[output_ch][input_ch];
        // Scale should always be positive.
        RTC_DCHECK_GE(scale, 0);
        // Each output sample is a weighted sum of input samples.
        acc_value += scale * in_audio[i * input_channels_ + input_ch];
      }
      const size_t index = output_channels_ * i + output_ch;
      RTC_CHECK_LE(index, audio_vector_size_);
      // Saturate the weighted sum back into the int16 sample range.
      out_audio[index] = rtc::saturated_cast<int16_t>(acc_value);
    }
  }

  // Update channel information.
  frame->num_channels_ = output_channels_;
  frame->channel_layout_ = output_layout_;

  // Copy the output result to the audio frame in `frame`.
  memcpy(
      frame->mutable_data(), out_audio,
      sizeof(int16_t) * frame->samples_per_channel() * frame->num_channels());
}
} // namespace webrtc

View file

@ -0,0 +1,86 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef AUDIO_UTILITY_CHANNEL_MIXER_H_
#define AUDIO_UTILITY_CHANNEL_MIXER_H_
#include <stddef.h>
#include <stdint.h>
#include <memory>
#include <vector>
#include "api/audio/audio_frame.h"
#include "api/audio/channel_layout.h"
namespace webrtc {
// ChannelMixer is for converting audio between channel layouts. The conversion
// matrix is built upon construction and used during each Transform() call. The
// algorithm works by generating a conversion matrix mapping each output channel
// to list of input channels. The transform renders all of the output channels,
// with each output channel rendered according to a weighted sum of the relevant
// input channels as defined in the matrix.
// This file is derived from Chromium's media/base/channel_mixer.h.
class ChannelMixer {
 public:
  // To mix two channels into one and preserve loudness, we must apply
  // (1 / sqrt(2)) gain to each.
  static constexpr float kHalfPower = 0.707106781186547524401f;

  ChannelMixer(ChannelLayout input_layout, ChannelLayout output_layout);
  ~ChannelMixer();

  // Transforms all input channels corresponding to the selected `input_layout`
  // to the number of channels in the selected `output_layout`.
  // Example usage (downmix from stereo to mono):
  //
  //   ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_MONO);
  //   AudioFrame frame;
  //   frame.samples_per_channel_ = 160;
  //   frame.num_channels_ = 2;
  //   EXPECT_EQ(2u, frame.channels());
  //   mixer.Transform(&frame);
  //   EXPECT_EQ(1u, frame.channels());
  //
  // For muted frames only the channel count and layout fields are updated;
  // no sample data is touched.
  void Transform(AudioFrame* frame);

 private:
  bool IsUpMixing() const { return output_channels_ > input_channels_; }

  // Selected channel layouts.
  const ChannelLayout input_layout_;
  const ChannelLayout output_layout_;

  // Channel counts for input and output.
  const size_t input_channels_;
  const size_t output_channels_;

  // 2D matrix of output channels to input channels
  // (`output_channels_` rows x `input_channels_` columns).
  std::vector<std::vector<float> > matrix_;

  // 1D array used as temporary storage during the transformation.
  std::unique_ptr<int16_t[]> audio_vector_;

  // Number of elements allocated for `audio_vector_`.
  size_t audio_vector_size_ = 0;

  // Optimization case for when we can simply remap the input channels to output
  // channels, i.e., when all scaling factors in `matrix_` equals 1.0.
  bool remapping_;

  // Delete the copy constructor and assignment operator.
  ChannelMixer(const ChannelMixer& other) = delete;
  ChannelMixer& operator=(const ChannelMixer& other) = delete;
};
} // namespace webrtc
#endif // AUDIO_UTILITY_CHANNEL_MIXER_H_

View file

@ -0,0 +1,333 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "audio/utility/channel_mixing_matrix.h"
#include <stddef.h>
#include <algorithm>
#include "audio/utility/channel_mixer.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "system_wrappers/include/field_trial.h"
namespace webrtc {
namespace {

// Selects the default usage of VoIP channel mapping adjustments: enabled
// unless the kill-switch field trial is active.
bool UseChannelMappingAdjustmentsByDefault() {
  return !field_trial::IsEnabled(
      "WebRTC-VoIPChannelRemixingAdjustmentKillSwitch");
}

}  // namespace
// CHECKs that `layout` is a concrete, supported layout and DCHECKs that its
// channel pairs are symmetric (e.g. LEFT exists iff RIGHT exists).
static void ValidateLayout(ChannelLayout layout) {
  RTC_CHECK_NE(layout, CHANNEL_LAYOUT_NONE);
  RTC_CHECK_LE(layout, CHANNEL_LAYOUT_MAX);
  RTC_CHECK_NE(layout, CHANNEL_LAYOUT_UNSUPPORTED);
  RTC_CHECK_NE(layout, CHANNEL_LAYOUT_DISCRETE);
  RTC_CHECK_NE(layout, CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC);

  // Verify there's at least one channel. Should always be true here by virtue
  // of not being one of the invalid layouts, but lets double check to be sure.
  int channel_count = ChannelLayoutToChannelCount(layout);
  RTC_DCHECK_GT(channel_count, 0);

  // If we have more than one channel, verify a symmetric layout for sanity.
  // The unit test will verify all possible layouts, so this can be a DCHECK.
  // Symmetry allows simplifying the matrix building code by allowing us to
  // assume that if one channel of a pair exists, the other will too.
  if (channel_count > 1) {
    // Assert that LEFT exists if and only if RIGHT exists, and so on.
    RTC_DCHECK_EQ(ChannelOrder(layout, LEFT) >= 0,
                  ChannelOrder(layout, RIGHT) >= 0);
    RTC_DCHECK_EQ(ChannelOrder(layout, SIDE_LEFT) >= 0,
                  ChannelOrder(layout, SIDE_RIGHT) >= 0);
    RTC_DCHECK_EQ(ChannelOrder(layout, BACK_LEFT) >= 0,
                  ChannelOrder(layout, BACK_RIGHT) >= 0);
    RTC_DCHECK_EQ(ChannelOrder(layout, LEFT_OF_CENTER) >= 0,
                  ChannelOrder(layout, RIGHT_OF_CENTER) >= 0);
  } else {
    RTC_DCHECK_EQ(layout, CHANNEL_LAYOUT_MONO);
  }
}
// Stores the channel layouts/counts, validates the non-discrete layouts, and
// normalizes 5.x "back" input layouts when upmixing to 7.x so back LR map to
// side LR.
ChannelMixingMatrix::ChannelMixingMatrix(ChannelLayout input_layout,
                                         int input_channels,
                                         ChannelLayout output_layout,
                                         int output_channels)
    : use_voip_channel_mapping_adjustments_(
          UseChannelMappingAdjustmentsByDefault()),
      input_layout_(input_layout),
      input_channels_(input_channels),
      output_layout_(output_layout),
      output_channels_(output_channels) {
  // Stereo down mix should never be the output layout.
  RTC_CHECK_NE(output_layout, CHANNEL_LAYOUT_STEREO_DOWNMIX);

  // Verify that the layouts are supported
  if (input_layout != CHANNEL_LAYOUT_DISCRETE)
    ValidateLayout(input_layout);
  if (output_layout != CHANNEL_LAYOUT_DISCRETE)
    ValidateLayout(output_layout);

  // Special case for 5.0, 5.1 with back channels when upmixed to 7.0, 7.1,
  // which should map the back LR to side LR.
  if (input_layout_ == CHANNEL_LAYOUT_5_0_BACK &&
      output_layout_ == CHANNEL_LAYOUT_7_0) {
    input_layout_ = CHANNEL_LAYOUT_5_0;
  } else if (input_layout_ == CHANNEL_LAYOUT_5_1_BACK &&
             output_layout_ == CHANNEL_LAYOUT_7_1) {
    input_layout_ = CHANNEL_LAYOUT_5_1;
  }
}

ChannelMixingMatrix::~ChannelMixingMatrix() = default;
// Builds the `output_channels_` x `input_channels_` transformation matrix in
// the caller-owned `matrix` (also cached in `matrix_` for the Mix helpers).
// Returns true when the result is a pure remapping (each output row contains
// at most a single unscaled 1), so the caller can skip the weighted sum.
bool ChannelMixingMatrix::CreateTransformationMatrix(
    std::vector<std::vector<float>>* matrix) {
  matrix_ = matrix;

  // Size out the initial matrix.
  matrix_->reserve(output_channels_);
  for (int output_ch = 0; output_ch < output_channels_; ++output_ch)
    matrix_->push_back(std::vector<float>(input_channels_, 0));

  // First check for discrete case.
  if (input_layout_ == CHANNEL_LAYOUT_DISCRETE ||
      output_layout_ == CHANNEL_LAYOUT_DISCRETE) {
    // If the number of input channels is more than output channels, then
    // copy as many as we can then drop the remaining input channels.
    // If the number of input channels is less than output channels, then
    // copy them all, then zero out the remaining output channels.
    int passthrough_channels = std::min(input_channels_, output_channels_);
    for (int i = 0; i < passthrough_channels; ++i)
      (*matrix_)[i][i] = 1;
    return true;
  }

  // If specified, use adjusted channel mapping for the VoIP scenario.
  if (use_voip_channel_mapping_adjustments_ &&
      input_layout_ == CHANNEL_LAYOUT_MONO &&
      ChannelLayoutToChannelCount(output_layout_) >= 2) {
    // Only place the mono input in the front left and right channels.
    (*matrix_)[0][0] = 1.f;
    (*matrix_)[1][0] = 1.f;
    for (size_t output_ch = 2; output_ch < matrix_->size(); ++output_ch) {
      (*matrix_)[output_ch][0] = 0.f;
    }
    return true;
  }

  // Route matching channels and figure out which ones aren't accounted for.
  for (Channels ch = LEFT; ch < CHANNELS_MAX + 1;
       ch = static_cast<Channels>(ch + 1)) {
    int input_ch_index = ChannelOrder(input_layout_, ch);
    // Skip channels that do not exist in the input layout.
    if (input_ch_index < 0)
      continue;
    int output_ch_index = ChannelOrder(output_layout_, ch);
    if (output_ch_index < 0) {
      // Present in the input but not the output: must be mixed in below.
      unaccounted_inputs_.push_back(ch);
      continue;
    }
    RTC_DCHECK_LT(static_cast<size_t>(output_ch_index), matrix_->size());
    RTC_DCHECK_LT(static_cast<size_t>(input_ch_index),
                  (*matrix_)[output_ch_index].size());
    (*matrix_)[output_ch_index][input_ch_index] = 1;
  }

  // If all input channels are accounted for, there's nothing left to do.
  if (unaccounted_inputs_.empty()) {
    // Since all output channels map directly to inputs we can optimize.
    return true;
  }

  // Mix front LR into center.
  if (IsUnaccounted(LEFT)) {
    // When down mixing to mono from stereo, we need to be careful of full scale
    // stereo mixes. Scaling by 1 / sqrt(2) here will likely lead to clipping
    // so we use 1 / 2 instead.
    float scale =
        (output_layout_ == CHANNEL_LAYOUT_MONO && input_channels_ == 2)
            ? 0.5
            : ChannelMixer::kHalfPower;
    Mix(LEFT, CENTER, scale);
    Mix(RIGHT, CENTER, scale);
  }

  // Mix center into front LR.
  if (IsUnaccounted(CENTER)) {
    // When up mixing from mono, just do a copy to front LR.
    float scale =
        (input_layout_ == CHANNEL_LAYOUT_MONO) ? 1 : ChannelMixer::kHalfPower;
    MixWithoutAccounting(CENTER, LEFT, scale);
    Mix(CENTER, RIGHT, scale);
  }

  // Mix back LR into: side LR || back center || front LR || front center.
  if (IsUnaccounted(BACK_LEFT)) {
    if (HasOutputChannel(SIDE_LEFT)) {
      // If the input has side LR, mix back LR into side LR, but instead if the
      // input doesn't have side LR (but output does) copy back LR to side LR.
      float scale = HasInputChannel(SIDE_LEFT) ? ChannelMixer::kHalfPower : 1;
      Mix(BACK_LEFT, SIDE_LEFT, scale);
      Mix(BACK_RIGHT, SIDE_RIGHT, scale);
    } else if (HasOutputChannel(BACK_CENTER)) {
      // Mix back LR into back center.
      Mix(BACK_LEFT, BACK_CENTER, ChannelMixer::kHalfPower);
      Mix(BACK_RIGHT, BACK_CENTER, ChannelMixer::kHalfPower);
    } else if (output_layout_ > CHANNEL_LAYOUT_MONO) {
      // Mix back LR into front LR.
      Mix(BACK_LEFT, LEFT, ChannelMixer::kHalfPower);
      Mix(BACK_RIGHT, RIGHT, ChannelMixer::kHalfPower);
    } else {
      // Mix back LR into front center.
      Mix(BACK_LEFT, CENTER, ChannelMixer::kHalfPower);
      Mix(BACK_RIGHT, CENTER, ChannelMixer::kHalfPower);
    }
  }

  // Mix side LR into: back LR || back center || front LR || front center.
  if (IsUnaccounted(SIDE_LEFT)) {
    if (HasOutputChannel(BACK_LEFT)) {
      // If the input has back LR, mix side LR into back LR, but instead if the
      // input doesn't have back LR (but output does) copy side LR to back LR.
      float scale = HasInputChannel(BACK_LEFT) ? ChannelMixer::kHalfPower : 1;
      Mix(SIDE_LEFT, BACK_LEFT, scale);
      Mix(SIDE_RIGHT, BACK_RIGHT, scale);
    } else if (HasOutputChannel(BACK_CENTER)) {
      // Mix side LR into back center.
      Mix(SIDE_LEFT, BACK_CENTER, ChannelMixer::kHalfPower);
      Mix(SIDE_RIGHT, BACK_CENTER, ChannelMixer::kHalfPower);
    } else if (output_layout_ > CHANNEL_LAYOUT_MONO) {
      // Mix side LR into front LR.
      Mix(SIDE_LEFT, LEFT, ChannelMixer::kHalfPower);
      Mix(SIDE_RIGHT, RIGHT, ChannelMixer::kHalfPower);
    } else {
      // Mix side LR into front center.
      Mix(SIDE_LEFT, CENTER, ChannelMixer::kHalfPower);
      Mix(SIDE_RIGHT, CENTER, ChannelMixer::kHalfPower);
    }
  }

  // Mix back center into: back LR || side LR || front LR || front center.
  if (IsUnaccounted(BACK_CENTER)) {
    if (HasOutputChannel(BACK_LEFT)) {
      // Mix back center into back LR.
      MixWithoutAccounting(BACK_CENTER, BACK_LEFT, ChannelMixer::kHalfPower);
      Mix(BACK_CENTER, BACK_RIGHT, ChannelMixer::kHalfPower);
    } else if (HasOutputChannel(SIDE_LEFT)) {
      // Mix back center into side LR.
      MixWithoutAccounting(BACK_CENTER, SIDE_LEFT, ChannelMixer::kHalfPower);
      Mix(BACK_CENTER, SIDE_RIGHT, ChannelMixer::kHalfPower);
    } else if (output_layout_ > CHANNEL_LAYOUT_MONO) {
      // Mix back center into front LR.
      // TODO(dalecurtis): Not sure about these values?
      MixWithoutAccounting(BACK_CENTER, LEFT, ChannelMixer::kHalfPower);
      Mix(BACK_CENTER, RIGHT, ChannelMixer::kHalfPower);
    } else {
      // Mix back center into front center.
      // TODO(dalecurtis): Not sure about these values?
      Mix(BACK_CENTER, CENTER, ChannelMixer::kHalfPower);
    }
  }

  // Mix LR of center into: front LR || front center.
  if (IsUnaccounted(LEFT_OF_CENTER)) {
    if (HasOutputChannel(LEFT)) {
      // Mix LR of center into front LR.
      Mix(LEFT_OF_CENTER, LEFT, ChannelMixer::kHalfPower);
      Mix(RIGHT_OF_CENTER, RIGHT, ChannelMixer::kHalfPower);
    } else {
      // Mix LR of center into front center.
      Mix(LEFT_OF_CENTER, CENTER, ChannelMixer::kHalfPower);
      Mix(RIGHT_OF_CENTER, CENTER, ChannelMixer::kHalfPower);
    }
  }

  // Mix LFE into: front center || front LR.
  if (IsUnaccounted(LFE)) {
    if (!HasOutputChannel(CENTER)) {
      // Mix LFE into front LR.
      MixWithoutAccounting(LFE, LEFT, ChannelMixer::kHalfPower);
      Mix(LFE, RIGHT, ChannelMixer::kHalfPower);
    } else {
      // Mix LFE into front center.
      Mix(LFE, CENTER, ChannelMixer::kHalfPower);
    }
  }

  // All channels should now be accounted for.
  RTC_DCHECK(unaccounted_inputs_.empty());

  // See if the output `matrix_` is simply a remapping matrix. If each input
  // channel maps to a single output channel we can simply remap. Doing this
  // programmatically is less fragile than logic checks on channel mappings.
  for (int output_ch = 0; output_ch < output_channels_; ++output_ch) {
    int input_mappings = 0;
    for (int input_ch = 0; input_ch < input_channels_; ++input_ch) {
      // We can only remap if each row contains a single scale of 1. I.e., each
      // output channel is mapped from a single unscaled input channel.
      if ((*matrix_)[output_ch][input_ch] != 1 || ++input_mappings > 1)
        return false;
    }
  }

  // If we've gotten here, `matrix_` is simply a remapping.
  return true;
}
// Removes `ch` from the list of unaccounted input channels. `ch` must
// currently be unaccounted for: erasing the result of a failed find would
// pass `end()` to `erase()`, which is undefined behavior, so DCHECK first.
void ChannelMixingMatrix::AccountFor(Channels ch) {
  auto it =
      std::find(unaccounted_inputs_.begin(), unaccounted_inputs_.end(), ch);
  RTC_DCHECK(it != unaccounted_inputs_.end());
  unaccounted_inputs_.erase(it);
}
// Returns true while `ch` is an input channel that has not yet been mixed
// into any output channel.
bool ChannelMixingMatrix::IsUnaccounted(Channels ch) const {
  for (Channels pending : unaccounted_inputs_) {
    if (pending == ch) {
      return true;
    }
  }
  return false;
}
// Returns true when `ch` exists in the (possibly normalized) input layout.
bool ChannelMixingMatrix::HasInputChannel(Channels ch) const {
  const int order = ChannelOrder(input_layout_, ch);
  return order >= 0;
}
// Returns true when `ch` exists in the output layout.
bool ChannelMixingMatrix::HasOutputChannel(Channels ch) const {
  const int order = ChannelOrder(output_layout_, ch);
  return order >= 0;
}
// Writes `scale` for mixing `input_ch` into `output_ch`, then marks
// `input_ch` as accounted for. Order matters: MixWithoutAccounting DCHECKs
// that the input is still unaccounted, so AccountFor must come second.
void ChannelMixingMatrix::Mix(Channels input_ch,
                              Channels output_ch,
                              float scale) {
  MixWithoutAccounting(input_ch, output_ch, scale);
  AccountFor(input_ch);
}
// Writes `scale` into the matrix cell mapping `input_ch` to `output_ch`
// without removing `input_ch` from the unaccounted list (used when one input
// channel feeds several output channels).
void ChannelMixingMatrix::MixWithoutAccounting(Channels input_ch,
                                               Channels output_ch,
                                               float scale) {
  int input_ch_index = ChannelOrder(input_layout_, input_ch);
  int output_ch_index = ChannelOrder(output_layout_, output_ch);

  // The input must still be pending and both channels must exist in their
  // respective layouts.
  RTC_DCHECK(IsUnaccounted(input_ch));
  RTC_DCHECK_GE(input_ch_index, 0);
  RTC_DCHECK_GE(output_ch_index, 0);

  // Each matrix cell may only be written once.
  RTC_DCHECK_EQ((*matrix_)[output_ch_index][input_ch_index], 0);
  (*matrix_)[output_ch_index][input_ch_index] = scale;
}
} // namespace webrtc

View file

@ -0,0 +1,76 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef AUDIO_UTILITY_CHANNEL_MIXING_MATRIX_H_
#define AUDIO_UTILITY_CHANNEL_MIXING_MATRIX_H_
#include <vector>
#include "api/audio/channel_layout.h"
namespace webrtc {
class ChannelMixingMatrix {
 public:
  ChannelMixingMatrix(ChannelLayout input_layout,
                      int input_channels,
                      ChannelLayout output_layout,
                      int output_channels);

  ~ChannelMixingMatrix();

  // Create the transformation matrix of input channels to output channels.
  // Updates the empty matrix with the transformation, and returns true
  // if the transformation is just a remapping of channels (no mixing).
  // The size of `matrix` is `output_channels` x `input_channels`, i.e., the
  // number of rows equals the number of output channels and the number of
  // columns corresponds to the number of input channels.
  // This file is derived from Chromium's media/base/channel_mixing_matrix.h.
  bool CreateTransformationMatrix(std::vector<std::vector<float>>* matrix);

 private:
  const bool use_voip_channel_mapping_adjustments_;

  // Result transformation of input channels to output channels.
  // Non-owning pointer to the caller's matrix, set by
  // CreateTransformationMatrix().
  std::vector<std::vector<float>>* matrix_;

  // Input and output channel layout provided during construction. The input
  // layout may be normalized by the constructor (e.g. 5.x "back" variants
  // when upmixing to 7.x), so it is not const.
  ChannelLayout input_layout_;
  int input_channels_;
  ChannelLayout output_layout_;
  int output_channels_;

  // Helper variable for tracking which inputs are currently unaccounted,
  // should be empty after construction completes.
  std::vector<Channels> unaccounted_inputs_;

  // Helper methods for managing unaccounted input channels.
  void AccountFor(Channels ch);
  bool IsUnaccounted(Channels ch) const;

  // Helper methods for checking if `ch` exists in either `input_layout_` or
  // `output_layout_` respectively.
  bool HasInputChannel(Channels ch) const;
  bool HasOutputChannel(Channels ch) const;

  // Helper methods for updating `matrix_` with the proper value for
  // mixing `input_ch` into `output_ch`. MixWithoutAccounting() does not
  // remove the channel from `unaccounted_inputs_`.
  void Mix(Channels input_ch, Channels output_ch, float scale);
  void MixWithoutAccounting(Channels input_ch, Channels output_ch, float scale);

  // Delete the copy constructor and assignment operator.
  ChannelMixingMatrix(const ChannelMixingMatrix& other) = delete;
  ChannelMixingMatrix& operator=(const ChannelMixingMatrix& other) = delete;
};
} // namespace webrtc
#endif // AUDIO_UTILITY_CHANNEL_MIXING_MATRIX_H_