Repo created

This commit is contained in:
Fr4nz D13trich 2025-11-22 14:04:28 +01:00
parent 81b91f4139
commit f8c34fa5ee
22732 changed files with 4815320 additions and 2 deletions

View file

@ -0,0 +1,3 @@
henrik.lundin@webrtc.org
minyue@webrtc.org
peah@webrtc.org

View file

@ -0,0 +1,219 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/audio_converter.h"
#include <cstring>
#include <memory>
#include <utility>
#include <vector>
#include "common_audio/channel_buffer.h"
#include "common_audio/resampler/push_sinc_resampler.h"
#include "rtc_base/checks.h"
#include "rtc_base/numerics/safe_conversions.h"
namespace webrtc {
// Pass-through converter: every source channel is copied verbatim into the
// destination channel with the same index. AudioConverter::Create() selects
// it when neither the channel count nor the frame count changes.
class CopyConverter : public AudioConverter {
 public:
  CopyConverter(size_t src_channels,
                size_t src_frames,
                size_t dst_channels,
                size_t dst_frames)
      : AudioConverter(src_channels, src_frames, dst_channels, dst_frames) {}
  ~CopyConverter() override = default;

  // Copies `dst_frames()` samples for each of the `src_channels()` channels.
  // When `src` and `dst` are the very same pointer array nothing is copied.
  void Convert(const float* const* src,
               size_t src_size,
               float* const* dst,
               size_t dst_capacity) override {
    CheckSizes(src_size, dst_capacity);
    if (src == dst) {
      return;
    }
    const size_t bytes_per_channel = dst_frames() * sizeof(*dst[0]);
    const size_t channel_count = src_channels();
    for (size_t ch = 0; ch < channel_count; ++ch) {
      std::memcpy(dst[ch], src[ch], bytes_per_channel);
    }
  }
};
// Duplicates the first source channel into every destination channel.
class UpmixConverter : public AudioConverter {
 public:
  UpmixConverter(size_t src_channels,
                 size_t src_frames,
                 size_t dst_channels,
                 size_t dst_frames)
      : AudioConverter(src_channels, src_frames, dst_channels, dst_frames) {}
  ~UpmixConverter() override = default;

  // For each of the `dst_frames()` frames, reads one sample from `src[0]` and
  // stores it at the same frame index in all `dst_channels()` channels.
  void Convert(const float* const* src,
               size_t src_size,
               float* const* dst,
               size_t dst_capacity) override {
    CheckSizes(src_size, dst_capacity);
    const float* mono = src[0];
    const size_t frame_count = dst_frames();
    const size_t channel_count = dst_channels();
    for (size_t frame = 0; frame < frame_count; ++frame) {
      // Read the sample before writing so the frame-by-frame order of the
      // original loop is kept.
      const float sample = mono[frame];
      for (size_t ch = 0; ch < channel_count; ++ch) {
        dst[ch][frame] = sample;
      }
    }
  }
};
// Averages all source channels into the first destination channel.
class DownmixConverter : public AudioConverter {
 public:
  DownmixConverter(size_t src_channels,
                   size_t src_frames,
                   size_t dst_channels,
                   size_t dst_frames)
      : AudioConverter(src_channels, src_frames, dst_channels, dst_frames) {}
  ~DownmixConverter() override = default;

  // For each of the `src_frames()` frames, writes the arithmetic mean of the
  // `src_channels()` source samples to `dst[0]`.
  void Convert(const float* const* src,
               size_t src_size,
               float* const* dst,
               size_t dst_capacity) override {
    CheckSizes(src_size, dst_capacity);
    float* mono_out = dst[0];
    const size_t frame_count = src_frames();
    const size_t channel_count = src_channels();
    for (size_t frame = 0; frame < frame_count; ++frame) {
      float accumulated = 0;
      for (size_t ch = 0; ch < channel_count; ++ch) {
        accumulated += src[ch][frame];
      }
      mono_out[frame] = accumulated / channel_count;
    }
  }
};
class ResampleConverter : public AudioConverter {
public:
ResampleConverter(size_t src_channels,
size_t src_frames,
size_t dst_channels,
size_t dst_frames)
: AudioConverter(src_channels, src_frames, dst_channels, dst_frames) {
resamplers_.reserve(src_channels);
for (size_t i = 0; i < src_channels; ++i)
resamplers_.push_back(std::unique_ptr<PushSincResampler>(
new PushSincResampler(src_frames, dst_frames)));
}
~ResampleConverter() override {}
void Convert(const float* const* src,
size_t src_size,
float* const* dst,
size_t dst_capacity) override {
CheckSizes(src_size, dst_capacity);
for (size_t i = 0; i < resamplers_.size(); ++i)
resamplers_[i]->Resample(src[i], src_frames(), dst[i], dst_frames());
}
private:
std::vector<std::unique_ptr<PushSincResampler>> resamplers_;
};
// Apply a vector of converters in serial, in the order given. At least two
// converters must be provided.
class CompositionConverter : public AudioConverter {
public:
explicit CompositionConverter(
std::vector<std::unique_ptr<AudioConverter>> converters)
: converters_(std::move(converters)) {
RTC_CHECK_GE(converters_.size(), 2);
// We need an intermediate buffer after every converter.
for (auto it = converters_.begin(); it != converters_.end() - 1; ++it)
buffers_.push_back(
std::unique_ptr<ChannelBuffer<float>>(new ChannelBuffer<float>(
(*it)->dst_frames(), (*it)->dst_channels())));
}
~CompositionConverter() override {}
void Convert(const float* const* src,
size_t src_size,
float* const* dst,
size_t dst_capacity) override {
converters_.front()->Convert(src, src_size, buffers_.front()->channels(),
buffers_.front()->size());
for (size_t i = 2; i < converters_.size(); ++i) {
auto& src_buffer = buffers_[i - 2];
auto& dst_buffer = buffers_[i - 1];
converters_[i]->Convert(src_buffer->channels(), src_buffer->size(),
dst_buffer->channels(), dst_buffer->size());
}
converters_.back()->Convert(buffers_.back()->channels(),
buffers_.back()->size(), dst, dst_capacity);
}
private:
std::vector<std::unique_ptr<AudioConverter>> converters_;
std::vector<std::unique_ptr<ChannelBuffer<float>>> buffers_;
};
std::unique_ptr<AudioConverter> AudioConverter::Create(size_t src_channels,
size_t src_frames,
size_t dst_channels,
size_t dst_frames) {
std::unique_ptr<AudioConverter> sp;
if (src_channels > dst_channels) {
if (src_frames != dst_frames) {
std::vector<std::unique_ptr<AudioConverter>> converters;
converters.push_back(std::unique_ptr<AudioConverter>(new DownmixConverter(
src_channels, src_frames, dst_channels, src_frames)));
converters.push_back(
std::unique_ptr<AudioConverter>(new ResampleConverter(
dst_channels, src_frames, dst_channels, dst_frames)));
sp.reset(new CompositionConverter(std::move(converters)));
} else {
sp.reset(new DownmixConverter(src_channels, src_frames, dst_channels,
dst_frames));
}
} else if (src_channels < dst_channels) {
if (src_frames != dst_frames) {
std::vector<std::unique_ptr<AudioConverter>> converters;
converters.push_back(
std::unique_ptr<AudioConverter>(new ResampleConverter(
src_channels, src_frames, src_channels, dst_frames)));
converters.push_back(std::unique_ptr<AudioConverter>(new UpmixConverter(
src_channels, dst_frames, dst_channels, dst_frames)));
sp.reset(new CompositionConverter(std::move(converters)));
} else {
sp.reset(new UpmixConverter(src_channels, src_frames, dst_channels,
dst_frames));
}
} else if (src_frames != dst_frames) {
sp.reset(new ResampleConverter(src_channels, src_frames, dst_channels,
dst_frames));
} else {
sp.reset(
new CopyConverter(src_channels, src_frames, dst_channels, dst_frames));
}
return sp;
}
// Default constructor: sets every format field to zero. Exists for
// CompositionConverter, which chains other converters and does not pass a
// format of its own to the base class.
AudioConverter::AudioConverter()
: src_channels_(0), src_frames_(0), dst_channels_(0), dst_frames_(0) {}
// Stores the source and destination format (channel and frame counts).
// Aborts via RTC_CHECK unless the channel counts are equal or one of the two
// sides is mono, i.e. only same-layout, downmix-to-mono and upmix-from-mono
// conversions are accepted.
AudioConverter::AudioConverter(size_t src_channels,
size_t src_frames,
size_t dst_channels,
size_t dst_frames)
: src_channels_(src_channels),
src_frames_(src_frames),
dst_channels_(dst_channels),
dst_frames_(dst_frames) {
RTC_CHECK(dst_channels == src_channels || dst_channels == 1 ||
src_channels == 1);
}
// Validates the sample counts passed to Convert() against the stored format.
// `src_size` must equal src_channels() * src_frames() exactly, while
// `dst_capacity` only has to be at least dst_channels() * dst_frames().
// A violation aborts via RTC_CHECK.
void AudioConverter::CheckSizes(size_t src_size, size_t dst_capacity) const {
RTC_CHECK_EQ(src_size, src_channels() * src_frames());
RTC_CHECK_GE(dst_capacity, dst_channels() * dst_frames());
}
} // namespace webrtc

View file

@ -0,0 +1,72 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_AUDIO_CONVERTER_H_
#define COMMON_AUDIO_AUDIO_CONVERTER_H_
#include <stddef.h>
#include <memory>
namespace webrtc {
// Format conversion (remixing and resampling) for audio. Only simple remixing
// conversions are supported: downmix to mono (i.e. `dst_channels` == 1) or
// upmix from mono (i.e. `src_channels` == 1).
//
// The source and destination chunks have the same duration in time; specifying
// the number of frames is equivalent to specifying the sample rates.
class AudioConverter {
public:
// Returns a new AudioConverter, which will use the supplied format for its
// lifetime. Ownership is handed to the caller through the returned
// std::unique_ptr.
static std::unique_ptr<AudioConverter> Create(size_t src_channels,
size_t src_frames,
size_t dst_channels,
size_t dst_frames);
virtual ~AudioConverter() {}
// Converters are neither copyable nor copy-assignable.
AudioConverter(const AudioConverter&) = delete;
AudioConverter& operator=(const AudioConverter&) = delete;
// Convert `src`, containing `src_size` samples, to `dst`, having a sample
// capacity of `dst_capacity`. Both point to a series of buffers containing
// the samples for each channel. The sizes must correspond to the format
// passed to Create().
virtual void Convert(const float* const* src,
size_t src_size,
float* const* dst,
size_t dst_capacity) = 0;
// Accessors for the format given at construction.
size_t src_channels() const { return src_channels_; }
size_t src_frames() const { return src_frames_; }
size_t dst_channels() const { return dst_channels_; }
size_t dst_frames() const { return dst_frames_; }
protected:
// Default constructor; declared here for use by subclasses that do not
// describe a single format.
AudioConverter();
// Stores the given format for the lifetime of the converter.
AudioConverter(size_t src_channels,
size_t src_frames,
size_t dst_channels,
size_t dst_frames);
// Helper to RTC_CHECK that inputs are correctly sized.
void CheckSizes(size_t src_size, size_t dst_capacity) const;
private:
// Immutable format description; set once in the constructor.
const size_t src_channels_;
const size_t src_frames_;
const size_t dst_channels_;
const size_t dst_frames_;
};
} // namespace webrtc
#endif // COMMON_AUDIO_AUDIO_CONVERTER_H_

View file

@ -0,0 +1,54 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/include/audio_util.h"
namespace webrtc {
// Buffer form of FloatToS16(): applies the single-sample FloatToS16()
// overload (declared outside this file) to each of the `size` elements of
// `src`, in order, and stores the results in `dest`.
void FloatToS16(const float* src, size_t size, int16_t* dest) {
  const float* const src_end = src + size;
  while (src != src_end) {
    *dest++ = FloatToS16(*src++);
  }
}
// Buffer form of S16ToFloat(): applies the single-sample S16ToFloat()
// overload (declared outside this file) to each of the `size` elements of
// `src`, in order, and stores the results in `dest`.
void S16ToFloat(const int16_t* src, size_t size, float* dest) {
  const int16_t* const src_end = src + size;
  while (src != src_end) {
    *dest++ = S16ToFloat(*src++);
  }
}
// Copies `size` int16_t samples from `src` into the float buffer `dest`,
// keeping each numeric value unchanged (plain integer-to-float conversion,
// no rescaling).
void S16ToFloatS16(const int16_t* src, size_t size, float* dest) {
  const int16_t* const src_end = src + size;
  while (src != src_end) {
    *dest++ = *src++;
  }
}
// Buffer form of FloatS16ToS16(): applies the single-sample FloatS16ToS16()
// overload (declared outside this file) to each of the `size` elements of
// `src`, in order, and stores the results in `dest`.
void FloatS16ToS16(const float* src, size_t size, int16_t* dest) {
  const float* const src_end = src + size;
  while (src != src_end) {
    *dest++ = FloatS16ToS16(*src++);
  }
}
// Buffer form of FloatToFloatS16(): applies the single-sample
// FloatToFloatS16() overload (declared outside this file) to each of the
// `size` elements of `src`, in order, and stores the results in `dest`.
void FloatToFloatS16(const float* src, size_t size, float* dest) {
  const float* const src_end = src + size;
  while (src != src_end) {
    *dest++ = FloatToFloatS16(*src++);
  }
}
// Buffer form of FloatS16ToFloat(): applies the single-sample
// FloatS16ToFloat() overload (declared outside this file) to each of the
// `size` elements of `src`, in order, and stores the results in `dest`.
void FloatS16ToFloat(const float* src, size_t size, float* dest) {
  const float* const src_end = src + size;
  while (src != src_end) {
    *dest++ = FloatS16ToFloat(*src++);
  }
}
// Explicit specialization of DownmixInterleavedToMono for int16_t samples.
// Forwards all arguments unchanged to DownmixInterleavedToMonoImpl, selecting
// int32_t as its second template argument.
// NOTE(review): the second template argument is presumably the wider type
// used for intermediate sums - confirm against the Impl template in
// audio_util.h, which is not visible here.
template <>
void DownmixInterleavedToMono<int16_t>(const int16_t* interleaved,
size_t num_frames,
int num_channels,
int16_t* deinterleaved) {
DownmixInterleavedToMonoImpl<int16_t, int32_t>(interleaved, num_frames,
num_channels, deinterleaved);
}
} // namespace webrtc

View file

@ -0,0 +1,80 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/channel_buffer.h"
#include <cstdint>
#include "common_audio/include/audio_util.h"
#include "rtc_base/checks.h"
namespace webrtc {
// Creates the int16_t and float buffers with identical dimensions. Both are
// flagged valid from the start, so neither is refreshed from the other until
// one of them has been handed out for writing.
IFChannelBuffer::IFChannelBuffer(size_t num_frames,
size_t num_channels,
size_t num_bands)
: ivalid_(true),
ibuf_(num_frames, num_channels, num_bands),
fvalid_(true),
fbuf_(num_frames, num_channels, num_bands) {}
// Nothing to release by hand; the member buffers clean up after themselves.
IFChannelBuffer::~IFChannelBuffer() = default;
// Returns the int16_t buffer for writing. The buffer is first brought up to
// date from the float buffer if needed; the float buffer is then marked
// stale because the caller may modify the int16_t data.
ChannelBuffer<int16_t>* IFChannelBuffer::ibuf() {
RefreshI();
fvalid_ = false;
return &ibuf_;
}
// Returns the float buffer for writing. The buffer is first brought up to
// date from the int16_t buffer if needed; the int16_t buffer is then marked
// stale because the caller may modify the float data.
ChannelBuffer<float>* IFChannelBuffer::fbuf() {
RefreshF();
ivalid_ = false;
return &fbuf_;
}
// Returns the int16_t buffer for reading, refreshing it from the float buffer
// first if it is stale. Does not invalidate the float buffer.
const ChannelBuffer<int16_t>* IFChannelBuffer::ibuf_const() const {
RefreshI();
return &ibuf_;
}
// Returns the float buffer for reading, refreshing it from the int16_t buffer
// first if it is stale. Does not invalidate the int16_t buffer.
const ChannelBuffer<float>* IFChannelBuffer::fbuf_const() const {
RefreshF();
return &fbuf_;
}
// Brings the float buffer up to date from the int16_t buffer when it is
// stale: adopts the int16_t buffer's channel count, copies every sample with
// a plain integer-to-float conversion, and marks the float buffer valid.
// Does nothing when the float buffer is already valid.
void IFChannelBuffer::RefreshF() const {
  if (fvalid_) {
    return;
  }
  RTC_DCHECK(ivalid_);
  fbuf_.set_num_channels(ibuf_.num_channels());
  const int16_t* const* source = ibuf_.channels();
  float* const* target = fbuf_.channels();
  const size_t channel_count = ibuf_.num_channels();
  const size_t frame_count = ibuf_.num_frames();
  for (size_t ch = 0; ch < channel_count; ++ch) {
    const int16_t* from = source[ch];
    float* to = target[ch];
    for (size_t frame = 0; frame < frame_count; ++frame) {
      to[frame] = from[frame];
    }
  }
  fvalid_ = true;
}
// Brings the int16_t buffer up to date from the float buffer when it is
// stale: adopts the float buffer's channel count, converts each channel with
// FloatS16ToS16(), and marks the int16_t buffer valid. Does nothing when the
// int16_t buffer is already valid.
void IFChannelBuffer::RefreshI() const {
  if (ivalid_) {
    return;
  }
  RTC_DCHECK(fvalid_);
  int16_t* const* target = ibuf_.channels();
  ibuf_.set_num_channels(fbuf_.num_channels());
  const float* const* source = fbuf_.channels();
  const size_t channel_count = fbuf_.num_channels();
  for (size_t ch = 0; ch < channel_count; ++ch) {
    FloatS16ToS16(source[ch], ibuf_.num_frames(), target[ch]);
  }
  ivalid_ = true;
}
} // namespace webrtc

View file

@ -0,0 +1,215 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_CHANNEL_BUFFER_H_
#define COMMON_AUDIO_CHANNEL_BUFFER_H_
#include <string.h>
#include <memory>
#include <vector>
#include "api/array_view.h"
#include "common_audio/include/audio_util.h"
#include "rtc_base/checks.h"
#include "rtc_base/gtest_prod_util.h"
namespace webrtc {
// Helper to encapsulate a contiguous data buffer, full or split into frequency
// bands, with access to pointer arrays of the deinterleaved channels and
// bands. The buffer is zero initialized at creation.
//
// The buffer structure is shown below for a 2 channel and 2 bands case:
//
// `data_`:
// { [ --- b1ch1 --- ] [ --- b2ch1 --- ] [ --- b1ch2 --- ] [ --- b2ch2 --- ] }
//
// The pointer arrays for the same example are as follows:
//
// `channels_`:
// { [ b1ch1* ] [ b1ch2* ] [ b2ch1* ] [ b2ch2* ] }
//
// `bands_`:
// { [ b1ch1* ] [ b2ch1* ] [ b1ch2* ] [ b2ch2* ] }
template <typename T>
class ChannelBuffer {
public:
// Allocates `num_frames * num_channels` value-initialized samples and builds
// the per-band/per-channel pointer tables and array views into that storage.
// Each band of a channel covers `num_frames / num_bands` samples.
// NOTE(review): the division is an integer division, so `num_frames` is
// presumably expected to be a multiple of `num_bands` - confirm with callers.
ChannelBuffer(size_t num_frames, size_t num_channels, size_t num_bands = 1)
: data_(new T[num_frames * num_channels]()),
channels_(new T*[num_channels * num_bands]),
bands_(new T*[num_channels * num_bands]),
num_frames_(num_frames),
num_frames_per_band_(num_frames / num_bands),
num_allocated_channels_(num_channels),
num_channels_(num_channels),
num_bands_(num_bands),
bands_view_(num_allocated_channels_,
std::vector<rtc::ArrayView<T>>(num_bands_)),
channels_view_(
num_bands_,
std::vector<rtc::ArrayView<T>>(num_allocated_channels_)) {
// The view members are declared const; cast the constness away here, inside
// the constructor only, so that their elements can be filled in.
auto* bands_view =
const_cast<std::vector<std::vector<rtc::ArrayView<T>>>*>(&bands_view_);
auto* channels_view =
const_cast<std::vector<std::vector<rtc::ArrayView<T>>>*>(
&channels_view_);
for (size_t ch = 0; ch < num_allocated_channels_; ++ch) {
for (size_t band = 0; band < num_bands_; ++band) {
// A channel occupies `num_frames_` consecutive samples in `data_`; its
// bands follow each other inside that range.
(*channels_view)[band][ch] = rtc::ArrayView<T>(
&data_[ch * num_frames_ + band * num_frames_per_band_],
num_frames_per_band_);
(*bands_view)[ch][band] = channels_view_[band][ch];
// `channels_` is indexed band-major, `bands_` channel-major; both refer to
// the same sample ranges.
channels_[band * num_allocated_channels_ + ch] =
channels_view_[band][ch].data();
bands_[ch * num_bands_ + band] =
channels_[band * num_allocated_channels_ + ch];
}
}
}
// Returns a pointer array to the channels.
// If band is explicitly specified, the channels for a specific band are
// returned and the usage becomes: channels(band)[channel][sample].
// Where:
// 0 <= band < `num_bands_`
// 0 <= channel < `num_allocated_channels_`
// 0 <= sample < `num_frames_per_band_`
// If band is not explicitly specified, the full-band channels (or lower band
// channels) are returned and the usage becomes: channels()[channel][sample].
// Where:
// 0 <= channel < `num_allocated_channels_`
// 0 <= sample < `num_frames_`
const T* const* channels(size_t band = 0) const {
RTC_DCHECK_LT(band, num_bands_);
return &channels_[band * num_allocated_channels_];
}
// Mutable overload; delegates to the const version.
T* const* channels(size_t band = 0) {
const ChannelBuffer<T>* t = this;
return const_cast<T* const*>(t->channels(band));
}
// Array-view counterpart of channels(): one view per allocated channel for
// the given band. `band` is not range-checked here.
rtc::ArrayView<const rtc::ArrayView<T>> channels_view(size_t band = 0) {
return channels_view_[band];
}
rtc::ArrayView<const rtc::ArrayView<T>> channels_view(size_t band = 0) const {
return channels_view_[band];
}
// Returns a pointer array to the bands for a specific channel.
// Usage:
// bands(channel)[band][sample].
// Where:
// 0 <= channel < `num_channels_`
// 0 <= band < `num_bands_`
// 0 <= sample < `num_frames_per_band_`
const T* const* bands(size_t channel) const {
RTC_DCHECK_LT(channel, num_channels_);
// `channel` is an unsigned size_t, so this check can never fail.
RTC_DCHECK_GE(channel, 0);
return &bands_[channel * num_bands_];
}
// Mutable overload; delegates to the const version.
T* const* bands(size_t channel) {
const ChannelBuffer<T>* t = this;
return const_cast<T* const*>(t->bands(channel));
}
// Array-view counterpart of bands(): one view per band for the given
// channel. `channel` is not range-checked here.
rtc::ArrayView<const rtc::ArrayView<T>> bands_view(size_t channel) {
return bands_view_[channel];
}
rtc::ArrayView<const rtc::ArrayView<T>> bands_view(size_t channel) const {
return bands_view_[channel];
}
// Sets the `slice` pointers to the `start_frame` position for each channel.
// Returns `slice` for convenience. `slice` must have room for at least
// `num_channels_` pointers.
const T* const* Slice(T** slice, size_t start_frame) const {
RTC_DCHECK_LT(start_frame, num_frames_);
for (size_t i = 0; i < num_channels_; ++i)
slice[i] = &channels_[i][start_frame];
return slice;
}
// Mutable overload; delegates to the const version.
T** Slice(T** slice, size_t start_frame) {
const ChannelBuffer<T>* t = this;
return const_cast<T**>(t->Slice(slice, start_frame));
}
size_t num_frames() const { return num_frames_; }
size_t num_frames_per_band() const { return num_frames_per_band_; }
size_t num_channels() const { return num_channels_; }
size_t num_bands() const { return num_bands_; }
// Total number of samples allocated (based on the allocated channel count,
// not on the count set through set_num_channels()).
size_t size() const { return num_frames_ * num_allocated_channels_; }
// Changes the number of channels reported by num_channels(); it may not
// exceed the number of channels allocated at construction.
void set_num_channels(size_t num_channels) {
RTC_DCHECK_LE(num_channels, num_allocated_channels_);
num_channels_ = num_channels;
}
// Overwrites the whole sample storage with `data`; `size` must equal size().
void SetDataForTesting(const T* data, size_t size) {
RTC_CHECK_EQ(size, this->size());
memcpy(data_.get(), data, size * sizeof(*data));
}
private:
// Sample storage, plus the two pointer tables described in the class comment.
std::unique_ptr<T[]> data_;
std::unique_ptr<T*[]> channels_;
std::unique_ptr<T*[]> bands_;
const size_t num_frames_;
const size_t num_frames_per_band_;
// Number of channels the internal buffer holds.
const size_t num_allocated_channels_;
// Number of channels the user sees.
size_t num_channels_;
const size_t num_bands_;
// Array views over the same storage; filled in by the constructor.
const std::vector<std::vector<rtc::ArrayView<T>>> bands_view_;
const std::vector<std::vector<rtc::ArrayView<T>>> channels_view_;
};
// One int16_t and one float ChannelBuffer that are kept in sync. The sync is
// broken when someone requests write access to either ChannelBuffer, and
// reestablished when someone requests the outdated ChannelBuffer. It is
// therefore safe to use the return value of ibuf_const() and fbuf_const()
// until the next call to ibuf() or fbuf(), and the return value of ibuf() and
// fbuf() until the next call to any of the other functions.
class IFChannelBuffer {
public:
IFChannelBuffer(size_t num_frames, size_t num_channels, size_t num_bands = 1);
~IFChannelBuffer();
// Write access; the other representation is marked outdated.
ChannelBuffer<int16_t>* ibuf();
ChannelBuffer<float>* fbuf();
// Read access; the requested representation is refreshed first if outdated.
const ChannelBuffer<int16_t>* ibuf_const() const;
const ChannelBuffer<float>* fbuf_const() const;
size_t num_frames() const { return ibuf_.num_frames(); }
size_t num_frames_per_band() const { return ibuf_.num_frames_per_band(); }
// Reports the channel count of whichever buffer is currently valid (the
// int16_t one when both are).
size_t num_channels() const {
return ivalid_ ? ibuf_.num_channels() : fbuf_.num_channels();
}
// Applies the new channel count to both buffers.
void set_num_channels(size_t num_channels) {
ibuf_.set_num_channels(num_channels);
fbuf_.set_num_channels(num_channels);
}
size_t num_bands() const { return ibuf_.num_bands(); }
private:
// Bring the float / int16_t buffer up to date from the other one if needed.
void RefreshF() const;
void RefreshI() const;
// All state is mutable because the const accessors may have to refresh the
// buffer they return.
mutable bool ivalid_;
mutable ChannelBuffer<int16_t> ibuf_;
mutable bool fvalid_;
mutable ChannelBuffer<float> fbuf_;
};
} // namespace webrtc
#endif // COMMON_AUDIO_CHANNEL_BUFFER_H_

View file

@ -0,0 +1,30 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_FIR_FILTER_H_
#define COMMON_AUDIO_FIR_FILTER_H_
#include <string.h>
namespace webrtc {
// Finite Impulse Response filter using floating-point arithmetic. Abstract
// interface; concrete implementations override Filter().
class FIRFilter {
public:
virtual ~FIRFilter() {}
// Filters the `in` data supplied.
// `in` holds `length` samples; `out` must be allocated by the caller and have
// room for at least `length` samples.
virtual void Filter(const float* in, size_t length, float* out) = 0;
};
} // namespace webrtc
#endif // COMMON_AUDIO_FIR_FILTER_H_

View file

@ -0,0 +1,88 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/fir_filter_avx2.h"
#include <immintrin.h>
#include <stdint.h>
#include <string.h>
#include <xmmintrin.h>
#include "rtc_base/checks.h"
#include "rtc_base/memory/aligned_malloc.h"
namespace webrtc {
// Prepares the filter: rounds the tap count up to a multiple of eight,
// allocates 32-byte-aligned coefficient and state buffers, stores the
// coefficients in reverse order behind zero padding, and clears the state.
// The state buffer holds `state_length_` history samples followed by room
// for `max_input_length` input samples.
FIRFilterAVX2::FIRFilterAVX2(const float* unaligned_coefficients,
                             size_t unaligned_coefficients_length,
                             size_t max_input_length)
    :  // Closest higher multiple of eight.
      coefficients_length_((unaligned_coefficients_length + 7) & ~0x07),
      state_length_(coefficients_length_ - 1),
      coefficients_(static_cast<float*>(
          AlignedMalloc(sizeof(float) * coefficients_length_, 32))),
      state_(static_cast<float*>(
          AlignedMalloc(sizeof(float) * (max_input_length + state_length_),
                        32))) {
  RTC_DCHECK_GE(coefficients_length_, unaligned_coefficients_length);
  // The padding zeros occupy the first entries of the stored array.
  const size_t leading_zeros =
      coefficients_length_ - unaligned_coefficients_length;
  memset(coefficients_.get(), 0, leading_zeros * sizeof(coefficients_[0]));

  // Store the taps back-to-front right after the padding; the input samples
  // are kept oldest-first (most recent last), so the kernel has to be
  // reversed.
  float* reversed = coefficients_.get() + leading_zeros;
  for (size_t remaining = unaligned_coefficients_length; remaining > 0;
       --remaining) {
    *reversed++ = unaligned_coefficients[remaining - 1];
  }

  const size_t state_samples = max_input_length + state_length_;
  memset(state_.get(), 0, state_samples * sizeof(state_[0]));
}
// The aligned buffers are released by their unique_ptr deleters.
FIRFilterAVX2::~FIRFilterAVX2() = default;
// Filters `length` samples from `in` into `out`, eight taps at a time with
// AVX2/FMA intrinsics, and keeps the last `state_length_` samples as history
// for the next call.
// NOTE(review): nothing here checks `length` against the `max_input_length`
// used to size `state_` in the constructor; callers presumably must not
// exceed it - confirm.
void FIRFilterAVX2::Filter(const float* in, size_t length, float* out) {
RTC_DCHECK_GT(length, 0);
// Append the new samples after the stored history so each output can be
// computed from one contiguous window of `state_`.
memcpy(&state_[state_length_], in, length * sizeof(*in));
// Convolves the input signal `in` with the filter kernel `coefficients_`
// taking into account the previous state.
for (size_t i = 0; i < length; ++i) {
float* in_ptr = &state_[i];
float* coef_ptr = coefficients_.get();
__m256 m_sum = _mm256_setzero_ps();
__m256 m_in;
// Depending on if the pointer is aligned with 32 bytes or not it is loaded
// differently. The coefficient pointer always uses the aligned load.
if (reinterpret_cast<uintptr_t>(in_ptr) & 0x1F) {
for (size_t j = 0; j < coefficients_length_; j += 8) {
m_in = _mm256_loadu_ps(in_ptr + j);
m_sum = _mm256_fmadd_ps(m_in, _mm256_load_ps(coef_ptr + j), m_sum);
}
} else {
for (size_t j = 0; j < coefficients_length_; j += 8) {
m_in = _mm256_load_ps(in_ptr + j);
m_sum = _mm256_fmadd_ps(m_in, _mm256_load_ps(coef_ptr + j), m_sum);
}
}
// Horizontal sum of the eight partial sums: fold the two 128-bit halves,
// then the upper pair onto the lower pair, then element 1 onto element 0.
__m128 m128_sum = _mm_add_ps(_mm256_extractf128_ps(m_sum, 0),
_mm256_extractf128_ps(m_sum, 1));
m128_sum = _mm_add_ps(_mm_movehl_ps(m128_sum, m128_sum), m128_sum);
_mm_store_ss(out + i,
_mm_add_ss(m128_sum, _mm_shuffle_ps(m128_sum, m128_sum, 1)));
}
// Update current state: move the most recent `state_length_` samples to the
// front of the buffer (the ranges may overlap, hence memmove).
memmove(state_.get(), &state_[length], state_length_ * sizeof(state_[0]));
}
} // namespace webrtc

View file

@ -0,0 +1,41 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_FIR_FILTER_AVX2_H_
#define COMMON_AUDIO_FIR_FILTER_AVX2_H_
#include <stddef.h>
#include <memory>
#include "common_audio/fir_filter.h"
#include "rtc_base/memory/aligned_malloc.h"
namespace webrtc {
// FIRFilter implementation backed by AVX2 intrinsics.
class FIRFilterAVX2 : public FIRFilter {
public:
// `coefficients` holds `coefficients_length` filter taps. `max_input_length`
// is used to size the internal state buffer.
FIRFilterAVX2(const float* coefficients,
size_t coefficients_length,
size_t max_input_length);
~FIRFilterAVX2() override;
void Filter(const float* in, size_t length, float* out) override;
private:
// Tap count after padding, and number of history samples kept between calls.
const size_t coefficients_length_;
const size_t state_length_;
// Buffers obtained from AlignedMalloc; freed through AlignedFreeDeleter.
std::unique_ptr<float[], AlignedFreeDeleter> coefficients_;
std::unique_ptr<float[], AlignedFreeDeleter> state_;
};
} // namespace webrtc
#endif // COMMON_AUDIO_FIR_FILTER_AVX2_H_

View file

@ -0,0 +1,61 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/fir_filter_c.h"
#include <string.h>
#include <memory>
#include "rtc_base/checks.h"
namespace webrtc {
// Nothing to release by hand; the unique_ptr members free their arrays.
FIRFilterC::~FIRFilterC() = default;
// Stores a reversed copy of the `coefficients_length` taps and creates a
// zeroed history buffer of `coefficients_length - 1` samples.
FIRFilterC::FIRFilterC(const float* coefficients, size_t coefficients_length)
    : coefficients_length_(coefficients_length),
      state_length_(coefficients_length - 1),
      coefficients_(new float[coefficients_length_]),
      state_(new float[state_length_]) {
  // Walk the caller's taps from the last to the first while filling the
  // internal array front to back.
  const float* tap = coefficients + coefficients_length_;
  for (size_t k = 0; k < coefficients_length_; ++k) {
    coefficients_[k] = *--tap;
  }
  const size_t state_bytes = state_length_ * sizeof(state_[0]);
  memset(state_.get(), 0, state_bytes);
}
// Filters `length` samples from `in` into `out` using the stored (reversed)
// coefficients and the history kept from previous calls, then refreshes the
// history with the newest `state_length_` samples.
void FIRFilterC::Filter(const float* in, size_t length, float* out) {
  RTC_DCHECK_GT(length, 0);

  for (size_t n = 0; n < length; ++n) {
    // Number of leading taps that still land on history samples; zero once
    // the window lies entirely inside `in`.
    const size_t history_taps = state_length_ > n ? state_length_ - n : 0;

    // Accumulate directly in the output slot, exactly like the per-element
    // `out[i] +=` form.
    float& acc = out[n];
    acc = 0.f;

    size_t tap = 0;
    while (tap < history_taps) {
      acc += state_[n + tap] * coefficients_[tap];
      ++tap;
    }
    while (tap < coefficients_length_) {
      acc += in[tap + n - state_length_] * coefficients_[tap];
      ++tap;
    }
  }

  // Update current state.
  if (length < state_length_) {
    // Fewer new samples than history slots: shift the surviving history to
    // the front and append the whole input behind it.
    const size_t kept = state_length_ - length;
    memmove(state_.get(), &state_[length], kept * sizeof(state_[0]));
    memcpy(&state_[kept], in, length * sizeof(*in));
  } else {
    // The input alone covers the history: keep its last `state_length_`
    // samples.
    memcpy(state_.get(), &in[length - state_length_],
           state_length_ * sizeof(*in));
  }
}
} // namespace webrtc

View file

@ -0,0 +1,38 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_FIR_FILTER_C_H_
#define COMMON_AUDIO_FIR_FILTER_C_H_
#include <string.h>
#include <memory>
#include "common_audio/fir_filter.h"
namespace webrtc {
// FIRFilter implementation in plain C++ (no SIMD intrinsics).
class FIRFilterC : public FIRFilter {
public:
// `coefficients` holds `coefficients_length` filter taps.
FIRFilterC(const float* coefficients, size_t coefficients_length);
~FIRFilterC() override;
void Filter(const float* in, size_t length, float* out) override;
private:
// Number of taps, and number of history samples kept between calls.
size_t coefficients_length_;
size_t state_length_;
// Heap arrays holding the taps and the history samples.
std::unique_ptr<float[]> coefficients_;
std::unique_ptr<float[]> state_;
};
} // namespace webrtc
#endif // COMMON_AUDIO_FIR_FILTER_C_H_

View file

@ -0,0 +1,55 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/fir_filter_factory.h"
#include "common_audio/fir_filter_c.h"
#include "rtc_base/checks.h"
#include "rtc_base/system/arch.h"
#if defined(WEBRTC_HAS_NEON)
#include "common_audio/fir_filter_neon.h"
#elif defined(WEBRTC_ARCH_X86_FAMILY)
#include "common_audio/fir_filter_avx2.h"
#include "common_audio/fir_filter_sse.h"
#include "system_wrappers/include/cpu_features_wrapper.h" // kSSE2, WebRtc_G...
#endif
namespace webrtc {
// Creates the FIR filter implementation suited to the build target and, on
// x86, to the CPU detected at run time. Returns nullptr (after a debug-only
// RTC_DCHECK_NOTREACHED) when `coefficients` is null or either length is
// zero. The filter is allocated with `new` and returned as a raw pointer.
// NOTE(review): fir_filter_avx2.h is included by this file, yet FIRFilterAVX2
// is never selected here - confirm whether that is intentional.
FIRFilter* CreateFirFilter(const float* coefficients,
                           size_t coefficients_length,
                           size_t max_input_length) {
  const bool arguments_ok = coefficients != nullptr &&
                            coefficients_length != 0 &&
                            max_input_length != 0;
  if (!arguments_ok) {
    RTC_DCHECK_NOTREACHED();
    return nullptr;
  }

// If we know the minimum architecture at compile time, avoid CPU detection.
#if defined(WEBRTC_ARCH_X86_FAMILY)
  // x86 CPU detection required.
  if (!GetCPUInfo(kSSE2)) {
    return new FIRFilterC(coefficients, coefficients_length);
  }
  return new FIRFilterSSE2(coefficients, coefficients_length,
                           max_input_length);
#elif defined(WEBRTC_HAS_NEON)
  return new FIRFilterNEON(coefficients, coefficients_length,
                           max_input_length);
#else
  return new FIRFilterC(coefficients, coefficients_length);
#endif
}
} // namespace webrtc

View file

@ -0,0 +1,32 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_FIR_FILTER_FACTORY_H_
#define COMMON_AUDIO_FIR_FILTER_FACTORY_H_
#include <string.h>
namespace webrtc {
class FIRFilter;
// Creates a filter with the given coefficients. All initial state values will
// be zeros.
// The length of the chunks fed to the filter should never be greater than
// `max_input_length`. This is needed because, when vectorizing, it is
// necessary to concatenate the input after the state, and resizing this array
// dynamically is expensive.
// The filter is returned as a raw pointer to a heap-allocated object, so the
// caller is responsible for deleting it.
FIRFilter* CreateFirFilter(const float* coefficients,
size_t coefficients_length,
size_t max_input_length);
} // namespace webrtc
#endif // COMMON_AUDIO_FIR_FILTER_FACTORY_H_

View file

@ -0,0 +1,73 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/fir_filter_neon.h"
#include <arm_neon.h>
#include <string.h>
#include "rtc_base/checks.h"
#include "rtc_base/memory/aligned_malloc.h"
namespace webrtc {
// Nothing to release by hand; the smart-pointer members free their buffers.
FIRFilterNEON::~FIRFilterNEON() = default;
// Prepares the filter: rounds the tap count up to a multiple of four,
// allocates 16-byte-aligned coefficient and state buffers, stores the
// coefficients in reverse order behind zero padding, and clears the state.
// The state buffer holds `state_length_` history samples followed by room
// for `max_input_length` input samples.
FIRFilterNEON::FIRFilterNEON(const float* coefficients,
                             size_t coefficients_length,
                             size_t max_input_length)
    :  // Closest higher multiple of four.
      coefficients_length_((coefficients_length + 3) & ~0x03),
      state_length_(coefficients_length_ - 1),
      coefficients_(static_cast<float*>(
          AlignedMalloc(sizeof(float) * coefficients_length_, 16))),
      state_(static_cast<float*>(
          AlignedMalloc(sizeof(float) * (max_input_length + state_length_),
                        16))) {
  // Guards against wrap-around in the rounding above (same check as the AVX2
  // implementation).
  RTC_DCHECK_GE(coefficients_length_, coefficients_length);
  // Zero the padding, which sits in the first entries of the stored array.
  // memset takes an int fill value, so pass 0 rather than the float 0.f.
  size_t padding = coefficients_length_ - coefficients_length;
  memset(coefficients_.get(), 0, padding * sizeof(coefficients_[0]));
  // The coefficients are reversed to compensate for the order in which the
  // input samples are acquired (most recent last).
  for (size_t i = 0; i < coefficients_length; ++i) {
    coefficients_[i + padding] = coefficients[coefficients_length - i - 1];
  }
  memset(state_.get(), 0,
         (max_input_length + state_length_) * sizeof(state_[0]));
}
void FIRFilterNEON::Filter(const float* in, size_t length, float* out) {
RTC_DCHECK_GT(length, 0);
memcpy(&state_[state_length_], in, length * sizeof(*in));
// Convolves the input signal `in` with the filter kernel `coefficients_`
// taking into account the previous state.
for (size_t i = 0; i < length; ++i) {
float* in_ptr = &state_[i];
float* coef_ptr = coefficients_.get();
float32x4_t m_sum = vmovq_n_f32(0);
float32x4_t m_in;
for (size_t j = 0; j < coefficients_length_; j += 4) {
m_in = vld1q_f32(in_ptr + j);
m_sum = vmlaq_f32(m_sum, m_in, vld1q_f32(coef_ptr + j));
}
float32x2_t m_half = vadd_f32(vget_high_f32(m_sum), vget_low_f32(m_sum));
out[i] = vget_lane_f32(vpadd_f32(m_half, m_half), 0);
}
// Update current state.
memmove(state_.get(), &state_[length], state_length_ * sizeof(state_[0]));
}
} // namespace webrtc

View file

@ -0,0 +1,39 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_FIR_FILTER_NEON_H_
#define COMMON_AUDIO_FIR_FILTER_NEON_H_
#include <memory>
#include "common_audio/fir_filter.h"
#include "rtc_base/memory/aligned_malloc.h"
namespace webrtc {
// FIRFilter implementation that performs the convolution with ARM NEON
// intrinsics, four floats at a time.
class FIRFilterNEON : public FIRFilter {
public:
// `coefficients` holds `coefficients_length` taps. `max_input_length` is used
// to size the internal state buffer (history plus one input chunk).
FIRFilterNEON(const float* coefficients,
size_t coefficients_length,
size_t max_input_length);
~FIRFilterNEON() override;
// Filters `length` samples from `in` and writes `length` samples to `out`.
void Filter(const float* in, size_t length, float* out) override;
private:
// Number of taps rounded up to the closest multiple of four.
size_t coefficients_length_;
// Number of past samples kept between calls (`coefficients_length_` - 1).
size_t state_length_;
// Taps stored in reversed order, zero-padded; 16-byte aligned allocation.
std::unique_ptr<float[], AlignedFreeDeleter> coefficients_;
// History followed by the current input chunk; 16-byte aligned allocation.
std::unique_ptr<float[], AlignedFreeDeleter> state_;
};
} // namespace webrtc
#endif // COMMON_AUDIO_FIR_FILTER_NEON_H_

View file

@ -0,0 +1,82 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/fir_filter_sse.h"
#include <stdint.h>
#include <string.h>
#include <xmmintrin.h>
#include "rtc_base/checks.h"
#include "rtc_base/memory/aligned_malloc.h"
namespace webrtc {
// Nothing to release by hand: the aligned buffers are owned by smart pointers.
FIRFilterSSE2::~FIRFilterSSE2() = default;
// Builds an SSE2 FIR filter from `coefficients_length` taps.
//
// The tap count is rounded up to a multiple of four so the convolution can
// always process four floats per step. Both buffers are allocated with 16-byte
// alignment. `max_input_length` bounds the chunk size later passed to
// Filter(): the state buffer holds `state_length_` history samples followed by
// up to `max_input_length` new samples.
FIRFilterSSE2::FIRFilterSSE2(const float* coefficients,
                             size_t coefficients_length,
                             size_t max_input_length)
    :  // Closest higher multiple of four.
      coefficients_length_((coefficients_length + 3) & ~0x03),
      state_length_(coefficients_length_ - 1),
      coefficients_(static_cast<float*>(
          AlignedMalloc(sizeof(float) * coefficients_length_, 16))),
      state_(static_cast<float*>(
          AlignedMalloc(sizeof(float) * (max_input_length + state_length_),
                        16))) {
  // With zero taps `state_length_` above wraps around (unsigned 0 - 1), which
  // makes every later size computation meaningless.
  RTC_DCHECK_GT(coefficients_length, 0);

  // Zero the first `padding` entries. Because the taps are stored reversed,
  // this is equivalent to appending zeros to the original coefficient list.
  size_t padding = coefficients_length_ - coefficients_length;
  memset(coefficients_.get(), 0, padding * sizeof(coefficients_[0]));
  // The coefficients are reversed to compensate for the order in which the
  // input samples are acquired (most recent last).
  for (size_t i = 0; i < coefficients_length; ++i) {
    coefficients_[i + padding] = coefficients[coefficients_length - i - 1];
  }
  // Start from an all-zero history.
  memset(state_.get(), 0,
         (max_input_length + state_length_) * sizeof(state_[0]));
}
// Filters `length` samples from `in` into `out`, carrying the filter history
// over from the previous call.
void FIRFilterSSE2::Filter(const float* in, size_t length, float* out) {
  RTC_DCHECK_GT(length, 0);

  // Place the new chunk right after the retained history.
  memcpy(&state_[state_length_], in, length * sizeof(*in));

  // Convolve: every output sample is the dot product of the (reversed,
  // zero-padded) taps with the window of samples starting at that position.
  const float* const taps = coefficients_.get();
  for (size_t n = 0; n < length; ++n) {
    const float* const window = &state_[n];
    __m128 acc = _mm_setzero_ps();

    // The taps are always read with aligned loads; the sample window is read
    // with an aligned or unaligned load depending on its address.
    const bool window_is_unaligned =
        (reinterpret_cast<uintptr_t>(window) & 0x0F) != 0;
    if (window_is_unaligned) {
      for (size_t k = 0; k < coefficients_length_; k += 4) {
        const __m128 samples = _mm_loadu_ps(window + k);
        acc = _mm_add_ps(acc, _mm_mul_ps(samples, _mm_load_ps(taps + k)));
      }
    } else {
      for (size_t k = 0; k < coefficients_length_; k += 4) {
        const __m128 samples = _mm_load_ps(window + k);
        acc = _mm_add_ps(acc, _mm_mul_ps(samples, _mm_load_ps(taps + k)));
      }
    }

    // Horizontal add of the four partial sums, stored as a single float.
    acc = _mm_add_ps(_mm_movehl_ps(acc, acc), acc);
    _mm_store_ss(out + n, _mm_add_ss(acc, _mm_shuffle_ps(acc, acc, 1)));
  }

  // Keep the newest `state_length_` samples as history for the next call.
  memmove(state_.get(), &state_[length], state_length_ * sizeof(state_[0]));
}
} // namespace webrtc

View file

@ -0,0 +1,41 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_FIR_FILTER_SSE_H_
#define COMMON_AUDIO_FIR_FILTER_SSE_H_
#include <stddef.h>
#include <memory>
#include "common_audio/fir_filter.h"
#include "rtc_base/memory/aligned_malloc.h"
namespace webrtc {
// FIRFilter implementation that performs the convolution with SSE intrinsics,
// four floats at a time.
class FIRFilterSSE2 : public FIRFilter {
public:
// `coefficients` holds `coefficients_length` taps. `max_input_length` is used
// to size the internal state buffer (history plus one input chunk).
FIRFilterSSE2(const float* coefficients,
size_t coefficients_length,
size_t max_input_length);
~FIRFilterSSE2() override;
// Filters `length` samples from `in` and writes `length` samples to `out`.
void Filter(const float* in, size_t length, float* out) override;
private:
// Number of taps rounded up to the closest multiple of four.
size_t coefficients_length_;
// Number of past samples kept between calls (`coefficients_length_` - 1).
size_t state_length_;
// Taps stored in reversed order, zero-padded; 16-byte aligned allocation.
std::unique_ptr<float[], AlignedFreeDeleter> coefficients_;
// History followed by the current input chunk; 16-byte aligned allocation.
std::unique_ptr<float[], AlignedFreeDeleter> state_;
};
} // namespace webrtc
#endif // COMMON_AUDIO_FIR_FILTER_SSE_H_

View file

@ -0,0 +1,214 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_INCLUDE_AUDIO_UTIL_H_
#define COMMON_AUDIO_INCLUDE_AUDIO_UTIL_H_
#include <stdint.h>
#include <algorithm>
#include <cmath>
#include <cstring>
#include <limits>
#include "rtc_base/checks.h"
namespace webrtc {
// Shorthand for the numeric limits of int16_t samples.
using limits_int16 = std::numeric_limits<int16_t>;
// The conversion functions use the following naming convention:
// S16: int16_t [-32768, 32767]
// Float: float [-1.0, 1.0]
// FloatS16: float [-32768.0, 32768.0]
// Dbfs: float [-20.0*log(10, 32768), 0] = [-90.3, 0]
// The ratio conversion functions use this naming convention:
// Ratio: float (0, +inf)
// Db: float (-inf, +inf)
// Converts an S16 sample to Float by scaling it with 1/32768.
static inline float S16ToFloat(int16_t v) {
  return v * (1.f / 32768.f);
}
// Converts a FloatS16 sample to S16: saturates to [-32768, 32767] and rounds
// to the nearest integer, with halves rounded away from zero.
static inline int16_t FloatS16ToS16(float v) {
  if (v > 32767.f) {
    v = 32767.f;
  } else if (v < -32768.f) {
    v = -32768.f;
  }
  // The cast truncates toward zero, so bias by half a step in the direction
  // of the sign first.
  const float rounding_bias = std::copysign(0.5f, v);
  return static_cast<int16_t>(v + rounding_bias);
}
// Converts a Float sample to S16: scales by 32768, saturates to
// [-32768, 32767] and rounds to the nearest integer, with halves rounded away
// from zero.
static inline int16_t FloatToS16(float v) {
  float scaled = v * 32768.f;
  if (scaled > 32767.f) {
    scaled = 32767.f;
  } else if (scaled < -32768.f) {
    scaled = -32768.f;
  }
  // The cast truncates toward zero, so bias by half a step in the direction
  // of the sign first.
  const float rounding_bias = std::copysign(0.5f, scaled);
  return static_cast<int16_t>(scaled + rounding_bias);
}
// Converts a Float sample to FloatS16: limits the input to [-1, 1] and scales
// it by 32768.
static inline float FloatToFloatS16(float v) {
  if (v > 1.f) {
    v = 1.f;
  } else if (v < -1.f) {
    v = -1.f;
  }
  return v * 32768.f;
}
// Converts a FloatS16 sample to Float: limits the input to [-32768, 32768]
// and scales it by 1/32768.
static inline float FloatS16ToFloat(float v) {
  if (v > 32768.f) {
    v = 32768.f;
  } else if (v < -32768.f) {
    v = -32768.f;
  }
  return v * (1.f / 32768.f);
}
// Array overloads of the sample conversions. Each takes a source buffer
// `src`, an element count `size` and a destination buffer `dest`; the
// definitions are not part of this header.
// NOTE(review): presumably each converts `size` elements one by one following
// the naming convention above, and `dest` must have room for `size` elements -
// confirm against the implementation.
void FloatToS16(const float* src, size_t size, int16_t* dest);
void S16ToFloat(const int16_t* src, size_t size, float* dest);
void S16ToFloatS16(const int16_t* src, size_t size, float* dest);
void FloatS16ToS16(const float* src, size_t size, int16_t* dest);
void FloatToFloatS16(const float* src, size_t size, float* dest);
void FloatS16ToFloat(const float* src, size_t size, float* dest);
// Converts a level in dB to an amplitude ratio: 10^(v / 20).
inline float DbToRatio(float v) {
  const float exponent = v / 20.0f;
  return std::pow(10.0f, exponent);
}
// Converts a level in dBFS to a FloatS16 amplitude, where 0 dBFS maps to
// 32768 (the magnitude of the most negative int16_t value).
inline float DbfsToFloatS16(float v) {
  static constexpr float kFullScale = -limits_int16::min();
  const float ratio = DbToRatio(v);
  return ratio * kFullScale;
}
// Converts a non-negative FloatS16 amplitude to dBFS. Amplitudes of 1.0 or
// less all map to the floor value of about -90.3 dBFS.
inline float FloatS16ToDbfs(float v) {
  RTC_DCHECK_GE(v, 0);
  // Equal to -20.0 * log10(-limits_int16::min()), i.e. the level of an
  // amplitude of 1.
  static constexpr float kMinDbfs = -90.30899869919436f;
  // For larger inputs: 20 * log10(v / (-limits_int16::min())).
  return (v <= 1.0f) ? kMinDbfs : 20.0f * std::log10(v) + kMinDbfs;
}
// Copies `num_frames` samples of each of the `num_channels` channels from
// `src` to `dest`, skipping every channel whose source and destination
// pointers are identical. `src` and `dest` must have the same number of
// channels, and `dest` must provide enough space.
template <typename T>
void CopyAudioIfNeeded(const T* const* src,
                       int num_frames,
                       int num_channels,
                       T* const* dest) {
  for (int ch = 0; ch < num_channels; ++ch) {
    const T* const from = src[ch];
    T* const to = dest[ch];
    if (from == to) {
      continue;  // Same buffer, nothing to copy.
    }
    std::copy(from, from + num_frames, to);
  }
}
// Splits the interleaved buffer `interleaved` into per-channel buffers.
// `deinterleaved` must point to `num_channels` buffers, each with room for
// `samples_per_channel` samples.
template <typename T>
void Deinterleave(const T* interleaved,
                  size_t samples_per_channel,
                  size_t num_channels,
                  T* const* deinterleaved) {
  for (size_t ch = 0; ch < num_channels; ++ch) {
    T* const out = deinterleaved[ch];
    // Sample `frame` of channel `ch` sits at `frame * num_channels + ch`.
    for (size_t frame = 0; frame < samples_per_channel; ++frame) {
      out[frame] = interleaved[frame * num_channels + ch];
    }
  }
}
// Merges the per-channel buffers in `deinterleaved` into the single
// interleaved buffer `interleaved`, which must have room for
// `samples_per_channel` * `num_channels` samples.
template <typename T>
void Interleave(const T* const* deinterleaved,
                size_t samples_per_channel,
                size_t num_channels,
                T* interleaved) {
  for (size_t ch = 0; ch < num_channels; ++ch) {
    const T* const in = deinterleaved[ch];
    // Sample `frame` of channel `ch` goes to `frame * num_channels + ch`.
    for (size_t frame = 0; frame < samples_per_channel; ++frame) {
      interleaved[frame * num_channels + ch] = in[frame];
    }
  }
}
// Writes every sample of the single-channel buffer `mono` to each channel of
// the interleaved output. `interleaved` must have room for
// `num_frames` * `num_channels` samples.
template <typename T>
void UpmixMonoToInterleaved(const T* mono,
                            int num_frames,
                            int num_channels,
                            T* interleaved) {
  T* cursor = interleaved;
  for (int frame = 0; frame < num_frames; ++frame) {
    for (int ch = 0; ch < num_channels; ++ch) {
      *cursor++ = mono[frame];
    }
  }
}
// Downmixes `num_channels` deinterleaved channels to a single channel by
// averaging them frame by frame. The per-frame sum is accumulated in
// `Intermediate`; `out` must have room for `num_frames` samples.
template <typename T, typename Intermediate>
void DownmixToMono(const T* const* input_channels,
                   size_t num_frames,
                   int num_channels,
                   T* out) {
  const T* const first_channel = input_channels[0];
  for (size_t frame = 0; frame < num_frames; ++frame) {
    Intermediate sum = first_channel[frame];
    for (int ch = 1; ch < num_channels; ++ch) {
      sum += input_channels[ch][frame];
    }
    out[frame] = sum / num_channels;
  }
}
// Downmixes an interleaved multichannel signal to a single channel by
// averaging the channels of every frame. The per-frame sum is accumulated in
// `Intermediate`; `deinterleaved` receives one sample per frame.
template <typename T, typename Intermediate>
void DownmixInterleavedToMonoImpl(const T* interleaved,
                                  size_t num_frames,
                                  int num_channels,
                                  T* deinterleaved) {
  RTC_DCHECK_GT(num_channels, 0);
  RTC_DCHECK_GT(num_frames, 0);

  const T* const end = interleaved + num_frames * num_channels;
  T* out = deinterleaved;
  for (const T* frame = interleaved; frame < end; frame += num_channels) {
    Intermediate sum = frame[0];
    for (int ch = 1; ch < num_channels; ++ch) {
      sum += frame[ch];
    }
    *out++ = sum / num_channels;
  }
}
// Downmixes the interleaved signal `interleaved` (`num_frames` frames of
// `num_channels` channels) into `deinterleaved`. Only declared here; the
// definitions are not part of this header.
// NOTE(review): presumably implemented on top of
// DownmixInterleavedToMonoImpl - confirm against the implementation file.
template <typename T>
void DownmixInterleavedToMono(const T* interleaved,
size_t num_frames,
int num_channels,
T* deinterleaved);
// Explicit specialization for int16_t samples, declared so that callers use
// the separately defined version.
template <>
void DownmixInterleavedToMono<int16_t>(const int16_t* interleaved,
size_t num_frames,
int num_channels,
int16_t* deinterleaved);
} // namespace webrtc
#endif // COMMON_AUDIO_INCLUDE_AUDIO_UTIL_H_

View file

@ -0,0 +1,28 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_MOCKS_MOCK_SMOOTHING_FILTER_H_
#define COMMON_AUDIO_MOCKS_MOCK_SMOOTHING_FILTER_H_
#include "common_audio/smoothing_filter.h"
#include "test/gmock.h"
namespace webrtc {
// gMock test double for SmoothingFilter. Every overridden method is mocked,
// so tests set expectations and return values with EXPECT_CALL / ON_CALL.
class MockSmoothingFilter : public SmoothingFilter {
public:
MOCK_METHOD(void, AddSample, (float), (override));
MOCK_METHOD(absl::optional<float>, GetAverage, (), (override));
MOCK_METHOD(bool, SetTimeConstantMs, (int), (override));
};
} // namespace webrtc
#endif // COMMON_AUDIO_MOCKS_MOCK_SMOOTHING_FILTER_H_

View file

@ -0,0 +1,51 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/real_fourier.h"
#include "common_audio/real_fourier_ooura.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "rtc_base/checks.h"
namespace webrtc {
using std::complex;
// Alignment, in bytes, used by AllocRealBuffer() and AllocCplxBuffer().
const size_t RealFourier::kFftBufferAlignment = 32;
std::unique_ptr<RealFourier> RealFourier::Create(int fft_order) {
return std::unique_ptr<RealFourier>(new RealFourierOoura(fft_order));
}
// Returns the number of bits needed to represent `length - 1`, which is the
// smallest order whose FFT length can hold `length` samples. `length` must be
// greater than zero.
int RealFourier::FftOrder(size_t length) {
  RTC_CHECK_GT(length, 0U);
  const uint32_t largest_index = static_cast<uint32_t>(length - 1);
  return WebRtcSpl_GetSizeInBits(largest_index);
}
// Returns 2^order, the FFT input length. `order` must not be negative.
size_t RealFourier::FftLength(int order) {
  RTC_CHECK_GE(order, 0);
  const size_t one = 1;
  return one << order;
}
size_t RealFourier::ComplexLength(int order) {
return FftLength(order) / 2 + 1;
}
// Allocates an aligned buffer for `count` floats and hands it back wrapped in
// a scoper that frees it with the matching aligned deleter.
RealFourier::fft_real_scoper RealFourier::AllocRealBuffer(int count) {
  const size_t num_bytes = sizeof(float) * count;
  void* const raw = AlignedMalloc(num_bytes, kFftBufferAlignment);
  return fft_real_scoper(static_cast<float*>(raw));
}
// Allocates an aligned buffer for `count` complex floats and hands it back
// wrapped in a scoper that frees it with the matching aligned deleter.
RealFourier::fft_cplx_scoper RealFourier::AllocCplxBuffer(int count) {
  const size_t num_bytes = sizeof(complex<float>) * count;
  void* const raw = AlignedMalloc(num_bytes, kFftBufferAlignment);
  return fft_cplx_scoper(static_cast<complex<float>*>(raw));
}
} // namespace webrtc

View file

@ -0,0 +1,76 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_REAL_FOURIER_H_
#define COMMON_AUDIO_REAL_FOURIER_H_
#include <stddef.h>
#include <complex>
#include <memory>
#include "rtc_base/memory/aligned_malloc.h"
// Uniform interface class for the real DFT and its inverse, for power-of-2
// input lengths. Also contains helper functions for buffer allocation, taking
// care of any memory alignment requirements the underlying library might have.
namespace webrtc {
// Abstract interface for a real-input DFT and its inverse, plus static
// helpers for size computations and aligned buffer allocation.
class RealFourier {
public:
// Shorthand typenames for the scopers used by the buffer allocation helpers.
typedef std::unique_ptr<float[], AlignedFreeDeleter> fft_real_scoper;
typedef std::unique_ptr<std::complex<float>[], AlignedFreeDeleter>
fft_cplx_scoper;
// The alignment required for all input and output buffers, in bytes.
static const size_t kFftBufferAlignment;
// Construct a wrapper instance for the given input order, which must be
// between 1 and kMaxFftOrder, inclusively.
// NOTE(review): kMaxFftOrder is not declared in this header - confirm where
// the upper bound is defined or update this comment.
static std::unique_ptr<RealFourier> Create(int fft_order);
// Virtual so that instances can be destroyed through a RealFourier pointer.
virtual ~RealFourier() {}
// Helper to compute the smallest FFT order (a power of 2) which will contain
// the given input length.
static int FftOrder(size_t length);
// Helper to compute the input length from the FFT order.
static size_t FftLength(int order);
// Helper to compute the exact length, in complex floats, of the transform
// output (i.e. |2^order / 2 + 1|).
static size_t ComplexLength(int order);
// Buffer allocation helpers. The buffers are large enough to hold `count`
// floats/complexes and suitably aligned for use by the implementation.
// The returned scopers are set up with proper deleters; the caller owns
// the allocated memory.
static fft_real_scoper AllocRealBuffer(int count);
static fft_cplx_scoper AllocCplxBuffer(int count);
// Main forward transform interface. The output array need only be big
// enough for |2^order / 2 + 1| elements - the conjugate pairs are not
// returned. Input and output must be properly aligned (e.g. through
// AllocRealBuffer and AllocCplxBuffer) and input length must be
// |2^order| (same as given at construction time).
virtual void Forward(const float* src, std::complex<float>* dest) const = 0;
// Inverse transform. Same input format as output above, conjugate pairs
// not needed.
virtual void Inverse(const std::complex<float>* src, float* dest) const = 0;
// Returns the FFT order of this instance.
virtual int order() const = 0;
};
} // namespace webrtc
#endif // COMMON_AUDIO_REAL_FOURIER_H_

View file

@ -0,0 +1,91 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/real_fourier_ooura.h"
#include <algorithm>
#include <cmath>
#include "common_audio/third_party/ooura/fft_size_256/fft4g.h"
#include "rtc_base/checks.h"
namespace webrtc {
using std::complex;
namespace {
// Replaces each of the first `complex_length` elements of `array` with its
// complex conjugate, in place.
void Conjugate(complex<float>* array, size_t complex_length) {
  for (size_t i = 0; i < complex_length; ++i) {
    array[i] = std::conj(array[i]);
  }
}
// Returns the number of elements to allocate for the `work_ip_` work array:
// 2 + ceil(sqrt(fft_length)).
size_t ComputeWorkIpSize(size_t fft_length) {
  const float root = std::sqrt(static_cast<float>(fft_length));
  return static_cast<size_t>(2 + std::ceil(root));
}
} // namespace
// Sets up the sizes and work arrays for an FFT of order `fft_order`, which
// must be at least 1. The size members are initialized from one another
// (order_ -> length_ -> complex_length_ -> work arrays), so the initializer
// list relies on the member declaration order in the class.
RealFourierOoura::RealFourierOoura(int fft_order)
: order_(fft_order),
length_(FftLength(order_)),
complex_length_(ComplexLength(order_)),
// Zero-initializing work_ip_ will cause rdft to initialize these work
// arrays on the first call.
work_ip_(new size_t[ComputeWorkIpSize(length_)]()),
work_w_(new float[complex_length_]()) {
// Runs after the initializers above; FftLength() has already rejected
// negative orders by then.
RTC_CHECK_GE(fft_order, 1);
}
// The work arrays are held by std::unique_ptr, so no manual cleanup is needed.
RealFourierOoura::~RealFourierOoura() = default;
// Forward transform of `length_` real samples in `src` into
// `complex_length_` complex values in `dest`. The transform runs in place
// inside `dest`.
void RealFourierOoura::Forward(const float* src, complex<float>* dest) const {
  // Viewing the complex array as floats is well-defined since C++11. See
  // "Non-static data members" at:
  // http://en.cppreference.com/w/cpp/numeric/complex
  float* const packed = reinterpret_cast<float*>(dest);
  std::copy(src, src + length_, packed);
  WebRtc_rdft(length_, 1, packed, work_ip_.get(), work_w_.get());

  // Ooura places real[n/2] in imag[0]: move it to its own (last) bin and
  // clear the imaginary part of bin 0.
  const size_t last_bin = complex_length_ - 1;
  dest[last_bin] = complex<float>(dest[0].imag(), 0.0f);
  dest[0] = complex<float>(dest[0].real(), 0.0f);

  // Ooura returns the conjugate of the usual Fourier definition.
  Conjugate(dest, complex_length_);
}
void RealFourierOoura::Inverse(const complex<float>* src, float* dest) const {
{
auto* dest_complex = reinterpret_cast<complex<float>*>(dest);
// The real output array is shorter than the input complex array by one
// complex element.
const size_t dest_complex_length = complex_length_ - 1;
std::copy(src, src + dest_complex_length, dest_complex);
// Restore Ooura's conjugate definition.
Conjugate(dest_complex, dest_complex_length);
// Restore real[n/2] to imag[0].
dest_complex[0] =
complex<float>(dest_complex[0].real(), src[complex_length_ - 1].real());
}
WebRtc_rdft(length_, -1, dest, work_ip_.get(), work_w_.get());
// Ooura returns a scaled version.
const float scale = 2.0f / length_;
std::for_each(dest, dest + length_, [scale](float& v) { v *= scale; });
}
// Returns the FFT order given at construction time.
int RealFourierOoura::order() const {
return order_;
}
} // namespace webrtc

View file

@ -0,0 +1,45 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_REAL_FOURIER_OOURA_H_
#define COMMON_AUDIO_REAL_FOURIER_OOURA_H_
#include <stddef.h>
#include <complex>
#include <memory>
#include "common_audio/real_fourier.h"
namespace webrtc {
// RealFourier implementation backed by the Ooura FFT (WebRtc_rdft).
class RealFourierOoura : public RealFourier {
public:
// `fft_order` must be at least 1.
explicit RealFourierOoura(int fft_order);
~RealFourierOoura() override;
void Forward(const float* src, std::complex<float>* dest) const override;
void Inverse(const std::complex<float>* src, float* dest) const override;
int order() const override;
private:
// NOTE: the constructor's initializer list relies on this declaration order.
const int order_;
// FFT input length, 2^order_.
const size_t length_;
// Number of complex output values, length_ / 2 + 1.
const size_t complex_length_;
// These are work arrays for Ooura. The names are based on the comments in
// common_audio/third_party/ooura/fft_size_256/fft4g.cc.
// The pointers are const but the arrays they point to are written by the
// const Forward()/Inverse() methods.
const std::unique_ptr<size_t[]> work_ip_;
const std::unique_ptr<float[]> work_w_;
};
} // namespace webrtc
#endif // COMMON_AUDIO_REAL_FOURIER_OOURA_H_

View file

@ -0,0 +1,59 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_RESAMPLER_INCLUDE_PUSH_RESAMPLER_H_
#define COMMON_AUDIO_RESAMPLER_INCLUDE_PUSH_RESAMPLER_H_
#include <memory>
#include <vector>
namespace webrtc {
class PushSincResampler;
// Wraps PushSincResampler to resample interleaved multi-channel audio: the
// implementation creates one PushSincResampler per channel and
// deinterleaves/interleaves around them.
// NOTE(review): the earlier "stereo support" description and the TODO(ajm)
// about supporting an arbitrary number of channels look obsolete given the
// per-channel implementation - confirm and drop the TODO if so.
template <typename T>
class PushResampler {
public:
PushResampler();
virtual ~PushResampler();
// Must be called whenever the parameters change. Free to be called at any
// time as it is a no-op if parameters have not changed since the last call.
// Returns 0 on success and -1 if a parameter is not positive.
int InitializeIfNeeded(int src_sample_rate_hz,
int dst_sample_rate_hz,
size_t num_channels);
// Resamples one 10 ms chunk of interleaved audio from `src` into `dst`.
// `src_length` and `dst_capacity` are counted in samples over all channels.
// Returns the total number of samples provided in destination (e.g. 32 kHz,
// 2 channel audio gives 640 samples).
int Resample(const T* src, size_t src_length, T* dst, size_t dst_capacity);
private:
// Parameters of the most recent successful InitializeIfNeeded() call.
int src_sample_rate_hz_;
int dst_sample_rate_hz_;
size_t num_channels_;
// Vector that is needed to provide the proper inputs and outputs to the
// interleave/de-interleave methods used in Resample. This needs to be
// heap-allocated on the state to support an arbitrary number of channels
// without doing run-time heap-allocations in the Resample method.
std::vector<T*> channel_data_array_;
// Per-channel resampler together with its deinterleaved input (`source`)
// and output (`destination`) buffers.
struct ChannelResampler {
std::unique_ptr<PushSincResampler> resampler;
std::vector<T> source;
std::vector<T> destination;
};
// One entry per channel.
std::vector<ChannelResampler> channel_resamplers_;
};
} // namespace webrtc
#endif // COMMON_AUDIO_RESAMPLER_INCLUDE_PUSH_RESAMPLER_H_

View file

@ -0,0 +1,99 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* A wrapper for resampling between a number of supported sample rate combinations.
*/
#ifndef COMMON_AUDIO_RESAMPLER_INCLUDE_RESAMPLER_H_
#define COMMON_AUDIO_RESAMPLER_INCLUDE_RESAMPLER_H_
#include <stddef.h>
#include <stdint.h>
namespace webrtc {
// Resampler for int16_t audio between a fixed set of supported rate ratios
// (see ResamplerMode below).
// All methods return 0 on success and -1 on failure.
class Resampler {
public:
Resampler();
Resampler(int inFreq, int outFreq, size_t num_channels);
~Resampler();
// Reset all states
int Reset(int inFreq, int outFreq, size_t num_channels);
// Reset all states if any parameter has changed
int ResetIfNeeded(int inFreq, int outFreq, size_t num_channels);
// Resample samplesIn to samplesOut.
// NOTE(review): presumably `lengthIn` is the number of input samples,
// `maxLen` the capacity of `samplesOut` and `outLen` receives the number of
// samples written - confirm against the implementation.
int Push(const int16_t* samplesIn,
size_t lengthIn,
int16_t* samplesOut,
size_t maxLen,
size_t& outLen); // NOLINT: to avoid changing APIs
private:
// Supported input:output rate ratios.
enum ResamplerMode {
kResamplerMode1To1,
kResamplerMode1To2,
kResamplerMode1To3,
kResamplerMode1To4,
kResamplerMode1To6,
kResamplerMode1To12,
kResamplerMode2To3,
kResamplerMode2To11,
kResamplerMode4To11,
kResamplerMode8To11,
kResamplerMode11To16,
kResamplerMode11To32,
kResamplerMode2To1,
kResamplerMode3To1,
kResamplerMode4To1,
kResamplerMode6To1,
kResamplerMode12To1,
kResamplerMode3To2,
kResamplerMode11To2,
kResamplerMode11To4,
kResamplerMode11To8
};
// Computes the resampler mode for a given sampling frequency pair.
// Returns -1 for unsupported frequency pairs.
static int ComputeResamplerMode(int in_freq_hz,
int out_freq_hz,
ResamplerMode* mode);
// Generic pointers since we don't know what states we'll need
void* state1_;
void* state2_;
void* state3_;
// Storage if needed
int16_t* in_buffer_;
int16_t* out_buffer_;
size_t in_buffer_size_;
size_t out_buffer_size_;
size_t in_buffer_size_max_;
size_t out_buffer_size_max_;
// Currently configured rates and mode.
// NOTE(review): the names say kHz while the public API takes `inFreq` /
// `outFreq` without a stated unit - confirm the unit conversion.
int my_in_frequency_khz_;
int my_out_frequency_khz_;
ResamplerMode my_mode_;
size_t num_channels_;
// Extra instance for stereo
Resampler* helper_left_;
Resampler* helper_right_;
};
} // namespace webrtc
#endif // COMMON_AUDIO_RESAMPLER_INCLUDE_RESAMPLER_H_

View file

@ -0,0 +1,123 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/resampler/include/push_resampler.h"
#include <stdint.h>
#include <string.h>
#include <memory>
#include "common_audio/include/audio_util.h"
#include "common_audio/resampler/push_sinc_resampler.h"
#include "rtc_base/checks.h"
namespace webrtc {
// Starts with all parameters at zero, so the first InitializeIfNeeded() call
// with valid (positive) parameters always performs the setup.
template <typename T>
PushResampler<T>::PushResampler()
: src_sample_rate_hz_(0), dst_sample_rate_hz_(0), num_channels_(0) {}
// All members clean up after themselves.
template <typename T>
PushResampler<T>::~PushResampler() = default;
// (Re)configures the resampler for the given rates and channel count.
//
// Returns 0 on success, including the no-op case where the parameters match
// the current configuration, and -1 if any parameter is not positive. On
// failure the current configuration is left untouched.
template <typename T>
int PushResampler<T>::InitializeIfNeeded(int src_sample_rate_hz,
                                         int dst_sample_rate_hz,
                                         size_t num_channels) {
  // These checks used to be factored out of this template function due to
  // Windows debug build issues with clang. http://crbug.com/615050
  RTC_DCHECK_GT(src_sample_rate_hz, 0);
  RTC_DCHECK_GT(dst_sample_rate_hz, 0);
  RTC_DCHECK_GT(num_channels, 0);

  // Validate before comparing against the current settings: a freshly
  // constructed instance holds all-zero settings, so an (invalid) all-zero
  // request would otherwise be reported as a successful no-op.
  if (src_sample_rate_hz <= 0 || dst_sample_rate_hz <= 0 || num_channels == 0) {
    return -1;
  }

  if (src_sample_rate_hz == src_sample_rate_hz_ &&
      dst_sample_rate_hz == dst_sample_rate_hz_ &&
      num_channels == num_channels_) {
    // No-op if settings haven't changed.
    return 0;
  }

  src_sample_rate_hz_ = src_sample_rate_hz;
  dst_sample_rate_hz_ = dst_sample_rate_hz;
  num_channels_ = num_channels;

  // Number of samples per channel in one 10 ms chunk.
  const size_t src_size_10ms_mono =
      static_cast<size_t>(src_sample_rate_hz / 100);
  const size_t dst_size_10ms_mono =
      static_cast<size_t>(dst_sample_rate_hz / 100);

  // Rebuild one resampler, with its own input and output buffer, per channel.
  channel_resamplers_.clear();
  channel_resamplers_.reserve(num_channels);
  for (size_t i = 0; i < num_channels; ++i) {
    channel_resamplers_.emplace_back();
    ChannelResampler& channel = channel_resamplers_.back();
    channel.resampler = std::make_unique<PushSincResampler>(
        src_size_10ms_mono, dst_size_10ms_mono);
    channel.source.resize(src_size_10ms_mono);
    channel.destination.resize(dst_size_10ms_mono);
  }

  channel_data_array_.resize(num_channels_);
  return 0;
}
// Resamples one 10 ms chunk of interleaved audio from `src` into `dst` and
// returns the total number of samples (over all channels) written to `dst`.
template <typename T>
int PushResampler<T>::Resample(const T* src,
                               size_t src_length,
                               T* dst,
                               size_t dst_capacity) {
  // These checks used to be factored out of this template function due to
  // Windows debug build issues with clang. http://crbug.com/615050
  const size_t src_size_10ms = (src_sample_rate_hz_ / 100) * num_channels_;
  const size_t dst_size_10ms = (dst_sample_rate_hz_ / 100) * num_channels_;
  RTC_DCHECK_EQ(src_length, src_size_10ms);
  RTC_DCHECK_GE(dst_capacity, dst_size_10ms);

  const bool rates_match = (src_sample_rate_hz_ == dst_sample_rate_hz_);
  if (rates_match) {
    // The old resampler provides this memcpy facility in the case of matching
    // sample rates, so reproduce it here for the sinc resampler.
    memcpy(dst, src, src_length * sizeof(T));
    return static_cast<int>(src_length);
  }

  const size_t frames_in = src_length / num_channels_;
  const size_t capacity_per_channel = dst_capacity / num_channels_;

  // Split the input into the per-channel source buffers.
  for (size_t ch = 0; ch < num_channels_; ++ch) {
    channel_data_array_[ch] = channel_resamplers_[ch].source.data();
  }
  Deinterleave(src, frames_in, num_channels_, channel_data_array_.data());

  // Resample every channel on its own; the count reported by the last channel
  // is the one used below.
  size_t frames_out = 0;
  for (size_t ch = 0; ch < channel_resamplers_.size(); ++ch) {
    ChannelResampler& channel = channel_resamplers_[ch];
    frames_out = channel.resampler->Resample(
        channel.source.data(), frames_in, channel.destination.data(),
        capacity_per_channel);
  }

  // Merge the per-channel results back into the interleaved output.
  for (size_t ch = 0; ch < num_channels_; ++ch) {
    channel_data_array_[ch] = channel_resamplers_[ch].destination.data();
  }
  Interleave(channel_data_array_.data(), frames_out, num_channels_, dst);
  return static_cast<int>(frames_out * num_channels_);
}
// Explicitly generate required instantiations.
template class PushResampler<int16_t>;
template class PushResampler<float>;
} // namespace webrtc

View file

@ -0,0 +1,102 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/resampler/push_sinc_resampler.h"
#include <cstring>
#include "common_audio/include/audio_util.h"
#include "rtc_base/checks.h"
namespace webrtc {
// Creates a resampler that turns chunks of `source_frames` input samples into
// chunks of `destination_frames` output samples. The SincResampler is given
// the ratio source_frames / destination_frames (computed in double
// precision), `source_frames` as its request size, and `this`.
// NOTE(review): `this` is presumably registered as the input callback whose
// Run() supplies the samples - confirm against sinc_resampler.h.
PushSincResampler::PushSincResampler(size_t source_frames,
size_t destination_frames)
: resampler_(new SincResampler(source_frames * 1.0 / destination_frames,
source_frames,
this)),
source_ptr_(nullptr),
source_ptr_int_(nullptr),
destination_frames_(destination_frames),
// The first Resample() call primes the SincResampler with dummy input.
first_pass_(true),
source_available_(0) {}
// No manual cleanup is performed here.
PushSincResampler::~PushSincResampler() = default;
size_t PushSincResampler::Resample(const int16_t* source,
size_t source_length,
int16_t* destination,
size_t destination_capacity) {
if (!float_buffer_.get())
float_buffer_.reset(new float[destination_frames_]);
source_ptr_int_ = source;
// Pass nullptr as the float source to have Run() read from the int16 source.
Resample(nullptr, source_length, float_buffer_.get(), destination_frames_);
FloatS16ToS16(float_buffer_.get(), destination_frames_, destination);
source_ptr_int_ = nullptr;
return destination_frames_;
}
// Resamples `source_length` float samples from `source` into `destination`
// and returns the number of samples written (always `destination_frames_`).
// `source_length` must equal the resampler's request size and
// `destination_capacity` must be at least `destination_frames_`.
size_t PushSincResampler::Resample(const float* source,
                                   size_t source_length,
                                   float* destination,
                                   size_t destination_capacity) {
  RTC_CHECK_EQ(source_length, resampler_->request_frames());
  RTC_CHECK_GE(destination_capacity, destination_frames_);

  // Stash the input for the Run() callback, which the Resample() calls below
  // trigger immediately.
  source_available_ = source_length;
  source_ptr_ = source;

  // The very first call runs the SincResampler twice. The extra, first run is
  // fed dummy input (see Run()) and its output is thrown away: it primes the
  // SincResampler buffer with the correct delay (half the kernel size), so
  // that every later Resample() call results in exactly one input request
  // through Run().
  //
  // Without this priming, SincResampler would call Run() twice on the first
  // pass, and an entire `source_frames` of delay would be needed instead of
  // the minimum half kernel.
  //
  // ChunkSize() happens to be exactly the amount of output to request in
  // order to prime the buffer with a single Run() request for
  // `source_frames`.
  if (first_pass_) {
    resampler_->Resample(resampler_->ChunkSize(), destination);
  }

  resampler_->Resample(destination_frames_, destination);
  source_ptr_ = nullptr;
  return destination_frames_;
}
// SincResamplerCallback implementation: fills `destination` with `frames`
// input samples taken from the block cached by Resample().
void PushSincResampler::Run(size_t frames, float* destination) {
  // Exactly the cached block must be requested. This fires if Run() is
  // triggered more than once for a single Resample() call.
  RTC_CHECK_EQ(source_available_, frames);

  const size_t byte_count = frames * sizeof(*destination);

  if (first_pass_) {
    // Priming request (see Resample()): hand out silence; the resulting
    // output is thrown away.
    first_pass_ = false;
    std::memset(destination, 0, byte_count);
    return;
  }

  if (source_ptr_ != nullptr) {
    std::memcpy(destination, source_ptr_, byte_count);
  } else {
    // No float source was set, so the int16 source is used and widened
    // sample by sample.
    const int16_t* const samples = source_ptr_int_;
    for (size_t n = 0; n != frames; ++n) {
      destination[n] = static_cast<float>(samples[n]);
    }
  }
  source_available_ -= frames;
}
} // namespace webrtc

View file

@ -0,0 +1,81 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_RESAMPLER_PUSH_SINC_RESAMPLER_H_
#define COMMON_AUDIO_RESAMPLER_PUSH_SINC_RESAMPLER_H_
#include <stddef.h>
#include <stdint.h>
#include <memory>
#include "common_audio/resampler/sinc_resampler.h"
namespace webrtc {
// A thin wrapper over SincResampler to provide a push-based interface as
// required by WebRTC. SincResampler uses a pull-based interface, and will
// use SincResamplerCallback::Run() to request data upon a call to Resample().
// These Run() calls will happen on the same thread Resample() is called on.
class PushSincResampler : public SincResamplerCallback {
public:
// Provide the size of the source and destination blocks in samples. These
// must correspond to the same time duration (typically 10 ms) as the sample
// ratio is inferred from them.
PushSincResampler(size_t source_frames, size_t destination_frames);
~PushSincResampler() override;
// Copy construction and copy assignment are disabled.
PushSincResampler(const PushSincResampler&) = delete;
PushSincResampler& operator=(const PushSincResampler&) = delete;
// Perform the resampling. `source_frames` must always equal the
// `source_frames` provided at construction. `destination_capacity` must be
// at least as large as `destination_frames`. Returns the number of samples
// provided in destination (for convenience, since this will always be equal
// to `destination_frames`).
//
// The int16_t overload resamples in the float domain through an internal
// float buffer and converts the result back to int16_t.
size_t Resample(const int16_t* source,
size_t source_frames,
int16_t* destination,
size_t destination_capacity);
size_t Resample(const float* source,
size_t source_frames,
float* destination,
size_t destination_capacity);
// Delay due to the filter kernel. Essentially, the time after which an input
// sample will appear in the resampled output.
// Computed as half of SincResampler::kKernelSize samples at `source_rate_hz`.
static float AlgorithmicDelaySeconds(int source_rate_hz) {
return 1.f / source_rate_hz * SincResampler::kKernelSize / 2;
}
protected:
// Implements SincResamplerCallback.
// Copies `frames` samples of the block currently being resampled into
// `destination` (zeros on the very first, priming, request).
void Run(size_t frames, float* destination) override;
private:
friend class PushSincResamplerTest;
// Exposes the wrapped resampler; private, reachable only by the friend test.
SincResampler* get_resampler_for_testing() { return resampler_.get(); }
// The wrapped pull-based resampler, created in the constructor.
std::unique_ptr<SincResampler> resampler_;
// Intermediate output buffer for the int16_t overload; allocated on its
// first use with room for `destination_frames_` samples.
std::unique_ptr<float[]> float_buffer_;
// Float input of the Resample() call in progress; nullptr outside a call and
// while the int16_t overload is running.
const float* source_ptr_;
// int16_t input of the Resample() call in progress; nullptr otherwise.
const int16_t* source_ptr_int_;
// Number of output samples produced per Resample() call.
const size_t destination_frames_;
// True on the first call to Resample(), to prime the SincResampler buffer.
bool first_pass_;
// Used to assert we are only requested for as much data as is available.
size_t source_available_;
};
} // namespace webrtc
#endif // COMMON_AUDIO_RESAMPLER_PUSH_SINC_RESAMPLER_H_

View file

@ -0,0 +1,923 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* A wrapper for resampling a numerous amount of sampling combinations.
*/
#include "common_audio/resampler/include/resampler.h"
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "rtc_base/logging.h"
namespace webrtc {
Resampler::Resampler()
: state1_(nullptr),
state2_(nullptr),
state3_(nullptr),
in_buffer_(nullptr),
out_buffer_(nullptr),
in_buffer_size_(0),
out_buffer_size_(0),
in_buffer_size_max_(0),
out_buffer_size_max_(0),
my_in_frequency_khz_(0),
my_out_frequency_khz_(0),
my_mode_(kResamplerMode1To1),
num_channels_(0),
helper_left_(nullptr),
helper_right_(nullptr) {}
// Convenience constructor: default-initialises the object and then configures
// it for the given rates and channel count.
Resampler::Resampler(int inFreq, int outFreq, size_t num_channels)
    : Resampler() {
  // The status returned by Reset() is not propagated. Reset() rejects bad
  // arguments before touching any member, so after a failure the object is
  // still in its default-constructed state.
  static_cast<void>(Reset(inFreq, outFreq, num_channels));
}
// Releases the filter states, the buffers and the per-channel helpers.
// free() and delete both accept null pointers, so no guards are required.
Resampler::~Resampler() {
  free(state1_);
  free(state2_);
  free(state3_);
  free(in_buffer_);
  free(out_buffer_);
  delete helper_left_;
  delete helper_right_;
}
// Reconfigures the resampler only when the requested setup differs from the
// current one. Rates are compared in whole kHz (integer division by 1000).
// Returns 0 when nothing had to change, otherwise the result of Reset().
int Resampler::ResetIfNeeded(int inFreq, int outFreq, size_t num_channels) {
  const bool unchanged = (inFreq / 1000 == my_in_frequency_khz_) &&
                         (outFreq / 1000 == my_out_frequency_khz_) &&
                         (num_channels == num_channels_);
  if (unchanged) {
    return 0;
  }
  return Reset(inFreq, outFreq, num_channels);
}
int Resampler::Reset(int inFreq, int outFreq, size_t num_channels) {
if (num_channels != 1 && num_channels != 2) {
RTC_LOG(LS_WARNING)
<< "Reset() called with unsupported channel count, num_channels = "
<< num_channels;
return -1;
}
ResamplerMode mode;
if (ComputeResamplerMode(inFreq, outFreq, &mode) != 0) {
RTC_LOG(LS_WARNING)
<< "Reset() called with unsupported sample rates, inFreq = " << inFreq
<< ", outFreq = " << outFreq;
return -1;
}
// Reinitialize internal state for the frequencies and sample rates.
num_channels_ = num_channels;
my_mode_ = mode;
if (state1_) {
free(state1_);
state1_ = nullptr;
}
if (state2_) {
free(state2_);
state2_ = nullptr;
}
if (state3_) {
free(state3_);
state3_ = nullptr;
}
if (in_buffer_) {
free(in_buffer_);
in_buffer_ = nullptr;
}
if (out_buffer_) {
free(out_buffer_);
out_buffer_ = nullptr;
}
if (helper_left_) {
delete helper_left_;
helper_left_ = nullptr;
}
if (helper_right_) {
delete helper_right_;
helper_right_ = nullptr;
}
in_buffer_size_ = 0;
out_buffer_size_ = 0;
in_buffer_size_max_ = 0;
out_buffer_size_max_ = 0;
// We need to track what domain we're in.
my_in_frequency_khz_ = inFreq / 1000;
my_out_frequency_khz_ = outFreq / 1000;
if (num_channels_ == 2) {
// Create two mono resamplers.
helper_left_ = new Resampler(inFreq, outFreq, 1);
helper_right_ = new Resampler(inFreq, outFreq, 1);
}
// Now create the states we need.
switch (my_mode_) {
case kResamplerMode1To1:
// No state needed;
break;
case kResamplerMode1To2:
state1_ = malloc(8 * sizeof(int32_t));
memset(state1_, 0, 8 * sizeof(int32_t));
break;
case kResamplerMode1To3:
state1_ = malloc(sizeof(WebRtcSpl_State16khzTo48khz));
WebRtcSpl_ResetResample16khzTo48khz(
static_cast<WebRtcSpl_State16khzTo48khz*>(state1_));
break;
case kResamplerMode1To4:
// 1:2
state1_ = malloc(8 * sizeof(int32_t));
memset(state1_, 0, 8 * sizeof(int32_t));
// 2:4
state2_ = malloc(8 * sizeof(int32_t));
memset(state2_, 0, 8 * sizeof(int32_t));
break;
case kResamplerMode1To6:
// 1:2
state1_ = malloc(8 * sizeof(int32_t));
memset(state1_, 0, 8 * sizeof(int32_t));
// 2:6
state2_ = malloc(sizeof(WebRtcSpl_State16khzTo48khz));
WebRtcSpl_ResetResample16khzTo48khz(
static_cast<WebRtcSpl_State16khzTo48khz*>(state2_));
break;
case kResamplerMode1To12:
// 1:2
state1_ = malloc(8 * sizeof(int32_t));
memset(state1_, 0, 8 * sizeof(int32_t));
// 2:4
state2_ = malloc(8 * sizeof(int32_t));
memset(state2_, 0, 8 * sizeof(int32_t));
// 4:12
state3_ = malloc(sizeof(WebRtcSpl_State16khzTo48khz));
WebRtcSpl_ResetResample16khzTo48khz(
static_cast<WebRtcSpl_State16khzTo48khz*>(state3_));
break;
case kResamplerMode2To3:
// 2:6
state1_ = malloc(sizeof(WebRtcSpl_State16khzTo48khz));
WebRtcSpl_ResetResample16khzTo48khz(
static_cast<WebRtcSpl_State16khzTo48khz*>(state1_));
// 6:3
state2_ = malloc(8 * sizeof(int32_t));
memset(state2_, 0, 8 * sizeof(int32_t));
break;
case kResamplerMode2To11:
state1_ = malloc(8 * sizeof(int32_t));
memset(state1_, 0, 8 * sizeof(int32_t));
state2_ = malloc(sizeof(WebRtcSpl_State8khzTo22khz));
WebRtcSpl_ResetResample8khzTo22khz(
static_cast<WebRtcSpl_State8khzTo22khz*>(state2_));
break;
case kResamplerMode4To11:
state1_ = malloc(sizeof(WebRtcSpl_State8khzTo22khz));
WebRtcSpl_ResetResample8khzTo22khz(
static_cast<WebRtcSpl_State8khzTo22khz*>(state1_));
break;
case kResamplerMode8To11:
state1_ = malloc(sizeof(WebRtcSpl_State16khzTo22khz));
WebRtcSpl_ResetResample16khzTo22khz(
static_cast<WebRtcSpl_State16khzTo22khz*>(state1_));
break;
case kResamplerMode11To16:
state1_ = malloc(8 * sizeof(int32_t));
memset(state1_, 0, 8 * sizeof(int32_t));
state2_ = malloc(sizeof(WebRtcSpl_State22khzTo16khz));
WebRtcSpl_ResetResample22khzTo16khz(
static_cast<WebRtcSpl_State22khzTo16khz*>(state2_));
break;
case kResamplerMode11To32:
// 11 -> 22
state1_ = malloc(8 * sizeof(int32_t));
memset(state1_, 0, 8 * sizeof(int32_t));
// 22 -> 16
state2_ = malloc(sizeof(WebRtcSpl_State22khzTo16khz));
WebRtcSpl_ResetResample22khzTo16khz(
static_cast<WebRtcSpl_State22khzTo16khz*>(state2_));
// 16 -> 32
state3_ = malloc(8 * sizeof(int32_t));
memset(state3_, 0, 8 * sizeof(int32_t));
break;
case kResamplerMode2To1:
state1_ = malloc(8 * sizeof(int32_t));
memset(state1_, 0, 8 * sizeof(int32_t));
break;
case kResamplerMode3To1:
state1_ = malloc(sizeof(WebRtcSpl_State48khzTo16khz));
WebRtcSpl_ResetResample48khzTo16khz(
static_cast<WebRtcSpl_State48khzTo16khz*>(state1_));
break;
case kResamplerMode4To1:
// 4:2
state1_ = malloc(8 * sizeof(int32_t));
memset(state1_, 0, 8 * sizeof(int32_t));
// 2:1
state2_ = malloc(8 * sizeof(int32_t));
memset(state2_, 0, 8 * sizeof(int32_t));
break;
case kResamplerMode6To1:
// 6:2
state1_ = malloc(sizeof(WebRtcSpl_State48khzTo16khz));
WebRtcSpl_ResetResample48khzTo16khz(
static_cast<WebRtcSpl_State48khzTo16khz*>(state1_));
// 2:1
state2_ = malloc(8 * sizeof(int32_t));
memset(state2_, 0, 8 * sizeof(int32_t));
break;
case kResamplerMode12To1:
// 12:4
state1_ = malloc(sizeof(WebRtcSpl_State48khzTo16khz));
WebRtcSpl_ResetResample48khzTo16khz(
static_cast<WebRtcSpl_State48khzTo16khz*>(state1_));
// 4:2
state2_ = malloc(8 * sizeof(int32_t));
memset(state2_, 0, 8 * sizeof(int32_t));
// 2:1
state3_ = malloc(8 * sizeof(int32_t));
memset(state3_, 0, 8 * sizeof(int32_t));
break;
case kResamplerMode3To2:
// 3:6
state1_ = malloc(8 * sizeof(int32_t));
memset(state1_, 0, 8 * sizeof(int32_t));
// 6:2
state2_ = malloc(sizeof(WebRtcSpl_State48khzTo16khz));
WebRtcSpl_ResetResample48khzTo16khz(
static_cast<WebRtcSpl_State48khzTo16khz*>(state2_));
break;
case kResamplerMode11To2:
state1_ = malloc(sizeof(WebRtcSpl_State22khzTo8khz));
WebRtcSpl_ResetResample22khzTo8khz(
static_cast<WebRtcSpl_State22khzTo8khz*>(state1_));
state2_ = malloc(8 * sizeof(int32_t));
memset(state2_, 0, 8 * sizeof(int32_t));
break;
case kResamplerMode11To4:
state1_ = malloc(sizeof(WebRtcSpl_State22khzTo8khz));
WebRtcSpl_ResetResample22khzTo8khz(
static_cast<WebRtcSpl_State22khzTo8khz*>(state1_));
break;
case kResamplerMode11To8:
state1_ = malloc(sizeof(WebRtcSpl_State22khzTo16khz));
WebRtcSpl_ResetResample22khzTo16khz(
static_cast<WebRtcSpl_State22khzTo16khz*>(state1_));
break;
}
return 0;
}
int Resampler::ComputeResamplerMode(int in_freq_hz,
int out_freq_hz,
ResamplerMode* mode) {
// Start with a math exercise, Euclid's algorithm to find the gcd:
int a = in_freq_hz;
int b = out_freq_hz;
int c = a % b;
while (c != 0) {
a = b;
b = c;
c = a % b;
}
// b is now the gcd;
// Scale with GCD
const int reduced_in_freq = in_freq_hz / b;
const int reduced_out_freq = out_freq_hz / b;
if (reduced_in_freq == reduced_out_freq) {
*mode = kResamplerMode1To1;
} else if (reduced_in_freq == 1) {
switch (reduced_out_freq) {
case 2:
*mode = kResamplerMode1To2;
break;
case 3:
*mode = kResamplerMode1To3;
break;
case 4:
*mode = kResamplerMode1To4;
break;
case 6:
*mode = kResamplerMode1To6;
break;
case 12:
*mode = kResamplerMode1To12;
break;
default:
return -1;
}
} else if (reduced_out_freq == 1) {
switch (reduced_in_freq) {
case 2:
*mode = kResamplerMode2To1;
break;
case 3:
*mode = kResamplerMode3To1;
break;
case 4:
*mode = kResamplerMode4To1;
break;
case 6:
*mode = kResamplerMode6To1;
break;
case 12:
*mode = kResamplerMode12To1;
break;
default:
return -1;
}
} else if ((reduced_in_freq == 2) && (reduced_out_freq == 3)) {
*mode = kResamplerMode2To3;
} else if ((reduced_in_freq == 2) && (reduced_out_freq == 11)) {
*mode = kResamplerMode2To11;
} else if ((reduced_in_freq == 4) && (reduced_out_freq == 11)) {
*mode = kResamplerMode4To11;
} else if ((reduced_in_freq == 8) && (reduced_out_freq == 11)) {
*mode = kResamplerMode8To11;
} else if ((reduced_in_freq == 3) && (reduced_out_freq == 2)) {
*mode = kResamplerMode3To2;
} else if ((reduced_in_freq == 11) && (reduced_out_freq == 2)) {
*mode = kResamplerMode11To2;
} else if ((reduced_in_freq == 11) && (reduced_out_freq == 4)) {
*mode = kResamplerMode11To4;
} else if ((reduced_in_freq == 11) && (reduced_out_freq == 16)) {
*mode = kResamplerMode11To16;
} else if ((reduced_in_freq == 11) && (reduced_out_freq == 32)) {
*mode = kResamplerMode11To32;
} else if ((reduced_in_freq == 11) && (reduced_out_freq == 8)) {
*mode = kResamplerMode11To8;
} else {
return -1;
}
return 0;
}
// Synchronous resampling, all output samples are written to samplesOut.
//
// samplesIn  - input samples; interleaved left/right when the resampler was
//              configured for two channels.
// lengthIn   - number of input samples (counting both channels for stereo).
// samplesOut - receives the resampled samples, interleaved for stereo.
// maxLen     - capacity of samplesOut in samples.
// outLen     - set to the number of samples written on success.
//
// Returns 0 on success and -1 if the output buffer is too small, if lengthIn
// is not a multiple of the block size required by the current mode (or odd
// for stereo), or if one of the per-channel helpers fails.
int Resampler::Push(const int16_t* samplesIn,
                    size_t lengthIn,
                    int16_t* samplesOut,
                    size_t maxLen,
                    size_t& outLen) {
  if (num_channels_ == 2) {
    // Interleaved stereo must hold whole left/right pairs. An odd length
    // would make the de-interleave loop read past samplesIn and write past
    // the per-channel buffers.
    if ((lengthIn % 2) != 0) {
      return -1;
    }
    const size_t frames_in = lengthIn / 2;
    const size_t max_frames_out = maxLen / 2;

    // Split up the signal and call the helper object for each channel.
    int16_t* left = static_cast<int16_t*>(malloc(frames_in * sizeof(int16_t)));
    int16_t* right =
        static_cast<int16_t*>(malloc(frames_in * sizeof(int16_t)));
    int16_t* out_left =
        static_cast<int16_t*>(malloc(max_frames_out * sizeof(int16_t)));
    int16_t* out_right =
        static_cast<int16_t*>(malloc(max_frames_out * sizeof(int16_t)));
    for (size_t i = 0; i < frames_in; ++i) {
      left[i] = samplesIn[2 * i];
      right[i] = samplesIn[2 * i + 1];
    }

    size_t actualOutLen_left = 0;
    size_t actualOutLen_right = 0;
    // Resample each channel with its own mono helper.
    int res = helper_left_->Push(left, frames_in, out_left, max_frames_out,
                                 actualOutLen_left);
    res |= helper_right_->Push(right, frames_in, out_right, max_frames_out,
                               actualOutLen_right);

    const bool ok = (res == 0) && (actualOutLen_left == actualOutLen_right);
    if (ok) {
      // Reassemble the signal.
      for (size_t i = 0; i < actualOutLen_left; i++) {
        samplesOut[i * 2] = out_left[i];
        samplesOut[i * 2 + 1] = out_right[i];
      }
      outLen = 2 * actualOutLen_left;
    }
    free(left);
    free(right);
    free(out_left);
    free(out_right);
    return ok ? 0 : -1;
  }

  // Containers for temp samples
  int16_t* tmp;
  int16_t* tmp_2;
  // Scratch memory handed to the WebRtcSpl_Resample*() routines.
  int32_t* tmp_mem;

  switch (my_mode_) {
    case kResamplerMode1To1:
      // The copy needs room for every input sample.
      if (maxLen < lengthIn) {
        return -1;
      }
      memcpy(samplesOut, samplesIn, lengthIn * sizeof(int16_t));
      outLen = lengthIn;
      break;
    case kResamplerMode1To2:
      if (maxLen < (lengthIn * 2)) {
        return -1;
      }
      WebRtcSpl_UpsampleBy2(samplesIn, lengthIn, samplesOut,
                            static_cast<int32_t*>(state1_));
      outLen = lengthIn * 2;
      return 0;
    case kResamplerMode1To3:
      // We can only handle blocks of 160 samples.
      if ((lengthIn % 160) != 0) {
        return -1;
      }
      if (maxLen < (lengthIn * 3)) {
        return -1;
      }
      tmp_mem = static_cast<int32_t*>(malloc(336 * sizeof(int32_t)));
      for (size_t i = 0; i < lengthIn; i += 160) {
        WebRtcSpl_Resample16khzTo48khz(
            samplesIn + i, samplesOut + i * 3,
            static_cast<WebRtcSpl_State16khzTo48khz*>(state1_), tmp_mem);
      }
      outLen = lengthIn * 3;
      free(tmp_mem);
      return 0;
    case kResamplerMode1To4:
      if (maxLen < (lengthIn * 4)) {
        return -1;
      }
      tmp = static_cast<int16_t*>(malloc(sizeof(int16_t) * 2 * lengthIn));
      // 1:2
      WebRtcSpl_UpsampleBy2(samplesIn, lengthIn, tmp,
                            static_cast<int32_t*>(state1_));
      // 2:4
      WebRtcSpl_UpsampleBy2(tmp, lengthIn * 2, samplesOut,
                            static_cast<int32_t*>(state2_));
      outLen = lengthIn * 4;
      free(tmp);
      return 0;
    case kResamplerMode1To6:
      // We can only handle blocks of 80 samples.
      if ((lengthIn % 80) != 0) {
        return -1;
      }
      if (maxLen < (lengthIn * 6)) {
        return -1;
      }
      // 1:2
      tmp_mem = static_cast<int32_t*>(malloc(336 * sizeof(int32_t)));
      tmp = static_cast<int16_t*>(malloc(sizeof(int16_t) * 2 * lengthIn));
      WebRtcSpl_UpsampleBy2(samplesIn, lengthIn, tmp,
                            static_cast<int32_t*>(state1_));
      outLen = lengthIn * 2;
      // 2:6
      for (size_t i = 0; i < outLen; i += 160) {
        WebRtcSpl_Resample16khzTo48khz(
            tmp + i, samplesOut + i * 3,
            static_cast<WebRtcSpl_State16khzTo48khz*>(state2_), tmp_mem);
      }
      outLen = outLen * 3;
      free(tmp_mem);
      free(tmp);
      return 0;
    case kResamplerMode1To12:
      // We can only handle blocks of 40 samples.
      if ((lengthIn % 40) != 0) {
        return -1;
      }
      if (maxLen < (lengthIn * 12)) {
        return -1;
      }
      tmp_mem = static_cast<int32_t*>(malloc(336 * sizeof(int32_t)));
      tmp = static_cast<int16_t*>(malloc(sizeof(int16_t) * 4 * lengthIn));
      // 1:2 (samplesOut is used as intermediate storage here)
      WebRtcSpl_UpsampleBy2(samplesIn, lengthIn, samplesOut,
                            static_cast<int32_t*>(state1_));
      outLen = lengthIn * 2;
      // 2:4
      WebRtcSpl_UpsampleBy2(samplesOut, outLen, tmp,
                            static_cast<int32_t*>(state2_));
      outLen = outLen * 2;
      // 4:12
      for (size_t i = 0; i < outLen; i += 160) {
        // WebRtcSpl_Resample16khzTo48khz() takes a block of 160 samples
        // as input and outputs a resampled block of 480 samples. The
        // data is now actually in 32 kHz sampling rate, despite the
        // function name, and with a resampling factor of three becomes
        // 96 kHz.
        WebRtcSpl_Resample16khzTo48khz(
            tmp + i, samplesOut + i * 3,
            static_cast<WebRtcSpl_State16khzTo48khz*>(state3_), tmp_mem);
      }
      outLen = outLen * 3;
      free(tmp_mem);
      free(tmp);
      return 0;
    case kResamplerMode2To3:
      if (maxLen < (lengthIn * 3 / 2)) {
        return -1;
      }
      // 2:6
      // We can only handle blocks of 160 samples.
      if ((lengthIn % 160) != 0) {
        return -1;
      }
      tmp = static_cast<int16_t*>(malloc(sizeof(int16_t) * lengthIn * 3));
      tmp_mem = static_cast<int32_t*>(malloc(336 * sizeof(int32_t)));
      for (size_t i = 0; i < lengthIn; i += 160) {
        WebRtcSpl_Resample16khzTo48khz(
            samplesIn + i, tmp + i * 3,
            static_cast<WebRtcSpl_State16khzTo48khz*>(state1_), tmp_mem);
      }
      lengthIn = lengthIn * 3;
      // 6:3
      WebRtcSpl_DownsampleBy2(tmp, lengthIn, samplesOut,
                              static_cast<int32_t*>(state2_));
      outLen = lengthIn / 2;
      free(tmp);
      free(tmp_mem);
      return 0;
    case kResamplerMode2To11:
      // We can only handle blocks of 80 samples.
      if ((lengthIn % 80) != 0) {
        return -1;
      }
      if (maxLen < ((lengthIn * 11) / 2)) {
        return -1;
      }
      tmp = static_cast<int16_t*>(malloc(sizeof(int16_t) * 2 * lengthIn));
      // 1:2
      WebRtcSpl_UpsampleBy2(samplesIn, lengthIn, tmp,
                            static_cast<int32_t*>(state1_));
      lengthIn *= 2;
      tmp_mem = static_cast<int32_t*>(malloc(98 * sizeof(int32_t)));
      for (size_t i = 0; i < lengthIn; i += 80) {
        WebRtcSpl_Resample8khzTo22khz(
            tmp + i, samplesOut + (i * 11) / 4,
            static_cast<WebRtcSpl_State8khzTo22khz*>(state2_), tmp_mem);
      }
      outLen = (lengthIn * 11) / 4;
      free(tmp_mem);
      free(tmp);
      return 0;
    case kResamplerMode4To11:
      // We can only handle blocks of 80 samples.
      if ((lengthIn % 80) != 0) {
        return -1;
      }
      if (maxLen < ((lengthIn * 11) / 4)) {
        return -1;
      }
      tmp_mem = static_cast<int32_t*>(malloc(98 * sizeof(int32_t)));
      for (size_t i = 0; i < lengthIn; i += 80) {
        WebRtcSpl_Resample8khzTo22khz(
            samplesIn + i, samplesOut + (i * 11) / 4,
            static_cast<WebRtcSpl_State8khzTo22khz*>(state1_), tmp_mem);
      }
      outLen = (lengthIn * 11) / 4;
      free(tmp_mem);
      return 0;
    case kResamplerMode8To11:
      // We can only handle blocks of 160 samples.
      if ((lengthIn % 160) != 0) {
        return -1;
      }
      if (maxLen < ((lengthIn * 11) / 8)) {
        return -1;
      }
      tmp_mem = static_cast<int32_t*>(malloc(88 * sizeof(int32_t)));
      for (size_t i = 0; i < lengthIn; i += 160) {
        WebRtcSpl_Resample16khzTo22khz(
            samplesIn + i, samplesOut + (i * 11) / 8,
            static_cast<WebRtcSpl_State16khzTo22khz*>(state1_), tmp_mem);
      }
      outLen = (lengthIn * 11) / 8;
      free(tmp_mem);
      return 0;
    case kResamplerMode11To16:
      // We can only handle blocks of 110 samples.
      if ((lengthIn % 110) != 0) {
        return -1;
      }
      if (maxLen < ((lengthIn * 16) / 11)) {
        return -1;
      }
      tmp_mem = static_cast<int32_t*>(malloc(104 * sizeof(int32_t)));
      tmp = static_cast<int16_t*>(malloc((sizeof(int16_t) * lengthIn * 2)));
      // 11 -> 22
      WebRtcSpl_UpsampleBy2(samplesIn, lengthIn, tmp,
                            static_cast<int32_t*>(state1_));
      // 22 -> 16, in blocks of 220 input / 160 output samples
      for (size_t i = 0; i < (lengthIn * 2); i += 220) {
        WebRtcSpl_Resample22khzTo16khz(
            tmp + i, samplesOut + (i / 220) * 160,
            static_cast<WebRtcSpl_State22khzTo16khz*>(state2_), tmp_mem);
      }
      outLen = (lengthIn * 16) / 11;
      free(tmp_mem);
      free(tmp);
      return 0;
    case kResamplerMode11To32:
      // We can only handle blocks of 110 samples.
      if ((lengthIn % 110) != 0) {
        return -1;
      }
      if (maxLen < ((lengthIn * 32) / 11)) {
        return -1;
      }
      tmp_mem = static_cast<int32_t*>(malloc(104 * sizeof(int32_t)));
      tmp = static_cast<int16_t*>(malloc((sizeof(int16_t) * lengthIn * 2)));
      // 11 -> 22 kHz in samplesOut
      WebRtcSpl_UpsampleBy2(samplesIn, lengthIn, samplesOut,
                            static_cast<int32_t*>(state1_));
      // 22 -> 16 in tmp
      for (size_t i = 0; i < (lengthIn * 2); i += 220) {
        WebRtcSpl_Resample22khzTo16khz(
            samplesOut + i, tmp + (i / 220) * 160,
            static_cast<WebRtcSpl_State22khzTo16khz*>(state2_), tmp_mem);
      }
      // 16 -> 32 in samplesOut
      WebRtcSpl_UpsampleBy2(tmp, (lengthIn * 16) / 11, samplesOut,
                            static_cast<int32_t*>(state3_));
      outLen = (lengthIn * 32) / 11;
      free(tmp_mem);
      free(tmp);
      return 0;
    case kResamplerMode2To1:
      if (maxLen < (lengthIn / 2)) {
        return -1;
      }
      WebRtcSpl_DownsampleBy2(samplesIn, lengthIn, samplesOut,
                              static_cast<int32_t*>(state1_));
      outLen = lengthIn / 2;
      return 0;
    case kResamplerMode3To1:
      // We can only handle blocks of 480 samples.
      if ((lengthIn % 480) != 0) {
        return -1;
      }
      if (maxLen < (lengthIn / 3)) {
        return -1;
      }
      tmp_mem = static_cast<int32_t*>(malloc(496 * sizeof(int32_t)));
      for (size_t i = 0; i < lengthIn; i += 480) {
        WebRtcSpl_Resample48khzTo16khz(
            samplesIn + i, samplesOut + i / 3,
            static_cast<WebRtcSpl_State48khzTo16khz*>(state1_), tmp_mem);
      }
      outLen = lengthIn / 3;
      free(tmp_mem);
      return 0;
    case kResamplerMode4To1:
      if (maxLen < (lengthIn / 4)) {
        return -1;
      }
      tmp = static_cast<int16_t*>(malloc(sizeof(int16_t) * lengthIn / 2));
      // 4:2
      WebRtcSpl_DownsampleBy2(samplesIn, lengthIn, tmp,
                              static_cast<int32_t*>(state1_));
      // 2:1
      WebRtcSpl_DownsampleBy2(tmp, lengthIn / 2, samplesOut,
                              static_cast<int32_t*>(state2_));
      outLen = lengthIn / 4;
      free(tmp);
      return 0;
    case kResamplerMode6To1:
      // We can only handle blocks of 480 samples.
      if ((lengthIn % 480) != 0) {
        return -1;
      }
      if (maxLen < (lengthIn / 6)) {
        return -1;
      }
      tmp_mem = static_cast<int32_t*>(malloc(496 * sizeof(int32_t)));
      tmp = static_cast<int16_t*>(malloc((sizeof(int16_t) * lengthIn) / 3));
      // 6:2
      for (size_t i = 0; i < lengthIn; i += 480) {
        WebRtcSpl_Resample48khzTo16khz(
            samplesIn + i, tmp + i / 3,
            static_cast<WebRtcSpl_State48khzTo16khz*>(state1_), tmp_mem);
      }
      outLen = lengthIn / 3;
      free(tmp_mem);
      // 2:1
      WebRtcSpl_DownsampleBy2(tmp, outLen, samplesOut,
                              static_cast<int32_t*>(state2_));
      free(tmp);
      outLen = outLen / 2;
      return 0;
    case kResamplerMode12To1:
      // We can only handle blocks of 480 samples.
      if ((lengthIn % 480) != 0) {
        return -1;
      }
      if (maxLen < (lengthIn / 12)) {
        return -1;
      }
      tmp_mem = static_cast<int32_t*>(malloc(496 * sizeof(int32_t)));
      tmp = static_cast<int16_t*>(malloc((sizeof(int16_t) * lengthIn) / 3));
      tmp_2 = static_cast<int16_t*>(malloc((sizeof(int16_t) * lengthIn) / 6));
      // 12:4
      for (size_t i = 0; i < lengthIn; i += 480) {
        // WebRtcSpl_Resample48khzTo16khz() takes a block of 480 samples
        // as input and outputs a resampled block of 160 samples. The
        // data is now actually in 96 kHz sampling rate, despite the
        // function name, and with a resampling factor of 1/3 becomes
        // 32 kHz.
        WebRtcSpl_Resample48khzTo16khz(
            samplesIn + i, tmp + i / 3,
            static_cast<WebRtcSpl_State48khzTo16khz*>(state1_), tmp_mem);
      }
      outLen = lengthIn / 3;
      free(tmp_mem);
      // 4:2
      WebRtcSpl_DownsampleBy2(tmp, outLen, tmp_2,
                              static_cast<int32_t*>(state2_));
      outLen = outLen / 2;
      free(tmp);
      // 2:1
      WebRtcSpl_DownsampleBy2(tmp_2, outLen, samplesOut,
                              static_cast<int32_t*>(state3_));
      free(tmp_2);
      outLen = outLen / 2;
      return 0;
    case kResamplerMode3To2:
      // The 6:2 stage works on blocks of 480 samples of the 1:2 upsampled
      // signal, i.e. on multiples of 240 input samples. This is checked up
      // front so a rejected call neither allocates nor advances the
      // upsampler state.
      if ((lengthIn % 240) != 0) {
        return -1;
      }
      if (maxLen < (lengthIn * 2 / 3)) {
        return -1;
      }
      // 3:6
      tmp = static_cast<int16_t*>(malloc(sizeof(int16_t) * lengthIn * 2));
      WebRtcSpl_UpsampleBy2(samplesIn, lengthIn, tmp,
                            static_cast<int32_t*>(state1_));
      lengthIn *= 2;
      // 6:2
      tmp_mem = static_cast<int32_t*>(malloc(496 * sizeof(int32_t)));
      for (size_t i = 0; i < lengthIn; i += 480) {
        WebRtcSpl_Resample48khzTo16khz(
            tmp + i, samplesOut + i / 3,
            static_cast<WebRtcSpl_State48khzTo16khz*>(state2_), tmp_mem);
      }
      outLen = lengthIn / 3;
      free(tmp);
      free(tmp_mem);
      return 0;
    case kResamplerMode11To2:
      // We can only handle blocks of 220 samples.
      if ((lengthIn % 220) != 0) {
        return -1;
      }
      if (maxLen < ((lengthIn * 2) / 11)) {
        return -1;
      }
      tmp_mem = static_cast<int32_t*>(malloc(126 * sizeof(int32_t)));
      tmp =
          static_cast<int16_t*>(malloc((lengthIn * 4) / 11 * sizeof(int16_t)));
      // 11:4
      for (size_t i = 0; i < lengthIn; i += 220) {
        WebRtcSpl_Resample22khzTo8khz(
            samplesIn + i, tmp + (i * 4) / 11,
            static_cast<WebRtcSpl_State22khzTo8khz*>(state1_), tmp_mem);
      }
      lengthIn = (lengthIn * 4) / 11;
      // 4:2
      WebRtcSpl_DownsampleBy2(tmp, lengthIn, samplesOut,
                              static_cast<int32_t*>(state2_));
      outLen = lengthIn / 2;
      free(tmp_mem);
      free(tmp);
      return 0;
    case kResamplerMode11To4:
      // We can only handle blocks of 220 samples.
      if ((lengthIn % 220) != 0) {
        return -1;
      }
      if (maxLen < ((lengthIn * 4) / 11)) {
        return -1;
      }
      tmp_mem = static_cast<int32_t*>(malloc(126 * sizeof(int32_t)));
      for (size_t i = 0; i < lengthIn; i += 220) {
        WebRtcSpl_Resample22khzTo8khz(
            samplesIn + i, samplesOut + (i * 4) / 11,
            static_cast<WebRtcSpl_State22khzTo8khz*>(state1_), tmp_mem);
      }
      outLen = (lengthIn * 4) / 11;
      free(tmp_mem);
      return 0;
    case kResamplerMode11To8:
      // We can only handle blocks of 220 samples.
      if ((lengthIn % 220) != 0) {
        return -1;
      }
      if (maxLen < ((lengthIn * 8) / 11)) {
        return -1;
      }
      tmp_mem = static_cast<int32_t*>(malloc(104 * sizeof(int32_t)));
      for (size_t i = 0; i < lengthIn; i += 220) {
        WebRtcSpl_Resample22khzTo16khz(
            samplesIn + i, samplesOut + (i * 8) / 11,
            static_cast<WebRtcSpl_State22khzTo16khz*>(state1_), tmp_mem);
      }
      outLen = (lengthIn * 8) / 11;
      free(tmp_mem);
      return 0;
  }
  return 0;
}
} // namespace webrtc

View file

@ -0,0 +1,363 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Modified from the Chromium original:
// src/media/base/sinc_resampler.cc
// Initial input buffer layout, dividing into regions r0_ to r4_ (note: r0_, r3_
// and r4_ will move after the first load):
//
// |----------------|-----------------------------------------|----------------|
//
//                                         request_frames_
//                   <--------------------------------------------------------->
//                                     r0_ (during first load)
//
//  kKernelSize / 2   kKernelSize / 2         kKernelSize / 2   kKernelSize / 2
// <---------------> <--------------->       <---------------> <--------------->
//        r1_               r2_                     r3_               r4_
//
//                           block_size_ == r4_ - r2_
//                   <--------------------------------------->
//
//                                                  request_frames_
//                                    <------------------ ... ----------------->
//                                             r0_ (during second load)
//
// On the second request r0_ slides to the right by kKernelSize / 2 and r3_, r4_
// and block_size_ are reinitialized via step (3) in the algorithm below.
//
// These new regions remain constant until a Flush() occurs. While complicated,
// this allows us to reduce jitter by always requesting the same amount from the
// provided callback.
//
// The algorithm:
//
// 1) Allocate input_buffer of size: request_frames_ + kKernelSize; this ensures
// there's enough room to read request_frames_ from the callback into region
// r0_ (which will move between the first and subsequent passes).
//
// 2) Let r1_, r2_ each represent half the kernel centered around r0_:
//
// r0_ = input_buffer_ + kKernelSize / 2
// r1_ = input_buffer_
// r2_ = r0_
//
// r0_ is always request_frames_ in size. r1_, r2_ are kKernelSize / 2 in
// size. r1_ must be zero initialized to avoid convolution with garbage (see
// step (5) for why).
//
// 3) Let r3_, r4_ each represent half the kernel right aligned with the end of
// r0_ and choose block_size_ as the distance in frames between r4_ and r2_:
//
// r3_ = r0_ + request_frames_ - kKernelSize
// r4_ = r0_ + request_frames_ - kKernelSize / 2
// block_size_ = r4_ - r2_ = request_frames_ - kKernelSize / 2
//
// 4) Consume request_frames_ frames into r0_.
//
// 5) Position kernel centered at start of r2_ and generate output frames until
// the kernel is centered at the start of r4_ or we've finished generating
// all the output frames.
//
// 6) Wrap left over data from the r3_ to r1_ and r4_ to r2_.
//
// 7) If we're on the second load, in order to avoid overwriting the frames we
// just wrapped from r4_ we need to slide r0_ to the right by the size of
// r4_, which is kKernelSize / 2:
//
// r0_ = r0_ + kKernelSize / 2 = input_buffer_ + kKernelSize
//
// r3_, r4_, and block_size_ then need to be reinitialized, so goto (3).
//
// 8) Else, if we're not on the second load, goto (4).
//
// Note: we're glossing over how the sub-sample handling works with
// `virtual_source_idx_`, etc.
// MSVC++ requires this to be set before any other includes to get M_PI.
#define _USE_MATH_DEFINES
#include "common_audio/resampler/sinc_resampler.h"
#include <math.h>
#include <stdint.h>
#include <string.h>
#include <limits>
#include "rtc_base/checks.h"
#include "rtc_base/system/arch.h"
#include "system_wrappers/include/cpu_features_wrapper.h" // kSSE2, WebRtc_G...
namespace webrtc {
namespace {
// Returns the scale factor applied to the sinc kernel, i.e. the normalized
// cutoff frequency of the low-pass filter for the given input/output ratio.
double SincScaleFactor(double io_ratio) {
  // The sinc function is an idealized brick-wall filter, but the windowed
  // kernel has a transition band rather than an immediate cut. The cutoff is
  // therefore pulled slightly below the nominal value to avoid aliasing at the
  // very top of the spectrum.
  // TODO(crogers): this value is empirical and to be more exact should vary
  // depending on kKernelSize.
  constexpr double kTransitionBandMargin = 0.9;

  // When downsampling (ratio above one) the pass band shrinks with the ratio;
  // otherwise the full band is kept.
  double cutoff = 1.0;
  if (io_ratio > 1.0) {
    cutoff = 1.0 / io_ratio;
  }
  return cutoff * kTransitionBandMargin;
}
} // namespace
// Out-of-class definition of the static constant that is declared, with its
// initializer, inside SincResampler; it provides storage for the member when
// it is odr-used.
const size_t SincResampler::kKernelSize;
// If we know the minimum architecture at compile time, avoid CPU detection.
// Picks the convolution routine stored in `convolve_proc_`. NEON builds bind
// it at compile time, x86 builds query the CPU for SSE2 at run time, and every
// other target uses the portable C implementation.
void SincResampler::InitializeCPUSpecificFeatures() {
#if defined(WEBRTC_HAS_NEON)
  convolve_proc_ = Convolve_NEON;
#elif defined(WEBRTC_ARCH_X86_FAMILY)
  convolve_proc_ = GetCPUInfo(kSSE2) ? Convolve_SSE : Convolve_C;
#else
  // Unknown architecture: fall back to the portable implementation.
  convolve_proc_ = Convolve_C;
#endif
}
// Builds a resampler for the given input/output rate ratio.
//
// `io_sample_rate_ratio` is the ratio of input to output sample rates,
// `request_frames` is the number of frames requested from `read_cb` on each
// refill, and `read_cb` supplies the input audio (stored, not owned here).
//
// Allocates the three kernel tables and the input buffer, selects the
// convolution routine, resets the streaming state and generates the kernels.
SincResampler::SincResampler(double io_sample_rate_ratio,
size_t request_frames,
SincResamplerCallback* read_cb)
: io_sample_rate_ratio_(io_sample_rate_ratio),
read_cb_(read_cb),
request_frames_(request_frames),
// Room for one request plus a full kernel of wrapped/slid data.
input_buffer_size_(request_frames_ + kKernelSize),
// Create input buffers with a 32-byte alignment for SIMD optimizations.
kernel_storage_(static_cast<float*>(
AlignedMalloc(sizeof(float) * kKernelStorageSize, 32))),
kernel_pre_sinc_storage_(static_cast<float*>(
AlignedMalloc(sizeof(float) * kKernelStorageSize, 32))),
kernel_window_storage_(static_cast<float*>(
AlignedMalloc(sizeof(float) * kKernelStorageSize, 32))),
input_buffer_(static_cast<float*>(
AlignedMalloc(sizeof(float) * input_buffer_size_, 32))),
convolve_proc_(nullptr),
// r1_ and r2_ never move: start of the buffer and half a kernel in.
r1_(input_buffer_.get()),
r2_(input_buffer_.get() + kKernelSize / 2) {
InitializeCPUSpecificFeatures();
RTC_DCHECK(convolve_proc_);
RTC_DCHECK_GT(request_frames_, 0);
// Flush() zeroes the input buffer and, through UpdateRegions(false), sets
// r0_, r3_, r4_ and block_size_, so it must run before the check below.
Flush();
RTC_DCHECK_GT(block_size_, kKernelSize);
// Zero all kernel tables before they are filled in by InitializeKernel().
memset(kernel_storage_.get(), 0,
sizeof(*kernel_storage_.get()) * kKernelStorageSize);
memset(kernel_pre_sinc_storage_.get(), 0,
sizeof(*kernel_pre_sinc_storage_.get()) * kKernelStorageSize);
memset(kernel_window_storage_.get(), 0,
sizeof(*kernel_window_storage_.get()) * kKernelStorageSize);
InitializeKernel();
}
// Nothing to release by hand: all buffers are held by smart-pointer members.
SincResampler::~SincResampler() = default;
void SincResampler::UpdateRegions(bool second_load) {
// Setup various region pointers in the buffer (see diagram above). If we're
// on the second load we need to slide r0_ to the right by kKernelSize / 2.
r0_ = input_buffer_.get() + (second_load ? kKernelSize : kKernelSize / 2);
r3_ = r0_ + request_frames_ - kKernelSize;
r4_ = r0_ + request_frames_ - kKernelSize / 2;
block_size_ = r4_ - r2_;
// r1_ at the beginning of the buffer.
RTC_DCHECK_EQ(r1_, input_buffer_.get());
// r1_ left of r2_, r4_ left of r3_ and size correct.
RTC_DCHECK_EQ(r2_ - r1_, r4_ - r3_);
// r2_ left of r3.
RTC_DCHECK_LT(r2_, r3_);
}
void SincResampler::InitializeKernel() {
// Blackman window parameters.
static const double kAlpha = 0.16;
static const double kA0 = 0.5 * (1.0 - kAlpha);
static const double kA1 = 0.5;
static const double kA2 = 0.5 * kAlpha;
// Generates a set of windowed sinc() kernels.
// We generate a range of sub-sample offsets from 0.0 to 1.0.
const double sinc_scale_factor = SincScaleFactor(io_sample_rate_ratio_);
for (size_t offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) {
const float subsample_offset =
static_cast<float>(offset_idx) / kKernelOffsetCount;
for (size_t i = 0; i < kKernelSize; ++i) {
const size_t idx = i + offset_idx * kKernelSize;
const float pre_sinc = static_cast<float>(
M_PI * (static_cast<int>(i) - static_cast<int>(kKernelSize / 2) -
subsample_offset));
kernel_pre_sinc_storage_[idx] = pre_sinc;
// Compute Blackman window, matching the offset of the sinc().
const float x = (i - subsample_offset) / kKernelSize;
const float window = static_cast<float>(kA0 - kA1 * cos(2.0 * M_PI * x) +
kA2 * cos(4.0 * M_PI * x));
kernel_window_storage_[idx] = window;
// Compute the sinc with offset, then window the sinc() function and store
// at the correct offset.
kernel_storage_[idx] = static_cast<float>(
window * ((pre_sinc == 0)
? sinc_scale_factor
: (sin(sinc_scale_factor * pre_sinc) / pre_sinc)));
}
}
}
void SincResampler::SetRatio(double io_sample_rate_ratio) {
if (fabs(io_sample_rate_ratio_ - io_sample_rate_ratio) <
std::numeric_limits<double>::epsilon()) {
return;
}
io_sample_rate_ratio_ = io_sample_rate_ratio;
// Optimize reinitialization by reusing values which are independent of
// `sinc_scale_factor`. Provides a 3x speedup.
const double sinc_scale_factor = SincScaleFactor(io_sample_rate_ratio_);
for (size_t offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) {
for (size_t i = 0; i < kKernelSize; ++i) {
const size_t idx = i + offset_idx * kKernelSize;
const float window = kernel_window_storage_[idx];
const float pre_sinc = kernel_pre_sinc_storage_[idx];
kernel_storage_[idx] = static_cast<float>(
window * ((pre_sinc == 0)
? sinc_scale_factor
: (sin(sinc_scale_factor * pre_sinc) / pre_sinc)));
}
}
}
// Writes `frames` resampled output samples to `destination`, pulling input
// from `read_cb_` in chunks of `request_frames_` whenever the current block
// of buffered input has been consumed. Returns as soon as the requested
// number of frames has been produced; the position within the current block
// is kept in `virtual_source_idx_` for the next call.
void SincResampler::Resample(size_t frames, float* destination) {
size_t remaining_frames = frames;
// Step (1) -- Prime the input buffer at the start of the input stream.
if (!buffer_primed_ && remaining_frames) {
read_cb_->Run(request_frames_, r0_);
buffer_primed_ = true;
}
// Step (2) -- Resample! const what we can outside of the loop for speed. It
// actually has an impact on ARM performance. See inner loop comment below.
const double current_io_ratio = io_sample_rate_ratio_;
const float* const kernel_ptr = kernel_storage_.get();
while (remaining_frames) {
// `i` may be negative if the last Resample() call ended on an iteration
// that put `virtual_source_idx_` over the limit.
//
// Note: The loop construct here can severely impact performance on ARM
// or when built with clang. See https://codereview.chromium.org/18566009/
//
// The initial value of `i` is the number of output frames that can still be
// generated from the current block.
for (int i = static_cast<int>(
ceil((block_size_ - virtual_source_idx_) / current_io_ratio));
i > 0; --i) {
RTC_DCHECK_LT(virtual_source_idx_, block_size_);
// `virtual_source_idx_` lies in between two kernel offsets so figure out
// what they are.
const int source_idx = static_cast<int>(virtual_source_idx_);
const double subsample_remainder = virtual_source_idx_ - source_idx;
const double virtual_offset_idx =
subsample_remainder * kKernelOffsetCount;
const int offset_idx = static_cast<int>(virtual_offset_idx);
// We'll compute "convolutions" for the two kernels which straddle
// `virtual_source_idx_`.
const float* const k1 = kernel_ptr + offset_idx * kKernelSize;
const float* const k2 = k1 + kKernelSize;
// Ensure `k1`, `k2` are 32-byte aligned for SIMD usage. Should always be
// true so long as kKernelSize is a multiple of 32.
RTC_DCHECK_EQ(0, reinterpret_cast<uintptr_t>(k1) % 32);
RTC_DCHECK_EQ(0, reinterpret_cast<uintptr_t>(k2) % 32);
// Initialize input pointer based on quantized `virtual_source_idx_`.
const float* const input_ptr = r1_ + source_idx;
// Figure out how much to weight each kernel's "convolution".
const double kernel_interpolation_factor =
virtual_offset_idx - offset_idx;
*destination++ =
convolve_proc_(input_ptr, k1, k2, kernel_interpolation_factor);
// Advance the virtual index.
virtual_source_idx_ += current_io_ratio;
// Done as soon as the caller's request is satisfied; the buffer is only
// refilled when a later call (or iteration) exhausts the block.
if (!--remaining_frames)
return;
}
// Wrap back around to the start.
virtual_source_idx_ -= block_size_;
// Step (3) -- Copy r3_, r4_ to r1_, r2_.
// This wraps the last input frames back to the start of the buffer.
memcpy(r1_, r3_, sizeof(*input_buffer_.get()) * kKernelSize);
// Step (4) -- Reinitialize regions if necessary.
// r0_ == r2_ only holds for the initial layout set by UpdateRegions(false).
if (r0_ == r2_)
UpdateRegions(true);
// Step (5) -- Refresh the buffer with more input.
read_cb_->Run(request_frames_, r0_);
}
}
#undef CONVOLVE_FUNC
// Number of output frames one block of input yields at the current ratio,
// truncated toward zero.
size_t SincResampler::ChunkSize() const {
  const double output_frames_per_block = block_size_ / io_sample_rate_ratio_;
  return static_cast<size_t>(output_frames_per_block);
}
// Discards all buffered input and returns the resampler to its initial
// streaming state: zeroed input buffer, unprimed, index at the block start and
// regions laid out as for the first load.
void SincResampler::Flush() {
  const size_t buffer_bytes = sizeof(*input_buffer_.get()) * input_buffer_size_;
  memset(input_buffer_.get(), 0, buffer_bytes);

  buffer_primed_ = false;
  virtual_source_idx_ = 0;
  UpdateRegions(false);
}
// Portable convolution: computes the dot product of kKernelSize input samples
// with each of the kernels `k1` and `k2`, then linearly interpolates the two
// results using `kernel_interpolation_factor` (0 selects `k1`, 1 selects `k2`).
float SincResampler::Convolve_C(const float* input_ptr,
                                const float* k1,
                                const float* k2,
                                double kernel_interpolation_factor) {
  float dot_k1 = 0;
  float dot_k2 = 0;

  // Generate a single output sample. Unrolling this loop hurt performance in
  // local testing.
  for (size_t tap = 0; tap < kKernelSize; ++tap) {
    dot_k1 += input_ptr[tap] * k1[tap];
    dot_k2 += input_ptr[tap] * k2[tap];
  }

  // Linearly interpolate the two "convolutions".
  const double weight_k1 = 1.0 - kernel_interpolation_factor;
  return static_cast<float>(weight_k1 * dot_k1 +
                            kernel_interpolation_factor * dot_k2);
}
} // namespace webrtc

View file

@ -0,0 +1,181 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Modified from the Chromium original here:
// src/media/base/sinc_resampler.h
#ifndef COMMON_AUDIO_RESAMPLER_SINC_RESAMPLER_H_
#define COMMON_AUDIO_RESAMPLER_SINC_RESAMPLER_H_
#include <stddef.h>
#include <memory>
#include "rtc_base/gtest_prod_util.h"
#include "rtc_base/memory/aligned_malloc.h"
#include "rtc_base/system/arch.h"
namespace webrtc {
// Callback class for providing more data into the resampler. Expects `frames`
// of data to be rendered into `destination`; zero padded if not enough frames
// are available to satisfy the request.
class SincResamplerCallback {
 public:
  virtual ~SincResamplerCallback() = default;

  // Renders `frames` samples of input into `destination`. Implementations are
  // expected to zero-pad when fewer frames are available.
  virtual void Run(size_t frames, float* destination) = 0;
};
// SincResampler is a high-quality single-channel sample-rate converter.
class SincResampler {
public:
// The kernel size can be adjusted for quality (higher is better) at the
// expense of performance. Must be a multiple of 32.
// TODO(dalecurtis): Test performance to see if we can jack this up to 64+.
static const size_t kKernelSize = 32;
// Default request size. Affects how often and for how much SincResampler
// calls back for input. Must be greater than kKernelSize.
static const size_t kDefaultRequestSize = 512;
// The kernel offset count is used for interpolation and is the number of
// sub-sample kernel shifts. Can be adjusted for quality (higher is better)
// at the expense of allocating more memory.
static const size_t kKernelOffsetCount = 32;
// Total number of floats in each kernel table: one kernel of kKernelSize
// taps for every offset index from 0 to kKernelOffsetCount inclusive.
static const size_t kKernelStorageSize =
kKernelSize * (kKernelOffsetCount + 1);
// Constructs a SincResampler with the specified `read_cb`, which is used to
// acquire audio data for resampling. `io_sample_rate_ratio` is the ratio
// of input / output sample rates. `request_frames` controls the size in
// frames of the buffer requested by each `read_cb` call. The value must be
// greater than kKernelSize. Specify kDefaultRequestSize if there are no
// request size constraints.
SincResampler(double io_sample_rate_ratio,
size_t request_frames,
SincResamplerCallback* read_cb);
virtual ~SincResampler();
SincResampler(const SincResampler&) = delete;
SincResampler& operator=(const SincResampler&) = delete;
// Resample `frames` of data from `read_cb_` into `destination`.
void Resample(size_t frames, float* destination);
// The maximum size in frames that guarantees Resample() will only make a
// single call to `read_cb_` for more data.
size_t ChunkSize() const;
// Returns the number of frames requested from `read_cb_` per callback.
size_t request_frames() const { return request_frames_; }
// Flush all buffered data and reset internal indices. Not thread safe, do
// not call while Resample() is in progress.
void Flush();
// Update `io_sample_rate_ratio_`. SetRatio() will cause a reconstruction of
// the kernels used for resampling. Not thread safe, do not call while
// Resample() is in progress.
//
// TODO(ajm): Use this in PushSincResampler rather than reconstructing
// SincResampler. We would also need a way to update `request_frames_`.
void SetRatio(double io_sample_rate_ratio);
// Exposes the raw kernel table (kKernelStorageSize floats) for tests.
float* get_kernel_for_testing() { return kernel_storage_.get(); }
private:
FRIEND_TEST_ALL_PREFIXES(SincResamplerTest, Convolve);
FRIEND_TEST_ALL_PREFIXES(SincResamplerTest, ConvolveBenchmark);
// Fills all three kernel tables for the current `io_sample_rate_ratio_`.
void InitializeKernel();
// Recomputes r0_, r3_, r4_ and `block_size_`; `second_load` selects the
// layout in which r0_ is slid right by half a kernel.
void UpdateRegions(bool second_load);
// Selects runtime specific CPU features like SSE. Must be called before
// using SincResampler.
// TODO(ajm): Currently managed by the class internally. See the note with
// `convolve_proc_` below.
void InitializeCPUSpecificFeatures();
// Compute convolution of `k1` and `k2` over `input_ptr`, resultant sums are
// linearly interpolated using `kernel_interpolation_factor`. On x86 and ARM
// the underlying implementation is chosen at run time.
static float Convolve_C(const float* input_ptr,
const float* k1,
const float* k2,
double kernel_interpolation_factor);
#if defined(WEBRTC_ARCH_X86_FAMILY)
static float Convolve_SSE(const float* input_ptr,
const float* k1,
const float* k2,
double kernel_interpolation_factor);
static float Convolve_AVX2(const float* input_ptr,
const float* k1,
const float* k2,
double kernel_interpolation_factor);
#elif defined(WEBRTC_HAS_NEON)
static float Convolve_NEON(const float* input_ptr,
const float* k1,
const float* k2,
double kernel_interpolation_factor);
#endif
// The ratio of input / output sample rates.
double io_sample_rate_ratio_;
// An index on the source input buffer with sub-sample precision. It must be
// double precision to avoid drift.
double virtual_source_idx_;
// The buffer is primed once at the very beginning of processing.
bool buffer_primed_;
// Source of data for resampling.
SincResamplerCallback* read_cb_;
// The size (in samples) to request from each `read_cb_` execution.
const size_t request_frames_;
// The number of source frames processed per pass.
size_t block_size_;
// The size (in samples) of the internal buffer used by the resampler.
const size_t input_buffer_size_;
// Contains kKernelOffsetCount + 1 kernels back-to-back, each of size
// kKernelSize (kKernelStorageSize floats in total). The kernel offsets are
// sub-sample shifts of a windowed sinc shifted from 0.0 to 1.0 sample.
std::unique_ptr<float[], AlignedFreeDeleter> kernel_storage_;
// Per-tap sinc arguments, laid out like `kernel_storage_`. Written by
// InitializeKernel() and reused by SetRatio() when the ratio changes.
std::unique_ptr<float[], AlignedFreeDeleter> kernel_pre_sinc_storage_;
// Per-tap window values, laid out like `kernel_storage_`. Written by
// InitializeKernel() and reused by SetRatio() when the ratio changes.
std::unique_ptr<float[], AlignedFreeDeleter> kernel_window_storage_;
// Data from the source is copied into this buffer for each processing pass.
std::unique_ptr<float[], AlignedFreeDeleter> input_buffer_;
// Stores the runtime selection of which Convolve function to use.
// TODO(ajm): Move to using a global static which must only be initialized
// once by the user. We're not doing this initially, because we don't have
// e.g. a LazyInstance helper in webrtc.
typedef float (*ConvolveProc)(const float*,
const float*,
const float*,
double);
ConvolveProc convolve_proc_;
// Pointers to the various regions inside `input_buffer_`. See the diagram at
// the top of the .cc file for more information.
// r1_ and r2_ are fixed at construction; r0_, r3_ and r4_ are recomputed by
// UpdateRegions().
float* r0_;
float* const r1_;
float* const r2_;
float* r3_;
float* r4_;
};
} // namespace webrtc
#endif // COMMON_AUDIO_RESAMPLER_SINC_RESAMPLER_H_

View file

@ -0,0 +1,66 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <immintrin.h>
#include <stddef.h>
#include <stdint.h>
#include <xmmintrin.h>
#include "common_audio/resampler/sinc_resampler.h"
namespace webrtc {
// AVX2/FMA convolution: accumulates the dot products of the input with `k1`
// and `k2` eight floats at a time, then blends the two sums with
// `kernel_interpolation_factor` and reduces the result to a single float.
float SincResampler::Convolve_AVX2(const float* input_ptr,
                                   const float* k1,
                                   const float* k2,
                                   double kernel_interpolation_factor) {
  __m256 acc_k1 = _mm256_setzero_ps();
  __m256 acc_k2 = _mm256_setzero_ps();

  // The kernels are always loaded with aligned loads; the input needs an
  // unaligned load unless it happens to sit on a 32-byte boundary. Unrolling
  // these loops has not been tested or benchmarked.
  const bool input_is_aligned =
      (reinterpret_cast<uintptr_t>(input_ptr) & 0x1F) == 0;
  if (input_is_aligned) {
    for (size_t i = 0; i < kKernelSize; i += 8) {
      const __m256 samples = _mm256_load_ps(input_ptr + i);
      acc_k1 = _mm256_fmadd_ps(samples, _mm256_load_ps(k1 + i), acc_k1);
      acc_k2 = _mm256_fmadd_ps(samples, _mm256_load_ps(k2 + i), acc_k2);
    }
  } else {
    for (size_t i = 0; i < kKernelSize; i += 8) {
      const __m256 samples = _mm256_loadu_ps(input_ptr + i);
      acc_k1 = _mm256_fmadd_ps(samples, _mm256_load_ps(k1 + i), acc_k1);
      acc_k2 = _mm256_fmadd_ps(samples, _mm256_load_ps(k2 + i), acc_k2);
    }
  }

  // Fold each 256-bit accumulator into 128 bits.
  const __m128 folded_k1 = _mm_add_ps(_mm256_extractf128_ps(acc_k1, 0),
                                      _mm256_extractf128_ps(acc_k1, 1));
  const __m128 folded_k2 = _mm_add_ps(_mm256_extractf128_ps(acc_k2, 0),
                                      _mm256_extractf128_ps(acc_k2, 1));

  // Linearly interpolate the two "convolutions".
  const __m128 weighted_k1 = _mm_mul_ps(
      folded_k1,
      _mm_set_ps1(static_cast<float>(1.0 - kernel_interpolation_factor)));
  const __m128 weighted_k2 = _mm_mul_ps(
      folded_k2, _mm_set_ps1(static_cast<float>(kernel_interpolation_factor)));
  const __m128 blended = _mm_add_ps(weighted_k1, weighted_k2);

  // Sum components together.
  const __m128 pair_sums =
      _mm_add_ps(_mm_movehl_ps(blended, blended), blended);
  float result;
  _mm_store_ss(&result, _mm_add_ss(pair_sums,
                                   _mm_shuffle_ps(pair_sums, pair_sums, 1)));
  return result;
}
} // namespace webrtc

View file

@ -0,0 +1,48 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Modified from the Chromium original:
// src/media/base/sinc_resampler.cc
#include <arm_neon.h>
#include "common_audio/resampler/sinc_resampler.h"
namespace webrtc {
// NEON convolution: accumulates the dot products of the input with `k1` and
// `k2` four floats at a time, then blends the two sums with
// `kernel_interpolation_factor` and reduces the result to a single float.
float SincResampler::Convolve_NEON(const float* input_ptr,
                                   const float* k1,
                                   const float* k2,
                                   double kernel_interpolation_factor) {
  float32x4_t acc_k1 = vmovq_n_f32(0);
  float32x4_t acc_k2 = vmovq_n_f32(0);

  const float* const input_end = input_ptr + kKernelSize;
  while (input_ptr < input_end) {
    const float32x4_t samples = vld1q_f32(input_ptr);
    acc_k1 = vmlaq_f32(acc_k1, samples, vld1q_f32(k1));
    acc_k2 = vmlaq_f32(acc_k2, samples, vld1q_f32(k2));
    input_ptr += 4;
    k1 += 4;
    k2 += 4;
  }

  // Linearly interpolate the two "convolutions".
  const float32x4_t blended = vmlaq_f32(
      vmulq_f32(acc_k1, vmovq_n_f32(1.0 - kernel_interpolation_factor)),
      acc_k2, vmovq_n_f32(kernel_interpolation_factor));

  // Sum components together.
  const float32x2_t half_sums =
      vadd_f32(vget_high_f32(blended), vget_low_f32(blended));
  return vget_lane_f32(vpadd_f32(half_sums, half_sums), 0);
}
} // namespace webrtc

View file

@ -0,0 +1,63 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Modified from the Chromium original:
// src/media/base/simd/sinc_resampler_sse.cc
#include <stddef.h>
#include <stdint.h>
#include <xmmintrin.h>
#include "common_audio/resampler/sinc_resampler.h"
namespace webrtc {
// SSE convolution: accumulates the dot products of the input with `k1` and
// `k2` four floats at a time, then blends the two sums with
// `kernel_interpolation_factor` and reduces the result to a single float.
float SincResampler::Convolve_SSE(const float* input_ptr,
                                  const float* k1,
                                  const float* k2,
                                  double kernel_interpolation_factor) {
  __m128 acc_k1 = _mm_setzero_ps();
  __m128 acc_k2 = _mm_setzero_ps();

  // The kernels are always loaded with aligned loads; the input needs an
  // unaligned load unless it happens to sit on a 16-byte boundary. Unrolling
  // these loops hurt performance in local testing.
  const bool input_is_aligned =
      (reinterpret_cast<uintptr_t>(input_ptr) & 0x0F) == 0;
  if (input_is_aligned) {
    for (size_t i = 0; i < kKernelSize; i += 4) {
      const __m128 samples = _mm_load_ps(input_ptr + i);
      acc_k1 = _mm_add_ps(acc_k1, _mm_mul_ps(samples, _mm_load_ps(k1 + i)));
      acc_k2 = _mm_add_ps(acc_k2, _mm_mul_ps(samples, _mm_load_ps(k2 + i)));
    }
  } else {
    for (size_t i = 0; i < kKernelSize; i += 4) {
      const __m128 samples = _mm_loadu_ps(input_ptr + i);
      acc_k1 = _mm_add_ps(acc_k1, _mm_mul_ps(samples, _mm_load_ps(k1 + i)));
      acc_k2 = _mm_add_ps(acc_k2, _mm_mul_ps(samples, _mm_load_ps(k2 + i)));
    }
  }

  // Linearly interpolate the two "convolutions".
  const __m128 weighted_k1 = _mm_mul_ps(
      acc_k1,
      _mm_set_ps1(static_cast<float>(1.0 - kernel_interpolation_factor)));
  const __m128 weighted_k2 = _mm_mul_ps(
      acc_k2, _mm_set_ps1(static_cast<float>(kernel_interpolation_factor)));
  const __m128 blended = _mm_add_ps(weighted_k1, weighted_k2);

  // Sum components together.
  const __m128 pair_sums =
      _mm_add_ps(_mm_movehl_ps(blended, blended), blended);
  float result;
  _mm_store_ss(&result,
               _mm_add_ss(pair_sums, _mm_shuffle_ps(pair_sums, pair_sums, 1)));
  return result;
}
} // namespace webrtc

View file

@ -0,0 +1,57 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// MSVC++ requires this to be set before any other includes to get M_PI.
#define _USE_MATH_DEFINES
#include "common_audio/resampler/sinusoidal_linear_chirp_source.h"
#include <math.h>
namespace webrtc {
// Sets up a chirp that sweeps from kMinFrequency to `max_frequency` over
// `samples` samples at `sample_rate`, delayed by `delay_samples` samples.
SinusoidalLinearChirpSource::SinusoidalLinearChirpSource(int sample_rate,
                                                         size_t samples,
                                                         double max_frequency,
                                                         double delay_samples)
    : sample_rate_(sample_rate),
      total_samples_(samples),
      max_frequency_(max_frequency),
      current_index_(0),
      delay_samples_(delay_samples) {
  // Chirp rate: swept frequency span divided by the sweep duration.
  const double sweep_duration =
      static_cast<double>(total_samples_) / sample_rate_;
  const double frequency_span = max_frequency_ - kMinFrequency;
  k_ = frequency_span / sweep_duration;
}
// Writes the next `frames` chirp samples to `destination`, advancing the
// internal sample position. Samples are zero before the configured delay has
// elapsed and once the instantaneous frequency exceeds Nyquist.
void SinusoidalLinearChirpSource::Run(size_t frames, float* destination) {
  const double nyquist = 0.5 * sample_rate_;

  for (size_t i = 0; i < frames; ++i, ++current_index_) {
    // Filter out frequencies higher than Nyquist, and emit silence until
    // non-negative time is reached.
    const bool above_nyquist = Frequency(current_index_) > nyquist;
    if (above_nyquist || current_index_ < delay_samples_) {
      destination[i] = 0;
      continue;
    }

    // Time in seconds since the (delayed) start of the sweep.
    const double t = (current_index_ - delay_samples_) / sample_rate_;
    // Sinusoidal linear chirp.
    destination[i] = sin(2 * M_PI * (kMinFrequency * t + (k_ / 2) * t * t));
  }
}
// Instantaneous chirp frequency at sample index `position`: a linear ramp
// that equals kMinFrequency at `delay_samples_` and rises by the full
// frequency span every `total_samples_` samples.
double SinusoidalLinearChirpSource::Frequency(size_t position) {
  const double samples_into_sweep = position - delay_samples_;
  const double frequency_span = max_frequency_ - kMinFrequency;
  return kMinFrequency + samples_into_sweep * frequency_span / total_samples_;
}
} // namespace webrtc

View file

@ -0,0 +1,56 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Modified from the Chromium original here:
// src/media/base/sinc_resampler_unittest.cc
#ifndef COMMON_AUDIO_RESAMPLER_SINUSOIDAL_LINEAR_CHIRP_SOURCE_H_
#define COMMON_AUDIO_RESAMPLER_SINUSOIDAL_LINEAR_CHIRP_SOURCE_H_
#include "common_audio/resampler/sinc_resampler.h"
namespace webrtc {
// Fake audio source for testing the resampler. Generates a sinusoidal linear
// chirp (http://en.wikipedia.org/wiki/Chirp) which can be tuned to stress the
// resampler for the specific sample rate conversion being used.
class SinusoidalLinearChirpSource : public SincResamplerCallback {
public:
// `sample_rate` is the rate of the generated signal, `samples` the length of
// the sweep in samples and `max_frequency` the frequency reached at the end
// of the sweep.
// `delay_samples` can be used to insert a fractional sample delay into the
// source. It will produce zeros until non-negative time is reached.
SinusoidalLinearChirpSource(int sample_rate,
size_t samples,
double max_frequency,
double delay_samples);
~SinusoidalLinearChirpSource() override {}
SinusoidalLinearChirpSource(const SinusoidalLinearChirpSource&) = delete;
SinusoidalLinearChirpSource& operator=(const SinusoidalLinearChirpSource&) =
delete;
// Fills `destination` with the next `frames` samples of the chirp and
// advances the internal position by `frames`.
void Run(size_t frames, float* destination) override;
// Returns the instantaneous frequency of the chirp at sample `position`.
double Frequency(size_t position);
private:
// Frequency at which the sweep starts.
static constexpr int kMinFrequency = 5;
// Sample rate of the generated signal.
int sample_rate_;
// Length of the sweep in samples.
size_t total_samples_;
// Frequency reached at the end of the sweep.
double max_frequency_;
// Chirp rate: frequency span divided by the sweep duration.
double k_;
// Index of the next sample to generate.
size_t current_index_;
// Delay, in (possibly fractional) samples, before the chirp starts.
double delay_samples_;
};
} // namespace webrtc
#endif // COMMON_AUDIO_RESAMPLER_SINUSOIDAL_LINEAR_CHIRP_SOURCE_H_

View file

@ -0,0 +1,232 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// A ring buffer to hold arbitrary data. Provides no thread safety. Unless
// otherwise specified, functions return 0 on success and -1 on error.
#include "common_audio/ring_buffer.h"
#include <stddef.h> // size_t
#include <stdlib.h>
#include <string.h>
// Get address of region(s) from which we can read data.
// If the region is contiguous, `data_ptr_bytes_2` will be zero.
// If non-contiguous, `data_ptr_bytes_2` will be the size in bytes of the second
// region. Returns room available to be read or `element_count`, whichever is
// smaller.
static size_t GetBufferReadRegions(RingBuffer* buf,
                                   size_t element_count,
                                   void** data_ptr_1,
                                   size_t* data_ptr_bytes_1,
                                   void** data_ptr_2,
                                   size_t* data_ptr_bytes_2) {
  const size_t available = WebRtc_available_read(buf);
  const size_t to_read = (available < element_count) ? available
                                                     : element_count;
  /* Elements between the read position and the end of the storage. */
  const size_t until_end = buf->element_count - buf->read_pos;
  /* The read is non-contiguous when it runs past the end of the storage. */
  const int wraps = to_read > until_end;
  const size_t first_count = wraps ? until_end : to_read;

  /* The first region always starts at the current read position. */
  *data_ptr_1 = buf->data + buf->read_pos * buf->element_size;
  *data_ptr_bytes_1 = first_count * buf->element_size;

  if (wraps) {
    /* The remainder continues from the start of the storage. */
    *data_ptr_2 = buf->data;
    *data_ptr_bytes_2 = (to_read - until_end) * buf->element_size;
  } else {
    *data_ptr_2 = NULL;
    *data_ptr_bytes_2 = 0;
  }
  return to_read;
}
// Allocates a ring buffer holding `element_count` elements of `element_size`
// bytes each and initializes it to the empty, zero-filled state.
// Returns NULL if either argument is zero, if the total byte size does not
// fit in size_t, or if an allocation fails.
RingBuffer* WebRtc_CreateBuffer(size_t element_count, size_t element_size) {
  RingBuffer* self = NULL;
  if (element_count == 0 || element_size == 0) {
    return NULL;
  }
  // Reject sizes whose product would wrap around: a wrapped value would make
  // malloc() return a buffer smaller than the one the rest of the code
  // indexes into. `element_size` is known to be non-zero here.
  if (element_count > (size_t)-1 / element_size) {
    return NULL;
  }

  self = malloc(sizeof(RingBuffer));
  if (!self) {
    return NULL;
  }

  self->data = malloc(element_count * element_size);
  if (!self->data) {
    // Do not leak the header when the storage allocation fails.
    free(self);
    self = NULL;
    return NULL;
  }

  self->element_count = element_count;
  self->element_size = element_size;
  WebRtc_InitBuffer(self);

  return self;
}
// Resets the buffer to the empty state: storage zeroed, read and write
// positions at the start, and both on the same wrap.
void WebRtc_InitBuffer(RingBuffer* self) {
  const size_t total_bytes = self->element_count * self->element_size;

  memset(self->data, 0, total_bytes);
  self->rw_wrap = SAME_WRAP;
  self->write_pos = 0;
  self->read_pos = 0;
}
// Releases the storage and the buffer header. A NULL `handle` is ignored.
void WebRtc_FreeBuffer(void* handle) {
  RingBuffer* const buffer = (RingBuffer*)handle;
  if (buffer != NULL) {
    free(buffer->data);
    free(buffer);
  }
}
// Reads up to `element_count` elements and advances the read position.
// Returns the number of elements read (0 if `self` or `data` is NULL).
// With a non-NULL `data_ptr`, a contiguous read is returned in place (no copy)
// and a wrapped read is assembled in `data`; with a NULL `data_ptr` the
// elements are always copied into `data`.
size_t WebRtc_ReadBuffer(RingBuffer* self,
                         void** data_ptr,
                         void* data,
                         size_t element_count) {
  void* first_region = NULL;
  void* second_region = NULL;
  size_t first_bytes = 0;
  size_t second_bytes = 0;
  size_t read_count = 0;

  if (self == NULL || data == NULL) {
    return 0;
  }

  read_count = GetBufferReadRegions(self, element_count, &first_region,
                                    &first_bytes, &second_region,
                                    &second_bytes);

  if (second_bytes > 0) {
    // The read wraps around the end of the storage: stitch both pieces
    // together in `data` and hand that out instead.
    memcpy(data, first_region, first_bytes);
    memcpy((char*)data + first_bytes, second_region, second_bytes);
    first_region = data;
  } else if (data_ptr == NULL) {
    // No wrap, but a memcpy was requested.
    memcpy(data, first_region, first_bytes);
  }

  if (data_ptr != NULL) {
    // `first_region` == `data` in the case of a wrap.
    *data_ptr = (read_count == 0) ? NULL : first_region;
  }

  // Update read position.
  WebRtc_MoveReadPtr(self, (int)read_count);
  return read_count;
}
// Appends up to `element_count` elements from `data`, limited by the free
// space, and advances the write position. Returns the number of elements
// written (0 if `self` or `data` is NULL).
size_t WebRtc_WriteBuffer(RingBuffer* self,
                          const void* data,
                          size_t element_count) {
  const char* src = (const char*)data;
  size_t free_elements = 0;
  size_t write_elements = 0;
  size_t remaining = 0;
  size_t margin = 0;

  if (self == NULL || data == NULL) {
    return 0;
  }

  free_elements = WebRtc_available_write(self);
  write_elements = (free_elements < element_count) ? free_elements
                                                   : element_count;
  remaining = write_elements;
  // Elements that still fit before the end of the storage.
  margin = self->element_count - self->write_pos;

  if (write_elements > margin) {
    // Buffer wrap around when writing: fill up to the end first, then
    // continue from the start on a different wrap than the reader.
    memcpy(self->data + self->write_pos * self->element_size, src,
           margin * self->element_size);
    src += margin * self->element_size;
    remaining -= margin;
    self->write_pos = 0;
    self->rw_wrap = DIFF_WRAP;
  }

  memcpy(self->data + self->write_pos * self->element_size, src,
         remaining * self->element_size);
  self->write_pos += remaining;

  return write_elements;
}
// Moves the read position by `element_count` elements and returns the number
// of elements actually moved. A positive count is clamped to the number of
// readable elements; a negative count is clamped to the number of free
// elements. Returns 0 if `self` is NULL.
int WebRtc_MoveReadPtr(RingBuffer* self, int element_count) {
if (!self) {
return 0;
}
{
// We need to be able to take care of negative changes, hence use "int"
// instead of "size_t".
const int free_elements = (int) WebRtc_available_write(self);
const int readable_elements = (int) WebRtc_available_read(self);
int read_pos = (int) self->read_pos;
// Cannot move forward past the data that has been written.
if (element_count > readable_elements) {
element_count = readable_elements;
}
// Cannot move backward further than the free space allows.
if (element_count < -free_elements) {
element_count = -free_elements;
}
read_pos += element_count;
// Note the strict comparison: a read position exactly equal to
// `element_count` is left unwrapped here.
if (read_pos > (int) self->element_count) {
// Buffer wrap around. Restart read position and wrap indicator.
read_pos -= (int) self->element_count;
self->rw_wrap = SAME_WRAP;
}
if (read_pos < 0) {
// Buffer wrap around. Restart read position and wrap indicator.
read_pos += (int) self->element_count;
self->rw_wrap = DIFF_WRAP;
}
self->read_pos = (size_t) read_pos;
return element_count;
}
}
// Returns the number of elements that can currently be read (0 for NULL).
size_t WebRtc_available_read(const RingBuffer* self) {
  if (self == NULL) {
    return 0;
  }
  // On the same wrap the readable data lies between the two positions;
  // otherwise it runs from the read position to the end of the storage and
  // then from the start up to the write position.
  return (self->rw_wrap == SAME_WRAP)
             ? self->write_pos - self->read_pos
             : self->element_count - self->read_pos + self->write_pos;
}
// Returns the number of elements that can currently be written (0 for NULL):
// the capacity minus whatever is still waiting to be read.
size_t WebRtc_available_write(const RingBuffer* self) {
  return self ? self->element_count - WebRtc_available_read(self) : 0;
}

View file

@ -0,0 +1,79 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// A ring buffer to hold arbitrary data. Provides no thread safety. Unless
// otherwise specified, functions return 0 on success and -1 on error.
#ifndef COMMON_AUDIO_RING_BUFFER_H_
#define COMMON_AUDIO_RING_BUFFER_H_
// TODO(alessiob): Used by AEC, AECm and AudioRingBuffer. Remove when possible.
#ifdef __cplusplus
extern "C" {
#endif
#include <stddef.h> // size_t
// Tracks whether the write position has wrapped around the end of the storage
// relative to the read position: SAME_WRAP when both are on the same pass,
// DIFF_WRAP when the writer has wrapped and the reader has not yet.
enum Wrap { SAME_WRAP, DIFF_WRAP };
// State of a ring buffer of fixed-size elements.
typedef struct RingBuffer {
// Index, in elements, of the next element to read.
size_t read_pos;
// Index, in elements, of the next element to write.
size_t write_pos;
// Capacity of the buffer in elements.
size_t element_count;
// Size of one element in bytes.
size_t element_size;
// Relative wrap state of the read and write positions.
enum Wrap rw_wrap;
// Storage of `element_count` * `element_size` bytes.
char* data;
} RingBuffer;
// Creates and initializes the buffer. Returns null on failure.
RingBuffer* WebRtc_CreateBuffer(size_t element_count, size_t element_size);
void WebRtc_InitBuffer(RingBuffer* handle);
void WebRtc_FreeBuffer(void* handle);
// Reads data from the buffer. Returns the number of elements that were read.
// The `data_ptr` will point to the address where the read data is located.
// If no data can be read, `data_ptr` is set to `NULL`. If all data can be read
// without buffer wrap around then `data_ptr` will point to the location in the
// buffer. Otherwise, the data will be copied to `data` (memory allocation done
// by the user) and `data_ptr` points to the address of `data`. `data_ptr` is
// only guaranteed to be valid until the next call to WebRtc_WriteBuffer().
//
// To force a copying to `data`, pass a null `data_ptr`.
//
// Returns number of elements read.
size_t WebRtc_ReadBuffer(RingBuffer* handle,
void** data_ptr,
void* data,
size_t element_count);
// Writes `data` to buffer and returns the number of elements written.
size_t WebRtc_WriteBuffer(RingBuffer* handle,
const void* data,
size_t element_count);
// Moves the buffer read position and returns the number of elements moved.
// Positive `element_count` moves the read position towards the write position,
// that is, flushing the buffer. Negative `element_count` moves the read
// position away from the write position, that is, stuffing the buffer.
// Returns number of elements moved.
int WebRtc_MoveReadPtr(RingBuffer* handle, int element_count);
// Returns number of available elements to read.
size_t WebRtc_available_read(const RingBuffer* handle);
// Returns number of available elements for write.
size_t WebRtc_available_write(const RingBuffer* handle);
#ifdef __cplusplus
}
#endif
#endif // COMMON_AUDIO_RING_BUFFER_H_

View file

@ -0,0 +1,103 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_AutoCorrToReflCoef().
* The description header can be found in signal_processing_library.h
*
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
/*
 * Converts autocorrelation values into reflection coefficients.
 *
 * R          Autocorrelation values; R[0]..R[use_order] are read.
 * use_order  Number of reflection coefficients to produce.
 * K          Output; K[0]..K[use_order - 1] are written.
 *
 * If the magnitude of P[1] ever exceeds P[0], the current and all remaining
 * coefficients are set to zero and the function returns early.
 *
 * NOTE(review): ACF and P are written at indexes 0..use_order while being
 * declared with WEBRTC_SPL_MAX_LPC_ORDER elements, so this appears to require
 * use_order < WEBRTC_SPL_MAX_LPC_ORDER -- confirm against the callers.
 */
void WebRtcSpl_AutoCorrToReflCoef(const int32_t *R, int use_order, int16_t *K)
{
    int i, n;
    int16_t tmp;
    const int32_t *rptr;
    int32_t L_num, L_den;
    int16_t *acfptr, *pptr, *wptr, *p1ptr, *w1ptr, ACF[WEBRTC_SPL_MAX_LPC_ORDER],
            P[WEBRTC_SPL_MAX_LPC_ORDER], W[WEBRTC_SPL_MAX_LPC_ORDER];
    // Initialize loop and pointers. p1ptr and w1ptr stay fixed at P[1] and
    // W[1] for the whole function; the other pointers walk the arrays.
    acfptr = ACF;
    rptr = R;
    pptr = P;
    p1ptr = &P[1];
    w1ptr = &W[1];
    wptr = w1ptr;
    // First loop; n=0. Determine shifting: the normalization shift of R[0]
    // is applied to every R[i] before keeping the upper 16 bits.
    tmp = WebRtcSpl_NormW32(*R);
    *acfptr = (int16_t)((*rptr++ << tmp) >> 16);
    *pptr++ = *acfptr++;
    // Initialize ACF, P and W: ACF[i] = P[i] = W[i] = normalized R[i].
    for (i = 1; i <= use_order; i++)
    {
        *acfptr = (int16_t)((*rptr++ << tmp) >> 16);
        *wptr++ = *acfptr;
        *pptr++ = *acfptr++;
    }
    // Compute reflection coefficients. K is advanced by the loop header, so
    // *K always refers to the coefficient of the current iteration.
    for (n = 1; n <= use_order; n++, K++)
    {
        tmp = WEBRTC_SPL_ABS_W16(*p1ptr);
        if (*P < tmp)
        {
            // |P[1]| > P[0]: zero the current and all remaining coefficients.
            for (i = n; i <= use_order; i++)
                *K++ = 0;
            return;
        }
        // Division: WebRtcSpl_div(tmp, *P)
        // Bit-by-bit division producing 15 quotient bits of |P[1]| / P[0].
        *K = 0;
        if (tmp != 0)
        {
            L_num = tmp;
            L_den = *P;
            i = 15;
            while (i--)
            {
                (*K) <<= 1;
                L_num <<= 1;
                if (L_num >= L_den)
                {
                    L_num -= L_den;
                    (*K)++;
                }
            }
            // The coefficient takes the opposite sign of P[1].
            if (*p1ptr > 0)
                *K = -*K;
        }
        // Last iteration; don't do Schur recursion.
        if (n == use_order)
            return;
        // Schur recursion. All products are rounded (+16384) and shifted down
        // by 15 bits; all additions saturate to 16 bits.
        pptr = P;
        wptr = w1ptr;
        // P[0] += P[1] * K.
        tmp = (int16_t)(((int32_t)*p1ptr * (int32_t)*K + 16384) >> 15);
        *pptr = WebRtcSpl_AddSatW16(*pptr, tmp);
        pptr++;
        for (i = 1; i <= use_order - n; i++)
        {
            // P[i] = P[i + 1] + W[i] * K.
            tmp = (int16_t)(((int32_t)*wptr * (int32_t)*K + 16384) >> 15);
            *pptr = WebRtcSpl_AddSatW16(*(pptr + 1), tmp);
            pptr++;
            // W[i] += P[i + 1] * K; pptr now points at P[i + 1], which has
            // not been overwritten yet in this pass.
            tmp = (int16_t)(((int32_t)*pptr * (int32_t)*K + 16384) >> 15);
            *wptr = WebRtcSpl_AddSatW16(*wptr, tmp);
            wptr++;
        }
    }
}

View file

@ -0,0 +1,65 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "rtc_base/checks.h"
/*
 * Computes the autocorrelation of `in_vector` for lags 0..order.
 *
 * Every product is right-shifted by a common amount before it is accumulated
 * so that the 32-bit sums do not overflow. That shift is reported through
 * `scale`, and `result` receives order + 1 values.
 *
 * Returns the number of values written to `result` (order + 1).
 */
size_t WebRtcSpl_AutoCorrelation(const int16_t* in_vector,
                                 size_t in_vector_length,
                                 size_t order,
                                 int32_t* result,
                                 int* scale) {
  const size_t num_lags = order + 1;
  size_t lag;
  int right_shift = 0;
  int16_t peak;

  RTC_DCHECK_LE(order, in_vector_length);

  /* Largest sample magnitude; an all-zero input needs no scaling. */
  peak = WebRtcSpl_MaxAbsValueW16(in_vector, in_vector_length);
  if (peak != 0) {
    /* Bits needed to count the terms of one sum. */
    const int length_bits = WebRtcSpl_GetSizeInBits((uint32_t)in_vector_length);
    /* Spare leading bits of the largest possible product. */
    const int headroom = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(peak, peak));
    if (headroom <= length_bits) {
      right_shift = length_bits - headroom;
    }
  }

  for (lag = 0; lag < num_lags; ++lag) {
    const int16_t* lagged = in_vector + lag;
    int32_t acc = 0;
    size_t k = 0;

    /* Four products per pass while at least four remain. */
    while (lag + k + 3 < in_vector_length) {
      acc += (in_vector[k + 0] * lagged[k + 0]) >> right_shift;
      acc += (in_vector[k + 1] * lagged[k + 1]) >> right_shift;
      acc += (in_vector[k + 2] * lagged[k + 2]) >> right_shift;
      acc += (in_vector[k + 3] * lagged[k + 3]) >> right_shift;
      k += 4;
    }
    /* Up to three leftover products. */
    while (k < in_vector_length - lag) {
      acc += (in_vector[k] * lagged[k]) >> right_shift;
      ++k;
    }
    result[lag] = acc;
  }

  *scale = right_shift;
  return num_lags;
}

View file

@ -0,0 +1,108 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
/* Tables for data buffer indexes that are bit reversed and thus need to be
* swapped. Note that, index_7[{0, 2, 4, ...}] are for the left side of the swap
* operations, while index_7[{1, 3, 5, ...}] are for the right side of the
* operation. Same for index_8.
*/
/* Indexes for the case of stages == 7. */
static const int16_t index_7[112] = {
  1, 64, 2, 32, 3, 96, 4, 16, 5, 80, 6, 48, 7, 112, 9, 72, 10, 40, 11, 104,
  12, 24, 13, 88, 14, 56, 15, 120, 17, 68, 18, 36, 19, 100, 21, 84, 22, 52,
  23, 116, 25, 76, 26, 44, 27, 108, 29, 92, 30, 60, 31, 124, 33, 66, 35, 98,
  37, 82, 38, 50, 39, 114, 41, 74, 43, 106, 45, 90, 46, 58, 47, 122, 49, 70,
  51, 102, 53, 86, 55, 118, 57, 78, 59, 110, 61, 94, 63, 126, 67, 97, 69,
  81, 71, 113, 75, 105, 77, 89, 79, 121, 83, 101, 87, 117, 91, 109, 95, 125,
  103, 115, 111, 123
};
/* Indexes for the case of stages == 8. */
static const int16_t index_8[240] = {
  1, 128, 2, 64, 3, 192, 4, 32, 5, 160, 6, 96, 7, 224, 8, 16, 9, 144, 10, 80,
  11, 208, 12, 48, 13, 176, 14, 112, 15, 240, 17, 136, 18, 72, 19, 200, 20,
  40, 21, 168, 22, 104, 23, 232, 25, 152, 26, 88, 27, 216, 28, 56, 29, 184,
  30, 120, 31, 248, 33, 132, 34, 68, 35, 196, 37, 164, 38, 100, 39, 228, 41,
  148, 42, 84, 43, 212, 44, 52, 45, 180, 46, 116, 47, 244, 49, 140, 50, 76,
  51, 204, 53, 172, 54, 108, 55, 236, 57, 156, 58, 92, 59, 220, 61, 188, 62,
  124, 63, 252, 65, 130, 67, 194, 69, 162, 70, 98, 71, 226, 73, 146, 74, 82,
  75, 210, 77, 178, 78, 114, 79, 242, 81, 138, 83, 202, 85, 170, 86, 106, 87,
  234, 89, 154, 91, 218, 93, 186, 94, 122, 95, 250, 97, 134, 99, 198, 101,
  166, 103, 230, 105, 150, 107, 214, 109, 182, 110, 118, 111, 246, 113, 142,
  115, 206, 117, 174, 119, 238, 121, 158, 123, 222, 125, 190, 127, 254, 131,
  193, 133, 161, 135, 225, 137, 145, 139, 209, 141, 177, 143, 241, 147, 201,
  149, 169, 151, 233, 155, 217, 157, 185, 159, 249, 163, 197, 167, 229, 171,
  213, 173, 181, 175, 245, 179, 205, 183, 237, 187, 221, 191, 253, 199, 227,
  203, 211, 207, 243, 215, 235, 223, 251, 239, 247
};

/* Exchanges the complex samples at positions `first` and `second`. A complex
 * sample occupies two consecutive int16_t values (real, then imaginary).
 *
 * The halves are moved as int16_t on purpose: accessing the buffer through an
 * int32_t pointer would violate the effective-type (strict aliasing) rule and
 * would require the buffer to be 4-byte aligned.
 */
static void SwapComplexSamples(int16_t* complex_data, int first, int second) {
  int16_t* a = &complex_data[2 * first];
  int16_t* b = &complex_data[2 * second];
  const int16_t re = a[0];
  const int16_t im = a[1];
  a[0] = b[0];
  a[1] = b[1];
  b[0] = re;
  b[1] = im;
}

/* Reorders, in place, the 1 << stages complex samples in `complex_data` into
 * bit-reversed index order.
 *
 * complex_data  Interleaved real/imaginary int16_t samples,
 *               2 * (1 << stages) values in total.
 * stages        log2 of the number of complex samples.
 */
void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages) {
  /* For any specific value of stages, we know exactly the indexes that are
   * bit reversed. Currently (Feb. 2012) in WebRTC the only possible values of
   * stages are 7 and 8, so we use tables to save unnecessary iterations and
   * calculations for these two cases.
   */
  if (stages == 7 || stages == 8) {
    const int16_t* index = (stages == 8) ? index_8 : index_7;
    const int length = (stages == 8) ? 240 : 112;
    int m = 0;

    /* Decimation in time. Each table pair names two samples to exchange. */
    for (m = 0; m < length; m += 2) {
      SwapComplexSamples(complex_data, index[m], index[m + 1]);
    }
  } else {
    const int n = 1 << stages;
    const int nn = n - 1;
    int m = 0;
    int mr = 0; /* Bit-reversed counterpart of m, updated incrementally. */

    /* Decimation in time - re-order data */
    for (m = 1; m <= nn; ++m) {
      /* Find out indexes that are bit-reversed. */
      int l = n;
      do {
        l >>= 1;
      } while (l > nn - mr);
      mr = (mr & (l - 1)) + l;

      /* Swap each pair once only; mr == m needs no swap at all. */
      if (mr > m) {
        SwapComplexSamples(complex_data, m, mr);
      }
    }
  }
}

View file

@ -0,0 +1,119 @@
@
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS. All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@
@ This file contains the function WebRtcSpl_ComplexBitReverse(), optimized
@ for ARMv5 platforms.
@ Reference C code is in file complex_bit_reverse.c. Bit-exact.
#include "rtc_base/system/asm_defines.h"
@ void WebRtcSpl_ComplexBitReverse(int16_t* complex_data, int stages)
@ On entry r0 = complex_data and r1 = stages. stages == 7 and stages == 8 are
@ served by the byte-offset tables index_7/index_8 below; every other value
@ goes through the generic loop.
GLOBAL_FUNCTION WebRtcSpl_ComplexBitReverse
.align 2
DEFINE_FUNCTION WebRtcSpl_ComplexBitReverse
  push {r4-r7}
  cmp r1, #7
  adr r3, index_7 @ Table pointer.
  mov r4, #112 @ Number of iterations.
  beq PRE_LOOP_STAGES_7_OR_8
  cmp r1, #8
  adr r3, index_8 @ Table pointer.
  mov r4, #240 @ Number of iterations.
  beq PRE_LOOP_STAGES_7_OR_8
  @ Generic path. Registers: r3 = m, r1 = n, r6 = nn, r4 = mr, r2 = l,
  @ r5 = address of complex sample m - 1, r12 = scratch.
  mov r3, #1 @ Initialize m.
  mov r1, r3, asl r1 @ n = 1 << stages;
  subs r6, r1, #1 @ nn = n - 1;
  ble END
  mov r5, r0 @ &complex_data
  mov r4, #0 @ mr
LOOP_GENERIC:
  rsb r12, r4, r6 @ r12 = nn - mr, the bound tested in LOOP_SHIFT.
  mov r2, r1 @ n
LOOP_SHIFT:
  asr r2, #1 @ l >>= 1;
  cmp r2, r12
  bgt LOOP_SHIFT @ Repeat while l > nn - mr.
  sub r12, r2, #1
  and r4, r12, r4
  add r4, r2 @ mr = (mr & (l - 1)) + l;
  cmp r4, r3 @ mr <= m ?
  ble UPDATE_REGISTERS
  mov r12, r4, asl #2 @ Byte offset of complex sample mr.
  ldr r7, [r5, #4] @ complex_data[2 * m, 2 * m + 1].
                   @ Offset 4 due to m incrementing from 1.
  ldr r2, [r0, r12] @ complex_data[2 * mr, 2 * mr + 1].
  str r7, [r0, r12]
  str r2, [r5, #4]
UPDATE_REGISTERS:
  add r3, r3, #1 @ ++m
  add r5, #4 @ Advance to the next complex sample.
  cmp r3, r1
  bne LOOP_GENERIC @ Loop until m == n.
  b END
PRE_LOOP_STAGES_7_OR_8:
  add r4, r3, r4, asl #1 @ r4 = end of table (start + 2 bytes per entry).
LOOP_STAGES_7_OR_8:
  ldrsh r2, [r3], #2 @ index[m]
  ldrsh r5, [r3], #2 @ index[m + 1]
  ldr r1, [r0, r2] @ complex_data[index[m], index[m] + 1]
  ldr r12, [r0, r5] @ complex_data[index[m + 1], index[m + 1] + 1]
  cmp r3, r4
  str r1, [r0, r5]
  str r12, [r0, r2]
  bne LOOP_STAGES_7_OR_8
END:
  pop {r4-r7}
  bx lr
@ The index tables. Unlike the tables in the generic C code, the entries are
@ byte offsets into complex_data: each value is four times the corresponding
@ complex-sample index (twice the index of its first 16-bit element), so it
@ can be used directly as the offset of an ldr/str. Entries come in pairs that
@ name the two samples to swap.
.align 2
index_7: @ Indexes for stages == 7.
  .short 4, 256, 8, 128, 12, 384, 16, 64, 20, 320, 24, 192, 28, 448, 36, 288
  .short 40, 160, 44, 416, 48, 96, 52, 352, 56, 224, 60, 480, 68, 272, 72, 144
  .short 76, 400, 84, 336, 88, 208, 92, 464, 100, 304, 104, 176, 108, 432, 116
  .short 368, 120, 240, 124, 496, 132, 264, 140, 392, 148, 328, 152, 200, 156
  .short 456, 164, 296, 172, 424, 180, 360, 184, 232, 188, 488, 196, 280, 204
  .short 408, 212, 344, 220, 472, 228, 312, 236, 440, 244, 376, 252, 504, 268
  .short 388, 276, 324, 284, 452, 300, 420, 308, 356, 316, 484, 332, 404, 348
  .short 468, 364, 436, 380, 500, 412, 460, 444, 492
index_8: @ Indexes for stages == 8.
  .short 4, 512, 8, 256, 12, 768, 16, 128, 20, 640, 24, 384, 28, 896, 32, 64
  .short 36, 576, 40, 320, 44, 832, 48, 192, 52, 704, 56, 448, 60, 960, 68, 544
  .short 72, 288, 76, 800, 80, 160, 84, 672, 88, 416, 92, 928, 100, 608, 104
  .short 352, 108, 864, 112, 224, 116, 736, 120, 480, 124, 992, 132, 528, 136
  .short 272, 140, 784, 148, 656, 152, 400, 156, 912, 164, 592, 168, 336, 172
  .short 848, 176, 208, 180, 720, 184, 464, 188, 976, 196, 560, 200, 304, 204
  .short 816, 212, 688, 216, 432, 220, 944, 228, 624, 232, 368, 236, 880, 244
  .short 752, 248, 496, 252, 1008, 260, 520, 268, 776, 276, 648, 280, 392, 284
  .short 904, 292, 584, 296, 328, 300, 840, 308, 712, 312, 456, 316, 968, 324
  .short 552, 332, 808, 340, 680, 344, 424, 348, 936, 356, 616, 364, 872, 372
  .short 744, 376, 488, 380, 1000, 388, 536, 396, 792, 404, 664, 412, 920, 420
  .short 600, 428, 856, 436, 728, 440, 472, 444, 984, 452, 568, 460, 824, 468
  .short 696, 476, 952, 484, 632, 492, 888, 500, 760, 508, 1016, 524, 772, 532
  .short 644, 540, 900, 548, 580, 556, 836, 564, 708, 572, 964, 588, 804, 596
  .short 676, 604, 932, 620, 868, 628, 740, 636, 996, 652, 788, 668, 916, 684
  .short 852, 692, 724, 700, 980, 716, 820, 732, 948, 748, 884, 764, 1012, 796
  .short 908, 812, 844, 828, 972, 860, 940, 892, 1004, 956, 988

View file

@ -0,0 +1,176 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
/* Swap table for stages == 7. Entries come in pairs; each entry is added
 * directly to the address of the sample buffer by the assembly in
 * WebRtcSpl_ComplexBitReverse(), i.e. it is a byte offset (four bytes per
 * complex sample), not an element index.
 */
static int16_t coefTable_7[] = {
    4, 256, 8, 128, 12, 384, 16, 64,
    20, 320, 24, 192, 28, 448, 36, 288,
    40, 160, 44, 416, 48, 96, 52, 352,
    56, 224, 60, 480, 68, 272, 72, 144,
    76, 400, 84, 336, 88, 208, 92, 464,
    100, 304, 104, 176, 108, 432, 116, 368,
    120, 240, 124, 496, 132, 264, 140, 392,
    148, 328, 152, 200, 156, 456, 164, 296,
    172, 424, 180, 360, 184, 232, 188, 488,
    196, 280, 204, 408, 212, 344, 220, 472,
    228, 312, 236, 440, 244, 376, 252, 504,
    268, 388, 276, 324, 284, 452, 300, 420,
    308, 356, 316, 484, 332, 404, 348, 468,
    364, 436, 380, 500, 412, 460, 444, 492
};
/* Swap table for stages == 8; same layout as coefTable_7. */
static int16_t coefTable_8[] = {
    4, 512, 8, 256, 12, 768, 16, 128,
    20, 640, 24, 384, 28, 896, 32, 64,
    36, 576, 40, 320, 44, 832, 48, 192,
    52, 704, 56, 448, 60, 960, 68, 544,
    72, 288, 76, 800, 80, 160, 84, 672,
    88, 416, 92, 928, 100, 608, 104, 352,
    108, 864, 112, 224, 116, 736, 120, 480,
    124, 992, 132, 528, 136, 272, 140, 784,
    148, 656, 152, 400, 156, 912, 164, 592,
    168, 336, 172, 848, 176, 208, 180, 720,
    184, 464, 188, 976, 196, 560, 200, 304,
    204, 816, 212, 688, 216, 432, 220, 944,
    228, 624, 232, 368, 236, 880, 244, 752,
    248, 496, 252, 1008, 260, 520, 268, 776,
    276, 648, 280, 392, 284, 904, 292, 584,
    296, 328, 300, 840, 308, 712, 312, 456,
    316, 968, 324, 552, 332, 808, 340, 680,
    344, 424, 348, 936, 356, 616, 364, 872,
    372, 744, 376, 488, 380, 1000, 388, 536,
    396, 792, 404, 664, 412, 920, 420, 600,
    428, 856, 436, 728, 440, 472, 444, 984,
    452, 568, 460, 824, 468, 696, 476, 952,
    484, 632, 492, 888, 500, 760, 508, 1016,
    524, 772, 532, 644, 540, 900, 548, 580,
    556, 836, 564, 708, 572, 964, 588, 804,
    596, 676, 604, 932, 620, 868, 628, 740,
    636, 996, 652, 788, 668, 916, 684, 852,
    692, 724, 700, 980, 716, 820, 732, 948,
    748, 884, 764, 1012, 796, 908, 812, 844,
    828, 972, 860, 940, 892, 1004, 956, 988
};
/* MIPS implementation of the bit-reversal reordering of the complex samples
 * in `frfi`, driven by the byte-offset tables coefTable_7 / coefTable_8.
 *
 * frfi    Sample buffer; each swap moves one 32-bit word with the unaligned
 *         load/store instructions ulw/usw.
 * stages  Only 7 and 8 are handled.
 *
 * NOTE(review): for any other value of `stages` no branch is taken and the
 * buffer is left untouched -- confirm that callers on MIPS never pass other
 * values.
 */
void WebRtcSpl_ComplexBitReverse(int16_t frfi[], int stages) {
  int l;
  int16_t tr, ti;
  int32_t tmp1, tmp2, tmp3, tmp4;
  int32_t* ptr_i;
  int32_t* ptr_j;
  if (stages == 8) {
    int16_t* pcoeftable_8 = coefTable_8;
    /* Each pass of the loop consumes eight table entries (16 bytes) and
     * performs four swaps; `l` starts at 120 and drops by 4 per pass.
     */
    __asm __volatile (
      ".set push \n\t"
      ".set noreorder \n\t"
      "addiu %[l], $zero, 120 \n\t"
      "1: \n\t"
      "addiu %[l], %[l], -4 \n\t"
      /* First two swaps: load four offsets, form addresses, exchange. */
      "lh %[tr], 0(%[pcoeftable_8]) \n\t"
      "lh %[ti], 2(%[pcoeftable_8]) \n\t"
      "lh %[tmp3], 4(%[pcoeftable_8]) \n\t"
      "lh %[tmp4], 6(%[pcoeftable_8]) \n\t"
      "addu %[ptr_i], %[frfi], %[tr] \n\t"
      "addu %[ptr_j], %[frfi], %[ti] \n\t"
      "addu %[tr], %[frfi], %[tmp3] \n\t"
      "addu %[ti], %[frfi], %[tmp4] \n\t"
      "ulw %[tmp1], 0(%[ptr_i]) \n\t"
      "ulw %[tmp2], 0(%[ptr_j]) \n\t"
      "ulw %[tmp3], 0(%[tr]) \n\t"
      "ulw %[tmp4], 0(%[ti]) \n\t"
      "usw %[tmp1], 0(%[ptr_j]) \n\t"
      "usw %[tmp2], 0(%[ptr_i]) \n\t"
      "usw %[tmp4], 0(%[tr]) \n\t"
      "usw %[tmp3], 0(%[ti]) \n\t"
      /* Next two swaps, using table entries 4..7 of this pass. */
      "lh %[tmp1], 8(%[pcoeftable_8]) \n\t"
      "lh %[tmp2], 10(%[pcoeftable_8]) \n\t"
      "lh %[tr], 12(%[pcoeftable_8]) \n\t"
      "lh %[ti], 14(%[pcoeftable_8]) \n\t"
      "addu %[ptr_i], %[frfi], %[tmp1] \n\t"
      "addu %[ptr_j], %[frfi], %[tmp2] \n\t"
      "addu %[tr], %[frfi], %[tr] \n\t"
      "addu %[ti], %[frfi], %[ti] \n\t"
      "ulw %[tmp1], 0(%[ptr_i]) \n\t"
      "ulw %[tmp2], 0(%[ptr_j]) \n\t"
      "ulw %[tmp3], 0(%[tr]) \n\t"
      "ulw %[tmp4], 0(%[ti]) \n\t"
      "usw %[tmp1], 0(%[ptr_j]) \n\t"
      "usw %[tmp2], 0(%[ptr_i]) \n\t"
      "usw %[tmp4], 0(%[tr]) \n\t"
      "usw %[tmp3], 0(%[ti]) \n\t"
      /* The table pointer is advanced in the branch delay slot. */
      "bgtz %[l], 1b \n\t"
      " addiu %[pcoeftable_8], %[pcoeftable_8], 16 \n\t"
      ".set pop \n\t"
      : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [ptr_i] "=&r" (ptr_i),
        [ptr_j] "=&r" (ptr_j), [tr] "=&r" (tr), [l] "=&r" (l),
        [tmp3] "=&r" (tmp3), [pcoeftable_8] "+r" (pcoeftable_8),
        [ti] "=&r" (ti), [tmp4] "=&r" (tmp4)
      : [frfi] "r" (frfi)
      : "memory"
    );
  } else if (stages == 7) {
    int16_t* pcoeftable_7 = coefTable_7;
    /* Same loop as above with the stages == 7 table; `l` starts at 56. */
    __asm __volatile (
      ".set push \n\t"
      ".set noreorder \n\t"
      "addiu %[l], $zero, 56 \n\t"
      "1: \n\t"
      "addiu %[l], %[l], -4 \n\t"
      "lh %[tr], 0(%[pcoeftable_7]) \n\t"
      "lh %[ti], 2(%[pcoeftable_7]) \n\t"
      "lh %[tmp3], 4(%[pcoeftable_7]) \n\t"
      "lh %[tmp4], 6(%[pcoeftable_7]) \n\t"
      "addu %[ptr_i], %[frfi], %[tr] \n\t"
      "addu %[ptr_j], %[frfi], %[ti] \n\t"
      "addu %[tr], %[frfi], %[tmp3] \n\t"
      "addu %[ti], %[frfi], %[tmp4] \n\t"
      "ulw %[tmp1], 0(%[ptr_i]) \n\t"
      "ulw %[tmp2], 0(%[ptr_j]) \n\t"
      "ulw %[tmp3], 0(%[tr]) \n\t"
      "ulw %[tmp4], 0(%[ti]) \n\t"
      "usw %[tmp1], 0(%[ptr_j]) \n\t"
      "usw %[tmp2], 0(%[ptr_i]) \n\t"
      "usw %[tmp4], 0(%[tr]) \n\t"
      "usw %[tmp3], 0(%[ti]) \n\t"
      "lh %[tmp1], 8(%[pcoeftable_7]) \n\t"
      "lh %[tmp2], 10(%[pcoeftable_7]) \n\t"
      "lh %[tr], 12(%[pcoeftable_7]) \n\t"
      "lh %[ti], 14(%[pcoeftable_7]) \n\t"
      "addu %[ptr_i], %[frfi], %[tmp1] \n\t"
      "addu %[ptr_j], %[frfi], %[tmp2] \n\t"
      "addu %[tr], %[frfi], %[tr] \n\t"
      "addu %[ti], %[frfi], %[ti] \n\t"
      "ulw %[tmp1], 0(%[ptr_i]) \n\t"
      "ulw %[tmp2], 0(%[ptr_j]) \n\t"
      "ulw %[tmp3], 0(%[tr]) \n\t"
      "ulw %[tmp4], 0(%[ti]) \n\t"
      "usw %[tmp1], 0(%[ptr_j]) \n\t"
      "usw %[tmp2], 0(%[ptr_i]) \n\t"
      "usw %[tmp4], 0(%[tr]) \n\t"
      "usw %[tmp3], 0(%[ti]) \n\t"
      "bgtz %[l], 1b \n\t"
      " addiu %[pcoeftable_7], %[pcoeftable_7], 16 \n\t"
      ".set pop \n\t"
      : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [ptr_i] "=&r" (ptr_i),
        [ptr_j] "=&r" (ptr_j), [ti] "=&r" (ti), [tr] "=&r" (tr),
        [l] "=&r" (l), [pcoeftable_7] "+r" (pcoeftable_7),
        [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4)
      : [frfi] "r" (frfi)
      : "memory"
    );
  }
}

View file

@ -0,0 +1,299 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_ComplexFFT().
* The description header can be found in signal_processing_library.h
*
*/
#include "common_audio/signal_processing/complex_fft_tables.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "rtc_base/system/arch.h"
/* Fixed-point constants of the high-accuracy (mode != 0) code paths. */
/* Extra fractional bits kept in the forward-FFT butterfly. */
#define CFFTSFT 14
/* Rounding term added to the twiddle products of the forward FFT. */
#define CFFTRND 1
/* Rounding term added before the final (1 + CFFTSFT)-bit down-shift. */
#define CFFTRND2 16384
/* Extra fractional bits kept in the inverse-FFT butterfly. */
#define CIFFTSFT 14
/* Rounding term added to the twiddle products of the inverse FFT. */
#define CIFFTRND 1
/*
 * In-place radix-2 complex FFT on interleaved 16-bit data.
 *
 * frfi    Interleaved samples: frfi[2 * i] is the real part and
 *         frfi[2 * i + 1] the imaginary part of sample i; 1 << stages complex
 *         samples are processed. The butterflies start with distance 1, so
 *         the input is presumably expected in bit-reversed order (see
 *         WebRtcSpl_ComplexBitReverse()) -- confirm with the header.
 * stages  log2 of the transform length; must be in [0, 10].
 * mode    0 selects the low-complexity/low-accuracy butterflies, any other
 *         value the high-accuracy ones with rounding.
 *
 * Every stage halves the result, so the output is scaled by 1 / (1 << stages).
 *
 * Returns 0 on success and -1 if `stages` is out of range.
 */
int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode)
{
    int i, j, l, k, istep, n, m;
    int16_t wr, wi;
    int32_t tr32, ti32, qr32, qi32;

    /* The limit of 10 stages (1024 points) is a constant given from the size
     * of kSinTable1024[]. The range is checked before shifting because
     * `1 << stages` is undefined for negative or oversized shift counts.
     */
    if (stages < 0 || stages > 10)
        return -1;
    n = 1 << stages;

    l = 1;
    k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change
                   depending on the input parameter 'stages' */

    if (mode == 0)
    {
        // mode==0: Low-complexity and Low-accuracy mode
        while (l < n)
        {
            istep = l << 1;

            for (m = 0; m < l; ++m)
            {
                j = m << k;

                /* The 256-value is a constant given as 1/4 of the size of
                 * kSinTable1024[], and should not be changed depending on the input
                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
                 */
                wr = kSinTable1024[j + 256];
                wi = -kSinTable1024[j];

                for (i = m; i < n; i += istep)
                {
                    j = i + l;

                    /* Twiddle multiplication of the lower butterfly input. */
                    tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15;
                    ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15;

                    qr32 = (int32_t)frfi[2 * i];
                    qi32 = (int32_t)frfi[2 * i + 1];

                    /* Butterfly with a one-bit down-scaling per stage. */
                    frfi[2 * j] = (int16_t)((qr32 - tr32) >> 1);
                    frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> 1);
                    frfi[2 * i] = (int16_t)((qr32 + tr32) >> 1);
                    frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> 1);
                }
            }

            --k;
            l = istep;
        }
    } else
    {
        // mode==1: High-complexity and High-accuracy mode
        while (l < n)
        {
            istep = l << 1;

            for (m = 0; m < l; ++m)
            {
                j = m << k;

                /* The 256-value is a constant given as 1/4 of the size of
                 * kSinTable1024[], and should not be changed depending on the input
                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
                 */
                wr = kSinTable1024[j + 256];
                wi = -kSinTable1024[j];

#ifdef WEBRTC_ARCH_ARM_V7
                /* Pack wr (low half) and wi (high half) into one register. */
                int32_t wri = 0;
                __asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) :
                    "r"((int32_t)wr), "r"((int32_t)wi));
#endif

                for (i = m; i < n; i += istep)
                {
                    j = i + l;

#ifdef WEBRTC_ARCH_ARM_V7
                    register int32_t frfi_r;
                    __asm __volatile(
                        "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd],"
                        " lsl #16\n\t"
                        "smlsd %[tr32], %[wri], %[frfi_r], %[cfftrnd]\n\t"
                        "smladx %[ti32], %[wri], %[frfi_r], %[cfftrnd]\n\t"
                        :[frfi_r]"=&r"(frfi_r),
                         [tr32]"=&r"(tr32),
                         [ti32]"=r"(ti32)
                        :[frfi_even]"r"((int32_t)frfi[2*j]),
                         [frfi_odd]"r"((int32_t)frfi[2*j +1]),
                         [wri]"r"(wri),
                         [cfftrnd]"r"(CFFTRND));
#else
                    tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CFFTRND;

                    ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CFFTRND;
#endif

                    /* Keep CFFTSFT fractional bits of the twiddle product. */
                    tr32 >>= 15 - CFFTSFT;
                    ti32 >>= 15 - CFFTSFT;

                    qr32 = ((int32_t)frfi[2 * i]) * (1 << CFFTSFT);
                    qi32 = ((int32_t)frfi[2 * i + 1]) * (1 << CFFTSFT);

                    /* Rounded butterfly; the shift removes the fractional
                     * bits and halves the result.
                     */
                    frfi[2 * j] = (int16_t)(
                        (qr32 - tr32 + CFFTRND2) >> (1 + CFFTSFT));
                    frfi[2 * j + 1] = (int16_t)(
                        (qi32 - ti32 + CFFTRND2) >> (1 + CFFTSFT));
                    frfi[2 * i] = (int16_t)(
                        (qr32 + tr32 + CFFTRND2) >> (1 + CFFTSFT));
                    frfi[2 * i + 1] = (int16_t)(
                        (qi32 + ti32 + CFFTRND2) >> (1 + CFFTSFT));
                }
            }

            --k;
            l = istep;
        }
    }
    return 0;
}
/*
 * In-place radix-2 complex inverse FFT on interleaved 16-bit data with
 * data-dependent scaling.
 *
 * frfi    Interleaved samples: frfi[2 * i] is the real part and
 *         frfi[2 * i + 1] the imaginary part of sample i; 1 << stages complex
 *         samples are processed.
 * stages  log2 of the transform length; must be in [0, 10].
 * mode    0 selects the low-complexity/low-accuracy butterflies, any other
 *         value the high-accuracy ones with rounding.
 *
 * Before each stage the largest magnitude in the buffer is inspected and the
 * stage output is shifted down by 0, 1 or 2 bits accordingly.
 *
 * Returns the total number of down-shifts applied over all stages, or -1 if
 * `stages` is out of range.
 */
int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode)
{
    size_t i, j, l, istep, n, m;
    int k, scale, shift;
    int16_t wr, wi;
    int32_t tr32, ti32, qr32, qi32;
    int32_t tmp32, round2;

    /* The limit of 10 stages (1024 points) is a constant given from the size
     * of kSinTable1024[]. The range is checked before shifting because the
     * shift is undefined for negative or oversized shift counts.
     */
    if (stages < 0 || stages > 10)
        return -1;
    n = ((size_t)1) << stages;

    scale = 0;

    l = 1;
    k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change
                   depending on the input parameter 'stages' */

    while (l < n)
    {
        // variable scaling, depending upon data
        shift = 0;
        round2 = 8192;

        /* One extra bit of down-scaling above each threshold; round2 is kept
         * at half of the final divisor.
         */
        tmp32 = WebRtcSpl_MaxAbsValueW16(frfi, 2 * n);
        if (tmp32 > 13573)
        {
            shift++;
            scale++;
            round2 <<= 1;
        }
        if (tmp32 > 27146)
        {
            shift++;
            scale++;
            round2 <<= 1;
        }

        istep = l << 1;

        if (mode == 0)
        {
            // mode==0: Low-complexity and Low-accuracy mode
            for (m = 0; m < l; ++m)
            {
                j = m << k;

                /* The 256-value is a constant given as 1/4 of the size of
                 * kSinTable1024[], and should not be changed depending on the input
                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
                 */
                wr = kSinTable1024[j + 256];
                wi = kSinTable1024[j];

                for (i = m; i < n; i += istep)
                {
                    j = i + l;

                    /* Twiddle multiplication of the lower butterfly input. */
                    tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15;

                    ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15;

                    qr32 = (int32_t)frfi[2 * i];
                    qi32 = (int32_t)frfi[2 * i + 1];
                    frfi[2 * j] = (int16_t)((qr32 - tr32) >> shift);
                    frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> shift);
                    frfi[2 * i] = (int16_t)((qr32 + tr32) >> shift);
                    frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> shift);
                }
            }
        } else
        {
            // mode==1: High-complexity and High-accuracy mode

            for (m = 0; m < l; ++m)
            {
                j = m << k;

                /* The 256-value is a constant given as 1/4 of the size of
                 * kSinTable1024[], and should not be changed depending on the input
                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
                 */
                wr = kSinTable1024[j + 256];
                wi = kSinTable1024[j];

#ifdef WEBRTC_ARCH_ARM_V7
                /* Pack wr (low half) and wi (high half) into one register. */
                int32_t wri = 0;
                __asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) :
                    "r"((int32_t)wr), "r"((int32_t)wi));
#endif

                for (i = m; i < n; i += istep)
                {
                    j = i + l;

#ifdef WEBRTC_ARCH_ARM_V7
                    register int32_t frfi_r;
                    __asm __volatile(
                      "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd], lsl #16\n\t"
                      "smlsd %[tr32], %[wri], %[frfi_r], %[cifftrnd]\n\t"
                      "smladx %[ti32], %[wri], %[frfi_r], %[cifftrnd]\n\t"
                      :[frfi_r]"=&r"(frfi_r),
                       [tr32]"=&r"(tr32),
                       [ti32]"=r"(ti32)
                      :[frfi_even]"r"((int32_t)frfi[2*j]),
                       [frfi_odd]"r"((int32_t)frfi[2*j +1]),
                       [wri]"r"(wri),
                       [cifftrnd]"r"(CIFFTRND)
                    );
#else

                    tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CIFFTRND;

                    ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CIFFTRND;
#endif
                    /* Keep CIFFTSFT fractional bits of the twiddle product. */
                    tr32 >>= 15 - CIFFTSFT;
                    ti32 >>= 15 - CIFFTSFT;

                    qr32 = ((int32_t)frfi[2 * i]) * (1 << CIFFTSFT);
                    qi32 = ((int32_t)frfi[2 * i + 1]) * (1 << CIFFTSFT);

                    /* Rounded butterfly; the shift removes the fractional
                     * bits and applies this stage's scaling.
                     */
                    frfi[2 * j] = (int16_t)(
                        (qr32 - tr32 + round2) >> (shift + CIFFTSFT));
                    frfi[2 * j + 1] = (int16_t)(
                        (qi32 - ti32 + round2) >> (shift + CIFFTSFT));
                    frfi[2 * i] = (int16_t)(
                        (qr32 + tr32 + round2) >> (shift + CIFFTSFT));
                    frfi[2 * i + 1] = (int16_t)(
                        (qi32 + ti32 + round2) >> (shift + CIFFTSFT));
                }
            }

        }
        --k;
        l = istep;
    }
    return scale;
}

View file

@ -0,0 +1,328 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/signal_processing/complex_fft_tables.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
/* Fixed-point constants with the same names and values as in the generic C
 * implementation. The inline assembly of WebRtcSpl_ComplexFFT() in this file
 * spells the corresponding values (14, 1, 16384) as literals rather than
 * referencing these macros.
 */
#define CFFTSFT 14
#define CFFTRND 1
#define CFFTRND2 16384
#define CIFFTSFT 14
#define CIFFTRND 1
/*
 * MIPS implementation of the in-place radix-2 complex FFT on interleaved
 * 16-bit data.
 *
 * frfi    Interleaved real/imaginary samples, 1 << stages complex samples.
 * stages  log2 of the transform length; must be in [0, 10].
 * mode    Not read by this implementation; the assembly always performs the
 *         rounded butterflies (shift by 14, rounding terms 1 and 16384).
 *
 * Returns 0 on success and -1 if `stages` is out of range.
 */
int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode) {
  int i = 0;
  int l = 0;
  int k = 0;
  int istep = 0;
  int n = 0;
  int m = 0;
  int32_t wr = 0, wi = 0;
  int32_t tmp1 = 0;
  int32_t tmp2 = 0;
  int32_t tmp3 = 0;
  int32_t tmp4 = 0;
  int32_t tmp5 = 0;
  int32_t tmp6 = 0;
  int32_t tmp = 0;
  int16_t* ptr_j = NULL;
  int16_t* ptr_i = NULL;

  /* `1 << stages` is undefined for negative or oversized shift counts, and
   * the twiddle table supports at most 1024 points (10 stages).
   */
  if (stages < 0 || stages > 10) {
    return -1;
  }
  n = 1 << stages;
  /* All three assembly loops below test their condition at the bottom, so
   * they execute at least once. For a single-point transform that would run
   * one butterfly on frfi[0..3], modifying the only sample and accessing
   * memory past it. A single point needs no work, so return early.
   */
  if (n < 2) {
    return 0;
  }

  __asm __volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    /* k is the shift that turns m into a byte offset into kSinTable1024. */
    "addiu %[k], $zero, 10 \n\t"
    "addiu %[l], $zero, 1 \n\t"
    /* 3: one FFT stage. tmp = byte distance between butterfly partners. */
    "3: \n\t"
    "sll %[istep], %[l], 1 \n\t"
    "move %[m], $zero \n\t"
    "sll %[tmp], %[l], 2 \n\t"
    "move %[i], $zero \n\t"
    /* 2: load the twiddle factor for the current m. */
    "2: \n\t"
#if defined(MIPS_DSP_R1_LE)
    "sllv %[tmp3], %[m], %[k] \n\t"
    "addiu %[tmp2], %[tmp3], 512 \n\t"
    "addiu %[m], %[m], 1 \n\t"
    "lhx %[wi], %[tmp3](%[kSinTable1024]) \n\t"
    "lhx %[wr], %[tmp2](%[kSinTable1024]) \n\t"
#else // #if defined(MIPS_DSP_R1_LE)
    "sllv %[tmp3], %[m], %[k] \n\t"
    "addu %[ptr_j], %[tmp3], %[kSinTable1024] \n\t"
    "addiu %[ptr_i], %[ptr_j], 512 \n\t"
    "addiu %[m], %[m], 1 \n\t"
    "lh %[wi], 0(%[ptr_j]) \n\t"
    "lh %[wr], 0(%[ptr_i]) \n\t"
#endif // #if defined(MIPS_DSP_R1_LE)
    /* 1: one butterfly on samples i (ptr_i) and i + l (ptr_j). */
    "1: \n\t"
    "sll %[tmp1], %[i], 2 \n\t"
    "addu %[ptr_i], %[frfi], %[tmp1] \n\t"
    "addu %[ptr_j], %[ptr_i], %[tmp] \n\t"
    "lh %[tmp6], 0(%[ptr_i]) \n\t"
    "lh %[tmp5], 2(%[ptr_i]) \n\t"
    "lh %[tmp3], 0(%[ptr_j]) \n\t"
    "lh %[tmp4], 2(%[ptr_j]) \n\t"
    "addu %[i], %[i], %[istep] \n\t"
#if defined(MIPS_DSP_R2_LE)
    "mult %[wr], %[tmp3] \n\t"
    "madd %[wi], %[tmp4] \n\t"
    "mult $ac1, %[wr], %[tmp4] \n\t"
    "msub $ac1, %[wi], %[tmp3] \n\t"
    "mflo %[tmp1] \n\t"
    "mflo %[tmp2], $ac1 \n\t"
    "sll %[tmp6], %[tmp6], 14 \n\t"
    "sll %[tmp5], %[tmp5], 14 \n\t"
    "shra_r.w %[tmp1], %[tmp1], 1 \n\t"
    "shra_r.w %[tmp2], %[tmp2], 1 \n\t"
    "subu %[tmp4], %[tmp6], %[tmp1] \n\t"
    "addu %[tmp1], %[tmp6], %[tmp1] \n\t"
    "addu %[tmp6], %[tmp5], %[tmp2] \n\t"
    "subu %[tmp5], %[tmp5], %[tmp2] \n\t"
    "shra_r.w %[tmp1], %[tmp1], 15 \n\t"
    "shra_r.w %[tmp6], %[tmp6], 15 \n\t"
    "shra_r.w %[tmp4], %[tmp4], 15 \n\t"
    "shra_r.w %[tmp5], %[tmp5], 15 \n\t"
#else // #if defined(MIPS_DSP_R2_LE)
    "mul %[tmp2], %[wr], %[tmp4] \n\t"
    "mul %[tmp1], %[wr], %[tmp3] \n\t"
    "mul %[tmp4], %[wi], %[tmp4] \n\t"
    "mul %[tmp3], %[wi], %[tmp3] \n\t"
    "sll %[tmp6], %[tmp6], 14 \n\t"
    "sll %[tmp5], %[tmp5], 14 \n\t"
    "addiu %[tmp6], %[tmp6], 16384 \n\t"
    "addiu %[tmp5], %[tmp5], 16384 \n\t"
    "addu %[tmp1], %[tmp1], %[tmp4] \n\t"
    "subu %[tmp2], %[tmp2], %[tmp3] \n\t"
    "addiu %[tmp1], %[tmp1], 1 \n\t"
    "addiu %[tmp2], %[tmp2], 1 \n\t"
    "sra %[tmp1], %[tmp1], 1 \n\t"
    "sra %[tmp2], %[tmp2], 1 \n\t"
    "subu %[tmp4], %[tmp6], %[tmp1] \n\t"
    "addu %[tmp1], %[tmp6], %[tmp1] \n\t"
    "addu %[tmp6], %[tmp5], %[tmp2] \n\t"
    "subu %[tmp5], %[tmp5], %[tmp2] \n\t"
    "sra %[tmp4], %[tmp4], 15 \n\t"
    "sra %[tmp1], %[tmp1], 15 \n\t"
    "sra %[tmp6], %[tmp6], 15 \n\t"
    "sra %[tmp5], %[tmp5], 15 \n\t"
#endif // #if defined(MIPS_DSP_R2_LE)
    "sh %[tmp1], 0(%[ptr_i]) \n\t"
    "sh %[tmp6], 2(%[ptr_i]) \n\t"
    "sh %[tmp4], 0(%[ptr_j]) \n\t"
    /* The indented instructions below sit in branch delay slots. */
    "blt %[i], %[n], 1b \n\t"
    " sh %[tmp5], 2(%[ptr_j]) \n\t"
    "blt %[m], %[l], 2b \n\t"
    " addu %[i], $zero, %[m] \n\t"
    "move %[l], %[istep] \n\t"
    "blt %[l], %[n], 3b \n\t"
    " addiu %[k], %[k], -1 \n\t"
    ".set pop \n\t"
    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
      [tmp4] "=&r" (tmp4), [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6),
      [ptr_i] "=&r" (ptr_i), [i] "=&r" (i), [wi] "=&r" (wi), [wr] "=&r" (wr),
      [m] "=&r" (m), [istep] "=&r" (istep), [l] "=&r" (l), [k] "=&r" (k),
      [ptr_j] "=&r" (ptr_j), [tmp] "=&r" (tmp)
    : [n] "r" (n), [frfi] "r" (frfi), [kSinTable1024] "r" (kSinTable1024)
    : "hi", "lo", "memory"
#if defined(MIPS_DSP_R2_LE)
    , "$ac1hi", "$ac1lo"
#endif // #if defined(MIPS_DSP_R2_LE)
  );
  return 0;
}
int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode) {
int i = 0, l = 0, k = 0;
int istep = 0, n = 0, m = 0;
int scale = 0, shift = 0;
int32_t wr = 0, wi = 0;
int32_t tmp1 = 0, tmp2 = 0, tmp3 = 0, tmp4 = 0;
int32_t tmp5 = 0, tmp6 = 0, tmp = 0, tempMax = 0, round2 = 0;
int16_t* ptr_j = NULL;
int16_t* ptr_i = NULL;
n = 1 << stages;
if (n > 1024) {
return -1;
}
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"addiu %[k], $zero, 10 \n\t"
"addiu %[l], $zero, 1 \n\t"
"move %[scale], $zero \n\t"
"3: \n\t"
"addiu %[shift], $zero, 14 \n\t"
"addiu %[round2], $zero, 8192 \n\t"
"move %[ptr_i], %[frfi] \n\t"
"move %[tempMax], $zero \n\t"
"addu %[i], %[n], %[n] \n\t"
"5: \n\t"
"lh %[tmp1], 0(%[ptr_i]) \n\t"
"lh %[tmp2], 2(%[ptr_i]) \n\t"
"lh %[tmp3], 4(%[ptr_i]) \n\t"
"lh %[tmp4], 6(%[ptr_i]) \n\t"
#if defined(MIPS_DSP_R1_LE)
"absq_s.w %[tmp1], %[tmp1] \n\t"
"absq_s.w %[tmp2], %[tmp2] \n\t"
"absq_s.w %[tmp3], %[tmp3] \n\t"
"absq_s.w %[tmp4], %[tmp4] \n\t"
#else // #if defined(MIPS_DSP_R1_LE)
"slt %[tmp5], %[tmp1], $zero \n\t"
"subu %[tmp6], $zero, %[tmp1] \n\t"
"movn %[tmp1], %[tmp6], %[tmp5] \n\t"
"slt %[tmp5], %[tmp2], $zero \n\t"
"subu %[tmp6], $zero, %[tmp2] \n\t"
"movn %[tmp2], %[tmp6], %[tmp5] \n\t"
"slt %[tmp5], %[tmp3], $zero \n\t"
"subu %[tmp6], $zero, %[tmp3] \n\t"
"movn %[tmp3], %[tmp6], %[tmp5] \n\t"
"slt %[tmp5], %[tmp4], $zero \n\t"
"subu %[tmp6], $zero, %[tmp4] \n\t"
"movn %[tmp4], %[tmp6], %[tmp5] \n\t"
#endif // #if defined(MIPS_DSP_R1_LE)
"slt %[tmp5], %[tempMax], %[tmp1] \n\t"
"movn %[tempMax], %[tmp1], %[tmp5] \n\t"
"addiu %[i], %[i], -4 \n\t"
"slt %[tmp5], %[tempMax], %[tmp2] \n\t"
"movn %[tempMax], %[tmp2], %[tmp5] \n\t"
"slt %[tmp5], %[tempMax], %[tmp3] \n\t"
"movn %[tempMax], %[tmp3], %[tmp5] \n\t"
"slt %[tmp5], %[tempMax], %[tmp4] \n\t"
"movn %[tempMax], %[tmp4], %[tmp5] \n\t"
"bgtz %[i], 5b \n\t"
" addiu %[ptr_i], %[ptr_i], 8 \n\t"
"addiu %[tmp1], $zero, 13573 \n\t"
"addiu %[tmp2], $zero, 27146 \n\t"
#if !defined(MIPS32_R2_LE)
"sll %[tempMax], %[tempMax], 16 \n\t"
"sra %[tempMax], %[tempMax], 16 \n\t"
#else // #if !defined(MIPS32_R2_LE)
"seh %[tempMax] \n\t"
#endif // #if !defined(MIPS32_R2_LE)
"slt %[tmp1], %[tmp1], %[tempMax] \n\t"
"slt %[tmp2], %[tmp2], %[tempMax] \n\t"
"addu %[tmp1], %[tmp1], %[tmp2] \n\t"
"addu %[shift], %[shift], %[tmp1] \n\t"
"addu %[scale], %[scale], %[tmp1] \n\t"
"sllv %[round2], %[round2], %[tmp1] \n\t"
"sll %[istep], %[l], 1 \n\t"
"move %[m], $zero \n\t"
"sll %[tmp], %[l], 2 \n\t"
"2: \n\t"
#if defined(MIPS_DSP_R1_LE)
"sllv %[tmp3], %[m], %[k] \n\t"
"addiu %[tmp2], %[tmp3], 512 \n\t"
"addiu %[m], %[m], 1 \n\t"
"lhx %[wi], %[tmp3](%[kSinTable1024]) \n\t"
"lhx %[wr], %[tmp2](%[kSinTable1024]) \n\t"
#else // #if defined(MIPS_DSP_R1_LE)
"sllv %[tmp3], %[m], %[k] \n\t"
"addu %[ptr_j], %[tmp3], %[kSinTable1024] \n\t"
"addiu %[ptr_i], %[ptr_j], 512 \n\t"
"addiu %[m], %[m], 1 \n\t"
"lh %[wi], 0(%[ptr_j]) \n\t"
"lh %[wr], 0(%[ptr_i]) \n\t"
#endif // #if defined(MIPS_DSP_R1_LE)
"1: \n\t"
"sll %[tmp1], %[i], 2 \n\t"
"addu %[ptr_i], %[frfi], %[tmp1] \n\t"
"addu %[ptr_j], %[ptr_i], %[tmp] \n\t"
"lh %[tmp3], 0(%[ptr_j]) \n\t"
"lh %[tmp4], 2(%[ptr_j]) \n\t"
"lh %[tmp6], 0(%[ptr_i]) \n\t"
"lh %[tmp5], 2(%[ptr_i]) \n\t"
"addu %[i], %[i], %[istep] \n\t"
#if defined(MIPS_DSP_R2_LE)
"mult %[wr], %[tmp3] \n\t"
"msub %[wi], %[tmp4] \n\t"
"mult $ac1, %[wr], %[tmp4] \n\t"
"madd $ac1, %[wi], %[tmp3] \n\t"
"mflo %[tmp1] \n\t"
"mflo %[tmp2], $ac1 \n\t"
"sll %[tmp6], %[tmp6], 14 \n\t"
"sll %[tmp5], %[tmp5], 14 \n\t"
"shra_r.w %[tmp1], %[tmp1], 1 \n\t"
"shra_r.w %[tmp2], %[tmp2], 1 \n\t"
"addu %[tmp6], %[tmp6], %[round2] \n\t"
"addu %[tmp5], %[tmp5], %[round2] \n\t"
"subu %[tmp4], %[tmp6], %[tmp1] \n\t"
"addu %[tmp1], %[tmp6], %[tmp1] \n\t"
"addu %[tmp6], %[tmp5], %[tmp2] \n\t"
"subu %[tmp5], %[tmp5], %[tmp2] \n\t"
"srav %[tmp4], %[tmp4], %[shift] \n\t"
"srav %[tmp1], %[tmp1], %[shift] \n\t"
"srav %[tmp6], %[tmp6], %[shift] \n\t"
"srav %[tmp5], %[tmp5], %[shift] \n\t"
#else // #if defined(MIPS_DSP_R2_LE)
"mul %[tmp1], %[wr], %[tmp3] \n\t"
"mul %[tmp2], %[wr], %[tmp4] \n\t"
"mul %[tmp4], %[wi], %[tmp4] \n\t"
"mul %[tmp3], %[wi], %[tmp3] \n\t"
"sll %[tmp6], %[tmp6], 14 \n\t"
"sll %[tmp5], %[tmp5], 14 \n\t"
"sub %[tmp1], %[tmp1], %[tmp4] \n\t"
"addu %[tmp2], %[tmp2], %[tmp3] \n\t"
"addiu %[tmp1], %[tmp1], 1 \n\t"
"addiu %[tmp2], %[tmp2], 1 \n\t"
"sra %[tmp2], %[tmp2], 1 \n\t"
"sra %[tmp1], %[tmp1], 1 \n\t"
"addu %[tmp6], %[tmp6], %[round2] \n\t"
"addu %[tmp5], %[tmp5], %[round2] \n\t"
"subu %[tmp4], %[tmp6], %[tmp1] \n\t"
"addu %[tmp1], %[tmp6], %[tmp1] \n\t"
"addu %[tmp6], %[tmp5], %[tmp2] \n\t"
"subu %[tmp5], %[tmp5], %[tmp2] \n\t"
"sra %[tmp4], %[tmp4], %[shift] \n\t"
"sra %[tmp1], %[tmp1], %[shift] \n\t"
"sra %[tmp6], %[tmp6], %[shift] \n\t"
"sra %[tmp5], %[tmp5], %[shift] \n\t"
#endif // #if defined(MIPS_DSP_R2_LE)
"sh %[tmp1], 0(%[ptr_i]) \n\t"
"sh %[tmp6], 2(%[ptr_i]) \n\t"
"sh %[tmp4], 0(%[ptr_j]) \n\t"
"blt %[i], %[n], 1b \n\t"
" sh %[tmp5], 2(%[ptr_j]) \n\t"
"blt %[m], %[l], 2b \n\t"
" addu %[i], $zero, %[m] \n\t"
"move %[l], %[istep] \n\t"
"blt %[l], %[n], 3b \n\t"
" addiu %[k], %[k], -1 \n\t"
".set pop \n\t"
: [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
[tmp4] "=&r" (tmp4), [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6),
[ptr_i] "=&r" (ptr_i), [i] "=&r" (i), [m] "=&r" (m), [tmp] "=&r" (tmp),
[istep] "=&r" (istep), [wi] "=&r" (wi), [wr] "=&r" (wr), [l] "=&r" (l),
[k] "=&r" (k), [round2] "=&r" (round2), [ptr_j] "=&r" (ptr_j),
[shift] "=&r" (shift), [scale] "=&r" (scale), [tempMax] "=&r" (tempMax)
: [n] "r" (n), [frfi] "r" (frfi), [kSinTable1024] "r" (kSinTable1024)
: "hi", "lo", "memory"
#if defined(MIPS_DSP_R2_LE)
, "$ac1hi", "$ac1lo"
#endif // #if defined(MIPS_DSP_R2_LE)
);
return scale;
}

View file

@ -0,0 +1,132 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_
#define COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_
#include <stdint.h>
// Sine lookup table with values scaled to the int16_t range (peak +/-32767).
// The entries trace one full period: they rise from 0 to 32767, fall back
// through 0 to -32767 and climb towards 0 again.
// NOTE(review): the name and the first entries (201 ~= 32767*sin(2*pi/1024))
// suggest 1024 entries with kSinTable1024[i] ~= 32767*sin(2*pi*i/1024) -
// confirm the element count before relying on it.
static const int16_t kSinTable1024[] = {
0, 201, 402, 603, 804, 1005, 1206, 1406, 1607,
1808, 2009, 2209, 2410, 2610, 2811, 3011, 3211, 3411,
3611, 3811, 4011, 4210, 4409, 4608, 4807, 5006, 5205,
5403, 5601, 5799, 5997, 6195, 6392, 6589, 6786, 6982,
7179, 7375, 7571, 7766, 7961, 8156, 8351, 8545, 8739,
8932, 9126, 9319, 9511, 9703, 9895, 10087, 10278, 10469,
10659, 10849, 11038, 11227, 11416, 11604, 11792, 11980, 12166,
12353, 12539, 12724, 12909, 13094, 13278, 13462, 13645, 13827,
14009, 14191, 14372, 14552, 14732, 14911, 15090, 15268, 15446,
15623, 15799, 15975, 16150, 16325, 16499, 16672, 16845, 17017,
17189, 17360, 17530, 17699, 17868, 18036, 18204, 18371, 18537,
18702, 18867, 19031, 19194, 19357, 19519, 19680, 19840, 20000,
20159, 20317, 20474, 20631, 20787, 20942, 21096, 21249, 21402,
21554, 21705, 21855, 22004, 22153, 22301, 22448, 22594, 22739,
22883, 23027, 23169, 23311, 23452, 23592, 23731, 23869, 24006,
24143, 24278, 24413, 24546, 24679, 24811, 24942, 25072, 25201,
25329, 25456, 25582, 25707, 25831, 25954, 26077, 26198, 26318,
26437, 26556, 26673, 26789, 26905, 27019, 27132, 27244, 27355,
27466, 27575, 27683, 27790, 27896, 28001, 28105, 28208, 28309,
28410, 28510, 28608, 28706, 28802, 28897, 28992, 29085, 29177,
29268, 29358, 29446, 29534, 29621, 29706, 29790, 29873, 29955,
30036, 30116, 30195, 30272, 30349, 30424, 30498, 30571, 30643,
30713, 30783, 30851, 30918, 30984, 31049, 31113, 31175, 31236,
31297, 31356, 31413, 31470, 31525, 31580, 31633, 31684, 31735,
31785, 31833, 31880, 31926, 31970, 32014, 32056, 32097, 32137,
32176, 32213, 32249, 32284, 32318, 32350, 32382, 32412, 32441,
32468, 32495, 32520, 32544, 32567, 32588, 32609, 32628, 32646,
32662, 32678, 32692, 32705, 32717, 32727, 32736, 32744, 32751,
32757, 32761, 32764, 32766, 32767, 32766, 32764, 32761, 32757,
32751, 32744, 32736, 32727, 32717, 32705, 32692, 32678, 32662,
32646, 32628, 32609, 32588, 32567, 32544, 32520, 32495, 32468,
32441, 32412, 32382, 32350, 32318, 32284, 32249, 32213, 32176,
32137, 32097, 32056, 32014, 31970, 31926, 31880, 31833, 31785,
31735, 31684, 31633, 31580, 31525, 31470, 31413, 31356, 31297,
31236, 31175, 31113, 31049, 30984, 30918, 30851, 30783, 30713,
30643, 30571, 30498, 30424, 30349, 30272, 30195, 30116, 30036,
29955, 29873, 29790, 29706, 29621, 29534, 29446, 29358, 29268,
29177, 29085, 28992, 28897, 28802, 28706, 28608, 28510, 28410,
28309, 28208, 28105, 28001, 27896, 27790, 27683, 27575, 27466,
27355, 27244, 27132, 27019, 26905, 26789, 26673, 26556, 26437,
26318, 26198, 26077, 25954, 25831, 25707, 25582, 25456, 25329,
25201, 25072, 24942, 24811, 24679, 24546, 24413, 24278, 24143,
24006, 23869, 23731, 23592, 23452, 23311, 23169, 23027, 22883,
22739, 22594, 22448, 22301, 22153, 22004, 21855, 21705, 21554,
21402, 21249, 21096, 20942, 20787, 20631, 20474, 20317, 20159,
20000, 19840, 19680, 19519, 19357, 19194, 19031, 18867, 18702,
18537, 18371, 18204, 18036, 17868, 17699, 17530, 17360, 17189,
17017, 16845, 16672, 16499, 16325, 16150, 15975, 15799, 15623,
15446, 15268, 15090, 14911, 14732, 14552, 14372, 14191, 14009,
13827, 13645, 13462, 13278, 13094, 12909, 12724, 12539, 12353,
12166, 11980, 11792, 11604, 11416, 11227, 11038, 10849, 10659,
10469, 10278, 10087, 9895, 9703, 9511, 9319, 9126, 8932,
8739, 8545, 8351, 8156, 7961, 7766, 7571, 7375, 7179,
6982, 6786, 6589, 6392, 6195, 5997, 5799, 5601, 5403,
5205, 5006, 4807, 4608, 4409, 4210, 4011, 3811, 3611,
3411, 3211, 3011, 2811, 2610, 2410, 2209, 2009, 1808,
1607, 1406, 1206, 1005, 804, 603, 402, 201, 0,
-201, -402, -603, -804, -1005, -1206, -1406, -1607, -1808,
-2009, -2209, -2410, -2610, -2811, -3011, -3211, -3411, -3611,
-3811, -4011, -4210, -4409, -4608, -4807, -5006, -5205, -5403,
-5601, -5799, -5997, -6195, -6392, -6589, -6786, -6982, -7179,
-7375, -7571, -7766, -7961, -8156, -8351, -8545, -8739, -8932,
-9126, -9319, -9511, -9703, -9895, -10087, -10278, -10469, -10659,
-10849, -11038, -11227, -11416, -11604, -11792, -11980, -12166, -12353,
-12539, -12724, -12909, -13094, -13278, -13462, -13645, -13827, -14009,
-14191, -14372, -14552, -14732, -14911, -15090, -15268, -15446, -15623,
-15799, -15975, -16150, -16325, -16499, -16672, -16845, -17017, -17189,
-17360, -17530, -17699, -17868, -18036, -18204, -18371, -18537, -18702,
-18867, -19031, -19194, -19357, -19519, -19680, -19840, -20000, -20159,
-20317, -20474, -20631, -20787, -20942, -21096, -21249, -21402, -21554,
-21705, -21855, -22004, -22153, -22301, -22448, -22594, -22739, -22883,
-23027, -23169, -23311, -23452, -23592, -23731, -23869, -24006, -24143,
-24278, -24413, -24546, -24679, -24811, -24942, -25072, -25201, -25329,
-25456, -25582, -25707, -25831, -25954, -26077, -26198, -26318, -26437,
-26556, -26673, -26789, -26905, -27019, -27132, -27244, -27355, -27466,
-27575, -27683, -27790, -27896, -28001, -28105, -28208, -28309, -28410,
-28510, -28608, -28706, -28802, -28897, -28992, -29085, -29177, -29268,
-29358, -29446, -29534, -29621, -29706, -29790, -29873, -29955, -30036,
-30116, -30195, -30272, -30349, -30424, -30498, -30571, -30643, -30713,
-30783, -30851, -30918, -30984, -31049, -31113, -31175, -31236, -31297,
-31356, -31413, -31470, -31525, -31580, -31633, -31684, -31735, -31785,
-31833, -31880, -31926, -31970, -32014, -32056, -32097, -32137, -32176,
-32213, -32249, -32284, -32318, -32350, -32382, -32412, -32441, -32468,
-32495, -32520, -32544, -32567, -32588, -32609, -32628, -32646, -32662,
-32678, -32692, -32705, -32717, -32727, -32736, -32744, -32751, -32757,
-32761, -32764, -32766, -32767, -32766, -32764, -32761, -32757, -32751,
-32744, -32736, -32727, -32717, -32705, -32692, -32678, -32662, -32646,
-32628, -32609, -32588, -32567, -32544, -32520, -32495, -32468, -32441,
-32412, -32382, -32350, -32318, -32284, -32249, -32213, -32176, -32137,
-32097, -32056, -32014, -31970, -31926, -31880, -31833, -31785, -31735,
-31684, -31633, -31580, -31525, -31470, -31413, -31356, -31297, -31236,
-31175, -31113, -31049, -30984, -30918, -30851, -30783, -30713, -30643,
-30571, -30498, -30424, -30349, -30272, -30195, -30116, -30036, -29955,
-29873, -29790, -29706, -29621, -29534, -29446, -29358, -29268, -29177,
-29085, -28992, -28897, -28802, -28706, -28608, -28510, -28410, -28309,
-28208, -28105, -28001, -27896, -27790, -27683, -27575, -27466, -27355,
-27244, -27132, -27019, -26905, -26789, -26673, -26556, -26437, -26318,
-26198, -26077, -25954, -25831, -25707, -25582, -25456, -25329, -25201,
-25072, -24942, -24811, -24679, -24546, -24413, -24278, -24143, -24006,
-23869, -23731, -23592, -23452, -23311, -23169, -23027, -22883, -22739,
-22594, -22448, -22301, -22153, -22004, -21855, -21705, -21554, -21402,
-21249, -21096, -20942, -20787, -20631, -20474, -20317, -20159, -20000,
-19840, -19680, -19519, -19357, -19194, -19031, -18867, -18702, -18537,
-18371, -18204, -18036, -17868, -17699, -17530, -17360, -17189, -17017,
-16845, -16672, -16499, -16325, -16150, -15975, -15799, -15623, -15446,
-15268, -15090, -14911, -14732, -14552, -14372, -14191, -14009, -13827,
-13645, -13462, -13278, -13094, -12909, -12724, -12539, -12353, -12166,
-11980, -11792, -11604, -11416, -11227, -11038, -10849, -10659, -10469,
-10278, -10087, -9895, -9703, -9511, -9319, -9126, -8932, -8739,
-8545, -8351, -8156, -7961, -7766, -7571, -7375, -7179, -6982,
-6786, -6589, -6392, -6195, -5997, -5799, -5601, -5403, -5205,
-5006, -4807, -4608, -4409, -4210, -4011, -3811, -3611, -3411,
-3211, -3011, -2811, -2610, -2410, -2209, -2009, -1808, -1607,
-1406, -1206, -1005, -804, -603, -402, -201};
#endif // COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_

View file

@ -0,0 +1,82 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the implementation of functions
* WebRtcSpl_MemSetW16()
* WebRtcSpl_MemSetW32()
* WebRtcSpl_MemCpyReversedOrder()
* WebRtcSpl_CopyFromEndW16()
* WebRtcSpl_ZerosArrayW16()
* WebRtcSpl_ZerosArrayW32()
*
* The description header can be found in signal_processing_library.h
*
*/
#include <string.h>
#include "common_audio/signal_processing/include/signal_processing_library.h"
// Writes `set_value` into each of the first `length` int16_t elements
// starting at `ptr`. A `length` of zero leaves the buffer untouched.
void WebRtcSpl_MemSetW16(int16_t *ptr, int16_t set_value, size_t length)
{
    for (size_t idx = 0; idx < length; ++idx)
    {
        ptr[idx] = set_value;
    }
}
// Writes `set_value` into each of the first `length` int32_t elements
// starting at `ptr`. A `length` of zero leaves the buffer untouched.
void WebRtcSpl_MemSetW32(int32_t *ptr, int32_t set_value, size_t length)
{
    for (size_t idx = 0; idx < length; ++idx)
    {
        ptr[idx] = set_value;
    }
}
// Copies `length` samples from `source` (read forwards) into memory that is
// written backwards starting at `dest`: source[0] lands in dest[0],
// source[1] in dest[-1], and so on. `dest` therefore has to point at the
// highest-addressed element of the destination region.
void WebRtcSpl_MemCpyReversedOrder(int16_t* dest,
                                   int16_t* source,
                                   size_t length)
{
    for (size_t n = 0; n < length; ++n)
    {
        *(dest - n) = source[n];
    }
}
// Copies the trailing `samples` elements of `vector_in` (which holds `length`
// elements) into `vector_out`.
// NOTE(review): `length - samples` is evaluated in size_t, so callers are
// presumably required to pass samples <= length - confirm at the call sites.
void WebRtcSpl_CopyFromEndW16(const int16_t *vector_in,
size_t length,
size_t samples,
int16_t *vector_out)
{
// Copy the last <samples> of the input vector to vector_out
WEBRTC_SPL_MEMCPY_W16(vector_out, &vector_in[length - samples], samples);
}
// Sets the first `length` int16_t elements of `vector` to zero by delegating
// to WebRtcSpl_MemSetW16() with a fill value of 0.
void WebRtcSpl_ZerosArrayW16(int16_t *vector, size_t length)
{
WebRtcSpl_MemSetW16(vector, 0, length);
}
// Sets the first `length` int32_t elements of `vector` to zero by delegating
// to WebRtcSpl_MemSetW32() with a fill value of 0.
void WebRtcSpl_ZerosArrayW32(int32_t *vector, size_t length)
{
WebRtcSpl_MemSetW32(vector, 0, length);
}

View file

@ -0,0 +1,30 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
/* C version of WebRtcSpl_CrossCorrelation() for generic platforms. */
/* Portable C implementation of the cross-correlation.
 *
 * Produces `dim_cross_correlation` outputs. Output number `lag` is the sum
 * over n in [0, dim_seq) of (seq1[n] * window[n]) >> right_shifts, where
 * `window` starts at `seq2` and moves by `step_seq2` elements (which may be
 * negative) after every output. Each product is shifted individually before
 * it is added to the 32-bit accumulator. */
void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation,
                                 const int16_t* seq1,
                                 const int16_t* seq2,
                                 size_t dim_seq,
                                 size_t dim_cross_correlation,
                                 int right_shifts,
                                 int step_seq2) {
  const int16_t* window = seq2;
  for (size_t lag = 0; lag < dim_cross_correlation; ++lag) {
    int32_t acc = 0;
    for (size_t n = 0; n < dim_seq; ++n) {
      const int32_t product = seq1[n] * window[n];
      acc += product >> right_shifts;
    }
    cross_correlation[lag] = acc;
    window += step_seq2;
  }
}

View file

@ -0,0 +1,104 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
// MIPS inline-assembly implementation of the cross-correlation.
//
// For every output it accumulates (seq1[n] * seq2_window[n]) >> right_shifts
// over `dim_seq` samples, stores the 32-bit sum to `cross_correlation`, and
// then moves the seq2 window by `step_seq2` elements. Two separate outer
// loops exist: labels 1/2 handle an even `dim_seq`, labels 3/4/5 handle an
// odd `dim_seq` (pairs first, then one trailing sample).
//
// The assembly runs under `.set noreorder`; the instruction written directly
// after each branch (shown with a leading space) sits in the branch delay
// slot, so statement order must not be changed.
//
// NOTE(review): both outer loops and the even-length inner loop test their
// counters at the bottom, so they execute at least once even when
// `dim_cross_correlation` or `dim_seq` is 0 - callers presumably never pass
// zero; confirm.
void WebRtcSpl_CrossCorrelation_mips(int32_t* cross_correlation,
const int16_t* seq1,
const int16_t* seq2,
size_t dim_seq,
size_t dim_cross_correlation,
int right_shifts,
int step_seq2) {
int32_t t0 = 0, t1 = 0, t2 = 0, t3 = 0, sum = 0;
int16_t *pseq2 = NULL;
int16_t *pseq1 = NULL;
int16_t *pseq1_0 = (int16_t*)&seq1[0];
int16_t *pseq2_0 = (int16_t*)&seq2[0];
int k = 0;
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
// Turn the element step into a byte step (int16_t = 2 bytes) and pick the
// odd-length path (label 3) when the low bit of dim_seq is set.
"sll %[step_seq2], %[step_seq2], 1 \n\t"
"andi %[t0], %[dim_seq], 1 \n\t"
"bgtz %[t0], 3f \n\t"
" nop \n\t"
// Even dim_seq: one outer iteration per output value.
"1: \n\t"
"move %[pseq1], %[pseq1_0] \n\t"
"move %[pseq2], %[pseq2_0] \n\t"
"sra %[k], %[dim_seq], 1 \n\t"
"addiu %[dim_cc], %[dim_cc], -1 \n\t"
"xor %[sum], %[sum], %[sum] \n\t"
// Inner loop: two products per pass, each shifted before accumulation.
"2: \n\t"
"lh %[t0], 0(%[pseq1]) \n\t"
"lh %[t1], 0(%[pseq2]) \n\t"
"lh %[t2], 2(%[pseq1]) \n\t"
"lh %[t3], 2(%[pseq2]) \n\t"
"mul %[t0], %[t0], %[t1] \n\t"
"addiu %[k], %[k], -1 \n\t"
"mul %[t2], %[t2], %[t3] \n\t"
"addiu %[pseq1], %[pseq1], 4 \n\t"
"addiu %[pseq2], %[pseq2], 4 \n\t"
"srav %[t0], %[t0], %[right_shifts] \n\t"
"addu %[sum], %[sum], %[t0] \n\t"
"srav %[t2], %[t2], %[right_shifts] \n\t"
"bgtz %[k], 2b \n\t"
" addu %[sum], %[sum], %[t2] \n\t"
// Advance the seq2 window, store the result, and loop while outputs remain.
"addu %[pseq2_0], %[pseq2_0], %[step_seq2] \n\t"
"sw %[sum], 0(%[cc]) \n\t"
"bgtz %[dim_cc], 1b \n\t"
" addiu %[cc], %[cc], 4 \n\t"
"b 6f \n\t"
" nop \n\t"
// Odd dim_seq: same as above, but the pair loop is skipped when there are
// no pairs (k == 0) and a single trailing sample is handled at label 5.
"3: \n\t"
"move %[pseq1], %[pseq1_0] \n\t"
"move %[pseq2], %[pseq2_0] \n\t"
"sra %[k], %[dim_seq], 1 \n\t"
"addiu %[dim_cc], %[dim_cc], -1 \n\t"
"beqz %[k], 5f \n\t"
" xor %[sum], %[sum], %[sum] \n\t"
"4: \n\t"
"lh %[t0], 0(%[pseq1]) \n\t"
"lh %[t1], 0(%[pseq2]) \n\t"
"lh %[t2], 2(%[pseq1]) \n\t"
"lh %[t3], 2(%[pseq2]) \n\t"
"mul %[t0], %[t0], %[t1] \n\t"
"addiu %[k], %[k], -1 \n\t"
"mul %[t2], %[t2], %[t3] \n\t"
"addiu %[pseq1], %[pseq1], 4 \n\t"
"addiu %[pseq2], %[pseq2], 4 \n\t"
"srav %[t0], %[t0], %[right_shifts] \n\t"
"addu %[sum], %[sum], %[t0] \n\t"
"srav %[t2], %[t2], %[right_shifts] \n\t"
"bgtz %[k], 4b \n\t"
" addu %[sum], %[sum], %[t2] \n\t"
// Trailing (odd) sample.
"5: \n\t"
"lh %[t0], 0(%[pseq1]) \n\t"
"lh %[t1], 0(%[pseq2]) \n\t"
"mul %[t0], %[t0], %[t1] \n\t"
"srav %[t0], %[t0], %[right_shifts] \n\t"
"addu %[sum], %[sum], %[t0] \n\t"
"addu %[pseq2_0], %[pseq2_0], %[step_seq2] \n\t"
"sw %[sum], 0(%[cc]) \n\t"
"bgtz %[dim_cc], 3b \n\t"
" addiu %[cc], %[cc], 4 \n\t"
"6: \n\t"
".set pop \n\t"
// Operands the assembly modifies are declared "+r"/"=&r"; dim_seq and
// right_shifts are only read.
: [step_seq2] "+r" (step_seq2), [t0] "=&r" (t0), [t1] "=&r" (t1),
[t2] "=&r" (t2), [t3] "=&r" (t3), [pseq1] "=&r" (pseq1),
[pseq2] "=&r" (pseq2), [pseq1_0] "+r" (pseq1_0), [pseq2_0] "+r" (pseq2_0),
[k] "=&r" (k), [dim_cc] "+r" (dim_cross_correlation), [sum] "=&r" (sum),
[cc] "+r" (cross_correlation)
: [dim_seq] "r" (dim_seq), [right_shifts] "r" (right_shifts)
: "hi", "lo", "memory"
);
}

View file

@ -0,0 +1,88 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "rtc_base/system/arch.h"
#include <arm_neon.h>
static inline void DotProductWithScaleNeon(int32_t* cross_correlation,
const int16_t* vector1,
const int16_t* vector2,
size_t length,
int scaling) {
size_t i = 0;
size_t len1 = length >> 3;
size_t len2 = length & 7;
int64x2_t sum0 = vdupq_n_s64(0);
int64x2_t sum1 = vdupq_n_s64(0);
for (i = len1; i > 0; i -= 1) {
int16x8_t seq1_16x8 = vld1q_s16(vector1);
int16x8_t seq2_16x8 = vld1q_s16(vector2);
#if defined(WEBRTC_ARCH_ARM64)
int32x4_t tmp0 = vmull_s16(vget_low_s16(seq1_16x8),
vget_low_s16(seq2_16x8));
int32x4_t tmp1 = vmull_high_s16(seq1_16x8, seq2_16x8);
#else
int32x4_t tmp0 = vmull_s16(vget_low_s16(seq1_16x8),
vget_low_s16(seq2_16x8));
int32x4_t tmp1 = vmull_s16(vget_high_s16(seq1_16x8),
vget_high_s16(seq2_16x8));
#endif
sum0 = vpadalq_s32(sum0, tmp0);
sum1 = vpadalq_s32(sum1, tmp1);
vector1 += 8;
vector2 += 8;
}
// Calculate the rest of the samples.
int64_t sum_res = 0;
for (i = len2; i > 0; i -= 1) {
sum_res += WEBRTC_SPL_MUL_16_16(*vector1, *vector2);
vector1++;
vector2++;
}
sum0 = vaddq_s64(sum0, sum1);
#if defined(WEBRTC_ARCH_ARM64)
int64_t sum2 = vaddvq_s64(sum0);
*cross_correlation = (int32_t)((sum2 + sum_res) >> scaling);
#else
int64x1_t shift = vdup_n_s64(-scaling);
int64x1_t sum2 = vadd_s64(vget_low_s64(sum0), vget_high_s64(sum0));
sum2 = vadd_s64(sum2, vdup_n_s64(sum_res));
sum2 = vshl_s64(sum2, shift);
vst1_lane_s32(cross_correlation, vreinterpret_s32_s64(sum2), 0);
#endif
}
/* NEON version of WebRtcSpl_CrossCorrelation() for ARM32/64 platforms. */
/* NEON version of WebRtcSpl_CrossCorrelation() for ARM32/64 platforms.
 *
 * Writes `dim_cross_correlation` values to `cross_correlation`. Value `i` is
 * the scaled dot product (see DotProductWithScaleNeon()) of `seq1` and the
 * `dim_seq` samples starting at `seq2 + step_seq2 * i`.
 *
 * `step_seq2` may be negative, so the window offset is computed in signed
 * ptrdiff_t arithmetic. Multiplying the int step directly by the size_t
 * index would convert a negative step to a huge unsigned value and rely on
 * pointer wrap-around. */
void WebRtcSpl_CrossCorrelationNeon(int32_t* cross_correlation,
                                    const int16_t* seq1,
                                    const int16_t* seq2,
                                    size_t dim_seq,
                                    size_t dim_cross_correlation,
                                    int right_shifts,
                                    int step_seq2) {
  size_t i = 0;

  for (i = 0; i < dim_cross_correlation; i++) {
    const ptrdiff_t offset = (ptrdiff_t)step_seq2 * (ptrdiff_t)i;
    DotProductWithScaleNeon(&cross_correlation[i],
                            seq1,
                            seq2 + offset,
                            dim_seq,
                            right_shifts);
  }
}

View file

@ -0,0 +1,140 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains implementations of the divisions
* WebRtcSpl_DivU32U16()
* WebRtcSpl_DivW32W16()
* WebRtcSpl_DivW32W16ResW16()
* WebRtcSpl_DivResultInQ31()
* WebRtcSpl_DivW32HiLow()
*
* The description header can be found in signal_processing_library.h
*
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "rtc_base/sanitizer.h"
// Unsigned division of a 32-bit numerator by a 16-bit denominator.
// Returns num / den, or 0xFFFFFFFF when `den` is zero.
uint32_t WebRtcSpl_DivU32U16(uint32_t num, uint16_t den)
{
    // Division by zero yields the all-ones sentinel instead of trapping.
    if (den == 0)
    {
        return (uint32_t)0xFFFFFFFF;
    }
    return (uint32_t)(num / den);
}
// Signed division of a 32-bit numerator by a 16-bit denominator.
//
// Returns num / den (truncated towards zero). Two cases saturate to
// 0x7FFFFFFF instead of invoking undefined behaviour:
//   - den == 0 (division by zero);
//   - num == INT32_MIN and den == -1, whose true quotient (2^31) does not fit
//     in int32_t and would overflow (and trap on some CPUs) if divided in
//     32 bits.
int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den)
{
    // Guard against division with 0
    if (den == 0)
    {
        return (int32_t)0x7FFFFFFF;
    }
    // Divide in 64 bits so that INT32_MIN / -1 is representable, then clamp.
    const int64_t quotient = (int64_t)num / den;
    if (quotient > (int64_t)0x7FFFFFFF)
    {
        return (int32_t)0x7FFFFFFF;
    }
    return (int32_t)quotient;
}
// Signed division of a 32-bit numerator by a 16-bit denominator with the
// quotient narrowed to int16_t.
//
// Returns 0x7FFF when `den` is zero. Otherwise the quotient is converted to
// int16_t by the plain cast, so quotients outside the int16_t range are
// truncated, not saturated. The division itself is carried out in 64 bits so
// that INT32_MIN / -1 (true quotient 2^31) does not overflow; it is narrowed
// by the same cast as every other out-of-range quotient.
int16_t WebRtcSpl_DivW32W16ResW16(int32_t num, int16_t den)
{
    // Guard against division with 0
    if (den == 0)
    {
        return (int16_t)0x7FFF;
    }
    return (int16_t)((int64_t)num / den);
}
// Bit-by-bit (shift-and-subtract) division producing 31 fractional bits of
// num / den.
//
// The magnitudes of `num` and `den` are divided; on each of the 31 steps the
// running remainder is doubled and, if it is at least |den|, |den| is
// subtracted and a 1 is shifted into the quotient. The result is negated
// when exactly one of the operands is negative. Returns 0 when `num` is 0.
// NOTE(review): the result is only meaningful as a Q31 fraction when
// |num| < |den| - presumably a caller precondition; confirm.
int32_t WebRtcSpl_DivResultInQ31(int32_t num, int32_t den)
{
    if (num == 0)
    {
        return 0;
    }

    const int negate = (num < 0) != (den < 0);
    int32_t remainder = (num < 0) ? -num : num;
    const int32_t divisor = (den < 0) ? -den : den;
    int32_t quotient = 0;

    for (int bit = 0; bit < 31; ++bit)
    {
        quotient <<= 1;
        remainder <<= 1;
        if (remainder >= divisor)
        {
            remainder -= divisor;
            quotient++;
        }
    }

    return negate ? -quotient : quotient;
}
// Divides `num` by a denominator passed in split "hi/low" form and returns
// the quotient after a final left shift by 3 (Q28 -> Q31 per the comments
// below).
//
// The reciprocal of the denominator is first approximated from `den_hi`
// alone, then refined once with approx * (2 - den * approx); finally `num`
// (also split into hi/low parts) is multiplied by that reciprocal using
// 16x16-bit partial products.
// NOTE(review): the denominator is presumably (den_hi << 16) + (den_low << 1),
// mirroring how tmpW32 and num are split into hi/low below - confirm with
// the callers.
int32_t WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low)
{
int16_t approx, tmp_hi, tmp_low, num_hi, num_low;
int32_t tmpW32;
// First reciprocal estimate, based on the high part of the denominator only.
// NOTE(review): the constant used here is 0x1FFFFFFF while the comment below
// mentions 3FFFFFFF - confirm which Q-format is intended.
approx = (int16_t)WebRtcSpl_DivW32W16((int32_t)0x1FFFFFFF, den_hi);
// result in Q14 (Note: 3FFFFFFF = 0.5 in Q30)
// tmpW32 = 1/den = approx * (2.0 - den * approx) (in Q30)
tmpW32 = (den_hi * approx << 1) + ((den_low * approx >> 15) << 1);
// tmpW32 = den * approx
// result in Q30 (tmpW32 = 2.0-(den*approx))
// The subtraction is done in 64 bits and then narrowed back to 32 bits.
tmpW32 = (int32_t)((int64_t)0x7fffffffL - tmpW32);
// Store tmpW32 in hi and low format
tmp_hi = (int16_t)(tmpW32 >> 16);
tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1);
// tmpW32 = 1/den in Q29
tmpW32 = (tmp_hi * approx + (tmp_low * approx >> 15)) << 1;
// 1/den in hi and low format
tmp_hi = (int16_t)(tmpW32 >> 16);
tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1);
// Store num in hi and low format
num_hi = (int16_t)(num >> 16);
num_low = (int16_t)((num - ((int32_t)num_hi << 16)) >> 1);
// num * (1/den) by 32 bit multiplication (result in Q28)
// The low*low partial product is not included in the sum.
tmpW32 = num_hi * tmp_hi + (num_hi * tmp_low >> 15) +
(num_low * tmp_hi >> 15);
// Put result in Q31 (convert from Q28)
tmpW32 = WEBRTC_SPL_LSHIFT_W32(tmpW32, 3);
return tmpW32;
}

View file

@ -0,0 +1,34 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/signal_processing/dot_product_with_scale.h"
#include "rtc_base/numerics/safe_conversions.h"
// Returns the dot product of `vector1` and `vector2` over `length` samples.
// Every individual product is right-shifted by `scaling` before being added
// to a 64-bit accumulator; the total is saturated to the int32_t range.
int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1,
                                      const int16_t* vector2,
                                      size_t length,
                                      int scaling) {
  int64_t accumulator = 0;
  size_t pos = 0;

  // Manually unrolled main loop: four products per pass.
  while (pos + 3 < length) {
    const int32_t p0 = (vector1[pos + 0] * vector2[pos + 0]) >> scaling;
    const int32_t p1 = (vector1[pos + 1] * vector2[pos + 1]) >> scaling;
    const int32_t p2 = (vector1[pos + 2] * vector2[pos + 2]) >> scaling;
    const int32_t p3 = (vector1[pos + 3] * vector2[pos + 3]) >> scaling;
    accumulator += p0;
    accumulator += p1;
    accumulator += p2;
    accumulator += p3;
    pos += 4;
  }

  // Up to three leftover samples.
  while (pos < length) {
    accumulator += (vector1[pos] * vector2[pos]) >> scaling;
    ++pos;
  }

  return rtc::saturated_cast<int32_t>(accumulator);
}

View file

@ -0,0 +1,40 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_
#define COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_
#include <stdint.h>
#include <string.h>
#ifdef __cplusplus
extern "C" {
#endif
// Calculates the dot product between two (int16_t) vectors.
//
// Input:
// - vector1 : Vector 1
// - vector2 : Vector 2
// - length : Number of samples used in the dot product
// - scaling : The number of right bit shifts to apply on each term
// during calculation to avoid overflow, i.e., the
// output will be in Q(-`scaling`)
//
// Return value : The dot product in Q(-scaling)
int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1,
const int16_t* vector2,
size_t length,
int scaling);
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_

View file

@ -0,0 +1,65 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "rtc_base/checks.h"
#include "rtc_base/sanitizer.h"
// TODO(Bjornv): Change the function parameter order to WebRTC code style.
// C version of WebRtcSpl_DownsampleFast() for generic platforms.
// C version of WebRtcSpl_DownsampleFast() for generic platforms.
//
// Filters `data_in` with the FIR `coefficients` (Q12) and keeps every
// `factor`-th result, starting at input index `delay`. Each output is
//   sat16((2048 + sum_j coefficients[j] * data_in[i - j]) >> 12)
// for i = delay, delay + factor, ... and exactly `data_out_length` values are
// written to `data_out`.
//
// Returns 0 on success and -1 when the arguments cannot be processed:
// `factor` is not positive, `data_out_length` or `coefficients_length` is 0,
// or `data_in_length` is smaller than the last input index needed + 1.
// A non-positive `factor` is rejected because the loop index would otherwise
// never advance past `endpos` (factor == 0 loops forever while writing past
// the end of `data_out`).
int WebRtcSpl_DownsampleFastC(const int16_t* data_in,
                              size_t data_in_length,
                              int16_t* data_out,
                              size_t data_out_length,
                              const int16_t* __restrict coefficients,
                              size_t coefficients_length,
                              int factor,
                              size_t delay) {
  int16_t* const original_data_out = data_out;
  size_t i = 0;
  size_t j = 0;
  int32_t out_s32 = 0;
  // One past the last input index that is used as the newest filter sample.
  size_t endpos = delay + factor * (data_out_length - 1) + 1;

  // Return error if any of the running conditions isn't met.
  if (factor <= 0 || data_out_length == 0 || coefficients_length == 0
      || data_in_length < endpos) {
    return -1;
  }

  rtc_MsanCheckInitialized(coefficients, sizeof(coefficients[0]),
                           coefficients_length);

  for (i = delay; i < endpos; i += factor) {
    out_s32 = 2048;  // Round value, 0.5 in Q12.

    for (j = 0; j < coefficients_length; j++) {
      // Negative overflow is permitted here, because this is
      // auto-regressive filters, and the state for each batch run is
      // stored in the "negative" positions of the output vector.
      // (i - j may be negative, hence the signed ptrdiff_t index.)
      rtc_MsanCheckInitialized(&data_in[(ptrdiff_t) i - (ptrdiff_t) j],
                               sizeof(data_in[0]), 1);
      // out_s32 is in Q12 domain.
      out_s32 += coefficients[j] * data_in[(ptrdiff_t) i - (ptrdiff_t) j];
    }

    out_s32 >>= 12;  // Q0.

    // Saturate and store the output.
    *data_out++ = WebRtcSpl_SatW32ToW16(out_s32);
  }

  RTC_DCHECK_EQ(original_data_out + data_out_length, data_out);
  rtc_MsanCheckInitialized(original_data_out, sizeof(original_data_out[0]),
                           data_out_length);

  return 0;
}

View file

@ -0,0 +1,169 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
// Version of WebRtcSpl_DownsampleFast() for MIPS platforms.
// Version of WebRtcSpl_DownsampleFast() for MIPS platforms.
//
// Filters `data_in` with the Q12 FIR `coefficients` and writes one saturated
// 16-bit result for every `factor`-th input position, starting at `delay`.
// Returns 0 on success and -1 when `factor` is not positive, when
// `data_out_length` or `coefficients_length` is 0, or when `data_in_length`
// is smaller than `endpos`.
//
// Two inline-assembly variants exist:
//  - MIPS_DSP_R2_LE: four taps per inner iteration via dpa.w.ph into $ac0,
//    with the Q12 -> Q0 conversion done by extr_r.w (shift 12).
//  - otherwise: two taps per inner iteration with plain mul/addu, starting
//    from the rounding constant 2048 and shifting right by 12; the result is
//    saturated with shll_s.w/sra (DSP R1) or slt/movn against max_16/min_16.
//
// Both variants run under `.set noreorder`: the instruction following each
// branch (written with a leading space) occupies the branch delay slot.
//
// `data_out` is advanced inside the assembly, so it is declared as a
// read-write ("+r") operand; an input-only operand must not be modified.
// A non-positive `factor` is rejected because the outer loop counter `i`
// would never reach zero.
int WebRtcSpl_DownsampleFast_mips(const int16_t* data_in,
                                  size_t data_in_length,
                                  int16_t* data_out,
                                  size_t data_out_length,
                                  const int16_t* __restrict coefficients,
                                  size_t coefficients_length,
                                  int factor,
                                  size_t delay) {
  int i;
  int j;
  int k;
  int32_t out_s32 = 0;
  size_t endpos = delay + factor * (data_out_length - 1) + 1;

  int32_t tmp1, tmp2, tmp3, tmp4, factor_2;
  int16_t* p_coefficients;
  int16_t* p_data_in;
  int16_t* p_data_in_0 = (int16_t*)&data_in[delay];
  int16_t* p_coefficients_0 = (int16_t*)&coefficients[0];
#if !defined(MIPS_DSP_R1_LE)
  int32_t max_16 = 0x7FFF;
  int32_t min_16 = 0xFFFF8000;
#endif  // #if !defined(MIPS_DSP_R1_LE)

  // Return error if any of the running conditions isn't met.
  if (factor <= 0 || data_out_length == 0 || coefficients_length == 0
      || data_in_length < endpos) {
    return -1;
  }
#if defined(MIPS_DSP_R2_LE)
  __asm __volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    // i counts the remaining input span; factor_2 is the byte stride.
    "subu %[i], %[endpos], %[delay] \n\t"
    "sll %[factor_2], %[factor], 1 \n\t"
    // Outer loop: one output sample per iteration.
    "1: \n\t"
    "move %[p_data_in], %[p_data_in_0] \n\t"
    "mult $zero, $zero \n\t"
    "move %[p_coefs], %[p_coefs_0] \n\t"
    "sra %[j], %[coef_length], 2 \n\t"
    "beq %[j], $zero, 3f \n\t"
    " andi %[k], %[coef_length], 3 \n\t"
    // Inner loop: four taps per pass.
    "2: \n\t"
    "lwl %[tmp1], 1(%[p_data_in]) \n\t"
    "lwl %[tmp2], 3(%[p_coefs]) \n\t"
    "lwl %[tmp3], -3(%[p_data_in]) \n\t"
    "lwl %[tmp4], 7(%[p_coefs]) \n\t"
    "lwr %[tmp1], -2(%[p_data_in]) \n\t"
    "lwr %[tmp2], 0(%[p_coefs]) \n\t"
    "lwr %[tmp3], -6(%[p_data_in]) \n\t"
    "lwr %[tmp4], 4(%[p_coefs]) \n\t"
    "packrl.ph %[tmp1], %[tmp1], %[tmp1] \n\t"
    "packrl.ph %[tmp3], %[tmp3], %[tmp3] \n\t"
    "dpa.w.ph $ac0, %[tmp1], %[tmp2] \n\t"
    "dpa.w.ph $ac0, %[tmp3], %[tmp4] \n\t"
    "addiu %[j], %[j], -1 \n\t"
    "addiu %[p_data_in], %[p_data_in], -8 \n\t"
    "bgtz %[j], 2b \n\t"
    " addiu %[p_coefs], %[p_coefs], 8 \n\t"
    // Remaining (coef_length & 3) taps, one per pass.
    "3: \n\t"
    "beq %[k], $zero, 5f \n\t"
    " nop \n\t"
    "4: \n\t"
    "lhu %[tmp1], 0(%[p_data_in]) \n\t"
    "lhu %[tmp2], 0(%[p_coefs]) \n\t"
    "addiu %[p_data_in], %[p_data_in], -2 \n\t"
    "addiu %[k], %[k], -1 \n\t"
    "dpa.w.ph $ac0, %[tmp1], %[tmp2] \n\t"
    "bgtz %[k], 4b \n\t"
    " addiu %[p_coefs], %[p_coefs], 2 \n\t"
    // Scale to Q0, saturate to 16 bits, store, and advance.
    "5: \n\t"
    "extr_r.w %[out_s32], $ac0, 12 \n\t"
    "addu %[p_data_in_0], %[p_data_in_0], %[factor_2] \n\t"
    "subu %[i], %[i], %[factor] \n\t"
    "shll_s.w %[out_s32], %[out_s32], 16 \n\t"
    "sra %[out_s32], %[out_s32], 16 \n\t"
    "sh %[out_s32], 0(%[data_out]) \n\t"
    "bgtz %[i], 1b \n\t"
    " addiu %[data_out], %[data_out], 2 \n\t"
    ".set pop \n\t"
    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
      [tmp4] "=&r" (tmp4), [p_data_in] "=&r" (p_data_in),
      [p_data_in_0] "+r" (p_data_in_0), [p_coefs] "=&r" (p_coefficients),
      [j] "=&r" (j), [out_s32] "=&r" (out_s32), [factor_2] "=&r" (factor_2),
      [i] "=&r" (i), [k] "=&r" (k), [data_out] "+r" (data_out)
    : [coef_length] "r" (coefficients_length),
      [p_coefs_0] "r" (p_coefficients_0), [endpos] "r" (endpos),
      [delay] "r" (delay), [factor] "r" (factor)
    : "memory", "hi", "lo"
  );
#else  // #if defined(MIPS_DSP_R2_LE)
  __asm __volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    // factor_2 is the byte stride; i counts the remaining input span.
    "sll %[factor_2], %[factor], 1 \n\t"
    "subu %[i], %[endpos], %[delay] \n\t"
    // Outer loop: one output sample per iteration, seeded with 2048 (0.5 Q12).
    "1: \n\t"
    "move %[p_data_in], %[p_data_in_0] \n\t"
    "addiu %[out_s32], $zero, 2048 \n\t"
    "move %[p_coefs], %[p_coefs_0] \n\t"
    "sra %[j], %[coef_length], 1 \n\t"
    "beq %[j], $zero, 3f \n\t"
    " andi %[k], %[coef_length], 1 \n\t"
    // Inner loop: two taps per pass.
    "2: \n\t"
    "lh %[tmp1], 0(%[p_data_in]) \n\t"
    "lh %[tmp2], 0(%[p_coefs]) \n\t"
    "lh %[tmp3], -2(%[p_data_in]) \n\t"
    "lh %[tmp4], 2(%[p_coefs]) \n\t"
    "mul %[tmp1], %[tmp1], %[tmp2] \n\t"
    "addiu %[p_coefs], %[p_coefs], 4 \n\t"
    "mul %[tmp3], %[tmp3], %[tmp4] \n\t"
    "addiu %[j], %[j], -1 \n\t"
    "addiu %[p_data_in], %[p_data_in], -4 \n\t"
    "addu %[tmp1], %[tmp1], %[tmp3] \n\t"
    "bgtz %[j], 2b \n\t"
    " addu %[out_s32], %[out_s32], %[tmp1] \n\t"
    // Odd tap, if coef_length is odd.
    "3: \n\t"
    "beq %[k], $zero, 4f \n\t"
    " nop \n\t"
    "lh %[tmp1], 0(%[p_data_in]) \n\t"
    "lh %[tmp2], 0(%[p_coefs]) \n\t"
    "mul %[tmp1], %[tmp1], %[tmp2] \n\t"
    "addu %[out_s32], %[out_s32], %[tmp1] \n\t"
    // Scale to Q0, saturate to 16 bits, store, and advance.
    "4: \n\t"
    "sra %[out_s32], %[out_s32], 12 \n\t"
    "addu %[p_data_in_0], %[p_data_in_0], %[factor_2] \n\t"
#if defined(MIPS_DSP_R1_LE)
    "shll_s.w %[out_s32], %[out_s32], 16 \n\t"
    "sra %[out_s32], %[out_s32], 16 \n\t"
#else  // #if defined(MIPS_DSP_R1_LE)
    "slt %[tmp1], %[max_16], %[out_s32] \n\t"
    "movn %[out_s32], %[max_16], %[tmp1] \n\t"
    "slt %[tmp1], %[out_s32], %[min_16] \n\t"
    "movn %[out_s32], %[min_16], %[tmp1] \n\t"
#endif  // #if defined(MIPS_DSP_R1_LE)
    "subu %[i], %[i], %[factor] \n\t"
    "sh %[out_s32], 0(%[data_out]) \n\t"
    "bgtz %[i], 1b \n\t"
    " addiu %[data_out], %[data_out], 2 \n\t"
    ".set pop \n\t"
    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
      [tmp4] "=&r" (tmp4), [p_data_in] "=&r" (p_data_in), [k] "=&r" (k),
      [p_data_in_0] "+r" (p_data_in_0), [p_coefs] "=&r" (p_coefficients),
      [j] "=&r" (j), [out_s32] "=&r" (out_s32), [factor_2] "=&r" (factor_2),
      [i] "=&r" (i), [data_out] "+r" (data_out)
    : [coef_length] "r" (coefficients_length),
      [p_coefs_0] "r" (p_coefficients_0), [endpos] "r" (endpos),
#if !defined(MIPS_DSP_R1_LE)
      [max_16] "r" (max_16), [min_16] "r" (min_16),
#endif  // #if !defined(MIPS_DSP_R1_LE)
      [delay] "r" (delay), [factor] "r" (factor)
    : "memory", "hi", "lo"
  );
#endif  // #if defined(MIPS_DSP_R2_LE)
  return 0;
}

View file

@ -0,0 +1,217 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include <arm_neon.h>
// NEON intrinsics version of WebRtcSpl_DownsampleFast()
// for ARM 32-bit/64-bit platforms.
// Filters `data_in` with the FIR taps in `coefficients` (Q12) and keeps one
// result every `factor` input samples:
//
//   data_out[k] = sat16((2048 + sum_j coefficients[j] *
//                        data_in[delay + k * factor - j]) >> 12)
//
// for k in [0, data_out_length) and j in [0, coefficients_length).
//
// Parameters:
//   data_in / data_in_length  - input samples and their count. The code reads
//                               data_in[i - j] with i starting at `delay`, so
//                               the samples the filter reaches before
//                               data_in[delay] must be addressable.
//   data_out / data_out_length - destination and number of samples to produce.
//   coefficients / coefficients_length - filter taps in Q12.
//   factor - decimation step; it is converted to size_t in the index
//            arithmetic, so it is expected to be positive.
//   delay  - input index that produces data_out[0].
//
// Returns 0 on success, or -1 when data_out_length or coefficients_length is
// zero or when data_in_length is smaller than the last input index needed.
int WebRtcSpl_DownsampleFastNeon(const int16_t* data_in,
                                 size_t data_in_length,
                                 int16_t* data_out,
                                 size_t data_out_length,
                                 const int16_t* __restrict coefficients,
                                 size_t coefficients_length,
                                 int factor,
                                 size_t delay) {
  size_t i = 0;
  size_t j = 0;
  int32_t out_s32 = 0;
  // One past the input index that produces the last output sample.
  size_t endpos = delay + factor * (data_out_length - 1) + 1;
  // Number of outputs left for the scalar loop after the 8-wide vector loop.
  size_t res = data_out_length & 0x7;
  // End of the input range covered by the vector loop.
  size_t endpos1 = endpos - factor * res;

  // Return error if any of the running conditions isn't met.
  if (data_out_length == 0 || coefficients_length == 0
      || data_in_length < endpos) {
    return -1;
  }

  // First part, unroll the loop 8 times, with 3 subcases
  // (factor == 2, 4, others).
  switch (factor) {
    case 2: {
      for (i = delay; i < endpos1; i += 16) {
        // Round value, 0.5 in Q12.
        int32x4_t out32x4_0 = vdupq_n_s32(2048);
        int32x4_t out32x4_1 = vdupq_n_s32(2048);

#if defined(WEBRTC_ARCH_ARM64)
        // Unroll the loop 2 times. The bound is written as `j + 1 < length`
        // so that it cannot wrap around in unsigned arithmetic.
        for (j = 0; j + 1 < coefficients_length; j += 2) {
          // Lane 0 holds coefficients[j], lane 1 holds coefficients[j + 1].
          int32x2_t coeff32 = vld1_dup_s32((int32_t*)&coefficients[j]);
          int16x4_t coeff16x4 = vreinterpret_s16_s32(coeff32);
          // De-interleaving load: val[0] holds the samples for tap j + 1,
          // val[1] the samples for tap j.
          int16x8x2_t in16x8x2 = vld2q_s16(&data_in[i - j - 1]);

          // Mul and accumulate low 64-bit data.
          int16x4_t in16x4_0 = vget_low_s16(in16x8x2.val[0]);
          int16x4_t in16x4_1 = vget_low_s16(in16x8x2.val[1]);
          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 1);
          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_1, coeff16x4, 0);

          // Mul and accumulate high 64-bit data.
          // TODO: vget_high_s16 need extra cost on ARM64. This could be
          // replaced by vmlal_high_lane_s16. But for the interface of
          // vmlal_high_lane_s16, there is a bug in gcc 4.9.
          // This issue need to be tracked in the future.
          int16x4_t in16x4_2 = vget_high_s16(in16x8x2.val[0]);
          int16x4_t in16x4_3 = vget_high_s16(in16x8x2.val[1]);
          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_2, coeff16x4, 1);
          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_3, coeff16x4, 0);
        }

        // Remaining tap (if coefficients_length is odd).
        for (; j < coefficients_length; j++) {
          int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
          int16x8x2_t in16x8x2 = vld2q_s16(&data_in[i - j]);

          // Mul and accumulate low 64-bit data.
          int16x4_t in16x4_0 = vget_low_s16(in16x8x2.val[0]);
          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0);

          // Mul and accumulate high 64-bit data.
          // TODO: vget_high_s16 need extra cost on ARM64. This could be
          // replaced by vmlal_high_lane_s16. But for the interface of
          // vmlal_high_lane_s16, there is a bug in gcc 4.9.
          // This issue need to be tracked in the future.
          int16x4_t in16x4_1 = vget_high_s16(in16x8x2.val[0]);
          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0);
        }
#else
        // On ARMv7, the loop unrolling 2 times results in performance
        // regression.
        for (j = 0; j < coefficients_length; j++) {
          int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
          int16x8x2_t in16x8x2 = vld2q_s16(&data_in[i - j]);

          // Mul and accumulate.
          int16x4_t in16x4_0 = vget_low_s16(in16x8x2.val[0]);
          int16x4_t in16x4_1 = vget_high_s16(in16x8x2.val[0]);
          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0);
          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0);
        }
#endif

        // Saturate and store the output.
        int16x4_t out16x4_0 = vqshrn_n_s32(out32x4_0, 12);
        int16x4_t out16x4_1 = vqshrn_n_s32(out32x4_1, 12);
        vst1q_s16(data_out, vcombine_s16(out16x4_0, out16x4_1));
        data_out += 8;
      }
      break;
    }
    case 4: {
      for (i = delay; i < endpos1; i += 32) {
        // Round value, 0.5 in Q12.
        int32x4_t out32x4_0 = vdupq_n_s32(2048);
        int32x4_t out32x4_1 = vdupq_n_s32(2048);

        // Unroll the loop 4 times. The bound must not be written as
        // `j < coefficients_length - 3`: with fewer than 3 taps that
        // unsigned subtraction wraps around and the loop would read far
        // outside both `coefficients` and `data_in`.
        for (j = 0; j + 3 < coefficients_length; j += 4) {
          int16x4_t coeff16x4 = vld1_s16(&coefficients[j]);
          // De-interleaving load: val[0] holds the samples for tap j + 3,
          // val[3] the samples for tap j.
          int16x8x4_t in16x8x4 = vld4q_s16(&data_in[i - j - 3]);

          // Mul and accumulate low 64-bit data.
          int16x4_t in16x4_0 = vget_low_s16(in16x8x4.val[0]);
          int16x4_t in16x4_2 = vget_low_s16(in16x8x4.val[1]);
          int16x4_t in16x4_4 = vget_low_s16(in16x8x4.val[2]);
          int16x4_t in16x4_6 = vget_low_s16(in16x8x4.val[3]);
          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 3);
          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_2, coeff16x4, 2);
          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_4, coeff16x4, 1);
          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_6, coeff16x4, 0);

          // Mul and accumulate high 64-bit data.
          // TODO: vget_high_s16 need extra cost on ARM64. This could be
          // replaced by vmlal_high_lane_s16. But for the interface of
          // vmlal_high_lane_s16, there is a bug in gcc 4.9.
          // This issue need to be tracked in the future.
          int16x4_t in16x4_1 = vget_high_s16(in16x8x4.val[0]);
          int16x4_t in16x4_3 = vget_high_s16(in16x8x4.val[1]);
          int16x4_t in16x4_5 = vget_high_s16(in16x8x4.val[2]);
          int16x4_t in16x4_7 = vget_high_s16(in16x8x4.val[3]);
          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 3);
          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_3, coeff16x4, 2);
          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_5, coeff16x4, 1);
          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_7, coeff16x4, 0);
        }

        // Remaining taps (coefficients_length not a multiple of 4).
        for (; j < coefficients_length; j++) {
          int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
          int16x8x4_t in16x8x4 = vld4q_s16(&data_in[i - j]);

          // Mul and accumulate low 64-bit data.
          int16x4_t in16x4_0 = vget_low_s16(in16x8x4.val[0]);
          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0);

          // Mul and accumulate high 64-bit data.
          // TODO: vget_high_s16 need extra cost on ARM64. This could be
          // replaced by vmlal_high_lane_s16. But for the interface of
          // vmlal_high_lane_s16, there is a bug in gcc 4.9.
          // This issue need to be tracked in the future.
          int16x4_t in16x4_1 = vget_high_s16(in16x8x4.val[0]);
          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0);
        }

        // Saturate and store the output.
        int16x4_t out16x4_0 = vqshrn_n_s32(out32x4_0, 12);
        int16x4_t out16x4_1 = vqshrn_n_s32(out32x4_1, 12);
        vst1q_s16(data_out, vcombine_s16(out16x4_0, out16x4_1));
        data_out += 8;
      }
      break;
    }
    default: {
      for (i = delay; i < endpos1; i += factor * 8) {
        // Round value, 0.5 in Q12.
        int32x4_t out32x4_0 = vdupq_n_s32(2048);
        int32x4_t out32x4_1 = vdupq_n_s32(2048);

        for (j = 0; j < coefficients_length; j++) {
          int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
          // Gather the 8 input samples (one per output) lane by lane, since
          // an arbitrary stride has no de-interleaving load.
          int16x4_t in16x4_0 = vld1_dup_s16(&data_in[i - j]);
          in16x4_0 = vld1_lane_s16(&data_in[i + factor - j], in16x4_0, 1);
          in16x4_0 = vld1_lane_s16(&data_in[i + factor * 2 - j], in16x4_0, 2);
          in16x4_0 = vld1_lane_s16(&data_in[i + factor * 3 - j], in16x4_0, 3);
          int16x4_t in16x4_1 = vld1_dup_s16(&data_in[i + factor * 4 - j]);
          in16x4_1 = vld1_lane_s16(&data_in[i + factor * 5 - j], in16x4_1, 1);
          in16x4_1 = vld1_lane_s16(&data_in[i + factor * 6 - j], in16x4_1, 2);
          in16x4_1 = vld1_lane_s16(&data_in[i + factor * 7 - j], in16x4_1, 3);

          // Mul and accumulate.
          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0);
          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0);
        }

        // Saturate and store the output.
        int16x4_t out16x4_0 = vqshrn_n_s32(out32x4_0, 12);
        int16x4_t out16x4_1 = vqshrn_n_s32(out32x4_1, 12);
        vst1q_s16(data_out, vcombine_s16(out16x4_0, out16x4_1));
        data_out += 8;
      }
      break;
    }
  }

  // Second part, do the rest iterations (if any).
  for (; i < endpos; i += factor) {
    out_s32 = 2048;  // Round value, 0.5 in Q12.

    for (j = 0; j < coefficients_length; j++) {
      out_s32 = WebRtc_MulAccumW16(coefficients[j], data_in[i - j], out_s32);
    }

    // Saturate and store the output.
    out_s32 >>= 12;
    *data_out++ = WebRtcSpl_SatW32ToW16(out_s32);
  }

  return 0;
}

View file

@ -0,0 +1,39 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_Energy().
* The description header can be found in signal_processing_library.h
*
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
// Returns the sum of the squared samples of `vector`, with every square
// shifted right by the amount chosen by WebRtcSpl_GetScalingSquare().
// That shift is written to `*scale_factor`.
int32_t WebRtcSpl_Energy(int16_t* vector,
                         size_t vector_length,
                         int* scale_factor)
{
    const int shift =
        WebRtcSpl_GetScalingSquare(vector, vector_length, vector_length);
    int32_t energy = 0;
    size_t n;

    for (n = 0; n < vector_length; n++)
    {
        // Each square is scaled down before it is added to the sum.
        energy += (vector[n] * vector[n]) >> shift;
    }

    *scale_factor = shift;
    return energy;
}

View file

@ -0,0 +1,95 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_FilterAR().
* The description header can be found in signal_processing_library.h
*
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "rtc_base/checks.h"
// Auto-regressive filter that produces a high part (`filtered`) and a low
// part (`filtered_low`) for every input sample in `x`.
//
// Taps a[1] .. a[a_length - 1] are applied to earlier outputs. Outputs of
// this call are used where available; older ones come from the end of
// `state` / `state_low`. After filtering, the state buffers are refreshed
// with the newest outputs. `state_low_length` and `filtered_low_length` are
// not referenced by this implementation.
//
// Returns x_length.
size_t WebRtcSpl_FilterAR(const int16_t* a,
                          size_t a_length,
                          const int16_t* x,
                          size_t x_length,
                          int16_t* state,
                          size_t state_length,
                          int16_t* state_low,
                          size_t state_low_length,
                          int16_t* filtered,
                          int16_t* filtered_low,
                          size_t filtered_low_length)
{
    size_t n;
    size_t kept;

    for (n = 0; n < x_length; n++)
    {
        // Input sample scaled up by 2^12.
        int64_t acc = (int32_t)x[n] * (1 << 12);
        int32_t acc_low = (int32_t)0;
        // Signed on purpose: it is -1 for the first sample, but is only used
        // as an index while it is non-negative.
        int history_ix = (int)n - 1;
        // Both state buffers are walked backwards from index
        // state_length - 1.
        size_t state_ix = state_length - 1;
        const size_t history_taps = (n < a_length) ? n + 1 : a_length;
        size_t tap;

        // Taps covered by outputs already produced in this call.
        for (tap = 1; tap < history_taps; tap++)
        {
            RTC_DCHECK_GE(history_ix, 0);
            acc -= a[tap] * filtered[history_ix];
            acc_low -= a[tap] * filtered_low[history_ix];
            history_ix--;
        }

        // Remaining taps are served from the saved state, newest first.
        for (tap = n + 1; tap < a_length; tap++)
        {
            acc -= a[tap] * state[state_ix];
            acc_low -= a[tap] * state_low[state_ix];
            state_ix--;
        }

        acc += (acc_low >> 12);
        // High part: rounded value; low part: what the rounding left over.
        filtered[n] = (int16_t)((acc + (int32_t)2048) >> 12);
        filtered_low[n] =
            (int16_t)(acc - ((int32_t)filtered[n] * (1 << 12)));
    }

    // Save the filter state.
    if (x_length >= state_length)
    {
        WebRtcSpl_CopyFromEndW16(filtered, x_length, a_length - 1, state);
        WebRtcSpl_CopyFromEndW16(filtered_low, x_length, a_length - 1,
                                 state_low);
        return x_length;
    }

    // Fewer new samples than state entries: slide the old state down and
    // append the new outputs behind it.
    kept = state_length - x_length;
    for (n = 0; n < kept; n++)
    {
        state[n] = state[n + x_length];
        state_low[n] = state_low[n + x_length];
    }
    for (n = 0; n < x_length; n++)
    {
        state[kept + n] = filtered[n];
        state_low[kept + n] = filtered_low[n];
    }

    return x_length;
}

View file

@ -0,0 +1,47 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "stddef.h"
#include "rtc_base/checks.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
// TODO(bjornv): Change the return type to report errors.
// Auto-regressive filter with Q12 coefficients:
//
//   data_out[i] = round12(sat(coefficients[0] * data_in[i]
//                             - sum_{j >= 1} coefficients[j] * data_out[i - j]))
//
// The filter history lives in front of `data_out`: the samples
// data_out[-(coefficients_length - 1)] .. data_out[-1] are read.
// Requires data_length > 0 and coefficients_length > 1 (debug-checked).
void WebRtcSpl_FilterARFastQ12(const int16_t* data_in,
                               int16_t* data_out,
                               const int16_t* __restrict coefficients,
                               size_t coefficients_length,
                               size_t data_length) {
  size_t n = 0;
  size_t tap = 0;

  RTC_DCHECK_GT(data_length, 0);
  RTC_DCHECK_GT(coefficients_length, 1);

  for (n = 0; n < data_length; n++) {
    int64_t feedback = 0;
    int64_t acc = 0;

    // Walk the feedback taps from the oldest output sample to the newest.
    // The index goes negative for the first samples on purpose; that is
    // where the state from the previous batch is stored.
    tap = coefficients_length - 1;
    while (tap > 0) {
      feedback += coefficients[tap] * data_out[(ptrdiff_t) n - (ptrdiff_t) tap];
      tap--;
    }

    acc = coefficients[0] * data_in[n] - feedback;

    // Saturate, then round from Q12 and store the output.
    acc = WEBRTC_SPL_SAT(134215679, acc, -134217728);
    data_out[n] = (int16_t)((acc + 2048) >> 12);
  }
}

View file

@ -0,0 +1,218 @@
@
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS. All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@
@ This file contains the function WebRtcSpl_FilterARFastQ12(), optimized for
@ ARMv7 platform. The description header can be found in
@ signal_processing_library.h
@
@ Output is bit-exact with the generic C code as in filter_ar_fast_q12.c, and
@ the reference C code at end of this file.
@ Assumptions:
@ (1) data_length > 0
@ (2) coefficients_length > 1
@ Register usage:
@
@ r0: &data_in[i]
@ r1: &data_out[i], for result output
@ r2: &coefficients[0]
@ r3: coefficients_length
@ r4: Iteration counter for the outer loop.
@ r5: data_out[j] as multiplication inputs
@ r6: Calculated value for output data_out[]; iteration counter for inner loop
@ r7: Partial sum of a filtering multiplication results
@ r8: Partial sum of a filtering multiplication results
@ r9: &data_out[], for filtering input; data_in[i]
@ r10: coefficients[j]
@ r11: Scratch
@ r12: &coefficients[j]
#include "rtc_base/system/asm_defines.h"
@ Entry: r0 = data_in, r1 = data_out, r2 = coefficients,
@ r3 = coefficients_length; data_length is read from the stack.
@ The main loop produces two output samples per iteration; a trailing
@ single-sample pass handles an odd data_length.
GLOBAL_FUNCTION WebRtcSpl_FilterARFastQ12
.align 2
DEFINE_FUNCTION WebRtcSpl_FilterARFastQ12
@ Eight registers (32 bytes) are pushed, so the first stack argument is
@ found at [sp, #32].
push {r4-r11}
@ NOTE(review): ldrsh loads a sign-extended halfword, so only the low 16 bits
@ of data_length are used here -- confirm callers never pass larger values.
ldrsh r12, [sp, #32] @ data_length
subs r4, r12, #1
beq ODD_LENGTH @ jump if data_length == 1
LOOP_LENGTH:
add r12, r2, r3, lsl #1
sub r12, #4 @ &coefficients[coefficients_length - 2]
sub r9, r1, r3, lsl #1
add r9, #2 @ &data_out[i - coefficients_length + 1]
ldr r5, [r9], #4 @ data_out[i - coefficients_length + {1,2}]
mov r7, #0 @ sum1
mov r8, #0 @ sum2
subs r6, r3, #3 @ Iteration counter for inner loop.
beq ODD_A_LENGTH @ branch if coefficients_length == 3
blt POST_LOOP_A_LENGTH @ branch if coefficients_length == 2
LOOP_A_LENGTH:
ldr r10, [r12], #-4 @ coefficients[j - 1], coefficients[j]
subs r6, #2
smlatt r8, r10, r5, r8 @ sum2 += coefficients[j] * data_out[i - j + 1];
smlatb r7, r10, r5, r7 @ sum1 += coefficients[j] * data_out[i - j];
smlabt r7, r10, r5, r7 @ coefficients[j - 1] * data_out[i - j + 1];
ldr r5, [r9], #4 @ data_out[i - j + 2], data_out[i - j + 3]
smlabb r8, r10, r5, r8 @ coefficients[j - 1] * data_out[i - j + 2];
bgt LOOP_A_LENGTH
blt POST_LOOP_A_LENGTH
ODD_A_LENGTH:
ldrsh r10, [r12, #2] @ Filter coefficients coefficients[2]
sub r12, #2 @ &coefficients[0]
smlabb r7, r10, r5, r7 @ sum1 += coefficients[2] * data_out[i - 2];
smlabt r8, r10, r5, r8 @ sum2 += coefficients[2] * data_out[i - 1];
ldr r5, [r9, #-2] @ data_out[i - 1], data_out[i]
POST_LOOP_A_LENGTH:
ldr r10, [r12] @ coefficients[0], coefficients[1]
smlatb r7, r10, r5, r7 @ sum1 += coefficients[1] * data_out[i - 1];
ldr r9, [r0], #4 @ data_in[i], data_in[i + 1]
smulbb r6, r10, r9 @ output1 = coefficients[0] * data_in[i];
sub r6, r7 @ output1 -= sum1;
@ Round and saturate output1: the 2048 rounding term is added only when
@ (output1 >> 12) already fits in 16 bits, i.e. when the saturated value
@ equals the plain sign-extended 16-bit field; the final ssat clamps the
@ shifted result to the int16 range.
sbfx r11, r6, #12, #16
ssat r7, #16, r6, asr #12
cmp r7, r11
addeq r6, r6, #2048
ssat r6, #16, r6, asr #12
strh r6, [r1], #2 @ Store data_out[i]
smlatb r8, r10, r6, r8 @ sum2 += coefficients[1] * data_out[i];
smulbt r6, r10, r9 @ output2 = coefficients[0] * data_in[i + 1];
sub r6, r8 @ output2 -= sum2;
@ Same round-and-saturate sequence as above, now for output2.
sbfx r11, r6, #12, #16
ssat r7, #16, r6, asr #12
cmp r7, r11
addeq r6, r6, #2048
ssat r6, #16, r6, asr #12
strh r6, [r1], #2 @ Store data_out[i + 1]
subs r4, #2
bgt LOOP_LENGTH
blt END @ For even data_length, it's done. Jump to END.
@ Process i = data_length -1, for the case of an odd length.
ODD_LENGTH:
add r12, r2, r3, lsl #1
sub r12, #4 @ &coefficients[coefficients_length - 2]
sub r9, r1, r3, lsl #1
add r9, #2 @ &data_out[i - coefficients_length + 1]
mov r7, #0 @ sum1, first partial sum
mov r8, #0 @ second partial sum; both are subtracted from output1 below
subs r6, r3, #2 @ inner loop counter
beq EVEN_A_LENGTH @ branch if coefficients_length == 2
LOOP2_A_LENGTH:
ldr r10, [r12], #-4 @ coefficients[j - 1], coefficients[j]
ldr r5, [r9], #4 @ data_out[i - j], data_out[i - j + 1]
subs r6, #2
smlatb r7, r10, r5, r7 @ sum1 += coefficients[j] * data_out[i - j];
smlabt r8, r10, r5, r8 @ coefficients[j - 1] * data_out[i - j + 1];
bgt LOOP2_A_LENGTH
addlt r12, #2
blt POST_LOOP2_A_LENGTH
EVEN_A_LENGTH:
ldrsh r10, [r12, #2] @ Filter coefficients coefficients[1]
ldrsh r5, [r9] @ data_out[i - 1]
smlabb r7, r10, r5, r7 @ sum1 += coefficients[1] * data_out[i - 1];
POST_LOOP2_A_LENGTH:
ldrsh r10, [r12] @ Filter coefficients coefficients[0]
ldrsh r9, [r0] @ data_in[i]
smulbb r6, r10, r9 @ output1 = coefficients[0] * data_in[i];
sub r6, r7 @ output1 -= sum1;
sub r6, r8 @ output1 -= second partial sum (r8);
@ Round and saturate as in the main loop; r8 is reused as scratch here.
sbfx r8, r6, #12, #16
ssat r7, #16, r6, asr #12
cmp r7, r8
addeq r6, r6, #2048
ssat r6, #16, r6, asr #12
strh r6, [r1] @ Store the data_out[i]
END:
pop {r4-r11}
bx lr
@Reference C code:
@
@void WebRtcSpl_FilterARFastQ12(int16_t* data_in,
@ int16_t* data_out,
@ int16_t* __restrict coefficients,
@ size_t coefficients_length,
@ size_t data_length) {
@ size_t i = 0;
@ size_t j = 0;
@
@ assert(data_length > 0);
@ assert(coefficients_length > 1);
@
@ for (i = 0; i < data_length - 1; i += 2) {
@ int32_t output1 = 0;
@ int32_t sum1 = 0;
@ int32_t output2 = 0;
@ int32_t sum2 = 0;
@
@ for (j = coefficients_length - 1; j > 2; j -= 2) {
@ sum1 += coefficients[j] * data_out[i - j];
@ sum1 += coefficients[j - 1] * data_out[i - j + 1];
@ sum2 += coefficients[j] * data_out[i - j + 1];
@ sum2 += coefficients[j - 1] * data_out[i - j + 2];
@ }
@
@ if (j == 2) {
@ sum1 += coefficients[2] * data_out[i - 2];
@ sum2 += coefficients[2] * data_out[i - 1];
@ }
@
@ sum1 += coefficients[1] * data_out[i - 1];
@ output1 = coefficients[0] * data_in[i];
@ output1 -= sum1;
@ // Saturate and store the output.
@ output1 = WEBRTC_SPL_SAT(134215679, output1, -134217728);
@ data_out[i] = (int16_t)((output1 + 2048) >> 12);
@
@ sum2 += coefficients[1] * data_out[i];
@ output2 = coefficients[0] * data_in[i + 1];
@ output2 -= sum2;
@ // Saturate and store the output.
@ output2 = WEBRTC_SPL_SAT(134215679, output2, -134217728);
@ data_out[i + 1] = (int16_t)((output2 + 2048) >> 12);
@ }
@
@ if (i == data_length - 1) {
@ int32_t output1 = 0;
@ int32_t sum1 = 0;
@
@ for (j = coefficients_length - 1; j > 1; j -= 2) {
@ sum1 += coefficients[j] * data_out[i - j];
@ sum1 += coefficients[j - 1] * data_out[i - j + 1];
@ }
@
@ if (j == 1) {
@ sum1 += coefficients[1] * data_out[i - 1];
@ }
@
@ output1 = coefficients[0] * data_in[i];
@ output1 -= sum1;
@ // Saturate and store the output.
@ output1 = WEBRTC_SPL_SAT(134215679, output1, -134217728);
@ data_out[i] = (int16_t)((output1 + 2048) >> 12);
@ }
@}

View file

@ -0,0 +1,140 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "rtc_base/checks.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
// MIPS inline-assembly version of the Q12 auto-regressive filter.
//
// For every input sample the hi/lo accumulator is loaded with
// coefficients[0] * data_in[i] ("mult"), products of earlier output samples
// and the remaining coefficients are subtracted ("msub"), and the result is
// rounded by 12 bits, saturated to 16 bits and stored into data_out.
// The output history is read from in front of data_out: outptr starts at
// data_out - (coefficients_length - 1).
//
// Requires data_length > 0 and coefficients_length > 1 (debug-checked).
void WebRtcSpl_FilterARFastQ12(const int16_t* data_in,
int16_t* data_out,
const int16_t* __restrict coefficients,
size_t coefficients_length,
size_t data_length) {
int r0, r1, r2, r3;
int coef0, offset;
int i, j, k;
int coefptr, outptr, tmpout, inptr;
#if !defined(MIPS_DSP_R1_LE)
// Clamp limits for the path without the DSP saturating shift.
int max16 = 0x7FFF;
int min16 = 0xFFFF8000;
#endif // #if !defined(MIPS_DSP_R1_LE)
RTC_DCHECK_GT(data_length, 0);
RTC_DCHECK_GT(coefficients_length, 1);
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
// Setup: i = samples left, coef0 = coefficients[0],
// j = coefficients_length - 1 (number of feedback taps), k = j & 1,
// offset = j in bytes, outptr = data_out - offset, inptr = data_in.
"addiu %[i], %[data_length], 0 \n\t"
"lh %[coef0], 0(%[coefficients]) \n\t"
"addiu %[j], %[coefficients_length], -1 \n\t"
"andi %[k], %[j], 1 \n\t"
"sll %[offset], %[j], 1 \n\t"
"subu %[outptr], %[data_out], %[offset] \n\t"
"addiu %[inptr], %[data_in], 0 \n\t"
// Odd number of feedback taps -> label 3; the delay slot points coefptr at
// the last coefficient either way.
"bgtz %[k], 3f \n\t"
" addu %[coefptr], %[coefficients], %[offset] \n\t"
// Label 1: per-sample loop for an even number of feedback taps.
"1: \n\t"
"lh %[r0], 0(%[inptr]) \n\t"
"addiu %[i], %[i], -1 \n\t"
"addiu %[tmpout], %[outptr], 0 \n\t"
"mult %[r0], %[coef0] \n\t"
// Label 2: two taps per iteration, oldest output sample first.
"2: \n\t"
"lh %[r0], 0(%[tmpout]) \n\t"
"lh %[r1], 0(%[coefptr]) \n\t"
"lh %[r2], 2(%[tmpout]) \n\t"
"lh %[r3], -2(%[coefptr]) \n\t"
"addiu %[tmpout], %[tmpout], 4 \n\t"
"msub %[r0], %[r1] \n\t"
"msub %[r2], %[r3] \n\t"
"addiu %[j], %[j], -2 \n\t"
"bgtz %[j], 2b \n\t"
" addiu %[coefptr], %[coefptr], -4 \n\t"
// Fetch the accumulator: the DSP build extracts it with a rounding shift
// by 12; the plain build takes the low word and rounds/shifts further down.
#if defined(MIPS_DSP_R1_LE)
"extr_r.w %[r0], $ac0, 12 \n\t"
#else // #if defined(MIPS_DSP_R1_LE)
"mflo %[r0] \n\t"
#endif // #if defined(MIPS_DSP_R1_LE)
// Rewind the coefficient pointer and tap counter for the next sample.
"addu %[coefptr], %[coefficients], %[offset] \n\t"
"addiu %[inptr], %[inptr], 2 \n\t"
"addiu %[j], %[coefficients_length], -1 \n\t"
// Saturate to 16 bits.
#if defined(MIPS_DSP_R1_LE)
"shll_s.w %[r0], %[r0], 16 \n\t"
"sra %[r0], %[r0], 16 \n\t"
#else // #if defined(MIPS_DSP_R1_LE)
"addiu %[r0], %[r0], 2048 \n\t"
"sra %[r0], %[r0], 12 \n\t"
"slt %[r1], %[max16], %[r0] \n\t"
"movn %[r0], %[max16], %[r1] \n\t"
"slt %[r1], %[r0], %[min16] \n\t"
"movn %[r0], %[min16], %[r1] \n\t"
#endif // #if defined(MIPS_DSP_R1_LE)
// tmpout has advanced past the history samples, so it addresses the
// current output sample here.
"sh %[r0], 0(%[tmpout]) \n\t"
"bgtz %[i], 1b \n\t"
" addiu %[outptr], %[outptr], 2 \n\t"
"b 5f \n\t"
" nop \n\t"
// Label 3: per-sample loop for an odd number of feedback taps.
// NOTE(review): label 4 uses the same "j -= 2; bgtz" loop control as
// label 2. Tracing it with an odd starting j (e.g. 1 or 3) suggests the pair
// loop runs (j + 1) / 2 times rather than (j - 1) / 2, which would not line
// up with the extra single tap and the 2(tmpout) store below. Please verify
// this path on hardware with an even coefficients_length.
"3: \n\t"
"lh %[r0], 0(%[inptr]) \n\t"
"addiu %[i], %[i], -1 \n\t"
"addiu %[tmpout], %[outptr], 0 \n\t"
"mult %[r0], %[coef0] \n\t"
"4: \n\t"
"lh %[r0], 0(%[tmpout]) \n\t"
"lh %[r1], 0(%[coefptr]) \n\t"
"lh %[r2], 2(%[tmpout]) \n\t"
"lh %[r3], -2(%[coefptr]) \n\t"
"addiu %[tmpout], %[tmpout], 4 \n\t"
"msub %[r0], %[r1] \n\t"
"msub %[r2], %[r3] \n\t"
"addiu %[j], %[j], -2 \n\t"
"bgtz %[j], 4b \n\t"
" addiu %[coefptr], %[coefptr], -4 \n\t"
// Extra single tap after the pair loop.
"lh %[r0], 0(%[tmpout]) \n\t"
"lh %[r1], 0(%[coefptr]) \n\t"
"msub %[r0], %[r1] \n\t"
#if defined(MIPS_DSP_R1_LE)
"extr_r.w %[r0], $ac0, 12 \n\t"
#else // #if defined(MIPS_DSP_R1_LE)
"mflo %[r0] \n\t"
#endif // #if defined(MIPS_DSP_R1_LE)
"addu %[coefptr], %[coefficients], %[offset] \n\t"
"addiu %[inptr], %[inptr], 2 \n\t"
"addiu %[j], %[coefficients_length], -1 \n\t"
#if defined(MIPS_DSP_R1_LE)
"shll_s.w %[r0], %[r0], 16 \n\t"
"sra %[r0], %[r0], 16 \n\t"
#else // #if defined(MIPS_DSP_R1_LE)
"addiu %[r0], %[r0], 2048 \n\t"
"sra %[r0], %[r0], 12 \n\t"
"slt %[r1], %[max16], %[r0] \n\t"
"movn %[r0], %[max16], %[r1] \n\t"
"slt %[r1], %[r0], %[min16] \n\t"
"movn %[r0], %[min16], %[r1] \n\t"
#endif // #if defined(MIPS_DSP_R1_LE)
"sh %[r0], 2(%[tmpout]) \n\t"
"bgtz %[i], 3b \n\t"
" addiu %[outptr], %[outptr], 2 \n\t"
"5: \n\t"
".set pop \n\t"
// All scratch values are early-clobber outputs; the accumulator (hi/lo) and
// memory are declared clobbered.
: [i] "=&r" (i), [j] "=&r" (j), [k] "=&r" (k), [r0] "=&r" (r0),
[r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
[coef0] "=&r" (coef0), [offset] "=&r" (offset),
[outptr] "=&r" (outptr), [inptr] "=&r" (inptr),
[coefptr] "=&r" (coefptr), [tmpout] "=&r" (tmpout)
: [coefficients] "r" (coefficients), [data_length] "r" (data_length),
[coefficients_length] "r" (coefficients_length),
#if !defined(MIPS_DSP_R1_LE)
[max16] "r" (max16), [min16] "r" (min16),
#endif
[data_out] "r" (data_out), [data_in] "r" (data_in)
: "hi", "lo", "memory"
);
}

View file

@ -0,0 +1,55 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_FilterMAFastQ12().
* The description header can be found in signal_processing_library.h
*
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "rtc_base/sanitizer.h"
// Moving-average (FIR) filter with Q12 coefficients:
//
//   out_ptr[i] = round12(sat(sum_j B[j] * in_ptr[i - j]))
//
// for i in [0, length) and j in [0, B_length). The filter history is read
// from in front of `in_ptr`: samples in_ptr[-(B_length - 1)] .. in_ptr[-1]
// must be valid and initialized.
void WebRtcSpl_FilterMAFastQ12(const int16_t* in_ptr,
                               int16_t* out_ptr,
                               const int16_t* B,
                               size_t B_length,
                               size_t length)
{
    size_t n;

    rtc_MsanCheckInitialized(B, sizeof(B[0]), B_length);
    rtc_MsanCheckInitialized(in_ptr - B_length + 1, sizeof(in_ptr[0]),
                             B_length + length - 1);

    for (n = 0; n < length; n++)
    {
        // Walks backwards from the current input sample; for the first
        // samples this reaches the "negative" positions of the input vector,
        // where the state from the previous batch is stored.
        const int16_t* sample = &in_ptr[n];
        int32_t acc = 0;
        size_t tap;

        for (tap = 0; tap < B_length; tap++)
        {
            acc += B[tap] * *sample;
            sample--;
        }

        // 2^27 = 134217728 corresponds to 32768 in Q12; clamp so that the
        // rounded, shifted result fits in 16 bits.
        acc = WEBRTC_SPL_SAT((int32_t)134215679, acc, (int32_t)-134217728);
        out_ptr[n] = (int16_t)((acc + (int32_t)2048) >> 12);
    }
    return;
}

View file

@ -0,0 +1,77 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_GetHanningWindow().
* The description header can be found in signal_processing_library.h
*
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
// Hanning table with 256 entries (32 rows of 8). The values increase
// monotonically from 1 to 16384 (2^14), with 8192 at index 127.
// WebRtcSpl_GetHanningWindow() picks entries from it with a fixed-point index.
static const int16_t kHanningTable[] = {
1, 2, 6, 10, 15, 22, 30, 39,
50, 62, 75, 89, 104, 121, 138, 157,
178, 199, 222, 246, 271, 297, 324, 353,
383, 413, 446, 479, 513, 549, 586, 624,
663, 703, 744, 787, 830, 875, 920, 967,
1015, 1064, 1114, 1165, 1218, 1271, 1325, 1381,
1437, 1494, 1553, 1612, 1673, 1734, 1796, 1859,
1924, 1989, 2055, 2122, 2190, 2259, 2329, 2399,
2471, 2543, 2617, 2691, 2765, 2841, 2918, 2995,
3073, 3152, 3232, 3312, 3393, 3475, 3558, 3641,
3725, 3809, 3895, 3980, 4067, 4154, 4242, 4330,
4419, 4509, 4599, 4689, 4781, 4872, 4964, 5057,
5150, 5244, 5338, 5432, 5527, 5622, 5718, 5814,
5910, 6007, 6104, 6202, 6299, 6397, 6495, 6594,
6693, 6791, 6891, 6990, 7090, 7189, 7289, 7389,
7489, 7589, 7690, 7790, 7890, 7991, 8091, 8192,
8293, 8393, 8494, 8594, 8694, 8795, 8895, 8995,
9095, 9195, 9294, 9394, 9493, 9593, 9691, 9790,
9889, 9987, 10085, 10182, 10280, 10377, 10474, 10570,
10666, 10762, 10857, 10952, 11046, 11140, 11234, 11327,
11420, 11512, 11603, 11695, 11785, 11875, 11965, 12054,
12142, 12230, 12317, 12404, 12489, 12575, 12659, 12743,
12826, 12909, 12991, 13072, 13152, 13232, 13311, 13389,
13466, 13543, 13619, 13693, 13767, 13841, 13913, 13985,
14055, 14125, 14194, 14262, 14329, 14395, 14460, 14525,
14588, 14650, 14711, 14772, 14831, 14890, 14947, 15003,
15059, 15113, 15166, 15219, 15270, 15320, 15369, 15417,
15464, 15509, 15554, 15597, 15640, 15681, 15721, 15760,
15798, 15835, 15871, 15905, 15938, 15971, 16001, 16031,
16060, 16087, 16113, 16138, 16162, 16185, 16206, 16227,
16246, 16263, 16280, 16295, 16309, 16322, 16334, 16345,
16354, 16362, 16369, 16374, 16378, 16382, 16383, 16384
};
// Fills v[0 .. size - 1] with entries of kHanningTable.
//
// The table is addressed with a fixed-point position (22 fractional bits)
// that advances by 2^30 / size per output sample, starting from a small
// negative offset that depends on `size`.
void WebRtcSpl_GetHanningWindow(int16_t *v, size_t size)
{
    const int32_t step =
        WebRtcSpl_DivW32W16((int32_t)0x40000000, (int16_t)size);
    int32_t position =
        (size < 513) ? (int32_t)-0x200000 : (int32_t)-0x100000;
    size_t n;

    for (n = 0; n < size; n++)
    {
        position += step;
        v[n] = kHanningTable[position >> 22];
    }
}

View file

@ -0,0 +1,46 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_GetScalingSquare().
* The description header can be found in signal_processing_library.h
*
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
// Returns the right shift to apply to each squared sample of `in_vector` so
// that `times` such squares can be summed in 32 bits.
//
// The result is max(0, bits(times) - norm(max_abs^2)), where max_abs is the
// largest absolute sample value, and 0 when all samples are zero.
int16_t WebRtcSpl_GetScalingSquare(int16_t* in_vector,
                                   size_t in_vector_length,
                                   size_t times)
{
    int16_t bits_for_times = WebRtcSpl_GetSizeInBits((uint32_t)times);
    int16_t largest = -1;
    int16_t magnitude;
    int16_t norm;
    size_t k;

    // Find the largest absolute sample value.
    for (k = 0; k < in_vector_length; k++)
    {
        magnitude = (in_vector[k] > 0 ? in_vector[k] : -in_vector[k]);
        if (magnitude > largest)
        {
            largest = magnitude;
        }
    }

    norm = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(largest, largest));

    if (largest == 0)
    {
        return 0; // Since norm(0) returns 0
    }
    if (norm > bits_for_times)
    {
        return 0;
    }
    return bits_for_times - norm;
}

View file

@ -0,0 +1,90 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains implementations of the iLBC specific functions
* WebRtcSpl_ReverseOrderMultArrayElements()
* WebRtcSpl_ElementwiseVectorMult()
* WebRtcSpl_AddVectorsAndShift()
* WebRtcSpl_AddAffineVectorToVector()
* WebRtcSpl_AffineTransformVector()
*
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
// out[i] = (in[i] * win[-i]) >> right_shifts for i in [0, vector_length).
// `win` points at the LAST window element to use; the window is traversed
// backwards while `in` and `out` are traversed forwards.
void WebRtcSpl_ReverseOrderMultArrayElements(int16_t *out, const int16_t *in,
                                             const int16_t *win,
                                             size_t vector_length,
                                             int16_t right_shifts)
{
    size_t k;

    for (k = 0; k < vector_length; k++)
    {
        const int16_t window_value = *(win - k);
        out[k] = (int16_t)((in[k] * window_value) >> right_shifts);
    }
}
// out[i] = (in[i] * win[i]) >> right_shifts for i in [0, vector_length).
void WebRtcSpl_ElementwiseVectorMult(int16_t *out, const int16_t *in,
                                     const int16_t *win, size_t vector_length,
                                     int16_t right_shifts)
{
    size_t k;

    for (k = 0; k < vector_length; k++)
    {
        out[k] = (int16_t)((in[k] * win[k]) >> right_shifts);
    }
}
// out[i] = (in1[i] + in2[i]) >> right_shifts for i in [0, vector_length).
void WebRtcSpl_AddVectorsAndShift(int16_t *out, const int16_t *in1,
                                  const int16_t *in2, size_t vector_length,
                                  int16_t right_shifts)
{
    size_t k;

    for (k = 0; k < vector_length; k++)
    {
        out[k] = (int16_t)((in1[k] + in2[k]) >> right_shifts);
    }
}
// Accumulates an affine transform of `in` onto `out`:
//   out[n] += (in[n] * gain + add_constant) >> right_shifts
// for n = 0 .. vector_length - 1. Both the increment and the updated sum are
// truncated to 16 bits.
void WebRtcSpl_AddAffineVectorToVector(int16_t *out, const int16_t *in,
                                       int16_t gain, int32_t add_constant,
                                       int16_t right_shifts,
                                       size_t vector_length)
{
    const int16_t *src = in;
    int16_t *dst = out;
    size_t remaining = vector_length;

    while (remaining > 0)
    {
        const int16_t increment =
            (int16_t)((*src * gain + add_constant) >> right_shifts);
        *dst = (int16_t)(*dst + increment);
        ++src;
        ++dst;
        --remaining;
    }
}
// Applies an affine transform to every element:
//   out[n] = (in[n] * gain + add_constant) >> right_shifts
// for n = 0 .. vector_length - 1, truncating the result to 16 bits.
void WebRtcSpl_AffineTransformVector(int16_t *out, const int16_t *in,
                                     int16_t gain, int32_t add_constant,
                                     int16_t right_shifts, size_t vector_length)
{
    const int16_t *src = in;
    int16_t *dst = out;
    size_t remaining = vector_length;

    while (remaining > 0)
    {
        *dst = (int16_t)((*src * gain + add_constant) >> right_shifts);
        ++src;
        ++dst;
        --remaining;
    }
}

View file

@ -0,0 +1,96 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
#include <stdint.h>
// For ComplexFFT(), the maximum fft order is 10;
// WebRTC APM uses orders of only 7 and 8.
enum { kMaxFFTOrder = 10 };
struct RealFFT;
#ifdef __cplusplus
extern "C" {
#endif
struct RealFFT* WebRtcSpl_CreateRealFFT(int order);
void WebRtcSpl_FreeRealFFT(struct RealFFT* self);
// Compute an FFT for a real-valued signal of length 2^order,
// where 1 < order <= kMaxFFTOrder. Transform length is determined by the
// specification structure, which must be initialized prior to calling the FFT
// function with WebRtcSpl_CreateRealFFT().
// The relationship between the input and output sequences can
// be expressed in terms of the DFT, i.e.:
// x[n] = (2^(-scalefactor)/N) . SUM[k=0,...,N-1] X[k].e^(jnk.2.pi/N)
// n=0,1,2,...N-1
// N=2^order.
// The conjugate-symmetric output sequence is represented using a CCS vector,
// which is of length N+2, and is organized as follows:
// Index: 0 1 2 3 4 5 . . . N-2 N-1 N N+1
// Component: R0 0 R1 I1 R2 I2 . . . R[N/2-1] I[N/2-1] R[N/2] 0
// where R[n] and I[n], respectively, denote the real and imaginary components
// for FFT bin 'n'. Bins are numbered from 0 to N/2, where N is the FFT length.
// Bin index 0 corresponds to the DC component, and bin index N/2 corresponds to
// the foldover frequency.
//
// Input Arguments:
// self - pointer to preallocated and initialized FFT specification structure.
// real_data_in - the input signal. For an ARM Neon platform, it must be
// aligned on a 32-byte boundary.
//
// Output Arguments:
// complex_data_out - the output complex signal with (2^order + 2) 16-bit
// elements. For an ARM Neon platform, it must be different
// from real_data_in, and aligned on a 32-byte boundary.
//
// Return Value:
// 0 - FFT calculation is successful.
// -1 - Error with bad arguments (null pointers).
int WebRtcSpl_RealForwardFFT(struct RealFFT* self,
const int16_t* real_data_in,
int16_t* complex_data_out);
// Compute the inverse FFT for a conjugate-symmetric input sequence of length
// 2^order, where 1 < order <= kMaxFFTOrder. Transform length is determined by
// the specification structure, which must be initialized prior to calling the
// FFT function with WebRtcSpl_CreateRealFFT().
// For a transform of length M, the input sequence is represented using a packed
// CCS vector of length M+2, which is explained in the comments for
// WebRtcSpl_RealForwardFFT above.
//
// Input Arguments:
// self - pointer to preallocated and initialized FFT specification structure.
// complex_data_in - the input complex signal with (2^order + 2) 16-bit
// elements. For an ARM Neon platform, it must be aligned on
// a 32-byte boundary.
//
// Output Arguments:
// real_data_out - the output real signal. For an ARM Neon platform, it must
// be different to complex_data_in, and aligned on a 32-byte
// boundary.
//
// Return Value:
// 0 or a positive number - a value that the elements in the `real_data_out`
// should be shifted left with in order to get
// correct physical values.
// -1 - Error with bad arguments (null pointers).
int WebRtcSpl_RealInverseFFT(struct RealFFT* self,
const int16_t* complex_data_in,
int16_t* real_data_out);
#ifdef __cplusplus
}
#endif
#endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_

View file

@ -0,0 +1,155 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// This header file includes the inline functions in
// the fix point signal processing library.
#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
#include <stdint.h>
#include "rtc_base/compile_assert_c.h"
extern const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64];
// Portable leading-zero count for a 32-bit value, implemented with a lookup
// table instead of a compiler builtin.
// Don't call this directly except in tests!
static __inline int WebRtcSpl_CountLeadingZeros32_NotBuiltin(uint32_t n) {
  unsigned int shift;
  uint32_t slot;

  // Smear the highest set bit into every lower position, so that n becomes a
  // run of 0 bits followed by a run of 1 bits. This keeps the number of
  // leading zeros unchanged and leaves exactly 33 possible values.
  for (shift = 1; shift <= 16; shift <<= 1) {
    n |= n >> shift;
  }

  // The multiplier was selected (by exhaustive search) so that the 6 most
  // significant bits of the product are unique for each of the 33 values;
  // those bits index the answer table.
  slot = (n * 0x8c0b2891) >> 26;
  return kWebRtcSpl_CountLeadingZeros32_Table[slot];
}
// Portable leading-zero count for a 64-bit value, built on the 32-bit
// table-driven routine.
// Don't call this directly except in tests!
static __inline int WebRtcSpl_CountLeadingZeros64_NotBuiltin(uint64_t n) {
  if (n >> 32 == 0) {
    // Upper half is empty: 32 zeros plus whatever the lower half contributes.
    return 32 + WebRtcSpl_CountLeadingZeros32_NotBuiltin((uint32_t)n);
  }
  // Upper half has a set bit, so it alone determines the count.
  return WebRtcSpl_CountLeadingZeros32_NotBuiltin((uint32_t)(n >> 32));
}
// Counts the leading zero bits of `n`; the result is 32 when n == 0.
static __inline int WebRtcSpl_CountLeadingZeros32(uint32_t n) {
#ifdef __GNUC__
  RTC_COMPILE_ASSERT(sizeof(unsigned int) == sizeof(uint32_t));
  // Zero is answered explicitly rather than being passed to the builtin.
  if (n == 0) {
    return 32;
  }
  return __builtin_clz(n);
#else
  return WebRtcSpl_CountLeadingZeros32_NotBuiltin(n);
#endif
}
// Counts the leading zero bits of `n`; the result is 64 when n == 0.
static __inline int WebRtcSpl_CountLeadingZeros64(uint64_t n) {
#ifdef __GNUC__
  RTC_COMPILE_ASSERT(sizeof(unsigned long long) == sizeof(uint64_t));  // NOLINT
  // Zero is answered explicitly rather than being passed to the builtin.
  if (n == 0) {
    return 64;
  }
  return __builtin_clzll(n);
#else
  return WebRtcSpl_CountLeadingZeros64_NotBuiltin(n);
#endif
}
#ifdef WEBRTC_ARCH_ARM_V7
#include "common_audio/signal_processing/include/spl_inl_armv7.h"
#else
#if defined(MIPS32_LE)
#include "common_audio/signal_processing/include/spl_inl_mips.h"
#endif
#if !defined(MIPS_DSP_R1_LE)
// Clamps a 32-bit value into the int16_t range [-32768, 32767].
static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) {
  if (value32 > 32767) {
    return 32767;
  }
  if (value32 < -32768) {
    return -32768;
  }
  return (int16_t)value32;
}
// Saturating 32-bit addition: returns a + b, clamped to
// [INT32_MIN, INT32_MAX] when the exact sum does not fit.
static __inline int32_t WebRtcSpl_AddSatW32(int32_t a, int32_t b) {
  // Signed overflow is undefined behavior, so add as unsigned and convert.
  const uint32_t wrapped = (uint32_t)a + (uint32_t)b;
  const int32_t sum = (int32_t)wrapped;
  const int a_is_negative = a < 0;

  // Operands of different sign can never overflow.
  if (a_is_negative != (b < 0)) {
    return sum;
  }
  // Same-sign operands overflowed iff the sum's sign differs from theirs.
  if (a_is_negative == (sum < 0)) {
    return sum;
  }
  // Overflow: saturate in the direction of the operands.
  return a_is_negative ? INT32_MIN : INT32_MAX;
}
// Saturating 32-bit subtraction: returns a - b, clamped to
// [INT32_MIN, INT32_MAX] when the exact difference does not fit.
static __inline int32_t WebRtcSpl_SubSatW32(int32_t a, int32_t b) {
  // Signed overflow is undefined behavior, so subtract as unsigned and convert.
  const uint32_t wrapped = (uint32_t)a - (uint32_t)b;
  const int32_t diff = (int32_t)wrapped;
  const int a_is_negative = a < 0;

  // Operands of equal sign can never overflow.
  if (a_is_negative == (b < 0)) {
    return diff;
  }
  // With different signs, the result keeps a's sign iff it didn't overflow.
  if (a_is_negative == (diff < 0)) {
    return diff;
  }
  // Overflow: saturate in the direction of a.
  return a_is_negative ? INT32_MIN : INT32_MAX;
}
// Saturating 16-bit addition: the exact sum is formed in 32 bits and then
// clamped to the int16_t range.
static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
  const int32_t wide_sum = (int32_t)a + (int32_t)b;
  return WebRtcSpl_SatW32ToW16(wide_sum);
}
// Saturating 16-bit subtraction: the exact difference var1 - var2 is formed in
// 32 bits and then clamped to the int16_t range.
static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) {
  const int32_t wide_diff = (int32_t)var1 - (int32_t)var2;
  return WebRtcSpl_SatW32ToW16(wide_diff);
}
#endif // #if !defined(MIPS_DSP_R1_LE)
#if !defined(MIPS32_LE)
// Returns the number of bits needed to represent `n`, i.e. 32 minus the number
// of leading zero bits (0 for n == 0).
static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) {
  const int leading_zeros = WebRtcSpl_CountLeadingZeros32(n);
  return (int16_t)(32 - leading_zeros);
}
// Returns how many positions the signed value `a` can be shifted left without
// overflowing, or 0 when a == 0.
static __inline int16_t WebRtcSpl_NormW32(int32_t a) {
  int32_t magnitude_bits;

  if (a == 0) {
    return 0;
  }
  // For negative values count the leading ones by inverting first.
  magnitude_bits = (a < 0) ? ~a : a;
  // One bit is reserved for the sign, hence the -1.
  return (int16_t)(WebRtcSpl_CountLeadingZeros32(magnitude_bits) - 1);
}
// Returns how many positions the unsigned value `a` can be shifted left
// without overflowing (its leading-zero count), or 0 when a == 0.
static __inline int16_t WebRtcSpl_NormU32(uint32_t a) {
  if (a == 0) {
    return 0;
  }
  return (int16_t)WebRtcSpl_CountLeadingZeros32(a);
}
// Returns how many positions the signed 16-bit value `a` can be shifted left
// without overflowing, or 0 when a == 0.
static __inline int16_t WebRtcSpl_NormW16(int16_t a) {
  const int32_t widened = a;
  int32_t magnitude_bits;

  if (a == 0) {
    return 0;
  }
  // For negative values count the leading ones by inverting first.
  magnitude_bits = (a < 0) ? ~widened : widened;
  // 16 bits come from widening to 32 bits and 1 from the sign bit: -17.
  return (int16_t)(WebRtcSpl_CountLeadingZeros32(magnitude_bits) - 17);
}
// Multiply-accumulate: returns a * b + c, with the product formed in int
// precision.
static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) {
  const int32_t product = a * b;
  return product + c;
}
#endif // #if !defined(MIPS32_LE)
#endif // WEBRTC_ARCH_ARM_V7
#endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_

View file

@ -0,0 +1,138 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/* This header file includes the inline functions for ARM processors in
* the fix point signal processing library.
*/
#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_ARMV7_H_
#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_ARMV7_H_
#include <stdint.h>
/* TODO(kma): Replace some assembly code with GCC intrinsics
* (e.g. __builtin_clz).
*/
/* This function produces a result that is not bit exact with that of the
 * generic C version in some cases, although the former is at least as accurate
 * as the latter.
 *
 * Multiplies the 32-bit value `b` by the 16-bit value `a` with a single ARM
 * `smulwb` instruction. Per the ARM instruction set, `smulwb` multiplies a
 * 32-bit word by the bottom halfword of the second operand and keeps the top
 * 32 bits of the 48-bit product, i.e. (a * b) >> 16 as the name suggests.
 */
static __inline int32_t WEBRTC_SPL_MUL_16_32_RSFT16(int16_t a, int32_t b) {
int32_t tmp = 0;
// Operand order matters: %1 is the full word (b), %2 supplies the halfword (a).
__asm __volatile("smulwb %0, %1, %2" : "=r"(tmp) : "r"(b), "r"(a));
return tmp;
}
// Returns the 32-bit product of two 16-bit values, computed with the ARM
// `smulbb` instruction (signed multiply of the bottom halfwords of both
// operands, per the ARM instruction set).
static __inline int32_t WEBRTC_SPL_MUL_16_16(int16_t a, int16_t b) {
int32_t tmp = 0;
__asm __volatile("smulbb %0, %1, %2" : "=r"(tmp) : "r"(a), "r"(b));
return tmp;
}
// TODO(kma): add unit test.
// Multiply-accumulate: issues the ARM `smlabb` instruction with operands
// (a, b, c). Per the ARM instruction set this multiplies the bottom halfwords
// of `a` and `b` and adds `c`, i.e. a * b + c.
static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) {
int32_t tmp = 0;
__asm __volatile("smlabb %0, %1, %2, %3"
: "=r"(tmp)
: "r"(a), "r"(b), "r"(c));
return tmp;
}
// Saturating 16-bit addition of `a` and `b` using the ARM `qadd16`
// instruction (a parallel halfword saturating add, per the ARM instruction
// set). Only the low 16 bits of the instruction's result are returned.
static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
int32_t s_sum = 0;
__asm __volatile("qadd16 %0, %1, %2" : "=r"(s_sum) : "r"(a), "r"(b));
// The cast discards the upper halfword lane of the result.
return (int16_t)s_sum;
}
// Saturating 32-bit addition of `l_var1` and `l_var2` using the ARM `qadd`
// instruction (signed saturating add, per the ARM instruction set).
static __inline int32_t WebRtcSpl_AddSatW32(int32_t l_var1, int32_t l_var2) {
int32_t l_sum = 0;
__asm __volatile("qadd %0, %1, %2" : "=r"(l_sum) : "r"(l_var1), "r"(l_var2));
return l_sum;
}
// Saturating 32-bit subtraction l_var1 - l_var2 using the ARM `qsub`
// instruction (signed saturating subtract, per the ARM instruction set).
static __inline int32_t WebRtcSpl_SubSatW32(int32_t l_var1, int32_t l_var2) {
int32_t l_sub = 0;
__asm __volatile("qsub %0, %1, %2" : "=r"(l_sub) : "r"(l_var1), "r"(l_var2));
return l_sub;
}
// Saturating 16-bit subtraction var1 - var2 using the ARM `qsub16`
// instruction (a parallel halfword saturating subtract, per the ARM
// instruction set). Only the low 16 bits of the result are returned.
static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) {
int32_t s_sub = 0;
__asm __volatile("qsub16 %0, %1, %2" : "=r"(s_sub) : "r"(var1), "r"(var2));
// The cast discards the upper halfword lane of the result.
return (int16_t)s_sub;
}
// Returns 32 minus the result of the ARM `clz` (count leading zeros)
// instruction applied to `n`, i.e. the number of bits needed to represent n.
static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) {
int32_t tmp = 0;
__asm __volatile("clz %0, %1" : "=r"(tmp) : "r"(n));
return (int16_t)(32 - tmp);
}
// Returns the normalization shift of the signed value `a`: the `clz` count of
// its magnitude bits minus one (for the sign bit). Returns 0 when a == 0.
static __inline int16_t WebRtcSpl_NormW32(int32_t a) {
int32_t tmp = 0;
if (a == 0) {
return 0;
} else if (a < 0) {
// Invert all bits so that leading ones of a negative value become zeros.
a ^= 0xFFFFFFFF;
}
__asm __volatile("clz %0, %1" : "=r"(tmp) : "r"(a));
return (int16_t)(tmp - 1);
}
// Returns the `clz` (count leading zeros) result for the unsigned value `a`,
// or 0 when a == 0 (the zero case is answered without executing `clz`).
static __inline int16_t WebRtcSpl_NormU32(uint32_t a) {
int tmp = 0;
if (a == 0)
return 0;
__asm __volatile("clz %0, %1" : "=r"(tmp) : "r"(a));
return (int16_t)tmp;
}
// Returns the normalization shift of the signed 16-bit value `a`, or 0 when
// a == 0. The value is widened to 32 bits before `clz` is applied, so 17 is
// subtracted from the count: 16 for the widening and 1 for the sign bit.
static __inline int16_t WebRtcSpl_NormW16(int16_t a) {
int32_t tmp = 0;
int32_t a_32 = a;
if (a_32 == 0) {
return 0;
} else if (a_32 < 0) {
// Invert all bits so that leading ones of a negative value become zeros.
a_32 ^= 0xFFFFFFFF;
}
__asm __volatile("clz %0, %1" : "=r"(tmp) : "r"(a_32));
return (int16_t)(tmp - 17);
}
// TODO(kma): add unit test.
// Saturates a 32-bit value to the signed 16-bit range using the ARM
// `ssat ..., #16, ...` instruction (signed saturate to 16 bits, per the ARM
// instruction set) and returns it as int16_t.
static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) {
int32_t out = 0;
__asm __volatile("ssat %0, #16, %1" : "=r"(out) : "r"(value32));
return (int16_t)out;
}
#endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_ARMV7_H_

View file

@ -0,0 +1,204 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// This header file includes the inline functions in
// the fix point signal processing library.
#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_MIPS_H_
#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_MIPS_H_
// Multiplies the low 16 bits of `a` and `b` as signed values and returns the
// 32-bit product.
// Both operands are first reduced to sign-extended halfwords, either with
// `seh` (when MIPS32_R2_LE is defined) or with a shift-left/arithmetic-
// shift-right by 16 pair, and then multiplied with `mul`.
static __inline int32_t WEBRTC_SPL_MUL_16_16(int32_t a, int32_t b) {
int32_t value32 = 0;
int32_t a1 = 0, b1 = 0;
// a1/b1 are early-clobber scratch registers; "hi"/"lo" are listed as
// clobbered by the multiply.
__asm __volatile(
#if defined(MIPS32_R2_LE)
"seh %[a1], %[a] \n\t"
"seh %[b1], %[b] \n\t"
#else
"sll %[a1], %[a], 16 \n\t"
"sll %[b1], %[b], 16 \n\t"
"sra %[a1], %[a1], 16 \n\t"
"sra %[b1], %[b1], 16 \n\t"
#endif
"mul %[value32], %[a1], %[b1] \n\t"
: [value32] "=r"(value32), [a1] "=&r"(a1), [b1] "=&r"(b1)
: [a] "r"(a), [b] "r"(b)
: "hi", "lo");
return value32;
}
// Computes an approximation of (a * b) >> 16 for a 16-bit `a` and 32-bit `b`
// by splitting `b` into halves. Reading the instruction sequence:
//   a1      = sign-extended a
//   b1      = b >> 16                       (upper half, arithmetic shift)
//   b2      = (b & 0xFFFF) >> 1             (lower half, halved)
//   value32 = a1 * b1 + ((a1 * b2 + 0x4000) >> 15)
// The 0x4000 term rounds the lower-half contribution before the shift.
static __inline int32_t WEBRTC_SPL_MUL_16_32_RSFT16(int16_t a, int32_t b) {
int32_t value32 = 0, b1 = 0, b2 = 0;
int32_t a1 = 0;
// All outputs are early-clobber; "hi"/"lo" are listed as clobbered by the
// multiplies.
__asm __volatile(
#if defined(MIPS32_R2_LE)
"seh %[a1], %[a] \n\t"
#else
"sll %[a1], %[a], 16 \n\t"
"sra %[a1], %[a1], 16 \n\t"
#endif
"andi %[b2], %[b], 0xFFFF \n\t"
"sra %[b1], %[b], 16 \n\t"
"sra %[b2], %[b2], 1 \n\t"
"mul %[value32], %[a1], %[b1] \n\t"
"mul %[b2], %[a1], %[b2] \n\t"
"addiu %[b2], %[b2], 0x4000 \n\t"
"sra %[b2], %[b2], 15 \n\t"
"addu %[value32], %[value32], %[b2] \n\t"
: [value32] "=&r"(value32), [b1] "=&r"(b1), [b2] "=&r"(b2), [a1] "=&r"(a1)
: [a] "r"(a), [b] "r"(b)
: "hi", "lo");
return value32;
}
#if defined(MIPS_DSP_R1_LE)
// Saturates a 32-bit value to the int16_t range (MIPS DSP variant).
// `shll_s.w` shifts left by 16 (a saturating shift, going by the `_s`
// suffix), then `sra` shifts arithmetically right by 16, leaving a
// sign-extended 16-bit result in `value32`.
static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) {
// value32 is a read-write ("+r") operand: it is modified in place.
__asm __volatile(
"shll_s.w %[value32], %[value32], 16 \n\t"
"sra %[value32], %[value32], 16 \n\t"
: [value32] "+r"(value32)
:);
int16_t out16 = (int16_t)value32;
return out16;
}
// Saturating 16-bit addition of `a` and `b` using the MIPS DSP `addq_s.ph`
// instruction (presumably a paired-halfword saturating add; confirm against
// the DSP ASE manual). Only the low 16 bits of the result are returned.
static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
int32_t value32 = 0;
__asm __volatile("addq_s.ph %[value32], %[a], %[b] \n\t"
: [value32] "=r"(value32)
: [a] "r"(a), [b] "r"(b));
return (int16_t)value32;
}
// Saturating 32-bit addition of `l_var1` and `l_var2` using the MIPS DSP
// `addq_s.w` instruction.
static __inline int32_t WebRtcSpl_AddSatW32(int32_t l_var1, int32_t l_var2) {
int32_t l_sum;
__asm __volatile(
"addq_s.w %[l_sum], %[l_var1], %[l_var2] \n\t"
: [l_sum] "=r"(l_sum)
: [l_var1] "r"(l_var1), [l_var2] "r"(l_var2));
return l_sum;
}
// Saturating 16-bit subtraction var1 - var2 using the MIPS DSP `subq_s.ph`
// instruction (presumably a paired-halfword saturating subtract; confirm
// against the DSP ASE manual). Only the low 16 bits of the result are
// returned.
static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) {
int32_t value32;
__asm __volatile("subq_s.ph %[value32], %[var1], %[var2] \n\t"
: [value32] "=r"(value32)
: [var1] "r"(var1), [var2] "r"(var2));
return (int16_t)value32;
}
// Saturating 32-bit subtraction l_var1 - l_var2 using the MIPS DSP
// `subq_s.w` instruction.
static __inline int32_t WebRtcSpl_SubSatW32(int32_t l_var1, int32_t l_var2) {
int32_t l_diff;
__asm __volatile(
"subq_s.w %[l_diff], %[l_var1], %[l_var2] \n\t"
: [l_diff] "=r"(l_diff)
: [l_var1] "r"(l_var1), [l_var2] "r"(l_var2));
return l_diff;
}
#endif
// Returns 32 minus the leading-zero count of `n` (`clz` followed by `subu`
// from the constant 32), i.e. the number of bits needed to represent n.
static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) {
int bits = 0;
// The constant 32 is passed in a register for the `subu`.
int i32 = 32;
__asm __volatile(
"clz %[bits], %[n] \n\t"
"subu %[bits], %[i32], %[bits] \n\t"
: [bits] "=&r"(bits)
: [n] "r"(n), [i32] "r"(i32));
return (int16_t)bits;
}
// Returns the normalization shift of the signed value `a`, or 0 when a == 0.
// Reading the instruction sequence:
//   zeros = a >> 31            (all ones for negative a, else 0)
//   if (a == 0) zeros = 0;     (result for zero input)
//   else zeros = clz(a ^ zeros) - 1;
// XOR-ing with the sign mask inverts negative values so that `clz` counts the
// redundant sign bits. The code runs under `.set noreorder`, and the
// instructions written with a leading space directly after each branch appear
// to be placed in the branch delay slots.
static __inline int16_t WebRtcSpl_NormW32(int32_t a) {
int zeros = 0;
__asm __volatile(
".set push \n\t"
".set noreorder \n\t"
"bnez %[a], 1f \n\t"
" sra %[zeros], %[a], 31 \n\t"
"b 2f \n\t"
" move %[zeros], $zero \n\t"
"1: \n\t"
"xor %[zeros], %[a], %[zeros] \n\t"
"clz %[zeros], %[zeros] \n\t"
"addiu %[zeros], %[zeros], -1 \n\t"
"2: \n\t"
".set pop \n\t"
: [zeros] "=&r"(zeros)
: [a] "r"(a));
return (int16_t)zeros;
}
// Returns the leading-zero count (`clz`) of the unsigned value `a`, masked
// with 0x1f.
static __inline int16_t WebRtcSpl_NormU32(uint32_t a) {
int zeros = 0;
__asm __volatile("clz %[zeros], %[a] \n\t"
: [zeros] "=r"(zeros)
: [a] "r"(a));
// The mask maps a count of 32 to 0, presumably so that a == 0 yields 0 like
// the generic C version -- assumes clz(0) == 32 on MIPS; TODO confirm.
return (int16_t)(zeros & 0x1f);
}
// Returns the normalization shift of the signed 16-bit value `a`, or 0 when
// a == 0. The value is first moved into the upper halfword (a << 16) and then
// processed with the same sequence as the 32-bit variant:
//   zeros = a0 >> 31;
//   if (a0 == 0) zeros = 0; else zeros = clz(a0 ^ zeros) - 1;
// The code runs under `.set noreorder`, and the instructions written with a
// leading space directly after each branch appear to be placed in the branch
// delay slots.
static __inline int16_t WebRtcSpl_NormW16(int16_t a) {
int zeros = 0;
// NOTE(review): for negative `a` this left-shifts a negative int.
int a0 = a << 16;
__asm __volatile(
".set push \n\t"
".set noreorder \n\t"
"bnez %[a0], 1f \n\t"
" sra %[zeros], %[a0], 31 \n\t"
"b 2f \n\t"
" move %[zeros], $zero \n\t"
"1: \n\t"
"xor %[zeros], %[a0], %[zeros] \n\t"
"clz %[zeros], %[zeros] \n\t"
"addiu %[zeros], %[zeros], -1 \n\t"
"2: \n\t"
".set pop \n\t"
: [zeros] "=&r"(zeros)
: [a0] "r"(a0));
return (int16_t)zeros;
}
// Multiply-accumulate: returns a * b + c.
// `a` and `b` are sign-extended from 16 bits (with `seh` when MIPS32_R2_LE is
// defined, otherwise with a shift-left/arithmetic-shift-right by 16 pair),
// multiplied with `mul`, and the product is added to `c` with `addu`.
//
// NOTE(review): the sign-extension instructions write to %[a] and %[b], which
// are declared as input-only ("r") operands. Modifying input operands is not
// permitted by the GCC extended-asm rules -- confirm whether these should be
// "+r" operands or use scratch registers as WEBRTC_SPL_MUL_16_16 does.
static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) {
int32_t res = 0, c1 = 0;
__asm __volatile(
#if defined(MIPS32_R2_LE)
"seh %[a], %[a] \n\t"
"seh %[b], %[b] \n\t"
#else
"sll %[a], %[a], 16 \n\t"
"sll %[b], %[b], 16 \n\t"
"sra %[a], %[a], 16 \n\t"
"sra %[b], %[b], 16 \n\t"
#endif
"mul %[res], %[a], %[b] \n\t"
"addu %[c1], %[c], %[res] \n\t"
: [c1] "=r"(c1), [res] "=&r"(res)
: [a] "r"(a), [b] "r"(b), [c] "r"(c)
: "hi", "lo");
return (c1);
}
#endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_MIPS_H_

View file

@ -0,0 +1,249 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_LevinsonDurbin().
* The description header can be found in signal_processing_library.h
*
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "rtc_base/sanitizer.h"
#define SPL_LEVINSON_MAXORDER 20
// Fixed-point Levinson-Durbin recursion.
//
// Inputs:
//   R     - autocorrelation values; R[0]..R[order] are read.
//   order - filter order. The working arrays below hold
//           SPL_LEVINSON_MAXORDER + 1 entries and `order` is not range
//           checked here, so the caller must keep order <= SPL_LEVINSON_MAXORDER.
//           R[1] is read unconditionally, so order >= 1 is presumably expected.
// Outputs:
//   A - LPC coefficients in Q12; A[0] is set to 4096 (1.0) and A[1]..A[order]
//       are written only when the function runs to completion.
//   K - reflection coefficients in Q15; K[0]..K[order-1].
// Returns 1 on completion, or 0 as soon as a reflection coefficient computed
// in the main loop has magnitude above 32750 (treated as an unstable filter).
// In that case A is left unwritten.
//
// Intermediate values are kept as (hi, low) 16-bit pairs, where
// value = hi * 65536 + low * 2.
int16_t RTC_NO_SANITIZE("signed-integer-overflow") // bugs.webrtc.org/5486
WebRtcSpl_LevinsonDurbin(const int32_t* R, int16_t* A, int16_t* K,
size_t order)
{
size_t i, j;
// Auto-correlation coefficients in high precision
int16_t R_hi[SPL_LEVINSON_MAXORDER + 1], R_low[SPL_LEVINSON_MAXORDER + 1];
// LPC coefficients in high precision
int16_t A_hi[SPL_LEVINSON_MAXORDER + 1], A_low[SPL_LEVINSON_MAXORDER + 1];
// LPC coefficients for next iteration
int16_t A_upd_hi[SPL_LEVINSON_MAXORDER + 1], A_upd_low[SPL_LEVINSON_MAXORDER + 1];
// Reflection coefficient in high precision
int16_t K_hi, K_low;
// Prediction gain Alpha in high precision and with scale factor
int16_t Alpha_hi, Alpha_low, Alpha_exp;
int16_t tmp_hi, tmp_low;
int32_t temp1W32, temp2W32, temp3W32;
int16_t norm;
// Normalize the autocorrelation R[0]...R[order]
norm = WebRtcSpl_NormW32(R[0]);
for (i = 0; i <= order; ++i)
{
temp1W32 = R[i] * (1 << norm);
// UBSan: 12 * 268435456 cannot be represented in type 'int'
// Put R in hi and low format
R_hi[i] = (int16_t)(temp1W32 >> 16);
R_low[i] = (int16_t)((temp1W32 - ((int32_t)R_hi[i] * 65536)) >> 1);
}
// K = A[1] = -R[1] / R[0]
temp2W32 = R[1] * (1 << norm); // R[1] in Q31
temp3W32 = WEBRTC_SPL_ABS_W32(temp2W32); // abs R[1]
temp1W32 = WebRtcSpl_DivW32HiLow(temp3W32, R_hi[0], R_low[0]); // abs(R[1])/R[0] in Q31
// Put back the sign on R[1]
if (temp2W32 > 0)
{
temp1W32 = -temp1W32;
}
// Put K in hi and low format
K_hi = (int16_t)(temp1W32 >> 16);
K_low = (int16_t)((temp1W32 - ((int32_t)K_hi * 65536)) >> 1);
// Store first reflection coefficient
// (unlike the coefficients computed in the loop below, K[0] is not tested
// against the stability threshold)
K[0] = K_hi;
temp1W32 >>= 4; // A[1] in Q27.
// Put A[1] in hi and low format
A_hi[1] = (int16_t)(temp1W32 >> 16);
A_low[1] = (int16_t)((temp1W32 - ((int32_t)A_hi[1] * 65536)) >> 1);
// Alpha = R[0] * (1-K^2)
temp1W32 = ((K_hi * K_low >> 14) + K_hi * K_hi) * 2; // = k^2 in Q31
temp1W32 = WEBRTC_SPL_ABS_W32(temp1W32); // Guard against <0
temp1W32 = (int32_t)0x7fffffffL - temp1W32; // temp1W32 = (1 - K[0]*K[0]) in Q31
// Store temp1W32 = 1 - K[0]*K[0] on hi and low format
tmp_hi = (int16_t)(temp1W32 >> 16);
tmp_low = (int16_t)((temp1W32 - ((int32_t)tmp_hi << 16)) >> 1);
// Calculate Alpha in Q31
temp1W32 = (R_hi[0] * tmp_hi + (R_hi[0] * tmp_low >> 15) +
(R_low[0] * tmp_hi >> 15)) << 1;
// Normalize Alpha and put it in hi and low format
Alpha_exp = WebRtcSpl_NormW32(temp1W32);
temp1W32 = WEBRTC_SPL_LSHIFT_W32(temp1W32, Alpha_exp);
Alpha_hi = (int16_t)(temp1W32 >> 16);
Alpha_low = (int16_t)((temp1W32 - ((int32_t)Alpha_hi << 16)) >> 1);
// Perform the iterative calculations in the Levinson-Durbin algorithm
for (i = 2; i <= order; i++)
{
/* ----
temp1W32 = R[i] + > R[j]*A[i-j]
/
----
j=1..i-1
*/
temp1W32 = 0;
for (j = 1; j < i; j++)
{
// temp1W32 is in Q31
temp1W32 += (R_hi[j] * A_hi[i - j] * 2) +
(((R_hi[j] * A_low[i - j] >> 15) +
(R_low[j] * A_hi[i - j] >> 15)) * 2);
}
// Scale the accumulated sum by 16 before adding R[i]
temp1W32 = temp1W32 * 16;
temp1W32 += ((int32_t)R_hi[i] * 65536)
+ WEBRTC_SPL_LSHIFT_W32((int32_t)R_low[i], 1);
// K = -temp1W32 / Alpha
temp2W32 = WEBRTC_SPL_ABS_W32(temp1W32); // abs(temp1W32)
temp3W32 = WebRtcSpl_DivW32HiLow(temp2W32, Alpha_hi, Alpha_low); // abs(temp1W32)/Alpha
// Put the sign of temp1W32 back again
if (temp1W32 > 0)
{
temp3W32 = -temp3W32;
}
// Use the Alpha shifts from earlier to de-normalize
norm = WebRtcSpl_NormW32(temp3W32);
if ((Alpha_exp <= norm) || (temp3W32 == 0))
{
temp3W32 = temp3W32 * (1 << Alpha_exp);
} else
{
// The de-normalizing shift would overflow: saturate K instead
if (temp3W32 > 0)
{
temp3W32 = (int32_t)0x7fffffffL;
} else
{
temp3W32 = (int32_t)0x80000000L;
}
}
// Put K on hi and low format
K_hi = (int16_t)(temp3W32 >> 16);
K_low = (int16_t)((temp3W32 - ((int32_t)K_hi * 65536)) >> 1);
// Store Reflection coefficient in Q15
K[i - 1] = K_hi;
// Test for unstable filter.
// If unstable return 0 and let the user decide what to do in that case
if ((int32_t)WEBRTC_SPL_ABS_W16(K_hi) > (int32_t)32750)
{
return 0; // Unstable filter
}
/*
Compute updated LPC coefficient: Anew[i]
Anew[j]= A[j] + K*A[i-j] for j=1..i-1
Anew[i]= K
*/
for (j = 1; j < i; j++)
{
// temp1W32 = A[j] in Q27
temp1W32 = (int32_t)A_hi[j] * 65536
+ WEBRTC_SPL_LSHIFT_W32((int32_t)A_low[j],1);
// temp1W32 += K*A[i-j] in Q27
temp1W32 += (K_hi * A_hi[i - j] + (K_hi * A_low[i - j] >> 15) +
(K_low * A_hi[i - j] >> 15)) * 2;
// Put Anew in hi and low format
A_upd_hi[j] = (int16_t)(temp1W32 >> 16);
A_upd_low[j] = (int16_t)(
(temp1W32 - ((int32_t)A_upd_hi[j] * 65536)) >> 1);
}
// temp3W32 = K in Q27 (Convert from Q31 to Q27)
temp3W32 >>= 4;
// Store Anew in hi and low format
A_upd_hi[i] = (int16_t)(temp3W32 >> 16);
A_upd_low[i] = (int16_t)(
(temp3W32 - ((int32_t)A_upd_hi[i] * 65536)) >> 1);
// Alpha = Alpha * (1-K^2)
temp1W32 = ((K_hi * K_low >> 14) + K_hi * K_hi) * 2; // K*K in Q31
temp1W32 = WEBRTC_SPL_ABS_W32(temp1W32); // Guard against <0
temp1W32 = (int32_t)0x7fffffffL - temp1W32; // 1 - K*K in Q31
// Convert 1- K^2 in hi and low format
tmp_hi = (int16_t)(temp1W32 >> 16);
tmp_low = (int16_t)((temp1W32 - ((int32_t)tmp_hi << 16)) >> 1);
// Calculate Alpha = Alpha * (1-K^2) in Q31
temp1W32 = (Alpha_hi * tmp_hi + (Alpha_hi * tmp_low >> 15) +
(Alpha_low * tmp_hi >> 15)) << 1;
// Normalize Alpha and store it on hi and low format
norm = WebRtcSpl_NormW32(temp1W32);
temp1W32 = WEBRTC_SPL_LSHIFT_W32(temp1W32, norm);
Alpha_hi = (int16_t)(temp1W32 >> 16);
Alpha_low = (int16_t)((temp1W32 - ((int32_t)Alpha_hi << 16)) >> 1);
// Update the total normalization of Alpha
Alpha_exp = Alpha_exp + norm;
// Update A[]
for (j = 1; j <= i; j++)
{
A_hi[j] = A_upd_hi[j];
A_low[j] = A_upd_low[j];
}
}
/*
Set A[0] to 1.0 and store the A[i] i=1...order in Q12
(Convert from Q27 and use rounding)
*/
A[0] = 4096;
for (i = 1; i <= order; i++)
{
// temp1W32 in Q27
temp1W32 = (int32_t)A_hi[i] * 65536
+ WEBRTC_SPL_LSHIFT_W32((int32_t)A_low[i], 1);
// Round and store upper word
A[i] = (int16_t)(((temp1W32 * 2) + 32768) >> 16);
}
return 1; // Stable filters
}

View file

@ -0,0 +1,56 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_LpcToReflCoef().
* The description header can be found in signal_processing_library.h
*
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
#define SPL_LPC_TO_REFL_COEF_MAX_AR_MODEL_ORDER 50
// Converts LPC coefficients to reflection coefficients by stepping the model
// order down one stage at a time.
//
//   a16       - LPC coefficients (Q12); a16[1]..a16[use_order] are read and
//               the lower entries are overwritten as the order is reduced.
//   use_order - model order. The scratch buffer holds
//               SPL_LPC_TO_REFL_COEF_MAX_AR_MODEL_ORDER entries and the order
//               is not range checked here, so the caller must keep
//               1 <= use_order <= SPL_LPC_TO_REFL_COEF_MAX_AR_MODEL_ORDER.
//   k16       - output reflection coefficients (Q15), k16[0]..k16[use_order-1].
void WebRtcSpl_LpcToReflCoef(int16_t* a16, int use_order, int16_t* k16)
{
    int32_t scratch[SPL_LPC_TO_REFL_COEF_MAX_AR_MODEL_ORDER];
    int stage;

    // The highest-order reflection coefficient is the last LPC coefficient.
    k16[use_order - 1] = a16[use_order] << 3; // Q12<<3 => Q15

    stage = use_order - 1;
    while (stage > 0)
    {
        // (1 - k^2) in Q30, then reduced to Q15 for the division below.
        const int32_t one_minus_k2_q30 = 1073741823 - k16[stage] * k16[stage];
        const int16_t one_minus_k2_q15 = (int16_t)(one_minus_k2_q30 >> 15);
        int idx;

        for (idx = 1; idx <= stage; idx++)
        {
            // tmp[k] = (a[k] - RC[m] * a[m-k+1]) / (1.0 - RC[m]*RC[m]);
            // [Q12<<16 - (Q15*Q12)<<1] = [Q28 - Q28] = Q28
            const int32_t numerator =
                (a16[idx] << 16) - (k16[stage] * a16[stage - idx + 1] << 1);
            scratch[idx] = WebRtcSpl_DivW32W16(numerator, one_minus_k2_q15); //Q28/Q15 = Q13
        }

        // Commit the reduced-order LPC coefficients.
        for (idx = 1; idx < stage; idx++)
        {
            a16[idx] = (int16_t)(scratch[idx] >> 1); // Q13>>1 => Q12
        }

        // The last entry is this stage's reflection coefficient; clamp it
        // before converting so the Q15 value fits in 16 bits.
        scratch[stage] = WEBRTC_SPL_SAT(8191, scratch[stage], -8191);
        k16[stage - 1] = (int16_t)WEBRTC_SPL_LSHIFT_W32(scratch[stage], 2); //Q13<<2 => Q15

        stage--;
    }
}

View file

@ -0,0 +1,256 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the implementation of functions
* WebRtcSpl_MaxAbsValueW16C()
* WebRtcSpl_MaxAbsValueW32C()
* WebRtcSpl_MaxValueW16C()
* WebRtcSpl_MaxValueW32C()
* WebRtcSpl_MinValueW16C()
* WebRtcSpl_MinValueW32C()
* WebRtcSpl_MaxAbsIndexW16()
* WebRtcSpl_MaxIndexW16()
* WebRtcSpl_MaxIndexW32()
* WebRtcSpl_MinIndexW16()
* WebRtcSpl_MinIndexW32()
*
*/
#include <stdlib.h>
#include "rtc_base/checks.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
// TODO(bjorn/kma): Consolidate function pairs (e.g. combine
// WebRtcSpl_MaxAbsValueW16C and WebRtcSpl_MaxAbsIndexW16 into a single one.)
// TODO(kma): Move the next six functions into min_max_operations_c.c.
// Returns the largest absolute value found in a word16 vector, clamped to
// WEBRTC_SPL_WORD16_MAX. C version for generic platforms.
int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, size_t length) {
  // Tracked as int so that abs(-32768) == 32768 is representable.
  int largest = 0;
  size_t pos;

  RTC_DCHECK_GT(length, 0);

  for (pos = 0; pos < length; ++pos) {
    const int magnitude = abs((int)vector[pos]);
    if (magnitude > largest) {
      largest = magnitude;
    }
  }

  // abs(-32768) does not fit in int16_t; report the largest positive value.
  return (int16_t)(largest > WEBRTC_SPL_WORD16_MAX ? WEBRTC_SPL_WORD16_MAX
                                                   : largest);
}
// Maximum absolute value of word32 vector. C version for generic platforms.
//
// Returns the largest |vector[i]| for i in [0, length), clamped to
// WEBRTC_SPL_WORD32_MAX so that the magnitude of INT32_MIN is reported as
// INT32_MAX. `length` is expected to be > 0 (checked in debug builds).
int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, size_t length) {
  // Use uint32_t for the local variables, to accommodate the magnitude of
  // 0x80000000 (INT32_MIN), which is 0x80000000.
  uint32_t absolute = 0, maximum = 0;
  size_t i = 0;

  RTC_DCHECK_GT(length, 0);

  for (i = 0; i < length; i++) {
    // abs(INT32_MIN) is undefined behavior because the result is not
    // representable as int, so negate in unsigned arithmetic instead, where
    // wrap-around is well defined.
    const int32_t value = vector[i];
    absolute = value < 0 ? (uint32_t)0 - (uint32_t)value : (uint32_t)value;
    if (absolute > maximum) {
      maximum = absolute;
    }
  }

  maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX);

  return (int32_t)maximum;
}
// Returns the largest element of a word16 vector. C version for generic
// platforms.
int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, size_t length) {
  int16_t largest = WEBRTC_SPL_WORD16_MIN;
  size_t pos;

  RTC_DCHECK_GT(length, 0);

  for (pos = 0; pos < length; ++pos) {
    const int16_t candidate = vector[pos];
    if (candidate > largest) {
      largest = candidate;
    }
  }
  return largest;
}
// Returns the largest element of a word32 vector. C version for generic
// platforms.
int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, size_t length) {
  int32_t largest = WEBRTC_SPL_WORD32_MIN;
  size_t pos;

  RTC_DCHECK_GT(length, 0);

  for (pos = 0; pos < length; ++pos) {
    const int32_t candidate = vector[pos];
    if (candidate > largest) {
      largest = candidate;
    }
  }
  return largest;
}
// Returns the smallest element of a word16 vector. C version for generic
// platforms.
int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, size_t length) {
  int16_t smallest = WEBRTC_SPL_WORD16_MAX;
  size_t pos;

  RTC_DCHECK_GT(length, 0);

  for (pos = 0; pos < length; ++pos) {
    const int16_t candidate = vector[pos];
    if (candidate < smallest) {
      smallest = candidate;
    }
  }
  return smallest;
}
// Returns the smallest element of a word32 vector. C version for generic
// platforms.
int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, size_t length) {
  int32_t smallest = WEBRTC_SPL_WORD32_MAX;
  size_t pos;

  RTC_DCHECK_GT(length, 0);

  for (pos = 0; pos < length; ++pos) {
    const int32_t candidate = vector[pos];
    if (candidate < smallest) {
      smallest = candidate;
    }
  }
  return smallest;
}
// Returns the index of the element with the largest absolute value in a
// word16 vector. On ties the first such index wins; an all-zero vector
// yields index 0.
size_t WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, size_t length) {
  // Magnitudes are tracked as int to accommodate abs(-32768).
  int largest = 0;
  size_t winner = 0;
  size_t pos;

  RTC_DCHECK_GT(length, 0);

  for (pos = 0; pos < length; ++pos) {
    const int magnitude = abs((int)vector[pos]);
    if (magnitude <= largest) {
      continue;
    }
    largest = magnitude;
    winner = pos;
  }
  return winner;
}
// Returns the element (with its sign) that has the largest absolute value in
// a word16 vector. It is found from the vector's minimum and maximum: the
// minimum is returned when it equals the maximum or when its magnitude is
// strictly larger, otherwise the maximum is returned.
int16_t WebRtcSpl_MaxAbsElementW16(const int16_t* vector, size_t length) {
  int16_t lowest, highest;

  WebRtcSpl_MinMaxW16(vector, length, &lowest, &highest);

  return (lowest == highest || lowest < -highest) ? lowest : highest;
}
// Returns the index of the largest element in a word16 vector. On ties the
// first such index wins.
size_t WebRtcSpl_MaxIndexW16(const int16_t* vector, size_t length) {
  int16_t largest = WEBRTC_SPL_WORD16_MIN;
  size_t winner = 0;
  size_t pos;

  RTC_DCHECK_GT(length, 0);

  for (pos = 0; pos < length; ++pos) {
    if (vector[pos] <= largest) {
      continue;
    }
    largest = vector[pos];
    winner = pos;
  }
  return winner;
}
// Returns the index of the largest element in a word32 vector. On ties the
// first such index wins.
size_t WebRtcSpl_MaxIndexW32(const int32_t* vector, size_t length) {
  int32_t largest = WEBRTC_SPL_WORD32_MIN;
  size_t winner = 0;
  size_t pos;

  RTC_DCHECK_GT(length, 0);

  for (pos = 0; pos < length; ++pos) {
    if (vector[pos] <= largest) {
      continue;
    }
    largest = vector[pos];
    winner = pos;
  }
  return winner;
}
// Returns the index of the smallest element in a word16 vector. On ties the
// first such index wins.
size_t WebRtcSpl_MinIndexW16(const int16_t* vector, size_t length) {
  int16_t smallest = WEBRTC_SPL_WORD16_MAX;
  size_t winner = 0;
  size_t pos;

  RTC_DCHECK_GT(length, 0);

  for (pos = 0; pos < length; ++pos) {
    if (vector[pos] >= smallest) {
      continue;
    }
    smallest = vector[pos];
    winner = pos;
  }
  return winner;
}
// Returns the index of the smallest element in a word32 vector. On ties the
// first such index wins.
size_t WebRtcSpl_MinIndexW32(const int32_t* vector, size_t length) {
  int32_t smallest = WEBRTC_SPL_WORD32_MAX;
  size_t winner = 0;
  size_t pos;

  RTC_DCHECK_GT(length, 0);

  for (pos = 0; pos < length; ++pos) {
    if (vector[pos] >= smallest) {
      continue;
    }
    smallest = vector[pos];
    winner = pos;
  }
  return winner;
}
// Finds both the minimum and maximum elements in an array of 16-bit integers.
//
//   vector  - input array.
//   length  - number of elements; expected to be > 0 (checked in debug builds
//             on the generic path).
//   min_val - receives the smallest element.
//   max_val - receives the largest element.
//
// When WEBRTC_HAS_NEON is defined the work is delegated to
// WebRtcSpl_MinMaxW16Neon(); otherwise a single pass over the array is made.
void WebRtcSpl_MinMaxW16(const int16_t* vector, size_t length,
                         int16_t* min_val, int16_t* max_val) {
#if defined(WEBRTC_HAS_NEON)
  // Plain call rather than `return <void expression>;`, which C does not
  // allow in a function returning void.
  WebRtcSpl_MinMaxW16Neon(vector, length, min_val, max_val);
#else
  int16_t minimum = WEBRTC_SPL_WORD16_MAX;
  int16_t maximum = WEBRTC_SPL_WORD16_MIN;
  size_t i = 0;

  RTC_DCHECK_GT(length, 0);

  for (i = 0; i < length; i++) {
    if (vector[i] < minimum)
      minimum = vector[i];
    if (vector[i] > maximum)
      maximum = vector[i];
  }
  *min_val = minimum;
  *max_val = maximum;
#endif
}

View file

@ -0,0 +1,375 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the MIPS-optimized implementations of the min/max
* functions, e.g. WebRtcSpl_MaxAbsValueW16_mips().
*
* The description header can be found in signal_processing_library.h.
*
*/
#include "rtc_base/checks.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
// Maximum absolute value of word16 vector. Version for MIPS platform.
//
// Two hand-written inline-assembly variants are selected at compile time:
//  - MIPS_DSP_R1 defined: packed-halfword instructions handle two samples per
//    32-bit register.
//  - otherwise: plain MIPS32 instructions handle one sample per register.
// Both variants consume 16 samples per main-loop iteration (length >> 4
// iterations) and finish the remaining (length & 0xf) samples one at a time.
// The running maximum is kept in `totMax` and returned truncated to int16_t.
int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, size_t length) {
int32_t totMax = 0;
int32_t tmp32_0, tmp32_1, tmp32_2, tmp32_3;
size_t i, loop_size;
RTC_DCHECK_GT(length, 0);
#if defined(MIPS_DSP_R1)
// The input is read as 32-bit words, i.e. two int16_t samples per load.
const int32_t* tmpvec32 = (int32_t*)vector;
loop_size = length >> 4;
for (i = 0; i < loop_size; i++) {
// Loads eight words (offsets 0..28), takes the packed absolute value of each
// and folds it into totMax with a compare/pick pair; loads for the second
// half are interleaved with the compares of the first half. The pointer is
// advanced by 32 bytes at the end.
__asm__ volatile (
"lw %[tmp32_0], 0(%[tmpvec32]) \n\t"
"lw %[tmp32_1], 4(%[tmpvec32]) \n\t"
"lw %[tmp32_2], 8(%[tmpvec32]) \n\t"
"lw %[tmp32_3], 12(%[tmpvec32]) \n\t"
"absq_s.ph %[tmp32_0], %[tmp32_0] \n\t"
"absq_s.ph %[tmp32_1], %[tmp32_1] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_0] \n\t"
"pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t"
"lw %[tmp32_0], 16(%[tmpvec32]) \n\t"
"absq_s.ph %[tmp32_2], %[tmp32_2] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_1] \n\t"
"pick.ph %[totMax], %[tmp32_1], %[totMax] \n\t"
"lw %[tmp32_1], 20(%[tmpvec32]) \n\t"
"absq_s.ph %[tmp32_3], %[tmp32_3] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_2] \n\t"
"pick.ph %[totMax], %[tmp32_2], %[totMax] \n\t"
"lw %[tmp32_2], 24(%[tmpvec32]) \n\t"
"cmp.lt.ph %[totMax], %[tmp32_3] \n\t"
"pick.ph %[totMax], %[tmp32_3], %[totMax] \n\t"
"lw %[tmp32_3], 28(%[tmpvec32]) \n\t"
"absq_s.ph %[tmp32_0], %[tmp32_0] \n\t"
"absq_s.ph %[tmp32_1], %[tmp32_1] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_0] \n\t"
"pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t"
"absq_s.ph %[tmp32_2], %[tmp32_2] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_1] \n\t"
"pick.ph %[totMax], %[tmp32_1], %[totMax] \n\t"
"absq_s.ph %[tmp32_3], %[tmp32_3] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_2] \n\t"
"pick.ph %[totMax], %[tmp32_2], %[totMax] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_3] \n\t"
"pick.ph %[totMax], %[tmp32_3], %[totMax] \n\t"
"addiu %[tmpvec32], %[tmpvec32], 32 \n\t"
: [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
[tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3),
[totMax] "+r" (totMax), [tmpvec32] "+r" (tmpvec32)
:
: "memory"
);
}
// Horizontal reduction: totMax holds one running maximum per halfword. The
// register is rotated by 16 bits so each half is compared against the other,
// then the result is packed into the low halfword with a zero upper half.
__asm__ volatile (
"rotr %[tmp32_0], %[totMax], 16 \n\t"
"cmp.lt.ph %[totMax], %[tmp32_0] \n\t"
"pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t"
"packrl.ph %[totMax], $0, %[totMax] \n\t"
: [tmp32_0] "=&r" (tmp32_0), [totMax] "+r" (totMax)
:
);
// Tail: one halfword per iteration, pointer advanced by 2 bytes.
// NOTE(review): unlike the #else branch, this branch has no final clamp to
// WEBRTC_SPL_WORD16_MAX. If absq_s.w leaves a sign-extended -32768 as 32768,
// the (int16_t) cast below would wrap for a tail sample — confirm.
loop_size = length & 0xf;
for (i = 0; i < loop_size; i++) {
__asm__ volatile (
"lh %[tmp32_0], 0(%[tmpvec32]) \n\t"
"addiu %[tmpvec32], %[tmpvec32], 2 \n\t"
"absq_s.w %[tmp32_0], %[tmp32_0] \n\t"
"slt %[tmp32_1], %[totMax], %[tmp32_0] \n\t"
"movn %[totMax], %[tmp32_0], %[tmp32_1] \n\t"
: [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
[tmpvec32] "+r" (tmpvec32), [totMax] "+r" (totMax)
:
: "memory"
);
}
#else // #if defined(MIPS_DSP_R1)
// Upper bound applied to the result after both loops (see the last asm block).
int32_t v16MaxMax = WEBRTC_SPL_WORD16_MAX;
int32_t r, r1, r2, r3;
const int16_t* tmpvector = vector;
loop_size = length >> 4;
for (i = 0; i < loop_size; i++) {
// Four groups of four samples (byte offsets 0..30). Each group is loaded with
// lh, made absolute, and folded into totMax with slt/movn pairs (r..r3 hold
// the comparison flags). The pointer is advanced by 32 bytes at the end.
__asm__ volatile (
"lh %[tmp32_0], 0(%[tmpvector]) \n\t"
"lh %[tmp32_1], 2(%[tmpvector]) \n\t"
"lh %[tmp32_2], 4(%[tmpvector]) \n\t"
"lh %[tmp32_3], 6(%[tmpvector]) \n\t"
"abs %[tmp32_0], %[tmp32_0] \n\t"
"abs %[tmp32_1], %[tmp32_1] \n\t"
"abs %[tmp32_2], %[tmp32_2] \n\t"
"abs %[tmp32_3], %[tmp32_3] \n\t"
"slt %[r], %[totMax], %[tmp32_0] \n\t"
"movn %[totMax], %[tmp32_0], %[r] \n\t"
"slt %[r1], %[totMax], %[tmp32_1] \n\t"
"movn %[totMax], %[tmp32_1], %[r1] \n\t"
"slt %[r2], %[totMax], %[tmp32_2] \n\t"
"movn %[totMax], %[tmp32_2], %[r2] \n\t"
"slt %[r3], %[totMax], %[tmp32_3] \n\t"
"movn %[totMax], %[tmp32_3], %[r3] \n\t"
"lh %[tmp32_0], 8(%[tmpvector]) \n\t"
"lh %[tmp32_1], 10(%[tmpvector]) \n\t"
"lh %[tmp32_2], 12(%[tmpvector]) \n\t"
"lh %[tmp32_3], 14(%[tmpvector]) \n\t"
"abs %[tmp32_0], %[tmp32_0] \n\t"
"abs %[tmp32_1], %[tmp32_1] \n\t"
"abs %[tmp32_2], %[tmp32_2] \n\t"
"abs %[tmp32_3], %[tmp32_3] \n\t"
"slt %[r], %[totMax], %[tmp32_0] \n\t"
"movn %[totMax], %[tmp32_0], %[r] \n\t"
"slt %[r1], %[totMax], %[tmp32_1] \n\t"
"movn %[totMax], %[tmp32_1], %[r1] \n\t"
"slt %[r2], %[totMax], %[tmp32_2] \n\t"
"movn %[totMax], %[tmp32_2], %[r2] \n\t"
"slt %[r3], %[totMax], %[tmp32_3] \n\t"
"movn %[totMax], %[tmp32_3], %[r3] \n\t"
"lh %[tmp32_0], 16(%[tmpvector]) \n\t"
"lh %[tmp32_1], 18(%[tmpvector]) \n\t"
"lh %[tmp32_2], 20(%[tmpvector]) \n\t"
"lh %[tmp32_3], 22(%[tmpvector]) \n\t"
"abs %[tmp32_0], %[tmp32_0] \n\t"
"abs %[tmp32_1], %[tmp32_1] \n\t"
"abs %[tmp32_2], %[tmp32_2] \n\t"
"abs %[tmp32_3], %[tmp32_3] \n\t"
"slt %[r], %[totMax], %[tmp32_0] \n\t"
"movn %[totMax], %[tmp32_0], %[r] \n\t"
"slt %[r1], %[totMax], %[tmp32_1] \n\t"
"movn %[totMax], %[tmp32_1], %[r1] \n\t"
"slt %[r2], %[totMax], %[tmp32_2] \n\t"
"movn %[totMax], %[tmp32_2], %[r2] \n\t"
"slt %[r3], %[totMax], %[tmp32_3] \n\t"
"movn %[totMax], %[tmp32_3], %[r3] \n\t"
"lh %[tmp32_0], 24(%[tmpvector]) \n\t"
"lh %[tmp32_1], 26(%[tmpvector]) \n\t"
"lh %[tmp32_2], 28(%[tmpvector]) \n\t"
"lh %[tmp32_3], 30(%[tmpvector]) \n\t"
"abs %[tmp32_0], %[tmp32_0] \n\t"
"abs %[tmp32_1], %[tmp32_1] \n\t"
"abs %[tmp32_2], %[tmp32_2] \n\t"
"abs %[tmp32_3], %[tmp32_3] \n\t"
"slt %[r], %[totMax], %[tmp32_0] \n\t"
"movn %[totMax], %[tmp32_0], %[r] \n\t"
"slt %[r1], %[totMax], %[tmp32_1] \n\t"
"movn %[totMax], %[tmp32_1], %[r1] \n\t"
"slt %[r2], %[totMax], %[tmp32_2] \n\t"
"movn %[totMax], %[tmp32_2], %[r2] \n\t"
"slt %[r3], %[totMax], %[tmp32_3] \n\t"
"movn %[totMax], %[tmp32_3], %[r3] \n\t"
"addiu %[tmpvector], %[tmpvector], 32 \n\t"
: [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
[tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3),
[totMax] "+r" (totMax), [r] "=&r" (r), [tmpvector] "+r" (tmpvector),
[r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3)
:
: "memory"
);
}
// Tail: one sample per iteration, pointer advanced by 2 bytes.
loop_size = length & 0xf;
for (i = 0; i < loop_size; i++) {
__asm__ volatile (
"lh %[tmp32_0], 0(%[tmpvector]) \n\t"
"addiu %[tmpvector], %[tmpvector], 2 \n\t"
"abs %[tmp32_0], %[tmp32_0] \n\t"
"slt %[tmp32_1], %[totMax], %[tmp32_0] \n\t"
"movn %[totMax], %[tmp32_0], %[tmp32_1] \n\t"
: [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
[tmpvector] "+r" (tmpvector), [totMax] "+r" (totMax)
:
: "memory"
);
}
// Clamp: if v16MaxMax < totMax, replace totMax with v16MaxMax, so that
// abs(-32768) = 32768 still fits the int16_t return value.
__asm__ volatile (
"slt %[r], %[v16MaxMax], %[totMax] \n\t"
"movn %[totMax], %[v16MaxMax], %[r] \n\t"
: [totMax] "+r" (totMax), [r] "=&r" (r)
: [v16MaxMax] "r" (v16MaxMax)
);
#endif // #if defined(MIPS_DSP_R1)
return (int16_t)totMax;
}
#if defined(MIPS_DSP_R1_LE)
// Maximum absolute value of word32 vector. Version for MIPS platform.
//
// Walks `length` 32-bit samples starting at `vector` in a single inline-asm
// loop, keeping the largest absolute value in `maximum`, and finally limits
// the result to 0x7fffffff (`max_value`). `length` is checked to be > 0 in
// debug builds; the loop body always executes at least once.
int32_t WebRtcSpl_MaxAbsValueW32_mips(const int32_t* vector, size_t length) {
  // Use uint32_t for the local variables, to accommodate the return value
  // of abs(0x80000000), which is 0x80000000.
  uint32_t absolute = 0, maximum = 0;
  int tmp1 = 0, max_value = 0x7fffffff;
  RTC_DCHECK_GT(length, 0);
  // `vector` and `length` are advanced/decremented inside the asm, so they
  // are declared as read-write ("+r") operands. Listing them as input-only
  // operands would let the compiler assume their registers are unchanged
  // after the statement.
  __asm__ volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "1: \n\t"
    "lw %[absolute], 0(%[vector]) \n\t"
    "absq_s.w %[absolute], %[absolute] \n\t"
    "addiu %[length], %[length], -1 \n\t"
    "slt %[tmp1], %[maximum], %[absolute] \n\t"
    "movn %[maximum], %[absolute], %[tmp1] \n\t"
    "bgtz %[length], 1b \n\t"
    " addiu %[vector], %[vector], 4 \n\t"
    "slt %[tmp1], %[max_value], %[maximum] \n\t"
    "movn %[maximum], %[max_value], %[tmp1] \n\t"
    ".set pop \n\t"
    : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [absolute] "+r" (absolute),
      [vector] "+r" (vector), [length] "+r" (length)
    : [max_value] "r" (max_value)
    : "memory"
  );
  return (int32_t)maximum;
}
#endif // #if defined(MIPS_DSP_R1_LE)
// Maximum value of word16 vector. Version for MIPS platform.
//
// Walks `length` 16-bit samples starting at `vector` in a single inline-asm
// loop and returns the largest one. The running maximum starts at
// WEBRTC_SPL_WORD16_MIN. `length` is checked to be > 0 in debug builds; the
// loop body always executes at least once.
int16_t WebRtcSpl_MaxValueW16_mips(const int16_t* vector, size_t length) {
  int16_t maximum = WEBRTC_SPL_WORD16_MIN;
  int tmp1;
  int16_t value;
  RTC_DCHECK_GT(length, 0);
  // `vector` and `length` are modified inside the asm (pointer bump in the
  // branch delay slot, down-counter), so they are read-write ("+r") operands
  // rather than input-only ones.
  __asm__ volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "1: \n\t"
    "lh %[value], 0(%[vector]) \n\t"
    "addiu %[length], %[length], -1 \n\t"
    "slt %[tmp1], %[maximum], %[value] \n\t"
    "movn %[maximum], %[value], %[tmp1] \n\t"
    "bgtz %[length], 1b \n\t"
    " addiu %[vector], %[vector], 2 \n\t"
    ".set pop \n\t"
    : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value),
      [vector] "+r" (vector), [length] "+r" (length)
    :
    : "memory"
  );
  return maximum;
}
// Maximum value of word32 vector. Version for MIPS platform.
//
// Walks `length` 32-bit samples starting at `vector` in a single inline-asm
// loop and returns the largest one. The running maximum starts at
// WEBRTC_SPL_WORD32_MIN. `length` is checked to be > 0 in debug builds; the
// loop body always executes at least once.
int32_t WebRtcSpl_MaxValueW32_mips(const int32_t* vector, size_t length) {
  int32_t maximum = WEBRTC_SPL_WORD32_MIN;
  int tmp1, value;
  RTC_DCHECK_GT(length, 0);
  // `vector` and `length` are modified inside the asm (pointer bump in the
  // branch delay slot, down-counter), so they are read-write ("+r") operands
  // rather than input-only ones.
  __asm__ volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "1: \n\t"
    "lw %[value], 0(%[vector]) \n\t"
    "addiu %[length], %[length], -1 \n\t"
    "slt %[tmp1], %[maximum], %[value] \n\t"
    "movn %[maximum], %[value], %[tmp1] \n\t"
    "bgtz %[length], 1b \n\t"
    " addiu %[vector], %[vector], 4 \n\t"
    ".set pop \n\t"
    : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value),
      [vector] "+r" (vector), [length] "+r" (length)
    :
    : "memory"
  );
  return maximum;
}
// Minimum value of word16 vector. Version for MIPS platform.
//
// Walks `length` 16-bit samples starting at `vector` in a single inline-asm
// loop and returns the smallest one. The running minimum starts at
// WEBRTC_SPL_WORD16_MAX. `length` is checked to be > 0 in debug builds; the
// loop body always executes at least once.
int16_t WebRtcSpl_MinValueW16_mips(const int16_t* vector, size_t length) {
  int16_t minimum = WEBRTC_SPL_WORD16_MAX;
  int tmp1;
  int16_t value;
  RTC_DCHECK_GT(length, 0);
  // `vector` and `length` are modified inside the asm (pointer bump in the
  // branch delay slot, down-counter), so they are read-write ("+r") operands
  // rather than input-only ones.
  __asm__ volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "1: \n\t"
    "lh %[value], 0(%[vector]) \n\t"
    "addiu %[length], %[length], -1 \n\t"
    "slt %[tmp1], %[value], %[minimum] \n\t"
    "movn %[minimum], %[value], %[tmp1] \n\t"
    "bgtz %[length], 1b \n\t"
    " addiu %[vector], %[vector], 2 \n\t"
    ".set pop \n\t"
    : [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value),
      [vector] "+r" (vector), [length] "+r" (length)
    :
    : "memory"
  );
  return minimum;
}
// Minimum value of word32 vector. Version for MIPS platform.
//
// Walks `length` 32-bit samples starting at `vector` in a single inline-asm
// loop and returns the smallest one. The running minimum starts at
// WEBRTC_SPL_WORD32_MAX. `length` is checked to be > 0 in debug builds; the
// loop body always executes at least once.
int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, size_t length) {
  int32_t minimum = WEBRTC_SPL_WORD32_MAX;
  int tmp1, value;
  RTC_DCHECK_GT(length, 0);
  // `vector` and `length` are modified inside the asm (pointer bump in the
  // branch delay slot, down-counter), so they are read-write ("+r") operands
  // rather than input-only ones.
  __asm__ volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "1: \n\t"
    "lw %[value], 0(%[vector]) \n\t"
    "addiu %[length], %[length], -1 \n\t"
    "slt %[tmp1], %[value], %[minimum] \n\t"
    "movn %[minimum], %[value], %[tmp1] \n\t"
    "bgtz %[length], 1b \n\t"
    " addiu %[vector], %[vector], 4 \n\t"
    ".set pop \n\t"
    : [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value),
      [vector] "+r" (vector), [length] "+r" (length)
    :
    : "memory"
  );
  return minimum;
}

View file

@ -0,0 +1,333 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
#include <stdlib.h>
#include "rtc_base/checks.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
// Maximum absolute value of word16 vector. C version for generic platforms.
int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, size_t length) {
int absolute = 0, maximum = 0;
RTC_DCHECK_GT(length, 0);
const int16_t* p_start = vector;
size_t rest = length & 7;
const int16_t* p_end = vector + length - rest;
int16x8_t v;
uint16x8_t max_qv;
max_qv = vdupq_n_u16(0);
while (p_start < p_end) {
v = vld1q_s16(p_start);
// Note vabs doesn't change the value of -32768.
v = vabsq_s16(v);
// Use u16 so we don't lose the value -32768.
max_qv = vmaxq_u16(max_qv, vreinterpretq_u16_s16(v));
p_start += 8;
}
#ifdef WEBRTC_ARCH_ARM64
maximum = (int)vmaxvq_u16(max_qv);
#else
uint16x4_t max_dv;
max_dv = vmax_u16(vget_low_u16(max_qv), vget_high_u16(max_qv));
max_dv = vpmax_u16(max_dv, max_dv);
max_dv = vpmax_u16(max_dv, max_dv);
maximum = (int)vget_lane_u16(max_dv, 0);
#endif
p_end = vector + length;
while (p_start < p_end) {
absolute = abs((int)(*p_start));
if (absolute > maximum) {
maximum = absolute;
}
p_start++;
}
// Guard the case for abs(-32768).
if (maximum > WEBRTC_SPL_WORD16_MAX) {
maximum = WEBRTC_SPL_WORD16_MAX;
}
return (int16_t)maximum;
}
// Maximum absolute value of word32 vector. NEON intrinsics version for
// ARM 32-bit/64-bit platforms.
//
// The bulk of the input is processed eight samples at a time using two
// four-lane accumulators; the remaining (length & 7) samples are handled by a
// scalar loop. The result is limited to WEBRTC_SPL_WORD32_MAX so that an
// input of 0x80000000 does not overflow the int32_t return value.
int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, size_t length) {
  // Use uint32_t for the local variables, to accommodate the magnitude of
  // 0x80000000, which is 0x80000000.
  uint32_t absolute = 0, maximum = 0;
  size_t i = 0;
  size_t residual = length & 0x7;
  RTC_DCHECK_GT(length, 0);
  const int32_t* p_start = vector;
  uint32x4_t max32x4_0 = vdupq_n_u32(0);
  uint32x4_t max32x4_1 = vdupq_n_u32(0);
  // First part, unroll the loop 8 times.
  for (i = 0; i < length - residual; i += 8) {
    int32x4_t in32x4_0 = vld1q_s32(p_start);
    p_start += 4;
    int32x4_t in32x4_1 = vld1q_s32(p_start);
    p_start += 4;
    in32x4_0 = vabsq_s32(in32x4_0);
    in32x4_1 = vabsq_s32(in32x4_1);
    // vabs doesn't change the value of 0x80000000.
    // Use u32 so we don't lose the value 0x80000000.
    max32x4_0 = vmaxq_u32(max32x4_0, vreinterpretq_u32_s32(in32x4_0));
    max32x4_1 = vmaxq_u32(max32x4_1, vreinterpretq_u32_s32(in32x4_1));
  }
  uint32x4_t max32x4 = vmaxq_u32(max32x4_0, max32x4_1);
#if defined(WEBRTC_ARCH_ARM64)
  maximum = vmaxvq_u32(max32x4);
#else
  uint32x2_t max32x2 = vmax_u32(vget_low_u32(max32x4), vget_high_u32(max32x4));
  max32x2 = vpmax_u32(max32x2, max32x2);
  maximum = vget_lane_u32(max32x2, 0);
#endif
  // Second part, do the remaining iterations (if any).
  for (i = residual; i > 0; i--) {
    const int32_t sample = *p_start;
    // Negate in unsigned arithmetic instead of calling abs(): abs(INT32_MIN)
    // is undefined behaviour because the result is not representable as int,
    // whereas the unsigned subtraction is well defined and gives 0x80000000.
    absolute = (sample < 0) ? ((uint32_t)0 - (uint32_t)sample)
                            : (uint32_t)sample;
    if (absolute > maximum) {
      maximum = absolute;
    }
    p_start++;
  }
  // Guard against the case for 0x80000000.
  maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX);
  return (int32_t)maximum;
}
// Maximum value of word16 vector. NEON intrinsics version for
// ARM 32-bit/64-bit platforms.
//
// Eight samples are compared per vector step; the remaining (length & 7)
// samples are folded in by a scalar loop.
int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, size_t length) {
  int16_t result = WEBRTC_SPL_WORD16_MIN;
  const size_t tail = length & 0x7;

  RTC_DCHECK_GT(length, 0);

  const int16_t* cursor = vector;
  const int16_t* const simd_stop = vector + (length - tail);
  const int16_t* const stop = vector + length;
  int16x8_t lane_max = vdupq_n_s16(WEBRTC_SPL_WORD16_MIN);

  // Vector part: per-lane running maximum.
  for (; cursor < simd_stop; cursor += 8) {
    lane_max = vmaxq_s16(lane_max, vld1q_s16(cursor));
  }

  // Collapse the eight lanes into one value.
#if defined(WEBRTC_ARCH_ARM64)
  result = vmaxvq_s16(lane_max);
#else
  {
    int16x4_t folded = vmax_s16(vget_low_s16(lane_max), vget_high_s16(lane_max));
    folded = vpmax_s16(folded, folded);
    folded = vpmax_s16(folded, folded);
    result = vget_lane_s16(folded, 0);
  }
#endif

  // Scalar tail.
  for (; cursor < stop; ++cursor) {
    if (*cursor > result) {
      result = *cursor;
    }
  }
  return result;
}
// Maximum value of word32 vector. NEON intrinsics version for
// ARM 32-bit/64-bit platforms.
int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, size_t length) {
int32_t maximum = WEBRTC_SPL_WORD32_MIN;
size_t i = 0;
size_t residual = length & 0x7;
RTC_DCHECK_GT(length, 0);
const int32_t* p_start = vector;
int32x4_t max32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN);
int32x4_t max32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN);
// First part, unroll the loop 8 times.
for (i = 0; i < length - residual; i += 8) {
int32x4_t in32x4_0 = vld1q_s32(p_start);
p_start += 4;
int32x4_t in32x4_1 = vld1q_s32(p_start);
p_start += 4;
max32x4_0 = vmaxq_s32(max32x4_0, in32x4_0);
max32x4_1 = vmaxq_s32(max32x4_1, in32x4_1);
}
int32x4_t max32x4 = vmaxq_s32(max32x4_0, max32x4_1);
#if defined(WEBRTC_ARCH_ARM64)
maximum = vmaxvq_s32(max32x4);
#else
int32x2_t max32x2 = vmax_s32(vget_low_s32(max32x4), vget_high_s32(max32x4));
max32x2 = vpmax_s32(max32x2, max32x2);
maximum = vget_lane_s32(max32x2, 0);
#endif
// Second part, do the remaining iterations (if any).
for (i = residual; i > 0; i--) {
if (*p_start > maximum)
maximum = *p_start;
p_start++;
}
return maximum;
}
// Minimum value of word16 vector. NEON intrinsics version for
// ARM 32-bit/64-bit platforms.
//
// Eight samples are compared per vector step; the remaining (length & 7)
// samples are folded in by a scalar loop.
int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, size_t length) {
  int16_t result = WEBRTC_SPL_WORD16_MAX;
  const size_t tail = length & 0x7;

  RTC_DCHECK_GT(length, 0);

  const int16_t* cursor = vector;
  const int16_t* const simd_stop = vector + (length - tail);
  const int16_t* const stop = vector + length;
  int16x8_t lane_min = vdupq_n_s16(WEBRTC_SPL_WORD16_MAX);

  // Vector part: per-lane running minimum.
  for (; cursor < simd_stop; cursor += 8) {
    lane_min = vminq_s16(lane_min, vld1q_s16(cursor));
  }

  // Collapse the eight lanes into one value.
#if defined(WEBRTC_ARCH_ARM64)
  result = vminvq_s16(lane_min);
#else
  {
    int16x4_t folded = vmin_s16(vget_low_s16(lane_min), vget_high_s16(lane_min));
    folded = vpmin_s16(folded, folded);
    folded = vpmin_s16(folded, folded);
    result = vget_lane_s16(folded, 0);
  }
#endif

  // Scalar tail.
  for (; cursor < stop; ++cursor) {
    if (*cursor < result) {
      result = *cursor;
    }
  }
  return result;
}
// Minimum value of word32 vector. NEON intrinsics version for
// ARM 32-bit/64-bit platforms.
int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length) {
int32_t minimum = WEBRTC_SPL_WORD32_MAX;
size_t i = 0;
size_t residual = length & 0x7;
RTC_DCHECK_GT(length, 0);
const int32_t* p_start = vector;
int32x4_t min32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX);
int32x4_t min32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX);
// First part, unroll the loop 8 times.
for (i = 0; i < length - residual; i += 8) {
int32x4_t in32x4_0 = vld1q_s32(p_start);
p_start += 4;
int32x4_t in32x4_1 = vld1q_s32(p_start);
p_start += 4;
min32x4_0 = vminq_s32(min32x4_0, in32x4_0);
min32x4_1 = vminq_s32(min32x4_1, in32x4_1);
}
int32x4_t min32x4 = vminq_s32(min32x4_0, min32x4_1);
#if defined(WEBRTC_ARCH_ARM64)
minimum = vminvq_s32(min32x4);
#else
int32x2_t min32x2 = vmin_s32(vget_low_s32(min32x4), vget_high_s32(min32x4));
min32x2 = vpmin_s32(min32x2, min32x2);
minimum = vget_lane_s32(min32x2, 0);
#endif
// Second part, do the remaining iterations (if any).
for (i = residual; i > 0; i--) {
if (*p_start < minimum)
minimum = *p_start;
p_start++;
}
return minimum;
}
// Finds both the minimum and maximum elements in an array of 16-bit integers.
void WebRtcSpl_MinMaxW16Neon(const int16_t* vector, size_t length,
int16_t* min_val, int16_t* max_val) {
int16_t minimum = WEBRTC_SPL_WORD16_MAX;
int16_t maximum = WEBRTC_SPL_WORD16_MIN;
size_t i = 0;
size_t residual = length & 0x7;
RTC_DCHECK_GT(length, 0);
const int16_t* p_start = vector;
int16x8_t min16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MAX);
int16x8_t max16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MIN);
// First part, unroll the loop 8 times.
for (i = 0; i < length - residual; i += 8) {
int16x8_t in16x8 = vld1q_s16(p_start);
min16x8 = vminq_s16(min16x8, in16x8);
max16x8 = vmaxq_s16(max16x8, in16x8);
p_start += 8;
}
#if defined(WEBRTC_ARCH_ARM64)
minimum = vminvq_s16(min16x8);
maximum = vmaxvq_s16(max16x8);
#else
int16x4_t min16x4 = vmin_s16(vget_low_s16(min16x8), vget_high_s16(min16x8));
min16x4 = vpmin_s16(min16x4, min16x4);
min16x4 = vpmin_s16(min16x4, min16x4);
minimum = vget_lane_s16(min16x4, 0);
int16x4_t max16x4 = vmax_s16(vget_low_s16(max16x8), vget_high_s16(max16x8));
max16x4 = vpmax_s16(max16x4, max16x4);
max16x4 = vpmax_s16(max16x4, max16x4);
maximum = vget_lane_s16(max16x4, 0);
#endif
// Second part, do the remaining iterations (if any).
for (i = residual; i > 0; i--) {
if (*p_start < minimum)
minimum = *p_start;
if (*p_start > maximum)
maximum = *p_start;
p_start++;
}
*min_val = minimum;
*max_val = maximum;
}

View file

@ -0,0 +1,115 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains implementations of the randomization functions
* WebRtcSpl_RandU()
* WebRtcSpl_RandN()
* WebRtcSpl_RandUArray()
*
* The description header can be found in signal_processing_library.h
*
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
// Modulus of the seed recurrence: IncreaseSeed() masks the state with
// (kMaxSeedUsed - 1), i.e. keeps the low 31 bits.
static const uint32_t kMaxSeedUsed = 0x80000000;
// Lookup table used by WebRtcSpl_RandN(), laid out as 64 rows of 8 entries.
// NOTE(review): presumably precomputed samples of a normal distribution (the
// "N" in RandN) — confirm against signal_processing_library.h.
static const int16_t kRandNTable[] = {
9178, -7260, 40, 10189, 4894, -3531, -13779, 14764,
-4008, -8884, -8990, 1008, 7368, 5184, 3251, -5817,
-9786, 5963, 1770, 8066, -7135, 10772, -2298, 1361,
6484, 2241, -8633, 792, 199, -3344, 6553, -10079,
-15040, 95, 11608, -12469, 14161, -4176, 2476, 6403,
13685, -16005, 6646, 2239, 10916, -3004, -602, -3141,
2142, 14144, -5829, 5305, 8209, 4713, 2697, -5112,
16092, -1210, -2891, -6631, -5360, -11878, -6781, -2739,
-6392, 536, 10923, 10872, 5059, -4748, -7770, 5477,
38, -1025, -2892, 1638, 6304, 14375, -11028, 1553,
-1565, 10762, -393, 4040, 5257, 12310, 6554, -4799,
4899, -6354, 1603, -1048, -2220, 8247, -186, -8944,
-12004, 2332, 4801, -4933, 6371, 131, 8614, -5927,
-8287, -22760, 4033, -15162, 3385, 3246, 3153, -5250,
3766, 784, 6494, -62, 3531, -1582, 15572, 662,
-3952, -330, -3196, 669, 7236, -2678, -6569, 23319,
-8645, -741, 14830, -15976, 4903, 315, -11342, 10311,
1858, -7777, 2145, 5436, 5677, -113, -10033, 826,
-1353, 17210, 7768, 986, -1471, 8291, -4982, 8207,
-14911, -6255, -2449, -11881, -7059, -11703, -4338, 8025,
7538, -2823, -12490, 9470, -1613, -2529, -10092, -7807,
9480, 6970, -12844, 5123, 3532, 4816, 4803, -8455,
-5045, 14032, -4378, -1643, 5756, -11041, -2732, -16618,
-6430, -18375, -3320, 6098, 5131, -4269, -8840, 2482,
-7048, 1547, -21890, -6505, -7414, -424, -11722, 7955,
1653, -17299, 1823, 473, -9232, 3337, 1111, 873,
4018, -8982, 9889, 3531, -11763, -3799, 7373, -4539,
3231, 7054, -8537, 7616, 6244, 16635, 447, -2915,
13967, 705, -2669, -1520, -1771, -16188, 5956, 5117,
6371, -9936, -1448, 2480, 5128, 7550, -8130, 5236,
8213, -6443, 7707, -1950, -13811, 7218, 7031, -3883,
67, 5731, -2874, 13480, -3743, 9298, -3280, 3552,
-4425, -18, -3785, -9988, -5357, 5477, -11794, 2117,
1416, -9935, 3376, 802, -5079, -8243, 12652, 66,
3653, -2368, 6781, -21895, -7227, 2487, 7839, -385,
6646, -7016, -4658, 5531, -1705, 834, 129, 3694,
-1343, 2238, -22640, -6417, -11139, 11301, -2945, -3494,
-5626, 185, -3615, -2041, -7972, -3106, -60, -23497,
-1566, 17064, 3519, 2518, 304, -6805, -10269, 2105,
1936, -426, -736, -8122, -1467, 4238, -6939, -13309,
360, 7402, -7970, 12576, 3287, 12194, -6289, -16006,
9171, 4042, -9193, 9123, -2512, 6388, -4734, -8739,
1028, -5406, -1696, 5889, -666, -4736, 4971, 3565,
9362, -6292, 3876, -3652, -19666, 7523, -4061, 391,
-11773, 7502, -3763, 4929, -9478, 13278, 2805, 4496,
7814, 16419, 12455, -14773, 2127, -2746, 3763, 4847,
3698, 6978, 4751, -6957, -3581, -45, 6252, 1513,
-4797, -7925, 11270, 16188, -2359, -5269, 9376, -10777,
7262, 20031, -6515, -2208, -5353, 8085, -1341, -1303,
7333, 5576, 3625, 5763, -7931, 9833, -3371, -10305,
6534, -13539, -9971, 997, 8464, -4064, -1495, 1857,
13624, 5458, 9490, -11086, -4524, 12022, -550, -198,
408, -8455, -7068, 10289, 9712, -3366, 9028, -7621,
-5243, 2362, 6909, 4672, -4933, -1799, 4709, -4563,
-62, -566, 1624, -7010, 14730, -17791, -3697, -2344,
-1741, 7099, -9509, -6855, -1989, 3495, -2289, 2031,
12784, 891, 14189, -3963, -5683, 421, -12575, 1724,
-12682, -5970, -8169, 3143, -1824, -5488, -5130, 8536,
12799, 794, 5738, 3459, -11689, -258, -3738, -3775,
-8742, 2333, 8312, -9383, 10331, 13119, 8398, 10644,
-19433, -6446, -16277, -11793, 16284, 9345, 15222, 15834,
2009, -7349, 130, -14547, 338, -5998, 3337, 21492,
2406, 7703, -951, 11196, -564, 3406, 2217, 4806,
2374, -5797, 11839, 8940, -11874, 18213, 2855, 10492
};
// Advances the generator state stored in `*seed` by one step of the
// recurrence state = (state * 69069 + 1) masked with (kMaxSeedUsed - 1),
// writes the new state back and returns it.
static uint32_t IncreaseSeed(uint32_t* seed) {
  const uint32_t next = (*seed * ((int32_t)69069) + 1) & (kMaxSeedUsed - 1);
  *seed = next;
  return next;
}
// Steps the generator and returns the new state shifted right by 16 bits,
// converted to int16_t. `*seed` is updated in place.
int16_t WebRtcSpl_RandU(uint32_t* seed) {
  const uint32_t state = IncreaseSeed(seed);
  return (int16_t)(state >> 16);
}
// Steps the generator and uses the new state shifted right by 23 bits as an
// index into kRandNTable. `*seed` is updated in place.
int16_t WebRtcSpl_RandN(uint32_t* seed) {
  const uint32_t table_pos = IncreaseSeed(seed) >> 23;
  return kRandNTable[table_pos];
}
// Creates an array of uniformly distributed variables.
//
// Fills vector[0..vector_length) with successive WebRtcSpl_RandU() outputs,
// advancing `*seed` once per element, and returns `vector_length`. Nothing is
// written when `vector_length` is zero or negative.
int16_t WebRtcSpl_RandUArray(int16_t* vector,
                             int16_t vector_length,
                             uint32_t* seed) {
  int16_t* out = vector;
  int remaining;

  for (remaining = vector_length; remaining > 0; --remaining) {
    *out = WebRtcSpl_RandU(seed);
    ++out;
  }
  return vector_length;
}

View file

@ -0,0 +1,102 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/signal_processing/include/real_fft.h"
#include <stdlib.h>
#include "common_audio/signal_processing/include/signal_processing_library.h"
// State of a real-valued FFT instance. It only records the transform order;
// the forward/inverse wrappers in this file derive the transform length as
// 1 << order.
struct RealFFT {
int order;
};
// Allocates a RealFFT instance for the given order.
//
// Returns NULL when `order` is negative or larger than kMaxFFTOrder, or when
// the allocation fails. The returned object is released with
// WebRtcSpl_FreeRealFFT().
struct RealFFT* WebRtcSpl_CreateRealFFT(int order) {
  struct RealFFT* fft;

  if (order < 0 || order > kMaxFFTOrder) {
    return NULL;
  }

  fft = malloc(sizeof(*fft));
  if (fft != NULL) {
    fft->order = order;
  }
  return fft;
}
void WebRtcSpl_FreeRealFFT(struct RealFFT* self) {
if (self != NULL) {
free(self);
}
}
// The C version FFT functions (i.e. WebRtcSpl_RealForwardFFT and
// WebRtcSpl_RealInverseFFT) are real-valued FFT wrappers for complex-valued
// FFT implementation in SPL.
//
// Forward transform: reads n = 1 << self->order real samples from
// `real_data_in`, runs the complex FFT on them with zero imaginary parts, and
// writes the first n + 2 interleaved values of the result to
// `complex_data_out`. Returns the value reported by WebRtcSpl_ComplexFFT().
int WebRtcSpl_RealForwardFFT(struct RealFFT* self,
                             const int16_t* real_data_in,
                             int16_t* complex_data_out) {
  const int order = self->order;
  const int num_points = 1 << order;
  // Scratch space for 2^order 16-bit COMPLEX numbers (interleaved re/im),
  // sized for the largest supported order.
  int16_t work[2 << kMaxFFTOrder];
  int16_t* slot = work;
  int status;
  int k;

  // Interleave the real input with zero imaginary parts.
  for (k = 0; k < num_points; ++k) {
    *slot++ = real_data_in[k];
    *slot++ = 0;
  }

  WebRtcSpl_ComplexBitReverse(work, order);
  status = WebRtcSpl_ComplexFFT(work, order, 1);

  // For real FFT output, only the first N + 2 elements of the complex
  // forward FFT are handed back.
  memcpy(complex_data_out, work, sizeof(int16_t) * (num_points + 2));
  return status;
}
// Inverse transform: reads the first n + 2 interleaved values (n = 1 <<
// self->order) from `complex_data_in`, rebuilds the rest of the spectrum from
// conjugate symmetry, runs the complex inverse FFT and writes the n real
// parts to `real_data_out`. Returns the value reported by
// WebRtcSpl_ComplexIFFT().
int WebRtcSpl_RealInverseFFT(struct RealFFT* self,
                             const int16_t* complex_data_in,
                             int16_t* real_data_out) {
  const int order = self->order;
  const int num_points = 1 << order;
  // Scratch space specific to the complex-valued FFT implementation.
  int16_t work[2 << kMaxFFTOrder];
  int status;
  int bin;
  int k;

  // The first n + 2 values are taken as they are ...
  memcpy(work, complex_data_in, sizeof(int16_t) * (num_points + 2));

  // ... and each remaining bin is the complex conjugate of its mirror bin
  // (num_points - bin): same real part, negated imaginary part.
  for (bin = num_points / 2 + 1; bin < num_points; ++bin) {
    const int mirror = 2 * (num_points - bin);
    work[2 * bin] = complex_data_in[mirror];
    work[2 * bin + 1] = -complex_data_in[mirror + 1];
  }

  WebRtcSpl_ComplexBitReverse(work, order);
  status = WebRtcSpl_ComplexIFFT(work, order, 1);

  // Keep only the real parts of the complex inverse FFT output.
  for (k = 0; k < num_points; ++k) {
    real_data_out[k] = work[2 * k];
  }
  return status;
}

View file

@ -0,0 +1,59 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_ReflCoefToLpc().
* The description header can be found in signal_processing_library.h
*
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
// Converts reflection coefficients `k` into LPC coefficients `a`.
//
// a[0] is set to 4096 and a[1] to k[0] >> 3. For each further order m
// (1 <= m < use_order) the next coefficient set is built in a scratch array
// from the current one and k[m], then copied back into a[0..m+1]. On return
// a[0..use_order] has been written.
void WebRtcSpl_ReflCoefToLpc(const int16_t *k, int use_order, int16_t *a)
{
    int16_t scratch[WEBRTC_SPL_MAX_LPC_ORDER + 1];
    int order, idx;

    a[0] = 4096; // i.e., (Word16_MAX >> 3)+1.
    scratch[0] = a[0];
    a[1] = k[0] >> 3;

    for (order = 1; order < use_order; order++)
    {
        const int16_t refl = k[order];

        // The new highest-order coefficient comes straight from k[order].
        scratch[order + 1] = refl >> 3;

        // Each lower coefficient is combined with its mirror-image
        // coefficient, weighted by the reflection coefficient.
        for (idx = 1; idx <= order; idx++)
        {
            scratch[idx] = a[idx] + (int16_t)((a[order + 1 - idx] * refl) >> 15);
        }

        // Publish the updated set; scratch[0] still holds 4096.
        for (idx = 0; idx < order + 2; idx++)
        {
            a[idx] = scratch[idx];
        }
    }
}

View file

@ -0,0 +1,505 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the resampling functions for 22 kHz.
* The description header can be found in signal_processing_library.h
*
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "common_audio/signal_processing/resample_by_2_internal.h"
// Declaration of internally used functions
static void WebRtcSpl_32khzTo22khzIntToShort(const int32_t *In, int16_t *Out,
int32_t K);
void WebRtcSpl_32khzTo22khzIntToInt(const int32_t *In, int32_t *Out,
int32_t K);
// Interpolation coefficients for the 32 kHz -> 22 kHz conversion: 5 rows of
// 9 values each.
// NOTE(review): presumably one row per fractional output phase and nine filter
// taps per row — confirm against WebRtcSpl_32khzTo22khzIntToInt().
static const int16_t kCoefficients32To22[5][9] = {
{127, -712, 2359, -6333, 23456, 16775, -3695, 945, -154},
{-39, 230, -830, 2785, 32366, -2324, 760, -218, 38},
{117, -663, 2222, -6133, 26634, 13070, -3174, 831, -137},
{-77, 457, -1677, 5958, 31175, -4136, 1405, -408, 71},
{ 98, -560, 1900, -5406, 29240, 9423, -2480, 663, -110}
};
//////////////////////
// 22 kHz -> 16 kHz //
//////////////////////
// number of subblocks; options: 1, 2, 4, 5, 10
#define SUB_BLOCKS_22_16 5
// 22 -> 16 resampler
//
// Converts 220 input samples to 160 output samples in SUB_BLOCKS_22_16
// sub-blocks (to reduce temp buffer size). Each sub-block goes through three
// stages: 22 -> 44 kHz, 44 -> 32 kHz and 32 -> 16 kHz. `tmpmem` is
// caller-provided scratch memory and `state` carries the filter memories
// between calls.
void WebRtcSpl_Resample22khzTo16khz(const int16_t* in, int16_t* out,
                                    WebRtcSpl_State22khzTo16khz* state, int32_t* tmpmem)
{
    // Per-sub-block sample counts at each rate.
    const int len_22 = 220 / SUB_BLOCKS_22_16;
    const int len_44 = 440 / SUB_BLOCKS_22_16;
    const int len_32 = 320 / SUB_BLOCKS_22_16;
    const int len_16 = 160 / SUB_BLOCKS_22_16;
    int blocks_left, s;

    for (blocks_left = SUB_BLOCKS_22_16; blocks_left > 0; blocks_left--)
    {
        ///// 22 --> 44 /////
        // int16_t in[len_22] -> int32_t out[len_44], written at tmpmem + 16
        WebRtcSpl_UpBy2ShortToInt(in, len_22, tmpmem + 16, state->S_22_44);

        ///// 44 --> 32 /////
        // int32_t in[len_44] -> int32_t out[len_32]
        // The saved 8-sample state is placed directly in front of the fresh
        // 44 kHz data ...
        for (s = 0; s < 8; s++)
        {
            tmpmem[8 + s] = state->S_44_32[s];
        }
        // ... and the last 8 samples of that data become the state for the
        // next sub-block.
        for (s = 0; s < 8; s++)
        {
            state->S_44_32[s] = tmpmem[len_44 + 8 + s];
        }
        WebRtcSpl_Resample44khzTo32khz(tmpmem + 8, tmpmem, 40 / SUB_BLOCKS_22_16);

        ///// 32 --> 16 /////
        // int32_t in[len_32] -> int16_t out[len_16]
        WebRtcSpl_DownBy2IntToShort(tmpmem, len_32, out, state->S_32_16);

        // advance input/output pointers by one sub-block (10/SUB_BLOCKS_22_16 ms)
        in += len_22;
        out += len_16;
    }
}
// initialize state of 22 -> 16 resampler
//
// Clears the first 8 entries of each of the three filter memories.
void WebRtcSpl_ResetResample22khzTo16khz(WebRtcSpl_State22khzTo16khz* state)
{
    int idx = 8;

    while (idx-- > 0)
    {
        state->S_22_44[idx] = 0;
        state->S_44_32[idx] = 0;
        state->S_32_16[idx] = 0;
    }
}
//////////////////////
// 16 kHz -> 22 kHz //
//////////////////////
// number of subblocks; options: 1, 2, 4, 5, 10
#define SUB_BLOCKS_16_22 4

// 16 -> 22 resampler
void WebRtcSpl_Resample16khzTo22khz(const int16_t* in, int16_t* out,
                                    WebRtcSpl_State16khzTo22khz* state, int32_t* tmpmem)
{
    // Number of samples handled per sub-block at each stage of the chain.
    const int in_16k = 160 / SUB_BLOCKS_16_22;
    const int mid_32k = 320 / SUB_BLOCKS_16_22;
    const int out_22k = 220 / SUB_BLOCKS_16_22;
    int block;
    int n;

    // The frame is processed in SUB_BLOCKS_16_22 sub-blocks of
    // 10/SUB_BLOCKS_16_22 ms each (to reduce temp buffer size).
    for (block = 0; block < SUB_BLOCKS_16_22; block++)
    {
        ///// 16 --> 32 /////
        // int16_t in[in_16k]  ->  int32_t tmpmem[8 .. 8 + mid_32k)
        WebRtcSpl_UpBy2ShortToInt(in, in_16k, tmpmem + 8, state->S_16_32);

        ///// 32 --> 22 /////
        // The 8 saved samples go in front of the new 32 kHz data, then the
        // last 8 samples of the new data are saved for the next sub-block.
        for (n = 0; n < 8; n++)
        {
            tmpmem[n] = state->S_32_22[n];
        }
        for (n = 0; n < 8; n++)
        {
            state->S_32_22[n] = tmpmem[mid_32k + n];
        }
        // int32_t in at tmpmem  ->  int16_t out[out_22k]
        WebRtcSpl_32khzTo22khzIntToShort(tmpmem, out, 20 / SUB_BLOCKS_16_22);

        // advance input/output pointers by one sub-block
        in += in_16k;
        out += out_22k;
    }
}
// Clears the first 8 entries of both filter-state arrays of the
// 16 -> 22 resampler.
void WebRtcSpl_ResetResample16khzTo22khz(WebRtcSpl_State16khzTo22khz* state)
{
    int idx = 8;

    while (idx-- > 0)
    {
        state->S_16_32[idx] = 0;
        state->S_32_22[idx] = 0;
    }
}
//////////////////////
// 22 kHz -> 8 kHz //
//////////////////////
// number of subblocks; options: 1, 2, 5, 10
#define SUB_BLOCKS_22_8 2

// 22 -> 8 resampler
void WebRtcSpl_Resample22khzTo8khz(const int16_t* in, int16_t* out,
                                   WebRtcSpl_State22khzTo8khz* state, int32_t* tmpmem)
{
    // Number of samples handled per sub-block at each stage of the chain.
    const int len_22k = 220 / SUB_BLOCKS_22_8;
    const int len_16k = 160 / SUB_BLOCKS_22_8;
    const int len_8k = 80 / SUB_BLOCKS_22_8;
    int block;
    int n;

    // The frame is processed in SUB_BLOCKS_22_8 sub-blocks of
    // 10/SUB_BLOCKS_22_8 ms each (to reduce temp buffer size).
    for (block = 0; block < SUB_BLOCKS_22_8; block++)
    {
        ///// 22 --> 22 lowpass /////
        // int16_t in[len_22k]  ->  int32_t tmpmem[16 .. 16 + len_22k)
        WebRtcSpl_LPBy2ShortToInt(in, len_22k, tmpmem + 16, state->S_22_22);

        ///// 22 --> 16 /////
        // The 8 saved samples go in front of the lowpassed data, then the
        // last 8 lowpassed samples are saved for the next sub-block.
        for (n = 0; n < 8; n++)
        {
            tmpmem[8 + n] = state->S_22_16[n];
        }
        for (n = 0; n < 8; n++)
        {
            state->S_22_16[n] = tmpmem[len_22k + 8 + n];
        }
        // int32_t in at tmpmem + 8  ->  int32_t out[len_16k] at tmpmem
        WebRtcSpl_Resample44khzTo32khz(tmpmem + 8, tmpmem, 20 / SUB_BLOCKS_22_8);

        ///// 16 --> 8 /////
        // int32_t tmpmem[len_16k]  ->  int16_t out[len_8k]
        WebRtcSpl_DownBy2IntToShort(tmpmem, len_16k, out, state->S_16_8);

        // advance input/output pointers by one sub-block
        in += len_22k;
        out += len_8k;
    }
}
// Clears the filter state of the 22 -> 8 resampler: 16 entries of S_22_22
// and 8 entries each of S_22_16 and S_16_8.
void WebRtcSpl_ResetResample22khzTo8khz(WebRtcSpl_State22khzTo8khz* state)
{
    int idx;

    for (idx = 0; idx < 16; idx++)
    {
        state->S_22_22[idx] = 0;
    }
    for (idx = 0; idx < 8; idx++)
    {
        state->S_22_16[idx] = 0;
        state->S_16_8[idx] = 0;
    }
}
//////////////////////
// 8 kHz -> 22 kHz //
//////////////////////
// number of subblocks; options: 1, 2, 5, 10
#define SUB_BLOCKS_8_22 2

// 8 -> 22 resampler
void WebRtcSpl_Resample8khzTo22khz(const int16_t* in, int16_t* out,
                                   WebRtcSpl_State8khzTo22khz* state, int32_t* tmpmem)
{
    // Number of samples handled per sub-block at each stage of the chain.
    const int len_8k = 80 / SUB_BLOCKS_8_22;
    const int len_16k = 160 / SUB_BLOCKS_8_22;
    const int len_11k = 110 / SUB_BLOCKS_8_22;
    const int len_22k = 220 / SUB_BLOCKS_8_22;
    int block;
    int n;

    // The frame is processed in SUB_BLOCKS_8_22 sub-blocks of
    // 10/SUB_BLOCKS_8_22 ms each (to reduce temp buffer size).
    for (block = 0; block < SUB_BLOCKS_8_22; block++)
    {
        ///// 8 --> 16 /////
        // int16_t in[len_8k]  ->  int32_t tmpmem[18 .. 18 + len_16k)
        WebRtcSpl_UpBy2ShortToInt(in, len_8k, tmpmem + 18, state->S_8_16);

        ///// 16 --> 11 /////
        // The 8 saved samples go in front of the new 16 kHz data, then the
        // last 8 samples of the new data are saved for the next sub-block.
        for (n = 0; n < 8; n++)
        {
            tmpmem[10 + n] = state->S_16_11[n];
        }
        for (n = 0; n < 8; n++)
        {
            state->S_16_11[n] = tmpmem[len_16k + 10 + n];
        }
        // int32_t in at tmpmem + 10  ->  int32_t out[len_11k] at tmpmem
        WebRtcSpl_32khzTo22khzIntToInt(tmpmem + 10, tmpmem, 10 / SUB_BLOCKS_8_22);

        ///// 11 --> 22 /////
        // int32_t tmpmem[len_11k]  ->  int16_t out[len_22k]
        WebRtcSpl_UpBy2IntToShort(tmpmem, len_11k, out, state->S_11_22);

        // advance input/output pointers by one sub-block
        in += len_8k;
        out += len_22k;
    }
}
// Clears the first 8 entries of every filter-state array of the
// 8 -> 22 resampler.
void WebRtcSpl_ResetResample8khzTo22khz(WebRtcSpl_State8khzTo22khz* state)
{
    int idx = 8;

    while (idx-- > 0)
    {
        state->S_8_16[idx] = 0;
        state->S_16_11[idx] = 0;
        state->S_11_22[idx] = 0;
    }
}
// Computes two 9-tap inner products that share one coefficient set and
// stores them to the output locations.
//   *out1 = 16384 + sum_{k=0..8} coef_ptr[k] * in1[k]    (in1 walks forward)
//   *out2 = 16384 + sum_{k=0..8} coef_ptr[k] * in2[-k]   (in2 walks backward)
static void WebRtcSpl_DotProdIntToInt(const int32_t* in1, const int32_t* in2,
                                      const int16_t* coef_ptr, int32_t* out1,
                                      int32_t* out2)
{
    int32_t acc_fwd = 16384;
    int32_t acc_bwd = 16384;
    int16_t c;
    int tap;

    // first eight taps
    for (tap = 0; tap < 8; tap++)
    {
        c = coef_ptr[tap];
        acc_fwd += c * in1[tap];
        acc_bwd += c * in2[-tap];
    }

    // The last tap is folded into the stores; *out1 is written before
    // in2[-8] is read, exactly as in the unrolled form.
    c = coef_ptr[8];
    *out1 = acc_fwd + c * in1[8];
    *out2 = acc_bwd + c * in2[-8];
}
// Computes two 9-tap inner products that share one coefficient set, then
// scales each down by 15 bits and saturates it to the int16_t range.
//   acc1 = 16384 + sum_{k=0..8} coef_ptr[k] * in1[k]    (in1 walks forward)
//   acc2 = 16384 + sum_{k=0..8} coef_ptr[k] * in2[-k]   (in2 walks backward)
static void WebRtcSpl_DotProdIntToShort(const int32_t* in1, const int32_t* in2,
                                        const int16_t* coef_ptr, int16_t* out1,
                                        int16_t* out2)
{
    int32_t acc_fwd = 16384;
    int32_t acc_bwd = 16384;
    int tap;

    for (tap = 0; tap < 9; tap++)
    {
        const int16_t c = coef_ptr[tap];
        acc_fwd += c * in1[tap];
        acc_bwd += c * in2[-tap];
    }

    // scale down and clamp to [-32768, 32767]
    acc_fwd >>= 15;
    acc_fwd = (acc_fwd > 32767) ? 32767 : acc_fwd;
    acc_fwd = (acc_fwd < -32768) ? -32768 : acc_fwd;

    acc_bwd >>= 15;
    acc_bwd = (acc_bwd > 32767) ? 32767 : acc_bwd;
    acc_bwd = (acc_bwd < -32768) ? -32768 : acc_bwd;

    *out1 = (int16_t)acc_fwd;
    *out2 = (int16_t)acc_bwd;
}
// Resampling ratio: 11/16
// input:  int32_t (normalized, not saturated) :: size 16 * K
//         (each block additionally reads up to In[22], i.e. 7 samples past
//          its own 16)
// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 11 * K
//      K: Number of blocks
void WebRtcSpl_32khzTo22khzIntToInt(const int32_t* In,
                                    int32_t* Out,
                                    int32_t K)
{
    /////////////////////////////////////////////////////////////
    // Filter operation:
    //
    // Perform resampling (16 input samples -> 11 output samples);
    // process in sub blocks of size 16 samples.

    // Start offsets of the forward and backward input windows for each of
    // the five coefficient sets; set j yields Out[1 + j] and Out[10 - j].
    static const int kForwardStart[5] = {0, 2, 3, 5, 6};
    static const int kBackwardStart[5] = {22, 20, 19, 17, 16};
    int32_t m;
    int j;

    for (m = 0; m < K; m++)
    {
        // first output sample: In[3] brought to the output scale.
        // Multiplication is used instead of "<< 15" because left-shifting a
        // negative signed value is undefined behavior in C.
        Out[0] = In[3] * (1 << 15) + (1 << 14);

        // sum and accumulate filter coefficients and input samples
        for (j = 0; j < 5; j++)
        {
            WebRtcSpl_DotProdIntToInt(&In[kForwardStart[j]], &In[kBackwardStart[j]],
                                      kCoefficients32To22[j], &Out[1 + j], &Out[10 - j]);
        }

        // update pointers
        In += 16;
        Out += 11;
    }
}
// Resampling ratio: 11/16
// input:  int32_t (normalized, not saturated) :: size 16 * K
// output: int16_t (saturated) :: size 11 * K
//      K: Number of blocks
void WebRtcSpl_32khzTo22khzIntToShort(const int32_t *In,
                                      int16_t *Out,
                                      int32_t K)
{
    /////////////////////////////////////////////////////////////
    // Filter operation:
    //
    // Perform resampling (16 input samples -> 11 output samples);
    // process in sub blocks of size 16 samples.

    // Start offsets of the forward and backward input windows for each of
    // the five coefficient sets; set j yields Out[1 + j] and Out[10 - j].
    static const int kForwardStart[5] = {0, 2, 3, 5, 6};
    static const int kBackwardStart[5] = {22, 20, 19, 17, 16};
    int32_t block;
    int j;

    for (block = 0; block < K; block++)
    {
        // first output sample: In[3] clamped to the int16_t range
        int32_t first = In[3];
        if (first > 32767)
        {
            first = 32767;
        }
        if (first < -32768)
        {
            first = -32768;
        }
        Out[0] = (int16_t)first;

        // sum and accumulate filter coefficients and input samples
        for (j = 0; j < 5; j++)
        {
            WebRtcSpl_DotProdIntToShort(&In[kForwardStart[j]], &In[kBackwardStart[j]],
                                        kCoefficients32To22[j], &Out[1 + j], &Out[10 - j]);
        }

        // update pointers
        In += 16;
        Out += 11;
    }
}

View file

@ -0,0 +1,186 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains resampling functions between 48 kHz and nb/wb.
* The description header can be found in signal_processing_library.h
*
*/
#include <string.h>
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "common_audio/signal_processing/resample_by_2_internal.h"
////////////////////////////
///// 48 kHz -> 16 kHz /////
////////////////////////////
// 48 -> 16 resampler
// Converts 480 input samples to 160 output samples via
// lowpass (48 -> 48) -> fractional resampler (48 -> 32) -> decimator (32 -> 16).
void WebRtcSpl_Resample48khzTo16khz(const int16_t* in, int16_t* out,
                                    WebRtcSpl_State48khzTo16khz* state, int32_t* tmpmem)
{
    int32_t* const history = tmpmem + 8;     // 8 saved samples go here
    int32_t* const lowpassed = tmpmem + 16;  // 480 lowpassed samples
    const size_t history_bytes = 8 * sizeof(int32_t);

    ///// 48 --> 48(LP) /////
    // int16_t in[480]  ->  int32_t lowpassed[480]
    WebRtcSpl_LPBy2ShortToInt(in, 480, lowpassed, state->S_48_48);

    ///// 48 --> 32 /////
    // Put the saved samples in front of the new data, then save the last
    // 8 lowpassed samples (tmpmem[488..495]) for the next call.
    memcpy(history, state->S_48_32, history_bytes);
    memcpy(state->S_48_32, tmpmem + 488, history_bytes);
    // int32_t in starting at history  ->  int32_t tmpmem[320]
    WebRtcSpl_Resample48khzTo32khz(history, tmpmem, 160);

    ///// 32 --> 16 /////
    // int32_t tmpmem[320]  ->  int16_t out[160]
    WebRtcSpl_DownBy2IntToShort(tmpmem, 320, out, state->S_32_16);
}
// Zeroes the filter state of the 48 -> 16 resampler
// (16 words of S_48_48, 8 words each of S_48_32 and S_32_16).
void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state)
{
    const size_t word = sizeof(int32_t);

    memset(state->S_32_16, 0, 8 * word);
    memset(state->S_48_32, 0, 8 * word);
    memset(state->S_48_48, 0, 16 * word);
}
////////////////////////////
///// 16 kHz -> 48 kHz /////
////////////////////////////
// 16 -> 48 resampler
// Converts 160 input samples to 480 output samples via
// interpolator (16 -> 32) -> fractional resampler (32 -> 24) -> interpolator (24 -> 48).
void WebRtcSpl_Resample16khzTo48khz(const int16_t* in, int16_t* out,
                                    WebRtcSpl_State16khzTo48khz* state, int32_t* tmpmem)
{
    int32_t* const history = tmpmem + 8;     // 8 saved samples go here
    int32_t* const upsampled = tmpmem + 16;  // 320 samples at 32 kHz
    const size_t history_bytes = 8 * sizeof(int32_t);

    ///// 16 --> 32 /////
    // int16_t in[160]  ->  int32_t upsampled[320]
    WebRtcSpl_UpBy2ShortToInt(in, 160, upsampled, state->S_16_32);

    ///// 32 --> 24 /////
    // Put the saved samples in front of the new data, then save the last
    // 8 upsampled samples (tmpmem[328..335]) for the next call.
    memcpy(history, state->S_32_24, history_bytes);
    memcpy(state->S_32_24, tmpmem + 328, history_bytes);
    // int32_t in starting at history  ->  int32_t tmpmem[240]
    WebRtcSpl_Resample32khzTo24khz(history, tmpmem, 80);

    ///// 24 --> 48 /////
    // int32_t tmpmem[240]  ->  int16_t out[480]
    WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48);
}
// Zeroes the filter state of the 16 -> 48 resampler
// (8 words each of S_16_32, S_32_24 and S_24_48).
void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state)
{
    const size_t stage_bytes = 8 * sizeof(int32_t);

    memset(state->S_24_48, 0, stage_bytes);
    memset(state->S_32_24, 0, stage_bytes);
    memset(state->S_16_32, 0, stage_bytes);
}
////////////////////////////
///// 48 kHz -> 8 kHz /////
////////////////////////////
// 48 -> 8 resampler
// Converts 480 input samples to 80 output samples via
// decimator (48 -> 24) -> lowpass (24 -> 24) -> fractional resampler
// (24 -> 16) -> decimator (16 -> 8).
void WebRtcSpl_Resample48khzTo8khz(const int16_t* in, int16_t* out,
                                   WebRtcSpl_State48khzTo8khz* state, int32_t* tmpmem)
{
    int32_t* const history = tmpmem + 8;      // 8 saved samples go here
    int32_t* const lowpassed = tmpmem + 16;   // 240 lowpassed samples
    int32_t* const decimated = tmpmem + 256;  // 240 samples at 24 kHz
    const size_t history_bytes = 8 * sizeof(int32_t);

    ///// 48 --> 24 /////
    // int16_t in[480]  ->  int32_t decimated[240]
    WebRtcSpl_DownBy2ShortToInt(in, 480, decimated, state->S_48_24);

    ///// 24 --> 24(LP) /////
    // int32_t decimated[240]  ->  int32_t lowpassed[240]
    WebRtcSpl_LPBy2IntToInt(decimated, 240, lowpassed, state->S_24_24);

    ///// 24 --> 16 /////
    // Put the saved samples in front of the new data, then save the last
    // 8 lowpassed samples (tmpmem[248..255]) for the next call.
    memcpy(history, state->S_24_16, history_bytes);
    memcpy(state->S_24_16, tmpmem + 248, history_bytes);
    // int32_t in starting at history  ->  int32_t tmpmem[160]
    WebRtcSpl_Resample48khzTo32khz(history, tmpmem, 80);

    ///// 16 --> 8 /////
    // int32_t tmpmem[160]  ->  int16_t out[80]
    WebRtcSpl_DownBy2IntToShort(tmpmem, 160, out, state->S_16_8);
}
// Zeroes the filter state of the 48 -> 8 resampler
// (16 words of S_24_24, 8 words each of S_48_24, S_24_16 and S_16_8).
void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state)
{
    const size_t word = sizeof(int32_t);

    memset(state->S_16_8, 0, 8 * word);
    memset(state->S_24_16, 0, 8 * word);
    memset(state->S_24_24, 0, 16 * word);
    memset(state->S_48_24, 0, 8 * word);
}
////////////////////////////
///// 8 kHz -> 48 kHz /////
////////////////////////////
// 8 -> 48 resampler
// Converts 80 input samples to 480 output samples via
// interpolator (8 -> 16) -> fractional resampler (16 -> 12) ->
// interpolator (12 -> 24) -> interpolator (24 -> 48).
void WebRtcSpl_Resample8khzTo48khz(const int16_t* in, int16_t* out,
                                   WebRtcSpl_State8khzTo48khz* state, int32_t* tmpmem)
{
    int32_t* const stage12k = tmpmem + 240;   // 120 samples at 12 kHz
    int32_t* const history = tmpmem + 256;    // 8 saved samples go here
    int32_t* const upsampled = tmpmem + 264;  // 160 samples at 16 kHz
    const size_t history_bytes = 8 * sizeof(int32_t);

    ///// 8 --> 16 /////
    // int16_t in[80]  ->  int32_t upsampled[160]
    WebRtcSpl_UpBy2ShortToInt(in, 80, upsampled, state->S_8_16);

    ///// 16 --> 12 /////
    // Put the saved samples in front of the new data, then save the last
    // 8 upsampled samples (tmpmem[416..423]) for the next call.
    memcpy(history, state->S_16_12, history_bytes);
    memcpy(state->S_16_12, tmpmem + 416, history_bytes);
    // int32_t in starting at history  ->  int32_t stage12k[120]
    WebRtcSpl_Resample32khzTo24khz(history, stage12k, 40);

    ///// 12 --> 24 /////
    // int32_t stage12k[120]  ->  int32_t tmpmem[240]
    WebRtcSpl_UpBy2IntToInt(stage12k, 120, tmpmem, state->S_12_24);

    ///// 24 --> 48 /////
    // int32_t tmpmem[240]  ->  int16_t out[480]
    WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48);
}
// Zeroes the filter state of the 8 -> 48 resampler
// (8 words each of S_8_16, S_16_12, S_12_24 and S_24_48).
void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state)
{
    const size_t stage_bytes = 8 * sizeof(int32_t);

    memset(state->S_24_48, 0, stage_bytes);
    memset(state->S_12_24, 0, stage_bytes);
    memset(state->S_16_12, 0, stage_bytes);
    memset(state->S_8_16, 0, stage_bytes);
}

View file

@ -0,0 +1,183 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the resampling by two functions.
* The description header can be found in signal_processing_library.h
*
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
// Two alternative definitions of the allpass coefficient tables and of the
// multiply-accumulate primitives MUL_ACCUM_1 / MUL_ACCUM_2 follow: inline
// assembly helpers when WEBRTC_ARCH_ARM_V7 is defined, and macros built on
// WEBRTC_SPL_SCALEDIFF32 otherwise.
#ifdef WEBRTC_ARCH_ARM_V7
// allpass filter coefficients.
// The entries that this file passes to MUL_ACCUM_2 (kResampleAllpass1[2],
// kResampleAllpass2[1] and kResampleAllpass2[2]) are stored pre-shifted left
// by 15 bits; the entries passed to MUL_ACCUM_1 are stored unshifted.
static const uint32_t kResampleAllpass1[3] = {3284, 24441, 49528 << 15};
static const uint32_t kResampleAllpass2[3] =
{12199, 37471 << 15, 60255 << 15};
// Multiply two 32-bit values and accumulate to another input value.
// tbl_value: filter coefficient; diff: value to be scaled; state: addend.
// Return: state + ((diff * tbl_value) >> 16)
static __inline int32_t MUL_ACCUM_1(int32_t tbl_value,
int32_t diff,
int32_t state) {
int32_t result;
__asm __volatile ("smlawb %0, %1, %2, %3": "=r"(result): "r"(diff),
"r"(tbl_value), "r"(state));
return result;
}
// Multiply two 32-bit values and accumulate to another input value.
// tbl_value: filter coefficient; diff: value to be scaled (it is doubled
// before being handed to the instruction); state: addend.
// Return: state + (((diff << 1) * tbl_value) >> 32)
//
// The reason to introduce this function is that, in case we can't use smlawb
// instruction (in MUL_ACCUM_1) due to input value range, we can still use
// smmla to save some cycles.
static __inline int32_t MUL_ACCUM_2(int32_t tbl_value,
int32_t diff,
int32_t state) {
int32_t result;
__asm __volatile ("smmla %0, %1, %2, %3": "=r"(result): "r"(diff << 1),
"r"(tbl_value), "r"(state));
return result;
}
#else
// allpass filter coefficients.
static const uint16_t kResampleAllpass1[3] = {3284, 24441, 49528};
static const uint16_t kResampleAllpass2[3] = {12199, 37471, 60255};
// Multiply a 32-bit value with a 16-bit value and accumulate to another input:
// In this configuration both primitives expand to the same macro, with the
// arguments in the order (coefficient, difference, state).
#define MUL_ACCUM_1(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)
#define MUL_ACCUM_2(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)
#endif // WEBRTC_ARCH_ARM_V7
// decimator
// Consumes the input two samples at a time (len >> 1 pairs) and writes one
// saturated output sample per pair. filtState holds 8 words that are read on
// entry and written back on exit: [0..3] belong to the lower allpass chain,
// [4..7] to the upper one.
#if !defined(MIPS32_LE)
void WebRtcSpl_DownsampleBy2(const int16_t* in, size_t len,
                             int16_t* out, int32_t* filtState) {
  int32_t lo0 = filtState[0];
  int32_t lo1 = filtState[1];
  int32_t lo2 = filtState[2];
  int32_t lo3 = filtState[3];
  int32_t up0 = filtState[4];
  int32_t up1 = filtState[5];
  int32_t up2 = filtState[6];
  int32_t up3 = filtState[7];
  const size_t pairs = len >> 1;
  size_t n;

  for (n = 0; n < pairs; n++) {
    int32_t x, d, a, b;

    // lower allpass filter (first sample of the pair)
    x = (int32_t)in[2 * n] * (1 << 10);
    d = x - lo1;
    a = MUL_ACCUM_1(kResampleAllpass2[0], d, lo0);
    lo0 = x;
    d = a - lo2;
    b = MUL_ACCUM_2(kResampleAllpass2[1], d, lo1);
    lo1 = a;
    d = b - lo3;
    lo3 = MUL_ACCUM_2(kResampleAllpass2[2], d, lo2);
    lo2 = b;

    // upper allpass filter (second sample of the pair)
    x = (int32_t)in[2 * n + 1] * (1 << 10);
    d = x - up1;
    a = MUL_ACCUM_1(kResampleAllpass1[0], d, up0);
    up0 = x;
    d = a - up2;
    b = MUL_ACCUM_1(kResampleAllpass1[1], d, up1);
    up1 = a;
    d = b - up3;
    up3 = MUL_ACCUM_2(kResampleAllpass1[2], d, up2);
    up2 = b;

    // add two allpass outputs, divide by two and round; then limit the
    // amplitude to prevent wrap-around and write to the output array
    out[n] = WebRtcSpl_SatW32ToW16((lo3 + up3 + 1024) >> 11);
  }

  filtState[0] = lo0;
  filtState[1] = lo1;
  filtState[2] = lo2;
  filtState[3] = lo3;
  filtState[4] = up0;
  filtState[5] = up1;
  filtState[6] = up2;
  filtState[7] = up3;
}
#endif  // !defined(MIPS32_LE)
// interpolator
// Writes two saturated output samples for each of the len input samples.
// filtState holds 8 words that are read on entry and written back on exit:
// [0..3] belong to the lower allpass chain, [4..7] to the upper one.
void WebRtcSpl_UpsampleBy2(const int16_t* in, size_t len,
                           int16_t* out, int32_t* filtState) {
  int32_t lo0 = filtState[0];
  int32_t lo1 = filtState[1];
  int32_t lo2 = filtState[2];
  int32_t lo3 = filtState[3];
  int32_t up0 = filtState[4];
  int32_t up1 = filtState[5];
  int32_t up2 = filtState[6];
  int32_t up3 = filtState[7];
  size_t n;

  for (n = 0; n < len; n++) {
    int32_t d, a, b;
    // Both chains are fed with the same scaled input sample.
    const int32_t x = (int32_t)in[n] * (1 << 10);

    // lower allpass filter
    d = x - lo1;
    a = MUL_ACCUM_1(kResampleAllpass1[0], d, lo0);
    lo0 = x;
    d = a - lo2;
    b = MUL_ACCUM_1(kResampleAllpass1[1], d, lo1);
    lo1 = a;
    d = b - lo3;
    lo3 = MUL_ACCUM_2(kResampleAllpass1[2], d, lo2);
    lo2 = b;

    // round; limit amplitude to prevent wrap-around; write to output array
    out[2 * n] = WebRtcSpl_SatW32ToW16((lo3 + 512) >> 10);

    // upper allpass filter
    d = x - up1;
    a = MUL_ACCUM_1(kResampleAllpass2[0], d, up0);
    up0 = x;
    d = a - up2;
    b = MUL_ACCUM_2(kResampleAllpass2[1], d, up1);
    up1 = a;
    d = b - up3;
    up3 = MUL_ACCUM_2(kResampleAllpass2[2], d, up2);
    up2 = b;

    // round; limit amplitude to prevent wrap-around; write to output array
    out[2 * n + 1] = WebRtcSpl_SatW32ToW16((up3 + 512) >> 10);
  }

  filtState[0] = lo0;
  filtState[1] = lo1;
  filtState[2] = lo2;
  filtState[3] = lo3;
  filtState[4] = up0;
  filtState[5] = up1;
  filtState[6] = up2;
  filtState[7] = up3;
}

View file

@ -0,0 +1,689 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This header file contains some internal resampling functions.
*
*/
#include "common_audio/signal_processing/resample_by_2_internal.h"
#include "rtc_base/sanitizer.h"
// allpass filter coefficients.
// Each row holds the three coefficients of one allpass chain. In the
// functions of this file, row 1 is used by the sections commented as the
// "lower allpass filter" and row 0 by those commented as the
// "upper allpass filter".
static const int16_t kResampleAllpass[2][3] = {
{821, 6110, 12382},
{3050, 9368, 15063}
};
//
// decimator
// input:  int32_t (shifted 15 positions to the left, + offset 16384) OVERWRITTEN!
// output: int16_t (saturated) (of length len/2)
// state:  filter state array; length = 8
//
// The two allpass chains write their intermediate results back into `in`
// (even slots for the lower chain, odd slots for the upper chain); the final
// pass combines each even/odd pair into one output sample.
void RTC_NO_SANITIZE("signed-integer-overflow")  // bugs.webrtc.org/5486
WebRtcSpl_DownBy2IntToShort(int32_t *in, int32_t len, int16_t *out,
                            int32_t *state)
{
    int32_t tmp0, tmp1, diff;
    int32_t i;

    // number of output samples
    len >>= 1;

    // lower allpass filter (operates on even input samples)
    for (i = 0; i < len; i++)
    {
        tmp0 = in[i << 1];
        diff = tmp0 - state[1];
        // UBSan: -1771017321 - 999586185 cannot be represented in type 'int'

        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[0] + diff * kResampleAllpass[1][0];
        state[0] = tmp0;
        diff = tmp1 - state[2];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[1] + diff * kResampleAllpass[1][1];
        state[1] = tmp1;
        diff = tmp0 - state[3];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[3] = state[2] + diff * kResampleAllpass[1][2];
        state[2] = tmp0;

        // divide by two and store temporarily
        in[i << 1] = (state[3] >> 1);
    }

    // upper allpass filter (operates on odd input samples)
    for (i = 0; i < len; i++)
    {
        tmp0 = in[(i << 1) + 1];
        diff = tmp0 - state[5];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[4] + diff * kResampleAllpass[0][0];
        state[4] = tmp0;
        diff = tmp1 - state[6];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[5] + diff * kResampleAllpass[0][1];
        state[5] = tmp1;
        diff = tmp0 - state[7];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[7] = state[6] + diff * kResampleAllpass[0][2];
        state[6] = tmp0;

        // divide by two and store temporarily
        in[(i << 1) + 1] = (state[7] >> 1);
    }

    // combine allpass outputs
    // One output sample per iteration. The earlier form produced two per
    // iteration and therefore read in[] and wrote out[] past the end
    // whenever the number of output samples was odd.
    for (i = 0; i < len; i++)
    {
        // add both (already halved) allpass outputs and scale down
        tmp0 = (in[i << 1] + in[(i << 1) + 1]) >> 15;
        // saturate to the int16_t range
        if (tmp0 > 32767)
            tmp0 = 32767;
        if (tmp0 < -32768)
            tmp0 = -32768;
        out[i] = (int16_t)tmp0;
    }
}
//
// decimator
// input:  int16_t
// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len/2)
// state:  filter state array; length = 8
//
// out[i] is the sum of the halved outputs of two allpass chains: the lower
// chain is fed with the even input samples, the upper chain with the odd ones.
void RTC_NO_SANITIZE("signed-integer-overflow")  // bugs.webrtc.org/5486
WebRtcSpl_DownBy2ShortToInt(const int16_t *in,
                            int32_t len,
                            int32_t *out,
                            int32_t *state)
{
    int32_t tmp0, tmp1, diff;
    int32_t i;

    // number of output samples
    len >>= 1;

    // lower allpass filter (operates on even input samples)
    for (i = 0; i < len; i++)
    {
        // Scale by 2^15 with a multiplication: left-shifting a negative
        // signed value is undefined behavior in C.
        tmp0 = (int32_t)in[i << 1] * (1 << 15) + (1 << 14);
        diff = tmp0 - state[1];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[0] + diff * kResampleAllpass[1][0];
        state[0] = tmp0;
        diff = tmp1 - state[2];
        // UBSan: -1379909682 - 834099714 cannot be represented in type 'int'

        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[1] + diff * kResampleAllpass[1][1];
        state[1] = tmp1;
        diff = tmp0 - state[3];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[3] = state[2] + diff * kResampleAllpass[1][2];
        state[2] = tmp0;

        // divide by two and store temporarily
        out[i] = (state[3] >> 1);
    }

    // upper allpass filter (operates on odd input samples)
    for (i = 0; i < len; i++)
    {
        tmp0 = (int32_t)in[(i << 1) + 1] * (1 << 15) + (1 << 14);
        diff = tmp0 - state[5];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[4] + diff * kResampleAllpass[0][0];
        state[4] = tmp0;
        diff = tmp1 - state[6];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[5] + diff * kResampleAllpass[0][1];
        state[5] = tmp1;
        diff = tmp0 - state[7];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[7] = state[6] + diff * kResampleAllpass[0][2];
        state[6] = tmp0;

        // divide by two and add to the lower-chain result
        out[i] += (state[7] >> 1);
    }
}
//
// interpolator
// input:  int16_t
// output: int32_t (normalized, not saturated) (of length len*2)
// state:  filter state array; length = 8
//
// Every input sample is run through both allpass chains: the upper chain
// (state[4..7]) fills out[0], out[2], ... and the lower chain (state[0..3])
// fills out[1], out[3], ...
void WebRtcSpl_UpBy2ShortToInt(const int16_t *in, int32_t len, int32_t *out,
                               int32_t *state)
{
    int32_t tmp0, tmp1, diff;
    int32_t i;

    // upper allpass filter
    for (i = 0; i < len; i++)
    {
        // Scale by 2^15 with a multiplication: left-shifting a negative
        // signed value is undefined behavior in C.
        tmp0 = (int32_t)in[i] * (1 << 15) + (1 << 14);
        diff = tmp0 - state[5];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[4] + diff * kResampleAllpass[0][0];
        state[4] = tmp0;
        diff = tmp1 - state[6];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[5] + diff * kResampleAllpass[0][1];
        state[5] = tmp1;
        diff = tmp0 - state[7];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[7] = state[6] + diff * kResampleAllpass[0][2];
        state[6] = tmp0;

        // scale down and store
        out[i << 1] = state[7] >> 15;
    }

    // lower allpass filter
    for (i = 0; i < len; i++)
    {
        tmp0 = (int32_t)in[i] * (1 << 15) + (1 << 14);
        diff = tmp0 - state[1];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[0] + diff * kResampleAllpass[1][0];
        state[0] = tmp0;
        diff = tmp1 - state[2];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[1] + diff * kResampleAllpass[1][1];
        state[1] = tmp1;
        diff = tmp0 - state[3];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[3] = state[2] + diff * kResampleAllpass[1][2];
        state[2] = tmp0;

        // scale down and store
        out[(i << 1) + 1] = state[3] >> 15;
    }
}
//
// interpolator
// input:  int32_t (shifted 15 positions to the left, + offset 16384)
// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len*2)
// state:  filter state array; length = 8
//
// Every input sample is run through both allpass chains: the upper chain
// (state[4..7]) fills out[0], out[2], ... and the lower chain (state[0..3])
// fills out[1], out[3], ... The chain outputs are stored without rescaling.
void WebRtcSpl_UpBy2IntToInt(const int32_t *in, int32_t len, int32_t *out,
                             int32_t *state)
{
    int32_t* const lo = state;      // state[0..3]
    int32_t* const up = state + 4;  // state[4..7]
    int32_t n;

    // upper allpass filter
    for (n = 0; n < len; n++)
    {
        const int32_t x = in[n];
        int32_t d, s1, s2;

        // first section: scale down and round
        d = x - up[1];
        d = (d + (1 << 13)) >> 14;
        s1 = up[0] + d * kResampleAllpass[0][0];
        up[0] = x;

        // second section: scale down and truncate
        d = s1 - up[2];
        d >>= 14;
        if (d < 0)
            d++;
        s2 = up[1] + d * kResampleAllpass[0][1];
        up[1] = s1;

        // third section: scale down and truncate
        d = s2 - up[3];
        d >>= 14;
        if (d < 0)
            d++;
        up[3] = up[2] + d * kResampleAllpass[0][2];
        up[2] = s2;

        out[2 * n] = up[3];
    }

    // lower allpass filter
    for (n = 0; n < len; n++)
    {
        const int32_t x = in[n];
        int32_t d, s1, s2;

        // first section: scale down and round
        d = x - lo[1];
        d = (d + (1 << 13)) >> 14;
        s1 = lo[0] + d * kResampleAllpass[1][0];
        lo[0] = x;

        // second section: scale down and truncate
        d = s1 - lo[2];
        d >>= 14;
        if (d < 0)
            d++;
        s2 = lo[1] + d * kResampleAllpass[1][1];
        lo[1] = s1;

        // third section: scale down and truncate
        d = s2 - lo[3];
        d >>= 14;
        if (d < 0)
            d++;
        lo[3] = lo[2] + d * kResampleAllpass[1][2];
        lo[2] = s2;

        out[2 * n + 1] = lo[3];
    }
}
//
// interpolator
// input:  int32_t (shifted 15 positions to the left, + offset 16384)
// output: int16_t (saturated) (of length len*2)
// state:  filter state array; length = 8
//
// Every input sample is run through both allpass chains: the upper chain
// (state[4..7]) fills out[0], out[2], ... and the lower chain (state[0..3])
// fills out[1], out[3], ... Each chain output is scaled down by 15 bits and
// clamped to the int16_t range before it is stored.
void WebRtcSpl_UpBy2IntToShort(const int32_t *in, int32_t len, int16_t *out,
                               int32_t *state)
{
    int32_t* const lo = state;      // state[0..3]
    int32_t* const up = state + 4;  // state[4..7]
    int32_t n;

    // upper allpass filter
    for (n = 0; n < len; n++)
    {
        const int32_t x = in[n];
        int32_t d, s1, s2, y;

        // first section: scale down and round
        d = x - up[1];
        d = (d + (1 << 13)) >> 14;
        s1 = up[0] + d * kResampleAllpass[0][0];
        up[0] = x;

        // second section: scale down and truncate
        d = s1 - up[2];
        d >>= 14;
        if (d < 0)
            d++;
        s2 = up[1] + d * kResampleAllpass[0][1];
        up[1] = s1;

        // third section: scale down and truncate
        d = s2 - up[3];
        d >>= 14;
        if (d < 0)
            d++;
        up[3] = up[2] + d * kResampleAllpass[0][2];
        up[2] = s2;

        // scale down, saturate and store
        y = up[3] >> 15;
        if (y > 32767)
            y = 32767;
        if (y < -32768)
            y = -32768;
        out[2 * n] = (int16_t)y;
    }

    // lower allpass filter
    for (n = 0; n < len; n++)
    {
        const int32_t x = in[n];
        int32_t d, s1, s2, y;

        // first section: scale down and round
        d = x - lo[1];
        d = (d + (1 << 13)) >> 14;
        s1 = lo[0] + d * kResampleAllpass[1][0];
        lo[0] = x;

        // second section: scale down and truncate
        d = s1 - lo[2];
        d >>= 14;
        if (d < 0)
            d++;
        s2 = lo[1] + d * kResampleAllpass[1][1];
        lo[1] = s1;

        // third section: scale down and truncate
        d = s2 - lo[3];
        d >>= 14;
        if (d < 0)
            d++;
        lo[3] = lo[2] + d * kResampleAllpass[1][2];
        lo[2] = s2;

        // scale down, saturate and store
        y = lo[3] >> 15;
        if (y > 32767)
            y = 32767;
        if (y < -32768)
            y = -32768;
        out[2 * n + 1] = (int16_t)y;
    }
}
// lowpass filter
// input: int16_t
// output: int32_t (normalized, not saturated)
// state: filter state array; length = 16 (elements 0..15 are accessed)
void WebRtcSpl_LPBy2ShortToInt(const int16_t* in, int32_t len, int32_t* out,
int32_t* state)
{
int32_t tmp0, tmp1, diff;
int32_t i;
len >>= 1;
// lower allpass filter: odd input -> even output samples
in++;
// initial state of polyphase delay element
tmp0 = state[12];
for (i = 0; i < len; i++)
{
diff = tmp0 - state[1];
// scale down and round
diff = (diff + (1 << 13)) >> 14;
tmp1 = state[0] + diff * kResampleAllpass[1][0];
state[0] = tmp0;
diff = tmp1 - state[2];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
tmp0 = state[1] + diff * kResampleAllpass[1][1];
state[1] = tmp1;
diff = tmp0 - state[3];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
state[3] = state[2] + diff * kResampleAllpass[1][2];
state[2] = tmp0;
// scale down, round and store
out[i << 1] = state[3] >> 1;
tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
}
in--;
// upper allpass filter: even input -> even output samples
for (i = 0; i < len; i++)
{
tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
diff = tmp0 - state[5];
// scale down and round
diff = (diff + (1 << 13)) >> 14;
tmp1 = state[4] + diff * kResampleAllpass[0][0];
state[4] = tmp0;
diff = tmp1 - state[6];
// scale down and round
diff = diff >> 14;
if (diff < 0)
diff += 1;
tmp0 = state[5] + diff * kResampleAllpass[0][1];
state[5] = tmp1;
diff = tmp0 - state[7];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
state[7] = state[6] + diff * kResampleAllpass[0][2];
state[6] = tmp0;
// average the two allpass outputs, scale down and store
out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15;
}
// switch to odd output samples
out++;
// lower allpass filter: even input -> odd output samples
for (i = 0; i < len; i++)
{
tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
diff = tmp0 - state[9];
// scale down and round
diff = (diff + (1 << 13)) >> 14;
tmp1 = state[8] + diff * kResampleAllpass[1][0];
state[8] = tmp0;
diff = tmp1 - state[10];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
tmp0 = state[9] + diff * kResampleAllpass[1][1];
state[9] = tmp1;
diff = tmp0 - state[11];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
state[11] = state[10] + diff * kResampleAllpass[1][2];
state[10] = tmp0;
// scale down, round and store
out[i << 1] = state[11] >> 1;
}
// upper allpass filter: odd input -> odd output samples
in++;
for (i = 0; i < len; i++)
{
tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
diff = tmp0 - state[13];
// scale down and round
diff = (diff + (1 << 13)) >> 14;
tmp1 = state[12] + diff * kResampleAllpass[0][0];
state[12] = tmp0;
diff = tmp1 - state[14];
// scale down and round
diff = diff >> 14;
if (diff < 0)
diff += 1;
tmp0 = state[13] + diff * kResampleAllpass[0][1];
state[13] = tmp1;
diff = tmp0 - state[15];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
state[15] = state[14] + diff * kResampleAllpass[0][2];
state[14] = tmp0;
// average the two allpass outputs, scale down and store
out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15;
}
}
// lowpass filter
// input: int32_t (shifted 15 positions to the left, + offset 16384)
// output: int32_t (normalized, not saturated)
// state: filter state array; elements 0..15 are read and written below, so
// the caller must provide at least 16 elements
//
// `len` is halved on entry; each of the four loops below then runs len/2
// times. Every output sample is the average of two three-stage allpass
// cascades: one cascade stores half of its result in `out`, the following
// cascade adds its own half and scales the sum down by 15 bits.
// The signed-overflow sanitizer is disabled for this function; see the UBSan
// note inside the second loop.
void RTC_NO_SANITIZE("signed-integer-overflow") // bugs.webrtc.org/5486
WebRtcSpl_LPBy2IntToInt(const int32_t* in, int32_t len, int32_t* out,
int32_t* state)
{
int32_t tmp0, tmp1, diff;
int32_t i;
len >>= 1;
// lower allpass filter: odd input -> even output samples
in++;
// initial state of polyphase delay element
// (state[12] holds the last odd input stored by the final loop below)
tmp0 = state[12];
for (i = 0; i < len; i++)
{
diff = tmp0 - state[1];
// scale down and round
diff = (diff + (1 << 13)) >> 14;
tmp1 = state[0] + diff * kResampleAllpass[1][0];
state[0] = tmp0;
diff = tmp1 - state[2];
// scale down and truncate (1 is added back when the shifted value is negative)
diff = diff >> 14;
if (diff < 0)
diff += 1;
tmp0 = state[1] + diff * kResampleAllpass[1][1];
state[1] = tmp1;
diff = tmp0 - state[3];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
state[3] = state[2] + diff * kResampleAllpass[1][2];
state[2] = tmp0;
// store half of this cascade's output; the next loop adds the other half
out[i << 1] = state[3] >> 1;
// fetch the odd input sample that the next iteration will filter
tmp0 = in[i << 1];
}
in--;
// upper allpass filter: even input -> even output samples
for (i = 0; i < len; i++)
{
tmp0 = in[i << 1];
diff = tmp0 - state[5];
// UBSan: -794814117 - 1566149201 cannot be represented in type 'int'
// scale down and round
diff = (diff + (1 << 13)) >> 14;
tmp1 = state[4] + diff * kResampleAllpass[0][0];
state[4] = tmp0;
diff = tmp1 - state[6];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
tmp0 = state[5] + diff * kResampleAllpass[0][1];
state[5] = tmp1;
diff = tmp0 - state[7];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
state[7] = state[6] + diff * kResampleAllpass[0][2];
state[6] = tmp0;
// average the two allpass outputs, scale down and store
out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15;
}
// switch to odd output samples
out++;
// lower allpass filter: even input -> odd output samples
for (i = 0; i < len; i++)
{
tmp0 = in[i << 1];
diff = tmp0 - state[9];
// scale down and round
diff = (diff + (1 << 13)) >> 14;
tmp1 = state[8] + diff * kResampleAllpass[1][0];
state[8] = tmp0;
diff = tmp1 - state[10];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
tmp0 = state[9] + diff * kResampleAllpass[1][1];
state[9] = tmp1;
diff = tmp0 - state[11];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
state[11] = state[10] + diff * kResampleAllpass[1][2];
state[10] = tmp0;
// store half of this cascade's output; the next loop adds the other half
out[i << 1] = state[11] >> 1;
}
// upper allpass filter: odd input -> odd output samples
in++;
for (i = 0; i < len; i++)
{
tmp0 = in[i << 1];
diff = tmp0 - state[13];
// scale down and round
diff = (diff + (1 << 13)) >> 14;
tmp1 = state[12] + diff * kResampleAllpass[0][0];
// state[12] also seeds the delay element of the first loop on the next call
state[12] = tmp0;
diff = tmp1 - state[14];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
tmp0 = state[13] + diff * kResampleAllpass[0][1];
state[13] = tmp1;
diff = tmp0 - state[15];
// scale down and truncate
diff = diff >> 14;
if (diff < 0)
diff += 1;
state[15] = state[14] + diff * kResampleAllpass[0][2];
state[14] = tmp0;
// average the two allpass outputs, scale down and store
out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15;
}
}

View file

@ -0,0 +1,60 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This header file contains some internal resampling functions.
*
*/
#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_
#define COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_
#include <stdint.h>
/*******************************************************************
* resample_by_2_fast.c
* Functions for internal use in the other resample functions
******************************************************************/
void WebRtcSpl_DownBy2IntToShort(int32_t* in,
int32_t len,
int16_t* out,
int32_t* state);
void WebRtcSpl_DownBy2ShortToInt(const int16_t* in,
int32_t len,
int32_t* out,
int32_t* state);
void WebRtcSpl_UpBy2ShortToInt(const int16_t* in,
int32_t len,
int32_t* out,
int32_t* state);
void WebRtcSpl_UpBy2IntToInt(const int32_t* in,
int32_t len,
int32_t* out,
int32_t* state);
void WebRtcSpl_UpBy2IntToShort(const int32_t* in,
int32_t len,
int16_t* out,
int32_t* state);
void WebRtcSpl_LPBy2ShortToInt(const int16_t* in,
int32_t len,
int32_t* out,
int32_t* state);
void WebRtcSpl_LPBy2IntToInt(const int32_t* in,
int32_t len,
int32_t* out,
int32_t* state);
#endif // COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_

View file

@ -0,0 +1,292 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the resampling by two functions.
* The description header can be found in signal_processing_library.h
*
*/
#if defined(MIPS32_LE)
#include "common_audio/signal_processing/include/signal_processing_library.h"
#if !defined(MIPS_DSP_R2_LE)
// allpass filter coefficients.
// Only needed by the plain-C path of WebRtcSpl_DownsampleBy2; the DSP R2 path
// loads the same six values into local variables instead.
// kResampleAllpass1 drives the cascade kept in state4..state7,
// kResampleAllpass2 the cascade kept in state0..state3.
static const uint16_t kResampleAllpass1[3] = {3284, 24441, 49528};
static const uint16_t kResampleAllpass2[3] = {12199, 37471, 60255};
#endif
// Multiply a 32-bit value with a 16-bit value and accumulate to another input:
// Both macros currently expand to the same WEBRTC_SPL_SCALEDIFF32 call.
#define MUL_ACCUM_1(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)
#define MUL_ACCUM_2(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)
// decimator
//
// Halves the sample rate of `in`: every pair of input samples is run through
// two three-stage allpass cascades (state0..3 and state4..7) and the two
// cascade outputs are summed, rounded, shifted right by 11 and saturated to
// 16 bits.
//
// in: input samples, consumed two at a time.
// len: number of input samples.
// out: receives one 16-bit sample per processed input pair.
// filtState: eight 32-bit state words; loaded into locals on entry and
// written back on exit.
//
// NOTE(review): the plain-C path below runs (len >> 1) / 4 iterations of a
// body that is unrolled four times, so when len / 2 is not a multiple of 4
// the trailing input pairs are not processed. The DSP R2 path iterates
// len >> 1 times and has no such remainder. Confirm callers only pass
// suitable lengths.
void WebRtcSpl_DownsampleBy2(const int16_t* in,
size_t len,
int16_t* out,
int32_t* filtState) {
int32_t out32;
size_t i, len1;
// Keep the filter state in locals for the duration of the loop.
register int32_t state0 = filtState[0];
register int32_t state1 = filtState[1];
register int32_t state2 = filtState[2];
register int32_t state3 = filtState[3];
register int32_t state4 = filtState[4];
register int32_t state5 = filtState[5];
register int32_t state6 = filtState[6];
register int32_t state7 = filtState[7];
#if defined(MIPS_DSP_R2_LE)
// Allpass coefficients: k1Res* are used with state4..7, k2Res* with state0..3.
int32_t k1Res0, k1Res1, k1Res2, k2Res0, k2Res1, k2Res2;
k1Res0= 3284;
k1Res1= 24441;
k1Res2= 49528;
k2Res0= 12199;
k2Res1= 37471;
k2Res2= 60255;
len1 = (len >> 1);
// The input is walked four bytes (two 16-bit samples) per iteration; the
// samples themselves are fetched with halfword loads.
const int32_t* inw = (int32_t*)in;
int32_t tmp11, tmp12, tmp21, tmp22;
int32_t in322, in321;
int32_t diff1, diff2;
for (i = len1; i > 0; i--) {
// Load one input pair, shift both samples left by 10 and form the first
// stage differences against state1 / state5.
__asm__ volatile (
"lh %[in321], 0(%[inw]) \n\t"
"lh %[in322], 2(%[inw]) \n\t"
"sll %[in321], %[in321], 10 \n\t"
"sll %[in322], %[in322], 10 \n\t"
"addiu %[inw], %[inw], 4 \n\t"
"subu %[diff1], %[in321], %[state1] \n\t"
"subu %[diff2], %[in322], %[state5] \n\t"
: [in322] "=&r" (in322), [in321] "=&r" (in321),
[diff1] "=&r" (diff1), [diff2] "=r" (diff2), [inw] "+r" (inw)
: [state1] "r" (state1), [state5] "r" (state5)
: "memory"
);
// First and second allpass stages of both cascades, computed side by side
// in accumulators $ac0 and $ac1.
__asm__ volatile (
"mult $ac0, %[diff1], %[k2Res0] \n\t"
"mult $ac1, %[diff2], %[k1Res0] \n\t"
"extr.w %[tmp11], $ac0, 16 \n\t"
"extr.w %[tmp12], $ac1, 16 \n\t"
"addu %[tmp11], %[state0], %[tmp11] \n\t"
"addu %[tmp12], %[state4], %[tmp12] \n\t"
"addiu %[state0], %[in321], 0 \n\t"
"addiu %[state4], %[in322], 0 \n\t"
"subu %[diff1], %[tmp11], %[state2] \n\t"
"subu %[diff2], %[tmp12], %[state6] \n\t"
"mult $ac0, %[diff1], %[k2Res1] \n\t"
"mult $ac1, %[diff2], %[k1Res1] \n\t"
"extr.w %[tmp21], $ac0, 16 \n\t"
"extr.w %[tmp22], $ac1, 16 \n\t"
"addu %[tmp21], %[state1], %[tmp21] \n\t"
"addu %[tmp22], %[state5], %[tmp22] \n\t"
"addiu %[state1], %[tmp11], 0 \n\t"
"addiu %[state5], %[tmp12], 0 \n\t"
: [tmp22] "=r" (tmp22), [tmp21] "=&r" (tmp21),
[tmp11] "=&r" (tmp11), [state0] "+r" (state0),
[state1] "+r" (state1),
[state2] "+r" (state2),
[state4] "+r" (state4), [tmp12] "=&r" (tmp12),
[state6] "+r" (state6), [state5] "+r" (state5)
: [k1Res1] "r" (k1Res1), [k2Res1] "r" (k2Res1), [k2Res0] "r" (k2Res0),
[diff2] "r" (diff2), [diff1] "r" (diff1), [in322] "r" (in322),
[in321] "r" (in321), [k1Res0] "r" (k1Res0)
: "hi", "lo", "$ac1hi", "$ac1lo"
);
// third allpass stage of both cascades (results land in state3 / state7),
// followed by the combination of the two cascade outputs
__asm__ volatile (
"subu %[diff1], %[tmp21], %[state3] \n\t"
"subu %[diff2], %[tmp22], %[state7] \n\t"
"mult $ac0, %[diff1], %[k2Res2] \n\t"
"mult $ac1, %[diff2], %[k1Res2] \n\t"
"extr.w %[state3], $ac0, 16 \n\t"
"extr.w %[state7], $ac1, 16 \n\t"
"addu %[state3], %[state2], %[state3] \n\t"
"addu %[state7], %[state6], %[state7] \n\t"
"addiu %[state2], %[tmp21], 0 \n\t"
"addiu %[state6], %[tmp22], 0 \n\t"
// add two allpass outputs, divide by two and round
"addu %[out32], %[state3], %[state7] \n\t"
"addiu %[out32], %[out32], 1024 \n\t"
"sra %[out32], %[out32], 11 \n\t"
: [state3] "+r" (state3), [state6] "+r" (state6),
[state2] "+r" (state2), [diff2] "=&r" (diff2),
[out32] "=r" (out32), [diff1] "=&r" (diff1), [state7] "+r" (state7)
: [tmp22] "r" (tmp22), [tmp21] "r" (tmp21),
[k1Res2] "r" (k1Res2), [k2Res2] "r" (k2Res2)
: "hi", "lo", "$ac1hi", "$ac1lo"
);
// limit amplitude to prevent wrap-around, and write to output array
*out++ = WebRtcSpl_SatW32ToW16(out32);
}
#else // #if defined(MIPS_DSP_R2_LE)
int32_t tmp1, tmp2, diff;
int32_t in32;
// The loop body below is unrolled four times: each iteration consumes eight
// input samples and produces four output samples.
len1 = (len >> 1)/4;
for (i = len1; i > 0; i--) {
// --- unrolled copy 1 of 4 ---
// lower allpass filter
in32 = (int32_t)(*in++) << 10;
diff = in32 - state1;
tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
state0 = in32;
diff = tmp1 - state2;
tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
state1 = tmp1;
diff = tmp2 - state3;
state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
state2 = tmp2;
// upper allpass filter
in32 = (int32_t)(*in++) << 10;
diff = in32 - state5;
tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
state4 = in32;
diff = tmp1 - state6;
tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
state5 = tmp1;
diff = tmp2 - state7;
state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
state6 = tmp2;
// add two allpass outputs, divide by two and round
out32 = (state3 + state7 + 1024) >> 11;
// limit amplitude to prevent wrap-around, and write to output array
*out++ = WebRtcSpl_SatW32ToW16(out32);
// --- unrolled copy 2 of 4 ---
// lower allpass filter
in32 = (int32_t)(*in++) << 10;
diff = in32 - state1;
tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
state0 = in32;
diff = tmp1 - state2;
tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
state1 = tmp1;
diff = tmp2 - state3;
state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
state2 = tmp2;
// upper allpass filter
in32 = (int32_t)(*in++) << 10;
diff = in32 - state5;
tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
state4 = in32;
diff = tmp1 - state6;
tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
state5 = tmp1;
diff = tmp2 - state7;
state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
state6 = tmp2;
// add two allpass outputs, divide by two and round
out32 = (state3 + state7 + 1024) >> 11;
// limit amplitude to prevent wrap-around, and write to output array
*out++ = WebRtcSpl_SatW32ToW16(out32);
// --- unrolled copy 3 of 4 ---
// lower allpass filter
in32 = (int32_t)(*in++) << 10;
diff = in32 - state1;
tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
state0 = in32;
diff = tmp1 - state2;
tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
state1 = tmp1;
diff = tmp2 - state3;
state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
state2 = tmp2;
// upper allpass filter
in32 = (int32_t)(*in++) << 10;
diff = in32 - state5;
tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
state4 = in32;
diff = tmp1 - state6;
tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
state5 = tmp1;
diff = tmp2 - state7;
state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
state6 = tmp2;
// add two allpass outputs, divide by two and round
out32 = (state3 + state7 + 1024) >> 11;
// limit amplitude to prevent wrap-around, and write to output array
*out++ = WebRtcSpl_SatW32ToW16(out32);
// --- unrolled copy 4 of 4 ---
// lower allpass filter
in32 = (int32_t)(*in++) << 10;
diff = in32 - state1;
tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
state0 = in32;
diff = tmp1 - state2;
tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
state1 = tmp1;
diff = tmp2 - state3;
state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
state2 = tmp2;
// upper allpass filter
in32 = (int32_t)(*in++) << 10;
diff = in32 - state5;
tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
state4 = in32;
diff = tmp1 - state6;
tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
state5 = tmp1;
diff = tmp2 - state7;
state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
state6 = tmp2;
// add two allpass outputs, divide by two and round
out32 = (state3 + state7 + 1024) >> 11;
// limit amplitude to prevent wrap-around, and write to output array
*out++ = WebRtcSpl_SatW32ToW16(out32);
}
#endif // #if defined(MIPS_DSP_R2_LE)
// Write the eight state words back to filtState[0..7].
__asm__ volatile (
"sw %[state0], 0(%[filtState]) \n\t"
"sw %[state1], 4(%[filtState]) \n\t"
"sw %[state2], 8(%[filtState]) \n\t"
"sw %[state3], 12(%[filtState]) \n\t"
"sw %[state4], 16(%[filtState]) \n\t"
"sw %[state5], 20(%[filtState]) \n\t"
"sw %[state6], 24(%[filtState]) \n\t"
"sw %[state7], 28(%[filtState]) \n\t"
:
: [state0] "r" (state0), [state1] "r" (state1), [state2] "r" (state2),
[state3] "r" (state3), [state4] "r" (state4), [state5] "r" (state5),
[state6] "r" (state6), [state7] "r" (state7), [filtState] "r" (filtState)
: "memory"
);
}
#endif // #if defined(MIPS32_LE)

View file

@ -0,0 +1,239 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the resampling functions between 48, 44, 32 and 24 kHz.
* The description headers can be found in signal_processing_library.h
*
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
// interpolation coefficients
// 48 -> 32 kHz: one row of 8 taps per output sample of a 3-in/2-out block.
// Row 1 is row 0 reversed.
static const int16_t kCoefficients48To32[2][8] = {
{778, -2050, 1087, 23285, 12903, -3783, 441, 222},
{222, 441, -3783, 12903, 23285, 1087, -2050, 778}
};
// 32 -> 24 kHz: one row of 8 taps per output sample of a 4-in/3-out block.
// Row 2 is row 0 reversed; row 1 is symmetric.
static const int16_t kCoefficients32To24[3][8] = {
{767, -2362, 2434, 24406, 10620, -3838, 721, 90},
{386, -381, -2646, 19062, 19062, -2646, -381, 386},
{90, 721, -3838, 10620, 24406, 2434, -2362, 767}
};
// 44 -> 32 kHz: 9-tap rows. Rows 0..2 are each applied twice per block by
// WebRtcSpl_ResampDotProduct (once walking forward, once walking backward
// through the input); row 3 produces the middle output sample directly.
static const int16_t kCoefficients44To32[4][9] = {
{117, -669, 2245, -6183, 26267, 13529, -3245, 845, -138},
{-101, 612, -2283, 8532, 29790, -5138, 1789, -524, 91},
{50, -292, 1016, -3064, 32010, 3933, -1147, 315, -53},
{-156, 974, -3863, 18603, 21691, -6246, 2353, -712, 126}
};
// Resampling ratio: 2/3
// input: int32_t (normalized, not saturated) :: 3 samples consumed per block;
//        each block reads In[0..8], so 3 * K + 6 samples must be readable
// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 2 * K
// K: number of blocks
void WebRtcSpl_Resample48khzTo32khz(const int32_t *In, int32_t *Out, size_t K)
{
    // Each block turns 3 input samples into 2 output samples. Output sample
    // `phase` of a block is an 8-tap FIR over the input window starting at
    // offset `phase`, seeded with the rounding offset 1 << 14.
    enum { kNumTaps = 8, kInPerBlock = 3, kOutPerBlock = 2 };
    size_t remaining;
    int phase;
    int tap;

    for (remaining = K; remaining > 0; remaining--)
    {
        for (phase = 0; phase < kOutPerBlock; phase++)
        {
            const int16_t *coefs = kCoefficients48To32[phase];
            const int32_t *window = In + phase;
            int32_t acc = 1 << 14;

            for (tap = 0; tap < kNumTaps; tap++)
            {
                acc += coefs[tap] * window[tap];
            }
            Out[phase] = acc;
        }

        // advance to the next block
        In += kInPerBlock;
        Out += kOutPerBlock;
    }
}
// Resampling ratio: 3/4
// input: int32_t (normalized, not saturated) :: 4 samples consumed per block;
//        each block reads In[0..9], so 4 * K + 6 samples must be readable
// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 3 * K
// K: number of blocks
void WebRtcSpl_Resample32khzTo24khz(const int32_t *In, int32_t *Out, size_t K)
{
    // Each block turns 4 input samples into 3 output samples. Output sample
    // `phase` of a block is an 8-tap FIR over the input window starting at
    // offset `phase`, seeded with the rounding offset 1 << 14.
    enum { kNumTaps = 8, kInPerBlock = 4, kOutPerBlock = 3 };
    size_t remaining;
    int phase;
    int tap;

    for (remaining = K; remaining > 0; remaining--)
    {
        for (phase = 0; phase < kOutPerBlock; phase++)
        {
            const int16_t *coefs = kCoefficients32To24[phase];
            const int32_t *window = In + phase;
            int32_t acc = 1 << 14;

            for (tap = 0; tap < kNumTaps; tap++)
            {
                acc += coefs[tap] * window[tap];
            }
            Out[phase] = acc;
        }

        // advance to the next block
        In += kInPerBlock;
        Out += kOutPerBlock;
    }
}
//
// fractional resampling filters
// Fout = 11/16 * Fin
// Fout = 8/11 * Fin
//
// Computes two 9-tap inner products that share one coefficient vector and
// stores them through `out1` and `out2`.
//   *out1 = 16384 + sum(coef_ptr[k] * in1[k])    (in1 is walked forward)
//   *out2 = 16384 + sum(coef_ptr[k] * in2[-k])   (in2 is walked backward)
// for k = 0..8. 16384 is the rounding offset (1 << 14).
static void WebRtcSpl_ResampDotProduct(const int32_t *in1, const int32_t *in2,
                                       const int16_t *coef_ptr, int32_t *out1,
                                       int32_t *out2)
{
    int32_t forward_sum = 16384;
    int32_t backward_sum = 16384;
    int16_t last_coef;
    int k;

    // Taps 0..7 are accumulated; the final tap is folded into the stores.
    for (k = 0; k < 8; k++)
    {
        const int16_t c = coef_ptr[k];
        forward_sum += c * in1[k];
        backward_sum += c * in2[-k];
    }

    last_coef = coef_ptr[8];
    *out1 = forward_sum + last_coef * in1[8];
    *out2 = backward_sum + last_coef * in2[-8];
}
// Resampling ratio: 8/11
// input: int32_t (normalized, not saturated) :: 11 samples consumed per block;
//        each block reads up to In[17], so 11 * K + 7 samples must be readable
// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 8 * K
// K: number of blocks
void WebRtcSpl_Resample44khzTo32khz(const int32_t *In, int32_t *Out, size_t K)
{
    // Each block turns 11 input samples into 8 output samples:
    //   Out[0]          copy of In[3], scaled to the output format
    //   Out[4]          9-tap FIR over In[5..13] with coefficient row 3
    //   Out[1] / Out[7] coefficient row 0, forward from In[0] / backward from In[17]
    //   Out[2] / Out[6] coefficient row 1, forward from In[2] / backward from In[15]
    //   Out[3] / Out[5] coefficient row 2, forward from In[3] / backward from In[14]
    size_t remaining;
    int tap;

    for (remaining = K; remaining > 0; remaining--)
    {
        const int16_t *center_coefs = kCoefficients44To32[3];
        int32_t acc = 1 << 14;

        // first output sample: pass-through plus rounding offset
        Out[0] = ((int32_t)In[3] << 15) + acc;

        // middle output sample
        for (tap = 0; tap < 9; tap++)
        {
            acc += center_coefs[tap] * In[5 + tap];
        }
        Out[4] = acc;

        // remaining six samples, computed as mirrored pairs
        WebRtcSpl_ResampDotProduct(In, In + 17, kCoefficients44To32[0], Out + 1, Out + 7);
        WebRtcSpl_ResampDotProduct(In + 2, In + 15, kCoefficients44To32[1], Out + 2, Out + 6);
        WebRtcSpl_ResampDotProduct(In + 3, In + 14, kCoefficients44To32[2], Out + 3, Out + 5);

        // advance to the next block
        In += 11;
        Out += 8;
    }
}

View file

@ -0,0 +1,69 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Some code came from common/rtcd.c in the WebM project.
#include "common_audio/signal_processing/include/signal_processing_library.h"
// TODO(bugs.webrtc.org/9553): These function pointers are useless. Refactor
// things so that we simply have a bunch of regular functions with different
// implementations for different platforms.
//
// Each pointer below is bound at compile time to exactly one implementation,
// selected by the platform macros: NEON, MIPS32 (little endian), or the
// generic C fallback.
#if defined(WEBRTC_HAS_NEON)
const MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16Neon;
const MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32Neon;
const MaxValueW16 WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16Neon;
const MaxValueW32 WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32Neon;
const MinValueW16 WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16Neon;
const MinValueW32 WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32Neon;
const CrossCorrelation WebRtcSpl_CrossCorrelation =
WebRtcSpl_CrossCorrelationNeon;
const DownsampleFast WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastNeon;
// The NEON build binds this one to the generic C implementation.
const ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound =
WebRtcSpl_ScaleAndAddVectorsWithRoundC;
#elif defined(MIPS32_LE)
const MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16_mips;
// The MIPS variant is bound only when MIPS_DSP_R1_LE is defined; otherwise
// the generic C implementation is used.
const MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32 =
#ifdef MIPS_DSP_R1_LE
WebRtcSpl_MaxAbsValueW32_mips;
#else
WebRtcSpl_MaxAbsValueW32C;
#endif
const MaxValueW16 WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16_mips;
const MaxValueW32 WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32_mips;
const MinValueW16 WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16_mips;
const MinValueW32 WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32_mips;
const CrossCorrelation WebRtcSpl_CrossCorrelation =
WebRtcSpl_CrossCorrelation_mips;
const DownsampleFast WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFast_mips;
// Same selection rule as WebRtcSpl_MaxAbsValueW32 above.
const ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound =
#ifdef MIPS_DSP_R1_LE
WebRtcSpl_ScaleAndAddVectorsWithRound_mips;
#else
WebRtcSpl_ScaleAndAddVectorsWithRoundC;
#endif
#else
// Generic fallback: every pointer is bound to the plain C implementation.
const MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16C;
const MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C;
const MaxValueW16 WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16C;
const MaxValueW32 WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32C;
const MinValueW16 WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16C;
const MinValueW32 WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32C;
const CrossCorrelation WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationC;
const DownsampleFast WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC;
const ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound =
WebRtcSpl_ScaleAndAddVectorsWithRoundC;
#endif

View file

@ -0,0 +1,24 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdint.h>
#include "common_audio/signal_processing/include/spl_inl.h"
// Table used by WebRtcSpl_CountLeadingZeros32_NotBuiltin. For each uint32_t n
// that's a sequence of 0 bits followed by a sequence of 1 bits, the entry at
// index (n * 0x8c0b2891) >> 26 in this table gives the number of zero bits in
// n.
// The index is the top 6 bits of a 32-bit product, hence the 64 entries.
// For example n = 0 maps to index 0, whose entry is 32.
// NOTE(review): the -1 entries are presumably slots that no such n maps to;
// confirm against the lookup code.
const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64] = {
32, 8, 17, -1, -1, 14, -1, -1, -1, 20, -1, -1, -1, 28, -1, 18,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 26, 25, 24,
4, 11, 23, 31, 3, 7, 10, 16, 22, 30, -1, -1, 2, 6, 13, 9,
-1, 15, -1, 21, -1, 29, 19, -1, -1, -1, -1, -1, 1, 27, 5, 12,
};

View file

@ -0,0 +1,194 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_Sqrt().
* The description header can be found in signal_processing_library.h
*
*/
#include "rtc_base/checks.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
int32_t WebRtcSpl_SqrtLocal(int32_t in);
/* Evaluates, in fixed point, the polynomial
     t = 1 + u - 0.5*u^2 + 0.5*u^3 - 0.625*u^4 + 0.875*u^5
   where u is derived from the input as
     y = in/2,  x = y - 2^30,  u = x / 2^31.
   `u` is kept as the upper 16 bits of x; the running sum is a 32-bit value
   to which a rounding bit (32768) is added before it is returned. The terms
   are accumulated in the order u, u^2, u^4, u^5, u^3. */
int32_t WebRtcSpl_SqrtLocal(int32_t in)
{
    const int32_t kHalf = (int32_t)0x40000000;
    int32_t sum;     /* running value of the polynomial */
    int32_t square;  /* u^2 term */
    int32_t term;    /* scratch for the higher-order terms */
    int16_t u;       /* upper 16 bits of (in/2 - 2^30) */
    int16_t hi;      /* upper 16 bits of the term being scaled */

    sum = in / 2;
    sum = sum - kHalf;
    u = (int16_t)(sum >> 16);

    /* 0.5 is added twice because 1.0 itself cannot be represented. */
    sum = sum + kHalf;
    sum = sum + kHalf;

    /* - 0.5 * u^2 */
    square = ((int32_t)u) * ((int32_t)u) * 2;
    term = -square;
    sum = sum + (term >> 1);

    /* - 0.625 * u^4 */
    term >>= 16;
    term = term * term * 2;
    hi = (int16_t)(term >> 16);
    sum += -20480 * hi * 2;

    /* + 0.875 * u^5 */
    term = u * hi * 2;
    hi = (int16_t)(term >> 16);
    sum += 28672 * hi * 2;

    /* + 0.5 * u^3 */
    hi = (int16_t)(square >> 16);
    term = u * hi * 2;
    sum = sum + (term >> 1);

    /* rounding bit */
    return sum + ((int32_t)32768);
}
int32_t WebRtcSpl_Sqrt(int32_t value)
{
    /*
     Returns an integer approximation of sqrt(abs(value)); 0 maps to 0 and
     the most negative input is treated as the largest positive one.

     Method: the magnitude is normalized (shifted left by `norm_shift` bits),
     reduced to its rounded upper 16 bits, and handed to
     WebRtcSpl_SqrtLocal(), which evaluates

       t = 1 + (x/2) - 0.5*(x/2)^2 + 0.5*(x/2)^3 - 0.625*(x/2)^4
             + 0.875*(x/2)^5

     The result is then de-normalized by norm_shift / 2 bits. When
     norm_shift is even the result is additionally multiplied by 1/sqrt(2).

     Worked example (even shift case), in = 73632, expected result 271:
       in2    = 73632 * 2^14 = 1206386688
       y      = in2 / 2      = 603193344
       x      = y - 2^30     = -470548480
       x_half = x / 2^31     = -0.21911621093750
       t      = 0.74973506527313
       ut     = t * (1/sqrt(2)) = 0.53014274874797
       ut2    = ut * 2^9        = 271.4330873589594
    */
    const int16_t kInvSqrt2 = 23170;  // 1/sqrt(2), 0x5a82
    int16_t norm_shift;   // left shift that normalizes the magnitude
    int16_t half_shift;   // norm_shift / 2, applied to the result
    int16_t mantissa_hi;  // rounded upper 16 bits of the normalized magnitude
    int16_t root_hi;      // upper 16 bits of the polynomial result
    int32_t acc = value;

    if (acc == 0) {
        return 0;  // sqrt(0) = 0
    }
    if (acc < 0) {
        // Work on the magnitude. The most negative value has no positive
        // counterpart, so it is mapped to the maximum positive value.
        acc = (acc == WEBRTC_SPL_WORD32_MIN) ? WEBRTC_SPL_WORD32_MAX : -acc;
    }

    norm_shift = WebRtcSpl_NormW32(acc);
    acc = WEBRTC_SPL_LSHIFT_W32(acc, norm_shift);

    // Round to the upper 16 bits, clamping instead of overflowing.
    if (acc < (WEBRTC_SPL_WORD32_MAX - 32767)) {
        acc = acc + ((int32_t)32768);
    } else {
        acc = WEBRTC_SPL_WORD32_MAX;
    }
    mantissa_hi = (int16_t)(acc >> 16);

    half_shift = (norm_shift / 2);
    RTC_DCHECK_GE(half_shift, 0);

    acc = (int32_t)WEBRTC_SPL_LSHIFT_W32((int32_t)mantissa_hi, 16);
    acc = WEBRTC_SPL_ABS_W32(acc);
    acc = WebRtcSpl_SqrtLocal(acc);

    if (2 * half_shift != norm_shift) {
        // Odd shift value case: keep the upper 16 bits as they are.
        acc >>= 16;
    } else {
        // Even shift value case: scale by 1/sqrt(2), round, keep bits 16..30
        // and shift them down by 15.
        root_hi = (int16_t)(acc >> 16);
        acc = kInvSqrt2 * root_hi * 2;
        acc = acc + ((int32_t)32768);
        acc = acc & ((int32_t)0x7fff0000);
        acc >>= 15;
    }

    acc = acc & ((int32_t)0x0000ffff);
    acc >>= half_shift;  // De-normalize the result.
    return acc;
}

View file

@ -0,0 +1,211 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the splitting filter functions.
*
*/
#include "rtc_base/checks.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
// Maximum number of samples in a low/high-band frame.
enum
{
kMaxBandFrameLength = 320 // 10 ms at 64 kHz.
};
// QMF filter coefficients in Q16.
static const uint16_t WebRtcSpl_kAllPassFilter1[3] = {6418, 36982, 57261};
static const uint16_t WebRtcSpl_kAllPassFilter2[3] = {21333, 49062, 63010};
///////////////////////////////////////////////////////////////////////////////////////////////
// WebRtcSpl_AllPassSection(...)
//
// Runs ONE first-order all-pass section
//
//            a + q^-1
//   y[n] = ------------ x[n]   <=>   y[n] = x[n-1] + a * (x[n] - y[n-1])
//           1 + a q^-1
//
// Input:
//      - in            : Section input x (Q10), `length` samples
//      - length        : Number of samples, must be > 0
//      - coefficient   : Section coefficient a (Q16)
//
// Input & Output:
//      - state         : state[0] = x[-1], state[1] = y[-1] on entry;
//                        updated to x[length-1], y[length-1] on return
//
// Output:
//      - out           : Section output y (Q10), `length` samples. Must not
//                        alias `in`.
//
static void WebRtcSpl_AllPassSection(const int32_t* in,
                                     size_t length,
                                     int32_t* out,
                                     uint16_t coefficient,
                                     int32_t* state)
{
    size_t k;
    int32_t diff;

    // First sample: the one-sample history comes from the saved state.
    // diff = (x[0] - y[-1]); saturating subtraction guards against wrap-around.
    diff = WebRtcSpl_SubSatW32(in[0], state[1]);
    // y[0] = x[-1] + a * (x[0] - y[-1])
    out[0] = WEBRTC_SPL_SCALEDIFF32(coefficient, diff, state[0]);

    // Remaining samples: the history is the previous input/output sample.
    for (k = 1; k < length; k++)
    {
        // diff = (x[n] - y[n-1])
        diff = WebRtcSpl_SubSatW32(in[k], out[k - 1]);
        // y[n] = x[n-1] + a * (x[n] - y[n-1])
        out[k] = WEBRTC_SPL_SCALEDIFF32(coefficient, diff, in[k - 1]);
    }

    // Save the last input/output sample for the next call.
    state[0] = in[length - 1];   // x[N-1], becomes x[-1] next time
    state[1] = out[length - 1];  // y[N-1], becomes y[-1] next time
}

///////////////////////////////////////////////////////////////////////////////////////////////
// WebRtcSpl_AllPassQMF(...)
//
// Allpass filter used by the analysis and synthesis parts of the QMF filter.
// The input is filtered by a cascade of three first-order all-pass sections:
//
//         a_3 + q^-1    a_2 + q^-1    a_1 + q^-1
// y[n] =  -----------   -----------   -----------   x[n]
//         1 + a_3q^-1   1 + a_2q^-1   1 + a_1q^-1
//
// Input:
//      - in_data               : Input data sequence (Q10). NOTE: used as
//                                intermediate storage, so its contents are
//                                overwritten during the call.
//      - data_length           : Length of data sequence. A length of zero is
//                                a no-op (state and buffers are untouched).
//      - filter_coefficients   : Filter coefficients (length 3, Q16)
//
// Input & Output:
//      - filter_state          : Filter state (length 6, Q10). For each of the
//                                three sections, in order, it stores the
//                                section's previous input followed by its
//                                previous output.
//
// Output:
//      - out_data              : Output data sequence (Q10), length equal to
//                                `data_length`
//
static void WebRtcSpl_AllPassQMF(int32_t* in_data,
                                 size_t data_length,
                                 int32_t* out_data,
                                 const uint16_t* filter_coefficients,
                                 int32_t* filter_state)
{
    // Nothing to filter; also avoids the `data_length - 1` index wrapping
    // around for an empty frame.
    if (data_length == 0)
    {
        return;
    }

    // To save memory the two buffers are used in a ping-pong fashion:
    //   section 1: in_data  -> out_data
    //   section 2: out_data -> in_data   (intermediate result)
    //   section 3: in_data  -> out_data  (final result)
    WebRtcSpl_AllPassSection(in_data, data_length, out_data,
                             filter_coefficients[0], filter_state);
    WebRtcSpl_AllPassSection(out_data, data_length, in_data,
                             filter_coefficients[1], filter_state + 2);
    WebRtcSpl_AllPassSection(in_data, data_length, out_data,
                             filter_coefficients[2], filter_state + 4);
}
// Splits `in_data` (`in_data_length` samples, must be even) into a low band and
// a high band of `in_data_length / 2` samples each. `filter_state1` and
// `filter_state2` carry the all-pass filter memory of the two polyphase
// branches between calls.
void WebRtcSpl_AnalysisQMF(const int16_t* in_data, size_t in_data_length,
                           int16_t* low_band, int16_t* high_band,
                           int32_t* filter_state1, int32_t* filter_state2)
{
    int32_t odd_q10[kMaxBandFrameLength];
    int32_t even_q10[kMaxBandFrameLength];
    int32_t odd_filtered[kMaxBandFrameLength];
    int32_t even_filtered[kMaxBandFrameLength];
    const size_t band_length = in_data_length / 2;
    size_t n;

    RTC_DCHECK_EQ(0, in_data_length % 2);
    RTC_DCHECK_LE(band_length, kMaxBandFrameLength);

    // De-interleave the input into its even and odd polyphase components and
    // scale them up to Q10.
    for (n = 0; n < band_length; n++)
    {
        even_q10[n] = ((int32_t)in_data[2 * n]) * (1 << 10);
        odd_q10[n] = ((int32_t)in_data[2 * n + 1]) * (1 << 10);
    }

    // Each branch goes through its own all-pass cascade.
    WebRtcSpl_AllPassQMF(odd_q10, band_length, odd_filtered,
                         WebRtcSpl_kAllPassFilter1, filter_state1);
    WebRtcSpl_AllPassQMF(even_q10, band_length, even_filtered,
                         WebRtcSpl_kAllPassFilter2, filter_state2);

    // Sum of the branches gives the low band, their difference the high band.
    // The rounded shift by 11 goes from Q10 back to Q0 and halves the result.
    for (n = 0; n < band_length; n++)
    {
        const int32_t sum = (odd_filtered[n] + even_filtered[n] + 1024) >> 11;
        const int32_t difference =
            (odd_filtered[n] - even_filtered[n] + 1024) >> 11;
        low_band[n] = WebRtcSpl_SatW32ToW16(sum);
        high_band[n] = WebRtcSpl_SatW32ToW16(difference);
    }
}
// Recombines a low band and a high band (`band_length` samples each) into
// `2 * band_length` interleaved output samples in `out_data`. `filter_state1`
// and `filter_state2` carry the all-pass filter memory between calls.
void WebRtcSpl_SynthesisQMF(const int16_t* low_band, const int16_t* high_band,
                            size_t band_length, int16_t* out_data,
                            int32_t* filter_state1, int32_t* filter_state2)
{
    int32_t sum_q10[kMaxBandFrameLength];
    int32_t diff_q10[kMaxBandFrameLength];
    int32_t sum_filtered[kMaxBandFrameLength];
    int32_t diff_filtered[kMaxBandFrameLength];
    size_t n;

    RTC_DCHECK_LE(band_length, kMaxBandFrameLength);

    // Build the sum and difference channels from the two bands, in Q10.
    for (n = 0; n < band_length; n++)
    {
        const int32_t low = (int32_t)low_band[n];
        const int32_t high = (int32_t)high_band[n];
        sum_q10[n] = (low + high) * (1 << 10);
        diff_q10[n] = (low - high) * (1 << 10);
    }

    // All-pass filter both channels (note the swapped coefficient sets
    // compared to the analysis side).
    WebRtcSpl_AllPassQMF(sum_q10, band_length, sum_filtered,
                         WebRtcSpl_kAllPassFilter2, filter_state1);
    WebRtcSpl_AllPassQMF(diff_q10, band_length, diff_filtered,
                         WebRtcSpl_kAllPassFilter1, filter_state2);

    // The filtered difference channel supplies the even output samples and the
    // filtered sum channel the odd ones. Round from Q10 to Q0 and saturate.
    for (n = 0; n < band_length; n++)
    {
        out_data[2 * n] =
            WebRtcSpl_SatW32ToW16((diff_filtered[n] + 512) >> 10);
        out_data[2 * n + 1] =
            WebRtcSpl_SatW32ToW16((sum_filtered[n] + 512) >> 10);
    }
}

View file

@ -0,0 +1,35 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_SqrtOfOneMinusXSquared().
* The description header can be found in signal_processing_library.h
*
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
// Computes yQ15[m] = sqrt(1 - xQ15[m]^2) for every element, with both vectors
// in Q15. "1" is approximated by 1073741823 in Q30 (~0.99999999906).
void WebRtcSpl_SqrtOfOneMinusXSquared(int16_t *xQ15, size_t vector_length,
                                      int16_t *yQ15)
{
    size_t idx = 0;

    while (idx < vector_length)
    {
        const int16_t x = xQ15[idx];
        // x^2 is in Q30, so the subtraction yields 1 - x^2 in Q30.
        const int32_t one_minus_x2 = 1073741823 - x * x;
        // The square root of a Q30 value is in Q15.
        yQ15[idx] = (int16_t)WebRtcSpl_Sqrt(one_minus_x2);
        idx++;
    }
}

View file

@ -0,0 +1,165 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains implementations of the functions
* WebRtcSpl_VectorBitShiftW16()
* WebRtcSpl_VectorBitShiftW32()
* WebRtcSpl_VectorBitShiftW32ToW16()
* WebRtcSpl_ScaleVector()
* WebRtcSpl_ScaleVectorWithSat()
* WebRtcSpl_ScaleAndAddVectors()
* WebRtcSpl_ScaleAndAddVectorsWithRoundC()
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
// Shifts every element of `in` by `right_shifts` bits and stores the result in
// `res`. A positive `right_shifts` shifts right; zero or a negative value
// scales each element by 2^(-right_shifts) (i.e. a left shift).
void WebRtcSpl_VectorBitShiftW16(int16_t *res, size_t length,
                                 const int16_t *in, int16_t right_shifts)
{
    size_t n;

    if (right_shifts <= 0)
    {
        // Multiplication is used instead of `<<` so that negative samples are
        // handled without shifting a negative value.
        for (n = 0; n < length; n++)
        {
            res[n] = in[n] * (1 << (-right_shifts));
        }
        return;
    }

    for (n = 0; n < length; n++)
    {
        res[n] = in[n] >> right_shifts;
    }
}
// Shifts every element of `in_vector` by `right_shifts` bits and stores the
// result in `out_vector`. A positive `right_shifts` shifts right; zero or a
// negative value shifts left by `-right_shifts` bits.
//
// The left shift is carried out on the unsigned representation of the sample:
// left-shifting a negative signed integer is undefined behaviour, whereas the
// unsigned shift is fully defined and produces the same bit pattern.
void WebRtcSpl_VectorBitShiftW32(int32_t *out_vector,
                                 size_t vector_length,
                                 const int32_t *in_vector,
                                 int16_t right_shifts)
{
    size_t i;

    if (right_shifts > 0)
    {
        for (i = 0; i < vector_length; i++)
        {
            out_vector[i] = in_vector[i] >> right_shifts;
        }
    } else
    {
        const int left_shifts = -right_shifts;
        for (i = 0; i < vector_length; i++)
        {
            out_vector[i] = (int32_t)((uint32_t)in_vector[i] << left_shifts);
        }
    }
}
// Shifts every 32-bit element of `in` by `right_shifts` bits (a negative value
// means a left shift by `-right_shifts`), saturates the result to 16 bits and
// stores it in `out`.
//
// The left shift is carried out on the unsigned representation of the sample:
// left-shifting a negative signed integer is undefined behaviour, whereas the
// unsigned shift is fully defined and produces the same bit pattern.
void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out, size_t length,
                                      const int32_t* in, int right_shifts) {
  size_t i;

  if (right_shifts >= 0) {
    for (i = 0; i < length; i++) {
      out[i] = WebRtcSpl_SatW32ToW16(in[i] >> right_shifts);
    }
  } else {
    const int left_shifts = -right_shifts;
    for (i = 0; i < length; i++) {
      const int32_t shifted = (int32_t)((uint32_t)in[i] << left_shifts);
      out[i] = WebRtcSpl_SatW32ToW16(shifted);
    }
  }
}
// Element-wise scaling: out_vector[n] = (gain * in_vector[n]) >> right_shifts.
// The result is truncated to 16 bits without saturation.
void WebRtcSpl_ScaleVector(const int16_t *in_vector, int16_t *out_vector,
                           int16_t gain, size_t in_vector_length,
                           int16_t right_shifts)
{
    size_t n;

    for (n = 0; n < in_vector_length; n++)
    {
        const int product = in_vector[n] * gain;
        out_vector[n] = (int16_t)(product >> right_shifts);
    }
}
// Element-wise scaling with saturation:
// out_vector[n] = sat16((gain * in_vector[n]) >> right_shifts).
void WebRtcSpl_ScaleVectorWithSat(const int16_t *in_vector, int16_t *out_vector,
                                  int16_t gain, size_t in_vector_length,
                                  int16_t right_shifts)
{
    size_t n;

    for (n = 0; n < in_vector_length; n++) {
        const int product = in_vector[n] * gain;
        out_vector[n] = WebRtcSpl_SatW32ToW16(product >> right_shifts);
    }
}
// Element-wise weighted sum of two vectors:
// out[n] = ((gain1 * in1[n]) >> shift1) + ((gain2 * in2[n]) >> shift2).
// Each scaled term is truncated to 16 bits before the addition, and so is the
// sum; no saturation is applied.
void WebRtcSpl_ScaleAndAddVectors(const int16_t *in1, int16_t gain1, int shift1,
                                  const int16_t *in2, int16_t gain2, int shift2,
                                  int16_t *out, size_t vector_length)
{
    size_t n;

    for (n = 0; n < vector_length; n++)
    {
        const int16_t term1 = (int16_t)((gain1 * in1[n]) >> shift1);
        const int16_t term2 = (int16_t)((gain2 * in2[n]) >> shift2);
        out[n] = term1 + term2;
    }
}
// C version of WebRtcSpl_ScaleAndAddVectorsWithRound() for generic platforms.
//
// Computes, for every element,
//   out_vector[i] = (in_vector1[i] * in_vector1_scale +
//                    in_vector2[i] * in_vector2_scale +
//                    round_value) >> right_shifts
// where round_value = 2^(right_shifts - 1) (0 when right_shifts == 0). The
// result is truncated to 16 bits.
//
// Returns 0 on success, or -1 if any pointer is NULL, `length` is zero or
// `right_shifts` is negative (nothing is written in that case).
int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1,
                                           int16_t in_vector1_scale,
                                           const int16_t* in_vector2,
                                           int16_t in_vector2_scale,
                                           int right_shifts,
                                           int16_t* out_vector,
                                           size_t length) {
  size_t i = 0;
  int round_value = 0;

  if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL ||
      length == 0 || right_shifts < 0) {
    return -1;
  }

  // Only computed after validation: shifting by a negative count is undefined.
  round_value = (1 << right_shifts) >> 1;

  for (i = 0; i < length; i++) {
    out_vector[i] = (int16_t)((
        in_vector1[i] * in_vector1_scale + in_vector2[i] * in_vector2_scale +
        round_value) >> right_shifts);
  }

  return 0;
}

View file

@ -0,0 +1,57 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains implementations of the functions
* WebRtcSpl_ScaleAndAddVectorsWithRound_mips()
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
// MIPS inline-assembly version of WebRtcSpl_ScaleAndAddVectorsWithRound().
//
// For every element it forms
//   in_vector1[i] * in_vector1_scale + in_vector2[i] * in_vector2_scale
// in the hi/lo accumulator, extracts it shifted right by `right_shifts` and
// stores the low 16 bits in out_vector[i].
//
// Returns 0 on success, or -1 if any pointer is NULL, `length` is zero or
// `right_shifts` is negative (nothing is written in that case).
//
// NOTE(review): there is no explicit rounding constant here; rounding is
// presumably provided by the `extrv_r.w` instruction (the "_r" variant) --
// confirm against the MIPS DSP ASE manual.
int WebRtcSpl_ScaleAndAddVectorsWithRound_mips(const int16_t* in_vector1,
int16_t in_vector1_scale,
const int16_t* in_vector2,
int16_t in_vector2_scale,
int right_shifts,
int16_t* out_vector,
size_t length) {
int16_t r0 = 0, r1 = 0;
// The asm block advances these pointers itself ("+r" operands), so working
// copies are used. The casts drop const only to satisfy the operand types;
// the inputs are only loaded from, never stored to.
int16_t *in1 = (int16_t*)in_vector1;
int16_t *in2 = (int16_t*)in_vector2;
int16_t *out = out_vector;
size_t i = 0;
int value32 = 0;
if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL ||
length == 0 || right_shifts < 0) {
return -1;
}
for (i = 0; i < length; i++) {
// One element per iteration:
//   lh/lh      load the two 16-bit inputs
//   mult/madd  multiply by the scales and accumulate the two products
//   extrv_r.w  extract the accumulator shifted right by `right_shifts`
//   sh         store the low halfword of the result
//   addiu x3   advance in1, in2 and out by one int16_t (2 bytes)
__asm __volatile (
"lh %[r0], 0(%[in1]) \n\t"
"lh %[r1], 0(%[in2]) \n\t"
"mult %[r0], %[in_vector1_scale] \n\t"
"madd %[r1], %[in_vector2_scale] \n\t"
"extrv_r.w %[value32], $ac0, %[right_shifts] \n\t"
"addiu %[in1], %[in1], 2 \n\t"
"addiu %[in2], %[in2], 2 \n\t"
"sh %[value32], 0(%[out]) \n\t"
"addiu %[out], %[out], 2 \n\t"
: [value32] "=&r" (value32), [out] "+r" (out), [in1] "+r" (in1),
[in2] "+r" (in2), [r0] "=&r" (r0), [r1] "=&r" (r1)
: [in_vector1_scale] "r" (in_vector1_scale),
[in_vector2_scale] "r" (in_vector2_scale),
[right_shifts] "r" (right_shifts)
: "hi", "lo", "memory"
);
}
return 0;
}

View file

@ -0,0 +1,147 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/smoothing_filter.h"
#include <math.h>
#include <cmath>
#include "rtc_base/checks.h"
#include "rtc_base/time_utils.h"
namespace webrtc {
namespace {

// During the initialization time an increasing alpha is used. Specifically,
//   alpha(n) = exp(-powf(init_factor, n)),
// where `init_factor` is chosen such that
//   alpha(init_time_ms) = exp(-1.0f / init_time_ms).
// A zero initialization time yields a factor of zero.
float InitFactorForTimeMs(int init_time_ms) {
  if (init_time_ms == 0) {
    return 0.0f;
  }
  return powf(init_time_ms, -1.0f / init_time_ms);
}

// Constant that helps the calculation during the initialization phase (see the
// appendix at the end of this file). A zero initialization time yields zero.
float InitConstForTimeMs(int init_time_ms) {
  if (init_time_ms == 0) {
    return 0.0f;
  }
  return init_time_ms - powf(init_time_ms, 1.0f - 1.0f / init_time_ms);
}

}  // namespace

// Creates a filter whose initialization phase lasts `init_time_ms`
// milliseconds; the same value is used as the initial time constant.
SmoothingFilterImpl::SmoothingFilterImpl(int init_time_ms)
    : init_time_ms_(init_time_ms),
      init_factor_(InitFactorForTimeMs(init_time_ms)),
      init_const_(InitConstForTimeMs(init_time_ms)) {
  UpdateAlpha(init_time_ms_);
}
// Defaulted out of line; the destructor has no custom clean-up code.
SmoothingFilterImpl::~SmoothingFilterImpl() = default;
// Feeds a new sample into the filter, time-stamped with the current time.
void SmoothingFilterImpl::AddSample(float sample) {
  const int64_t now_ms = rtc::TimeMillis();

  if (init_end_time_ms_) {
    // Bring the state up to date using the previous sample, then remember the
    // new one for the next extrapolation.
    ExtrapolateLastSample(now_ms);
    last_sample_ = sample;
    return;
  }

  // Very first sample. Seeding the state with it is equivalent to assuming the
  // filter has been receiving this value since time -infinity. This also
  // starts the initialization period.
  last_sample_ = sample;
  state_ = sample;
  init_end_time_ms_ = now_ms + init_time_ms_;
  last_state_time_ms_ = now_ms;
}
// Returns the filter state extrapolated to the current time, or nullopt if no
// sample has been added yet.
absl::optional<float> SmoothingFilterImpl::GetAverage() {
  if (init_end_time_ms_) {
    ExtrapolateLastSample(rtc::TimeMillis());
    return state_;
  }
  // `init_end_time_ms_` is only set once the first sample has arrived.
  return absl::nullopt;
}
// Changes the time constant. Only allowed once a sample has been received and
// the state has been updated at or after the end of the initialization period;
// otherwise nothing is changed and false is returned.
bool SmoothingFilterImpl::SetTimeConstantMs(int time_constant_ms) {
  const bool initialization_done =
      init_end_time_ms_ && last_state_time_ms_ >= *init_end_time_ms_;
  if (initialization_done) {
    UpdateAlpha(time_constant_ms);
    return true;
  }
  return false;
}
// Sets alpha = exp(-1 / time_constant_ms); a zero time constant maps to an
// alpha of zero (which also avoids the division by zero).
void SmoothingFilterImpl::UpdateAlpha(int time_constant_ms) {
  if (time_constant_ms == 0) {
    alpha_ = 0.0f;
    return;
  }
  alpha_ = std::exp(-1.0f / time_constant_ms);
}
void SmoothingFilterImpl::ExtrapolateLastSample(int64_t time_ms) {
RTC_DCHECK_GE(time_ms, last_state_time_ms_);
RTC_DCHECK(init_end_time_ms_);
float multiplier = 0.0f;
if (time_ms <= *init_end_time_ms_) {
// Current update is to be made during initialization phase.
// We update the state as if the `alpha` has been increased according
// alpha(n) = exp(-powf(init_factor_, n)),
// where n is the time (in millisecond) since the first sample received.
// With algebraic derivation as shown in the Appendix, we can find that the
// state can be updated in a similar manner as if alpha is a constant,
// except for a different multiplier.
if (init_time_ms_ == 0) {
// This means `init_factor_` = 0.
multiplier = 0.0f;
} else if (init_time_ms_ == 1) {
// This means `init_factor_` = 1.
multiplier = std::exp(last_state_time_ms_ - time_ms);
} else {
multiplier = std::exp(
-(powf(init_factor_, last_state_time_ms_ - *init_end_time_ms_) -
powf(init_factor_, time_ms - *init_end_time_ms_)) /
init_const_);
}
} else {
if (last_state_time_ms_ < *init_end_time_ms_) {
// The latest state update was made during initialization phase.
// We first extrapolate to the initialization time.
ExtrapolateLastSample(*init_end_time_ms_);
// Then extrapolate the rest by the following.
}
multiplier = powf(alpha_, time_ms - last_state_time_ms_);
}
state_ = multiplier * state_ + (1.0f - multiplier) * last_sample_;
last_state_time_ms_ = time_ms;
}
} // namespace webrtc
// Appendix: derivation of extrapolation during initialization phase.
// (LaTeX syntax)
// Assuming
// \begin{align}
// y(n) &= \alpha_{n-1} y(n-1) + \left(1 - \alpha_{n-1}\right) x(m) \\*
// &= \left(\prod_{i=m}^{n-1} \alpha_i\right) y(m) +
// \left(1 - \prod_{i=m}^{n-1} \alpha_i \right) x(m)
// \end{align}
// Taking $\alpha_{n} = \exp(-\gamma^n)$, $\gamma$ denotes init\_factor\_, the
// multiplier becomes
// \begin{align}
// \prod_{i=m}^{n-1} \alpha_i
// &= \exp\left(-\sum_{i=m}^{n-1} \gamma^i \right) \\*
// &= \begin{cases}
// \exp\left(-\frac{\gamma^m - \gamma^n}{1 - \gamma} \right)
// & \gamma \neq 1 \\*
// \exp\left(m-n\right) & \gamma = 1
// \end{cases}
// \end{align}
// We know $\gamma = T^{-\frac{1}{T}}$, where $T$ denotes init\_time\_ms\_. Then
// $1 - \gamma$ approaches zero when $T$ increases. This can cause numerical
// difficulties. To avoid them, we multiply both the numerator and the
// denominator of the fraction by $T$ (valid when $T > 0$):
// \begin{align}
// \frac{\gamma^m - \gamma^n}{1 - \gamma}
// &= \frac{T^\frac{T-m}{T} - T^\frac{T-n}{T}}{T - T^{1-\frac{1}{T}}}
// \end{align}

View file

@ -0,0 +1,75 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef COMMON_AUDIO_SMOOTHING_FILTER_H_
#define COMMON_AUDIO_SMOOTHING_FILTER_H_
#include <stdint.h>
#include "absl/types/optional.h"
namespace webrtc {
// Abstract interface of a filter that smooths a stream of float samples.
class SmoothingFilter {
public:
virtual ~SmoothingFilter() = default;
// Feeds one new sample into the filter.
virtual void AddSample(float sample) = 0;
// Returns the current smoothed value. The optional lets an implementation
// report that no value is available (SmoothingFilterImpl does so until the
// first sample has been added).
virtual absl::optional<float> GetAverage() = 0;
// Requests a new time constant, in milliseconds. Returns whether the request
// was applied (SmoothingFilterImpl rejects it until its initialization period
// has passed).
virtual bool SetTimeConstantMs(int time_constant_ms) = 0;
};
// SmoothingFilterImpl applies an exponential filter
// alpha = exp(-1.0 / time_constant_ms);
// y[t] = alpha * y[t-1] + (1 - alpha) * sample;
// This implies a sample rate of 1000 Hz, i.e., 1 sample / ms.
// But SmoothingFilterImpl allows sparse samples. All missing samples will be
// assumed to equal the last received sample.
class SmoothingFilterImpl final : public SmoothingFilter {
public:
// `init_time_ms` is initialization time. It defines a period starting from
// the arriving time of the first sample. During this period, the exponential
// filter uses a varying time constant so that a smaller time constant will be
// applied to the earlier samples. This is to allow the filter to adapt to
// earlier samples quickly. After the initialization period, the time constant
// will be set to `init_time_ms` first and can be changed through
// `SetTimeConstantMs`.
explicit SmoothingFilterImpl(int init_time_ms);
SmoothingFilterImpl() = delete;
SmoothingFilterImpl(const SmoothingFilterImpl&) = delete;
SmoothingFilterImpl& operator=(const SmoothingFilterImpl&) = delete;
~SmoothingFilterImpl() override;
// Adds a sample, time-stamped with the current time.
void AddSample(float sample) override;
// Returns the filter state extrapolated to the current time, or nullopt if no
// sample has been added yet.
absl::optional<float> GetAverage() override;
// Returns false (and changes nothing) until a sample has been received and
// the state has been updated at or after the end of the initialization
// period.
bool SetTimeConstantMs(int time_constant_ms) override;
// Methods used for unittests.
float alpha() const { return alpha_; }
private:
// Sets `alpha_` to exp(-1 / time_constant_ms), or to 0 for a zero constant.
void UpdateAlpha(int time_constant_ms);
// Advances `state_` to `time_ms`, assuming all missing samples equal
// `last_sample_`.
void ExtrapolateLastSample(int64_t time_ms);
// Length of the initialization period, in milliseconds.
const int init_time_ms_;
// Base of the varying alpha used during the initialization period.
const float init_factor_;
// Helper constant for the multiplier used during the initialization period.
const float init_const_;
// End of the initialization period; unset until the first sample arrives.
absl::optional<int64_t> init_end_time_ms_;
// The members below are first written when the first sample is added (except
// `alpha_`, which the constructor sets through UpdateAlpha()).
float last_sample_;
float alpha_;
float state_;
// Time at which `state_` was last updated.
int64_t last_state_time_ms_;
};
} // namespace webrtc
#endif // COMMON_AUDIO_SMOOTHING_FILTER_H_

View file

@ -0,0 +1,8 @@
/*
* http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
* Copyright Takuya OOURA, 1996-2001
*
* You may use, copy, modify and distribute this code for any purpose (include
* commercial use) and without fee. Please refer to this package when you modify
* this code.
*/

View file

@ -0,0 +1,13 @@
Name: General Purpose FFT (Fast Fourier/Cosine/Sine Transform) Package
Short Name: fft4g
URL: http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
Version: 0
Date: 2018-06-19
License: Custom license
License File: LICENSE
Security Critical: yes
Description:
This is a package to calculate Discrete Fourier/Cosine/Sine Transforms of
1-dimensional sequences of length 2^N. This package contains C and Fortran
FFT codes.

View file

@ -0,0 +1,548 @@
/*
* http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
* Copyright Takuya OOURA, 1996-2001
*
* You may use, copy, modify and distribute this code for any purpose (include
* commercial use) and without fee. Please refer to this package when you modify
* this code.
*
* Changes by the WebRTC authors:
* - Trivial type modifications.
* - Minimal code subset to do rdft of length 128.
* - Optimizations because of known length.
* - Removed the global variables by moving the code in to a class in order
* to make it thread safe.
*
* All changes are covered by the WebRTC license and IP grant:
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/third_party/ooura/fft_size_128/ooura_fft.h"
#include "common_audio/third_party/ooura/fft_size_128/ooura_fft_tables_common.h"
#include "rtc_base/system/arch.h"
#include "system_wrappers/include/cpu_features_wrapper.h"
namespace webrtc {
namespace {
#if !(defined(MIPS_FPU_LE) || defined(WEBRTC_HAS_NEON))
// Portable C implementation of the first butterfly pass of the length-128
// transform. `a` holds 128 floats that are updated in place; the code treats
// them as interleaved (real, imaginary) pairs. The array is processed in
// groups of 16 floats: the first group is handled separately below with the
// trivial twiddle factors folded in, and the loop handles the remaining seven
// groups with twiddles read from `rdft_w`, `rdft_wk3ri_first` and
// `rdft_wk3ri_second`.
static void cft1st_128_C(float* a) {
const int n = 128;
int j, k1, k2;
float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
// The processing of the first set of elements was simplified in C to avoid
// some operations (multiplication by zero or one, addition of two elements
// multiplied by the same weight, ...).
// a[0..7]: no twiddle multiplication is needed.
x0r = a[0] + a[2];
x0i = a[1] + a[3];
x1r = a[0] - a[2];
x1i = a[1] - a[3];
x2r = a[4] + a[6];
x2i = a[5] + a[7];
x3r = a[4] - a[6];
x3i = a[5] - a[7];
a[0] = x0r + x2r;
a[1] = x0i + x2i;
a[4] = x0r - x2r;
a[5] = x0i - x2i;
a[2] = x1r - x3i;
a[3] = x1i + x3r;
a[6] = x1r + x3i;
a[7] = x1i - x3r;
// a[8..15]: the only weight needed is rdft_w[2].
wk1r = rdft_w[2];
x0r = a[8] + a[10];
x0i = a[9] + a[11];
x1r = a[8] - a[10];
x1i = a[9] - a[11];
x2r = a[12] + a[14];
x2i = a[13] + a[15];
x3r = a[12] - a[14];
x3i = a[13] - a[15];
a[8] = x0r + x2r;
a[9] = x0i + x2i;
a[12] = x2i - x0i;
a[13] = x0r - x2r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[10] = wk1r * (x0r - x0i);
a[11] = wk1r * (x0r + x0i);
x0r = x3i + x1r;
x0i = x3r - x1i;
a[14] = wk1r * (x0i - x0r);
a[15] = wk1r * (x0i + x0r);
// Remaining groups of 16 floats, each with its own set of twiddle factors.
k1 = 0;
for (j = 16; j < n; j += 16) {
k1 += 2;
k2 = 2 * k1;
wk2r = rdft_w[k1 + 0];
wk2i = rdft_w[k1 + 1];
wk1r = rdft_w[k2 + 0];
wk1i = rdft_w[k2 + 1];
wk3r = rdft_wk3ri_first[k1 + 0];
wk3i = rdft_wk3ri_first[k1 + 1];
// First half of the group: a[j + 0 .. j + 7].
x0r = a[j + 0] + a[j + 2];
x0i = a[j + 1] + a[j + 3];
x1r = a[j + 0] - a[j + 2];
x1i = a[j + 1] - a[j + 3];
x2r = a[j + 4] + a[j + 6];
x2i = a[j + 5] + a[j + 7];
x3r = a[j + 4] - a[j + 6];
x3i = a[j + 5] - a[j + 7];
a[j + 0] = x0r + x2r;
a[j + 1] = x0i + x2i;
x0r -= x2r;
x0i -= x2i;
a[j + 4] = wk2r * x0r - wk2i * x0i;
a[j + 5] = wk2r * x0i + wk2i * x0r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[j + 2] = wk1r * x0r - wk1i * x0i;
a[j + 3] = wk1r * x0i + wk1i * x0r;
x0r = x1r + x3i;
x0i = x1i - x3r;
a[j + 6] = wk3r * x0r - wk3i * x0i;
a[j + 7] = wk3r * x0i + wk3i * x0r;
// Second half of the group: a[j + 8 .. j + 15]. wk1 and wk3 are reloaded;
// wk2 is reused with its real and imaginary parts swapped (see below).
wk1r = rdft_w[k2 + 2];
wk1i = rdft_w[k2 + 3];
wk3r = rdft_wk3ri_second[k1 + 0];
wk3i = rdft_wk3ri_second[k1 + 1];
x0r = a[j + 8] + a[j + 10];
x0i = a[j + 9] + a[j + 11];
x1r = a[j + 8] - a[j + 10];
x1i = a[j + 9] - a[j + 11];
x2r = a[j + 12] + a[j + 14];
x2i = a[j + 13] + a[j + 15];
x3r = a[j + 12] - a[j + 14];
x3i = a[j + 13] - a[j + 15];
a[j + 8] = x0r + x2r;
a[j + 9] = x0i + x2i;
x0r -= x2r;
x0i -= x2i;
a[j + 12] = -wk2i * x0r - wk2r * x0i;
a[j + 13] = -wk2i * x0i + wk2r * x0r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[j + 10] = wk1r * x0r - wk1i * x0i;
a[j + 11] = wk1r * x0i + wk1i * x0r;
x0r = x1r + x3i;
x0i = x1i - x3r;
a[j + 14] = wk3r * x0r - wk3i * x0i;
a[j + 15] = wk3r * x0i + wk3i * x0r;
}
}
// Portable C implementation of the middle butterfly pass of the length-128
// transform. `a` holds 128 floats that are updated in place; the code treats
// them as interleaved (real, imaginary) pairs. Each butterfly combines four
// pairs that are 8 floats apart (offsets +0, +8, +16, +24), and the array is
// processed in blocks of 32 floats:
//   - block at 0:   no twiddle multiplication needed,
//   - block at 32:  only the weight rdft_w[2] is needed,
//   - blocks at 64 and 96: general twiddles from `rdft_w`,
//     `rdft_wk3ri_first` and `rdft_wk3ri_second`.
static void cftmdl_128_C(float* a) {
const int l = 8;
const int n = 128;
const int m = 32;
int j0, j1, j2, j3, k, k1, k2, m2;
float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
// Block starting at 0: plain butterflies without twiddle factors.
for (j0 = 0; j0 < l; j0 += 2) {
j1 = j0 + 8;
j2 = j0 + 16;
j3 = j0 + 24;
x0r = a[j0 + 0] + a[j1 + 0];
x0i = a[j0 + 1] + a[j1 + 1];
x1r = a[j0 + 0] - a[j1 + 0];
x1i = a[j0 + 1] - a[j1 + 1];
x2r = a[j2 + 0] + a[j3 + 0];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2 + 0] - a[j3 + 0];
x3i = a[j2 + 1] - a[j3 + 1];
a[j0 + 0] = x0r + x2r;
a[j0 + 1] = x0i + x2i;
a[j2 + 0] = x0r - x2r;
a[j2 + 1] = x0i - x2i;
a[j1 + 0] = x1r - x3i;
a[j1 + 1] = x1i + x3r;
a[j3 + 0] = x1r + x3i;
a[j3 + 1] = x1i - x3r;
}
// Block starting at m (= 32): a single weight, rdft_w[2], is sufficient.
wk1r = rdft_w[2];
for (j0 = m; j0 < l + m; j0 += 2) {
j1 = j0 + 8;
j2 = j0 + 16;
j3 = j0 + 24;
x0r = a[j0 + 0] + a[j1 + 0];
x0i = a[j0 + 1] + a[j1 + 1];
x1r = a[j0 + 0] - a[j1 + 0];
x1i = a[j0 + 1] - a[j1 + 1];
x2r = a[j2 + 0] + a[j3 + 0];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2 + 0] - a[j3 + 0];
x3i = a[j2 + 1] - a[j3 + 1];
a[j0 + 0] = x0r + x2r;
a[j0 + 1] = x0i + x2i;
a[j2 + 0] = x2i - x0i;
a[j2 + 1] = x0r - x2r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[j1 + 0] = wk1r * (x0r - x0i);
a[j1 + 1] = wk1r * (x0r + x0i);
x0r = x3i + x1r;
x0i = x3r - x1i;
a[j3 + 0] = wk1r * (x0i - x0r);
a[j3 + 1] = wk1r * (x0i + x0r);
}
// Remaining blocks. With n = 128 and m2 = 64 this loop body runs exactly once
// (k = 64) and covers the blocks starting at 64 and at 96.
k1 = 0;
m2 = 2 * m;
for (k = m2; k < n; k += m2) {
k1 += 2;
k2 = 2 * k1;
wk2r = rdft_w[k1 + 0];
wk2i = rdft_w[k1 + 1];
wk1r = rdft_w[k2 + 0];
wk1i = rdft_w[k2 + 1];
wk3r = rdft_wk3ri_first[k1 + 0];
wk3i = rdft_wk3ri_first[k1 + 1];
// Block starting at k.
for (j0 = k; j0 < l + k; j0 += 2) {
j1 = j0 + 8;
j2 = j0 + 16;
j3 = j0 + 24;
x0r = a[j0 + 0] + a[j1 + 0];
x0i = a[j0 + 1] + a[j1 + 1];
x1r = a[j0 + 0] - a[j1 + 0];
x1i = a[j0 + 1] - a[j1 + 1];
x2r = a[j2 + 0] + a[j3 + 0];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2 + 0] - a[j3 + 0];
x3i = a[j2 + 1] - a[j3 + 1];
a[j0 + 0] = x0r + x2r;
a[j0 + 1] = x0i + x2i;
x0r -= x2r;
x0i -= x2i;
a[j2 + 0] = wk2r * x0r - wk2i * x0i;
a[j2 + 1] = wk2r * x0i + wk2i * x0r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[j1 + 0] = wk1r * x0r - wk1i * x0i;
a[j1 + 1] = wk1r * x0i + wk1i * x0r;
x0r = x1r + x3i;
x0i = x1i - x3r;
a[j3 + 0] = wk3r * x0r - wk3i * x0i;
a[j3 + 1] = wk3r * x0i + wk3i * x0r;
}
// Block starting at k + m. wk1 and wk3 are reloaded; wk2 is reused with its
// real and imaginary parts swapped (see the a[j2] updates below).
wk1r = rdft_w[k2 + 2];
wk1i = rdft_w[k2 + 3];
wk3r = rdft_wk3ri_second[k1 + 0];
wk3i = rdft_wk3ri_second[k1 + 1];
for (j0 = k + m; j0 < l + (k + m); j0 += 2) {
j1 = j0 + 8;
j2 = j0 + 16;
j3 = j0 + 24;
x0r = a[j0 + 0] + a[j1 + 0];
x0i = a[j0 + 1] + a[j1 + 1];
x1r = a[j0 + 0] - a[j1 + 0];
x1i = a[j0 + 1] - a[j1 + 1];
x2r = a[j2 + 0] + a[j3 + 0];
x2i = a[j2 + 1] + a[j3 + 1];
x3r = a[j2 + 0] - a[j3 + 0];
x3i = a[j2 + 1] - a[j3 + 1];
a[j0 + 0] = x0r + x2r;
a[j0 + 1] = x0i + x2i;
x0r -= x2r;
x0i -= x2i;
a[j2 + 0] = -wk2i * x0r - wk2r * x0i;
a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
x0r = x1r - x3i;
x0i = x1i + x3r;
a[j1 + 0] = wk1r * x0r - wk1i * x0i;
a[j1 + 1] = wk1r * x0i + wk1i * x0r;
x0r = x1r + x3i;
x0i = x1i - x3r;
a[j3 + 0] = wk3r * x0r - wk3i * x0i;
a[j3 + 1] = wk3r * x0i + wk3i * x0r;
}
}
}
// Portable C implementation of the real-FFT post-processing step used by
// OouraFft::Fft(). Updates, in place, the mirrored pairs a[j2], a[j2 + 1] and
// a[128 - j2], a[128 - j2 + 1] for j2 = 2, 4, ..., 62 using weights read from
// `rdft_w + 32`.
static void rftfsub_128_C(float* a) {
  const float* const c = rdft_w + 32;

  for (int j1 = 1; j1 < 32; ++j1) {
    const int j2 = 2 * j1;
    const int k2 = 128 - j2;
    const float wkr = 0.5f - c[32 - j1];
    const float wki = c[j1];
    const float xr = a[j2 + 0] - a[k2 + 0];
    const float xi = a[j2 + 1] + a[k2 + 1];
    const float yr = wkr * xr - wki * xi;
    const float yi = wkr * xi + wki * xr;
    a[j2 + 0] -= yr;
    a[j2 + 1] -= yi;
    a[k2 + 0] += yr;
    a[k2 + 1] -= yi;
  }
}
// Portable C implementation of the real-FFT pre-processing step used by
// OouraFft::InverseFft(). Negates a[1] and a[65], and updates, in place, the
// mirrored pairs a[j2], a[j2 + 1] and a[128 - j2], a[128 - j2 + 1] for
// j2 = 2, 4, ..., 62 using weights read from `rdft_w + 32`.
static void rftbsub_128_C(float* a) {
  const float* const c = rdft_w + 32;

  a[1] = -a[1];
  for (int j1 = 1; j1 < 32; ++j1) {
    const int j2 = 2 * j1;
    const int k2 = 128 - j2;
    const float wkr = 0.5f - c[32 - j1];
    const float wki = c[j1];
    const float xr = a[j2 + 0] - a[k2 + 0];
    const float xi = a[j2 + 1] + a[k2 + 1];
    const float yr = wkr * xr + wki * xi;
    const float yi = wkr * xi - wki * xr;
    a[j2 + 0] = a[j2 + 0] - yr;
    a[j2 + 1] = yi - a[j2 + 1];
    a[k2 + 0] = yr + a[k2 + 0];
    a[k2 + 1] = yi - a[k2 + 1];
  }
  a[65] = -a[65];
}
#endif
} // namespace
// Constructs the FFT with the caller's knowledge of SSE2 availability. The
// flag is honoured only on x86-family builds; elsewhere SSE2 is never used.
OouraFft::OouraFft(bool sse2_available) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
  const bool enable_sse2 = sse2_available;
#else
  const bool enable_sse2 = false;
#endif
  use_sse2_ = enable_sse2;
}
// Constructs the FFT and detects SSE2 support at run time on x86-family
// builds; elsewhere SSE2 is never used.
OouraFft::OouraFft() {
#if defined(WEBRTC_ARCH_X86_FAMILY)
  const bool enable_sse2 = (GetCPUInfo(kSSE2) != 0);
#else
  const bool enable_sse2 = false;
#endif
  use_sse2_ = enable_sse2;
}
// Defaulted out of line; the destructor has no custom clean-up code.
OouraFft::~OouraFft() = default;
// In-place forward transform of the 128 floats in `a`: bit-reversal
// permutation, complex FFT passes, real-FFT post-processing, and finally
// a[0] and a[1] are replaced by their sum and difference.
void OouraFft::Fft(float* a) const {
  bitrv2_128(a);
  cftfsub_128(a);
  rftfsub_128(a);

  const float difference = a[0] - a[1];
  a[0] += a[1];
  a[1] = difference;
}
// In-place inverse transform of the 128 floats in `a`. a[0] and a[1] are first
// recombined (undoing the sum/difference packing done by Fft(), up to scale),
// then the real-FFT pre-processing, the bit-reversal permutation and the
// backward complex FFT passes are applied.
void OouraFft::InverseFft(float* a) const {
  a[1] = (a[0] - a[1]) * 0.5f;
  a[0] = a[0] - a[1];

  rftbsub_128(a);
  bitrv2_128(a);
  cftbsub_128(a);
}
// Dispatches to the platform-specific implementation of the first butterfly
// pass: MIPS, NEON, SSE2 (x86 with SSE2 enabled at run time) or portable C.
void OouraFft::cft1st_128(float* a) const {
#if defined(MIPS_FPU_LE)
  cft1st_128_mips(a);
#elif defined(WEBRTC_HAS_NEON)
  cft1st_128_neon(a);
#else
#if defined(WEBRTC_ARCH_X86_FAMILY)
  if (use_sse2_) {
    cft1st_128_SSE2(a);
    return;
  }
#endif
  // Generic fallback (also used on x86 without SSE2).
  cft1st_128_C(a);
#endif
}
// Dispatches to the platform-specific implementation of the middle butterfly
// pass: MIPS, NEON, SSE2 (x86 with SSE2 enabled at run time) or portable C.
void OouraFft::cftmdl_128(float* a) const {
#if defined(MIPS_FPU_LE)
  cftmdl_128_mips(a);
#elif defined(WEBRTC_HAS_NEON)
  cftmdl_128_neon(a);
#else
#if defined(WEBRTC_ARCH_X86_FAMILY)
  if (use_sse2_) {
    cftmdl_128_SSE2(a);
    return;
  }
#endif
  // Generic fallback (also used on x86 without SSE2).
  cftmdl_128_C(a);
#endif
}
// Dispatches to the platform-specific implementation of the forward real-FFT
// post-processing: MIPS, NEON, SSE2 (x86 with SSE2 enabled at run time) or
// portable C.
void OouraFft::rftfsub_128(float* a) const {
#if defined(MIPS_FPU_LE)
  rftfsub_128_mips(a);
#elif defined(WEBRTC_HAS_NEON)
  rftfsub_128_neon(a);
#else
#if defined(WEBRTC_ARCH_X86_FAMILY)
  if (use_sse2_) {
    rftfsub_128_SSE2(a);
    return;
  }
#endif
  // Generic fallback (also used on x86 without SSE2).
  rftfsub_128_C(a);
#endif
}
// Dispatches to the platform-specific implementation of the inverse real-FFT
// pre-processing: MIPS, NEON, SSE2 (x86 with SSE2 enabled at run time) or
// portable C.
void OouraFft::rftbsub_128(float* a) const {
#if defined(MIPS_FPU_LE)
  rftbsub_128_mips(a);
#elif defined(WEBRTC_HAS_NEON)
  rftbsub_128_neon(a);
#else
#if defined(WEBRTC_ARCH_X86_FAMILY)
  if (use_sse2_) {
    rftbsub_128_SSE2(a);
    return;
  }
#endif
  // Generic fallback (also used on x86 without SSE2).
  rftbsub_128_C(a);
#endif
}
// Backward complex FFT passes on the 128 floats in `a` (in place): the first
// and middle butterfly passes followed by a final pass that combines four
// (real, imaginary) pairs located 32 floats apart. Compared to cftfsub_128()
// the imaginary parts are sign-flipped in the final pass.
void OouraFft::cftbsub_128(float* a) const {
  cft1st_128(a);
  cftmdl_128(a);

  const int stride = 32;
  for (int j = 0; j < stride; j += 2) {
    float* const p0 = a + j;
    float* const p1 = p0 + stride;
    float* const p2 = p1 + stride;
    float* const p3 = p2 + stride;

    // Read all inputs before any output is written.
    const float x0r = p0[0] + p1[0];
    const float x0i = -p0[1] - p1[1];
    const float x1r = p0[0] - p1[0];
    const float x1i = -p0[1] + p1[1];
    const float x2r = p2[0] + p3[0];
    const float x2i = p2[1] + p3[1];
    const float x3r = p2[0] - p3[0];
    const float x3i = p2[1] - p3[1];

    p0[0] = x0r + x2r;
    p0[1] = x0i - x2i;
    p2[0] = x0r - x2r;
    p2[1] = x0i + x2i;
    p1[0] = x1r - x3i;
    p1[1] = x1i - x3r;
    p3[0] = x1r + x3i;
    p3[1] = x1i + x3r;
  }
}
// Forward complex FFT passes on the 128 floats in `a` (in place): the first
// and middle butterfly passes followed by a final pass that combines four
// (real, imaginary) pairs located 32 floats apart.
void OouraFft::cftfsub_128(float* a) const {
  cft1st_128(a);
  cftmdl_128(a);

  const int stride = 32;
  for (int j = 0; j < stride; j += 2) {
    float* const p0 = a + j;
    float* const p1 = p0 + stride;
    float* const p2 = p1 + stride;
    float* const p3 = p2 + stride;

    // Read all inputs before any output is written.
    const float x0r = p0[0] + p1[0];
    const float x0i = p0[1] + p1[1];
    const float x1r = p0[0] - p1[0];
    const float x1i = p0[1] - p1[1];
    const float x2r = p2[0] + p3[0];
    const float x2i = p2[1] + p3[1];
    const float x3r = p2[0] - p3[0];
    const float x3i = p2[1] - p3[1];

    p0[0] = x0r + x2r;
    p0[1] = x0i + x2i;
    p2[0] = x0r - x2r;
    p2[1] = x0i - x2i;
    p1[0] = x1r - x3i;
    p1[1] = x1i + x3r;
    p3[0] = x1r + x3i;
    p3[1] = x1i - x3r;
  }
}
// Reorders the 128-float buffer `a` in place by exchanging fixed pairs of
// two-float (real, imaginary) entries. The pairs are derived from the offset
// table `ip` plus constant strides.
void OouraFft::bitrv2_128(float* a) const {
  /*
    Optimisations that were tried here and did not make this any faster:
      (a) Keeping the swap indexes in a LUT (the index arithmetic is already
          'free' while waiting on memory/L1).
      (b) Moving two consecutive floats with one 64 bit integer load/store
          (execution is memory/L1 bound).
      (c) Mixing floats and 64 bit integers to maximize register utilization
          (execution is memory/L1 bound).
      (d) Replacing ip[i] by ((k<<31)>>25) + ((k >> 1)<<5).
      (e) Hard-coding the offsets to eliminate the index calculations
          completely.
  */
  // Exchanges the two-float entry starting at `first` with the one starting
  // at `second`. All four values are read before any is written.
  const auto exchange = [a](unsigned int first, unsigned int second) {
    const float first_re = a[first + 0];
    const float first_im = a[first + 1];
    const float second_re = a[second + 0];
    const float second_im = a[second + 1];
    a[first + 0] = second_re;
    a[first + 1] = second_im;
    a[second + 0] = first_re;
    a[second + 1] = first_im;
  };

  const int ip[4] = {0, 64, 32, 96};
  for (unsigned int row = 0; row < 4; ++row) {
    for (unsigned int col = 0; col < row; ++col) {
      unsigned int lower = 2 * col + ip[row];
      unsigned int upper = 2 * row + ip[col];
      exchange(lower, upper);

      lower += 8;
      upper += 16;
      exchange(lower, upper);

      lower += 8;
      upper -= 8;
      exchange(lower, upper);

      lower += 8;
      upper += 16;
      exchange(lower, upper);
    }
    // One extra exchange per row, between entries 8 floats apart.
    const unsigned int diagonal = 2 * row + 8 + ip[row];
    exchange(diagonal, diagonal + 8);
  }
}
} // namespace webrtc

View file

@ -0,0 +1,64 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_H_
#define MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_H_
#include "rtc_base/system/arch.h"
namespace webrtc {
// Architecture-specific implementations of the FFT stage routines. Each group
// is declared only when the matching build macro is defined. OouraFft's
// private methods of the same base name (cftmdl_128, rftfsub_128,
// rftbsub_128, ...) forward to these.
// All take a pointer to the float buffer they operate on. The NEON variants
// update a[0..127] in place.
// NOTE(review): the SSE2 and MIPS definitions are not visible here; they
// presumably share the same in-place, 128-float contract -- confirm.

// SSE2 variants. At run time they are used only when OouraFft::use_sse2_ is
// true.
#if defined(WEBRTC_ARCH_X86_FAMILY)
void cft1st_128_SSE2(float* a);
void cftmdl_128_SSE2(float* a);
void rftfsub_128_SSE2(float* a);
void rftbsub_128_SSE2(float* a);
#endif
// MIPS (little-endian, hardware FPU) variants.
#if defined(MIPS_FPU_LE)
void cft1st_128_mips(float* a);
void cftmdl_128_mips(float* a);
void rftfsub_128_mips(float* a);
void rftbsub_128_mips(float* a);
#endif
// ARM NEON variants.
#if defined(WEBRTC_HAS_NEON)
void cft1st_128_neon(float* a);
void cftmdl_128_neon(float* a);
void rftfsub_128_neon(float* a);
void rftbsub_128_neon(float* a);
#endif
// Fixed-size Ooura FFT whose stage helpers work on a buffer of 128 floats.
// The only state is a flag that selects SSE2 code paths on x86 builds.
class OouraFft {
public:
// Ctor allowing the availability of SSE2 support to be specified.
explicit OouraFft(bool sse2_available);
// Deprecated: This Ctor will soon be removed.
OouraFft();
~OouraFft();
// Forward and inverse transforms on the buffer `a`.
// NOTE(review): the definitions are not visible in this chunk; presumably
// both operate in place on 128 floats -- confirm against ooura_fft.cc.
void Fft(float* a) const;
void InverseFft(float* a) const;
private:
// Stage helpers. cftmdl_128, rftfsub_128 and rftbsub_128 forward to the
// MIPS / NEON / SSE2 / C implementation chosen for the build (and, on x86, by
// use_sse2_). cft1st_128 presumably does the same -- its definition is not
// visible here.
void cft1st_128(float* a) const;
void cftmdl_128(float* a) const;
void rftfsub_128(float* a) const;
void rftbsub_128(float* a) const;
// Each runs cft1st_128() and cftmdl_128() and then a final butterfly pass over
// the four 32-float quarters of `a`.
void cftfsub_128(float* a) const;
void cftbsub_128(float* a) const;
// In-place reordering of `a` that exchanges fixed pairs of two-float entries.
void bitrv2_128(float* a) const;
// When true, the x86 dispatch paths call the *_SSE2 routines instead of the
// *_C ones.
bool use_sse2_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_H_

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,351 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* The rdft AEC algorithm, neon version of speed-critical functions.
*
* Based on the sse2 version.
*/
#include <arm_neon.h>
#include "common_audio/third_party/ooura/fft_size_128/ooura_fft.h"
#include "common_audio/third_party/ooura/fft_size_128/ooura_fft_tables_common.h"
#include "common_audio/third_party/ooura/fft_size_128/ooura_fft_tables_neon_sse2.h"
namespace webrtc {
#if defined(WEBRTC_HAS_NEON)
// NEON implementation of the "cft1st" stage. Updates the 128-float buffer `a`
// in place. Each loop iteration processes 16 consecutive floats
// (a[j] .. a[j + 15]) using four consecutive entries, starting at index k2,
// from each of the rdft_wk{1,2,3}{r,i} twiddle tables.
void cft1st_128_neon(float* a) {
// Per-lane multipliers loaded from k_swap_sign.
// NOTE(review): the table is defined outside this file's view; presumably an
// alternating -1/+1 pattern -- confirm in the tables header.
const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign);
int j, k2;
for (k2 = 0, j = 0; j < 128; j += 16, k2 += 4) {
// Load the 16 floats of this iteration.
float32x4_t a00v = vld1q_f32(&a[j + 0]);
float32x4_t a04v = vld1q_f32(&a[j + 4]);
float32x4_t a08v = vld1q_f32(&a[j + 8]);
float32x4_t a12v = vld1q_f32(&a[j + 12]);
// Regroup so each vector holds the same two-float slot from both 8-float
// halves: a01v = {a[j+0], a[j+1], a[j+8], a[j+9]}, a23v = {a[j+2], a[j+3],
// a[j+10], a[j+11]}, and likewise a45v / a67v.
float32x4_t a01v = vcombine_f32(vget_low_f32(a00v), vget_low_f32(a08v));
float32x4_t a23v = vcombine_f32(vget_high_f32(a00v), vget_high_f32(a08v));
float32x4_t a45v = vcombine_f32(vget_low_f32(a04v), vget_low_f32(a12v));
float32x4_t a67v = vcombine_f32(vget_high_f32(a04v), vget_high_f32(a12v));
// Twiddle factors for this iteration.
const float32x4_t wk1rv = vld1q_f32(&rdft_wk1r[k2]);
const float32x4_t wk1iv = vld1q_f32(&rdft_wk1i[k2]);
const float32x4_t wk2rv = vld1q_f32(&rdft_wk2r[k2]);
const float32x4_t wk2iv = vld1q_f32(&rdft_wk2i[k2]);
const float32x4_t wk3rv = vld1q_f32(&rdft_wk3r[k2]);
const float32x4_t wk3iv = vld1q_f32(&rdft_wk3i[k2]);
// Butterfly sums and differences.
float32x4_t x0v = vaddq_f32(a01v, a23v);
const float32x4_t x1v = vsubq_f32(a01v, a23v);
const float32x4_t x2v = vaddq_f32(a45v, a67v);
const float32x4_t x3v = vsubq_f32(a45v, a67v);
// x3 with the two floats of every pair swapped.
const float32x4_t x3w = vrev64q_f32(x3v);
float32x4_t x0w;
// First output: x0 + x2 (no twiddle).
a01v = vaddq_f32(x0v, x2v);
// Third output: wk2r * (x0 - x2) + wk2i * swapped(x0 - x2).
x0v = vsubq_f32(x0v, x2v);
x0w = vrev64q_f32(x0v);
a45v = vmulq_f32(wk2rv, x0v);
a45v = vmlaq_f32(a45v, wk2iv, x0w);
// Second output: t = x1 + x3w * swap_sign, then wk1r * t + wk1i * swapped(t).
x0v = vmlaq_f32(x1v, x3w, vec_swap_sign);
x0w = vrev64q_f32(x0v);
a23v = vmulq_f32(wk1rv, x0v);
a23v = vmlaq_f32(a23v, wk1iv, x0w);
// Fourth output: t = x1 - x3w * swap_sign, then wk3r * t + wk3i * swapped(t).
x0v = vmlsq_f32(x1v, x3w, vec_swap_sign);
x0w = vrev64q_f32(x0v);
a67v = vmulq_f32(wk3rv, x0v);
a67v = vmlaq_f32(a67v, wk3iv, x0w);
// Undo the regrouping so results land at the positions they were loaded from.
a00v = vcombine_f32(vget_low_f32(a01v), vget_low_f32(a23v));
a04v = vcombine_f32(vget_low_f32(a45v), vget_low_f32(a67v));
a08v = vcombine_f32(vget_high_f32(a01v), vget_high_f32(a23v));
a12v = vcombine_f32(vget_high_f32(a45v), vget_high_f32(a67v));
vst1q_f32(&a[j + 0], a00v);
vst1q_f32(&a[j + 4], a04v);
vst1q_f32(&a[j + 8], a08v);
vst1q_f32(&a[j + 12], a12v);
}
}
// NEON implementation of the "cftmdl" stage. Updates the 128-float buffer `a`
// in place in two passes: the first covers a[0..63] with factors from
// cftmdl_wk1r, the second covers a[64..127] with factors read at index 4 of
// the rdft_wk* tables. Each loop iteration handles one two-float entry from
// each of eight sub-blocks spaced 8 floats apart. Entries 32 floats apart
// share a vector, so two butterflies are computed at once.
void cftmdl_128_neon(float* a) {
int j;
const int l = 8;
// Per-lane multipliers loaded from k_swap_sign.
// NOTE(review): the table is defined outside this file's view; presumably an
// alternating -1/+1 pattern -- confirm in the tables header.
const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign);
float32x4_t wk1rv = vld1q_f32(cftmdl_wk1r);
// Pass 1: j = 0, 2, 4, 6.
for (j = 0; j < l; j += 2) {
// a_00_32 = {a[j], a[j+1], a[j+32], a[j+33]}; the low half belongs to the
// block at offset 0 and the high half to the block at offset 32.
const float32x2_t a_00 = vld1_f32(&a[j + 0]);
const float32x2_t a_08 = vld1_f32(&a[j + 8]);
const float32x2_t a_32 = vld1_f32(&a[j + 32]);
const float32x2_t a_40 = vld1_f32(&a[j + 40]);
const float32x4_t a_00_32 = vcombine_f32(a_00, a_32);
const float32x4_t a_08_40 = vcombine_f32(a_08, a_40);
const float32x4_t x0r0_0i0_0r1_x0i1 = vaddq_f32(a_00_32, a_08_40);
const float32x4_t x1r0_1i0_1r1_x1i1 = vsubq_f32(a_00_32, a_08_40);
const float32x2_t a_16 = vld1_f32(&a[j + 16]);
const float32x2_t a_24 = vld1_f32(&a[j + 24]);
const float32x2_t a_48 = vld1_f32(&a[j + 48]);
const float32x2_t a_56 = vld1_f32(&a[j + 56]);
const float32x4_t a_16_48 = vcombine_f32(a_16, a_48);
const float32x4_t a_24_56 = vcombine_f32(a_24, a_56);
const float32x4_t x2r0_2i0_2r1_x2i1 = vaddq_f32(a_16_48, a_24_56);
const float32x4_t x3r0_3i0_3r1_x3i1 = vsubq_f32(a_16_48, a_24_56);
// xx0 = x0 + x2, xx1 = x0 - x2.
const float32x4_t xx0 = vaddq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
const float32x4_t xx1 = vsubq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
// x3 with the two floats of every pair swapped.
const float32x4_t x3i0_3r0_3i1_x3r1 = vrev64q_f32(x3r0_3i0_3r1_x3i1);
// x1 +/- swap_sign * swapped(x3).
const float32x4_t x1_x3_add =
vmlaq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
const float32x4_t x1_x3_sub =
vmlsq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
// From the high halves (the offset-32 block) build
// yy0_as = {add[2], add[2], sub[2], sub[2]} and
// yy1_as = {add[3], add[3], sub[3], sub[3]}, then combine them as
// yy0 = yy0_as + swap_sign * yy1_as and scale by the cftmdl_wk1r factors.
const float32x2_t yy0_a = vdup_lane_f32(vget_high_f32(x1_x3_add), 0);
const float32x2_t yy0_s = vdup_lane_f32(vget_high_f32(x1_x3_sub), 0);
const float32x4_t yy0_as = vcombine_f32(yy0_a, yy0_s);
const float32x2_t yy1_a = vdup_lane_f32(vget_high_f32(x1_x3_add), 1);
const float32x2_t yy1_s = vdup_lane_f32(vget_high_f32(x1_x3_sub), 1);
const float32x4_t yy1_as = vcombine_f32(yy1_a, yy1_s);
const float32x4_t yy0 = vmlaq_f32(yy0_as, vec_swap_sign, yy1_as);
const float32x4_t yy4 = vmulq_f32(wk1rv, yy0);
const float32x4_t xx1_rev = vrev64q_f32(xx1);
const float32x4_t yy4_rev = vrev64q_f32(yy4);
vst1_f32(&a[j + 0], vget_low_f32(xx0));
vst1_f32(&a[j + 32], vget_high_f32(xx0));
vst1_f32(&a[j + 16], vget_low_f32(xx1));
// The entry at j + 48 receives the pair-swapped high half of xx1, and its
// first float is then negated.
vst1_f32(&a[j + 48], vget_high_f32(xx1_rev));
a[j + 48] = -a[j + 48];
vst1_f32(&a[j + 8], vget_low_f32(x1_x3_add));
vst1_f32(&a[j + 24], vget_low_f32(x1_x3_sub));
vst1_f32(&a[j + 40], vget_low_f32(yy4));
vst1_f32(&a[j + 56], vget_high_f32(yy4_rev));
}
// Pass 2: j = 64, 66, 68, 70, with twiddles taken from the rdft_wk* tables.
{
const int k = 64;
const int k1 = 2;
const int k2 = 2 * k1;
const float32x4_t wk2rv = vld1q_f32(&rdft_wk2r[k2 + 0]);
const float32x4_t wk2iv = vld1q_f32(&rdft_wk2i[k2 + 0]);
const float32x4_t wk1iv = vld1q_f32(&rdft_wk1i[k2 + 0]);
const float32x4_t wk3rv = vld1q_f32(&rdft_wk3r[k2 + 0]);
const float32x4_t wk3iv = vld1q_f32(&rdft_wk3i[k2 + 0]);
// Reuses the variable that held the cftmdl_wk1r factors in pass 1.
wk1rv = vld1q_f32(&rdft_wk1r[k2 + 0]);
for (j = k; j < l + k; j += 2) {
// Same load / combine / butterfly pattern as pass 1.
const float32x2_t a_00 = vld1_f32(&a[j + 0]);
const float32x2_t a_08 = vld1_f32(&a[j + 8]);
const float32x2_t a_32 = vld1_f32(&a[j + 32]);
const float32x2_t a_40 = vld1_f32(&a[j + 40]);
const float32x4_t a_00_32 = vcombine_f32(a_00, a_32);
const float32x4_t a_08_40 = vcombine_f32(a_08, a_40);
const float32x4_t x0r0_0i0_0r1_x0i1 = vaddq_f32(a_00_32, a_08_40);
const float32x4_t x1r0_1i0_1r1_x1i1 = vsubq_f32(a_00_32, a_08_40);
const float32x2_t a_16 = vld1_f32(&a[j + 16]);
const float32x2_t a_24 = vld1_f32(&a[j + 24]);
const float32x2_t a_48 = vld1_f32(&a[j + 48]);
const float32x2_t a_56 = vld1_f32(&a[j + 56]);
const float32x4_t a_16_48 = vcombine_f32(a_16, a_48);
const float32x4_t a_24_56 = vcombine_f32(a_24, a_56);
const float32x4_t x2r0_2i0_2r1_x2i1 = vaddq_f32(a_16_48, a_24_56);
const float32x4_t x3r0_3i0_3r1_x3i1 = vsubq_f32(a_16_48, a_24_56);
const float32x4_t xx = vaddq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
const float32x4_t xx1 = vsubq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
const float32x4_t x3i0_3r0_3i1_x3r1 = vrev64q_f32(x3r0_3i0_3r1_x3i1);
const float32x4_t x1_x3_add =
vmlaq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
const float32x4_t x1_x3_sub =
vmlsq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
// Twiddle step: each result is wkNr * v + wkNi * swapped(v).
float32x4_t xx4 = vmulq_f32(wk2rv, xx1);
float32x4_t xx12 = vmulq_f32(wk1rv, x1_x3_add);
float32x4_t xx22 = vmulq_f32(wk3rv, x1_x3_sub);
xx4 = vmlaq_f32(xx4, wk2iv, vrev64q_f32(xx1));
xx12 = vmlaq_f32(xx12, wk1iv, vrev64q_f32(x1_x3_add));
xx22 = vmlaq_f32(xx22, wk3iv, vrev64q_f32(x1_x3_sub));
// Low halves go to the block at j, high halves to the block at j + 32.
vst1_f32(&a[j + 0], vget_low_f32(xx));
vst1_f32(&a[j + 32], vget_high_f32(xx));
vst1_f32(&a[j + 16], vget_low_f32(xx4));
vst1_f32(&a[j + 48], vget_high_f32(xx4));
vst1_f32(&a[j + 8], vget_low_f32(xx12));
vst1_f32(&a[j + 40], vget_high_f32(xx12));
vst1_f32(&a[j + 24], vget_low_f32(xx22));
vst1_f32(&a[j + 56], vget_high_f32(xx22));
}
}
}
// Returns `in` with its four lanes in reverse order (A B C D -> D C B A).
__inline static float32x4_t reverse_order_f32x4(float32x4_t in) {
  // Swapping the 64-bit halves gives C D A B; reversing within each half then
  // gives D C B A.
  return vrev64q_f32(vcombine_f32(vget_high_f32(in), vget_low_f32(in)));
}
// NEON implementation of the "rftfsub" stage. Updates the 128-float buffer `a`
// in place. The two-float entry at index j2 (starting at 2 and moving up) is
// paired with the entry at k2 = 128 - j2 (moving down), and both are adjusted
// with coefficients read from rdft_w + 32. The vector loop handles four such
// pairs per iteration; the scalar loop finishes the pairs left over
// (j2 = 58, 60, 62).
void rftfsub_128_neon(float* a) {
const float* c = rdft_w + 32;
int j1, j2;
const float32x4_t mm_half = vdupq_n_f32(0.5f);
// Vectorized code (four at once).
// Note: commented number are indexes for the first iteration of the loop.
for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
// Load 'wk'.
// wkr = 0.5 - c[32 - j1] and wki = c[j1]; the c_k1 lanes are reversed so that
// they line up with the ascending j1 lanes.
const float32x4_t c_j1 = vld1q_f32(&c[j1]); // 1, 2, 3, 4,
const float32x4_t c_k1 = vld1q_f32(&c[29 - j1]); // 28, 29, 30, 31,
const float32x4_t wkrt = vsubq_f32(mm_half, c_k1); // 28, 29, 30, 31,
const float32x4_t wkr_ = reverse_order_f32x4(wkrt); // 31, 30, 29, 28,
const float32x4_t wki_ = c_j1; // 1, 2, 3, 4,
// Load and shuffle 'a'.
// vld2q de-interleaves: val[0] holds the even indexes and val[1] the odd ones.
// 2, 4, 6, 8, 3, 5, 7, 9
float32x4x2_t a_j2_p = vld2q_f32(&a[0 + j2]);
// 120, 122, 124, 126, 121, 123, 125, 127,
const float32x4x2_t k2_0_4 = vld2q_f32(&a[122 - j2]);
// 126, 124, 122, 120
const float32x4_t a_k2_p0 = reverse_order_f32x4(k2_0_4.val[0]);
// 127, 125, 123, 121
const float32x4_t a_k2_p1 = reverse_order_f32x4(k2_0_4.val[1]);
// Calculate 'x'.
// xr = a[j2 + 0] - a[k2 + 0];
// xi = a[j2 + 1] + a[k2 + 1];
const float32x4_t xr_ = vsubq_f32(a_j2_p.val[0], a_k2_p0);
// 2-126, 4-124, 6-122, 8-120,
const float32x4_t xi_ = vaddq_f32(a_j2_p.val[1], a_k2_p1);
// 3+127, 5+125, 7+123, 9+121,
// Calculate product into 'y'.
// yr = wkr * xr - wki * xi;
// yi = wkr * xi + wki * xr;
const float32x4_t a_ = vmulq_f32(wkr_, xr_);
const float32x4_t b_ = vmulq_f32(wki_, xi_);
const float32x4_t c_ = vmulq_f32(wkr_, xi_);
const float32x4_t d_ = vmulq_f32(wki_, xr_);
const float32x4_t yr_ = vsubq_f32(a_, b_); // lanes for j2 = 2, 4, 6, 8,
const float32x4_t yi_ = vaddq_f32(c_, d_); // lanes for j2 = 2, 4, 6, 8,
// Update 'a'.
// a[j2 + 0] -= yr;
// a[j2 + 1] -= yi;
// a[k2 + 0] += yr;
// a[k2 + 1] -= yi;
// 126, 124, 122, 120,
const float32x4_t a_k2_p0n = vaddq_f32(a_k2_p0, yr_);
// 127, 125, 123, 121,
const float32x4_t a_k2_p1n = vsubq_f32(a_k2_p1, yi_);
// Shuffle in right order and store.
// After vrev64q: 124, 126, 120, 122 and 125, 127, 121, 123.
const float32x4_t a_k2_p0nr = vrev64q_f32(a_k2_p0n);
const float32x4_t a_k2_p1nr = vrev64q_f32(a_k2_p1n);
// 124, 125, 126, 127, 120, 121, 122, 123
const float32x4x2_t a_k2_n = vzipq_f32(a_k2_p0nr, a_k2_p1nr);
// 2, 4, 6, 8,
a_j2_p.val[0] = vsubq_f32(a_j2_p.val[0], yr_);
// 3, 5, 7, 9,
a_j2_p.val[1] = vsubq_f32(a_j2_p.val[1], yi_);
// 2, 3, 4, 5, 6, 7, 8, 9,
vst2q_f32(&a[0 + j2], a_j2_p);
// val[1] holds 120..123 and val[0] holds 124..127.
vst1q_f32(&a[122 - j2], a_k2_n.val[1]);
vst1q_f32(&a[126 - j2], a_k2_n.val[0]);
}
// Scalar code for the remaining items.
for (; j2 < 64; j1 += 1, j2 += 2) {
const int k2 = 128 - j2;
const int k1 = 32 - j1;
const float wkr = 0.5f - c[k1];
const float wki = c[j1];
const float xr = a[j2 + 0] - a[k2 + 0];
const float xi = a[j2 + 1] + a[k2 + 1];
const float yr = wkr * xr - wki * xi;
const float yi = wkr * xi + wki * xr;
a[j2 + 0] -= yr;
a[j2 + 1] -= yi;
a[k2 + 0] += yr;
a[k2 + 1] -= yi;
}
}
// NEON implementation of the "rftbsub" stage. Updates the 128-float buffer `a`
// in place. It has the same pairing as rftfsub_128_neon (the entry at j2 with
// the entry at k2 = 128 - j2, coefficients from rdft_w + 32) but different
// signs: yr / yi use the opposite sign on the wki terms, and the odd-index
// outputs are written as yi - a[...]. It also negates a[1] before the loops
// and a[65] after them.
void rftbsub_128_neon(float* a) {
const float* c = rdft_w + 32;
int j1, j2;
const float32x4_t mm_half = vdupq_n_f32(0.5f);
a[1] = -a[1];
// Vectorized code (four at once).
// Note: commented number are indexes for the first iteration of the loop.
for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
// Load 'wk'.
// wkr = 0.5 - c[32 - j1] and wki = c[j1]; the c_k1 lanes are reversed so that
// they line up with the ascending j1 lanes.
const float32x4_t c_j1 = vld1q_f32(&c[j1]); // 1, 2, 3, 4,
const float32x4_t c_k1 = vld1q_f32(&c[29 - j1]); // 28, 29, 30, 31,
const float32x4_t wkrt = vsubq_f32(mm_half, c_k1); // 28, 29, 30, 31,
const float32x4_t wkr_ = reverse_order_f32x4(wkrt); // 31, 30, 29, 28,
const float32x4_t wki_ = c_j1; // 1, 2, 3, 4,
// Load and shuffle 'a'.
// vld2q de-interleaves: val[0] holds the even indexes and val[1] the odd ones.
// 2, 4, 6, 8, 3, 5, 7, 9
float32x4x2_t a_j2_p = vld2q_f32(&a[0 + j2]);
// 120, 122, 124, 126, 121, 123, 125, 127,
const float32x4x2_t k2_0_4 = vld2q_f32(&a[122 - j2]);
// 126, 124, 122, 120
const float32x4_t a_k2_p0 = reverse_order_f32x4(k2_0_4.val[0]);
// 127, 125, 123, 121
const float32x4_t a_k2_p1 = reverse_order_f32x4(k2_0_4.val[1]);
// Calculate 'x'.
// xr = a[j2 + 0] - a[k2 + 0];
// xi = a[j2 + 1] + a[k2 + 1];
const float32x4_t xr_ = vsubq_f32(a_j2_p.val[0], a_k2_p0);
// 2-126, 4-124, 6-122, 8-120,
const float32x4_t xi_ = vaddq_f32(a_j2_p.val[1], a_k2_p1);
// 3+127, 5+125, 7+123, 9+121,
// Calculate product into 'y'.
// yr = wkr * xr + wki * xi;
// yi = wkr * xi - wki * xr;
const float32x4_t a_ = vmulq_f32(wkr_, xr_);
const float32x4_t b_ = vmulq_f32(wki_, xi_);
const float32x4_t c_ = vmulq_f32(wkr_, xi_);
const float32x4_t d_ = vmulq_f32(wki_, xr_);
const float32x4_t yr_ = vaddq_f32(a_, b_); // lanes for j2 = 2, 4, 6, 8,
const float32x4_t yi_ = vsubq_f32(c_, d_); // lanes for j2 = 2, 4, 6, 8,
// Update 'a'.
// a[j2 + 0] = a[j2 + 0] - yr;
// a[j2 + 1] = yi - a[j2 + 1];
// a[k2 + 0] = yr + a[k2 + 0];
// a[k2 + 1] = yi - a[k2 + 1];
// 126, 124, 122, 120,
const float32x4_t a_k2_p0n = vaddq_f32(a_k2_p0, yr_);
// 127, 125, 123, 121,
const float32x4_t a_k2_p1n = vsubq_f32(yi_, a_k2_p1);
// Shuffle in right order and store.
// After vrev64q: 124, 126, 120, 122 and 125, 127, 121, 123.
const float32x4_t a_k2_p0nr = vrev64q_f32(a_k2_p0n);
const float32x4_t a_k2_p1nr = vrev64q_f32(a_k2_p1n);
// 124, 125, 126, 127, 120, 121, 122, 123
const float32x4x2_t a_k2_n = vzipq_f32(a_k2_p0nr, a_k2_p1nr);
// 2, 4, 6, 8,
a_j2_p.val[0] = vsubq_f32(a_j2_p.val[0], yr_);
// 3, 5, 7, 9,
a_j2_p.val[1] = vsubq_f32(yi_, a_j2_p.val[1]);
// 2, 3, 4, 5, 6, 7, 8, 9,
vst2q_f32(&a[0 + j2], a_j2_p);
// val[1] holds 120..123 and val[0] holds 124..127.
vst1q_f32(&a[122 - j2], a_k2_n.val[1]);
vst1q_f32(&a[126 - j2], a_k2_n.val[0]);
}
// Scalar code for the remaining items.
for (; j2 < 64; j1 += 1, j2 += 2) {
const int k2 = 128 - j2;
const int k1 = 32 - j1;
const float wkr = 0.5f - c[k1];
const float wki = c[j1];
const float xr = a[j2 + 0] - a[k2 + 0];
const float xi = a[j2 + 1] + a[k2 + 1];
const float yr = wkr * xr + wki * xi;
const float yi = wkr * xi - wki * xr;
a[j2 + 0] = a[j2 + 0] - yr;
a[j2 + 1] = yi - a[j2 + 1];
a[k2 + 0] = yr + a[k2 + 0];
a[k2 + 1] = yi - a[k2 + 1];
}
// The entry at index 64 is never visited by the loops above (j2 stops at 62
// and k2 at 66), so its odd-index float is negated here.
a[65] = -a[65];
}
#endif
} // namespace webrtc

Some files were not shown because too many files have changed in this diff Show more