From c013f7648bd6f46b92b2e800bd0081ecb47390b9 Mon Sep 17 00:00:00 2001 From: Geoffrey McRae Date: Tue, 12 Mar 2019 16:06:50 +1100 Subject: [PATCH] [common] added resample support While this on the surface may not make much sense to do, according to Xiph the RnNoise neural net is computed specifically for 48KHz and as such would require a re-train simply to operate at a different sample rate. This patch also implements a ring buffer to avoid excessive memory operations. --- .../include/common/RnNoiseCommonPlugin.h | 33 ++- src/common/src/RnNoiseCommonPlugin.cpp | 228 +++++++++++++----- 2 files changed, 192 insertions(+), 69 deletions(-) diff --git a/src/common/include/common/RnNoiseCommonPlugin.h b/src/common/include/common/RnNoiseCommonPlugin.h index 46c95fa7..cd52867e 100644 --- a/src/common/include/common/RnNoiseCommonPlugin.h +++ b/src/common/include/common/RnNoiseCommonPlugin.h @@ -3,30 +3,45 @@ #include #include +#include + struct DenoiseState; class RnNoiseCommonPlugin { public: + RnNoiseCommonPlugin(); + + void setSampleRate(unsigned long sampleRate); - void init(); + bool init(); void deinit(); + const char * getError() + { + return m_errorStr; + } + void process(const float *in, float *out, int32_t sampleFrames); private: + const char * m_errorStr; - void createDenoiseState(); + bool m_initialized; + bool m_resample; -private: static const int k_denoiseFrameSize = 480; static const int k_denoiseSampleRate = 48000; + std::shared_ptr m_srcIn; + std::shared_ptr m_srcOut; + double m_downRatio; + double m_upRatio; std::shared_ptr m_denoiseState; - std::vector m_inputBuffer; - std::vector m_outputBuffer; -}; - - - + std::vector m_inBuffer; + std::vector m_outBuffer; + size_t m_outBufferR; + size_t m_outBufferW; + size_t m_outBufferA; +}; \ No newline at end of file diff --git a/src/common/src/RnNoiseCommonPlugin.cpp b/src/common/src/RnNoiseCommonPlugin.cpp index 9f9af9b0..8ac21a6f 100644 --- a/src/common/src/RnNoiseCommonPlugin.cpp +++ b/src/common/src/RnNoiseCommonPlugin.cpp @@ -7,83 +7,191 @@ #include -void RnNoiseCommonPlugin::init() { - deinit(); - createDenoiseState(); +RnNoiseCommonPlugin::RnNoiseCommonPlugin() : + m_errorStr (NULL), + m_initialized(false), + m_resample (false) +{ } -void RnNoiseCommonPlugin::deinit() { - m_denoiseState.reset(); +void RnNoiseCommonPlugin::setSampleRate(unsigned long sampleRate) +{ + m_downRatio = (double)k_denoiseSampleRate / (double)sampleRate; + m_upRatio = (double)sampleRate / (double)k_denoiseSampleRate; + m_resample = sampleRate != 48000; } -void RnNoiseCommonPlugin::process(const float *in, float *out, int32_t sampleFrames) { - if (sampleFrames == 0) { - return; - } - - if (!m_denoiseState) { - createDenoiseState(); - } +bool RnNoiseCommonPlugin::init() { + int err; - // Good case, we can copy less data around and rnnoise lib is built for it - if (sampleFrames == k_denoiseFrameSize) { - m_inputBuffer.resize(sampleFrames); + if (m_initialized) + deinit(); - for (size_t i = 0; i < sampleFrames; i++) { - m_inputBuffer[i] = in[i] * std::numeric_limits::max(); - } + m_srcIn = std::shared_ptr( + src_new(SRC_SINC_BEST_QUALITY, 1, &err), + [](SRC_STATE *st) + { + src_delete(st); + } + ); + + if (err) + { + m_errorStr = src_strerror(err); + return false; + } + + m_srcOut = std::shared_ptr( + src_new(SRC_SINC_BEST_QUALITY, 1, &err), + [](SRC_STATE *st) + { + src_delete(st); + } + ); + + if (err) + { + m_srcIn.reset(); + m_errorStr = src_strerror(err); + return false; + } + + m_denoiseState = std::shared_ptr( + rnnoise_create(), + [](DenoiseState *st) + { + rnnoise_destroy(st); + } + ); - rnnoise_process_frame(m_denoiseState.get(), out, &m_inputBuffer[0]); + src_set_ratio(m_srcIn.get(), m_downRatio); + src_set_ratio(m_srcOut .get(), m_upRatio ); - for (size_t i = 0; i < sampleFrames; i++) { - out[i] /= std::numeric_limits::max(); - } - } else { - m_inputBuffer.resize(m_inputBuffer.size() + sampleFrames); + m_inBuffer .resize(k_denoiseFrameSize); + m_outBuffer.resize(k_denoiseFrameSize * 2); + m_outBufferR = 0; + m_outBufferW = 0; + m_outBufferA = 0; - // From [-1.f,1.f] range to [min short, max short] range which rnnoise lib will understand - { - float *inputBufferWriteStart = (m_inputBuffer.end() - sampleFrames).base(); - for (size_t i = 0; i < sampleFrames; i++) { - inputBufferWriteStart[i] = in[i] * std::numeric_limits::max(); - } - } + m_initialized = true; + m_errorStr = NULL; + return true; +} - const size_t samplesToProcess = m_inputBuffer.size() / k_denoiseFrameSize; - const size_t framesToProcess = samplesToProcess * k_denoiseFrameSize; +void RnNoiseCommonPlugin::deinit() { + m_denoiseState.reset(); + m_srcIn .reset(); + m_srcOut .reset(); + m_initialized = false; +} - m_outputBuffer.resize(m_outputBuffer.size() + framesToProcess); +void RnNoiseCommonPlugin::process(const float *in, float *out, int32_t sampleFrames) +{ + const float mul = 1.0f / std::numeric_limits::max(); + if (!sampleFrames) + return; + + if (!m_initialized) + init(); + + SRC_DATA srcIn; + srcIn.data_in = in; + srcIn.input_frames = sampleFrames; + srcIn.end_of_input = 0; + srcIn.src_ratio = m_downRatio; + srcIn.data_out = &m_inBuffer[0]; + srcIn.output_frames = m_inBuffer.size(); + + SRC_DATA srcOut; + srcOut.data_out = out; + srcOut.output_frames = sampleFrames; + srcOut.end_of_input = 0; + srcOut.src_ratio = m_upRatio; + + long frames = 0; + while(srcIn.input_frames) + { + if (m_resample) + { + // resample the samples and then scale them + src_process(m_srcIn.get(), &srcIn); + for(long i = 0; i < srcIn.output_frames_gen; ++i) + m_inBuffer[i] *= std::numeric_limits::max(); + } + else + { + // just copy the data and scale it + srcIn.input_frames_used = srcIn.input_frames; + if (srcIn.input_frames_used > srcIn.output_frames) + srcIn.input_frames_used = srcIn.output_frames; + srcIn.output_frames_gen = srcIn.input_frames_used; + + for(long i = 0; i < srcIn.output_frames_gen; ++i) + m_inBuffer[i] = in[i] * std::numeric_limits::max(); + } - // Process input buffer by chunks of k_denoiseFrameSize, put result into out buffer to return into range [-1.f,1.f] + srcIn.data_in += srcIn.input_frames_used; + srcIn.input_frames -= srcIn.input_frames_used; + + float *denoise_in = &m_inBuffer[0]; + while(srcIn.output_frames_gen) + { + const int wrote = rnnoise_add_samples(m_denoiseState.get(), denoise_in, srcIn.output_frames_gen); + denoise_in += wrote; + srcIn.output_frames_gen -= wrote; + + if (rnnoise_get_needed(m_denoiseState.get()) == 0) + { + rnnoise_process_frame(m_denoiseState.get(), &m_outBuffer[m_outBufferW]); + + // scale the levels back to normal + for(int32_t i = 0; i < k_denoiseFrameSize; ++i) + m_outBuffer[m_outBufferW + i] *= mul; + + m_outBufferW += k_denoiseFrameSize; + m_outBufferA += k_denoiseFrameSize; + if (m_outBufferW == m_outBuffer.size()) + m_outBufferW = 0; + } + + // resample what we can to the output + while(m_outBufferA && srcOut.output_frames) + { + srcOut.data_in = &m_outBuffer[m_outBufferR]; + srcOut.input_frames = m_outBufferW < m_outBufferR ? m_outBuffer.size() - m_outBufferR : m_outBufferW - m_outBufferR; + + if (m_resample) + src_process(m_srcOut.get(), &srcOut); + else { - float *outBufferWriteStart = (m_outputBuffer.end() - framesToProcess).base(); - - for (size_t i = 0; i < samplesToProcess; i++) { - float *currentOutBuffer = &outBufferWriteStart[i * k_denoiseFrameSize]; - float *currentInBuffer = &m_inputBuffer[i * k_denoiseFrameSize]; - rnnoise_process_frame(m_denoiseState.get(), currentOutBuffer, currentInBuffer); - - for (size_t j = 0; j < k_denoiseFrameSize; j++) { - currentOutBuffer[j] /= std::numeric_limits::max(); - } - } + // simply copy the buffer if we are not resampling + srcOut.input_frames_used = srcOut.input_frames; + if (srcOut.input_frames_used > srcOut.output_frames) + srcOut.input_frames_used = srcOut.output_frames; + memcpy(srcOut.data_out, srcOut.data_in, srcOut.input_frames_used * sizeof(float)); } - const size_t toCopyIntoOutput = std::min(m_outputBuffer.size(), static_cast(sampleFrames)); + if (!srcOut.input_frames_used && !srcOut.output_frames_gen) + break; - std::memcpy(out, &m_outputBuffer[0], toCopyIntoOutput * sizeof(float)); + m_outBufferR += srcOut.input_frames_used; + m_outBufferA -= srcOut.input_frames_used; - m_inputBuffer.erase(m_inputBuffer.begin(), m_inputBuffer.begin() + framesToProcess); - m_outputBuffer.erase(m_outputBuffer.begin(), m_outputBuffer.begin() + toCopyIntoOutput); + srcOut.data_out += srcOut.output_frames_gen; + srcOut.output_frames -= srcOut.output_frames_gen; + frames += srcOut.output_frames_gen; - if (toCopyIntoOutput < sampleFrames) { - std::fill(out + toCopyIntoOutput, out + sampleFrames, 0.f); - } + if (m_outBufferR == m_outBuffer.size()) + m_outBufferR = 0; + } } -} - -void RnNoiseCommonPlugin::createDenoiseState() { - m_denoiseState = std::shared_ptr(rnnoise_create(), [](DenoiseState *st) { - rnnoise_destroy(st); - }); + } + + // if we generated less frames then wanted, pad them across to the right + if (frames && frames < sampleFrames) + { + const size_t pad = sampleFrames - frames; + memmove(out + pad, out, frames); + memset(out, 0, pad); + } } \ No newline at end of file