Skip to content

Commit

Permalink
[common] added resample support
Browse files Browse the repository at this point in the history
While resampling may not seem to make much sense on the surface, according to Xiph the
RnNoise neural network is trained specifically for 48 kHz audio, so it would need to be
re-trained simply to operate at a different sample rate.

This patch also implements a ring buffer to avoid excessive memory operations.
  • Loading branch information
gnif committed Mar 12, 2019
1 parent 7f57360 commit 28c767a
Show file tree
Hide file tree
Showing 3 changed files with 198 additions and 69 deletions.
6 changes: 6 additions & 0 deletions src/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ set(COMMON_SRC

add_library(RnNoisePluginCommon STATIC ${COMMON_SRC})

# The samplerate module is located through pkg-config; both lookups are
# marked REQUIRED, so configuration stops if either one cannot be satisfied.
find_package(PkgConfig REQUIRED)
pkg_check_modules(SAMPLERATE REQUIRED samplerate)
# Hand the libraries, include directories and extra compile flags that
# pkg-config reported for samplerate to the RnNoisePluginCommon target.
# The include directories and flags are PUBLIC, so targets linking against
# RnNoisePluginCommon receive them as well.
target_link_libraries(RnNoisePluginCommon ${SAMPLERATE_LIBRARIES})
target_include_directories(RnNoisePluginCommon PUBLIC ${SAMPLERATE_INCLUDE_DIRS})
target_compile_options(RnNoisePluginCommon PUBLIC ${SAMPLERATE_CFLAGS_OTHER})

target_link_libraries(RnNoisePluginCommon RnNoise)

target_include_directories(RnNoisePluginCommon PUBLIC
Expand Down
33 changes: 24 additions & 9 deletions src/common/include/common/RnNoiseCommonPlugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,30 +3,45 @@
#include <memory>
#include <vector>

#include <samplerate.h>

struct DenoiseState;

class RnNoiseCommonPlugin {
public:
RnNoiseCommonPlugin();

void setSampleRate(unsigned long sampleRate);

void init();
bool init();

void deinit();

// Returns the stored error string: the message recorded by the last failed
// init(), or a null pointer when no failure is recorded (freshly constructed
// object, or the last init() succeeded).
const char * getError() { return m_errorStr; }

void process(const float *in, float *out, int32_t sampleFrames);

private:
const char * m_errorStr;

void createDenoiseState();
bool m_initialized;
bool m_resample;

private:
static const int k_denoiseFrameSize = 480;
static const int k_denoiseSampleRate = 48000;

std::shared_ptr<SRC_STATE> m_srcIn;
std::shared_ptr<SRC_STATE> m_srcOut;
double m_downRatio;
double m_upRatio;
std::shared_ptr<DenoiseState> m_denoiseState;

std::vector<float> m_inputBuffer;
std::vector<float> m_outputBuffer;
};



std::vector<float> m_inBuffer;
std::vector<float> m_outBuffer;
size_t m_outBufferR;
size_t m_outBufferW;
size_t m_outBufferA;
};
228 changes: 168 additions & 60 deletions src/common/src/RnNoiseCommonPlugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,83 +7,191 @@

#include <rnnoise/rnnoise.h>

void RnNoiseCommonPlugin::init() {
deinit();
createDenoiseState();
// Builds an idle, uninitialised plugin.
//
// Every scalar member gets a defined value: no error recorded, not
// initialised, resampling disabled with unity conversion ratios (which is what
// setSampleRate() would compute for a 48000 Hz host) and an empty output ring
// buffer. Previously the ratios and ring indices were left indeterminate, so
// calling init()/process() without a prior setSampleRate() read garbage.
RnNoiseCommonPlugin::RnNoiseCommonPlugin() :
  m_errorStr   (nullptr),
  m_initialized(false),
  m_resample   (false),
  m_downRatio  (1.0),
  m_upRatio    (1.0),
  m_outBufferR (0),
  m_outBufferW (0),
  m_outBufferA (0)
{
}

void RnNoiseCommonPlugin::deinit() {
m_denoiseState.reset();
// Records the host sample rate and derives the conversion ratios between it
// and the rate the denoiser works at (k_denoiseSampleRate).
//
// sampleRate: host sample rate in Hz. A value of zero is ignored and the
//             previous configuration is kept, because it would otherwise
//             produce an infinite down ratio and a zero up ratio.
//
// Resampling is enabled only when the host rate differs from
// k_denoiseSampleRate; the comparison now uses that constant rather than a
// second hard-coded 48000.
void RnNoiseCommonPlugin::setSampleRate(unsigned long sampleRate)
{
  if (sampleRate == 0)
    return;

  const double hostRate    = static_cast<double>(sampleRate);
  const double denoiseRate = static_cast<double>(k_denoiseSampleRate);

  m_downRatio = denoiseRate / hostRate;   // host rate    -> denoiser rate
  m_upRatio   = hostRate / denoiseRate;   // denoiser rate -> host rate
  m_resample  = sampleRate != static_cast<unsigned long>(k_denoiseSampleRate);
}

void RnNoiseCommonPlugin::process(const float *in, float *out, int32_t sampleFrames) {
if (sampleFrames == 0) {
return;
}

if (!m_denoiseState) {
createDenoiseState();
}
bool RnNoiseCommonPlugin::init() {
int err;

// Good case, we can copy less data around and rnnoise lib is built for it
if (sampleFrames == k_denoiseFrameSize) {
m_inputBuffer.resize(sampleFrames);
if (m_initialized)
deinit();

for (size_t i = 0; i < sampleFrames; i++) {
m_inputBuffer[i] = in[i] * std::numeric_limits<short>::max();
}
m_srcIn = std::shared_ptr<SRC_STATE>(
src_new(SRC_SINC_BEST_QUALITY, 1, &err),
[](SRC_STATE *st)
{
src_delete(st);
}
);

if (err)
{
m_errorStr = src_strerror(err);
return false;
}

m_srcOut = std::shared_ptr<SRC_STATE>(
src_new(SRC_SINC_BEST_QUALITY, 1, &err),
[](SRC_STATE *st)
{
src_delete(st);
}
);

if (err)
{
m_srcIn.reset();
m_errorStr = src_strerror(err);
return false;
}

m_denoiseState = std::shared_ptr<DenoiseState>(
rnnoise_create(),
[](DenoiseState *st)
{
rnnoise_destroy(st);
}
);

rnnoise_process_frame(m_denoiseState.get(), out, &m_inputBuffer[0]);
src_set_ratio(m_srcIn.get(), m_downRatio);
src_set_ratio(m_srcOut .get(), m_upRatio );

for (size_t i = 0; i < sampleFrames; i++) {
out[i] /= std::numeric_limits<short>::max();
}
} else {
m_inputBuffer.resize(m_inputBuffer.size() + sampleFrames);
m_inBuffer .resize(k_denoiseFrameSize);
m_outBuffer.resize(k_denoiseFrameSize * 2);
m_outBufferR = 0;
m_outBufferW = 0;
m_outBufferA = 0;

// From [-1.f,1.f] range to [min short, max short] range which rnnoise lib will understand
{
float *inputBufferWriteStart = (m_inputBuffer.end() - sampleFrames).base();
for (size_t i = 0; i < sampleFrames; i++) {
inputBufferWriteStart[i] = in[i] * std::numeric_limits<short>::max();
}
}
m_initialized = true;
m_errorStr = NULL;
return true;
}

const size_t samplesToProcess = m_inputBuffer.size() / k_denoiseFrameSize;
const size_t framesToProcess = samplesToProcess * k_denoiseFrameSize;
// Drops the denoise state and both sample rate converters and marks the
// object as uninitialised. The stored error string and the buffers are left
// untouched.
void RnNoiseCommonPlugin::deinit()
{
  m_initialized = false;

  // release in the same order as before: denoiser first, then the converters
  m_denoiseState = nullptr;
  m_srcIn        = nullptr;
  m_srcOut       = nullptr;
}

m_outputBuffer.resize(m_outputBuffer.size() + framesToProcess);
void RnNoiseCommonPlugin::process(const float *in, float *out, int32_t sampleFrames)
{
const float mul = 1.0f / std::numeric_limits<short>::max();
if (!sampleFrames)
return;

if (!m_initialized)
init();

SRC_DATA srcIn;
srcIn.data_in = in;
srcIn.input_frames = sampleFrames;
srcIn.end_of_input = 0;
srcIn.src_ratio = m_downRatio;
srcIn.data_out = &m_inBuffer[0];
srcIn.output_frames = m_inBuffer.size();

SRC_DATA srcOut;
srcOut.data_out = out;
srcOut.output_frames = sampleFrames;
srcOut.end_of_input = 0;
srcOut.src_ratio = m_upRatio;

long frames = 0;
while(srcIn.input_frames)
{
if (m_resample)
{
// resample the samples and then scale them
src_process(m_srcIn.get(), &srcIn);
for(long i = 0; i < srcIn.output_frames_gen; ++i)
m_inBuffer[i] *= std::numeric_limits<short>::max();
}
else
{
// just copy the data and scale it
srcIn.input_frames_used = srcIn.input_frames;
if (srcIn.input_frames_used > srcIn.output_frames)
srcIn.input_frames_used = srcIn.output_frames;
srcIn.output_frames_gen = srcIn.input_frames_used;

for(long i = 0; i < srcIn.output_frames_gen; ++i)
m_inBuffer[i] = in[i] * std::numeric_limits<short>::max();
}

// Process input buffer by chunks of k_denoiseFrameSize, put result into out buffer to return into range [-1.f,1.f]
srcIn.data_in += srcIn.input_frames_used;
srcIn.input_frames -= srcIn.input_frames_used;

float *denoise_in = &m_inBuffer[0];
while(srcIn.output_frames_gen)
{
const int wrote = rnnoise_add_samples(m_denoiseState.get(), denoise_in, srcIn.output_frames_gen);
denoise_in += wrote;
srcIn.output_frames_gen -= wrote;

if (rnnoise_get_needed(m_denoiseState.get()) == 0)
{
rnnoise_process_frame(m_denoiseState.get(), &m_outBuffer[m_outBufferW]);

// scale the levels back to normal
for(int32_t i = 0; i < k_denoiseFrameSize; ++i)
m_outBuffer[m_outBufferW + i] *= mul;

m_outBufferW += k_denoiseFrameSize;
m_outBufferA += k_denoiseFrameSize;
if (m_outBufferW == m_outBuffer.size())
m_outBufferW = 0;
}

// resample what we can to the output
while(m_outBufferA && srcOut.output_frames)
{
srcOut.data_in = &m_outBuffer[m_outBufferR];
srcOut.input_frames = m_outBufferW < m_outBufferR ? m_outBuffer.size() - m_outBufferR : m_outBufferW - m_outBufferR;

if (m_resample)
src_process(m_srcOut.get(), &srcOut);
else
{
float *outBufferWriteStart = (m_outputBuffer.end() - framesToProcess).base();

for (size_t i = 0; i < samplesToProcess; i++) {
float *currentOutBuffer = &outBufferWriteStart[i * k_denoiseFrameSize];
float *currentInBuffer = &m_inputBuffer[i * k_denoiseFrameSize];
rnnoise_process_frame(m_denoiseState.get(), currentOutBuffer, currentInBuffer);

for (size_t j = 0; j < k_denoiseFrameSize; j++) {
currentOutBuffer[j] /= std::numeric_limits<short>::max();
}
}
// simply copy the buffer if we are not resampling
srcOut.input_frames_used = srcOut.input_frames;
if (srcOut.input_frames_used > srcOut.output_frames)
srcOut.input_frames_used = srcOut.output_frames;
memcpy(srcOut.data_out, srcOut.data_in, srcOut.input_frames_used * sizeof(float));
}

const size_t toCopyIntoOutput = std::min(m_outputBuffer.size(), static_cast<size_t>(sampleFrames));
if (!srcOut.input_frames_used && !srcOut.output_frames_gen)
break;

std::memcpy(out, &m_outputBuffer[0], toCopyIntoOutput * sizeof(float));
m_outBufferR += srcOut.input_frames_used;
m_outBufferA -= srcOut.input_frames_used;

m_inputBuffer.erase(m_inputBuffer.begin(), m_inputBuffer.begin() + framesToProcess);
m_outputBuffer.erase(m_outputBuffer.begin(), m_outputBuffer.begin() + toCopyIntoOutput);
srcOut.data_out += srcOut.output_frames_gen;
srcOut.output_frames -= srcOut.output_frames_gen;
frames += srcOut.output_frames_gen;

if (toCopyIntoOutput < sampleFrames) {
std::fill(out + toCopyIntoOutput, out + sampleFrames, 0.f);
}
if (m_outBufferR == m_outBuffer.size())
m_outBufferR = 0;
}
}
}

void RnNoiseCommonPlugin::createDenoiseState() {
m_denoiseState = std::shared_ptr<DenoiseState>(rnnoise_create(), [](DenoiseState *st) {
rnnoise_destroy(st);
});
}

// if we generated less frames then wanted, pad them across to the right
if (frames && frames < sampleFrames)
{
const size_t pad = sampleFrames - frames;
memmove(out + pad, out, frames);
memset(out, 0, pad);
}
}

0 comments on commit 28c767a

Please sign in to comment.