Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a C++ example to show streaming VAD + non-streaming ASR. #420

Merged
merged 1 commit into from
Nov 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmake/cmake_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ def build_extension(self, ext: setuptools.extension.Extension):
binaries += ["sherpa-onnx-offline-websocket-server"]
binaries += ["sherpa-onnx-online-websocket-client"]
binaries += ["sherpa-onnx-vad-microphone"]
binaries += ["sherpa-onnx-vad-microphone-offline-asr"]
binaries += ["sherpa-onnx-offline-tts"]

if is_windows():
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def get_binaries_to_install():
binaries += ["sherpa-onnx-offline-websocket-server"]
binaries += ["sherpa-onnx-online-websocket-client"]
binaries += ["sherpa-onnx-vad-microphone"]
binaries += ["sherpa-onnx-vad-microphone-offline-asr"]
binaries += ["sherpa-onnx-offline-tts"]
if is_windows():
binaries += ["kaldi-native-fbank-core.dll"]
Expand Down
6 changes: 6 additions & 0 deletions sherpa-onnx/csrc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,11 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO)
microphone.cc
)

add_executable(sherpa-onnx-vad-microphone-offline-asr
sherpa-onnx-vad-microphone-offline-asr.cc
microphone.cc
)

if(BUILD_SHARED_LIBS)
set(PA_LIB portaudio)
else()
Expand All @@ -235,6 +240,7 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO)
sherpa-onnx-microphone
sherpa-onnx-microphone-offline
sherpa-onnx-vad-microphone
sherpa-onnx-vad-microphone-offline-asr
)
foreach(exe IN LISTS exes)
target_link_libraries(${exe} ${PA_LIB} sherpa-onnx-core)
Expand Down
199 changes: 199 additions & 0 deletions sherpa-onnx/csrc/sherpa-onnx-vad-microphone-offline-asr.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
// sherpa-onnx/csrc/sherpa-onnx-vad-microphone-offline-asr.cc
//
// Copyright (c) 2022-2023 Xiaomi Corporation

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <atomic>
#include <memory>
#include <mutex> // NOLINT
#include <vector>

#include "portaudio.h" // NOLINT
#include "sherpa-onnx/csrc/circular-buffer.h"
#include "sherpa-onnx/csrc/microphone.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/voice-activity-detector.h"

// Shutdown flag. It is written by the SIGINT handler and read by both the
// PortAudio callback and the main loop, so it must be an atomic: a plain
// bool here is a data race and the compiler may hoist the read out of the
// polling loop.
std::atomic<bool> stop{false};

// Protects `buffer`, which is filled by the audio callback and drained by the
// main loop.
std::mutex mutex;

// Audio samples handed from the recording callback to the main loop.
// NOTE(review): the constructor argument is presumably the capacity in
// samples, i.e. 60 seconds at the 16 kHz rate used in main() -- confirm in
// circular-buffer.h.
sherpa_onnx::CircularBuffer buffer(16000 * 60);

// PortAudio stream callback.
//
// Appends the `frames_per_buffer` float samples found in `input_buffer` to
// the global `buffer` while holding the global `mutex`.
//
// Returns paComplete once the global `stop` flag is set, and paContinue
// otherwise. The output buffer, timing info, status flags and `user_data`
// are not used.
static int32_t RecordCallback(const void *input_buffer,
                              void * /*output_buffer*/,
                              unsigned long frames_per_buffer,  // NOLINT
                              const PaStreamCallbackTimeInfo * /*time_info*/,
                              PaStreamCallbackFlags /*status_flags*/,
                              void *user_data) {
  // The stream is opened with paFloat32 in main(), so the raw pointer is
  // interpreted as an array of floats.
  const auto *captured = static_cast<const float *>(input_buffer);

  std::lock_guard<std::mutex> guard(mutex);
  buffer.Push(captured, frames_per_buffer);

  if (stop) {
    return paComplete;
  }
  return paContinue;
}

// Signal handler installed for SIGINT in main().
//
// Raises the global `stop` flag and prints a short notice to stderr. The
// signal number `sig` is not inspected.
//
// NOTE(review): stdio functions are not async-signal-safe; consider printing
// this message from main() after the polling loop exits instead.
static void Handler(int32_t sig) {
  stop = true;
  fputs("\nCaught Ctrl + C. Exiting...\n", stderr);
}

// Entry point: records audio from the default input device, segments it with
// a streaming VAD and decodes every detected speech segment with a
// non-streaming (offline) recognizer. Recognized text is written to stderr.
//
// Command-line options are registered by VadModelConfig and
// OfflineRecognizerConfig; no positional arguments are accepted.
//
// Returns 0 after a clean shutdown (Ctrl + C), -1 if either configuration
// fails validation. Usage errors and PortAudio errors terminate the process
// via exit(EXIT_FAILURE).
int32_t main(int32_t argc, char *argv[]) {
  signal(SIGINT, Handler);

  const char *kUsageMessage = R"usage(
This program shows how to use a streaming VAD with non-streaming ASR in
sherpa-onnx.

Please download silero_vad.onnx from
https://github.com/snakers4/silero-vad/blob/master/files/silero_vad.onnx

For instance, use
wget https://github.com/snakers4/silero-vad/raw/master/files/silero_vad.onnx

Please refer to ./sherpa-onnx-microphone-offline.cc
to download models for offline ASR.

(1) Transducer from icefall

./bin/sherpa-onnx-vad-microphone-offline-asr \
--silero-vad-model=/path/to/silero_vad.onnx \
--tokens=/path/to/tokens.txt \
--encoder=/path/to/encoder.onnx \
--decoder=/path/to/decoder.onnx \
--joiner=/path/to/joiner.onnx

(2) Paraformer from FunASR

./bin/sherpa-onnx-vad-microphone-offline-asr \
--silero-vad-model=/path/to/silero_vad.onnx \
--tokens=/path/to/tokens.txt \
--paraformer=/path/to/model.onnx \
--num-threads=1

(3) Whisper models

./bin/sherpa-onnx-vad-microphone-offline-asr \
--silero-vad-model=/path/to/silero_vad.onnx \
--whisper-encoder=./sherpa-onnx-whisper-base.en/base.en-encoder.int8.onnx \
--whisper-decoder=./sherpa-onnx-whisper-base.en/base.en-decoder.int8.onnx \
--tokens=./sherpa-onnx-whisper-base.en/base.en-tokens.txt \
--num-threads=1
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::VadModelConfig vad_config;

  sherpa_onnx::OfflineRecognizerConfig asr_config;

  vad_config.Register(&po);
  asr_config.Register(&po);

  po.Read(argc, argv);
  if (po.NumArgs() != 0) {
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", vad_config.ToString().c_str());
  fprintf(stderr, "%s\n", asr_config.ToString().c_str());

  if (!vad_config.Validate()) {
    fprintf(stderr, "Errors in vad_config!\n");
    return -1;
  }

  if (!asr_config.Validate()) {
    fprintf(stderr, "Errors in asr_config!\n");
    return -1;
  }

  fprintf(stderr, "Creating recognizer ...\n");
  sherpa_onnx::OfflineRecognizer recognizer(asr_config);
  fprintf(stderr, "Recognizer created!\n");

  // NOTE(review): presumably initializes PortAudio on construction and
  // terminates it on destruction -- confirm in microphone.h.
  sherpa_onnx::Microphone mic;

  PaDeviceIndex num_devices = Pa_GetDeviceCount();
  fprintf(stderr, "Num devices: %d\n", num_devices);

  PaStreamParameters param;

  param.device = Pa_GetDefaultInputDevice();
  if (param.device == paNoDevice) {
    fprintf(stderr, "No default input device found\n");
    exit(EXIT_FAILURE);
  }
  fprintf(stderr, "Use default device: %d\n", param.device);

  const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device);
  fprintf(stderr, " Name: %s\n", info->name);
  fprintf(stderr, " Max input channels: %d\n", info->maxInputChannels);

  // Mono, 32-bit float samples; RecordCallback relies on this format.
  param.channelCount = 1;
  param.sampleFormat = paFloat32;

  param.suggestedLatency = info->defaultLowInputLatency;
  param.hostApiSpecificStreamInfo = nullptr;
  float sample_rate = 16000;

  PaStream *stream;
  PaError err =
      Pa_OpenStream(&stream, &param, nullptr, /* &outputParameters, */
                    sample_rate,
                    0,          // frames per buffer
                    paClipOff,  // we won't output out of range samples
                                // so don't bother clipping them
                    RecordCallback, nullptr);
  if (err != paNoError) {
    fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
    exit(EXIT_FAILURE);
  }

  err = Pa_StartStream(stream);
  if (err != paNoError) {
    fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
    exit(EXIT_FAILURE);
  }

  auto vad = std::make_unique<sherpa_onnx::VoiceActivityDetector>(vad_config);

  fprintf(stderr, "Started. Please speak\n");

  int32_t window_size = vad_config.silero_vad.window_size;
  int32_t index = 0;  // running number of the printed (non-empty) results

  while (!stop) {
    // Only copy the recorded windows out of the shared buffer while holding
    // the lock. The audio callback takes the same mutex, so running the VAD
    // model inside the critical section would stall the real-time callback.
    std::vector<std::vector<float>> windows;
    {
      std::lock_guard<std::mutex> lock(mutex);

      while (buffer.Size() >= window_size) {
        windows.push_back(buffer.Get(buffer.Head(), window_size));
        buffer.Pop(window_size);
      }
    }

    // Feed the VAD one window at a time, in recording order, exactly as the
    // windows were taken from the buffer.
    for (auto &window : windows) {
      vad->AcceptWaveform(window.data(), window.size());
    }

    // Decode every speech segment the VAD has completed so far.
    while (!vad->Empty()) {
      auto &segment = vad->Front();
      auto s = recognizer.CreateStream();
      s->AcceptWaveform(sample_rate, segment.samples.data(),
                        segment.samples.size());
      recognizer.DecodeStream(s.get());
      const auto &result = s->GetResult();
      if (!result.text.empty()) {
        fprintf(stderr, "%2d: %s\n", index, result.text.c_str());
        ++index;
      }
      vad->Pop();
    }

    Pa_Sleep(100);  // sleep for 100ms
  }

  err = Pa_CloseStream(stream);
  if (err != paNoError) {
    fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
    exit(EXIT_FAILURE);
  }

  return 0;
}
2 changes: 1 addition & 1 deletion sherpa-onnx/csrc/sherpa-onnx-vad-microphone.cc
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ wget https://github.com/snakers4/silero-vad/raw/master/files/silero_vad.onnx
0, // frames per buffer
paClipOff, // we won't output out of range samples
// so don't bother clipping them
RecordCallback, &config.silero_vad.window_size);
RecordCallback, nullptr);
if (err != paNoError) {
fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
exit(EXIT_FAILURE);
Expand Down
Loading