From 6f10d1f07cf14cac477287c2ad513337554f7351 Mon Sep 17 00:00:00 2001 From: yujinqiu Date: Mon, 16 Oct 2023 11:15:29 +0800 Subject: [PATCH] Add vad clear api for better performance --- .../com/k2fsa/sherpa/onnx/MainActivity.kt | 2 +- sherpa-onnx/c-api/c-api.cc | 5 +++ sherpa-onnx/c-api/c-api.h | 4 +++ sherpa-onnx/csrc/voice-activity-detector.cc | 2 ++ sherpa-onnx/csrc/voice-activity-detector.h | 1 + sherpa-onnx/jni/jni.cc | 10 ++++++ swift-api-examples/SherpaOnnx.swift | 4 +++ swift-api-examples/generate-subtitles.swift | 35 +++++++++---------- 8 files changed, 44 insertions(+), 19 deletions(-) diff --git a/android/SherpaOnnxVad/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt b/android/SherpaOnnxVad/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt index eb847b020..4d5ce7e74 100644 --- a/android/SherpaOnnxVad/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt +++ b/android/SherpaOnnxVad/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt @@ -161,9 +161,9 @@ class MainActivity : AppCompatActivity() { val samples = FloatArray(ret) { buffer[it] / 32768.0f } vad.acceptWaveform(samples) - while(!vad.empty()) {vad.pop();} val isSpeechDetected = vad.isSpeechDetected() + vad.clear() runOnUiThread { onVad(isSpeechDetected) diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc index 3f6b35466..1e945ae3e 100644 --- a/sherpa-onnx/c-api/c-api.cc +++ b/sherpa-onnx/c-api/c-api.cc @@ -498,6 +498,11 @@ SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorPop( p->impl->Pop(); } +SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorClear( + SherpaOnnxVoiceActivityDetector *p) { + p->impl->Clear(); +} + SHERPA_ONNX_API const SherpaOnnxSpeechSegment * SherpaOnnxVoiceActivityDetectorFront(SherpaOnnxVoiceActivityDetector *p) { const sherpa_onnx::SpeechSegment &segment = p->impl->Front(); diff --git a/sherpa-onnx/c-api/c-api.h b/sherpa-onnx/c-api/c-api.h index b79b83072..b4b5780a9 100644 --- a/sherpa-onnx/c-api/c-api.h +++ b/sherpa-onnx/c-api/c-api.h 
@@ -580,6 +580,10 @@ SherpaOnnxVoiceActivityDetectorDetected(SherpaOnnxVoiceActivityDetector *p); SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorPop( SherpaOnnxVoiceActivityDetector *p); +// Clear current speech segments. +SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorClear( + SherpaOnnxVoiceActivityDetector *p); + // Return the first speech segment. // The user has to use SherpaOnnxDestroySpeechSegment() to free the returned // pointer to avoid memory leak. diff --git a/sherpa-onnx/csrc/voice-activity-detector.cc b/sherpa-onnx/csrc/voice-activity-detector.cc index 05660cd9d..a56caa43e 100644 --- a/sherpa-onnx/csrc/voice-activity-detector.cc +++ b/sherpa-onnx/csrc/voice-activity-detector.cc @@ -76,6 +76,8 @@ class VoiceActivityDetector::Impl { void Pop() { segments_.pop(); } + void Clear() { std::queue<SpeechSegment>().swap(segments_); } + const SpeechSegment &Front() const { return segments_.front(); } void Reset() { diff --git a/sherpa-onnx/csrc/voice-activity-detector.h b/sherpa-onnx/csrc/voice-activity-detector.h index 61552139b..603bfbe78 100644 --- a/sherpa-onnx/csrc/voice-activity-detector.h +++ b/sherpa-onnx/csrc/voice-activity-detector.h @@ -36,6 +36,7 @@ class VoiceActivityDetector { void AcceptWaveform(const float *samples, int32_t n); bool Empty() const; void Pop(); + void Clear(); const SpeechSegment &Front() const; bool IsSpeechDetected() const; diff --git a/sherpa-onnx/jni/jni.cc b/sherpa-onnx/jni/jni.cc index f4f0f6d67..92e6e7203 100644 --- a/sherpa-onnx/jni/jni.cc +++ b/sherpa-onnx/jni/jni.cc @@ -124,6 +124,8 @@ class SherpaOnnxVad { void Pop() { vad_.Pop(); } + void Clear() { vad_.Clear();} + const SpeechSegment &Front() const { return vad_.Front(); } bool IsSpeechDetected() const { return vad_.IsSpeechDetected(); } @@ -556,6 +558,14 @@ JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_pop(JNIEnv *env, model->Pop(); } +SHERPA_ONNX_EXTERN_C +JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_clear(JNIEnv *env, + jobject /*obj*/, + jlong ptr) { + 
auto model = reinterpret_cast<sherpa_onnx::SherpaOnnxVad *>(ptr); + model->Clear(); +} + // see // https://stackoverflow.com/questions/29043872/android-jni-return-multiple-variables static jobject NewInteger(JNIEnv *env, int32_t value) { diff --git a/swift-api-examples/SherpaOnnx.swift b/swift-api-examples/SherpaOnnx.swift index 53637011c..6c74ba459 100644 --- a/swift-api-examples/SherpaOnnx.swift +++ b/swift-api-examples/SherpaOnnx.swift @@ -559,6 +559,10 @@ class SherpaOnnxVoiceActivityDetectorWrapper { SherpaOnnxVoiceActivityDetectorPop(vad) } + func clear() { + SherpaOnnxVoiceActivityDetectorClear(vad) + } + func front() -> SherpaOnnxSpeechSegmentWrapper { let p: UnsafePointer<SherpaOnnxSpeechSegment>? = SherpaOnnxVoiceActivityDetectorFront(vad) return SherpaOnnxSpeechSegmentWrapper(p: p) diff --git a/swift-api-examples/generate-subtitles.swift b/swift-api-examples/generate-subtitles.swift index d06829455..b04b6faef 100644 --- a/swift-api-examples/generate-subtitles.swift +++ b/swift-api-examples/generate-subtitles.swift @@ -174,32 +174,31 @@ func run() { var segments: [SpeechSegment] = [] - while array.count > windowSize { - // todo(fangjun): avoid extra copies here - vad.acceptWaveform(samples: [Float](array[0..