From 42f042d8f97163048c8a8a53ac176ffa328e8bee Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Thu, 14 Sep 2023 17:44:34 +0800 Subject: [PATCH] Update C API to include timestamps for offline ASR --- sherpa-onnx/c-api/c-api.cc | 11 ++++ sherpa-onnx/c-api/c-api.h | 8 +++ ...ffline-paraformer-greedy-search-decoder.cc | 1 + sherpa-onnx/csrc/offline-paraformer-model.cc | 1 + swift-api-examples/SherpaOnnx.swift | 17 ++++++ .../decode-file-non-streaming.swift | 56 ++++++++++++++----- 6 files changed, 80 insertions(+), 14 deletions(-) diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc index 6a989d542..520c47542 100644 --- a/sherpa-onnx/c-api/c-api.cc +++ b/sherpa-onnx/c-api/c-api.cc @@ -353,11 +353,22 @@ SherpaOnnxOfflineRecognizerResult *GetOfflineStreamResult( std::copy(text.begin(), text.end(), const_cast(r->text)); const_cast(r->text)[text.size()] = 0; + if (!result.timestamps.empty()) { + r->timestamps = new float[result.timestamps.size()]; + std::copy(result.timestamps.begin(), result.timestamps.end(), + r->timestamps); + r->count = result.timestamps.size(); + } else { + r->timestamps = nullptr; + r->count = 0; + } + return r; } void DestroyOfflineRecognizerResult( const SherpaOnnxOfflineRecognizerResult *r) { delete[] r->text; + delete[] r->timestamps; delete r; } diff --git a/sherpa-onnx/c-api/c-api.h b/sherpa-onnx/c-api/c-api.h index d669bce27..71aa56426 100644 --- a/sherpa-onnx/c-api/c-api.h +++ b/sherpa-onnx/c-api/c-api.h @@ -408,6 +408,14 @@ SHERPA_ONNX_API void DecodeMultipleOfflineStreams( SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerResult { const char *text; + + // Pointer to continuous memory which holds timestamps + // + // It is NULL if the model does not support timestamps + float *timestamps; + + // number of entries in timestamps + int32_t count; // TODO(fangjun): Add more fields } SherpaOnnxOfflineRecognizerResult; diff --git a/sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.cc b/sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.cc index 95b582ca0..c1d89a3ab 100644 --- a/sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.cc +++ b/sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.cc @@ -5,6 +5,7 @@ #include "sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.h" #include +#include #include #include "sherpa-onnx/csrc/macros.h" diff --git a/sherpa-onnx/csrc/offline-paraformer-model.cc b/sherpa-onnx/csrc/offline-paraformer-model.cc index 874374f18..ce1851062 100644 --- a/sherpa-onnx/csrc/offline-paraformer-model.cc +++ b/sherpa-onnx/csrc/offline-paraformer-model.cc @@ -6,6 +6,7 @@ #include #include +#include #include "sherpa-onnx/csrc/macros.h" #include "sherpa-onnx/csrc/onnx-utils.h" diff --git a/swift-api-examples/SherpaOnnx.swift b/swift-api-examples/SherpaOnnx.swift index 524499b2a..72c497cf8 100644 --- a/swift-api-examples/SherpaOnnx.swift +++ b/swift-api-examples/SherpaOnnx.swift @@ -349,6 +349,23 @@ class SherpaOnnxOfflineRecongitionResult { return String(cString: result.pointee.text) } + var count: Int32 { + return result.pointee.count + } + + var timestamps: [Float] { + if let p = result.pointee.timestamps { + var timestamps: [Float] = [] + for index in 0..!) { self.result = result } diff --git a/swift-api-examples/decode-file-non-streaming.swift b/swift-api-examples/decode-file-non-streaming.swift index a9485c5fd..6d0b4e8b5 100644 --- a/swift-api-examples/decode-file-non-streaming.swift +++ b/swift-api-examples/decode-file-non-streaming.swift @@ -13,21 +13,45 @@ extension AVAudioPCMBuffer { } func run() { - let encoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx" - let decoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx" - let tokens = "./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt" - let whisperConfig = sherpaOnnxOfflineWhisperModelConfig( - encoder: encoder, - decoder: decoder - ) + var recognizer: SherpaOnnxOfflineRecognizer + var modelConfig: SherpaOnnxOfflineModelConfig + var modelType = "whisper" + // modelType = "paraformer" - let modelConfig = sherpaOnnxOfflineModelConfig( - tokens: tokens, - whisper: whisperConfig, - debug: 0, - modelType: "whisper" - ) + if modelType == "whisper" { + let encoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx" + let decoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx" + let tokens = "./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt" + + let whisperConfig = sherpaOnnxOfflineWhisperModelConfig( + encoder: encoder, + decoder: decoder + ) + + modelConfig = sherpaOnnxOfflineModelConfig( + tokens: tokens, + whisper: whisperConfig, + debug: 0, + modelType: "whisper" + ) + } else if modelType == "paraformer" { + let model = "./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx" + let tokens = "./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt" + let paraformerConfig = sherpaOnnxOfflineParaformerModelConfig( + model: model + ) + + modelConfig = sherpaOnnxOfflineModelConfig( + tokens: tokens, + paraformer: paraformerConfig, + debug: 0, + modelType: "paraformer" + ) + } else { + print("Please specify a supported modelType \(modelType)") + return + } let featConfig = sherpaOnnxFeatureConfig( sampleRate: 16000, @@ -38,7 +62,7 @@ func run() { modelConfig: modelConfig ) - let recognizer = SherpaOnnxOfflineRecognizer(config: &config) + recognizer = SherpaOnnxOfflineRecognizer(config: &config) let filePath = "./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav" let fileURL: NSURL = NSURL(fileURLWithPath: filePath) @@ -55,6 +79,10 @@ func run() { let array: [Float]! = audioFileBuffer?.array() let result = recognizer.decode(samples: array, sampleRate: Int(audioFormat.sampleRate)) print("\nresult is:\n\(result.text)") + if result.timestamps.count != 0 { + print("\ntimestamps is:\n\(result.timestamps)") + } + } @main