-
Notifications
You must be signed in to change notification settings - Fork 445
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
03413d2
commit 382be39
Showing
4 changed files
with
142 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,14 @@ | ||
A sample command-line application with an entrypoint in `bin/`, library code | ||
in `lib/`, and example unit test in `test/`. | ||
# Introduction | ||
|
||
This folder contains examples of non-streaming ASR using the Dart API. | ||
|
||
| File | Description| | ||
|------|------------| | ||
|[./bin/nemo-ctc.dart](./bin/nemo-ctc.dart)| Use a NeMo CTC model for speech recognition. See [./run-nemo-ctc.sh](./run-nemo-ctc.sh)| | ||
|[./bin/nemo-transducer.dart](./bin/nemo-transducer.dart)| Use a NeMo transducer model for speech recognition. See [./run-nemo-transducer.sh](./run-nemo-transducer.sh)| | ||
|[./bin/paraformer.dart](./bin/paraformer.dart)|Use a paraformer model for speech recognition. See [./run-paraformer.sh](./run-paraformer.sh)| | ||
|[./bin/telespeech-ctc.dart](./bin/telespeech-ctc.dart)| Use models from [Tele-AI/TeleSpeech-ASR](https://github.com/Tele-AI/TeleSpeech-ASR) for speech recognition. See [./run-telespeech-ctc.sh](./run-telespeech-ctc.sh)| | ||
|[./bin/whisper.dart](./bin/whisper.dart)| Use whisper for speech recognition. See [./run-whisper.sh](./run-whisper.sh)| | ||
|[./bin/zipformer-transducer.dart](./bin/zipformer-transducer.dart)| Use a zipformer transducer for speech recognition. See [./run-zipformer-transducer.sh](./run-zipformer-transducer.sh)| | ||
|[./bin/vad-with-paraformer.dart](./bin/vad-with-paraformer.dart)| Use [silero-vad](https://github.com/snakers4/silero-vad) with paraformer for speech recognition. See [./run-vad-with-paraformer.sh](./run-vad-with-paraformer.sh)| | ||
|
97 changes: 97 additions & 0 deletions
97
dart-api-examples/non-streaming-asr/bin/vad-with-paraformer.dart
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
import 'dart:io'; | ||
import 'dart:typed_data'; | ||
|
||
import 'package:args/args.dart'; | ||
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | ||
|
||
import './init.dart'; | ||
|
||
/// Transcribes a wave file with a paraformer model, one VAD segment at a time.
///
/// Required command-line options:
///  * `--silero-vad`: path to `silero_vad.onnx`
///  * `--model`: path to the paraformer model
///  * `--tokens`: path to `tokens.txt`
///  * `--input-wav`: path to the wave file to transcribe
///
/// Prints the usage text and exits with status 1 if any option is missing.
/// For every segment with non-empty recognized text, prints one line of the
/// form `start -- stop: text`, where both times are in seconds.
void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('silero-vad', help: 'Path to silero_vad.onnx')
    ..addOption('model', help: 'Path to the paraformer model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['silero-vad'] == null ||
      res['model'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final sileroVad = res['silero-vad'] as String;
  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  // Offline (non-streaming) recognizer backed by the paraformer model.
  final paraformer = sherpa_onnx.OfflineParaformerModelConfig(
    model: model,
  );

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    paraformer: paraformer,
    tokens: tokens,
    debug: true,
    numThreads: 1,
    modelType: 'paraformer',
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  // Voice activity detector that cuts the input into speech segments.
  final sileroVadConfig = sherpa_onnx.SileroVadModelConfig(
    model: sileroVad,
    minSilenceDuration: 0.25,
    minSpeechDuration: 0.5,
  );

  final vadConfig = sherpa_onnx.VadModelConfig(
    sileroVad: sileroVadConfig,
    numThreads: 1,
    debug: true,
  );

  final vad = sherpa_onnx.VoiceActivityDetector(
      config: vadConfig, bufferSizeInSeconds: 10);

  final waveData = sherpa_onnx.readWave(inputWav);

  // The wave is fed to the VAD in chunks of exactly one window.
  // NOTE(review): samples beyond the last full window are never fed to the
  // VAD, and speech still in progress when the input ends is not emitted
  // here. Newer sherpa-onnx releases appear to offer a flush operation on
  // the detector for this; confirm it exists in the bound version first.
  final windowSize = vadConfig.sileroVad.windowSize;
  final numIter = waveData.samples.length ~/ windowSize;

  for (var i = 0; i != numIter; ++i) {
    final start = i * windowSize;
    vad.acceptWaveform(Float32List.sublistView(
        waveData.samples, start, start + windowSize));

    // Drain every queued segment after each chunk instead of only while
    // isDetected() is true. Gating on isDetected() delays a queued segment
    // until speech is detected again, and drops any segment that is still
    // queued when the last chunk has been consumed.
    while (!vad.isEmpty()) {
      final stream = recognizer.createStream();
      final segment = vad.front();
      stream.acceptWaveform(
          samples: segment.samples, sampleRate: waveData.sampleRate);
      recognizer.decode(stream);

      final result = recognizer.getResult(stream);

      // segment.start is divided by the sample rate to get seconds, as is
      // the segment length.
      final startTime = segment.start * 1.0 / waveData.sampleRate;
      final duration = segment.samples.length * 1.0 / waveData.sampleRate;
      final stopTime = startTime + duration;
      if (result.text != '') {
        print(
            '${startTime.toStringAsPrecision(4)} -- ${stopTime.toStringAsPrecision(4)}: ${result.text}');
      }

      stream.free();
      // Pop only after the segment's samples have been consumed.
      vad.pop();
    }
  }

  vad.free();
  recognizer.free();
}
27 changes: 27 additions & 0 deletions
27
dart-api-examples/non-streaming-asr/run-vad-with-paraformer.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
#!/usr/bin/env bash
#
# Fetches the silero VAD model, a test wave file, and the paraformer model
# (each only if not already present), then runs the VAD + paraformer example.

set -ex

dart pub get

# curl is invoked with -f (--fail) so that an HTTP error makes it exit
# non-zero and `set -e` aborts the script. Without it, the server's error
# page is saved under the target name and the existence checks below skip
# the download on every later run.
if [[ ! -f ./silero_vad.onnx ]]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [[ ! -f ./lei-jun-test.wav ]]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

# tokens.txt is used as the marker that the model archive has been extracted.
if [[ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2

  tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
fi

dart run \
  ./bin/vad-with-paraformer.dart \
  --silero-vad ./silero_vad.onnx \
  --model ./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx \
  --tokens ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \
  --input-wav ./lei-jun-test.wav