From d94506698daba388a251fc653c6f2c486006db91 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Fri, 14 Jun 2024 18:40:16 +0800 Subject: [PATCH] Add non-streaming ASR examples for Dart API (#1007) --- .github/scripts/test-dart.sh | 43 ++++++++ .github/workflows/test-dart-package.yaml | 52 ++++++++++ .github/workflows/test-dart.yaml | 45 +++++++-- dart-api-examples/.gitignore | 1 + .../non-streaming-asr/.gitignore | 3 + .../non-streaming-asr/CHANGELOG.md | 3 + dart-api-examples/non-streaming-asr/README.md | 14 +++ .../non-streaming-asr/analysis_options.yaml | 30 ++++++ .../non-streaming-asr/bin/init.dart | 1 + .../non-streaming-asr/bin/nemo-ctc.dart | 52 ++++++++++ .../bin/nemo-transducer.dart | 62 ++++++++++++ .../non-streaming-asr/bin/paraformer.dart | 55 +++++++++++ .../non-streaming-asr/bin/telespeech-ctc.dart | 51 ++++++++++ .../bin/vad-with-paraformer.dart | 97 +++++++++++++++++++ .../non-streaming-asr/bin/whisper.dart | 59 +++++++++++ .../bin/zipformer-transducer.dart | 62 ++++++++++++ .../non-streaming-asr/pubspec.lock | 96 ++++++++++++++++++ .../non-streaming-asr/pubspec.yaml | 18 ++++ .../non-streaming-asr/run-nemo-ctc.sh | 17 ++++ .../non-streaming-asr/run-nemo-transducer.sh | 20 ++++ .../non-streaming-asr/run-paraformer.sh | 18 ++++ .../non-streaming-asr/run-telespeech-ctc.sh | 18 ++++ .../run-vad-with-paraformer.sh | 27 ++++++ .../non-streaming-asr/run-whisper.sh | 19 ++++ .../run-zipformer-transducer.sh | 20 ++++ dart-api-examples/vad/bin/init.dart | 29 ++++++ dart-api-examples/vad/bin/vad.dart | 31 ++---- dart-api-examples/vad/pubspec.lock | 4 +- dart-api-examples/vad/pubspec.yaml | 2 +- scripts/dart/non-streaming-asr-pubspec.yaml | 19 ++++ scripts/dart/vad-pubspec.yaml | 18 ++++ .../flutter/lib/src/offline_recognizer.dart | 14 ++- .../flutter/lib/src/online_recognizer.dart | 12 ++- .../flutter/lib/src/sherpa_onnx_bindings.dart | 7 ++ sherpa-onnx/flutter/pubspec.yaml | 2 +- 35 files changed, 984 insertions(+), 37 deletions(-) create mode 100755 
.github/scripts/test-dart.sh create mode 100644 .github/workflows/test-dart-package.yaml create mode 100644 dart-api-examples/.gitignore create mode 100644 dart-api-examples/non-streaming-asr/.gitignore create mode 100644 dart-api-examples/non-streaming-asr/CHANGELOG.md create mode 100644 dart-api-examples/non-streaming-asr/README.md create mode 100644 dart-api-examples/non-streaming-asr/analysis_options.yaml create mode 120000 dart-api-examples/non-streaming-asr/bin/init.dart create mode 100644 dart-api-examples/non-streaming-asr/bin/nemo-ctc.dart create mode 100644 dart-api-examples/non-streaming-asr/bin/nemo-transducer.dart create mode 100644 dart-api-examples/non-streaming-asr/bin/paraformer.dart create mode 100644 dart-api-examples/non-streaming-asr/bin/telespeech-ctc.dart create mode 100644 dart-api-examples/non-streaming-asr/bin/vad-with-paraformer.dart create mode 100644 dart-api-examples/non-streaming-asr/bin/whisper.dart create mode 100644 dart-api-examples/non-streaming-asr/bin/zipformer-transducer.dart create mode 100644 dart-api-examples/non-streaming-asr/pubspec.lock create mode 100644 dart-api-examples/non-streaming-asr/pubspec.yaml create mode 100755 dart-api-examples/non-streaming-asr/run-nemo-ctc.sh create mode 100755 dart-api-examples/non-streaming-asr/run-nemo-transducer.sh create mode 100755 dart-api-examples/non-streaming-asr/run-paraformer.sh create mode 100755 dart-api-examples/non-streaming-asr/run-telespeech-ctc.sh create mode 100755 dart-api-examples/non-streaming-asr/run-vad-with-paraformer.sh create mode 100755 dart-api-examples/non-streaming-asr/run-whisper.sh create mode 100755 dart-api-examples/non-streaming-asr/run-zipformer-transducer.sh create mode 100644 dart-api-examples/vad/bin/init.dart create mode 100644 scripts/dart/non-streaming-asr-pubspec.yaml create mode 100644 scripts/dart/vad-pubspec.yaml diff --git a/.github/scripts/test-dart.sh b/.github/scripts/test-dart.sh new file mode 100755 index 000000000..35c0fa951 --- 
/dev/null +++ b/.github/scripts/test-dart.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +set -ex + +cd dart-api-examples + +pushd non-streaming-asr + +echo '----------VAD with paraformer----------' +./run-vad-with-paraformer.sh +rm -rf sherpa-onnx-* + +echo '----------NeMo transducer----------' +./run-nemo-transducer.sh +rm -rf sherpa-onnx-* + +echo '----------NeMo CTC----------' +./run-nemo-ctc.sh +rm -rf sherpa-onnx-* + +echo '----------TeleSpeech CTC----------' +./run-telespeech-ctc.sh +rm -rf sherpa-onnx-* + +echo '----------paraformer----------' +./run-paraformer.sh +rm -rf sherpa-onnx-* + +echo '----------whisper----------' +./run-whisper.sh +rm -rf sherpa-onnx-* + +echo '----------zipformer transducer----------' +./run-zipformer-transducer.sh +rm -rf sherpa-onnx-* + +popd + +pushd vad +./run.sh +rm *.onnx +popd + diff --git a/.github/workflows/test-dart-package.yaml b/.github/workflows/test-dart-package.yaml new file mode 100644 index 000000000..84556ff7d --- /dev/null +++ b/.github/workflows/test-dart-package.yaml @@ -0,0 +1,52 @@ +name: test-dart-package + +on: + schedule: + # minute (0-59) + # hour (0-23) + # day of the month (1-31) + # month (1-12) + # day of the week (0-6) + # nightly build at 15:50 UTC time every day + - cron: "50 15 * * *" + + workflow_dispatch: + +concurrency: + group: test-dart-package-${{ github.ref }} + cancel-in-progress: true + +jobs: + test_dart_package: + name: ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [macos-latest, ubuntu-latest] #, windows-latest] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Flutter SDK + uses: flutter-actions/setup-flutter@v3 + with: + channel: stable + version: latest + + - name: Display flutter info + shell: bash + run: | + which flutter + which dart + + flutter --version + dart --version + flutter doctor + + - name: Run tests + shell: bash + run: | + .github/scripts/test-dart.sh diff --git 
a/.github/workflows/test-dart.yaml b/.github/workflows/test-dart.yaml index 0734df705..ae01373b3 100644 --- a/.github/workflows/test-dart.yaml +++ b/.github/workflows/test-dart.yaml @@ -21,19 +21,24 @@ concurrency: cancel-in-progress: true jobs: - dart: + test_dart: name: ${{ matrix.os }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: - os: [macos-latest, ubuntu-latest] #, windows-latest] + os: [ubuntu-latest] steps: - uses: actions/checkout@v4 with: fetch-depth: 0 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ matrix.os }}-dart + - name: Setup Flutter SDK uses: flutter-actions/setup-flutter@v3 with: @@ -50,11 +55,39 @@ jobs: dart --version flutter doctor + - name: Build sherpa-onnx + shell: bash + run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + cmake --version + mkdir build + + cd build + + cmake \ + -D BUILD_SHARED_LIBS=ON \ + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ + -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \ + -DBUILD_ESPEAK_NG_EXE=OFF \ + -DSHERPA_ONNX_ENABLE_BINARY=OFF \ + -DCMAKE_INSTALL_PREFIX=./install \ + .. 
+ make -j install + + - name: Copy libs + shell: bash + run: | + cp -v build/install/lib/lib* ./sherpa-onnx/flutter/linux/ + + echo "--------------------" + + ls -lh ./sherpa-onnx/flutter/linux/ + - name: Run tests shell: bash run: | - cd dart-api-examples + cp scripts/dart/vad-pubspec.yaml dart-api-examples/vad/pubspec.yaml + cp scripts/dart/non-streaming-asr-pubspec.yaml dart-api-examples/non-streaming-asr/pubspec.yaml - pushd vad - ./run.sh - popd + .github/scripts/test-dart.sh diff --git a/dart-api-examples/.gitignore b/dart-api-examples/.gitignore new file mode 100644 index 000000000..248f032f1 --- /dev/null +++ b/dart-api-examples/.gitignore @@ -0,0 +1 @@ +!run*.sh diff --git a/dart-api-examples/non-streaming-asr/.gitignore b/dart-api-examples/non-streaming-asr/.gitignore new file mode 100644 index 000000000..3a8579040 --- /dev/null +++ b/dart-api-examples/non-streaming-asr/.gitignore @@ -0,0 +1,3 @@ +# https://dart.dev/guides/libraries/private-files +# Created by `dart pub` +.dart_tool/ diff --git a/dart-api-examples/non-streaming-asr/CHANGELOG.md b/dart-api-examples/non-streaming-asr/CHANGELOG.md new file mode 100644 index 000000000..effe43c82 --- /dev/null +++ b/dart-api-examples/non-streaming-asr/CHANGELOG.md @@ -0,0 +1,3 @@ +## 1.0.0 + +- Initial version. diff --git a/dart-api-examples/non-streaming-asr/README.md b/dart-api-examples/non-streaming-asr/README.md new file mode 100644 index 000000000..bfa21e4cd --- /dev/null +++ b/dart-api-examples/non-streaming-asr/README.md @@ -0,0 +1,14 @@ +# Introduction + +This folder contains examples for non-streaming ASR with the Dart API. + +| File | Description| +|------|------------| +|[./bin/nemo-ctc.dart](./bin/nemo-ctc.dart)| Use a NeMo CTC model for speech recognition. See [./run-nemo-ctc.sh](./run-nemo-ctc.sh)| +|[./bin/nemo-transducer.dart](./bin/nemo-transducer.dart)| Use a NeMo transducer model for speech recognition. 
See [./run-nemo-transducer.sh](./run-nemo-transducer.sh)| +|[./bin/paraformer.dart](./bin/paraformer.dart)|Use a paraformer model for speech recognition. See [./run-paraformer.sh](./run-paraformer.sh)| +|[./bin/telespeech-ctc.dart](./bin/telespeech-ctc.dart)| Use models from [Tele-AI/TeleSpeech-ASR](https://github.com/Tele-AI/TeleSpeech-ASR) for speech recognition. See [./run-telespeech-ctc.sh](./run-telespeech-ctc.sh)| +|[./bin/whisper.dart](./bin/whisper.dart)| Use whisper for speech recognition. See [./run-whisper.sh](./run-whisper.sh)| +|[./bin/zipformer-transducer.dart](./bin/zipformer-transducer.dart)| Use a zipformer transducer for speech recognition. See [./run-zipformer-transducer.sh](./run-zipformer-transducer.sh)| +|[./bin/vad-with-paraformer.dart](./bin/vad-with-paraformer.dart)| Use [silero-vad](https://github.com/snakers4/silero-vad) with a paraformer model for speech recognition. See [./run-vad-with-paraformer.sh](./run-vad-with-paraformer.sh)| + diff --git a/dart-api-examples/non-streaming-asr/analysis_options.yaml b/dart-api-examples/non-streaming-asr/analysis_options.yaml new file mode 100644 index 000000000..dee8927aa --- /dev/null +++ b/dart-api-examples/non-streaming-asr/analysis_options.yaml @@ -0,0 +1,30 @@ +# This file configures the static analysis results for your project (errors, +# warnings, and lints). +# +# This enables the 'recommended' set of lints from `package:lints`. +# This set helps identify many issues that may lead to problems when running +# or consuming Dart code, and enforces writing Dart using a single, idiomatic +# style and format. +# +# If you want a smaller set of lints you can change this to specify +# 'package:lints/core.yaml'. These are just the most critical lints +# (the recommended set includes the core lints). +# The core lints are also what is used by pub.dev for scoring packages. + +include: package:lints/recommended.yaml + +# Uncomment the following section to specify additional rules. 
+ +# linter: +# rules: +# - camel_case_types + +# analyzer: +# exclude: +# - path/to/excluded/files/** + +# For more information about the core and recommended set of lints, see +# https://dart.dev/go/core-lints + +# For additional information about configuring this file, see +# https://dart.dev/guides/language/analysis-options diff --git a/dart-api-examples/non-streaming-asr/bin/init.dart b/dart-api-examples/non-streaming-asr/bin/init.dart new file mode 120000 index 000000000..48508cfd3 --- /dev/null +++ b/dart-api-examples/non-streaming-asr/bin/init.dart @@ -0,0 +1 @@ +../../vad/bin/init.dart \ No newline at end of file diff --git a/dart-api-examples/non-streaming-asr/bin/nemo-ctc.dart b/dart-api-examples/non-streaming-asr/bin/nemo-ctc.dart new file mode 100644 index 000000000..fa90635fd --- /dev/null +++ b/dart-api-examples/non-streaming-asr/bin/nemo-ctc.dart @@ -0,0 +1,52 @@ +import 'dart:io'; +import 'dart:typed_data'; + +import 'package:args/args.dart'; +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; + +import './init.dart'; + +void main(List<String> arguments) async { + await initSherpaOnnx(); + + final parser = ArgParser() + ..addOption('model', help: 'Path to the NeMo CTC model') + ..addOption('tokens', help: 'Path to tokens.txt') + ..addOption('input-wav', help: 'Path to input.wav to transcribe'); + + final res = parser.parse(arguments); + if (res['model'] == null || + res['tokens'] == null || + res['input-wav'] == null) { + print(parser.usage); + exit(1); + } + + final model = res['model'] as String; + final tokens = res['tokens'] as String; + final inputWav = res['input-wav'] as String; + + final nemo = sherpa_onnx.OfflineNemoEncDecCtcModelConfig(model: model); + + final modelConfig = sherpa_onnx.OfflineModelConfig( + nemoCtc: nemo, + tokens: tokens, + debug: true, + numThreads: 1, + ); + final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig); + final recognizer = sherpa_onnx.OfflineRecognizer(config); + + final waveData = 
sherpa_onnx.readWave(inputWav); + final stream = recognizer.createStream(); + + stream.acceptWaveform( + samples: waveData.samples, sampleRate: waveData.sampleRate); + recognizer.decode(stream); + + final result = recognizer.getResult(stream); + print(result.text); + + stream.free(); + recognizer.free(); +} diff --git a/dart-api-examples/non-streaming-asr/bin/nemo-transducer.dart b/dart-api-examples/non-streaming-asr/bin/nemo-transducer.dart new file mode 100644 index 000000000..881487455 --- /dev/null +++ b/dart-api-examples/non-streaming-asr/bin/nemo-transducer.dart @@ -0,0 +1,62 @@ +import 'dart:io'; +import 'dart:typed_data'; + +import 'package:args/args.dart'; +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; + +import './init.dart'; + +void main(List<String> arguments) async { + await initSherpaOnnx(); + + final parser = ArgParser() + ..addOption('encoder', help: 'Path to the encoder model') + ..addOption('decoder', help: 'Path to decoder model') + ..addOption('joiner', help: 'Path to joiner model') + ..addOption('tokens', help: 'Path to tokens.txt') + ..addOption('input-wav', help: 'Path to input.wav to transcribe'); + + final res = parser.parse(arguments); + if (res['encoder'] == null || + res['decoder'] == null || + res['joiner'] == null || + res['tokens'] == null || + res['input-wav'] == null) { + print(parser.usage); + exit(1); + } + + final encoder = res['encoder'] as String; + final decoder = res['decoder'] as String; + final joiner = res['joiner'] as String; + final tokens = res['tokens'] as String; + final inputWav = res['input-wav'] as String; + + final transducer = sherpa_onnx.OfflineTransducerModelConfig( + encoder: encoder, + decoder: decoder, + joiner: joiner, + ); + + final modelConfig = sherpa_onnx.OfflineModelConfig( + transducer: transducer, + tokens: tokens, + debug: true, + numThreads: 1, + ); + final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig); + final recognizer = sherpa_onnx.OfflineRecognizer(config); + + final 
waveData = sherpa_onnx.readWave(inputWav); + final stream = recognizer.createStream(); + + stream.acceptWaveform( + samples: waveData.samples, sampleRate: waveData.sampleRate); + recognizer.decode(stream); + + final result = recognizer.getResult(stream); + print(result.text); + + stream.free(); + recognizer.free(); +} diff --git a/dart-api-examples/non-streaming-asr/bin/paraformer.dart b/dart-api-examples/non-streaming-asr/bin/paraformer.dart new file mode 100644 index 000000000..fd3886788 --- /dev/null +++ b/dart-api-examples/non-streaming-asr/bin/paraformer.dart @@ -0,0 +1,55 @@ +import 'dart:io'; +import 'dart:typed_data'; + +import 'package:args/args.dart'; +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; + +import './init.dart'; + +void main(List<String> arguments) async { + await initSherpaOnnx(); + + final parser = ArgParser() + ..addOption('model', help: 'Path to the paraformer model') + ..addOption('tokens', help: 'Path to tokens.txt') + ..addOption('input-wav', help: 'Path to input.wav to transcribe'); + + final res = parser.parse(arguments); + if (res['model'] == null || + res['tokens'] == null || + res['input-wav'] == null) { + print(parser.usage); + exit(1); + } + + final model = res['model'] as String; + final tokens = res['tokens'] as String; + final inputWav = res['input-wav'] as String; + + final paraformer = sherpa_onnx.OfflineParaformerModelConfig( + model: model, + ); + + final modelConfig = sherpa_onnx.OfflineModelConfig( + paraformer: paraformer, + tokens: tokens, + debug: true, + numThreads: 1, + modelType: 'paraformer', + ); + final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig); + final recognizer = sherpa_onnx.OfflineRecognizer(config); + + final waveData = sherpa_onnx.readWave(inputWav); + final stream = recognizer.createStream(); + + stream.acceptWaveform( + samples: waveData.samples, sampleRate: waveData.sampleRate); + recognizer.decode(stream); + + final result = recognizer.getResult(stream); + 
print(result.text); + + stream.free(); + recognizer.free(); +} diff --git a/dart-api-examples/non-streaming-asr/bin/telespeech-ctc.dart b/dart-api-examples/non-streaming-asr/bin/telespeech-ctc.dart new file mode 100644 index 000000000..b9d21a0d0 --- /dev/null +++ b/dart-api-examples/non-streaming-asr/bin/telespeech-ctc.dart @@ -0,0 +1,51 @@ +import 'dart:io'; +import 'dart:typed_data'; + +import 'package:args/args.dart'; +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; + +import './init.dart'; + +void main(List<String> arguments) async { + await initSherpaOnnx(); + + final parser = ArgParser() + ..addOption('model', help: 'Path to the telespeech CTC model') + ..addOption('tokens', help: 'Path to tokens.txt') + ..addOption('input-wav', help: 'Path to input.wav to transcribe'); + + final res = parser.parse(arguments); + if (res['model'] == null || + res['tokens'] == null || + res['input-wav'] == null) { + print(parser.usage); + exit(1); + } + + final model = res['model'] as String; + final tokens = res['tokens'] as String; + final inputWav = res['input-wav'] as String; + + final modelConfig = sherpa_onnx.OfflineModelConfig( + telespeechCtc: model, + tokens: tokens, + debug: true, + numThreads: 1, + modelType: 'telespeech_ctc', + ); + final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig); + final recognizer = sherpa_onnx.OfflineRecognizer(config); + + final waveData = sherpa_onnx.readWave(inputWav); + final stream = recognizer.createStream(); + + stream.acceptWaveform( + samples: waveData.samples, sampleRate: waveData.sampleRate); + recognizer.decode(stream); + + final result = recognizer.getResult(stream); + print(result.text); + + stream.free(); + recognizer.free(); +} diff --git a/dart-api-examples/non-streaming-asr/bin/vad-with-paraformer.dart b/dart-api-examples/non-streaming-asr/bin/vad-with-paraformer.dart new file mode 100644 index 000000000..1f03ed3e8 --- /dev/null +++ b/dart-api-examples/non-streaming-asr/bin/vad-with-paraformer.dart 
@@ -0,0 +1,97 @@ +import 'dart:io'; +import 'dart:typed_data'; + +import 'package:args/args.dart'; +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; + +import './init.dart'; + +void main(List<String> arguments) async { + await initSherpaOnnx(); + + final parser = ArgParser() + ..addOption('silero-vad', help: 'Path to silero_vad.onnx') + ..addOption('model', help: 'Path to the paraformer model') + ..addOption('tokens', help: 'Path to tokens.txt') + ..addOption('input-wav', help: 'Path to input.wav to transcribe'); + + final res = parser.parse(arguments); + if (res['silero-vad'] == null || + res['model'] == null || + res['tokens'] == null || + res['input-wav'] == null) { + print(parser.usage); + exit(1); + } + + final sileroVad = res['silero-vad'] as String; + final model = res['model'] as String; + final tokens = res['tokens'] as String; + final inputWav = res['input-wav'] as String; + + final paraformer = sherpa_onnx.OfflineParaformerModelConfig( + model: model, + ); + + final modelConfig = sherpa_onnx.OfflineModelConfig( + paraformer: paraformer, + tokens: tokens, + debug: true, + numThreads: 1, + modelType: 'paraformer', + ); + final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig); + final recognizer = sherpa_onnx.OfflineRecognizer(config); + + final sileroVadConfig = sherpa_onnx.SileroVadModelConfig( + model: sileroVad, + minSilenceDuration: 0.25, + minSpeechDuration: 0.5, + ); + + final vadConfig = sherpa_onnx.VadModelConfig( + sileroVad: sileroVadConfig, + numThreads: 1, + debug: true, + ); + + final vad = sherpa_onnx.VoiceActivityDetector( + config: vadConfig, bufferSizeInSeconds: 10); + + final waveData = sherpa_onnx.readWave(inputWav); + + int numSamples = waveData.samples.length; + int numIter = numSamples ~/ vadConfig.sileroVad.windowSize; + + for (int i = 0; i != numIter; ++i) { + int start = i * vadConfig.sileroVad.windowSize; + vad.acceptWaveform(Float32List.sublistView( + waveData.samples, start, start + 
vadConfig.sileroVad.windowSize)); + + if (vad.isDetected()) { + while (!vad.isEmpty()) { + final stream = recognizer.createStream(); + final segment = vad.front(); + stream.acceptWaveform( + samples: segment.samples, sampleRate: waveData.sampleRate); + recognizer.decode(stream); + + final result = recognizer.getResult(stream); + + final startTime = segment.start * 1.0 / waveData.sampleRate; + final duration = segment.samples.length * 1.0 / waveData.sampleRate; + final stopTime = startTime + duration; + if (result.text != '') { + print( + '${startTime.toStringAsPrecision(4)} -- ${stopTime.toStringAsPrecision(4)}: ${result.text}'); + } + + stream.free(); + vad.pop(); + } + } + } + + vad.free(); + recognizer.free(); +} diff --git a/dart-api-examples/non-streaming-asr/bin/whisper.dart b/dart-api-examples/non-streaming-asr/bin/whisper.dart new file mode 100644 index 000000000..0e4e0f3d7 --- /dev/null +++ b/dart-api-examples/non-streaming-asr/bin/whisper.dart @@ -0,0 +1,59 @@ +import 'dart:io'; +import 'dart:typed_data'; + +import 'package:args/args.dart'; +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; + +import './init.dart'; + +void main(List<String> arguments) async { + await initSherpaOnnx(); + + final parser = ArgParser() + ..addOption('encoder', help: 'Path to the whisper encoder model') + ..addOption('decoder', help: 'Path to whisper decoder model') + ..addOption('tokens', help: 'Path to tokens.txt') + ..addOption('input-wav', help: 'Path to input.wav to transcribe'); + + final res = parser.parse(arguments); + if (res['encoder'] == null || + res['decoder'] == null || + res['tokens'] == null || + res['input-wav'] == null) { + print(parser.usage); + exit(1); + } + + final encoder = res['encoder'] as String; + final decoder = res['decoder'] as String; + final tokens = res['tokens'] as String; + final inputWav = res['input-wav'] as String; + + final whisper = sherpa_onnx.OfflineWhisperModelConfig( + encoder: encoder, + decoder: decoder, + ); + + final 
modelConfig = sherpa_onnx.OfflineModelConfig( + whisper: whisper, + tokens: tokens, + modelType: 'whisper', + debug: false, + numThreads: 1, + ); + final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig); + final recognizer = sherpa_onnx.OfflineRecognizer(config); + + final waveData = sherpa_onnx.readWave(inputWav); + final stream = recognizer.createStream(); + + stream.acceptWaveform( + samples: waveData.samples, sampleRate: waveData.sampleRate); + recognizer.decode(stream); + + final result = recognizer.getResult(stream); + print(result.text); + + stream.free(); + recognizer.free(); +} diff --git a/dart-api-examples/non-streaming-asr/bin/zipformer-transducer.dart b/dart-api-examples/non-streaming-asr/bin/zipformer-transducer.dart new file mode 100644 index 000000000..881487455 --- /dev/null +++ b/dart-api-examples/non-streaming-asr/bin/zipformer-transducer.dart @@ -0,0 +1,62 @@ +import 'dart:io'; +import 'dart:typed_data'; + +import 'package:args/args.dart'; +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; + +import './init.dart'; + +void main(List<String> arguments) async { + await initSherpaOnnx(); + + final parser = ArgParser() + ..addOption('encoder', help: 'Path to the encoder model') + ..addOption('decoder', help: 'Path to decoder model') + ..addOption('joiner', help: 'Path to joiner model') + ..addOption('tokens', help: 'Path to tokens.txt') + ..addOption('input-wav', help: 'Path to input.wav to transcribe'); + + final res = parser.parse(arguments); + if (res['encoder'] == null || + res['decoder'] == null || + res['joiner'] == null || + res['tokens'] == null || + res['input-wav'] == null) { + print(parser.usage); + exit(1); + } + + final encoder = res['encoder'] as String; + final decoder = res['decoder'] as String; + final joiner = res['joiner'] as String; + final tokens = res['tokens'] as String; + final inputWav = res['input-wav'] as String; + + final transducer = sherpa_onnx.OfflineTransducerModelConfig( + encoder: encoder, + 
decoder: decoder, + joiner: joiner, + ); + + final modelConfig = sherpa_onnx.OfflineModelConfig( + transducer: transducer, + tokens: tokens, + debug: true, + numThreads: 1, + ); + final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig); + final recognizer = sherpa_onnx.OfflineRecognizer(config); + + final waveData = sherpa_onnx.readWave(inputWav); + final stream = recognizer.createStream(); + + stream.acceptWaveform( + samples: waveData.samples, sampleRate: waveData.sampleRate); + recognizer.decode(stream); + + final result = recognizer.getResult(stream); + print(result.text); + + stream.free(); + recognizer.free(); +} diff --git a/dart-api-examples/non-streaming-asr/pubspec.lock b/dart-api-examples/non-streaming-asr/pubspec.lock new file mode 100644 index 000000000..7c77c2a62 --- /dev/null +++ b/dart-api-examples/non-streaming-asr/pubspec.lock @@ -0,0 +1,96 @@ +# Generated by pub +# See https://dart.dev/tools/pub/glossary#lockfile +packages: + args: + dependency: "direct main" + description: + name: args + sha256: "7cf60b9f0cc88203c5a190b4cd62a99feea42759a7fa695010eb5de1c0b2252a" + url: "https://pub.dev" + source: hosted + version: "2.5.0" + characters: + dependency: transitive + description: + name: characters + sha256: "04a925763edad70e8443c99234dc3328f442e811f1d8fd1a72f1c8ad0f69a605" + url: "https://pub.dev" + source: hosted + version: "1.3.0" + collection: + dependency: transitive + description: + name: collection + sha256: ee67cb0715911d28db6bf4af1026078bd6f0128b07a5f66fb2ed94ec6783c09a + url: "https://pub.dev" + source: hosted + version: "1.18.0" + ffi: + dependency: transitive + description: + name: ffi + sha256: "493f37e7df1804778ff3a53bd691d8692ddf69702cf4c1c1096a2e41b4779e21" + url: "https://pub.dev" + source: hosted + version: "2.1.2" + flutter: + dependency: transitive + description: flutter + source: sdk + version: "0.0.0" + lints: + dependency: "direct dev" + description: + name: lints + sha256: 
cbf8d4b858bb0134ef3ef87841abdf8d63bfc255c266b7bf6b39daa1085c4290 + url: "https://pub.dev" + source: hosted + version: "3.0.0" + material_color_utilities: + dependency: transitive + description: + name: material_color_utilities + sha256: "0e0a020085b65b6083975e499759762399b4475f766c21668c4ecca34ea74e5a" + url: "https://pub.dev" + source: hosted + version: "0.8.0" + meta: + dependency: transitive + description: + name: meta + sha256: "7687075e408b093f36e6bbf6c91878cc0d4cd10f409506f7bc996f68220b9136" + url: "https://pub.dev" + source: hosted + version: "1.12.0" + path: + dependency: "direct main" + description: + name: path + sha256: "087ce49c3f0dc39180befefc60fdb4acd8f8620e5682fe2476afd0b3688bb4af" + url: "https://pub.dev" + source: hosted + version: "1.9.0" + sherpa_onnx: + dependency: "direct main" + description: + name: sherpa_onnx + sha256: e45894f81e7c854ca96d678bcab5303036e884a7c90e9a6c4ec04c7b1ee215a8 + url: "https://pub.dev" + source: hosted + version: "1.9.29" + sky_engine: + dependency: transitive + description: flutter + source: sdk + version: "0.0.99" + vector_math: + dependency: transitive + description: + name: vector_math + sha256: "80b3257d1492ce4d091729e3a67a60407d227c27241d6927be0130c98e741803" + url: "https://pub.dev" + source: hosted + version: "2.1.4" +sdks: + dart: ">=3.4.0 <4.0.0" + flutter: ">=3.3.0" diff --git a/dart-api-examples/non-streaming-asr/pubspec.yaml b/dart-api-examples/non-streaming-asr/pubspec.yaml new file mode 100644 index 000000000..61dbe71f2 --- /dev/null +++ b/dart-api-examples/non-streaming-asr/pubspec.yaml @@ -0,0 +1,18 @@ +name: non_streaming_asr +description: > + This example demonstrates how to use the Dart API for Non-streaming speech recognition. Specifically, we use the following models as examples, whisper, zipformer, and paraformer. + +version: 1.0.0 +# repository: https://github.com/my_org/my_repo + +environment: + sdk: ^3.4.0 + +# Add regular dependencies here. 
+dependencies: + sherpa_onnx: ^1.9.29 + path: ^1.9.0 + args: ^2.5.0 + +dev_dependencies: + lints: ^3.0.0 diff --git a/dart-api-examples/non-streaming-asr/run-nemo-ctc.sh b/dart-api-examples/non-streaming-asr/run-nemo-ctc.sh new file mode 100755 index 000000000..74775c0e6 --- /dev/null +++ b/dart-api-examples/non-streaming-asr/run-nemo-ctc.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +set -ex + +dart pub get + +if [ ! -f ./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2 + tar xvf sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2 + rm sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2 +fi + +dart run \ + ./bin/nemo-ctc.dart \ + --model ./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/model.onnx \ + --tokens ./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt \ + --input-wav ./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/de-german.wav diff --git a/dart-api-examples/non-streaming-asr/run-nemo-transducer.sh b/dart-api-examples/non-streaming-asr/run-nemo-transducer.sh new file mode 100755 index 000000000..5f4854df3 --- /dev/null +++ b/dart-api-examples/non-streaming-asr/run-nemo-transducer.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +set -ex + +dart pub get + +if [ ! 
-f ./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2 + + tar xvf sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2 + rm sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2 +fi + +dart run \ + ./bin/nemo-transducer.dart \ + --encoder ./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/encoder.onnx \ + --decoder ./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/decoder.onnx \ + --joiner ./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/joiner.onnx \ + --tokens ./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt \ + --input-wav ./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/de-german.wav diff --git a/dart-api-examples/non-streaming-asr/run-paraformer.sh b/dart-api-examples/non-streaming-asr/run-paraformer.sh new file mode 100755 index 000000000..1e1f9c82a --- /dev/null +++ b/dart-api-examples/non-streaming-asr/run-paraformer.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +set -ex + +dart pub get + +if [ ! 
-f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 + + tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 + rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 +fi + +dart run \ + ./bin/paraformer.dart \ + --model ./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx \ + --tokens ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \ + --input-wav ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/3-sichuan.wav diff --git a/dart-api-examples/non-streaming-asr/run-telespeech-ctc.sh b/dart-api-examples/non-streaming-asr/run-telespeech-ctc.sh new file mode 100755 index 000000000..8f9023924 --- /dev/null +++ b/dart-api-examples/non-streaming-asr/run-telespeech-ctc.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +set -ex + +dart pub get + +if [ ! -f ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2 + + tar xvf sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2 + rm sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2 +fi + +dart run \ + ./bin/telespeech-ctc.dart \ + --model ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx \ + --tokens ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt \ + --input-wav ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/test_wavs/3-sichuan.wav diff --git a/dart-api-examples/non-streaming-asr/run-vad-with-paraformer.sh b/dart-api-examples/non-streaming-asr/run-vad-with-paraformer.sh new file mode 100755 index 000000000..0a1670b2b --- /dev/null +++ b/dart-api-examples/non-streaming-asr/run-vad-with-paraformer.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +set -ex + +dart pub get + +if [[ ! -f ./silero_vad.onnx ]]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx +fi + +if [[ ! 
-f ./lei-jun-test.wav ]]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav +fi + +if [ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 + + tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 + rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 +fi + +dart run \ + ./bin/vad-with-paraformer.dart \ + --silero-vad ./silero_vad.onnx \ + --model ./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx \ + --tokens ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \ + --input-wav ./lei-jun-test.wav diff --git a/dart-api-examples/non-streaming-asr/run-whisper.sh b/dart-api-examples/non-streaming-asr/run-whisper.sh new file mode 100755 index 000000000..739b54372 --- /dev/null +++ b/dart-api-examples/non-streaming-asr/run-whisper.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +set -ex + +dart pub get + +if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2 + + tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2 + rm sherpa-onnx-whisper-tiny.en.tar.bz2 +fi + +dart run \ + ./bin/whisper.dart \ + --encoder ./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx \ + --decoder ./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx \ + --tokens ./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt \ + --input-wav ./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav diff --git a/dart-api-examples/non-streaming-asr/run-zipformer-transducer.sh b/dart-api-examples/non-streaming-asr/run-zipformer-transducer.sh new file mode 100755 index 000000000..9f115c62d --- /dev/null +++ b/dart-api-examples/non-streaming-asr/run-zipformer-transducer.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +set -ex + +dart pub get + +if [ ! 
-f ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/tokens.txt ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2 + + tar xvf sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2 + rm sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2 +fi + +dart run \ + ./bin/zipformer-transducer.dart \ + --encoder ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/encoder-epoch-30-avg-1.int8.onnx \ + --decoder ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/decoder-epoch-30-avg-1.onnx \ + --joiner ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/joiner-epoch-30-avg-1.int8.onnx \ + --tokens ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/tokens.txt \ + --input-wav ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/test_wavs/1221-135766-0001.wav diff --git a/dart-api-examples/vad/bin/init.dart b/dart-api-examples/vad/bin/init.dart new file mode 100644 index 000000000..072ff29b8 --- /dev/null +++ b/dart-api-examples/vad/bin/init.dart @@ -0,0 +1,29 @@ +import 'dart:io'; +import 'dart:isolate'; +import 'package:path/path.dart' as p; +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; + +Future initSherpaOnnx() async { + var uri = await Isolate.resolvePackageUri( + Uri.parse('package:sherpa_onnx/sherpa_onnx.dart')); + + if (uri == null) { + print('File not found'); + exit(1); + } + + String platform = ''; + + if (Platform.isMacOS) { + platform = 'macos'; + } else if (Platform.isLinux) { + platform = 'linux'; + } else if (Platform.isWindows) { + platform = 'windows'; + } else { + throw UnsupportedError('Unknown platform: ${Platform.operatingSystem}'); + } + + final libPath = p.join(p.dirname(p.fromUri(uri)), '..', platform); + sherpa_onnx.initBindings(libPath); +} diff --git a/dart-api-examples/vad/bin/vad.dart b/dart-api-examples/vad/bin/vad.dart index d981bad94..3ef9f66da 100644 --- a/dart-api-examples/vad/bin/vad.dart +++ b/dart-api-examples/vad/bin/vad.dart @@ -1,33 +1,9 @@ import 'dart:io'; -import 
'dart:isolate'; import 'dart:typed_data'; import 'package:args/args.dart'; -import 'package:path/path.dart' as p; import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; - -Future initSherpaOnnx() async { - var uri = await Isolate.resolvePackageUri( - Uri.parse('package:sherpa_onnx/sherpa_onnx.dart')); - - if (uri == null) { - print('File not found'); - exit(1); - } - String platform = ''; - if (Platform.isMacOS) { - platform = 'macos'; - } else if (Platform.isLinux) { - platform = 'linux'; - } else if (Platform.isWindows) { - platform = 'windows'; - } else { - throw UnsupportedError('Unknown platform: ${Platform.operatingSystem}'); - } - - final libPath = p.join(p.dirname(p.fromUri(uri)), '..', platform); - sherpa_onnx.initBindings(libPath); -} +import './init.dart'; void main(List arguments) async { await initSherpaOnnx(); @@ -36,6 +12,7 @@ void main(List arguments) async { ..addOption('silero-vad', help: 'Path to silero_vad.onnx') ..addOption('input-wav', help: 'Path to input.wav') ..addOption('output-wav', help: 'Path to output.wav'); + final res = parser.parse(arguments); if (res['silero-vad'] == null || res['input-wav'] == null || @@ -53,6 +30,7 @@ void main(List arguments) async { minSilenceDuration: 0.25, minSpeechDuration: 0.5, ); + final config = sherpa_onnx.VadModelConfig( sileroVad: sileroVadConfig, numThreads: 1, @@ -86,8 +64,11 @@ void main(List arguments) async { } } + vad.free(); + final s = Float32List.fromList(allSamples.expand((x) => x).toList()); sherpa_onnx.writeWave( filename: outputWav, samples: s, sampleRate: waveData.sampleRate); + print('Saved to ${outputWav}'); } diff --git a/dart-api-examples/vad/pubspec.lock b/dart-api-examples/vad/pubspec.lock index a29b073e6..7c77c2a62 100644 --- a/dart-api-examples/vad/pubspec.lock +++ b/dart-api-examples/vad/pubspec.lock @@ -74,10 +74,10 @@ packages: dependency: "direct main" description: name: sherpa_onnx - sha256: "6cfadf7bc35001bb1284f9fac1e03e33787cafa918e0c45da96d1e91afa58751" + sha256: 
e45894f81e7c854ca96d678bcab5303036e884a7c90e9a6c4ec04c7b1ee215a8 url: "https://pub.dev" source: hosted - version: "0.0.3" + version: "1.9.29" sky_engine: dependency: transitive description: flutter diff --git a/dart-api-examples/vad/pubspec.yaml b/dart-api-examples/vad/pubspec.yaml index e7c7bc1cc..54c13e0fa 100644 --- a/dart-api-examples/vad/pubspec.yaml +++ b/dart-api-examples/vad/pubspec.yaml @@ -9,7 +9,7 @@ environment: sdk: ^3.4.0 dependencies: - sherpa_onnx: ^0.0.3 + sherpa_onnx: ^1.9.29 path: ^1.9.0 args: ^2.5.0 diff --git a/scripts/dart/non-streaming-asr-pubspec.yaml b/scripts/dart/non-streaming-asr-pubspec.yaml new file mode 100644 index 000000000..8d389f38f --- /dev/null +++ b/scripts/dart/non-streaming-asr-pubspec.yaml @@ -0,0 +1,19 @@ +name: non_streaming_asr +description: > + This example demonstrates how to use the Dart API for Non-streaming speech recognition. Specifically, we use the following models as examples, whisper, zipformer, and paraformer. + +version: 1.0.0 + +environment: + sdk: ^3.4.0 + +# Add regular dependencies here. +dependencies: + sherpa_onnx: + path: ../../sherpa-onnx/flutter + + path: ^1.9.0 + args: ^2.5.0 + +dev_dependencies: + lints: ^3.0.0 diff --git a/scripts/dart/vad-pubspec.yaml b/scripts/dart/vad-pubspec.yaml new file mode 100644 index 000000000..2d9758df1 --- /dev/null +++ b/scripts/dart/vad-pubspec.yaml @@ -0,0 +1,18 @@ +name: vad + +description: > + This example demonstrates how to use the Dart API for VAD (voice activity detection). 
+ +version: 1.0.0 + +environment: + sdk: ^3.4.0 + +dependencies: + sherpa_onnx: + path: ../../sherpa-onnx/flutter + path: ^1.9.0 + args: ^2.5.0 + +dev_dependencies: + lints: ^3.0.0 diff --git a/sherpa-onnx/flutter/lib/src/offline_recognizer.dart b/sherpa-onnx/flutter/lib/src/offline_recognizer.dart index 633312424..7a220105c 100644 --- a/sherpa-onnx/flutter/lib/src/offline_recognizer.dart +++ b/sherpa-onnx/flutter/lib/src/offline_recognizer.dart @@ -102,11 +102,14 @@ class OfflineModelConfig { this.debug = true, this.provider = 'cpu', this.modelType = '', + this.modelingUnit = '', + this.bpeVocab = '', + this.telespeechCtc = '', }); @override String toString() { - return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType)'; + return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)'; } final OfflineTransducerModelConfig transducer; @@ -120,6 +123,9 @@ class OfflineModelConfig { final bool debug; final String provider; final String modelType; + final String modelingUnit; + final String bpeVocab; + final String telespeechCtc; } class OfflineRecognizerConfig { @@ -213,6 +219,9 @@ class OfflineRecognizer { c.ref.model.debug = config.model.debug ? 
1 : 0; c.ref.model.provider = config.model.provider.toNativeUtf8(); c.ref.model.modelType = config.model.modelType.toNativeUtf8(); + c.ref.model.modelingUnit = config.model.modelingUnit.toNativeUtf8(); + c.ref.model.bpeVocab = config.model.bpeVocab.toNativeUtf8(); + c.ref.model.telespeechCtc = config.model.telespeechCtc.toNativeUtf8(); c.ref.lm.model = config.lm.model.toNativeUtf8(); c.ref.lm.scale = config.lm.scale; @@ -228,6 +237,9 @@ class OfflineRecognizer { calloc.free(c.ref.hotwordsFile); calloc.free(c.ref.decodingMethod); calloc.free(c.ref.lm.model); + calloc.free(c.ref.model.telespeechCtc); + calloc.free(c.ref.model.bpeVocab); + calloc.free(c.ref.model.modelingUnit); calloc.free(c.ref.model.modelType); calloc.free(c.ref.model.provider); calloc.free(c.ref.model.tokens); diff --git a/sherpa-onnx/flutter/lib/src/online_recognizer.dart b/sherpa-onnx/flutter/lib/src/online_recognizer.dart index 538c68dda..bee1f2683 100644 --- a/sherpa-onnx/flutter/lib/src/online_recognizer.dart +++ b/sherpa-onnx/flutter/lib/src/online_recognizer.dart @@ -58,11 +58,13 @@ class OnlineModelConfig { this.provider = 'cpu', this.debug = true, this.modelType = '', + this.modelingUnit = '', + this.bpeVocab = '', }); @override String toString() { - return 'OnlineModelConfig(transducer: $transducer, paraformer: $paraformer, zipformer2Ctc: $zipformer2Ctc, tokens: $tokens, numThreads: $numThreads, provider: $provider, debug: $debug, modelType: $modelType)'; + return 'OnlineModelConfig(transducer: $transducer, paraformer: $paraformer, zipformer2Ctc: $zipformer2Ctc, tokens: $tokens, numThreads: $numThreads, provider: $provider, debug: $debug, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab)'; } final OnlineTransducerModelConfig transducer; @@ -78,6 +80,10 @@ class OnlineModelConfig { final bool debug; final String modelType; + + final String modelingUnit; + + final String bpeVocab; } class OnlineCtcFstDecoderConfig { @@ -180,6 +186,8 @@ class OnlineRecognizer { 
c.ref.model.provider = config.model.provider.toNativeUtf8(); c.ref.model.debug = config.model.debug ? 1 : 0; c.ref.model.modelType = config.model.modelType.toNativeUtf8(); + c.ref.model.modelingUnit = config.model.modelingUnit.toNativeUtf8(); + c.ref.model.bpeVocab = config.model.bpeVocab.toNativeUtf8(); c.ref.decodingMethod = config.decodingMethod.toNativeUtf8(); c.ref.maxActivePaths = config.maxActivePaths; @@ -199,6 +207,8 @@ class OnlineRecognizer { calloc.free(c.ref.ctcFstDecoderConfig.graph); calloc.free(c.ref.hotwordsFile); calloc.free(c.ref.decodingMethod); + calloc.free(c.ref.model.bpeVocab); + calloc.free(c.ref.model.modelingUnit); calloc.free(c.ref.model.modelType); calloc.free(c.ref.model.provider); calloc.free(c.ref.model.tokens); diff --git a/sherpa-onnx/flutter/lib/src/sherpa_onnx_bindings.dart b/sherpa-onnx/flutter/lib/src/sherpa_onnx_bindings.dart index 997bfc70e..efc96a8f7 100644 --- a/sherpa-onnx/flutter/lib/src/sherpa_onnx_bindings.dart +++ b/sherpa-onnx/flutter/lib/src/sherpa_onnx_bindings.dart @@ -63,6 +63,9 @@ final class SherpaOnnxOfflineModelConfig extends Struct { external Pointer provider; external Pointer modelType; + external Pointer modelingUnit; + external Pointer bpeVocab; + external Pointer telespeechCtc; } final class SherpaOnnxOfflineRecognizerConfig extends Struct { @@ -111,6 +114,10 @@ final class SherpaOnnxOnlineModelConfig extends Struct { external int debug; external Pointer modelType; + + external Pointer modelingUnit; + + external Pointer bpeVocab; } final class SherpaOnnxOnlineCtcFstDecoderConfig extends Struct { diff --git a/sherpa-onnx/flutter/pubspec.yaml b/sherpa-onnx/flutter/pubspec.yaml index 3b59ef479..2944a9a27 100644 --- a/sherpa-onnx/flutter/pubspec.yaml +++ b/sherpa-onnx/flutter/pubspec.yaml @@ -17,7 +17,7 @@ topics: - voice-activity-detection # remember to change the version in macos/sherpa_onnx.podspec -version: 0.0.2 +version: 1.9.29 homepage: https://github.com/k2-fsa/sherpa-onnx