Add non-streaming ASR examples for Dart API (#1007)

k2-fsa · Jun 14, 2024 · d945066 · d945066
1 parent b1f08c0
commit d945066
Show file tree

Hide file tree

Showing 35 changed files with 984 additions and 37 deletions.
diff --git a/.github/scripts/test-dart.sh b/.github/scripts/test-dart.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+
+set -ex
+
+cd dart-api-examples
+
+pushd non-streaming-asr
+
+echo '----------VAD with paraformer----------'
+./run-vad-with-paraformer.sh
+rm -rf sherpa-onnx-*
+
+echo '----------NeMo transducer----------'
+./run-nemo-transducer.sh
+rm -rf sherpa-onnx-*
+
+echo '----------NeMo CTC----------'
+./run-nemo-ctc.sh
+rm -rf sherpa-onnx-*
+
+echo '----------TeleSpeech CTC----------'
+./run-telespeech-ctc.sh
+rm -rf sherpa-onnx-*
+
+echo '----------paraformer----------'
+./run-paraformer.sh
+rm -rf sherpa-onnx-*
+
+echo '----------whisper----------'
+./run-whisper.sh
+rm -rf sherpa-onnx-*
+
+echo '----------zipformer transducer----------'
+./run-zipformer-transducer.sh
+rm -rf sherpa-onnx-*
+
+popd
+
+pushd vad
+./run.sh
+rm *.onnx
+popd
+
diff --git a/.github/workflows/test-dart-package.yaml b/.github/workflows/test-dart-package.yaml
@@ -0,0 +1,52 @@
+name: test-dart-package
+
+on:
+  schedule:
+    # minute (0-59)
+    # hour (0-23)
+    # day of the month (1-31)
+    # month (1-12)
+    # day of the week (0-6)
+    # nightly build at 15:50 UTC time every day
+    - cron: "50 15 * * *"
+
+  workflow_dispatch:
+
+concurrency:
+  group: test-dart-package-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  test_dart_package:
+    name: ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [macos-latest, ubuntu-latest] #, windows-latest]
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Setup Flutter SDK
+        uses: flutter-actions/setup-flutter@v3
+        with:
+          channel: stable
+          version: latest
+
+      - name: Display flutter info
+        shell: bash
+        run: |
+          which flutter
+          which dart
+
+          flutter --version
+          dart --version
+          flutter doctor
+
+      - name: Run tests
+        shell: bash
+        run: |
+          .github/scripts/test-dart.sh
diff --git a/.github/workflows/test-dart.yaml b/.github/workflows/test-dart.yaml
@@ -21,19 +21,24 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  dart:
+  test_dart:
     name: ${{ matrix.os }}
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
       matrix:
-        os: [macos-latest, ubuntu-latest] #, windows-latest]
+        os: [ubuntu-latest]
 
     steps:
       - uses: actions/checkout@v4
         with:
           fetch-depth: 0
 
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2
+        with:
+          key: ${{ matrix.os }}-dart
+
       - name: Setup Flutter SDK
         uses: flutter-actions/setup-flutter@v3
         with:
@@ -50,11 +55,39 @@ jobs:
           dart --version
           flutter doctor
 
+      - name: Build sherpa-onnx
+        shell: bash
+        run: |
+          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+          cmake --version
+          mkdir build
+
+          cd build
+
+          cmake \
+            -D BUILD_SHARED_LIBS=ON \
+            -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
+            -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
+            -DBUILD_ESPEAK_NG_EXE=OFF \
+            -DSHERPA_ONNX_ENABLE_BINARY=OFF \
+            -DCMAKE_INSTALL_PREFIX=./install \
+            ..
+          make -j install
+
+      - name: Copy libs
+        shell: bash
+        run: |
+          cp -v build/install/lib/lib* ./sherpa-onnx/flutter/linux/
+
+          echo "--------------------"
+
+          ls -lh ./sherpa-onnx/flutter/linux/
+
       - name: Run tests
         shell: bash
         run: |
-          cd dart-api-examples
+          cp scripts/dart/vad-pubspec.yaml dart-api-examples/vad/pubspec.yaml
+          cp scripts/dart/non-streaming-asr-pubspec.yaml dart-api-examples/non-streaming-asr/pubspec.yaml
 
-          pushd vad
-          ./run.sh
-          popd
+          .github/scripts/test-dart.sh
diff --git a/dart-api-examples/.gitignore b/dart-api-examples/.gitignore
@@ -0,0 +1 @@
+!run*.sh
diff --git a/dart-api-examples/non-streaming-asr/.gitignore b/dart-api-examples/non-streaming-asr/.gitignore
@@ -0,0 +1,3 @@
+# https://dart.dev/guides/libraries/private-files
+# Created by `dart pub`
+.dart_tool/
diff --git a/dart-api-examples/non-streaming-asr/CHANGELOG.md b/dart-api-examples/non-streaming-asr/CHANGELOG.md
@@ -0,0 +1,3 @@
+## 1.0.0
+
+- Initial version.
diff --git a/dart-api-examples/non-streaming-asr/README.md b/dart-api-examples/non-streaming-asr/README.md
@@ -0,0 +1,14 @@
+# Introduction
+
+This folder contains examples for non-streaming ASR with Dart API.
+
+| File | Description|
+|------|------------|
+|[./bin/nemo-ctc.dart](./bin/nemo-ctc.dart)| Use a NeMo CTC model for speech recognition. See [./run-nemo-ctc.sh](./run-nemo-ctc.sh)|
+|[./bin/nemo-transducer.dart](./bin/nemo-transducer.dart)| Use a NeMo transducer model for speech recognition. See [./run-nemo-transducer.sh](./run-nemo-transducer.sh)|
+|[./bin/paraformer.dart](./bin/paraformer.dart)|Use a paraformer model for speech recognition. See [./run-paraformer.sh](./run-paraformer.sh)|
+|[./bin/telespeech-ctc.dart](./bin/telespeech-ctc.dart)| Use models from [Tele-AI/TeleSpeech-ASR](https://github.com/Tele-AI/TeleSpeech-ASR) for speech recognition. See [./run-telespeech-ctc.sh](./run-telespeech-ctc.sh)|
+|[./bin/whisper.dart](./bin/whisper.dart)| Use whisper for speech recognition. See [./run-whisper.sh](./run-whisper.sh)|
+|[./bin/zipformer-transducer.dart](./bin/zipformer-transducer.dart)| Use a zipformer transducer for speech recognition. See [./run-zipformer-transducer.sh](./run-zipformer-transducer.sh)|
+|[./bin/vad-with-paraformer.dart](./bin/vad-with-paraformer.dart)| Use [silero-vad](https://github.com/snakers4/silero-vad) with a paraformer model for speech recognition. See [./run-vad-with-paraformer.sh](./run-vad-with-paraformer.sh)|
+
diff --git a/dart-api-examples/non-streaming-asr/analysis_options.yaml b/dart-api-examples/non-streaming-asr/analysis_options.yaml
@@ -0,0 +1,30 @@
+# This file configures the static analysis results for your project (errors,
+# warnings, and lints).
+#
+# This enables the 'recommended' set of lints from `package:lints`.
+# This set helps identify many issues that may lead to problems when running
+# or consuming Dart code, and enforces writing Dart using a single, idiomatic
+# style and format.
+#
+# If you want a smaller set of lints you can change this to specify
+# 'package:lints/core.yaml'. These are just the most critical lints
+# (the recommended set includes the core lints).
+# The core lints are also what is used by pub.dev for scoring packages.
+
+include: package:lints/recommended.yaml
+
+# Uncomment the following section to specify additional rules.
+
+# linter:
+#   rules:
+#     - camel_case_types
+
+# analyzer:
+#   exclude:
+#     - path/to/excluded/files/**
+
+# For more information about the core and recommended set of lints, see
+# https://dart.dev/go/core-lints
+
+# For additional information about configuring this file, see
+# https://dart.dev/guides/language/analysis-options
diff --git a/dart-api-examples/non-streaming-asr/bin/init.dart b/dart-api-examples/non-streaming-asr/bin/init.dart
@@ -0,0 +1 @@
+../../vad/bin/init.dart
diff --git a/dart-api-examples/non-streaming-asr/bin/nemo-ctc.dart b/dart-api-examples/non-streaming-asr/bin/nemo-ctc.dart
@@ -0,0 +1,52 @@
+import 'dart:io';
+import 'dart:typed_data';
+
+import 'package:args/args.dart';
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+
+import './init.dart';
+
+/// Transcribes a wave file with a NeMo CTC model and prints the recognized
+/// text.
+///
+/// Requires the `--model`, `--tokens` and `--input-wav` options. When any of
+/// them is missing, or the command line cannot be parsed, the usage text is
+/// printed and the process exits with status 1.
+void main(List<String> arguments) async {
+  await initSherpaOnnx();
+
+  final parser = ArgParser()
+    ..addOption('model', help: 'Path to the NeMo CTC model')
+    ..addOption('tokens', help: 'Path to tokens.txt')
+    ..addOption('input-wav', help: 'Path to input.wav to transcribe');
+
+  final ArgResults res;
+  try {
+    res = parser.parse(arguments);
+  } on FormatException catch (e) {
+    // Unknown or malformed options: report them together with the usage
+    // text instead of terminating with an uncaught exception.
+    print(e.message);
+    print(parser.usage);
+    exit(1);
+  }
+
+  if (res['model'] == null ||
+      res['tokens'] == null ||
+      res['input-wav'] == null) {
+    print(parser.usage);
+    exit(1);
+  }
+
+  final model = res['model'] as String;
+  final tokens = res['tokens'] as String;
+  final inputWav = res['input-wav'] as String;
+
+  final nemo = sherpa_onnx.OfflineNemoEncDecCtcModelConfig(model: model);
+
+  final modelConfig = sherpa_onnx.OfflineModelConfig(
+    nemoCtc: nemo,
+    tokens: tokens,
+    debug: true,
+    numThreads: 1,
+  );
+  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
+  final recognizer = sherpa_onnx.OfflineRecognizer(config);
+
+  // The recognizer and the stream are released explicitly via free(); the
+  // try/finally blocks make sure that still happens when a step throws.
+  try {
+    final waveData = sherpa_onnx.readWave(inputWav);
+    final stream = recognizer.createStream();
+    try {
+      stream.acceptWaveform(
+          samples: waveData.samples, sampleRate: waveData.sampleRate);
+      recognizer.decode(stream);
+
+      final result = recognizer.getResult(stream);
+      print(result.text);
+    } finally {
+      stream.free();
+    }
+  } finally {
+    recognizer.free();
+  }
+}
diff --git a/dart-api-examples/non-streaming-asr/bin/nemo-transducer.dart b/dart-api-examples/non-streaming-asr/bin/nemo-transducer.dart
@@ -0,0 +1,62 @@
+import 'dart:io';
+import 'dart:typed_data';
+
+import 'package:args/args.dart';
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+
+import './init.dart';
+
+/// Transcribes a wave file with a NeMo transducer model and prints the
+/// recognized text.
+///
+/// Requires the `--encoder`, `--decoder`, `--joiner`, `--tokens` and
+/// `--input-wav` options. When any of them is missing, or the command line
+/// cannot be parsed, the usage text is printed and the process exits with
+/// status 1.
+void main(List<String> arguments) async {
+  await initSherpaOnnx();
+
+  final parser = ArgParser()
+    ..addOption('encoder', help: 'Path to the encoder model')
+    ..addOption('decoder', help: 'Path to decoder model')
+    ..addOption('joiner', help: 'Path to joiner model')
+    ..addOption('tokens', help: 'Path to tokens.txt')
+    ..addOption('input-wav', help: 'Path to input.wav to transcribe');
+
+  final ArgResults res;
+  try {
+    res = parser.parse(arguments);
+  } on FormatException catch (e) {
+    // Unknown or malformed options: report them together with the usage
+    // text instead of terminating with an uncaught exception.
+    print(e.message);
+    print(parser.usage);
+    exit(1);
+  }
+
+  if (res['encoder'] == null ||
+      res['decoder'] == null ||
+      res['joiner'] == null ||
+      res['tokens'] == null ||
+      res['input-wav'] == null) {
+    print(parser.usage);
+    exit(1);
+  }
+
+  final encoder = res['encoder'] as String;
+  final decoder = res['decoder'] as String;
+  final joiner = res['joiner'] as String;
+  final tokens = res['tokens'] as String;
+  final inputWav = res['input-wav'] as String;
+
+  final transducer = sherpa_onnx.OfflineTransducerModelConfig(
+    encoder: encoder,
+    decoder: decoder,
+    joiner: joiner,
+  );
+
+  final modelConfig = sherpa_onnx.OfflineModelConfig(
+    transducer: transducer,
+    tokens: tokens,
+    debug: true,
+    numThreads: 1,
+  );
+  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
+  final recognizer = sherpa_onnx.OfflineRecognizer(config);
+
+  // The recognizer and the stream are released explicitly via free(); the
+  // try/finally blocks make sure that still happens when a step throws.
+  try {
+    final waveData = sherpa_onnx.readWave(inputWav);
+    final stream = recognizer.createStream();
+    try {
+      stream.acceptWaveform(
+          samples: waveData.samples, sampleRate: waveData.sampleRate);
+      recognizer.decode(stream);
+
+      final result = recognizer.getResult(stream);
+      print(result.text);
+    } finally {
+      stream.free();
+    }
+  } finally {
+    recognizer.free();
+  }
+}
diff --git a/dart-api-examples/non-streaming-asr/bin/paraformer.dart b/dart-api-examples/non-streaming-asr/bin/paraformer.dart
@@ -0,0 +1,55 @@
+import 'dart:io';
+import 'dart:typed_data';
+
+import 'package:args/args.dart';
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+
+import './init.dart';
+
+/// Transcribes a wave file with a paraformer model and prints the recognized
+/// text.
+///
+/// Requires the `--model`, `--tokens` and `--input-wav` options. When any of
+/// them is missing, or the command line cannot be parsed, the usage text is
+/// printed and the process exits with status 1.
+void main(List<String> arguments) async {
+  await initSherpaOnnx();
+
+  final parser = ArgParser()
+    ..addOption('model', help: 'Path to the paraformer model')
+    ..addOption('tokens', help: 'Path to tokens.txt')
+    ..addOption('input-wav', help: 'Path to input.wav to transcribe');
+
+  final ArgResults res;
+  try {
+    res = parser.parse(arguments);
+  } on FormatException catch (e) {
+    // Unknown or malformed options: report them together with the usage
+    // text instead of terminating with an uncaught exception.
+    print(e.message);
+    print(parser.usage);
+    exit(1);
+  }
+
+  if (res['model'] == null ||
+      res['tokens'] == null ||
+      res['input-wav'] == null) {
+    print(parser.usage);
+    exit(1);
+  }
+
+  final model = res['model'] as String;
+  final tokens = res['tokens'] as String;
+  final inputWav = res['input-wav'] as String;
+
+  final paraformer = sherpa_onnx.OfflineParaformerModelConfig(
+    model: model,
+  );
+
+  final modelConfig = sherpa_onnx.OfflineModelConfig(
+    paraformer: paraformer,
+    tokens: tokens,
+    debug: true,
+    numThreads: 1,
+    modelType: 'paraformer',
+  );
+  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
+  final recognizer = sherpa_onnx.OfflineRecognizer(config);
+
+  // The recognizer and the stream are released explicitly via free(); the
+  // try/finally blocks make sure that still happens when a step throws.
+  try {
+    final waveData = sherpa_onnx.readWave(inputWav);
+    final stream = recognizer.createStream();
+    try {
+      stream.acceptWaveform(
+          samples: waveData.samples, sampleRate: waveData.sampleRate);
+      recognizer.decode(stream);
+
+      final result = recognizer.getResult(stream);
+      print(result.text);
+    } finally {
+      stream.free();
+    }
+  } finally {
+    recognizer.free();
+  }
+}