From 6e09933d99c6c501f272b1a75dd2f8cfca17f150 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Mon, 17 Jun 2024 17:02:39 +0800 Subject: [PATCH] Inverse text normalization API for other programming languages (#1019) --- .github/scripts/test-dart.sh | 67 ++++----- .github/scripts/test-dot-net.sh | 1 + .github/scripts/test-nodejs-addon-npm.sh | 6 + .github/scripts/test-nodejs-npm.sh | 9 ++ .github/workflows/run-java-test.yaml | 2 + .github/workflows/test-dot-net.yaml | 59 +------- .github/workflows/test-go.yaml | 3 +- .github/workflows/test-nodejs-addon-api.yaml | 4 +- .gitignore | 1 + .../non-streaming-asr/bin/paraformer-itn.dart | 63 +++++++++ .../non-streaming-asr/run-paraformer-itn.sh | 27 ++++ .../offline-decode-files/Program.cs | 5 + .../run-paraformer-itn.sh | 24 ++++ .../run-telespeech-ctc.sh | 2 +- .../non-streaming-decode-files/main.go | 2 + .../run-paraformer-itn.sh | 28 ++++ .../run-telespeech-ctc.sh | 2 +- ...xtNormalizationNonStreamingParaformer.java | 54 ++++++++ ...n-inverse-text-normalization-paraformer.sh | 46 +++++++ kotlin-api-examples/run.sh | 29 ++++ kotlin-api-examples/test_itn_asr.kt | 37 +++++ .../test_asr_non_streaming_paraformer_itn.js | 48 +++++++ .../test-offline-paraformer-itn.js | 128 ++++++++++++++++++ scripts/dotnet/OfflineRecognizerConfig.cs | 9 +- .../run-paraformer-itn.sh | 1 + scripts/go/sherpa_onnx.go | 15 ++ scripts/node-addon-api/README.md | 4 +- .../node-addon-api/src/non-streaming-asr.cc | 10 ++ .../node-addon-api/src/non-streaming-tts.cc | 2 +- sherpa-onnx/c-api/c-api.cc | 3 + sherpa-onnx/c-api/c-api.h | 2 + .../flutter/lib/src/offline_recognizer.dart | 12 +- .../flutter/lib/src/sherpa_onnx_bindings.dart | 3 + .../sherpa/onnx/OfflineRecognizerConfig.java | 16 +++ sherpa-onnx/jni/offline-recognizer.cc | 12 ++ sherpa-onnx/kotlin-api/OfflineRecognizer.kt | 2 + swift-api-examples/SherpaOnnx.swift | 8 +- wasm/asr/sherpa-onnx-asr.js | 23 +++- wasm/nodejs/sherpa-onnx-wasm-nodejs.cc | 4 +- 39 files changed, 669 insertions(+), 104 
deletions(-) create mode 100644 dart-api-examples/non-streaming-asr/bin/paraformer-itn.dart create mode 100755 dart-api-examples/non-streaming-asr/run-paraformer-itn.sh create mode 100755 dotnet-examples/offline-decode-files/run-paraformer-itn.sh create mode 100755 go-api-examples/non-streaming-decode-files/run-paraformer-itn.sh create mode 100644 java-api-examples/InverseTextNormalizationNonStreamingParaformer.java create mode 100755 java-api-examples/run-inverse-text-normalization-paraformer.sh create mode 100644 kotlin-api-examples/test_itn_asr.kt create mode 100644 nodejs-addon-examples/test_asr_non_streaming_paraformer_itn.js create mode 100644 nodejs-examples/test-offline-paraformer-itn.js create mode 120000 scripts/go/_internal/non-streaming-decode-files/run-paraformer-itn.sh diff --git a/.github/scripts/test-dart.sh b/.github/scripts/test-dart.sh index 763f2bcc4..0850a72b1 100755 --- a/.github/scripts/test-dart.sh +++ b/.github/scripts/test-dart.sh @@ -4,6 +4,41 @@ set -ex cd dart-api-examples +pushd non-streaming-asr + +echo '----------paraformer itn----------' +./run-paraformer-itn.sh + +echo '----------paraformer----------' +./run-paraformer.sh +rm -rf sherpa-onnx-* + +echo '----------VAD with paraformer----------' +./run-vad-with-paraformer.sh +rm -rf sherpa-onnx-* + +echo '----------NeMo transducer----------' +./run-nemo-transducer.sh +rm -rf sherpa-onnx-* + +echo '----------NeMo CTC----------' +./run-nemo-ctc.sh +rm -rf sherpa-onnx-* + +echo '----------TeleSpeech CTC----------' +./run-telespeech-ctc.sh +rm -rf sherpa-onnx-* + +echo '----------whisper----------' +./run-whisper.sh +rm -rf sherpa-onnx-* + +echo '----------zipformer transducer----------' +./run-zipformer-transducer.sh +rm -rf sherpa-onnx-* + +popd # non-streaming-asr + pushd tts echo '----------piper tts----------' @@ -44,38 +79,6 @@ rm -rf sherpa-onnx-* popd # streaming-asr -pushd non-streaming-asr - -echo '----------VAD with paraformer----------' -./run-vad-with-paraformer.sh -rm -rf 
sherpa-onnx-* - -echo '----------NeMo transducer----------' -./run-nemo-transducer.sh -rm -rf sherpa-onnx-* - -echo '----------NeMo CTC----------' -./run-nemo-ctc.sh -rm -rf sherpa-onnx-* - -echo '----------TeleSpeech CTC----------' -./run-telespeech-ctc.sh -rm -rf sherpa-onnx-* - -echo '----------paraformer----------' -./run-paraformer.sh -rm -rf sherpa-onnx-* - -echo '----------whisper----------' -./run-whisper.sh -rm -rf sherpa-onnx-* - -echo '----------zipformer transducer----------' -./run-zipformer-transducer.sh -rm -rf sherpa-onnx-* - -popd # non-streaming-asr - pushd vad ./run.sh rm *.onnx diff --git a/.github/scripts/test-dot-net.sh b/.github/scripts/test-dot-net.sh index 6ae126037..395c67c83 100755 --- a/.github/scripts/test-dot-net.sh +++ b/.github/scripts/test-dot-net.sh @@ -3,6 +3,7 @@ cd dotnet-examples/ cd ./offline-decode-files +./run-paraformer-itn.sh ./run-telespeech-ctc.sh ./run-nemo-ctc.sh ./run-paraformer.sh diff --git a/.github/scripts/test-nodejs-addon-npm.sh b/.github/scripts/test-nodejs-addon-npm.sh index 7057ee2b4..a6c4345a8 100755 --- a/.github/scripts/test-nodejs-addon-npm.sh +++ b/.github/scripts/test-nodejs-addon-npm.sh @@ -119,6 +119,12 @@ tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 node ./test_asr_non_streaming_paraformer.js + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav + +node ./test_asr_non_streaming_paraformer_itn.js + rm -rf sherpa-onnx-paraformer-zh-2023-03-28 echo "----------tts----------" diff --git a/.github/scripts/test-nodejs-npm.sh b/.github/scripts/test-nodejs-npm.sh index a27214383..2098bb166 100755 --- a/.github/scripts/test-nodejs-npm.sh +++ b/.github/scripts/test-nodejs-npm.sh @@ -11,6 +11,15 @@ ls -lh node_modules # offline asr +curl -LS -O 
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 +ls -lh +tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 +rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst +node ./test-offline-paraformer-itn.js +rm -rf sherpa-onnx-paraformer-zh-2023-03-28 + curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2 ls -lh tar xvf sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2 diff --git a/.github/workflows/run-java-test.yaml b/.github/workflows/run-java-test.yaml index c000b277b..30fc6a827 100644 --- a/.github/workflows/run-java-test.yaml +++ b/.github/workflows/run-java-test.yaml @@ -190,6 +190,8 @@ jobs: shell: bash run: | cd ./java-api-examples + ./run-inverse-text-normalization-paraformer.sh + ./run-non-streaming-decode-file-paraformer.sh rm -rf sherpa-onnx-paraformer-zh-* diff --git a/.github/workflows/test-dot-net.yaml b/.github/workflows/test-dot-net.yaml index 7052542db..55bc8e6a3 100644 --- a/.github/workflows/test-dot-net.yaml +++ b/.github/workflows/test-dot-net.yaml @@ -39,7 +39,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, macos-latest, windows-latest] + os: [ubuntu-latest] python-version: ["3.8"] steps: @@ -72,45 +72,18 @@ jobs: cmake --build . 
--target install --config Release - - name: Build sherpa-onnx for windows x86 - if: matrix.os == 'windows-latest' - shell: bash - run: | - export CMAKE_CXX_COMPILER_LAUNCHER=ccache - export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" - cmake --version - - mkdir build-win32 - cd build-win32 - cmake \ - -A Win32 \ - -DBUILD_SHARED_LIBS=ON \ - -DCMAKE_INSTALL_PREFIX=./install \ - -DCMAKE_BUILD_TYPE=Release \ - -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \ - -DBUILD_ESPEAK_NG_EXE=OFF \ - -DSHERPA_ONNX_ENABLE_BINARY=ON \ - .. - cmake --build . --target install --config Release - - uses: actions/upload-artifact@v4 with: name: ${{ matrix.os }} path: ./build/install/lib/ - - uses: actions/upload-artifact@v4 - if: matrix.os == 'windows-latest' - with: - name: ${{ matrix.os }}-win32 - path: ./build-win32/install/lib/ - test-dot-net: runs-on: ${{ matrix.os }} needs: [build-libs] strategy: fail-fast: false matrix: - os: [ubuntu-latest, macos-latest] #, windows-latest] + os: [ubuntu-latest] python-version: ["3.8"] steps: @@ -134,30 +107,11 @@ jobs: name: ubuntu-latest path: /tmp/linux - - name: Retrieve artifact from macos-latest - uses: actions/download-artifact@v4 - with: - name: macos-latest - path: /tmp/macos - - - name: Retrieve artifact from windows-latest - uses: actions/download-artifact@v4 - with: - name: windows-latest - path: /tmp/windows-x64 - - - name: Retrieve artifact from windows-latest - uses: actions/download-artifact@v4 - with: - name: windows-latest-win32 - path: /tmp/windows-x86 - - name: Setup .NET uses: actions/setup-dotnet@v4 with: dotnet-version: | 6.0.x - 7.0.x - name: Check dotnet run: dotnet --info @@ -171,15 +125,6 @@ jobs: echo "----------/tmp/linux----------" ls -lh /tmp/linux - echo "----------/tmp/macos----------" - ls -lh /tmp/macos - - echo "----------/tmp/windows-x64----------" - ls -lh /tmp/windows-x64 - - echo "----------/tmp/windows-x86----------" - ls -lh /tmp/windows-x86 - - name: Build shell: bash run: | diff --git 
a/.github/workflows/test-go.yaml b/.github/workflows/test-go.yaml index 5724d9cb9..1ec654418 100644 --- a/.github/workflows/test-go.yaml +++ b/.github/workflows/test-go.yaml @@ -127,7 +127,7 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: tts-waves + name: tts-waves-${{ matrix.os }} path: tts-waves - name: Test non-streaming decoding files (macOS) @@ -154,6 +154,7 @@ jobs: echo "Test paraformer" ./run-paraformer.sh + ./run-paraformer-itn.sh rm -rf sherpa-onnx-paraformer-zh-2023-03-28 echo "Test NeMo CTC" diff --git a/.github/workflows/test-nodejs-addon-api.yaml b/.github/workflows/test-nodejs-addon-api.yaml index 716a9fb73..224fc0f0b 100644 --- a/.github/workflows/test-nodejs-addon-api.yaml +++ b/.github/workflows/test-nodejs-addon-api.yaml @@ -39,8 +39,8 @@ jobs: strategy: fail-fast: false matrix: - os: [macos-11, macos-14, ubuntu-20.04, ubuntu-22.04] #, windows-latest] - node-version: ["16", "17", "18", "19", "21", "22"] + os: [macos-latest, ubuntu-latest, ubuntu-latest] + node-version: ["16", "22"] python-version: ["3.8"] steps: diff --git a/.gitignore b/.gitignore index 1eb26e5c2..2176798be 100644 --- a/.gitignore +++ b/.gitignore @@ -107,3 +107,4 @@ package-lock.json sherpa-onnx-nemo-* sherpa-onnx-vits-* sherpa-onnx-telespeech-ctc-* +*.fst diff --git a/dart-api-examples/non-streaming-asr/bin/paraformer-itn.dart b/dart-api-examples/non-streaming-asr/bin/paraformer-itn.dart new file mode 100644 index 000000000..c8d2c0801 --- /dev/null +++ b/dart-api-examples/non-streaming-asr/bin/paraformer-itn.dart @@ -0,0 +1,63 @@ +// Copyright (c) 2024 Xiaomi Corporation +import 'dart:io'; +import 'dart:typed_data'; + +import 'package:args/args.dart'; +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; + +import './init.dart'; + +void main(List arguments) async { + await initSherpaOnnx(); + + final parser = ArgParser() + ..addOption('model', help: 'Path to the paraformer model') + ..addOption('tokens', help: 'Path to tokens.txt') + 
..addOption('rule-fsts', + help: 'Path to rule fsts for inverse text normalization') + ..addOption('input-wav', help: 'Path to input.wav to transcribe'); + + final res = parser.parse(arguments); + if (res['model'] == null || + res['tokens'] == null || + res['rule-fsts'] == null || + res['input-wav'] == null) { + print(parser.usage); + exit(1); + } + + final model = res['model'] as String; + final tokens = res['tokens'] as String; + final ruleFsts = res['rule-fsts'] as String; + final inputWav = res['input-wav'] as String; + + final paraformer = sherpa_onnx.OfflineParaformerModelConfig( + model: model, + ); + + final modelConfig = sherpa_onnx.OfflineModelConfig( + paraformer: paraformer, + tokens: tokens, + debug: true, + numThreads: 1, + modelType: 'paraformer', + ); + final config = sherpa_onnx.OfflineRecognizerConfig( + model: modelConfig, + ruleFsts: ruleFsts, + ); + final recognizer = sherpa_onnx.OfflineRecognizer(config); + + final waveData = sherpa_onnx.readWave(inputWav); + final stream = recognizer.createStream(); + + stream.acceptWaveform( + samples: waveData.samples, sampleRate: waveData.sampleRate); + recognizer.decode(stream); + + final result = recognizer.getResult(stream); + print(result.text); + + stream.free(); + recognizer.free(); +} diff --git a/dart-api-examples/non-streaming-asr/run-paraformer-itn.sh b/dart-api-examples/non-streaming-asr/run-paraformer-itn.sh new file mode 100755 index 000000000..7d8e30859 --- /dev/null +++ b/dart-api-examples/non-streaming-asr/run-paraformer-itn.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +set -ex + +dart pub get + +if [ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 + + tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 + rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 +fi + +if [ ! 
-f ./itn-zh-number.wav ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav +fi + +if [ ! -f ./itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst +fi + +dart run \ + ./bin/paraformer-itn.dart \ + --model ./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx \ + --tokens ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \ + --rule-fsts ./itn_zh_number.fst \ + --input-wav ./itn-zh-number.wav diff --git a/dotnet-examples/offline-decode-files/Program.cs b/dotnet-examples/offline-decode-files/Program.cs index ea30a14e2..301774f8e 100644 --- a/dotnet-examples/offline-decode-files/Program.cs +++ b/dotnet-examples/offline-decode-files/Program.cs @@ -69,6 +69,10 @@ class Options HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")] public string DecodingMethod { get; set; } + [Option("rule-fsts", Required = false, Default = "", + HelpText = "If not empty, path to rule fst for inverse text normalization")] + public string RuleFsts { get; set; } + [Option("max-active-paths", Required = false, Default = 4, HelpText = @"Used only when --decoding--method is modified_beam_search. It specifies number of active paths to keep during the search")] @@ -233,6 +237,7 @@ private static void Run(Options options) config.MaxActivePaths = options.MaxActivePaths; config.HotwordsFile = options.HotwordsFile; config.HotwordsScore = options.HotwordsScore; + config.RuleFsts = options.RuleFsts; config.ModelConfig.Debug = 0; diff --git a/dotnet-examples/offline-decode-files/run-paraformer-itn.sh b/dotnet-examples/offline-decode-files/run-paraformer-itn.sh new file mode 100755 index 000000000..3f30d98f0 --- /dev/null +++ b/dotnet-examples/offline-decode-files/run-paraformer-itn.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +set -ex + +if [ ! 
-d ./sherpa-onnx-paraformer-zh-2023-03-28 ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 + tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 + rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 +fi + +if [ ! -f ./itn-zh-number.wav ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav +fi + +if [ ! -f ./itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst +fi + +dotnet run \ + --tokens=./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \ + --paraformer=./sherpa-onnx-paraformer-zh-2023-03-28/model.onnx \ + --rule-fsts=./itn_zh_number.fst \ + --num-threads=2 \ + --files ./itn-zh-number.wav diff --git a/dotnet-examples/offline-decode-files/run-telespeech-ctc.sh b/dotnet-examples/offline-decode-files/run-telespeech-ctc.sh index d678026d0..a7aae402c 100755 --- a/dotnet-examples/offline-decode-files/run-telespeech-ctc.sh +++ b/dotnet-examples/offline-decode-files/run-telespeech-ctc.sh @@ -11,5 +11,5 @@ fi dotnet run \ --telespeech-ctc=./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx \ --tokens=./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt \ - --model-type=telespeech-ctc \ + --model-type=telespeech_ctc \ --files ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/test_wavs/3-sichuan.wav diff --git a/go-api-examples/non-streaming-decode-files/main.go b/go-api-examples/non-streaming-decode-files/main.go index 48ad35302..b1d2063b9 100644 --- a/go-api-examples/non-streaming-decode-files/main.go +++ b/go-api-examples/non-streaming-decode-files/main.go @@ -48,6 +48,8 @@ func main() { flag.StringVar(&config.DecodingMethod, "decoding-method", "greedy_search", "Decoding method. 
Possible values: greedy_search, modified_beam_search") flag.IntVar(&config.MaxActivePaths, "max-active-paths", 4, "Used only when --decoding-method is modified_beam_search") + flag.StringVar(&config.RuleFsts, "rule-fsts", "", "If not empty, path to rule fst for inverse text normalization") + flag.StringVar(&config.RuleFars, "rule-fars", "", "If not empty, path to rule fst archives for inverse text normalization") flag.Parse() diff --git a/go-api-examples/non-streaming-decode-files/run-paraformer-itn.sh b/go-api-examples/non-streaming-decode-files/run-paraformer-itn.sh new file mode 100755 index 000000000..93103d071 --- /dev/null +++ b/go-api-examples/non-streaming-decode-files/run-paraformer-itn.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +set -ex + +if [ ! -d sherpa-onnx-paraformer-zh-2023-03-28 ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 + tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 + rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 +fi + +if [ ! -f ./itn-zh-number.wav ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav +fi + +if [ ! 
-f ./itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst +fi + +go mod tidy +go build + +./non-streaming-decode-files \ + --paraformer ./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx \ + --tokens ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \ + --model-type paraformer \ + --rule-fsts ./itn_zh_number.fst \ + --debug 0 \ + ./itn-zh-number.wav diff --git a/go-api-examples/non-streaming-decode-files/run-telespeech-ctc.sh b/go-api-examples/non-streaming-decode-files/run-telespeech-ctc.sh index d9785b2aa..cddf82865 100755 --- a/go-api-examples/non-streaming-decode-files/run-telespeech-ctc.sh +++ b/go-api-examples/non-streaming-decode-files/run-telespeech-ctc.sh @@ -14,6 +14,6 @@ go build ./non-streaming-decode-files \ --telespeech-ctc ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx \ --tokens ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt \ - --model-type telespeech-ctc \ + --model-type telespeech_ctc \ --debug 0 \ ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/test_wavs/3-sichuan.wav diff --git a/java-api-examples/InverseTextNormalizationNonStreamingParaformer.java b/java-api-examples/InverseTextNormalizationNonStreamingParaformer.java new file mode 100644 index 000000000..53d18248b --- /dev/null +++ b/java-api-examples/InverseTextNormalizationNonStreamingParaformer.java @@ -0,0 +1,54 @@ +// Copyright 2024 Xiaomi Corporation + +// This file shows how to use an offline paraformer, i.e., non-streaming paraformer, +// to decode files with inverse text normalization. 
+import com.k2fsa.sherpa.onnx.*; + +public class InverseTextNormalizationNonStreamingParaformer { + public static void main(String[] args) { + // please refer to + // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-03-28-chinese-english + // to download model files + String model = "./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx"; + String tokens = "./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt"; + + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav + String waveFilename = "./itn-zh-number.wav"; + + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + String ruleFsts = "./itn_zh_number.fst"; + + WaveReader reader = new WaveReader(waveFilename); + + OfflineParaformerModelConfig paraformer = + OfflineParaformerModelConfig.builder().setModel(model).build(); + + OfflineModelConfig modelConfig = + OfflineModelConfig.builder() + .setParaformer(paraformer) + .setTokens(tokens) + .setNumThreads(1) + .setDebug(true) + .build(); + + OfflineRecognizerConfig config = + OfflineRecognizerConfig.builder() + .setOfflineModelConfig(modelConfig) + .setDecodingMethod("greedy_search") + .setRuleFsts(ruleFsts) + .build(); + + OfflineRecognizer recognizer = new OfflineRecognizer(config); + OfflineStream stream = recognizer.createStream(); + stream.acceptWaveform(reader.getSamples(), reader.getSampleRate()); + + recognizer.decode(stream); + + String text = recognizer.getResult(stream).getText(); + + System.out.printf("filename:%s\nresult:%s\n", waveFilename, text); + + stream.release(); + recognizer.release(); + } +} diff --git a/java-api-examples/run-inverse-text-normalization-paraformer.sh b/java-api-examples/run-inverse-text-normalization-paraformer.sh new file mode 100755 index 000000000..606dba6f7 --- /dev/null +++ b/java-api-examples/run-inverse-text-normalization-paraformer.sh @@ -0,0 +1,46 @@ 
+#!/usr/bin/env bash + +set -ex + +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then + mkdir -p ../build + pushd ../build + cmake \ + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ + -DBUILD_SHARED_LIBS=ON \ + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ + -DSHERPA_ONNX_ENABLE_JNI=ON \ + .. + + make -j4 + ls -lh lib + popd +fi + +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then + pushd ../sherpa-onnx/java-api + make + popd +fi + +if [ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 + + tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 + rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 +fi + +if [ ! -f ./itn-zh-number.wav ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav +fi + +if [ ! -f ./itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst +fi + +java \ + -Djava.library.path=$PWD/../build/lib \ + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \ + InverseTextNormalizationNonStreamingParaformer.java diff --git a/kotlin-api-examples/run.sh b/kotlin-api-examples/run.sh index e7946598d..a96c09748 100755 --- a/kotlin-api-examples/run.sh +++ b/kotlin-api-examples/run.sh @@ -203,6 +203,34 @@ function testOfflineAsr() { java -Djava.library.path=../build/lib -jar $out_filename } +function testInverseTextNormalizationAsr() { + if [ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 + tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 + rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 + fi + + if [ ! 
-f ./itn-zh-number.wav ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav + fi + + if [ ! -f ./itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + fi + + out_filename=test_offline_asr.jar + kotlinc-jvm -include-runtime -d $out_filename \ + test_itn_asr.kt \ + FeatureConfig.kt \ + OfflineRecognizer.kt \ + OfflineStream.kt \ + WaveReader.kt \ + faked-asset-manager.kt + + ls -lh $out_filename + java -Djava.library.path=../build/lib -jar $out_filename +} + function testPunctuation() { if [ ! -f ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx ]; then curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 @@ -229,3 +257,4 @@ testAudioTagging testSpokenLanguageIdentification testOfflineAsr testPunctuation +testInverseTextNormalizationAsr diff --git a/kotlin-api-examples/test_itn_asr.kt b/kotlin-api-examples/test_itn_asr.kt new file mode 100644 index 000000000..250af4172 --- /dev/null +++ b/kotlin-api-examples/test_itn_asr.kt @@ -0,0 +1,37 @@ +package com.k2fsa.sherpa.onnx + +fun main() { + test() +} + +fun test() { + val recognizer = createOfflineRecognizer() + val waveFilename = "./itn-zh-number.wav"; + + val objArray = WaveReader.readWaveFromFile( + filename = waveFilename, + ) + val samples: FloatArray = objArray[0] as FloatArray + val sampleRate: Int = objArray[1] as Int + + val stream = recognizer.createStream() + stream.acceptWaveform(samples, sampleRate=sampleRate) + recognizer.decode(stream) + + val result = recognizer.getResult(stream) + println(result) + + stream.release() + recognizer.release() +} + +fun createOfflineRecognizer(): OfflineRecognizer { + val config = OfflineRecognizerConfig( + featConfig = getFeatureConfig(sampleRate = 16000, featureDim = 80), + modelConfig = getOfflineModelConfig(0)!!, + 
ruleFsts = "./itn_zh_number.fst", + ) + + return OfflineRecognizer(config = config) +} + diff --git a/nodejs-addon-examples/test_asr_non_streaming_paraformer_itn.js b/nodejs-addon-examples/test_asr_non_streaming_paraformer_itn.js new file mode 100644 index 000000000..c5d0f34b0 --- /dev/null +++ b/nodejs-addon-examples/test_asr_non_streaming_paraformer_itn.js @@ -0,0 +1,48 @@ +// Copyright (c) 2024 Xiaomi Corporation +const sherpa_onnx = require('sherpa-onnx-node'); + +// Please download test files from +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models +const config = { + 'featConfig': { + 'sampleRate': 16000, + 'featureDim': 80, + }, + 'modelConfig': { + 'paraformer': { + 'model': './sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx', + }, + 'tokens': './sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt', + 'numThreads': 2, + 'provider': 'cpu', + 'debug': 1, + }, + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + ruleFsts: './itn_zh_number.fst', +}; + +// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav +const waveFilename = './itn-zh-number.wav'; + +const recognizer = new sherpa_onnx.OfflineRecognizer(config); +console.log('Started') +let start = Date.now(); +const stream = recognizer.createStream(); +const wave = sherpa_onnx.readWave(waveFilename); +stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples}); + +recognizer.decode(stream); +const result = recognizer.getResult(stream) +let stop = Date.now(); +console.log('Done') + +const elapsed_seconds = (stop - start) / 1000; +const duration = wave.samples.length / wave.sampleRate; +const real_time_factor = elapsed_seconds / duration; +console.log('Wave duration', duration.toFixed(3), 'seconds') +console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds') +console.log( + `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`, + real_time_factor.toFixed(3)) +console.log(waveFilename) 
+console.log('result\n', result) diff --git a/nodejs-examples/test-offline-paraformer-itn.js b/nodejs-examples/test-offline-paraformer-itn.js new file mode 100644 index 000000000..ebadc5b0e --- /dev/null +++ b/nodejs-examples/test-offline-paraformer-itn.js @@ -0,0 +1,128 @@ +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) + +const fs = require('fs'); +const {Readable} = require('stream'); +const wav = require('wav'); + +const sherpa_onnx = require('sherpa-onnx'); + +function createOfflineRecognizer() { + let featConfig = { + sampleRate: 16000, + featureDim: 80, + }; + + let modelConfig = { + transducer: { + encoder: '', + decoder: '', + joiner: '', + }, + paraformer: { + model: './sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx', + }, + nemoCtc: { + model: '', + }, + whisper: { + encoder: '', + decoder: '', + language: '', + task: '', + tailPaddings: -1, + }, + tdnn: { + model: '', + }, + tokens: './sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt', + numThreads: 1, + debug: 0, + provider: 'cpu', + modelType: 'paraformer', + }; + + let lmConfig = { + model: '', + scale: 1.0, + }; + + let config = { + featConfig: featConfig, + modelConfig: modelConfig, + lmConfig: lmConfig, + decodingMethod: 'greedy_search', + maxActivePaths: 4, + hotwordsFile: '', + hotwordsScore: 1.5, + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + ruleFsts: './itn_zh_number.fst', + }; + + return sherpa_onnx.createOfflineRecognizer(config); +} + + +const recognizer = createOfflineRecognizer(); +const stream = recognizer.createStream(); + +// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav +const waveFilename = './itn-zh-number.wav'; + +const reader = new wav.Reader(); +const readable = new Readable().wrap(reader); +const buf = []; + +reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => { + if (sampleRate != recognizer.config.featConfig.sampleRate) { + throw new Error(`Only 
support sampleRate ${ + recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`); + } + + if (audioFormat != 1) { + throw new Error(`Only support PCM format. Given ${audioFormat}`); + } + + if (channels != 1) { + throw new Error(`Only a single channel. Given ${channels}`); + } + + if (bitDepth != 16) { + throw new Error(`Only support 16-bit samples. Given ${bitDepth}`); + } +}); + +fs.createReadStream(waveFilename, {'highWaterMark': 4096}) + .pipe(reader) + .on('finish', function(err) { + // tail padding + const floatSamples = + new Float32Array(recognizer.config.featConfig.sampleRate * 0.5); + + buf.push(floatSamples); + const flattened = + Float32Array.from(buf.reduce((a, b) => [...a, ...b], [])); + + stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened); + recognizer.decode(stream); + const text = recognizer.getResult(stream).text; + console.log(text); + + stream.free(); + recognizer.free(); + }); + +readable.on('readable', function() { + let chunk; + while ((chunk = readable.read()) != null) { + const int16Samples = new Int16Array( + chunk.buffer, chunk.byteOffset, + chunk.length / Int16Array.BYTES_PER_ELEMENT); + + const floatSamples = new Float32Array(int16Samples.length); + for (let i = 0; i < floatSamples.length; i++) { + floatSamples[i] = int16Samples[i] / 32768.0; + } + + buf.push(floatSamples); + } +}); diff --git a/scripts/dotnet/OfflineRecognizerConfig.cs b/scripts/dotnet/OfflineRecognizerConfig.cs index 23d6e18a0..371a556ea 100644 --- a/scripts/dotnet/OfflineRecognizerConfig.cs +++ b/scripts/dotnet/OfflineRecognizerConfig.cs @@ -21,7 +21,8 @@ public OfflineRecognizerConfig() MaxActivePaths = 4; HotwordsFile = ""; HotwordsScore = 1.5F; - + RuleFsts = ""; + RuleFars = ""; } public FeatureConfig FeatConfig; public OfflineModelConfig ModelConfig; @@ -36,5 +37,11 @@ public OfflineRecognizerConfig() public string HotwordsFile; public float HotwordsScore; + + [MarshalAs(UnmanagedType.LPStr)] + public string RuleFsts; + + 
[MarshalAs(UnmanagedType.LPStr)] + public string RuleFars; } } diff --git a/scripts/go/_internal/non-streaming-decode-files/run-paraformer-itn.sh b/scripts/go/_internal/non-streaming-decode-files/run-paraformer-itn.sh new file mode 120000 index 000000000..58c03fd85 --- /dev/null +++ b/scripts/go/_internal/non-streaming-decode-files/run-paraformer-itn.sh @@ -0,0 +1 @@ +../../../../go-api-examples/non-streaming-decode-files/run-paraformer-itn.sh \ No newline at end of file diff --git a/scripts/go/sherpa_onnx.go b/scripts/go/sherpa_onnx.go index a03031866..437f4f328 100644 --- a/scripts/go/sherpa_onnx.go +++ b/scripts/go/sherpa_onnx.go @@ -397,6 +397,10 @@ type OfflineRecognizerConfig struct { // Used only when DecodingMethod is modified_beam_search. MaxActivePaths int + HotwordsFile string + HotwordsScore float32 + RuleFsts string + RuleFars string } // It wraps a pointer from C @@ -491,6 +495,17 @@ func NewOfflineRecognizer(config *OfflineRecognizerConfig) *OfflineRecognizer { c.max_active_paths = C.int(config.MaxActivePaths) + c.hotwords_file = C.CString(config.HotwordsFile) + defer C.free(unsafe.Pointer(c.hotwords_file)) + + c.hotwords_score = C.float(config.HotwordsScore) + + c.rule_fsts = C.CString(config.RuleFsts) + defer C.free(unsafe.Pointer(c.rule_fsts)) + + c.rule_fars = C.CString(config.RuleFars) + defer C.free(unsafe.Pointer(c.rule_fars)) + recognizer := &OfflineRecognizer{} recognizer.impl = C.CreateOfflineRecognizer(&c) diff --git a/scripts/node-addon-api/README.md b/scripts/node-addon-api/README.md index f9b3cf0bf..fb291b77b 100644 --- a/scripts/node-addon-api/README.md +++ b/scripts/node-addon-api/README.md @@ -15,8 +15,8 @@ cmake -DCMAKE_INSTALL_PREFIX=./install -DBUILD_SHARED_LIBS=ON .. 
make -j install export PKG_CONFIG_PATH=$PWD/install:$PKG_CONFIG_PATH cd ../scripts/node-addon-api/ - -./node_modules/.bin/node-gyp build --verbose +npm i +./node_modules/.bin/cmake-js compile --log-level verbose # see test/test_asr_streaming_transducer.js # for usages diff --git a/scripts/node-addon-api/src/non-streaming-asr.cc b/scripts/node-addon-api/src/non-streaming-asr.cc index 671528200..db14ef52d 100644 --- a/scripts/node-addon-api/src/non-streaming-asr.cc +++ b/scripts/node-addon-api/src/non-streaming-asr.cc @@ -180,6 +180,8 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) { SHERPA_ONNX_ASSIGN_ATTR_INT32(max_active_paths, maxActivePaths); SHERPA_ONNX_ASSIGN_ATTR_STR(hotwords_file, hotwordsFile); SHERPA_ONNX_ASSIGN_ATTR_FLOAT(hotwords_score, hotwordsScore); + SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fsts, ruleFsts); + SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fars, ruleFars); SherpaOnnxOfflineRecognizer *recognizer = CreateOfflineRecognizer(&c); @@ -259,6 +261,14 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) { delete[] c.hotwords_file; } + if (c.rule_fsts) { + delete[] c.rule_fsts; + } + + if (c.rule_fars) { + delete[] c.rule_fars; + } + if (!recognizer) { Napi::TypeError::New(env, "Please check your config!") .ThrowAsJavaScriptException(); diff --git a/scripts/node-addon-api/src/non-streaming-tts.cc b/scripts/node-addon-api/src/non-streaming-tts.cc index c230b972a..70d97cddb 100644 --- a/scripts/node-addon-api/src/non-streaming-tts.cc +++ b/scripts/node-addon-api/src/non-streaming-tts.cc @@ -44,7 +44,7 @@ static SherpaOnnxOfflineTtsModelConfig GetOfflineTtsModelConfig( c.vits = GetOfflineTtsVitsModelConfig(o); - SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, num_threads); + SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads); if (o.Has("debug") && (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) { diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc index eb4e293d1..01e2191c3 100644 --- 
a/sherpa-onnx/c-api/c-api.cc +++ b/sherpa-onnx/c-api/c-api.cc @@ -388,6 +388,9 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer( recognizer_config.hotwords_score = SHERPA_ONNX_OR(config->hotwords_score, 1.5); + recognizer_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, ""); + recognizer_config.rule_fars = SHERPA_ONNX_OR(config->rule_fars, ""); + if (config->model_config.debug) { SHERPA_ONNX_LOGE("%s", recognizer_config.ToString().c_str()); } diff --git a/sherpa-onnx/c-api/c-api.h b/sherpa-onnx/c-api/c-api.h index e75d1955f..0229f8059 100644 --- a/sherpa-onnx/c-api/c-api.h +++ b/sherpa-onnx/c-api/c-api.h @@ -411,6 +411,8 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig { /// Bonus score for each token in hotwords. float hotwords_score; + const char *rule_fsts; + const char *rule_fars; } SherpaOnnxOfflineRecognizerConfig; SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizer diff --git a/sherpa-onnx/flutter/lib/src/offline_recognizer.dart b/sherpa-onnx/flutter/lib/src/offline_recognizer.dart index 7a220105c..b5619e3e0 100644 --- a/sherpa-onnx/flutter/lib/src/offline_recognizer.dart +++ b/sherpa-onnx/flutter/lib/src/offline_recognizer.dart @@ -137,11 +137,13 @@ class OfflineRecognizerConfig { this.maxActivePaths = 4, this.hotwordsFile = '', this.hotwordsScore = 1.5, + this.ruleFsts = '', + this.ruleFars = '', }); @override String toString() { - return 'OfflineRecognizerConfig(feat: $feat, model: $model, lm: $lm, decodingMethod: $decodingMethod, maxActivePaths: $maxActivePaths, hotwordsFile: $hotwordsFile, hotwordsScore: $hotwordsScore)'; + return 'OfflineRecognizerConfig(feat: $feat, model: $model, lm: $lm, decodingMethod: $decodingMethod, maxActivePaths: $maxActivePaths, hotwordsFile: $hotwordsFile, hotwordsScore: $hotwordsScore, ruleFsts: $ruleFsts, ruleFars: $ruleFars)'; } final FeatureConfig feat; @@ -154,6 +156,9 @@ class OfflineRecognizerConfig { final String hotwordsFile; final double hotwordsScore; + + final String ruleFsts; 
+ final String ruleFars; } class OfflineRecognizerResult { @@ -232,8 +237,13 @@ class OfflineRecognizer { c.ref.hotwordsFile = config.hotwordsFile.toNativeUtf8(); c.ref.hotwordsScore = config.hotwordsScore; + c.ref.ruleFsts = config.ruleFsts.toNativeUtf8(); + c.ref.ruleFars = config.ruleFars.toNativeUtf8(); + final ptr = SherpaOnnxBindings.createOfflineRecognizer?.call(c) ?? nullptr; + calloc.free(c.ref.ruleFars); + calloc.free(c.ref.ruleFsts); calloc.free(c.ref.hotwordsFile); calloc.free(c.ref.decodingMethod); calloc.free(c.ref.lm.model); diff --git a/sherpa-onnx/flutter/lib/src/sherpa_onnx_bindings.dart b/sherpa-onnx/flutter/lib/src/sherpa_onnx_bindings.dart index bce1be589..70d9572e7 100644 --- a/sherpa-onnx/flutter/lib/src/sherpa_onnx_bindings.dart +++ b/sherpa-onnx/flutter/lib/src/sherpa_onnx_bindings.dart @@ -130,6 +130,9 @@ final class SherpaOnnxOfflineRecognizerConfig extends Struct { @Float() external double hotwordsScore; + + external Pointer ruleFsts; + external Pointer ruleFars; } final class SherpaOnnxOnlineTransducerModelConfig extends Struct { diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizerConfig.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizerConfig.java index 94d3debc9..a8222ad77 100644 --- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizerConfig.java +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizerConfig.java @@ -9,6 +9,8 @@ public class OfflineRecognizerConfig { private final int maxActivePaths; private final String hotwordsFile; private final float hotwordsScore; + private final String ruleFsts; + private final String ruleFars; private OfflineRecognizerConfig(Builder builder) { this.featConfig = builder.featConfig; @@ -17,6 +19,8 @@ private OfflineRecognizerConfig(Builder builder) { this.maxActivePaths = builder.maxActivePaths; this.hotwordsFile = builder.hotwordsFile; this.hotwordsScore = builder.hotwordsScore; + this.ruleFsts = builder.ruleFsts; + 
this.ruleFars = builder.ruleFars; } public static Builder builder() { @@ -34,6 +38,8 @@ public static class Builder { private int maxActivePaths = 4; private String hotwordsFile = ""; private float hotwordsScore = 1.5f; + private String ruleFsts = ""; + private String ruleFars = ""; public OfflineRecognizerConfig build() { return new OfflineRecognizerConfig(this); @@ -68,5 +74,15 @@ public Builder setHotwordsScore(float hotwordsScore) { this.hotwordsScore = hotwordsScore; return this; } + + public Builder setRuleFsts(String ruleFsts) { + this.ruleFsts = ruleFsts; + return this; + } + + public Builder setRuleFars(String ruleFars) { + this.ruleFars = ruleFars; + return this; + } } } diff --git a/sherpa-onnx/jni/offline-recognizer.cc b/sherpa-onnx/jni/offline-recognizer.cc index cf69389a3..070d46f08 100644 --- a/sherpa-onnx/jni/offline-recognizer.cc +++ b/sherpa-onnx/jni/offline-recognizer.cc @@ -34,6 +34,18 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) { fid = env->GetFieldID(cls, "hotwordsScore", "F"); ans.hotwords_score = env->GetFloatField(config, fid); + fid = env->GetFieldID(cls, "ruleFsts", "Ljava/lang/String;"); + s = (jstring)env->GetObjectField(config, fid); + p = env->GetStringUTFChars(s, nullptr); + ans.rule_fsts = p; + env->ReleaseStringUTFChars(s, p); + + fid = env->GetFieldID(cls, "ruleFars", "Ljava/lang/String;"); + s = (jstring)env->GetObjectField(config, fid); + p = env->GetStringUTFChars(s, nullptr); + ans.rule_fars = p; + env->ReleaseStringUTFChars(s, p); + //---------- feat config ---------- fid = env->GetFieldID(cls, "featConfig", "Lcom/k2fsa/sherpa/onnx/FeatureConfig;"); diff --git a/sherpa-onnx/kotlin-api/OfflineRecognizer.kt b/sherpa-onnx/kotlin-api/OfflineRecognizer.kt index 151ac73d5..c910e8d68 100644 --- a/sherpa-onnx/kotlin-api/OfflineRecognizer.kt +++ b/sherpa-onnx/kotlin-api/OfflineRecognizer.kt @@ -53,6 +53,8 @@ data class OfflineRecognizerConfig( var maxActivePaths: Int = 4, var hotwordsFile: String = 
"", var hotwordsScore: Float = 1.5f, + var ruleFsts: String = "", + var ruleFars: String = "", ) class OfflineRecognizer( diff --git a/swift-api-examples/SherpaOnnx.swift b/swift-api-examples/SherpaOnnx.swift index 7346ac4b8..24082a827 100644 --- a/swift-api-examples/SherpaOnnx.swift +++ b/swift-api-examples/SherpaOnnx.swift @@ -387,7 +387,9 @@ func sherpaOnnxOfflineRecognizerConfig( decodingMethod: String = "greedy_search", maxActivePaths: Int = 4, hotwordsFile: String = "", - hotwordsScore: Float = 1.5 + hotwordsScore: Float = 1.5, + ruleFsts: String = "", + ruleFars: String = "" ) -> SherpaOnnxOfflineRecognizerConfig { return SherpaOnnxOfflineRecognizerConfig( feat_config: featConfig, @@ -396,7 +398,9 @@ func sherpaOnnxOfflineRecognizerConfig( decoding_method: toCPointer(decodingMethod), max_active_paths: Int32(maxActivePaths), hotwords_file: toCPointer(hotwordsFile), - hotwords_score: hotwordsScore + hotwords_score: hotwordsScore, + rule_fsts: toCPointer(ruleFsts), + rule_fars: toCPointer(ruleFars) ) } diff --git a/wasm/asr/sherpa-onnx-asr.js b/wasm/asr/sherpa-onnx-asr.js index 53afe1875..2179fd87d 100644 --- a/wasm/asr/sherpa-onnx-asr.js +++ b/wasm/asr/sherpa-onnx-asr.js @@ -628,7 +628,7 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) { const model = initSherpaOnnxOfflineModelConfig(config.modelConfig, Module); const lm = initSherpaOnnxOfflineLMConfig(config.lmConfig, Module); - const len = feat.len + model.len + lm.len + 4 * 4; + const len = feat.len + model.len + lm.len + 6 * 4; const ptr = Module._malloc(len); let offset = 0; @@ -643,7 +643,10 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) { const decodingMethodLen = Module.lengthBytesUTF8(config.decodingMethod) + 1; const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile) + 1; - const bufferLen = decodingMethodLen + hotwordsFileLen; + const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1; + const ruleFarsLen = 
Module.lengthBytesUTF8(config.ruleFars || '') + 1; + const bufferLen = + decodingMethodLen + hotwordsFileLen + ruleFstsLen + ruleFarsLen; const buffer = Module._malloc(bufferLen); offset = 0; @@ -651,6 +654,13 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) { offset += decodingMethodLen; Module.stringToUTF8(config.hotwordsFile, buffer + offset, hotwordsFileLen); + offset += hotwordsFileLen; + + Module.stringToUTF8(config.ruleFsts || '', buffer + offset, ruleFstsLen); + offset += ruleFstsLen; + + Module.stringToUTF8(config.ruleFars || '', buffer + offset, ruleFarsLen); + offset += ruleFarsLen; offset = feat.len + model.len + lm.len; @@ -666,6 +676,15 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) { Module.setValue(ptr + offset, config.hotwordsScore, 'float'); offset += 4; + Module.setValue( + ptr + offset, buffer + decodingMethodLen + hotwordsFileLen, 'i8*'); + offset += 4; + + Module.setValue( + ptr + offset, buffer + decodingMethodLen + hotwordsFileLen + ruleFstsLen, + 'i8*'); + offset += 4; + return { buffer: buffer, ptr: ptr, len: len, feat: feat, model: model, lm: lm } diff --git a/wasm/nodejs/sherpa-onnx-wasm-nodejs.cc b/wasm/nodejs/sherpa-onnx-wasm-nodejs.cc index 177fb6f04..6e138c76f 100644 --- a/wasm/nodejs/sherpa-onnx-wasm-nodejs.cc +++ b/wasm/nodejs/sherpa-onnx-wasm-nodejs.cc @@ -29,7 +29,7 @@ static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) == sizeof(SherpaOnnxFeatureConfig) + sizeof(SherpaOnnxOfflineLMConfig) + - sizeof(SherpaOnnxOfflineModelConfig) + 4 * 4, + sizeof(SherpaOnnxOfflineModelConfig) + 6 * 4, ""); void PrintOfflineTtsConfig(SherpaOnnxOfflineTtsConfig *tts_config) { @@ -103,6 +103,8 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) { fprintf(stdout, "max active paths: %d\n", config->max_active_paths); fprintf(stdout, "hotwords_file: %s\n", config->hotwords_file); fprintf(stdout, "hotwords_score: %.2f\n", 
config->hotwords_score); + fprintf(stdout, "rule_fsts: %s\n", config->rule_fsts); + fprintf(stdout, "rule_fars: %s\n", config->rule_fars); } void CopyHeap(const char *src, int32_t num_bytes, char *dst) {