Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Inverse text normalization API for other programming languages #1019

Merged
merged 9 commits into from
Jun 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 35 additions & 32 deletions .github/scripts/test-dart.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,41 @@ set -ex

cd dart-api-examples

pushd non-streaming-asr

echo '----------paraformer itn----------'
./run-paraformer-itn.sh

echo '----------paraformer----------'
./run-paraformer.sh
rm -rf sherpa-onnx-*

echo '----------VAD with paraformer----------'
./run-vad-with-paraformer.sh
rm -rf sherpa-onnx-*

echo '----------NeMo transducer----------'
./run-nemo-transducer.sh
rm -rf sherpa-onnx-*

echo '----------NeMo CTC----------'
./run-nemo-ctc.sh
rm -rf sherpa-onnx-*

echo '----------TeleSpeech CTC----------'
./run-telespeech-ctc.sh
rm -rf sherpa-onnx-*

echo '----------whisper----------'
./run-whisper.sh
rm -rf sherpa-onnx-*

echo '----------zipformer transducer----------'
./run-zipformer-transducer.sh
rm -rf sherpa-onnx-*

popd # non-streaming-asr

pushd tts

echo '----------piper tts----------'
Expand Down Expand Up @@ -44,38 +79,6 @@ rm -rf sherpa-onnx-*

popd # streaming-asr

pushd non-streaming-asr

echo '----------VAD with paraformer----------'
./run-vad-with-paraformer.sh
rm -rf sherpa-onnx-*

echo '----------NeMo transducer----------'
./run-nemo-transducer.sh
rm -rf sherpa-onnx-*

echo '----------NeMo CTC----------'
./run-nemo-ctc.sh
rm -rf sherpa-onnx-*

echo '----------TeleSpeech CTC----------'
./run-telespeech-ctc.sh
rm -rf sherpa-onnx-*

echo '----------paraformer----------'
./run-paraformer.sh
rm -rf sherpa-onnx-*

echo '----------whisper----------'
./run-whisper.sh
rm -rf sherpa-onnx-*

echo '----------zipformer transducer----------'
./run-zipformer-transducer.sh
rm -rf sherpa-onnx-*

popd # non-streaming-asr

pushd vad
./run.sh
rm *.onnx
Expand Down
1 change: 1 addition & 0 deletions .github/scripts/test-dot-net.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
cd dotnet-examples/

cd ./offline-decode-files
./run-paraformer-itn.sh
./run-telespeech-ctc.sh
./run-nemo-ctc.sh
./run-paraformer.sh
Expand Down
6 changes: 6 additions & 0 deletions .github/scripts/test-nodejs-addon-npm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,12 @@ tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2

node ./test_asr_non_streaming_paraformer.js

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav

node ./test_asr_non_streaming_paraformer_itn.js

rm -rf sherpa-onnx-paraformer-zh-2023-03-28

echo "----------tts----------"
Expand Down
9 changes: 9 additions & 0 deletions .github/scripts/test-nodejs-npm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,15 @@ ls -lh node_modules

# offline asr

curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
ls -lh
tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
node ./test-offline-paraformer-itn.js
rm -rf sherpa-onnx-paraformer-zh-2023-03-28

curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2
ls -lh
tar xvf sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/run-java-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ jobs:
shell: bash
run: |
cd ./java-api-examples
./run-inverse-text-normalization-paraformer.sh

./run-non-streaming-decode-file-paraformer.sh
rm -rf sherpa-onnx-paraformer-zh-*

Expand Down
59 changes: 2 additions & 57 deletions .github/workflows/test-dot-net.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
os: [ubuntu-latest]
python-version: ["3.8"]

steps:
Expand Down Expand Up @@ -72,45 +72,18 @@ jobs:

cmake --build . --target install --config Release

- name: Build sherpa-onnx for windows x86
if: matrix.os == 'windows-latest'
shell: bash
run: |
export CMAKE_CXX_COMPILER_LAUNCHER=ccache
export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
cmake --version

mkdir build-win32
cd build-win32
cmake \
-A Win32 \
-DBUILD_SHARED_LIBS=ON \
-DCMAKE_INSTALL_PREFIX=./install \
-DCMAKE_BUILD_TYPE=Release \
-DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
-DBUILD_ESPEAK_NG_EXE=OFF \
-DSHERPA_ONNX_ENABLE_BINARY=ON \
..
cmake --build . --target install --config Release

- uses: actions/upload-artifact@v4
with:
name: ${{ matrix.os }}
path: ./build/install/lib/

- uses: actions/upload-artifact@v4
if: matrix.os == 'windows-latest'
with:
name: ${{ matrix.os }}-win32
path: ./build-win32/install/lib/

test-dot-net:
runs-on: ${{ matrix.os }}
needs: [build-libs]
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest] #, windows-latest]
os: [ubuntu-latest]
python-version: ["3.8"]

steps:
Expand All @@ -134,30 +107,11 @@ jobs:
name: ubuntu-latest
path: /tmp/linux

- name: Retrieve artifact from macos-latest
uses: actions/download-artifact@v4
with:
name: macos-latest
path: /tmp/macos

- name: Retrieve artifact from windows-latest
uses: actions/download-artifact@v4
with:
name: windows-latest
path: /tmp/windows-x64

- name: Retrieve artifact from windows-latest
uses: actions/download-artifact@v4
with:
name: windows-latest-win32
path: /tmp/windows-x86

- name: Setup .NET
uses: actions/setup-dotnet@v4
with:
dotnet-version: |
6.0.x
7.0.x

- name: Check dotnet
run: dotnet --info
Expand All @@ -171,15 +125,6 @@ jobs:
echo "----------/tmp/linux----------"
ls -lh /tmp/linux

echo "----------/tmp/macos----------"
ls -lh /tmp/macos

echo "----------/tmp/windows-x64----------"
ls -lh /tmp/windows-x64

echo "----------/tmp/windows-x86----------"
ls -lh /tmp/windows-x86

- name: Build
shell: bash
run: |
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/test-go.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ jobs:

- uses: actions/upload-artifact@v4
with:
name: tts-waves
name: tts-waves-${{ matrix.os }}
path: tts-waves

- name: Test non-streaming decoding files (macOS)
Expand All @@ -154,6 +154,7 @@ jobs:

echo "Test paraformer"
./run-paraformer.sh
./run-paraformer-itn.sh
rm -rf sherpa-onnx-paraformer-zh-2023-03-28

echo "Test NeMo CTC"
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test-nodejs-addon-api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [macos-11, macos-14, ubuntu-20.04, ubuntu-22.04] #, windows-latest]
node-version: ["16", "17", "18", "19", "21", "22"]
os: [macos-latest, ubuntu-latest] # each runner image listed once
node-version: ["16", "22"]
python-version: ["3.8"]

steps:
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,4 @@ package-lock.json
sherpa-onnx-nemo-*
sherpa-onnx-vits-*
sherpa-onnx-telespeech-ctc-*
*.fst
63 changes: 63 additions & 0 deletions dart-api-examples/non-streaming-asr/bin/paraformer-itn.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// Copyright (c) 2024 Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

/// Decodes one wave file with a non-streaming paraformer model and prints
/// the recognized text.
///
/// The rule FST path given via `--rule-fsts` is handed to the recognizer
/// config so that inverse text normalization is applied to the result.
///
/// All of `--model`, `--tokens`, `--rule-fsts` and `--input-wav` must be
/// supplied; otherwise the usage text is printed and the process exits
/// with status 1.
void main(List<String> arguments) async {
  await initSherpaOnnx();

  final argParser = ArgParser()
    ..addOption('model', help: 'Path to the paraformer model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('rule-fsts',
        help: 'Path to rule fsts for inverse text normalization')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final parsed = argParser.parse(arguments);

  // Every option is mandatory; bail out with the usage text if one is absent.
  const requiredOptions = ['model', 'tokens', 'rule-fsts', 'input-wav'];
  final hasMissingOption =
      requiredOptions.any((name) => parsed[name] == null);
  if (hasMissingOption) {
    print(argParser.usage);
    exit(1);
  }

  final modelPath = parsed['model'] as String;
  final tokensPath = parsed['tokens'] as String;
  final ruleFstsPath = parsed['rule-fsts'] as String;
  final wavPath = parsed['input-wav'] as String;

  // Build the recognizer configuration in one nested expression.
  final recognizerConfig = sherpa_onnx.OfflineRecognizerConfig(
    model: sherpa_onnx.OfflineModelConfig(
      paraformer: sherpa_onnx.OfflineParaformerModelConfig(
        model: modelPath,
      ),
      tokens: tokensPath,
      debug: true,
      numThreads: 1,
      modelType: 'paraformer',
    ),
    ruleFsts: ruleFstsPath,
  );
  final recognizer = sherpa_onnx.OfflineRecognizer(recognizerConfig);

  final wave = sherpa_onnx.readWave(wavPath);
  final stream = recognizer.createStream();

  // Feed the whole file at once, then decode it in a single pass.
  stream.acceptWaveform(samples: wave.samples, sampleRate: wave.sampleRate);
  recognizer.decode(stream);

  print(recognizer.getResult(stream).text);

  // Explicitly free the stream and the recognizer once the result is printed.
  stream.free();
  recognizer.free();
}
27 changes: 27 additions & 0 deletions dart-api-examples/non-streaming-asr/run-paraformer-itn.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env bash
#
# Runs the Dart paraformer example with inverse text normalization (ITN).
#
# Downloads, into the current directory and only when missing:
#   - the sherpa-onnx-paraformer-zh-2023-03-28 model archive (then extracts it)
#   - itn-zh-number.wav  (test wave)
#   - itn_zh_number.fst  (ITN rule FST)
# and then runs ./bin/paraformer-itn.dart on the test wave.
#
# All paths are relative, so invoke this script from the directory that
# contains it.

set -ex

readonly asr_models_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models
readonly model_dir=sherpa-onnx-paraformer-zh-2023-03-28

dart pub get

if [ ! -f "./${model_dir}/tokens.txt" ]; then
  # -f makes curl exit non-zero on an HTTP error instead of saving the
  # server's error page as if it were the archive.
  curl -fSL -O "${asr_models_url}/${model_dir}.tar.bz2"

  tar xvf "${model_dir}.tar.bz2"
  rm "${model_dir}.tar.bz2"
fi

# Single-file assets: fetch each one only if it is not already present.
for asset in itn-zh-number.wav itn_zh_number.fst; do
  if [ ! -f "./${asset}" ]; then
    curl -fSL -O "${asr_models_url}/${asset}"
  fi
done

dart run \
  ./bin/paraformer-itn.dart \
  --model "./${model_dir}/model.int8.onnx" \
  --tokens "./${model_dir}/tokens.txt" \
  --rule-fsts ./itn_zh_number.fst \
  --input-wav ./itn-zh-number.wav
5 changes: 5 additions & 0 deletions dotnet-examples/offline-decode-files/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,34 +23,34 @@
public int FeatureDim { get; set; }

[Option(Required = false, HelpText = "Path to tokens.txt")]
public string Tokens { get; set; }

Check warning on line 26 in dotnet-examples/offline-decode-files/Program.cs

View workflow job for this annotation

GitHub Actions / test-dot-net (ubuntu-latest, 3.8)

Non-nullable property 'Tokens' must contain a non-null value when exiting constructor. Consider declaring the property as nullable.

[Option(Required = false, Default = "", HelpText = "Path to transducer encoder.onnx. Used only for transducer models")]
public string Encoder { get; set; }

Check warning on line 29 in dotnet-examples/offline-decode-files/Program.cs

View workflow job for this annotation

GitHub Actions / test-dot-net (ubuntu-latest, 3.8)

Non-nullable property 'Encoder' must contain a non-null value when exiting constructor. Consider declaring the property as nullable.

[Option(Required = false, Default = "", HelpText = "Path to transducer decoder.onnx. Used only for transducer models")]
public string Decoder { get; set; }

Check warning on line 32 in dotnet-examples/offline-decode-files/Program.cs

View workflow job for this annotation

GitHub Actions / test-dot-net (ubuntu-latest, 3.8)

Non-nullable property 'Decoder' must contain a non-null value when exiting constructor. Consider declaring the property as nullable.

[Option(Required = false, Default = "",HelpText = "Path to transducer joiner.onnx. Used only for transducer models")]
public string Joiner { get; set; }

Check warning on line 35 in dotnet-examples/offline-decode-files/Program.cs

View workflow job for this annotation

GitHub Actions / test-dot-net (ubuntu-latest, 3.8)

Non-nullable property 'Joiner' must contain a non-null value when exiting constructor. Consider declaring the property as nullable.

[Option("model-type", Required = false, Default = "", HelpText = "model type")]
public string ModelType { get; set; }

Check warning on line 38 in dotnet-examples/offline-decode-files/Program.cs

View workflow job for this annotation

GitHub Actions / test-dot-net (ubuntu-latest, 3.8)

Non-nullable property 'ModelType' must contain a non-null value when exiting constructor. Consider declaring the property as nullable.

[Option("whisper-encoder", Required = false, Default = "", HelpText = "Path to whisper encoder.onnx. Used only for whisper models")]
public string WhisperEncoder { get; set; }

Check warning on line 41 in dotnet-examples/offline-decode-files/Program.cs

View workflow job for this annotation

GitHub Actions / test-dot-net (ubuntu-latest, 3.8)

Non-nullable property 'WhisperEncoder' must contain a non-null value when exiting constructor. Consider declaring the property as nullable.

[Option("whisper-decoder", Required = false, Default = "", HelpText = "Path to whisper decoder.onnx. Used only for whisper models")]
public string WhisperDecoder { get; set; }

Check warning on line 44 in dotnet-examples/offline-decode-files/Program.cs

View workflow job for this annotation

GitHub Actions / test-dot-net (ubuntu-latest, 3.8)

Non-nullable property 'WhisperDecoder' must contain a non-null value when exiting constructor. Consider declaring the property as nullable.

[Option("whisper-language", Required = false, Default = "", HelpText = "Language of the input file. Can be empty")]
public string WhisperLanguage{ get; set; }

Check warning on line 47 in dotnet-examples/offline-decode-files/Program.cs

View workflow job for this annotation

GitHub Actions / test-dot-net (ubuntu-latest, 3.8)

Non-nullable property 'WhisperLanguage' must contain a non-null value when exiting constructor. Consider declaring the property as nullable.

[Option("whisper-task", Required = false, Default = "transcribe", HelpText = "transcribe or translate")]
public string WhisperTask{ get; set; }

Check warning on line 50 in dotnet-examples/offline-decode-files/Program.cs

View workflow job for this annotation

GitHub Actions / test-dot-net (ubuntu-latest, 3.8)

Non-nullable property 'WhisperTask' must contain a non-null value when exiting constructor. Consider declaring the property as nullable.

[Option("tdnn-model", Required = false, Default = "", HelpText = "Path to tdnn yesno model")]
public string TdnnModel { get; set; }

Check warning on line 53 in dotnet-examples/offline-decode-files/Program.cs

View workflow job for this annotation

GitHub Actions / test-dot-net (ubuntu-latest, 3.8)

Non-nullable property 'TdnnModel' must contain a non-null value when exiting constructor. Consider declaring the property as nullable.


[Option(Required = false, HelpText = "Path to model.onnx. Used only for paraformer models")]
Expand All @@ -69,6 +69,10 @@
HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")]
public string DecodingMethod { get; set; }

[Option("rule-fsts", Required = false, Default = "",
HelpText = "If not empty, path to rule fst for inverse text normalization")]
public string RuleFsts { get; set; }

[Option("max-active-paths", Required = false, Default = 4,
HelpText = @"Used only when --decoding--method is modified_beam_search.
It specifies number of active paths to keep during the search")]
Expand Down Expand Up @@ -233,6 +237,7 @@
config.MaxActivePaths = options.MaxActivePaths;
config.HotwordsFile = options.HotwordsFile;
config.HotwordsScore = options.HotwordsScore;
config.RuleFsts = options.RuleFsts;

config.ModelConfig.Debug = 0;

Expand Down
24 changes: 24 additions & 0 deletions dotnet-examples/offline-decode-files/run-paraformer-itn.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/usr/bin/env bash
#
# Runs the .NET offline-decode-files example on a paraformer model with
# inverse text normalization (ITN).
#
# Downloads, into the current directory and only when missing:
#   - the sherpa-onnx-paraformer-zh-2023-03-28 model archive (then extracts it)
#   - itn-zh-number.wav  (test wave)
#   - itn_zh_number.fst  (ITN rule FST)
# and then invokes `dotnet run` on the test wave.
#
# All paths are relative, so invoke this script from the directory that
# contains it.

set -ex

readonly asr_models_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models
readonly model_dir=sherpa-onnx-paraformer-zh-2023-03-28

# Test for the files that are actually consumed below rather than for the
# directory alone: a directory left behind by an interrupted extraction
# would otherwise be mistaken for a complete model.
if [ ! -f "./${model_dir}/tokens.txt" ] || [ ! -f "./${model_dir}/model.onnx" ]; then
  # -f makes curl exit non-zero on an HTTP error instead of saving the
  # server's error page as if it were the archive.
  curl -fSL -O "${asr_models_url}/${model_dir}.tar.bz2"
  tar xvf "${model_dir}.tar.bz2"
  rm "${model_dir}.tar.bz2"
fi

# Single-file assets: fetch each one only if it is not already present.
for asset in itn-zh-number.wav itn_zh_number.fst; do
  if [ ! -f "./${asset}" ]; then
    curl -fSL -O "${asr_models_url}/${asset}"
  fi
done

dotnet run \
  --tokens="./${model_dir}/tokens.txt" \
  --paraformer="./${model_dir}/model.onnx" \
  --rule-fsts=./itn_zh_number.fst \
  --num-threads=2 \
  --files ./itn-zh-number.wav
Loading
Loading