From e7d61c0161712c4c0af2b578a9a2dd9cff073715 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Wed, 10 Jul 2024 17:57:40 +0800 Subject: [PATCH 1/8] Begin to add keyword spotting for C# --- scripts/dotnet/.gitignore | 6 +- scripts/dotnet/KeywordResult.cs | 44 ++++++++++ scripts/dotnet/KeywordSpotter.cs | 111 +++++++++++++++++++++++++ scripts/dotnet/KeywordSpotterConfig.cs | 32 +++++++ 4 files changed, 191 insertions(+), 2 deletions(-) create mode 100644 scripts/dotnet/KeywordResult.cs create mode 100644 scripts/dotnet/KeywordSpotter.cs create mode 100644 scripts/dotnet/KeywordSpotterConfig.cs diff --git a/scripts/dotnet/.gitignore b/scripts/dotnet/.gitignore index 6d09be539..4dda52bb6 100644 --- a/scripts/dotnet/.gitignore +++ b/scripts/dotnet/.gitignore @@ -1,8 +1,10 @@ all macos-arm64 macos-x64 -linux -windows +linux-x64 +linux-arm64 +windows-arm64 windows-x64 windows-x86 packages +tmp diff --git a/scripts/dotnet/KeywordResult.cs b/scripts/dotnet/KeywordResult.cs new file mode 100644 index 000000000..13ed4e79d --- /dev/null +++ b/scripts/dotnet/KeywordResult.cs @@ -0,0 +1,44 @@ +/// Copyright (c) 2024 Xiaomi Corporation + +using System; +using System.Runtime.InteropServices; +using System.Text; + +namespace SherpaOnnx +{ + public class KeywordResult + { + public KeywordResult(IntPtr handle) + { + Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl)); + + // PtrToStringUTF8() requires .net standard 2.1 + // _keyword = Marshal.PtrToStringUTF8(impl.Keyword); + + int length = 0; + + unsafe + { + byte* buffer = (byte*)impl.Keyword; + while (*buffer != 0) + { + ++buffer; + length += 1; + } + } + + byte[] stringBuffer = new byte[length]; + Marshal.Copy(impl.Keyword, stringBuffer, 0, length); + _keyword = Encoding.UTF8.GetString(stringBuffer); + } + + [StructLayout(LayoutKind.Sequential)] + struct Impl + { + public IntPtr Keyword; + } + + private String _keyword; + public String Keyword => _keyword; + } +} diff --git a/scripts/dotnet/KeywordSpotter.cs b/scripts/dotnet/KeywordSpotter.cs new file mode 100644 index 000000000..fc82e5b88 --- /dev/null +++ b/scripts/dotnet/KeywordSpotter.cs @@ -0,0 +1,111 @@ +/// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang) +using System; +using System.Collections.Generic; +using System.Runtime.InteropServices; + +namespace SherpaOnnx +{ + // please see + // https://www.mono-project.com/docs/advanced/pinvoke/#gc-safe-pinvoke-code + // https://www.mono-project.com/docs/advanced/pinvoke/#properly-disposing-of-resources + public class KeywordSpotter : IDisposable + { + public KeywordSpotter(KeywordSpotterConfig config) + { + IntPtr h = CreateKeywordSpotter(ref config); + _handle = new HandleRef(this, h); + } + + public OnlineStream CreateStream() + { + IntPtr p = CreateKeywordStream(_handle.Handle); + return new OnlineStream(p); + } + + /// Return true if the passed stream is ready for decoding. + public bool IsReady(OnlineStream stream) + { + return IsReady(_handle.Handle, stream.Handle) != 0; + } + + /// You have to ensure that IsReady(stream) returns true before + /// you call this method + public void Decode(OnlineStream stream) + { + Decode(_handle.Handle, stream.Handle); + } + + // The caller should ensure all passed streams are ready for decoding. + public void Decode(IEnumerable streams) + { + // TargetFramework=net20 does not support System.Linq + // IntPtr[] ptrs = streams.Select(s => s.Handle).ToArray(); + List list = new List(); + foreach (OnlineStream s in streams) + { + list.Add(s.Handle); + } + + IntPtr[] ptrs = list.ToArray(); + Decode(_handle.Handle, ptrs, ptrs.Length); + } + + public KeywordResult GetResult(OnlineStream stream) + { + IntPtr h = GetResult(_handle.Handle, stream.Handle); + KeywordResult result = new KeywordResult(h); + DestroyResult(h); + return result; + } + + public void Dispose() + { + Cleanup(); + // Prevent the object from being placed on the + // finalization queue + System.GC.SuppressFinalize(this); + } + + ~KeywordSpotter() + { + Cleanup(); + } + + private void Cleanup() + { + DestroyKeywordSpotter(_handle.Handle); + + // Don't permit the handle to be used again. + _handle = new HandleRef(this, IntPtr.Zero); + } + + private HandleRef _handle; + + [DllImport(Dll.Filename)] + private static extern IntPtr CreateKeywordSpotter(ref KeywordSpotterConfig config); + + [DllImport(Dll.Filename)] + private static extern void DestroyKeywordSpotter(IntPtr handle); + + [DllImport(Dll.Filename)] + private static extern IntPtr CreateKeywordStream(IntPtr handle); + + [DllImport(Dll.Filename)] + private static extern IntPtr CreateKeywordStreamWithKeywords(IntPtr handle, [MarshalAs(UnmanagedType.LPArray, ArraySubType = UnmanagedType.I1)] byte[] utf8Keywords); + + [DllImport(Dll.Filename, EntryPoint = "IsKeywordStreamReady")] + private static extern int IsReady(IntPtr handle, IntPtr stream); + + [DllImport(Dll.Filename, EntryPoint = "DecodeKeywordStream")] + private static extern void Decode(IntPtr handle, IntPtr stream); + + [DllImport(Dll.Filename, EntryPoint = "DecodeMultipleKeywordStreams")] + private static extern void Decode(IntPtr handle, IntPtr[] streams, int n); + + [DllImport(Dll.Filename, EntryPoint = "GetKeywordResult")] + private static extern IntPtr GetResult(IntPtr handle, IntPtr stream); + + [DllImport(Dll.Filename, EntryPoint = "DestroyKeywordResult")] + private static extern void DestroyResult(IntPtr result); + } +} diff --git a/scripts/dotnet/KeywordSpotterConfig.cs b/scripts/dotnet/KeywordSpotterConfig.cs new file mode 100644 index 000000000..125afb716 --- /dev/null +++ b/scripts/dotnet/KeywordSpotterConfig.cs @@ -0,0 +1,32 @@ +/// Copyright (c) 2024 Xiaomi Corporation + +using System.Runtime.InteropServices; + +namespace SherpaOnnx +{ + [StructLayout(LayoutKind.Sequential)] + public struct KeywordSpotterConfig + { + public KeywordSpotterConfig() + { + FeatConfig = new FeatureConfig(); + ModelConfig = new OnlineModelConfig(); + + MaxActivePaths = 4; + NumTrailingBlanks = 1; + KeywordsScore = 1.0F; + KeywordsThreshold = 0.25F; + KeywordsFile = ""; + } + public FeatureConfig FeatConfig; + public OnlineModelConfig ModelConfig; + + public int MaxActivePaths; + public int NumTrailingBlanks; + public float KeywordsScore; + public float KeywordsThreshold; + + [MarshalAs(UnmanagedType.LPStr)] + public string KeywordsFile; + } +} From a04449ae0e2dfa2f5207c0ab15bb47a81ae9eef4 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Wed, 10 Jul 2024 17:58:22 +0800 Subject: [PATCH 2/8] Begin to add kws .Net example --- .../keyword-spotting-from-files/Program.cs | 2 ++ .../keyword-spotting-from-files.csproj | 11 +++++++++++ dotnet-examples/sherpa-onnx.sln | 6 ++++++ 3 files changed, 19 insertions(+) create mode 100644 dotnet-examples/keyword-spotting-from-files/Program.cs create mode 100644 dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj diff --git a/dotnet-examples/keyword-spotting-from-files/Program.cs b/dotnet-examples/keyword-spotting-from-files/Program.cs new file mode 100644 index 000000000..83fa4f4d5 --- /dev/null +++ b/dotnet-examples/keyword-spotting-from-files/Program.cs @@ -0,0 +1,2 @@ +// See https://aka.ms/new-console-template for more information +Console.WriteLine("Hello, World!"); diff --git a/dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj b/dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj new file mode 100644 index 000000000..b7c5f20ee --- /dev/null +++ b/dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj @@ -0,0 +1,11 @@ + + + + Exe + net7.0 + keyword_spotting_from_files + enable + enable + + + diff --git a/dotnet-examples/sherpa-onnx.sln b/dotnet-examples/sherpa-onnx.sln index d844c5036..b0d2e56c1 100644 --- a/dotnet-examples/sherpa-onnx.sln +++ b/dotnet-examples/sherpa-onnx.sln @@ -25,6 +25,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "vad-non-streaming-asr-paraf EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Common", "Common\Common.csproj", "{401E963F-E25A-43CE-987D-8DB2D4715756}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "keyword-spotting-from-files", "keyword-spotting-from-files\keyword-spotting-from-files.csproj", "{A87EDD31-D654-4C9F-AED7-F6F2825659BD}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -75,6 +77,10 @@ Global {401E963F-E25A-43CE-987D-8DB2D4715756}.Debug|Any CPU.Build.0 = Debug|Any CPU {401E963F-E25A-43CE-987D-8DB2D4715756}.Release|Any CPU.ActiveCfg = Release|Any CPU {401E963F-E25A-43CE-987D-8DB2D4715756}.Release|Any CPU.Build.0 = Release|Any CPU + {A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Debug|Any CPU.Build.0 = Debug|Any CPU + {A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Release|Any CPU.ActiveCfg = Release|Any CPU + {A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE From 6e66c82098bd7348bb48a57e69a1c8dbe228fad7 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Wed, 10 Jul 2024 19:21:39 +0800 Subject: [PATCH 3/8] Add keywords spotting for C# --- CMakeLists.txt | 5 - .../keyword-spotting-from-files/Program.cs | 101 +++++++++++++++++- .../keyword-spotting-from-files.csproj | 5 + scripts/dotnet/KeywordSpotter.cs | 8 ++ 4 files changed, 112 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3d4d20d16..203b8a569 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,11 +21,6 @@ if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0") cmake_policy(SET CMP0135 NEW) endif() - - - - - option(SHERPA_ONNX_ENABLE_PYTHON "Whether to build Python" OFF) option(SHERPA_ONNX_ENABLE_TESTS "Whether to build tests" OFF) option(SHERPA_ONNX_ENABLE_CHECK "Whether to build with assert" OFF) diff --git a/dotnet-examples/keyword-spotting-from-files/Program.cs b/dotnet-examples/keyword-spotting-from-files/Program.cs index 83fa4f4d5..2fea260d1 100644 --- a/dotnet-examples/keyword-spotting-from-files/Program.cs +++ b/dotnet-examples/keyword-spotting-from-files/Program.cs @@ -1,2 +1,99 @@ -// See https://aka.ms/new-console-template for more information -Console.WriteLine("Hello, World!"); +// Copyright (c) 2024 Xiaomi Corporation +// +// This file shows how to do keyword spotting with sherpa-onnx. +// +// 1. Download a model from +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models +// +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 +// tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 +// +// 2. Now run it +// +// dotnet run + +using SherpaOnnx; +using System.Collections.Generic; +using System; + +class KeywordSpotterDemo +{ + static void Main(string[] args) + { + var config = new KeywordSpotterConfig(); + config.FeatConfig.SampleRate = 16000; + config.FeatConfig.FeatureDim = 80; + + config.ModelConfig.Transducer.Encoder = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx"; + config.ModelConfig.Transducer.Decoder = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx"; + config.ModelConfig.Transducer.Joiner = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx"; + + config.ModelConfig.Tokens = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt"; + config.ModelConfig.Provider = "cpu"; + config.ModelConfig.NumThreads = 1; + config.ModelConfig.Debug = 1; + config.KeywordsFile = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt"; + + var kws = new KeywordSpotter(config); + + var filename = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav"; + + WaveReader waveReader = new WaveReader(filename); + + Console.WriteLine("----------Use pre-defined keywords----------"); + + OnlineStream s = kws.CreateStream(); + s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); + + float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)]; + s.AcceptWaveform(waveReader.SampleRate, tailPadding); + s.InputFinished(); + + while (kws.IsReady(s)) + { + kws.Decode(s); + var result = kws.GetResult(s); + if (result.Keyword != "") + { + Console.WriteLine("Detected: {0}", result.Keyword); + } + } + + Console.WriteLine("----------Use pre-defined keywords + add a new keyword----------"); + s = kws.CreateStream("y ǎn y uán @演员"); + s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); + + s.AcceptWaveform(waveReader.SampleRate, tailPadding); + s.InputFinished(); + + while (kws.IsReady(s)) + { + kws.Decode(s); + var result = kws.GetResult(s); + if (result.Keyword != "") + { + Console.WriteLine("Detected: {0}", result.Keyword); + } + } + + Console.WriteLine("----------Use pre-defined keywords + add 2 new keywords----------"); + + // Note keywords are separated by / + s = kws.CreateStream("y ǎn y uán @演员/zh ī m íng @知名"); + s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); + + s.AcceptWaveform(waveReader.SampleRate, tailPadding); + s.InputFinished(); + + while (kws.IsReady(s)) + { + kws.Decode(s); + var result = kws.GetResult(s); + if (result.Keyword != "") + { + Console.WriteLine("Detected: {0}", result.Keyword); + } + } + } +} + diff --git a/dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj b/dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj index b7c5f20ee..bfa74fb60 100644 --- a/dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj +++ b/dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj @@ -3,9 +3,14 @@ Exe net7.0 + net6.0 keyword_spotting_from_files enable enable + + + + diff --git a/scripts/dotnet/KeywordSpotter.cs b/scripts/dotnet/KeywordSpotter.cs index fc82e5b88..fc80e31b3 100644 --- a/scripts/dotnet/KeywordSpotter.cs +++ b/scripts/dotnet/KeywordSpotter.cs @@ -2,6 +2,7 @@ using System; using System.Collections.Generic; using System.Runtime.InteropServices; +using System.Text; namespace SherpaOnnx { @@ -22,6 +23,13 @@ public OnlineStream CreateStream() return new OnlineStream(p); } + public OnlineStream CreateStream(string keywords) + { + byte[] utf8Bytes = Encoding.UTF8.GetBytes(keywords); + IntPtr p = CreateKeywordStreamWithKeywords(_handle.Handle, utf8Bytes); + return new OnlineStream(p); + } + /// Return true if the passed stream is ready for decoding. public bool IsReady(OnlineStream stream) { From 5e359025acd157130b9ab5a49abc4ab1636a9bbc Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Wed, 10 Jul 2024 19:24:34 +0800 Subject: [PATCH 4/8] small fixes --- scripts/dotnet/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/dotnet/run.sh b/scripts/dotnet/run.sh index 5b0000126..7af2a4379 100755 --- a/scripts/dotnet/run.sh +++ b/scripts/dotnet/run.sh @@ -29,7 +29,7 @@ mkdir -p linux-x64 linux-arm64 macos-x64 macos-arm64 windows-x64 windows-x86 win linux_x64_wheel_filename=sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl linux_x64_wheel=$src_dir/$linux_x64_wheel_filename -linux_arm64_wheel_filename=sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl +linux_arm64_wheel_filename=sherpa_onnx-${SHERPA_ONNX_VERSION}-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl linux_arm64_wheel=$src_dir/$linux_arm64_wheel_filename macos_x64_wheel_filename=sherpa_onnx-${SHERPA_ONNX_VERSION}-cp39-cp39-macosx_11_0_x86_64.whl From 995e66c1707676cda44e67a27fc393f20df5dcd0 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Wed, 10 Jul 2024 19:28:40 +0800 Subject: [PATCH 5/8] add test for kws --- .github/scripts/test-dot-net.sh | 5 ++++- .../keyword-spotting-from-files.csproj | 1 - dotnet-examples/keyword-spotting-from-files/run.sh | 11 +++++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) create mode 100755 dotnet-examples/keyword-spotting-from-files/run.sh diff --git a/.github/scripts/test-dot-net.sh b/.github/scripts/test-dot-net.sh index 845162542..a4559f7b0 100755 --- a/.github/scripts/test-dot-net.sh +++ b/.github/scripts/test-dot-net.sh @@ -2,7 +2,10 @@ cd dotnet-examples/ -cd ./online-decode-files +cd ./keyword-spotting-from-files +./run.sh + +cd ../online-decode-files ./run-transducer-itn.sh ./run-zipformer2-ctc.sh ./run-transducer.sh diff --git a/dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj b/dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj index bfa74fb60..992f8e0e3 100644 --- a/dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj +++ b/dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj @@ -2,7 +2,6 @@ Exe - net7.0 net6.0 keyword_spotting_from_files enable diff --git a/dotnet-examples/keyword-spotting-from-files/run.sh b/dotnet-examples/keyword-spotting-from-files/run.sh new file mode 100755 index 000000000..1f07b9faa --- /dev/null +++ b/dotnet-examples/keyword-spotting-from-files/run.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +set -ex + +if [ ! -f ./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 + tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 + rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 +fi + +dotnet run -c Release From 559bf37d37f177673ff31090842c094af5c456b0 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Wed, 10 Jul 2024 20:41:39 +0800 Subject: [PATCH 6/8] update changelog --- CHANGELOG.md | 1 + flutter/sherpa_onnx/example/example.md | 1 + scripts/dotnet/run.sh | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9f8c76d6d..6672c41be 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ ## 1.10.13 * Update onnxruntime from 1.17.1 to 1.18.0 +* Add C# API for Keyword spotting ## 1.10.12 diff --git a/flutter/sherpa_onnx/example/example.md b/flutter/sherpa_onnx/example/example.md index f7e9fed4e..02b0e22fb 100644 --- a/flutter/sherpa_onnx/example/example.md +++ b/flutter/sherpa_onnx/example/example.md @@ -5,6 +5,7 @@ | Functions | URL | Supported Platforms| |---|---|---| |Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter-examples/streaming_asr)| Android, macOS, Windows| +|Speech synthesis| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter-examples/tts)| Android, iOS, Linux, macOS, Windows| ## Pure dart-examples diff --git a/scripts/dotnet/run.sh b/scripts/dotnet/run.sh index 7af2a4379..5b0000126 100755 --- a/scripts/dotnet/run.sh +++ b/scripts/dotnet/run.sh @@ -29,7 +29,7 @@ mkdir -p linux-x64 linux-arm64 macos-x64 macos-arm64 windows-x64 windows-x86 win linux_x64_wheel_filename=sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl linux_x64_wheel=$src_dir/$linux_x64_wheel_filename -linux_arm64_wheel_filename=sherpa_onnx-${SHERPA_ONNX_VERSION}-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl +linux_arm64_wheel_filename=sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl linux_arm64_wheel=$src_dir/$linux_arm64_wheel_filename macos_x64_wheel_filename=sherpa_onnx-${SHERPA_ONNX_VERSION}-cp39-cp39-macosx_11_0_x86_64.whl From 1bb37f53174eb6600152b8e2fe68d6c3be9c06c6 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Wed, 10 Jul 2024 21:14:11 +0800 Subject: [PATCH 7/8] disable using decoder.int8.onnx for transducer models --- .github/scripts/test-offline-transducer.sh | 4 ++-- .github/scripts/test-online-transducer.sh | 12 ++++++------ .../online-decode-files/run-transducer-itn.sh | 2 +- .../online-decode-files/run-transducer.sh | 2 +- .../run-transducer.sh | 2 +- sherpa-onnx/python/tests/test_keyword_spotter.py | 16 ++++++++-------- .../python/tests/test_offline_recognizer.py | 4 ++-- .../python/tests/test_online_recognizer.py | 4 ++-- 8 files changed, 23 insertions(+), 23 deletions(-) diff --git a/.github/scripts/test-offline-transducer.sh b/.github/scripts/test-offline-transducer.sh index eadc0f491..ee012de32 100755 --- a/.github/scripts/test-offline-transducer.sh +++ b/.github/scripts/test-offline-transducer.sh @@ -139,7 +139,7 @@ time $EXE \ time $EXE \ --tokens=$repo/tokens.txt \ --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \ - --decoder=$repo/decoder-epoch-99-avg-1.int8.onnx \ + --decoder=$repo/decoder-epoch-99-avg-1.onnx \ --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \ --num-threads=2 \ $repo/test_wavs/0.wav \ @@ -172,7 +172,7 @@ time $EXE \ time $EXE \ --tokens=$repo/tokens.txt \ --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \ - --decoder=$repo/decoder-epoch-99-avg-1.int8.onnx \ + --decoder=$repo/decoder-epoch-99-avg-1.onnx \ --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \ --num-threads=2 \ $repo/test_wavs/0.wav \ diff --git a/.github/scripts/test-online-transducer.sh b/.github/scripts/test-online-transducer.sh index 7616b18e9..ceb2be474 100755 --- a/.github/scripts/test-online-transducer.sh +++ b/.github/scripts/test-online-transducer.sh @@ -86,7 +86,7 @@ for wave in ${waves[@]}; do time $EXE \ --tokens=$repo/tokens.txt \ --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \ - --decoder=$repo/decoder-epoch-99-avg-1.int8.onnx \ + --decoder=$repo/decoder-epoch-99-avg-1.onnx \ --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \ --num-threads=2 \ $wave @@ -126,7 +126,7 @@ for wave in ${waves[@]}; do time $EXE \ --tokens=$repo/tokens.txt \ --encoder=$repo/encoder-epoch-11-avg-1.int8.onnx \ - --decoder=$repo/decoder-epoch-11-avg-1.int8.onnx \ + --decoder=$repo/decoder-epoch-11-avg-1.onnx \ --joiner=$repo/joiner-epoch-11-avg-1.int8.onnx \ --num-threads=2 \ $wave @@ -168,7 +168,7 @@ for wave in ${waves[@]}; do time $EXE \ --tokens=$repo/tokens.txt \ --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \ - --decoder=$repo/decoder-epoch-99-avg-1.int8.onnx \ + --decoder=$repo/decoder-epoch-99-avg-1.onnx \ --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \ --num-threads=2 \ $wave @@ -210,7 +210,7 @@ for wave in ${waves[@]}; do time $EXE \ --tokens=$repo/tokens.txt \ --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \ - --decoder=$repo/decoder-epoch-99-avg-1.int8.onnx \ + --decoder=$repo/decoder-epoch-99-avg-1.onnx \ --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \ --num-threads=2 \ $wave @@ -231,7 +231,7 @@ if [ $EXE == "sherpa-onnx-ffmpeg" ]; then time $EXE \ $repo/tokens.txt \ $repo/encoder-epoch-99-avg-1.int8.onnx \ - $repo/decoder-epoch-99-avg-1.int8.onnx \ + $repo/decoder-epoch-99-avg-1.onnx \ $repo/joiner-epoch-99-avg-1.int8.onnx \ https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/resolve/main/test_wavs/4.wav \ 2 @@ -271,7 +271,7 @@ for wave in ${waves[@]}; do time $EXE \ --tokens=$repo/tokens.txt \ --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \ - --decoder=$repo/decoder-epoch-99-avg-1.int8.onnx \ + --decoder=$repo/decoder-epoch-99-avg-1.onnx \ --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \ --num-threads=2 \ $wave diff --git a/dotnet-examples/online-decode-files/run-transducer-itn.sh b/dotnet-examples/online-decode-files/run-transducer-itn.sh index 0c81fc7d8..de3445dae 100755 --- a/dotnet-examples/online-decode-files/run-transducer-itn.sh +++ b/dotnet-examples/online-decode-files/run-transducer-itn.sh @@ -22,7 +22,7 @@ fi dotnet run -c Release \ --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \ --encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \ - --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.int8.onnx \ + --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \ --joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx \ --rule-fsts ./itn_zh_number.fst \ --decoding-method greedy_search \ diff --git a/dotnet-examples/online-decode-files/run-transducer.sh b/dotnet-examples/online-decode-files/run-transducer.sh index b3ca7c7c4..824354295 100755 --- a/dotnet-examples/online-decode-files/run-transducer.sh +++ b/dotnet-examples/online-decode-files/run-transducer.sh @@ -14,7 +14,7 @@ fi dotnet run -c Release \ --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \ --encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \ - --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.int8.onnx \ + --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \ --joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx \ --decoding-method greedy_search \ --files ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav \ diff --git a/dotnet-examples/speech-recognition-from-microphone/run-transducer.sh b/dotnet-examples/speech-recognition-from-microphone/run-transducer.sh index e6184b4fc..ecafb2924 100755 --- a/dotnet-examples/speech-recognition-from-microphone/run-transducer.sh +++ b/dotnet-examples/speech-recognition-from-microphone/run-transducer.sh @@ -18,5 +18,5 @@ fi dotnet run -c Release \ --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \ --encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \ - --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.int8.onnx \ + --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \ --joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx diff --git a/sherpa-onnx/python/tests/test_keyword_spotter.py b/sherpa-onnx/python/tests/test_keyword_spotter.py index bdefa5d10..f4d79830a 100755 --- a/sherpa-onnx/python/tests/test_keyword_spotter.py +++ b/sherpa-onnx/python/tests/test_keyword_spotter.py @@ -50,12 +50,12 @@ def test_zipformer_transducer_en(self): for use_int8 in [True, False]: if use_int8: encoder = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx" - decoder = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx" + decoder = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx" joiner = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx" else: - encoder = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx" - decoder = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx" - joiner = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx" + encoder = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx" + decoder = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx" + joiner = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx" tokens = ( f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/tokens.txt" @@ -109,12 +109,12 @@ def test_zipformer_transducer_cn(self): for use_int8 in [True, False]: if use_int8: encoder = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx" - decoder = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx" + decoder = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx" joiner = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx" else: - encoder = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx" - decoder = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx" - joiner = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx" + encoder = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx" + decoder = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx" + joiner = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx" tokens = ( f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt" diff --git a/sherpa-onnx/python/tests/test_offline_recognizer.py b/sherpa-onnx/python/tests/test_offline_recognizer.py index 159fb47c3..32f702c3c 100755 --- a/sherpa-onnx/python/tests/test_offline_recognizer.py +++ b/sherpa-onnx/python/tests/test_offline_recognizer.py @@ -52,7 +52,7 @@ def test_transducer_single_file(self): for use_int8 in [True, False]: if use_int8: encoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.int8.onnx" - decoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.int8.onnx" + decoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.onnx" joiner = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.int8.onnx" else: encoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.onnx" @@ -85,7 +85,7 @@ def test_transducer_multiple_files(self): for use_int8 in [True, False]: if use_int8: encoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.int8.onnx" - decoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.int8.onnx" + decoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.onnx" joiner = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.int8.onnx" else: encoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.onnx" diff --git a/sherpa-onnx/python/tests/test_online_recognizer.py b/sherpa-onnx/python/tests/test_online_recognizer.py index 9193fb0f2..5319f41ea 100755 --- a/sherpa-onnx/python/tests/test_online_recognizer.py +++ b/sherpa-onnx/python/tests/test_online_recognizer.py @@ -50,7 +50,7 @@ def test_transducer_single_file(self): for use_int8 in [True, False]: if use_int8: encoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx" - decoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.int8.onnx" + decoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx" joiner = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx" else: encoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx" @@ -90,7 +90,7 @@ def test_transducer_multiple_files(self): for use_int8 in [True, False]: if use_int8: encoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx" - decoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.int8.onnx" + decoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx" joiner = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx" else: encoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx" From 4a7af7fdfef536a7f59e203a9f49c3d4e1747c17 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Wed, 10 Jul 2024 21:16:30 +0800 Subject: [PATCH 8/8] disable testing offline wenet ctc models --- .github/scripts/test-python.sh | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/scripts/test-python.sh b/.github/scripts/test-python.sh index 7bc7f0df3..68104baad 100755 --- a/.github/scripts/test-python.sh +++ b/.github/scripts/test-python.sh @@ -125,12 +125,15 @@ for name in ${wenet_models[@]}; do repo=$name log "Start testing ${repo_url}" - python3 ./python-api-examples/offline-decode-files.py \ - --tokens=$repo/tokens.txt \ - --wenet-ctc=$repo/model.onnx \ - $repo/test_wavs/0.wav \ - $repo/test_wavs/1.wav \ - $repo/test_wavs/8k.wav + if false; then + # offline wenet ctc models are not supported by onnxruntime >= 1.18 + python3 ./python-api-examples/offline-decode-files.py \ + --tokens=$repo/tokens.txt \ + --wenet-ctc=$repo/model.onnx \ + $repo/test_wavs/0.wav \ + $repo/test_wavs/1.wav \ + $repo/test_wavs/8k.wav + fi python3 ./python-api-examples/online-decode-files.py \ --tokens=$repo/tokens.txt \