Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add keyword spotting for C# #1105

Merged
merged 8 commits into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/scripts/test-dot-net.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@

cd dotnet-examples/

cd ./online-decode-files
cd ./keyword-spotting-from-files
./run.sh

cd ../online-decode-files
./run-transducer-itn.sh
./run-zipformer2-ctc.sh
./run-transducer.sh
Expand Down
4 changes: 2 additions & 2 deletions .github/scripts/test-offline-transducer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ time $EXE \
time $EXE \
--tokens=$repo/tokens.txt \
--encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \
--decoder=$repo/decoder-epoch-99-avg-1.int8.onnx \
--decoder=$repo/decoder-epoch-99-avg-1.onnx \
--joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \
--num-threads=2 \
$repo/test_wavs/0.wav \
Expand Down Expand Up @@ -172,7 +172,7 @@ time $EXE \
time $EXE \
--tokens=$repo/tokens.txt \
--encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \
--decoder=$repo/decoder-epoch-99-avg-1.int8.onnx \
--decoder=$repo/decoder-epoch-99-avg-1.onnx \
--joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \
--num-threads=2 \
$repo/test_wavs/0.wav \
Expand Down
12 changes: 6 additions & 6 deletions .github/scripts/test-online-transducer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ for wave in ${waves[@]}; do
time $EXE \
--tokens=$repo/tokens.txt \
--encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \
--decoder=$repo/decoder-epoch-99-avg-1.int8.onnx \
--decoder=$repo/decoder-epoch-99-avg-1.onnx \
--joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \
--num-threads=2 \
$wave
Expand Down Expand Up @@ -126,7 +126,7 @@ for wave in ${waves[@]}; do
time $EXE \
--tokens=$repo/tokens.txt \
--encoder=$repo/encoder-epoch-11-avg-1.int8.onnx \
--decoder=$repo/decoder-epoch-11-avg-1.int8.onnx \
--decoder=$repo/decoder-epoch-11-avg-1.onnx \
--joiner=$repo/joiner-epoch-11-avg-1.int8.onnx \
--num-threads=2 \
$wave
Expand Down Expand Up @@ -168,7 +168,7 @@ for wave in ${waves[@]}; do
time $EXE \
--tokens=$repo/tokens.txt \
--encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \
--decoder=$repo/decoder-epoch-99-avg-1.int8.onnx \
--decoder=$repo/decoder-epoch-99-avg-1.onnx \
--joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \
--num-threads=2 \
$wave
Expand Down Expand Up @@ -210,7 +210,7 @@ for wave in ${waves[@]}; do
time $EXE \
--tokens=$repo/tokens.txt \
--encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \
--decoder=$repo/decoder-epoch-99-avg-1.int8.onnx \
--decoder=$repo/decoder-epoch-99-avg-1.onnx \
--joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \
--num-threads=2 \
$wave
Expand All @@ -231,7 +231,7 @@ if [ $EXE == "sherpa-onnx-ffmpeg" ]; then
time $EXE \
$repo/tokens.txt \
$repo/encoder-epoch-99-avg-1.int8.onnx \
$repo/decoder-epoch-99-avg-1.int8.onnx \
$repo/decoder-epoch-99-avg-1.onnx \
$repo/joiner-epoch-99-avg-1.int8.onnx \
https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/resolve/main/test_wavs/4.wav \
2
Expand Down Expand Up @@ -271,7 +271,7 @@ for wave in ${waves[@]}; do
time $EXE \
--tokens=$repo/tokens.txt \
--encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \
--decoder=$repo/decoder-epoch-99-avg-1.int8.onnx \
--decoder=$repo/decoder-epoch-99-avg-1.onnx \
--joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \
--num-threads=2 \
$wave
Expand Down
15 changes: 9 additions & 6 deletions .github/scripts/test-python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,15 @@ for name in ${wenet_models[@]}; do
repo=$name
log "Start testing ${repo_url}"

python3 ./python-api-examples/offline-decode-files.py \
--tokens=$repo/tokens.txt \
--wenet-ctc=$repo/model.onnx \
$repo/test_wavs/0.wav \
$repo/test_wavs/1.wav \
$repo/test_wavs/8k.wav
if false; then
# offline wenet ctc models are not supported by onnxruntime >= 1.18
python3 ./python-api-examples/offline-decode-files.py \
--tokens=$repo/tokens.txt \
--wenet-ctc=$repo/model.onnx \
$repo/test_wavs/0.wav \
$repo/test_wavs/1.wav \
$repo/test_wavs/8k.wav
fi

python3 ./python-api-examples/online-decode-files.py \
--tokens=$repo/tokens.txt \
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
## 1.10.13

* Update onnxruntime from 1.17.1 to 1.18.0
* Add C# API for Keyword spotting

## 1.10.12

Expand Down
5 changes: 0 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,6 @@ if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0")
cmake_policy(SET CMP0135 NEW)
endif()






option(SHERPA_ONNX_ENABLE_PYTHON "Whether to build Python" OFF)
option(SHERPA_ONNX_ENABLE_TESTS "Whether to build tests" OFF)
option(SHERPA_ONNX_ENABLE_CHECK "Whether to build with assert" OFF)
Expand Down
99 changes: 99 additions & 0 deletions dotnet-examples/keyword-spotting-from-files/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// Copyright (c) 2024 Xiaomi Corporation
//
// This file shows how to do keyword spotting with sherpa-onnx.
//
// 1. Download a model from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
// tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
//
// 2. Now run it
//
// dotnet run

using SherpaOnnx;
using System.Collections.Generic;
using System;

/// <summary>
/// Keyword spotting demo: decodes one test wave file three times, first with
/// only the keywords from the keywords file, then with one and with two extra
/// keywords supplied when the stream is created.
/// </summary>
class KeywordSpotterDemo
{
    static void Main(string[] args)
    {
        var kws = new KeywordSpotter(BuildConfig());

        var filename = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav";
        WaveReader waveReader = new WaveReader(filename);

        // 0.3 seconds worth of zero samples, appended after the real audio in every pass.
        // NOTE(review): presumably this lets the tail of the recording be decoded - confirm.
        float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];

        Console.WriteLine("----------Use pre-defined keywords----------");
        RunPass(kws, kws.CreateStream(), waveReader, tailPadding);

        Console.WriteLine("----------Use pre-defined keywords + add a new keyword----------");
        RunPass(kws, kws.CreateStream("y ǎn y uán @演员"), waveReader, tailPadding);

        Console.WriteLine("----------Use pre-defined keywords + add 2 new keywords----------");

        // Note keywords are separated by /
        RunPass(kws, kws.CreateStream("y ǎn y uán @演员/zh ī m íng @知名"), waveReader, tailPadding);
    }

    // Fills in the feature settings, model files, tokens and keywords file used by the demo.
    private static KeywordSpotterConfig BuildConfig()
    {
        var config = new KeywordSpotterConfig();

        config.FeatConfig.SampleRate = 16000;
        config.FeatConfig.FeatureDim = 80;

        config.ModelConfig.Transducer.Encoder = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx";
        config.ModelConfig.Transducer.Decoder = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx";
        config.ModelConfig.Transducer.Joiner = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx";

        config.ModelConfig.Tokens = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt";
        config.ModelConfig.Provider = "cpu";
        config.ModelConfig.NumThreads = 1;
        config.ModelConfig.Debug = 1;
        config.KeywordsFile = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt";

        return config;
    }

    // Feeds the whole file plus the tail padding into the stream, marks the input as
    // finished, then decodes until the spotter is no longer ready, printing every
    // non-empty keyword it reports.
    private static void RunPass(KeywordSpotter kws, OnlineStream stream, WaveReader waveReader, float[] tailPadding)
    {
        stream.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
        stream.AcceptWaveform(waveReader.SampleRate, tailPadding);
        stream.InputFinished();

        while (kws.IsReady(stream))
        {
            kws.Decode(stream);

            var result = kws.GetResult(stream);
            if (result.Keyword == "")
            {
                continue;
            }

            Console.WriteLine("Detected: {0}", result.Keyword);
        }
    }
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Project file for the keyword-spotting-from-files example. -->

<PropertyGroup>
<!-- Build an executable rather than a library. -->
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<RootNamespace>keyword_spotting_from_files</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<ItemGroup>
<!-- Project in the sibling Common directory that this example is built against. -->
<ProjectReference Include="..\Common\Common.csproj" />
</ItemGroup>

</Project>
11 changes: 11 additions & 0 deletions dotnet-examples/keyword-spotting-from-files/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env bash
# Downloads and unpacks the keyword-spotting model when it is not already
# present, then runs the example in Release configuration.

set -o errexit -o xtrace

# tokens.txt is used as the marker that the model directory has been unpacked.
if ! test -f ./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt; then
  curl --show-error --location --remote-name https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  tar -xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  # The archive is no longer needed once it has been extracted.
  rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
fi

dotnet run -c Release
2 changes: 1 addition & 1 deletion dotnet-examples/online-decode-files/run-transducer-itn.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ fi
dotnet run -c Release \
--tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
--encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \
--decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.int8.onnx \
--decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
--joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx \
--rule-fsts ./itn_zh_number.fst \
--decoding-method greedy_search \
Expand Down
2 changes: 1 addition & 1 deletion dotnet-examples/online-decode-files/run-transducer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ fi
dotnet run -c Release \
--tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
--encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \
--decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.int8.onnx \
--decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
--joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx \
--decoding-method greedy_search \
--files ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav \
Expand Down
6 changes: 6 additions & 0 deletions dotnet-examples/sherpa-onnx.sln
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "vad-non-streaming-asr-paraf
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Common", "Common\Common.csproj", "{401E963F-E25A-43CE-987D-8DB2D4715756}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "keyword-spotting-from-files", "keyword-spotting-from-files\keyword-spotting-from-files.csproj", "{A87EDD31-D654-4C9F-AED7-F6F2825659BD}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -75,6 +77,10 @@ Global
{401E963F-E25A-43CE-987D-8DB2D4715756}.Debug|Any CPU.Build.0 = Debug|Any CPU
{401E963F-E25A-43CE-987D-8DB2D4715756}.Release|Any CPU.ActiveCfg = Release|Any CPU
{401E963F-E25A-43CE-987D-8DB2D4715756}.Release|Any CPU.Build.0 = Release|Any CPU
{A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,5 @@ fi
dotnet run -c Release \
--tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
--encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \
--decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.int8.onnx \
--decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
--joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx
1 change: 1 addition & 0 deletions flutter/sherpa_onnx/example/example.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
| Functions | URL | Supported Platforms|
|---|---|---|
|Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter-examples/streaming_asr)| Android, macOS, Windows|
|Speech synthesis| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter-examples/tts)| Android, iOS, Linux, macOS, Windows|

## Pure dart-examples

Expand Down
6 changes: 4 additions & 2 deletions scripts/dotnet/.gitignore
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
all
macos-arm64
macos-x64
linux
windows
linux-x64
linux-arm64
windows-arm64
windows-x64
windows-x86
packages
tmp
44 changes: 44 additions & 0 deletions scripts/dotnet/KeywordResult.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Copyright (c) 2024 Xiaomi Corporation

using System;
using System.Runtime.InteropServices;
using System.Text;

namespace SherpaOnnx
{
    /// <summary>
    /// Managed copy of a keyword spotting result read from native memory.
    /// </summary>
    public class KeywordResult
    {
        /// <summary>
        /// Reads the keyword out of the native structure at <paramref name="handle"/>.
        /// </summary>
        /// <param name="handle">
        /// Address of a native structure whose first field is a pointer to a
        /// NUL-terminated UTF-8 string. If the handle or that string pointer is
        /// <see cref="IntPtr.Zero"/>, <see cref="Keyword"/> is the empty string.
        /// </param>
        public KeywordResult(IntPtr handle)
        {
            _keyword = ReadKeyword(handle);
        }

        // Decodes the UTF-8 keyword referenced by the native structure, or returns
        // an empty string when there is nothing to read.
        private static String ReadKeyword(IntPtr handle)
        {
            // Guard before touching native memory: a null handle used to crash here.
            if (handle == IntPtr.Zero)
            {
                return String.Empty;
            }

            Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl));
            if (impl.Keyword == IntPtr.Zero)
            {
                return String.Empty;
            }

            // PtrToStringUTF8() requires .net standard 2.1
            // _keyword = Marshal.PtrToStringUTF8(impl.Keyword);
            // so find the terminating NUL by hand and decode the bytes ourselves.
            int length = 0;
            while (Marshal.ReadByte(impl.Keyword, length) != 0)
            {
                ++length;
            }

            byte[] stringBuffer = new byte[length];
            Marshal.Copy(impl.Keyword, stringBuffer, 0, length);
            return Encoding.UTF8.GetString(stringBuffer);
        }

        // Mirrors the layout read from native memory: a single pointer to the keyword text.
        [StructLayout(LayoutKind.Sequential)]
        struct Impl
        {
            public IntPtr Keyword;
        }

        private readonly String _keyword;

        /// <summary>The keyword text; empty when the native result carried none.</summary>
        public String Keyword => _keyword;
    }
}
Loading
Loading