diff --git a/.github/workflows/test-go.yaml b/.github/workflows/test-go.yaml index a91dd3093..65c72e174 100644 --- a/.github/workflows/test-go.yaml +++ b/.github/workflows/test-go.yaml @@ -191,6 +191,10 @@ jobs: go build ls -lh + echo "Test SenseVoice ctc" + ./run-sense-voice-small.sh + rm -rf sherpa-onnx-sense-* + echo "Test telespeech ctc" ./run-telespeech-ctc.sh rm -rf sherpa-onnx-telespeech-ctc-* diff --git a/go-api-examples/non-streaming-decode-files/main.go b/go-api-examples/non-streaming-decode-files/main.go index b1d2063b9..909b94e03 100644 --- a/go-api-examples/non-streaming-decode-files/main.go +++ b/go-api-examples/non-streaming-decode-files/main.go @@ -35,6 +35,10 @@ func main() { flag.StringVar(&config.ModelConfig.Tdnn.Model, "tdnn-model", "", "Path to the tdnn model") + flag.StringVar(&config.ModelConfig.SenseVoice.Model, "sense-voice-model", "", "Path to the SenseVoice model") + flag.StringVar(&config.ModelConfig.SenseVoice.Language, "sense-voice-language", "", "If not empty, specify the Language for the input wave") + flag.IntVar(&config.ModelConfig.SenseVoice.UseInverseTextNormalization, "sense-voice-use-itn", 1, " 1 to use inverse text normalization") + flag.StringVar(&config.ModelConfig.Tokens, "tokens", "", "Path to the tokens file") flag.IntVar(&config.ModelConfig.NumThreads, "num-threads", 1, "Number of threads for computing") flag.IntVar(&config.ModelConfig.Debug, "debug", 0, "Whether to show debug message") diff --git a/go-api-examples/non-streaming-decode-files/run-sense-voice-small.sh b/go-api-examples/non-streaming-decode-files/run-sense-voice-small.sh new file mode 100755 index 000000000..06dddc8f4 --- /dev/null +++ b/go-api-examples/non-streaming-decode-files/run-sense-voice-small.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +set -ex + +if [ ! -d sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 + tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 + rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 +fi + +go mod tidy +go build + +./non-streaming-decode-files \ + --sense-voice-model ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx \ + --tokens ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt \ + --debug 0 \ + ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav diff --git a/scripts/go/_internal/non-streaming-decode-files/run-sense-voice-small.sh b/scripts/go/_internal/non-streaming-decode-files/run-sense-voice-small.sh new file mode 120000 index 000000000..1e9fb8458 --- /dev/null +++ b/scripts/go/_internal/non-streaming-decode-files/run-sense-voice-small.sh @@ -0,0 +1 @@ +../../../../go-api-examples/non-streaming-decode-files/run-sense-voice-small.sh \ No newline at end of file diff --git a/scripts/go/sherpa_onnx.go b/scripts/go/sherpa_onnx.go index cb66aeb69..f43c088ed 100644 --- a/scripts/go/sherpa_onnx.go +++ b/scripts/go/sherpa_onnx.go @@ -370,6 +370,12 @@ type OfflineTdnnModelConfig struct { Model string } +type OfflineSenseVoiceModelConfig struct { + Model string + Language string + UseInverseTextNormalization int +} + // Configuration for offline LM. type OfflineLMConfig struct { Model string // Path to the model @@ -382,6 +388,7 @@ type OfflineModelConfig struct { NemoCTC OfflineNemoEncDecCtcModelConfig Whisper OfflineWhisperModelConfig Tdnn OfflineTdnnModelConfig + SenseVoice OfflineSenseVoiceModelConfig Tokens string // Path to tokens.txt // Number of threads to use for neural network computation @@ -478,6 +485,14 @@ func NewOfflineRecognizer(config *OfflineRecognizerConfig) *OfflineRecognizer { c.model_config.tdnn.model = C.CString(config.ModelConfig.Tdnn.Model) defer C.free(unsafe.Pointer(c.model_config.tdnn.model)) + c.model_config.sense_voice.model = C.CString(config.ModelConfig.SenseVoice.Model) + defer C.free(unsafe.Pointer(c.model_config.sense_voice.model)) + + c.model_config.sense_voice.language = C.CString(config.ModelConfig.SenseVoice.Language) + defer C.free(unsafe.Pointer(c.model_config.sense_voice.language)) + + c.model_config.sense_voice.use_itn = C.int(config.ModelConfig.SenseVoice.UseInverseTextNormalization) + c.model_config.tokens = C.CString(config.ModelConfig.Tokens) defer C.free(unsafe.Pointer(c.model_config.tokens))