From a16c9aff8bbc1ccbc79593adcf5d6a39de8ae5f3 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Wed, 13 Nov 2024 00:04:16 +0800 Subject: [PATCH] Add Lazarus example for Moonshine models. (#1532) --- build-ios-shared.sh | 2 + cmake/piper-phonemize.cmake | 16 +++--- .../generate_subtitles/my_init.pas | 54 +++++++++++++++++++ scripts/lazarus/generate-subtitles.py | 14 +++++ 4 files changed, 78 insertions(+), 8 deletions(-) diff --git a/build-ios-shared.sh b/build-ios-shared.sh index 90b2e9a5d..2af1b36f0 100755 --- a/build-ios-shared.sh +++ b/build-ios-shared.sh @@ -21,6 +21,8 @@ fi if [ ! -z CMAKE_VERBOSE_MAKEFILE ]; then CMAKE_VERBOSE_MAKEFILE=ON +else + CMAKE_VERBOSE_MAKEFILE=OFF fi if [ ! -f $onnxruntime_dir/onnxruntime.xcframework/ios-arm64/onnxruntime.a ]; then diff --git a/cmake/piper-phonemize.cmake b/cmake/piper-phonemize.cmake index 9c9c71f5a..0e11fd176 100644 --- a/cmake/piper-phonemize.cmake +++ b/cmake/piper-phonemize.cmake @@ -1,18 +1,18 @@ function(download_piper_phonemize) include(FetchContent) - set(piper_phonemize_URL "https://github.com/csukuangfj/piper-phonemize/archive/38ee199dcc49c7b6de89f7ebfb32ed682763fa1b.zip") - set(piper_phonemize_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/piper-phonemize-38ee199dcc49c7b6de89f7ebfb32ed682763fa1b.zip") - set(piper_phonemize_HASH "SHA256=ab4d06ca76047e1585c63c482f39ffead5315785345055360703cc9382c5e74b") + set(piper_phonemize_URL "https://github.com/csukuangfj/piper-phonemize/archive/78a788e0b719013401572d70fef372e77bff8e43.zip") + set(piper_phonemize_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/piper-phonemize-78a788e0b719013401572d70fef372e77bff8e43.zip") + set(piper_phonemize_HASH "SHA256=89641a46489a4898754643ce57bda9c9b54b4ca46485fdc02bf0dc84b866645d") # If you don't have access to the Internet, # please pre-download kaldi-decoder set(possible_file_locations - $ENV{HOME}/Downloads/piper-phonemize-38ee199dcc49c7b6de89f7ebfb32ed682763fa1b.zip - ${CMAKE_SOURCE_DIR}/piper-phonemize-38ee199dcc49c7b6de89f7ebfb32ed682763fa1b.zip - ${CMAKE_BINARY_DIR}/piper-phonemize-38ee199dcc49c7b6de89f7ebfb32ed682763fa1b.zip - /tmp/piper-phonemize-38ee199dcc49c7b6de89f7ebfb32ed682763fa1b.zip - /star-fj/fangjun/download/github/piper-phonemize-38ee199dcc49c7b6de89f7ebfb32ed682763fa1b.zip + $ENV{HOME}/Downloads/piper-phonemize-78a788e0b719013401572d70fef372e77bff8e43.zip + ${CMAKE_SOURCE_DIR}/piper-phonemize-78a788e0b719013401572d70fef372e77bff8e43.zip + ${CMAKE_BINARY_DIR}/piper-phonemize-78a788e0b719013401572d70fef372e77bff8e43.zip + /tmp/piper-phonemize-78a788e0b719013401572d70fef372e77bff8e43.zip + /star-fj/fangjun/download/github/piper-phonemize-78a788e0b719013401572d70fef372e77bff8e43.zip ) foreach(f IN LISTS possible_file_locations) diff --git a/lazarus-examples/generate_subtitles/my_init.pas b/lazarus-examples/generate_subtitles/my_init.pas index d57448b6d..d01cb6081 100644 --- a/lazarus-examples/generate_subtitles/my_init.pas +++ b/lazarus-examples/generate_subtitles/my_init.pas @@ -159,6 +159,30 @@ function CreateOfflineRecognizerWhisper( Result := TSherpaOnnxOfflineRecognizer.Create(Config); end; +function CreateOfflineRecognizerMoonshine( + Tokens: AnsiString; + Preprocessor: AnsiString; + Encoder: AnsiString; + UncachedDecoder: AnsiString; + CachedDecoder: AnsiString): TSherpaOnnxOfflineRecognizer; +var + Config: TSherpaOnnxOfflineRecognizerConfig; +begin + Initialize(Config); + + Config.ModelConfig.Moonshine.Preprocessor := Preprocessor; + Config.ModelConfig.Moonshine.Encoder := Encoder; + Config.ModelConfig.Moonshine.UncachedDecoder := UncachedDecoder; + Config.ModelConfig.Moonshine.CachedDecoder := CachedDecoder; + + Config.ModelConfig.Tokens := Tokens; + Config.ModelConfig.Provider := 'cpu'; + Config.ModelConfig.NumThreads := 2; + Config.ModelConfig.Debug := False; + + Result := TSherpaOnnxOfflineRecognizer.Create(Config); +end; + constructor TMyInitThread.Create(CreateSuspended : boolean; ModelDirectory: AnsiString); begin inherited Create(CreateSuspended); @@ -193,6 +217,11 @@ procedure TMyInitThread.Execute; NeMoTransducerEncoder: AnsiString; NeMoTransducerDecoder: AnsiString; NeMoTransducerJoiner: AnsiString; + + MoonshinePreprocessor: AnsiString; + MoonshineEncoder: AnsiString; + MoonshineUncachedDecoder: AnsiString; + MoonshineCachedDecoder: AnsiString; begin VadFilename := ModelDir + 'silero_vad.onnx'; Tokens := ModelDir + 'tokens.txt'; @@ -292,6 +321,24 @@ procedure TMyInitThread.Execute; NeMoTransducerDecoder := ModelDir + 'nemo-transducer-decoder.onnx'; NeMoTransducerJoiner := ModelDir + 'nemo-transducer-joiner.onnx'; + { + Please Visit + https://k2-fsa.github.io/sherpa/onnx/moonshine/models.html + to download a Moonshine model. + + Note that you have to rename model files after downloading. The following + is an example. + + mv preprocess.onnx moonshine-preprocessor.onnx + mv encode.int8.onnx moonshine-encoder.onnx + mv uncached_decode.int8.onnx moonshine-uncached-decoder.onnx + mv cached_decode.int8.onnx moonshine-cached-decoder.onnx + } + MoonshinePreprocessor := ModelDir + 'moonshine-preprocessor.onnx'; + MoonshineEncoder := ModelDir + 'moonshine-encoder.onnx'; + MoonshineUncachedDecoder := ModelDir + 'moonshine-uncached-decoder.onnx'; + MoonshineCachedDecoder := ModelDir + 'moonshine-cached-decoder.onnx'; + if not FileExists(VadFilename) then begin Status := VadFilename + ' does not exist! Please download it from' + @@ -344,6 +391,13 @@ procedure TMyInitThread.Execute; NeMoTransducerEncoder, NeMoTransducerDecoder, NeMoTransducerJoiner, 'nemo_transducer'); Msg := 'NeMo transducer'; end + else if FileExists(MoonshinePreprocessor) and FileExists(MoonshineEncoder) and FileExists(MoonshineUncachedDecoder) and FileExists(MoonshineCachedDecoder) then + begin + Form1.OfflineRecognizer := CreateOfflineRecognizerMoonshine(Tokens, + MoonshinePreprocessor, MoonshineEncoder, MoonshineUncachedDecoder, + MoonshineCachedDecoder); + Msg := 'Moonshine'; + end else begin Status := 'Please download at least one non-streaming speech recognition model first.'; diff --git a/scripts/lazarus/generate-subtitles.py b/scripts/lazarus/generate-subtitles.py index 6459886c1..b7d7f62a4 100755 --- a/scripts/lazarus/generate-subtitles.py +++ b/scripts/lazarus/generate-subtitles.py @@ -50,6 +50,20 @@ def get_models(): popd """, ), + Model( + model_name="sherpa-onnx-moonshine-tiny-en-int8", + lang="en", + short_name="moonshine_tiny", + cmd=""" + pushd $model_name + mv -v preprocess.onnx moonshine-preprocessor.onnx + mv -v encode.int8.onnx moonshine-encoder.onnx + mv -v uncached_decode.int8.onnx moonshine-uncached-decoder.onnx + mv -v cached_decode.int8.onnx moonshine-cached-decoder.onnx + + popd + """, + ), Model( model_name="sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17", lang="zh_en_ko_ja_yue",