From db41778e995277c6aee898ac75ff90fd05a6e837 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Tue, 28 Nov 2023 19:12:58 +0800 Subject: [PATCH] Support piper-phonemize (#452) --- .github/scripts/test-offline-ctc.sh | 4 +- .github/workflows/arm-linux-gnueabihf.yaml | 3 + .github/workflows/test-piper-phonemize.yaml | 82 +++++++++++++ CMakeLists.txt | 7 ++ build-aarch64-linux-gnu.sh | 4 + build-android-arm64-v8a.sh | 4 + build-android-armv7-eabi.sh | 4 + build-android-x86-64.sh | 4 + build-android-x86.sh | 4 + build-arm-linux-gnueabihf.sh | 4 + build-ios.sh | 12 ++ cmake/espeak-ng-for-piper.cmake | 127 ++++++++++++++++++++ cmake/kaldi-decoder.cmake | 7 ++ cmake/onnxruntime.cmake | 1 + cmake/piper-phonemize.cmake | 73 +++++++++++ sherpa-onnx/csrc/CMakeLists.txt | 8 ++ sherpa-onnx/csrc/piper-phonemize-test.cc | 78 ++++++++++++ 17 files changed, 424 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/test-piper-phonemize.yaml create mode 100644 cmake/espeak-ng-for-piper.cmake create mode 100644 cmake/piper-phonemize.cmake create mode 100644 sherpa-onnx/csrc/piper-phonemize-test.cc diff --git a/.github/scripts/test-offline-ctc.sh b/.github/scripts/test-offline-ctc.sh index 16f7889df..a6d16d97d 100755 --- a/.github/scripts/test-offline-ctc.sh +++ b/.github/scripts/test-offline-ctc.sh @@ -19,10 +19,10 @@ log "------------------------------------------------------------" wenet_models=( sherpa-onnx-zh-wenet-aishell sherpa-onnx-zh-wenet-aishell2 -sherpa-onnx-zh-wenet-wenetspeech +# sherpa-onnx-zh-wenet-wenetspeech sherpa-onnx-zh-wenet-multi-cn sherpa-onnx-en-wenet-librispeech -sherpa-onnx-en-wenet-gigaspeech +# sherpa-onnx-en-wenet-gigaspeech ) for name in ${wenet_models[@]}; do repo_url=https://huggingface.co/csukuangfj/$name diff --git a/.github/workflows/arm-linux-gnueabihf.yaml b/.github/workflows/arm-linux-gnueabihf.yaml index cb342935a..541fc3e57 100644 --- a/.github/workflows/arm-linux-gnueabihf.yaml +++ b/.github/workflows/arm-linux-gnueabihf.yaml @@ 
-168,6 +168,9 @@ jobs: lib_type=${{ matrix.lib_type }} if [[ $lib_type == "shared" ]]; then cp -a build-arm-linux-gnueabihf/install/lib $dst/ + rm -v $dst/lib/libasound.so + rm -v $dst/lib/libonnxruntime.so + rm -v $dst/lib/libsherpa-onnx-fst.so fi tree $dst diff --git a/.github/workflows/test-piper-phonemize.yaml b/.github/workflows/test-piper-phonemize.yaml new file mode 100644 index 000000000..ab1c39387 --- /dev/null +++ b/.github/workflows/test-piper-phonemize.yaml @@ -0,0 +1,82 @@ +name: test-piper-phonemize +on: + push: + branches: + - master + + workflow_dispatch: + +concurrency: + group: test-piper-phonemize-${{ github.ref }} + cancel-in-progress: true + + +jobs: + test_piper_phonemize: + name: ${{ matrix.os }} ${{ matrix.build_type }} ${{ matrix.shared_lib }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + build_type: [Release, Debug] + shared_lib: [ON, OFF] + exclude: + - os: windows-latest + build_type: Debug + shared_lib: OFF + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ matrix.os }}-${{ matrix.build_type }}-shared-${{ matrix.shared_lib }} + + - name: Configure CMake + shell: bash + run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + cmake --version + + mkdir build + cd build + cmake -DCMAKE_VERBOSE_MAKEFILE=ON -D SHERPA_ONNX_ENABLE_TESTS=ON -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} -D BUILD_SHARED_LIBS=${{ matrix.shared_lib }} -DCMAKE_INSTALL_PREFIX=./install .. + + - name: Build + shell: bash + run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + cmake --version + + cd build + cmake --build . 
--target install --config ${{ matrix.build_type }} + + - name: run test + if: matrix.os != 'windows-latest' + shell: bash + run: | + cd build + + ls -lh install/ + ls -lh install/share + ls -lh install/share/espeak-ng-data/ + + ./bin/piper-phonemize-test + + - name: run test + if: matrix.os == 'windows-latest' + shell: bash + run: | + cd build + + ls -lh install/ + ls -lh install/share + ls -lh install/share/espeak-ng-data/ + + ./bin/${{ matrix.build_type }}/piper-phonemize-test diff --git a/CMakeLists.txt b/CMakeLists.txt index eba77a2d8..27b3d2c0d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -159,6 +159,8 @@ endif() include(kaldi-native-fbank) include(kaldi-decoder) include(onnxruntime) +set(ONNXRUNTIME_DIR ${onnxruntime_SOURCE_DIR}) +message(STATUS "ONNXRUNTIME_DIR: ${ONNXRUNTIME_DIR}") if(SHERPA_ONNX_ENABLE_PORTAUDIO) include(portaudio) @@ -178,6 +180,11 @@ if(SHERPA_ONNX_ENABLE_WEBSOCKET) include(asio) endif() +include(espeak-ng-for-piper) +set(ESPEAK_NG_DIR ${espeak_ng_SOURCE_DIR}) +message(STATUS "ESPEAK_NG_DIR: ${ESPEAK_NG_DIR}") +include(piper-phonemize) + add_subdirectory(sherpa-onnx) if(SHERPA_ONNX_ENABLE_C_API) diff --git a/build-aarch64-linux-gnu.sh b/build-aarch64-linux-gnu.sh index 469812384..164182c79 100755 --- a/build-aarch64-linux-gnu.sh +++ b/build-aarch64-linux-gnu.sh @@ -45,6 +45,10 @@ if [[ x"$BUILD_SHARED_LIBS" == x"" ]]; then fi cmake \ + -DBUILD_PIPER_PHONMIZE_EXE=OFF \ + -DBUILD_PIPER_PHONMIZE_TESTS=OFF \ + -DBUILD_ESPEAK_NG_EXE=OFF \ + -DBUILD_ESPEAK_NG_TESTS=OFF \ -DCMAKE_INSTALL_PREFIX=./install \ -DCMAKE_BUILD_TYPE=Release \ -DBUILD_SHARED_LIBS=$BUILD_SHARED_LIBS \ diff --git a/build-android-arm64-v8a.sh b/build-android-arm64-v8a.sh index 5e46aa5a6..8297561f7 100755 --- a/build-android-arm64-v8a.sh +++ b/build-android-arm64-v8a.sh @@ -72,6 +72,10 @@ echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR" echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR" cmake 
-DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \ + -DBUILD_PIPER_PHONMIZE_EXE=OFF \ + -DBUILD_PIPER_PHONMIZE_TESTS=OFF \ + -DBUILD_ESPEAK_NG_EXE=OFF \ + -DBUILD_ESPEAK_NG_TESTS=OFF \ -DCMAKE_BUILD_TYPE=Release \ -DBUILD_SHARED_LIBS=ON \ -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ diff --git a/build-android-armv7-eabi.sh b/build-android-armv7-eabi.sh index 14a07bad5..9157f4cdb 100755 --- a/build-android-armv7-eabi.sh +++ b/build-android-armv7-eabi.sh @@ -73,6 +73,10 @@ echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR" echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR" cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \ + -DBUILD_PIPER_PHONMIZE_EXE=OFF \ + -DBUILD_PIPER_PHONMIZE_TESTS=OFF \ + -DBUILD_ESPEAK_NG_EXE=OFF \ + -DBUILD_ESPEAK_NG_TESTS=OFF \ -DCMAKE_BUILD_TYPE=Release \ -DBUILD_SHARED_LIBS=ON \ -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ diff --git a/build-android-x86-64.sh b/build-android-x86-64.sh index d4196a1ca..02a7b9590 100755 --- a/build-android-x86-64.sh +++ b/build-android-x86-64.sh @@ -73,6 +73,10 @@ echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR" echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR" cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \ + -DBUILD_PIPER_PHONMIZE_EXE=OFF \ + -DBUILD_PIPER_PHONMIZE_TESTS=OFF \ + -DBUILD_ESPEAK_NG_EXE=OFF \ + -DBUILD_ESPEAK_NG_TESTS=OFF \ -DCMAKE_BUILD_TYPE=Release \ -DBUILD_SHARED_LIBS=ON \ -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ diff --git a/build-android-x86.sh b/build-android-x86.sh index 2eb6fecc4..1b6194f37 100755 --- a/build-android-x86.sh +++ b/build-android-x86.sh @@ -73,6 +73,10 @@ echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR" echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR" cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \ + -DBUILD_PIPER_PHONMIZE_EXE=OFF \ + -DBUILD_PIPER_PHONMIZE_TESTS=OFF \ + 
-DBUILD_ESPEAK_NG_EXE=OFF \ + -DBUILD_ESPEAK_NG_TESTS=OFF \ -DCMAKE_BUILD_TYPE=Release \ -DBUILD_SHARED_LIBS=ON \ -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ diff --git a/build-arm-linux-gnueabihf.sh b/build-arm-linux-gnueabihf.sh index 90b6e7c18..06db39cb0 100755 --- a/build-arm-linux-gnueabihf.sh +++ b/build-arm-linux-gnueabihf.sh @@ -40,6 +40,10 @@ if [[ x"$BUILD_SHARED_LIBS" == x"" ]]; then fi cmake \ + -DBUILD_PIPER_PHONMIZE_EXE=OFF \ + -DBUILD_PIPER_PHONMIZE_TESTS=OFF \ + -DBUILD_ESPEAK_NG_EXE=OFF \ + -DBUILD_ESPEAK_NG_TESTS=OFF \ -DCMAKE_INSTALL_PREFIX=./install \ -DCMAKE_BUILD_TYPE=Release \ -DBUILD_SHARED_LIBS=$BUILD_SHARED_LIBS \ diff --git a/build-ios.sh b/build-ios.sh index 447d7fbb6..5f82c40b5 100755 --- a/build-ios.sh +++ b/build-ios.sh @@ -51,6 +51,10 @@ echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR" # cmake \ + -DBUILD_PIPER_PHONMIZE_EXE=OFF \ + -DBUILD_PIPER_PHONMIZE_TESTS=OFF \ + -DBUILD_ESPEAK_NG_EXE=OFF \ + -DBUILD_ESPEAK_NG_TESTS=OFF \ -S .. \ -DCMAKE_TOOLCHAIN_FILE=./toolchains/ios.toolchain.cmake \ -DPLATFORM=SIMULATOR64 \ @@ -74,6 +78,10 @@ cmake --build build/simulator_x86_64 -j 4 --verbose echo "Building for simulator (arm64)" cmake \ + -DBUILD_PIPER_PHONMIZE_EXE=OFF \ + -DBUILD_PIPER_PHONMIZE_TESTS=OFF \ + -DBUILD_ESPEAK_NG_EXE=OFF \ + -DBUILD_ESPEAK_NG_TESTS=OFF \ -S .. \ -DCMAKE_TOOLCHAIN_FILE=./toolchains/ios.toolchain.cmake \ -DPLATFORM=SIMULATORARM64 \ @@ -101,6 +109,10 @@ export SHERPA_ONNXRUNTIME_LIB_DIR=$PWD/ios-onnxruntime/onnxruntime.xcframework/i cmake \ + -DBUILD_PIPER_PHONMIZE_EXE=OFF \ + -DBUILD_PIPER_PHONMIZE_TESTS=OFF \ + -DBUILD_ESPEAK_NG_EXE=OFF \ + -DBUILD_ESPEAK_NG_TESTS=OFF \ -S .. 
\ -DCMAKE_TOOLCHAIN_FILE=./toolchains/ios.toolchain.cmake \ -DPLATFORM=OS64 \ diff --git a/cmake/espeak-ng-for-piper.cmake b/cmake/espeak-ng-for-piper.cmake new file mode 100644 index 000000000..e1dfb2fc0 --- /dev/null +++ b/cmake/espeak-ng-for-piper.cmake @@ -0,0 +1,127 @@ +function(download_espeak_ng_for_piper) + include(FetchContent) + + set(espeak_ng_URL "https://github.com/csukuangfj/espeak-ng/archive/c58d2a4a88e9a291ca448f046e15c6188cbd3b3a.zip") + set(espeak_ng_URL2 "") + set(espeak_ng_HASH "SHA256=8a48251e6926133dd91fcf6cb210c7c2e290a9b578d269446e2d32d710b0dfa0") + + set(USE_ASYNC OFF CACHE BOOL "" FORCE) + set(USE_MBROLA OFF CACHE BOOL "" FORCE) + set(USE_LIBSONIC OFF CACHE BOOL "" FORCE) + set(USE_LIBPCAUDIO OFF CACHE BOOL "" FORCE) + set(USE_KLATT OFF CACHE BOOL "" FORCE) + set(USE_SPEECHPLAYER OFF CACHE BOOL "" FORCE) + set(EXTRA_cmn ON CACHE BOOL "" FORCE) + set(EXTRA_ru ON CACHE BOOL "" FORCE) + + # If you don't have access to the Internet, please pre-download espeak-ng + # to one of the locations below; the first one that exists is used. + set(possible_file_locations + $ENV{HOME}/Downloads/espeak-ng-c58d2a4a88e9a291ca448f046e15c6188cbd3b3a.zip + ${PROJECT_SOURCE_DIR}/espeak-ng-c58d2a4a88e9a291ca448f046e15c6188cbd3b3a.zip + ${PROJECT_BINARY_DIR}/espeak-ng-c58d2a4a88e9a291ca448f046e15c6188cbd3b3a.zip + /tmp/espeak-ng-c58d2a4a88e9a291ca448f046e15c6188cbd3b3a.zip + /star-fj/fangjun/download/github/espeak-ng-c58d2a4a88e9a291ca448f046e15c6188cbd3b3a.zip + ) + + foreach(f IN LISTS possible_file_locations) + if(EXISTS ${f}) + set(espeak_ng_URL "${f}") + file(TO_CMAKE_PATH "${espeak_ng_URL}" espeak_ng_URL) + message(STATUS "Found local downloaded espeak-ng: ${espeak_ng_URL}") + set(espeak_ng_URL2 ) # no second URL is passed for a local archive + break() + endif() + endforeach() + + FetchContent_Declare(espeak_ng + URL + ${espeak_ng_URL} + ${espeak_ng_URL2} + URL_HASH ${espeak_ng_HASH} + ) + + FetchContent_GetProperties(espeak_ng) + if(NOT espeak_ng_POPULATED) + message(STATUS "Downloading espeak-ng from ${espeak_ng_URL}") + FetchContent_Populate(espeak_ng) + 
endif() + message(STATUS "espeak-ng is downloaded to ${espeak_ng_SOURCE_DIR}") + message(STATUS "espeak-ng binary dir is ${espeak_ng_BINARY_DIR}") + + add_subdirectory(${espeak_ng_SOURCE_DIR} ${espeak_ng_BINARY_DIR}) + set(espeak_ng_SOURCE_DIR ${espeak_ng_SOURCE_DIR} PARENT_SCOPE) + + if(WIN32 AND MSVC) + target_compile_options(ucd PUBLIC + /wd4309 + ) + + target_compile_options(espeak-ng PUBLIC + /wd4005 + /wd4018 + /wd4067 + /wd4068 + /wd4090 + /wd4101 + /wd4244 + /wd4267 + /wd4996 + ) + + if(TARGET espeak-ng-bin) + target_compile_options(espeak-ng-bin PRIVATE + /wd4244 + /wd4024 + /wd4047 + /wd4067 + /wd4267 + /wd4996 + ) + endif() + endif() + + if(UNIX AND NOT APPLE) + target_compile_options(espeak-ng PRIVATE + -Wno-unused-result + -Wno-format-overflow + -Wno-format-truncation + -Wno-maybe-uninitialized + -Wno-format + ) + + if(TARGET espeak-ng-bin) + target_compile_options(espeak-ng-bin PRIVATE + -Wno-unused-result + ) + endif() + endif() + + target_include_directories(espeak-ng + INTERFACE + ${espeak_ng_SOURCE_DIR}/src/include + ${espeak_ng_SOURCE_DIR}/src/ucd-tools/src/include + ) + + if(SHERPA_ONNX_ENABLE_PYTHON AND WIN32) + install(TARGETS + espeak-ng + DESTINATION ..) 
+ else() + install(TARGETS + espeak-ng + DESTINATION lib) + endif() + + if(NOT BUILD_SHARED_LIBS) + install(TARGETS ucd DESTINATION lib) + endif() + + if(WIN32 AND BUILD_SHARED_LIBS) + install(TARGETS + espeak-ng + DESTINATION bin) + endif() +endfunction() + +download_espeak_ng_for_piper() diff --git a/cmake/kaldi-decoder.cmake b/cmake/kaldi-decoder.cmake index 9f85d70a7..25084febf 100644 --- a/cmake/kaldi-decoder.cmake +++ b/cmake/kaldi-decoder.cmake @@ -47,6 +47,13 @@ function(download_kaldi_decoder) include_directories(${kaldi_decoder_SOURCE_DIR}) add_subdirectory(${kaldi_decoder_SOURCE_DIR} ${kaldi_decoder_BINARY_DIR} EXCLUDE_FROM_ALL) + if(WIN32 AND MSVC) + target_compile_options(kaldi-decoder-core PUBLIC + /wd4018 + /wd4291 + ) + endif() + target_include_directories(kaldi-decoder-core INTERFACE ${kaldi-decoder_SOURCE_DIR}/ diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake index 28f18540f..5775df9e6 100644 --- a/cmake/onnxruntime.cmake +++ b/cmake/onnxruntime.cmake @@ -94,6 +94,7 @@ function(download_onnxruntime) message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") message(FATAL_ERROR "Only support Linux, macOS, and Windows at present. 
Will support other OSes later") endif() + set(onnxruntime_SOURCE_DIR ${onnxruntime_SOURCE_DIR} PARENT_SCOPE) endfunction() # First, we try to locate the header and the lib if the use has already diff --git a/cmake/piper-phonemize.cmake b/cmake/piper-phonemize.cmake new file mode 100644 index 000000000..13b6bca6e --- /dev/null +++ b/cmake/piper-phonemize.cmake @@ -0,0 +1,73 @@ +function(download_piper_phonemize) # fetch pinned piper-phonemize, add it as a subproject, install its library + include(FetchContent) + + set(piper_phonemize_URL "https://github.com/csukuangfj/piper-phonemize/archive/6383e46b62e94c5cafc0c6a6212249ed8b9ed8d0.zip") + set(piper_phonemize_URL2 "") + set(piper_phonemize_HASH "SHA256=6fbacf540b03f00d1386bb372fb7090e3bb852bd019d74e615d3f161f728bc93") + + # If you don't have access to the Internet, please pre-download piper-phonemize + # to one of the locations below; the first one that exists is used. + set(possible_file_locations + $ENV{HOME}/Downloads/piper-phonemize-6383e46b62e94c5cafc0c6a6212249ed8b9ed8d0.zip + ${PROJECT_SOURCE_DIR}/piper-phonemize-6383e46b62e94c5cafc0c6a6212249ed8b9ed8d0.zip + ${PROJECT_BINARY_DIR}/piper-phonemize-6383e46b62e94c5cafc0c6a6212249ed8b9ed8d0.zip + /tmp/piper-phonemize-6383e46b62e94c5cafc0c6a6212249ed8b9ed8d0.zip + /star-fj/fangjun/download/github/piper-phonemize-6383e46b62e94c5cafc0c6a6212249ed8b9ed8d0.zip + ) + + foreach(f IN LISTS possible_file_locations) + if(EXISTS ${f}) + set(piper_phonemize_URL "${f}") + file(TO_CMAKE_PATH "${piper_phonemize_URL}" piper_phonemize_URL) + message(STATUS "Found local downloaded piper-phonemize: ${piper_phonemize_URL}") + set(piper_phonemize_URL2 ) # no second URL is passed for a local archive + break() + endif() + endforeach() + + FetchContent_Declare(piper_phonemize + URL + ${piper_phonemize_URL} + ${piper_phonemize_URL2} + URL_HASH ${piper_phonemize_HASH} + ) + + FetchContent_GetProperties(piper_phonemize) + if(NOT piper_phonemize_POPULATED) + message(STATUS "Downloading piper-phonemize from ${piper_phonemize_URL}") + FetchContent_Populate(piper_phonemize) + endif() + message(STATUS "piper-phonemize is downloaded to ${piper_phonemize_SOURCE_DIR}") + message(STATUS 
"piper-phonemize binary dir is ${piper_phonemize_BINARY_DIR}") + + add_subdirectory(${piper_phonemize_SOURCE_DIR} ${piper_phonemize_BINARY_DIR} EXCLUDE_FROM_ALL) # its targets are built only when something depends on them + + if(WIN32 AND MSVC) + target_compile_options(piper_phonemize PUBLIC + /wd4309 + ) + endif() + + target_include_directories(piper_phonemize + INTERFACE + ${piper_phonemize_SOURCE_DIR}/src/include + ) + + if(SHERPA_ONNX_ENABLE_PYTHON AND WIN32) + install(TARGETS + piper_phonemize + DESTINATION ..) + else() + install(TARGETS + piper_phonemize + DESTINATION lib) + endif() + + if(WIN32 AND BUILD_SHARED_LIBS) + install(TARGETS + piper_phonemize + DESTINATION bin) + endif() +endfunction() + +download_piper_phonemize() diff --git a/sherpa-onnx/csrc/CMakeLists.txt b/sherpa-onnx/csrc/CMakeLists.txt index 403931028..02ec56c0b 100644 --- a/sherpa-onnx/csrc/CMakeLists.txt +++ b/sherpa-onnx/csrc/CMakeLists.txt @@ -106,6 +106,11 @@ if(SHERPA_ONNX_ENABLE_CHECK) list(APPEND sources log.cc) endif() add_library(sherpa-onnx-core ${sources}) +if(APPLE) + target_compile_options(sherpa-onnx-core PRIVATE + -Wno-deprecated-declarations + ) +endif() if(NOT WIN32) target_link_libraries(sherpa-onnx-core -pthread) @@ -136,6 +141,8 @@ if(SHERPA_ONNX_ENABLE_GPU) ) endif() +target_link_libraries(sherpa-onnx-core piper_phonemize) + if(SHERPA_ONNX_ENABLE_CHECK) target_compile_definitions(sherpa-onnx-core PUBLIC SHERPA_ONNX_ENABLE_CHECK=1) @@ -343,6 +350,7 @@ if(SHERPA_ONNX_ENABLE_TESTS) context-graph-test.cc packed-sequence-test.cc pad-sequence-test.cc + piper-phonemize-test.cc slice-test.cc stack-test.cc transpose-test.cc diff --git a/sherpa-onnx/csrc/piper-phonemize-test.cc b/sherpa-onnx/csrc/piper-phonemize-test.cc new file mode 100644 index 000000000..b1d0790a6 --- /dev/null +++ b/sherpa-onnx/csrc/piper-phonemize-test.cc @@ -0,0 +1,78 @@ +// sherpa-onnx/csrc/piper-phonemize-test.cc +// +// Copyright (c) 2023 Xiaomi Corporation + +#include "espeak-ng/speak_lib.h" +#include "gtest/gtest.h" +#include "phoneme_ids.hpp" +#include 
"phonemize.hpp"
+#include "sherpa-onnx/csrc/file-utils.h"
+#include "sherpa-onnx/csrc/macros.h"
+
+namespace sherpa_onnx {
+// Smoke test: text -> espeak-ng phonemes -> piper phoneme IDs.
+TEST(PiperPhonemize, Case1) {
+  std::string data_dir = "./install/share/espeak-ng-data";
+  if (!FileExists(data_dir + "/en_dict")) {
+    SHERPA_ONNX_LOGE("%s/en_dict does not exist. Skipping test",
+                     data_dir.c_str());
+    return;  // data files are not installed: return without failing
+  }
+
+  if (!FileExists(data_dir + "/phontab")) {
+    SHERPA_ONNX_LOGE("%s/phontab does not exist. Skipping test",
+                     data_dir.c_str());
+    return;
+  }
+
+  if (!FileExists(data_dir + "/phonindex")) {
+    SHERPA_ONNX_LOGE("%s/phonindex does not exist. Skipping test",
+                     data_dir.c_str());
+    return;
+  }
+
+  if (!FileExists(data_dir + "/phondata")) {
+    SHERPA_ONNX_LOGE("%s/phondata does not exist. Skipping test",
+                     data_dir.c_str());
+    return;
+  }
+
+  if (!FileExists(data_dir + "/intonations")) {
+    SHERPA_ONNX_LOGE("%s/intonations does not exist. Skipping test",
+                     data_dir.c_str());
+    return;
+  }
+  int32_t result =
+      espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, 0, data_dir.c_str(), 0);
+  ASSERT_EQ(result, 22050);  // sample rate in Hz; -1 means init failed
+
+  piper::eSpeakPhonemeConfig config;
+
+  // ./bin/espeak-ng --path ./install/share/espeak-ng-data/ --voices
+  // to list available voices
+  config.voice = "en-us";
+
+  std::vector<std::vector<piper::Phoneme>> phonemes;
+  std::string text = "how are you doing?";
+  piper::phonemize_eSpeak(text, config, phonemes);
+  ASSERT_FALSE(phonemes.empty());  // phonemes[0] is read below
+  for (int32_t p : phonemes[0]) {
+    std::cout << p << " ";
+  }
+  std::cout << "\n";
+
+  std::vector<piper::PhonemeId> phonemeIds;
+  std::map<piper::Phoneme, std::size_t> missingPhonemes;
+
+  {
+    piper::PhonemeIdConfig config;  // shadows the eSpeak config above
+    phonemes_to_ids(phonemes[0], config, phonemeIds, missingPhonemes);
+  }
+
+  for (piper::PhonemeId p : phonemeIds) {
+    std::cout << p << " ";
+  }
+  std::cout << "\n";
+}
+
+}  // namespace sherpa_onnx