Skip to content

Commit

Permalink
Use piper-phonemize to convert text to token IDs (#453)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Nov 30, 2023
1 parent db41778 commit 62dc3c3
Show file tree
Hide file tree
Showing 55 changed files with 1,048 additions and 192 deletions.
11 changes: 5 additions & 6 deletions .github/scripts/test-nodejs-npm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,13 @@ node ./test-online-transducer.js
rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20

# offline tts
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-vctk.tar.bz2
tar xvf vits-vctk.tar.bz2
rm vits-vctk.tar.bz2

curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
tar xf vits-piper-en_US-amy-low.tar.bz2
node ./test-offline-tts-en.js
rm -rf vits-vctk
rm vits-piper-en_US-amy-low.tar.bz2

curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
tar xvf vits-zh-aishell3.tar.bz2
rm vits-zh-aishell3.tar.bz2
node ./test-offline-tts-zh.js
rm -rf vits-zh-aishell3
rm vits-zh-aishell3.tar.bz2
18 changes: 18 additions & 0 deletions .github/scripts/test-offline-tts.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,24 @@ which $EXE
# test waves are saved in ./tts
mkdir ./tts

# Test case: synthesize English speech with the vits-piper-en_US-amy-low model.
log "------------------------------------------------------------"
log "vits-piper-en_US-amy-low"
log "------------------------------------------------------------"
# Download the model archive, unpack it, and delete the archive afterwards.
curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
tar xf vits-piper-en_US-amy-low.tar.bz2
rm vits-piper-en_US-amy-low.tar.bz2

# Run $EXE on a long multi-sentence passage. The model, the token table and the
# espeak-ng data directory all come from the unpacked archive; the result is
# written to ./tts/amy.wav.
# NOTE: no comments may be placed between the backslash-continued lines below.
$EXE \
--vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \
--vits-tokens=./vits-piper-en_US-amy-low/tokens.txt \
--vits-data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
--debug=1 \
--output-filename=./tts/amy.wav \
"“Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.” The sun shone bleakly in the sky, its meager light struggling to penetrate the thick foliage of the forest. Birds sang their songs up in the crowns of the trees, fluttering from one branch to the other. A blanket of total tranquility lied over the forest. The peace was only broken by the steady gallop of the horses of the soldiers who were traveling to their upcoming knighting the morrow at Camelot, and rowdy conversation. “Finally we will get what we deserve,” “It’s been about time,” Perceval agreed. “We’ve been risking our arses for the past two years. It’s the least they could give us.” Merlin remained ostensibly silent, refusing to join the verbal parade of self-aggrandizing his fellow soldiers have engaged in. He found it difficult to happy about anything, when even if they had won the war, he had lost everything else in the process."

# Report the type of the generated file, then remove the unpacked model directory.
file ./tts/amy.wav
rm -rf vits-piper-en_US-amy-low

log "------------------------------------------------------------"
log "vits-ljs test"
log "------------------------------------------------------------"
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/apk-tts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest]
total: ["12"]
index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11"]
total: ["30"]
index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29"]

steps:
- uses: actions/checkout@v4
Expand Down
80 changes: 80 additions & 0 deletions .github/workflows/test-build-wheel.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Builds the Python wheel from source on Linux, macOS and Windows for every
# Python version in the matrix, installs it, and checks that the installed
# `sherpa-onnx` executable can be found and run.
name: test-build-wheel

on:
  push:
    branches:
      - master

  pull_request:

  workflow_dispatch:

# Only keep the most recent run per ref; older in-progress runs are cancelled.
concurrency:
  group: test-build-wheel-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test-build-wheel:
    # The matrix key is `python-version` (with a hyphen); referencing
    # `matrix.python_version` would expand to an empty string.
    name: ${{ matrix.os }} ${{ matrix.python-version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
        python-version: ["3.8", "3.9", "3.10", "3.11"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          # One cache per (os, python version) pair.
          key: ${{ matrix.os }}-${{ matrix.python-version }}

      - name: Install python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip
          python3 -m pip install wheel twine setuptools

      - name: Build
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          export SHERPA_ONNX_MAKE_ARGS="VERBOSE=1 -j"

          python3 setup.py bdist_wheel
          ls -lh dist

      - name: Display wheel
        shell: bash
        run: |
          ls -lh dist

      - name: Install wheel
        shell: bash
        run: |
          pip install --verbose ./dist/*.whl

      - name: Test
        shell: bash
        run: |
          # For windows: prepend the hosted-toolcache bin directory of each
          # Python version to PATH so the installed executable can be found
          export PATH=/c/hostedtoolcache/windows/Python/3.7.9/x64/bin:$PATH
          export PATH=/c/hostedtoolcache/windows/Python/3.8.10/x64/bin:$PATH
          export PATH=/c/hostedtoolcache/windows/Python/3.9.13/x64/bin:$PATH
          export PATH=/c/hostedtoolcache/windows/Python/3.10.11/x64/bin:$PATH
          export PATH=/c/hostedtoolcache/windows/Python/3.11.6/x64/bin:$PATH

          which sherpa-onnx
          sherpa-onnx --help
4 changes: 4 additions & 0 deletions .github/workflows/test-nodejs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ jobs:
mkdir -p scripts/nodejs/lib/win-x64
dst=scripts/nodejs/lib/win-x64
fi
ls -lh build/install/lib/
rm -rf build/install/lib/pkgconfig
cp -v build/install/lib/* $dst/
- name: replace files
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,6 @@ xcuserdata/
vits-vctk
vits-zh-aishell3
jslint.mjs
vits-piper-en_US-amy-low
vits-piper-*-*-*
log
2 changes: 2 additions & 0 deletions android/SherpaOnnxTts/app/src/main/AndroidManifest.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:tools="http://schemas.android.com/tools">

<uses-permission android:name="android.permission.WRITE_INTERNAL_STORAGE" />

<application
android:allowBackup="true"
android:dataExtractionRules="@xml/data_extraction_rules"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.k2fsa.sherpa.onnx

import android.content.res.AssetManager
import android.media.MediaPlayer
import android.net.Uri
import android.os.Bundle
Expand All @@ -9,6 +10,8 @@ import android.widget.EditText
import android.widget.Toast
import androidx.appcompat.app.AppCompatActivity
import java.io.File
import java.io.FileOutputStream
import java.io.IOException

const val TAG = "sherpa-onnx"

Expand All @@ -19,7 +22,6 @@ class MainActivity : AppCompatActivity() {
private lateinit var speed: EditText
private lateinit var generate: Button
private lateinit var play: Button
private var hasFile: Boolean = false

override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState)
Expand All @@ -46,10 +48,10 @@ class MainActivity : AppCompatActivity() {
val sampleText = ""
text.setText(sampleText)

play.isEnabled = false;
play.isEnabled = false
}

fun onClickGenerate() {
private fun onClickGenerate() {
val sidInt = sid.text.toString().toIntOrNull()
if (sidInt == null || sidInt < 0) {
Toast.makeText(
Expand Down Expand Up @@ -77,7 +79,7 @@ class MainActivity : AppCompatActivity() {
return
}

play.isEnabled = false;
play.isEnabled = false
val audio = tts.generate(text = textStr, sid = sidInt, speed = speedFloat)

val filename = application.filesDir.absolutePath + "/generated.wav"
Expand All @@ -89,7 +91,7 @@ class MainActivity : AppCompatActivity() {
}
}

fun onClickPlay() {
private fun onClickPlay() {
val filename = application.filesDir.absolutePath + "/generated.wav"
val mediaPlayer = MediaPlayer.create(
applicationContext,
Expand All @@ -98,32 +100,104 @@ class MainActivity : AppCompatActivity() {
mediaPlayer.start()
}

fun initTts() {
var modelDir :String?
var modelName :String?
private fun initTts() {
var modelDir: String?
var modelName: String?
var ruleFsts: String?
var lexicon: String?
var dataDir: String?
var assets: AssetManager? = application.assets

// The purpose of such a design is to make the CI test easier
// Please see
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/apk/generate-tts-apk-script.py
modelDir = null
modelName = null
ruleFsts = null
lexicon = null
dataDir = null

// Example 1:
// modelDir = "vits-vctk"
// modelName = "vits-vctk.onnx"
// lexicon = "lexicon.txt"

// Example 2:
// modelDir = "vits-piper-en_US-lessac-medium"
// modelName = "en_US-lessac-medium.onnx"
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
// modelDir = "vits-piper-en_US-amy-low"
// modelName = "en_US-amy-low.onnx"
// dataDir = "vits-piper-en_US-amy-low/espeak-ng-data"

// Example 3:
// modelDir = "vits-zh-aishell3"
// modelName = "vits-aishell3.onnx"
// ruleFsts = "vits-zh-aishell3/rule.fst"
// lexicon = "lexicon.txt"

val config = getOfflineTtsConfig(modelDir = modelDir!!, modelName = modelName!!, ruleFsts = ruleFsts ?: "")!!
tts = OfflineTts(assetManager = application.assets, config = config)
if (dataDir != null) {
val newDir = copyDataDir(modelDir)
modelDir = newDir + "/" + modelDir
dataDir = newDir + "/" + dataDir
assets = null
}

val config = getOfflineTtsConfig(
modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "",
dataDir = dataDir ?: "",
ruleFsts = ruleFsts ?: ""
)!!

tts = OfflineTts(assetManager = assets, config = config)
}


/**
 * Copies the asset tree rooted at [dataDir] out of the APK via [copyAssets]
 * and returns the absolute path of the app-specific external files directory
 * that the tree was copied under.
 *
 * Throws a NullPointerException if the external files directory is unavailable.
 */
private fun copyDataDir(dataDir: String): String {
    println("data dir is $dataDir")
    copyAssets(dataDir)

    // Looked up only after the copy, matching the order of the log output.
    return application.getExternalFilesDir(null)!!.absolutePath.also { target ->
        println("newDataDir: $target")
    }
}

/**
 * Recursively copies the asset at [path] into the app-specific external files
 * directory, preserving the relative layout.
 *
 * An asset whose listing is empty is treated as a regular file and handed to
 * [copyFile]; otherwise a matching directory is created and every child is
 * copied in turn. Failures are logged, never thrown.
 */
private fun copyAssets(path: String) {
    try {
        val children = application.assets.list(path)
        if (children == null) {
            // list() may return null; previously this crashed with an NPE
            // that the IOException handler below did not catch.
            Log.e(TAG, "Failed to copy $path. Could not list assets")
            return
        }

        if (children.isEmpty()) {
            // Nothing listed under it: treat it as a file.
            copyFile(path)
            return
        }

        File("${application.getExternalFilesDir(null)}/$path").mkdirs()

        // Avoid a leading "/" when copying from the asset root.
        val prefix = if (path == "") "" else "$path/"
        for (child in children) {
            copyAssets(prefix + child)
        }
    } catch (ex: IOException) {
        Log.e(TAG, "Failed to copy $path. $ex")
    }
}

/**
 * Copies the single asset [filename] to the same relative path under the
 * app-specific external files directory.
 *
 * Both streams are closed even when the copy fails part-way. Any failure is
 * logged and swallowed so that one bad file does not abort the whole copy.
 */
private fun copyFile(filename: String) {
    try {
        val newFilename = application.getExternalFilesDir(null).toString() + "/" + filename
        // Log.i(TAG, "Copying $filename to $newFilename")
        // `use` closes each stream on both the success and the exception path.
        application.assets.open(filename).use { istream ->
            FileOutputStream(newFilename).use { ostream ->
                istream.copyTo(ostream)
            }
        }
    } catch (ex: Exception) {
        Log.e(TAG, "Failed to copy $filename, $ex")
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@ import android.content.res.AssetManager

data class OfflineTtsVitsModelConfig(
var model: String,
var lexicon: String,
var lexicon: String = "",
var tokens: String,
var dataDir: String = "",
var noiseScale: Float = 0.667f,
var noiseScaleW: Float = 0.8f,
var lengthScale: Float = 1.0f,
Expand All @@ -22,6 +23,7 @@ data class OfflineTtsModelConfig(
data class OfflineTtsConfig(
var model: OfflineTtsModelConfig,
var ruleFsts: String = "",
var maxNumSentences: Int = 2,
)

class GeneratedAudio(
Expand Down Expand Up @@ -117,18 +119,25 @@ class OfflineTts(
// please refer to
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html
// to download models
fun getOfflineTtsConfig(modelDir: String, modelName: String, ruleFsts: String): OfflineTtsConfig? {
fun getOfflineTtsConfig(
modelDir: String,
modelName: String,
lexicon: String,
dataDir: String,
ruleFsts: String
): OfflineTtsConfig? {
return OfflineTtsConfig(
model = OfflineTtsModelConfig(
vits = OfflineTtsVitsModelConfig(
model = "$modelDir/$modelName",
lexicon = "$modelDir/lexicon.txt",
tokens = "$modelDir/tokens.txt"
lexicon = "$modelDir/$lexicon",
tokens = "$modelDir/tokens.txt",
dataDir = "$dataDir"
),
numThreads = 2,
debug = true,
provider = "cpu",
),
ruleFsts=ruleFsts,
ruleFsts = ruleFsts,
)
}
1 change: 1 addition & 0 deletions build-android-arm64-v8a.sh
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,4 @@ cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake"
make -j4
make install/strip
cp -fv android-onnxruntime-libs/jni/arm64-v8a/libonnxruntime.so install/lib
rm -rf install/lib/pkgconfig
1 change: 1 addition & 0 deletions build-android-armv7-eabi.sh
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,4 @@ cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake"
make -j4
make install/strip
cp -fv android-onnxruntime-libs/jni/armeabi-v7a/libonnxruntime.so install/lib
rm -rf install/lib/pkgconfig
1 change: 1 addition & 0 deletions build-android-x86-64.sh
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,4 @@ cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake"
make -j4
make install/strip
cp -fv android-onnxruntime-libs/jni/x86_64/libonnxruntime.so install/lib
rm -rf install/lib/pkgconfig
1 change: 1 addition & 0 deletions build-android-x86.sh
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,4 @@ cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake"
make -j4
make install/strip
cp -fv android-onnxruntime-libs/jni/x86/libonnxruntime.so install/lib
rm -rf install/lib/pkgconfig
Loading

0 comments on commit 62dc3c3

Please sign in to comment.