Commit

Add TTS API and examples for Dart (#1010)
csukuangfj authored Jun 15, 2024
1 parent e307767 commit e52d32b
Showing 20 changed files with 874 additions and 0 deletions.
16 changes: 16 additions & 0 deletions .github/scripts/test-dart.sh
@@ -4,6 +4,22 @@ set -ex

cd dart-api-examples

pushd tts

echo '----------piper tts----------'
./run-piper.sh
rm -rf vits-piper-*

echo '----------coqui tts----------'
./run-coqui.sh
rm -rf vits-coqui-*

echo '----------zh tts----------'
./run-zh.sh
rm -rf sherpa-onnx-*

popd # tts

pushd streaming-asr

echo '----------streaming zipformer ctc HLG----------'
1 change: 1 addition & 0 deletions .github/workflows/test-dart.yaml
@@ -92,5 +92,6 @@ jobs:
cp scripts/dart/vad-pubspec.yaml dart-api-examples/vad/pubspec.yaml
cp scripts/dart/non-streaming-asr-pubspec.yaml dart-api-examples/non-streaming-asr/pubspec.yaml
cp scripts/dart/streaming-asr-pubspec.yaml dart-api-examples/streaming-asr/pubspec.yaml
cp scripts/dart/tts-pubspec.yaml dart-api-examples/tts/pubspec.yaml
.github/scripts/test-dart.sh
1 change: 1 addition & 0 deletions dart-api-examples/streaming-asr/README.md
@@ -1,6 +1,7 @@
# Introduction

This folder contains examples for streaming ASR with the Dart API.

| File | Description|
|------|------------|
|[./bin/nemo-transducer.dart](./bin/nemo-transducer.dart)| Use a NeMo transducer model for speech recognition. See [./run-nemo-transducer.sh](./run-nemo-transducer.sh)|
3 changes: 3 additions & 0 deletions dart-api-examples/tts/.gitignore
@@ -0,0 +1,3 @@
# https://dart.dev/guides/libraries/private-files
# Created by `dart pub`
.dart_tool/
3 changes: 3 additions & 0 deletions dart-api-examples/tts/CHANGELOG.md
@@ -0,0 +1,3 @@
## 1.0.0

- Initial version.
10 changes: 10 additions & 0 deletions dart-api-examples/tts/README.md
@@ -0,0 +1,10 @@
# Introduction

This folder contains examples for text-to-speech (TTS) with the Dart API. All three examples share the same basic flow; a condensed sketch follows the table below.

| File | Description|
|------|------------|
|[./bin/piper.dart](./bin/piper.dart)| Use a Piper TTS model for text-to-speech. See [./run-piper.sh](./run-piper.sh)|
|[./bin/coqui.dart](./bin/coqui.dart)| Use a Coqui TTS model for text-to-speech. See [./run-coqui.sh](./run-coqui.sh)|
|[./bin/zh.dart](./bin/zh.dart)| Use a Chinese VITS TTS model for text-to-speech. See [./run-zh.sh](./run-zh.sh)|
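
The condensed sketch below is not one of the committed files; it only restates the calls that appear in `piper.dart`, `coqui.dart`, and `zh.dart` further down, with placeholder paths and text, and it omits per-example options such as `dataDir`, `lexicon`, and rule FSTs.

```dart
// Condensed sketch of the flow shared by piper.dart, coqui.dart, and zh.dart.
// Paths and text are placeholders; each real example also awaits
// initSherpaOnnx() from bin/init.dart before touching the bindings.
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

void main() {
  final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
    model: 'path/to/model.onnx',
    tokens: 'path/to/tokens.txt',
    lengthScale: 1.0, // the examples pass 1 / speed
  );

  final config = sherpa_onnx.OfflineTtsConfig(
    model: sherpa_onnx.OfflineTtsModelConfig(vits: vits, numThreads: 1),
    maxNumSenetences: 1, // spelling follows the binding used by these examples
  );

  final tts = sherpa_onnx.OfflineTts(config);
  final audio = tts.generate(text: 'hello', sid: 0, speed: 1.0);
  tts.free();

  sherpa_onnx.writeWave(
    filename: 'out.wav',
    samples: audio.samples,
    sampleRate: audio.sampleRate,
  );
}
```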

30 changes: 30 additions & 0 deletions dart-api-examples/tts/analysis_options.yaml
@@ -0,0 +1,30 @@
# This file configures the static analysis results for your project (errors,
# warnings, and lints).
#
# This enables the 'recommended' set of lints from `package:lints`.
# This set helps identify many issues that may lead to problems when running
# or consuming Dart code, and enforces writing Dart using a single, idiomatic
# style and format.
#
# If you want a smaller set of lints you can change this to specify
# 'package:lints/core.yaml'. These are just the most critical lints
# (the recommended set includes the core lints).
# The core lints are also what is used by pub.dev for scoring packages.

include: package:lints/recommended.yaml

# Uncomment the following section to specify additional rules.

# linter:
# rules:
# - camel_case_types

# analyzer:
# exclude:
# - path/to/excluded/files/**

# For more information about the core and recommended set of lints, see
# https://dart.dev/go/core-lints

# For additional information about configuring this file, see
# https://dart.dev/guides/language/analysis-options
69 changes: 69 additions & 0 deletions dart-api-examples/tts/bin/coqui.dart
@@ -0,0 +1,69 @@
// Copyright (c) 2024 Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
await initSherpaOnnx();

final parser = ArgParser()
..addOption('model', help: 'Path to the ONNX model')
..addOption('tokens', help: 'Path to tokens.txt')
..addOption('text', help: 'Text to generate TTS for')
..addOption('output-wav', help: 'Filename to save the generated audio')
..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
..addOption(
'sid',
help: 'Speaker ID to select. Used only for multi-speaker TTS',
defaultsTo: '0',
);
final res = parser.parse(arguments);
if (res['model'] == null ||
res['tokens'] == null ||
res['output-wav'] == null ||
res['text'] == null) {
print(parser.usage);
exit(1);
}
final model = res['model'] as String;
final tokens = res['tokens'] as String;
final text = res['text'] as String;
final outputWav = res['output-wav'] as String;
var speed = double.tryParse(res['speed'] as String) ?? 1.0;
final sid = int.tryParse(res['sid'] as String) ?? 0;

if (speed == 0) {
speed = 1.0;
}

final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
model: model,
tokens: tokens,
lengthScale: 1 / speed,
);

final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
vits: vits,
numThreads: 1,
debug: true,
);
final config = sherpa_onnx.OfflineTtsConfig(
model: modelConfig,
maxNumSenetences: 1,
);

final tts = sherpa_onnx.OfflineTts(config);
final audio = tts.generate(text: text, sid: sid, speed: speed);
tts.free();

sherpa_onnx.writeWave(
filename: outputWav,
samples: audio.samples,
sampleRate: audio.sampleRate,
);
print('Saved to ${outputWav}');
}
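
A small variation on the example above, shown only as a sketch (`synthesizeToWav` is not part of the package): wrapping the same calls in try/finally guarantees the native handle is freed even if generation or writing the wave file throws.

```dart
// Sketch only: the same generate-and-save flow as coqui.dart, with try/finally
// so OfflineTts.free() runs even when an exception is thrown.
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

void synthesizeToWav(
  sherpa_onnx.OfflineTtsConfig config,
  String text,
  int sid,
  double speed,
  String outputWav,
) {
  final tts = sherpa_onnx.OfflineTts(config);
  try {
    final audio = tts.generate(text: text, sid: sid, speed: speed);
    sherpa_onnx.writeWave(
      filename: outputWav,
      samples: audio.samples,
      sampleRate: audio.sampleRate,
    );
  } finally {
    tts.free();
  }
}
```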
1 change: 1 addition & 0 deletions dart-api-examples/tts/bin/init.dart
80 changes: 80 additions & 0 deletions dart-api-examples/tts/bin/piper.dart
@@ -0,0 +1,80 @@
// Copyright (c) 2024 Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
await initSherpaOnnx();

final parser = ArgParser()
..addOption('model', help: 'Path to the ONNX model')
..addOption('tokens', help: 'Path to tokens.txt')
..addOption('data-dir', help: 'Path to espeak-ng-data directory')
..addOption('text', help: 'Text to generate TTS for')
..addOption('output-wav', help: 'Filename to save the generated audio')
..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
..addOption(
'sid',
help: 'Speaker ID to select. Used only for multi-speaker TTS',
defaultsTo: '0',
);
final res = parser.parse(arguments);
if (res['model'] == null ||
res['tokens'] == null ||
res['data-dir'] == null ||
res['output-wav'] == null ||
res['text'] == null) {
print(parser.usage);
exit(1);
}
final model = res['model'] as String;
final tokens = res['tokens'] as String;
final dataDir = res['data-dir'] as String;
final text = res['text'] as String;
final outputWav = res['output-wav'] as String;
var speed = double.tryParse(res['speed'] as String) ?? 1.0;
final sid = int.tryParse(res['sid'] as String) ?? 0;

if (speed == 0) {
speed = 1.0;
}

final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
model: model,
tokens: tokens,
dataDir: dataDir,
lengthScale: 1 / speed,
);

final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
vits: vits,
numThreads: 1,
debug: true,
);
final config = sherpa_onnx.OfflineTtsConfig(
model: modelConfig,
maxNumSenetences: 1,
);

final tts = sherpa_onnx.OfflineTts(config);
final audio = tts.generateWithCallback(
text: text,
sid: sid,
speed: speed,
callback: (Float32List samples) {
print('${samples.length} samples received');
// You can play samples in a separate thread/isolate
});
tts.free();

sherpa_onnx.writeWave(
filename: outputWav,
samples: audio.samples,
sampleRate: audio.sampleRate,
);
print('Saved to ${outputWav}');
}
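
The callback above only prints the chunk size, and the inline comment points at playing the samples in a separate thread/isolate. Because `generateWithCallback` blocks the calling isolate, one way to keep the main isolate responsive is to run generation in a worker isolate and ship each chunk back over a `SendPort`. The sketch below is not part of this commit; the playback hook is a placeholder, and it assumes the worker isolate must repeat the same `initSherpaOnnx()` setup as `main()`.

```dart
// Sketch: run blocking TTS generation in a worker isolate and stream each
// chunk of samples back to the main isolate for playback or buffering.
import 'dart:isolate';
import 'dart:typed_data';

import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

void _ttsWorker(List<Object> args) {
  final sendPort = args[0] as SendPort;
  final text = args[1] as String;
  final model = args[2] as String;
  final tokens = args[3] as String;
  final dataDir = args[4] as String;

  // Assumption: the worker also needs to repeat the initSherpaOnnx()
  // setup from bin/init.dart before using the bindings.

  final config = sherpa_onnx.OfflineTtsConfig(
    model: sherpa_onnx.OfflineTtsModelConfig(
      vits: sherpa_onnx.OfflineTtsVitsModelConfig(
        model: model,
        tokens: tokens,
        dataDir: dataDir,
      ),
      numThreads: 1,
    ),
    maxNumSenetences: 1, // spelling follows the binding used by piper.dart
  );

  final tts = sherpa_onnx.OfflineTts(config);
  tts.generateWithCallback(
    text: text,
    sid: 0,
    speed: 1.0,
    callback: (Float32List samples) {
      sendPort.send(samples); // typed data can be sent between isolates
    },
  );
  tts.free();
  sendPort.send(null); // signal completion
}

Future<void> streamTts(
  String text,
  String model,
  String tokens,
  String dataDir,
) async {
  final receivePort = ReceivePort();
  await Isolate.spawn(
    _ttsWorker,
    [receivePort.sendPort, text, model, tokens, dataDir],
  );

  await for (final message in receivePort) {
    if (message == null) break; // worker finished
    final samples = message as Float32List;
    // Placeholder: feed `samples` to an audio sink or player here.
    print('received a chunk with ${samples.length} samples');
  }
  receivePort.close();
}
```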
86 changes: 86 additions & 0 deletions dart-api-examples/tts/bin/zh.dart
@@ -0,0 +1,86 @@
// Copyright (c) 2024 Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
await initSherpaOnnx();

final parser = ArgParser()
..addOption('model', help: 'Path to the ONNX model')
..addOption('tokens', help: 'Path to tokens.txt')
..addOption('lexicon', help: 'Path to lexicon.txt')
..addOption(
'dict-dir',
help: 'Path to jieba dict directory',
defaultsTo: '',
)
..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
..addOption('text', help: 'Text to generate TTS for')
..addOption('output-wav', help: 'Filename to save the generated audio')
..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
..addOption(
'sid',
help: 'Speaker ID to select. Used only for multi-speaker TTS',
defaultsTo: '0',
);
final res = parser.parse(arguments);
if (res['model'] == null ||
res['lexicon'] == null ||
res['tokens'] == null ||
res['output-wav'] == null ||
res['text'] == null) {
print(parser.usage);
exit(1);
}
final model = res['model'] as String;
final lexicon = res['lexicon'] as String;
final tokens = res['tokens'] as String;
final dictDir = res['dict-dir'] as String;
final ruleFsts = res['rule-fsts'] as String;
final ruleFars = res['rule-fars'] as String;
final text = res['text'] as String;
final outputWav = res['output-wav'] as String;
var speed = double.tryParse(res['speed'] as String) ?? 1.0;
final sid = int.tryParse(res['sid'] as String) ?? 0;

if (speed == 0) {
speed = 1.0;
}

final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
model: model,
lexicon: lexicon,
tokens: tokens,
dictDir: dictDir,
lengthScale: 1 / speed,
);

final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
vits: vits,
numThreads: 1,
debug: true,
);
final config = sherpa_onnx.OfflineTtsConfig(
model: modelConfig,
maxNumSenetences: 1,
ruleFsts: ruleFsts,
ruleFars: ruleFars,
);

final tts = sherpa_onnx.OfflineTts(config);
final audio = tts.generate(text: text, sid: sid, speed: speed);
tts.free();

sherpa_onnx.writeWave(
filename: outputWav,
samples: audio.samples,
sampleRate: audio.sampleRate,
);
print('Saved to ${outputWav}');
}
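
Since the generated audio object exposes `samples` and `sampleRate`, it is easy to report how much audio was produced and a rough real-time factor. The helper below is only a sketch (`reportStats` is not part of the examples); it reuses the same `generate` call shown above plus `Stopwatch` from `dart:core`.

```dart
// Sketch: time generation and report audio duration plus a rough
// real-time factor (elapsed wall-clock time / seconds of audio produced).
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

void reportStats(
  sherpa_onnx.OfflineTts tts,
  String text,
  int sid,
  double speed,
) {
  final stopwatch = Stopwatch()..start();
  final audio = tts.generate(text: text, sid: sid, speed: speed);
  stopwatch.stop();

  final audioSeconds = audio.samples.length / audio.sampleRate;
  final elapsedSeconds = stopwatch.elapsedMilliseconds / 1000.0;
  print('Generated ${audioSeconds.toStringAsFixed(2)} s of audio '
      'in ${elapsedSeconds.toStringAsFixed(2)} s '
      '(RTF: ${(elapsedSeconds / audioSeconds).toStringAsFixed(3)})');
}
```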