Skip to content

Commit

Permalink
Text-to-speech for iOS (#443)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Nov 23, 2023
1 parent 2f22e6e commit 94ef692
Show file tree
Hide file tree
Showing 12 changed files with 614 additions and 2 deletions.
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,13 @@ swift-api-examples/k2fsa-*
run-*.sh
two-pass-*.sh
build-*

## User settings
xcuserdata/

## Xcode 8 and earlier
*.xcscmblueprint
*.xccheckout
vits-vctk
vits-zh-aishell3
jslint.mjs
374 changes: 374 additions & 0 deletions ios-swiftui/SherpaOnnxTts/SherpaOnnxTts.xcodeproj/project.pbxproj

Large diffs are not rendered by default.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>IDEDidComputeMac32BitWarning</key>
<true/>
</dict>
</plist>
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"colors" : [
{
"idiom" : "universal"
}
],
"info" : {
"author" : "xcode",
"version" : 1
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"images" : [
{
"idiom" : "universal",
"platform" : "ios",
"size" : "1024x1024"
}
],
"info" : {
"author" : "xcode",
"version" : 1
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"info" : {
"author" : "xcode",
"version" : 1
}
}
95 changes: 95 additions & 0 deletions ios-swiftui/SherpaOnnxTts/SherpaOnnxTts/ContentView.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
//
// ContentView.swift
// SherpaOnnxTts
//
// Created by fangjun on 2023/11/23.
//
// Text-to-speech with Next-gen Kaldi on iOS without Internet connection

import SwiftUI
import AVFoundation

/// Main screen of the text-to-speech demo.
///
/// Lets the user enter a speaker ID, a speed and some text, synthesizes the
/// text with the offline TTS engine, writes the result to a WAV file in the
/// temporary directory and plays it back.
struct ContentView: View {
    @State private var sid = "0"
    @State private var speed = 1.0
    @State private var text = ""
    @State private var showAlert = false
    // Title and message of the alert shown when `showAlert` becomes true.
    @State private var alertTitle = ""
    @State private var alertMessage = ""
    @State var filename: URL = NSURL() as URL
    @State var audioPlayer: AVAudioPlayer!

    private var tts = createOfflineTts()

    var body: some View {
        VStack(alignment: .leading) {
            HStack {
                Spacer()
                Text("Next-gen Kaldi: TTS").font(.title)
                Spacer()
            }
            HStack {
                Text("Speaker ID")
                TextField("Please input a speaker ID", text: $sid)
                    .textFieldStyle(.roundedBorder)
                    .keyboardType(.numberPad)
            }
            HStack {
                Text("Speed \(String(format: "%.1f", speed))")
                    .padding(.trailing)
                Slider(value: $speed, in: 0.5...2.0, step: 0.1) {
                    Text("Speech speed")
                }
            }

            Text("Please input your text below").padding([.trailing, .top, .bottom])

            TextEditor(text: $text)
                .font(.body)
                .opacity(text.isEmpty ? 0.25 : 1)
                .disableAutocorrection(true)
                .border(Color.black)

            Spacer()
            HStack {
                Spacer()
                Button(action: generateAndPlay) {
                    Text("Generate")
                }.alert(isPresented: $showAlert) {
                    Alert(title: Text(alertTitle), message: Text(alertMessage))
                }
                Spacer()
                Button(action: {
                    // Optional chaining: never crash if no player exists yet.
                    audioPlayer?.play()
                }) {
                    Text("Play")
                }.disabled(audioPlayer == nil)  // nothing to replay until a player was created
                Spacer()
            }
            Spacer()
        }
        .padding()
    }

    /// Synthesizes the current text, saves it as a WAV file and starts playback.
    ///
    /// Shows an alert instead of crashing when the text is empty or when the
    /// generated file cannot be opened for playback.
    private func generateAndPlay() {
        // A non-numeric speaker ID falls back to speaker 0.
        let speakerId = Int(sid) ?? 0
        let trimmedText = text.trimmingCharacters(in: .whitespacesAndNewlines)
        guard !trimmedText.isEmpty else {
            presentAlert(
                title: "Empty text",
                message: "Please input your text before clicking the Generate button")
            return
        }

        let audio = tts.generate(text: trimmedText, sid: speakerId, speed: Float(speed))

        // Resolve the output file once and keep a local copy, so the save and
        // playback below do not depend on reading `filename` back from @State
        // immediately after assigning it.
        let outputURL: URL
        if filename.absoluteString.isEmpty {
            let tempDirectoryURL = NSURL.fileURL(withPath: NSTemporaryDirectory(), isDirectory: true)
            outputURL = tempDirectoryURL.appendingPathComponent("test.wav")
            filename = outputURL
        } else {
            outputURL = filename
        }

        // NOTE(review): `save` returns a status code whose success convention is
        // not visible in this file; a failed write is surfaced below when the
        // player cannot open the file. Confirm the convention and check it here.
        _ = audio.save(filename: outputURL.path)

        do {
            let player = try AVAudioPlayer(contentsOf: outputURL)
            audioPlayer = player
            player.play()
        } catch {
            presentAlert(
                title: "Playback failed",
                message: "Could not play the generated audio: \(error.localizedDescription)")
        }
    }

    /// Stores the alert content and triggers its presentation.
    private func presentAlert(title: String, message: String) {
        alertTitle = title
        alertMessage = message
        showAlert = true
    }
}

/// Xcode canvas preview for `ContentView`.
struct ContentView_Previews: PreviewProvider {
    static var previews: some View { ContentView() }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"info" : {
"author" : "xcode",
"version" : 1
}
}
17 changes: 17 additions & 0 deletions ios-swiftui/SherpaOnnxTts/SherpaOnnxTts/SherpaOnnxTtsApp.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
//
// SherpaOnnxTtsApp.swift
// SherpaOnnxTts
//
// Created by fangjun on 2023/11/23.
//

import SwiftUI

/// Application entry point: a single window group hosting `ContentView`.
@main
struct SherpaOnnxTtsApp: App {
    var body: some Scene {
        WindowGroup { ContentView() }
    }
}
68 changes: 68 additions & 0 deletions ios-swiftui/SherpaOnnxTts/SherpaOnnxTts/ViewModel.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
//
// ViewModel.swift
// SherpaOnnxTts
//
// Created by fangjun on 2023/11/23.
//

import Foundation

/// Returns the path of a resource in the main bundle.
///
/// - Parameters:
///   - forResource: Resource file name without its extension.
///   - ofType: Resource file extension.
/// - Returns: The path reported by `Bundle.main` for the resource.
///
/// Fails a precondition, with a hint about "Copy Bundle Resources", when the
/// resource is not found in the bundle.
func getResource(_ forResource: String, _ ofType: String) -> String {
    let located = Bundle.main.path(forResource: forResource, ofType: ofType)
    precondition(
        located != nil,
        "\(forResource).\(ofType) does not exist!\nRemember to change \n Build Phases -> Copy Bundle Resources\nto add it!"
    )
    // Safe to unwrap: the precondition above already rejected nil.
    return located!
}

/// Please refer to
/// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html
/// to download pre-trained models

/// Builds an offline TTS wrapper from the bundled VCTK VITS model files.
///
/// Model description:
/// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vctk-english-multi-speaker-109-speakers
///
/// Requires `vits-vctk.onnx`, `lexicon.txt` and `tokens.txt` in the app bundle.
func getTtsForVCTK() -> SherpaOnnxOfflineTtsWrapper {
    // Resources are looked up in the order model, lexicon, tokens.
    let vitsConfig = sherpaOnnxOfflineTtsVitsModelConfig(
        model: getResource("vits-vctk", "onnx"),
        lexicon: getResource("lexicon", "txt"),
        tokens: getResource("tokens", "txt"))

    // The wrapper takes the config by reference, hence the mutable local.
    var ttsConfig = sherpaOnnxOfflineTtsConfig(
        model: sherpaOnnxOfflineTtsModelConfig(vits: vitsConfig))
    return SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)
}

/// Builds an offline TTS wrapper from the bundled aishell3 VITS model files.
///
/// Model description:
/// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-model-aishell3
///
/// Requires `vits-aishell3.onnx`, `lexicon.txt` and `tokens.txt` in the app bundle.
func getTtsForAishell3() -> SherpaOnnxOfflineTtsWrapper {
    // vits-aishell3.onnx
    let modelPath = getResource("vits-aishell3", "onnx")
    // lexicon.txt
    let lexiconPath = getResource("lexicon", "txt")
    // tokens.txt
    let tokensPath = getResource("tokens", "txt")

    let vitsConfig = sherpaOnnxOfflineTtsVitsModelConfig(
        model: modelPath, lexicon: lexiconPath, tokens: tokensPath)
    let ttsModelConfig = sherpaOnnxOfflineTtsModelConfig(vits: vitsConfig)

    // The wrapper takes the config by reference, hence the mutable local.
    var ttsConfig = sherpaOnnxOfflineTtsConfig(model: ttsModelConfig)
    return SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)
}

/// Creates the TTS engine used by the app.
///
/// Currently selects the VCTK model. To use another model, return the
/// corresponding builder instead, or add a new builder modeled on the
/// existing ones.
func createOfflineTts() -> SherpaOnnxOfflineTtsWrapper {
    // Alternative model:
    // return getTtsForAishell3()
    getTtsForVCTK()
}
4 changes: 2 additions & 2 deletions swift-api-examples/SherpaOnnx.swift
Original file line number Diff line number Diff line change
Expand Up @@ -650,8 +650,8 @@ class SherpaOnnxGeneratedAudioWrapper {
}
}

func save(filename: String) {
SherpaOnnxWriteWave(audio.pointee.samples, n, sampleRate, toCPointer(filename))
func save(filename: String) -> Int32 {
return SherpaOnnxWriteWave(audio.pointee.samples, n, sampleRate, toCPointer(filename))
}
}

Expand Down

0 comments on commit 94ef692

Please sign in to comment.