-
Notifications
You must be signed in to change notification settings - Fork 0
/
example.py
69 lines (58 loc) · 2.64 KB
/
example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from STTS2Dash.tts import StyleTTS2Pipeline
import torch
if __name__ == '__main__':
tts = StyleTTS2Pipeline()
tts.load_from_files("./test_models/epoch 7.pth",
"./test_models/config_n.yml", is_tsukasa=True)
while True:
text = input("Enter text to synthesize (or 'quit' to exit): ").strip()
if text.lower() == 'quit':
break
if not text:
print("Text cannot be empty. Please try again.")
continue
# Default parameters
diffusion_steps = 30
alpha = 0.2
beta = 0.4
embedding_scale = 2
audio_style_path = "./test_models/audio.wav"
# Parameter customization
while True:
customize = input("Customize generation parameters? (y/n): ").strip().lower()
if customize == 'y':
try:
alpha = float(input("Enter alpha (current: {0}): ".format(alpha)) or alpha)
beta = float(input("Enter beta (current: {0}): ".format(beta)) or beta)
embedding_scale = float(
input("Enter embedding scale (current: {0}): ".format(embedding_scale)) or embedding_scale)
diffusion_steps = int(
input("Enter diffusion steps (current: {0}): ".format(diffusion_steps)) or diffusion_steps)
# New option for audio style path
audio_style_path = input(
"Enter audio style path (current: {0}): ".format(audio_style_path)) or audio_style_path
break
except ValueError:
print("Invalid input. Please enter numeric values for numeric parameters.")
continue
elif customize == 'n':
break
else:
print("Invalid input. Please enter 'y' or 'n'.")
# Generate audio
try:
tts.generate(text,
audio_style_path,
diffusion_steps=diffusion_steps,
alpha=alpha,
beta=beta,
embedding_scale=embedding_scale,
output_file_path="./test.wav",
language="en")
print(f"Done... Saved audio with parameters:")
print(f"Audio Style Path: {audio_style_path}")
print(
f"Alpha: {alpha}, Beta: {beta}, Embedding Scale: {embedding_scale}, Diffusion Steps: {diffusion_steps}")
except Exception as e:
print(f"Error generating audio: {e}")
print("Text-to-speech generation ended.")