-
Notifications
You must be signed in to change notification settings - Fork 2
/
ttts.py
65 lines (49 loc) · 2.61 KB
/
ttts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import os

from google.cloud import texttospeech
from pydub import AudioSegment

import generate_transcript as gt
def generate_characters(overrides=None):
    """Return the mapping of character names to Text-to-Speech voice names.

    Args:
        overrides: Optional mapping of character name to voice name. Its
            entries replace or extend the built-in defaults, so callers can
            reconfigure the cast without editing this function.

    Returns:
        A dict keyed by character name whose values are voice names. A new
        dict is built on every call, so callers may mutate the result.
    """
    characters = {
        "Ben": 'de-DE-Wavenet-B',
        "Leslie": 'de-DE-Wavenet-A',
        "Andy": 'de-DE-Wavenet-D',
    }
    if overrides:
        characters.update(overrides)
    return characters
def generate_audio_clip(client, node, node_num, language_code, characters):
    """Synthesize one transcript node and save it as media/output<node_num>.mp3.

    Args:
        client: Text-to-Speech client used for the synthesis request.
        node: Transcript entry to voice; it is handed to
            configure_speaker_snippet, which reads its speaker and sentence.
        node_num: Number used to build the output file name.
        language_code: Language code forwarded to the voice selection.
        characters: Mapping of speaker name to voice name.
    """
    output_file = "media/output"+str(node_num)+".mp3"

    # open() does not create missing directories. Make sure the target
    # directory exists before issuing the synthesis request, so a filesystem
    # problem surfaces without a wasted API call.
    output_dir = os.path.dirname(output_file)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    audio_config = texttospeech.types.AudioConfig(
        audio_encoding=texttospeech.enums.AudioEncoding.MP3)

    # Perform the text-to-speech request on the text input with the selected
    # voice parameters and audio file type.
    snippet = configure_speaker_snippet(node, characters, client, language_code, audio_config)

    # The response's audio_content is binary, hence the 'wb' mode.
    with open(output_file, 'wb') as out:
        out.write(snippet.audio_content)
    print('Audio content written to output file ' + str(node_num))

    # TODO: a disabled experiment reloaded the clip with pydub's AudioSegment
    # and set its duration to node.end - node.start before re-exporting it.
    # Revisit if clips must fit the node's time window.
def configure_speaker_snippet(node, character_voices, client, language_code, audio_config):
    """Synthesize the sentence of a transcript node in its speaker's voice.

    Args:
        node: Transcript entry; node.sentence is the text to speak and the
            node is passed on to configure_voice to pick the voice.
        character_voices: Mapping of speaker name to voice name.
        client: Text-to-Speech client exposing synthesize_speech.
        language_code: Language code forwarded to the voice selection.
        audio_config: Audio configuration passed through to the request.

    Returns:
        Whatever client.synthesize_speech returns for the request.
    """
    speaker_voice = configure_voice(node, character_voices, language_code)
    text_input = texttospeech.types.SynthesisInput(text=node.sentence)
    return client.synthesize_speech(text_input, speaker_voice, audio_config)
def configure_voice(node, character_voices, language_code):
    """Build the voice selection parameters for the speaker of a node.

    Args:
        node: Transcript entry; node.speaker selects the voice.
        character_voices: Mapping of speaker name to voice name.
        language_code: Language code to set on the voice selection.

    Returns:
        A texttospeech.types.VoiceSelectionParams for the speaker's voice.

    Raises:
        KeyError: If node.speaker has no entry in character_voices. The
            message names the speaker and the configured characters.
    """
    speaker = node.speaker
    # Look the voice up first so an unknown speaker fails with a message that
    # says who is missing, instead of a bare KeyError from inside the call.
    if speaker not in character_voices:
        raise KeyError(
            "No voice configured for speaker {!r}; configured characters: {}".format(
                speaker, sorted(character_voices)))
    voice_name = character_voices[speaker]
    return texttospeech.types.VoiceSelectionParams(name=voice_name, language_code=language_code)
def generate_mp3(source_audio, script, lang_code):
    """Generate one synthesized MP3 clip per node of the timed transcript.

    Args:
        source_audio: Source audio, passed to gt.gen_transcript.
        script: Script, passed to gt.gen_transcript.
        lang_code: Language code used both for the transcript and for the
            synthesized voices.

    Returns:
        The transcript object returned by gt.gen_transcript.
    """
    # Get timed audio transcript
    transcript = gt.gen_transcript(source_audio, script, lang_code)

    # Instantiates a client
    tts_client = texttospeech.TextToSpeechClient()
    voices = generate_characters()

    # eventually adapt this to take command line input for dynamic language change
    # Clips are numbered from 1 in transcript order.
    for clip_number, node in enumerate(transcript, 1):
        generate_audio_clip(tts_client, node, clip_number, lang_code, voices)

    return transcript