-
Notifications
You must be signed in to change notification settings - Fork 0
/
pyespeak.py
265 lines (221 loc) · 11.6 KB
/
pyespeak.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
# Copyright(c) Maxim Kolosov 2011-2013 [email protected]
# http://pyespeak.sf.net
# BSD license
# Package metadata; ``pyespeak_version`` is the public alias of ``__version__``.
pyespeak_version = '0.2'
__version__ = pyespeak_version
__versionTime__ = '2013-11-26'
__author__ = 'Maxim Kolosov <[email protected]>'
# Module description, assigned explicitly because other statements precede it.
__doc__ = '''
pyespeak - Python ctypes module for eSpeak.
eSpeak is a compact, multi-language, open source
text-to-speech synthesizer (http://espeak.sourceforge.net).
'''
# Default byte encoding used by as_ansi() when converting text for the C API.
LANG_ENCODING = 'cp1251'
# Revision of the eSpeak API these bindings were written against.
ESPEAK_API_REVISION = 6
# Values for 'value' in espeak_SetParameter(espeakRATE, value, 0),
# nominally in words-per-minute.
espeakRATE_MINIMUM, espeakRATE_NORMAL, espeakRATE_MAXIMUM = 80, 175, 450
# espeak_EVENT_TYPE: values carried in the 'type' field of espeak_EVENT.
espeak_EVENT_TYPE = 0
(
    espeakEVENT_LIST_TERMINATED,  # Retrieval mode: terminates the event list.
    espeakEVENT_WORD,             # Start of word
    espeakEVENT_SENTENCE,         # Start of sentence
    espeakEVENT_MARK,             # Mark
    espeakEVENT_PLAY,             # Audio element
    espeakEVENT_END,              # End of sentence or clause
    espeakEVENT_MSG_TERMINATED,   # End of message
    espeakEVENT_PHONEME,          # Phoneme, if enabled in espeak_Initialize()
    espeakEVENT_SAMPLERATE,       # internal use, set sample rate
) = range(9)
# espeak_POSITION_TYPE: values for the position_type argument of espeak_Synth().
espeak_POSITION_TYPE = 0
POS_CHARACTER, POS_WORD, POS_SENTENCE = 1, 2, 3
# espeak_AUDIO_OUTPUT: values for the 'output' argument of espeak_Initialize().
espeak_AUDIO_OUTPUT = 0
(
    AUDIO_OUTPUT_PLAYBACK,        # PLAYBACK mode: plays the audio data, supplies events to the calling program
    AUDIO_OUTPUT_RETRIEVAL,       # RETRIEVAL mode: supplies audio data and events to the calling program
    AUDIO_OUTPUT_SYNCHRONOUS,     # SYNCHRONOUS mode: as RETRIEVAL but doesn't return until synthesis is completed
    AUDIO_OUTPUT_SYNCH_PLAYBACK,  # Synchronous playback
) = range(4)
# espeak_ERROR: status codes returned by the espeak_* calls declared with that type.
espeak_ERROR = 0
EE_OK, EE_INTERNAL_ERROR = 0, -1
EE_BUFFER_FULL, EE_NOT_FOUND = 1, 2
# Character-encoding selectors; OR-ed with the option bits below to build the
# 'flags' argument of espeak_Synth().
(
    espeakCHARS_AUTO,
    espeakCHARS_UTF8,
    espeakCHARS_8BIT,
    espeakCHARS_WCHAR,
    espeakCHARS_16BIT,
) = range(5)
# Option bits.
espeakSSML = 1 << 4            # 0x10
espeakPHONEMES = 1 << 8        # 0x100
espeakENDPAUSE = 1 << 12       # 0x1000
espeakKEEP_NAMEDATA = 1 << 13  # 0x2000
# espeak_PARAMETER: selectors for espeak_SetParameter() / espeak_GetParameter().
espeak_PARAMETER = 0
(
    espeakSILENCE,      # internal use
    espeakRATE,
    espeakVOLUME,
    espeakPITCH,
    espeakRANGE,
    espeakPUNCTUATION,
    espeakCAPITALS,
    espeakWORDGAP,
    espeakOPTIONS,      # reserved for misc. options. not yet used
    espeakINTONATION,
    espeakRESERVED1,
    espeakRESERVED2,
    espeakEMPHASIS,     # internal use
    espeakLINELENGTH,   # internal use
    espeakVOICETYPE,    # internal, 1=mbrola
    N_SPEECH_PARAM,     # last enum
) = range(16)
# espeak_PUNCT_TYPE: punctuation settings (presumably the values accepted for
# the espeakPUNCTUATION parameter -- confirm against speak_lib.h).
espeak_PUNCT_TYPE = 0
espeakPUNCT_NONE, espeakPUNCT_ALL, espeakPUNCT_SOME = range(3)
import ctypes
from sys import hexversion, platform
class FILE(ctypes.Structure):
    """Opaque stand-in for the C ``FILE`` type; only ever used through pointers."""


FILE_ptr = ctypes.POINTER(FILE)

# Helpers for moving between Python file objects and what the C level needs,
# plus byte/text type aliases that work on both Python 2 and Python 3.
if hexversion >= 0x03000000:
    type_str = bytes
    type_unicode = str

    # Python 3 file objects are not backed by a C FILE*, so the closest
    # CPython entry points work on file descriptors.  They need their own
    # prototypes: reusing the Python 2 FILE*-based ones makes ctypes
    # misinterpret both the arguments and the result.

    # PyObject *PyFile_FromFd(int fd, const char *name, const char *mode,
    #                         int buffering, const char *encoding,
    #                         const char *errors, const char *newline,
    #                         int closefd);
    PyFile_FromFile = ctypes.pythonapi.PyFile_FromFd
    PyFile_FromFile.restype = ctypes.py_object
    PyFile_FromFile.argtypes = [
        ctypes.c_int,
        ctypes.c_char_p,
        ctypes.c_char_p,
        ctypes.c_int,
        ctypes.c_char_p,
        ctypes.c_char_p,
        ctypes.c_char_p,
        ctypes.c_int,
    ]

    # int PyObject_AsFileDescriptor(PyObject *p);
    # NOTE: the result is an integer descriptor, not a FILE*; it cannot be
    # passed directly to functions declared with FILE_ptr.
    PyFile_AsFile = ctypes.pythonapi.PyObject_AsFileDescriptor
    PyFile_AsFile.restype = ctypes.c_int
    PyFile_AsFile.argtypes = [ctypes.py_object]
else:
    type_str = str
    type_unicode = unicode

    # PyObject *PyFile_FromFile(FILE *fp, char *name, char *mode,
    #                           int (*close)(FILE *));
    PyFile_FromFile = ctypes.pythonapi.PyFile_FromFile
    PyFile_FromFile.restype = ctypes.py_object
    PyFile_FromFile.argtypes = [
        FILE_ptr,
        ctypes.c_char_p,
        ctypes.c_char_p,
        ctypes.CFUNCTYPE(ctypes.c_int, FILE_ptr),
    ]

    # FILE *PyFile_AsFile(PyObject *p);
    PyFile_AsFile = ctypes.pythonapi.PyFile_AsFile
    PyFile_AsFile.restype = FILE_ptr
    PyFile_AsFile.argtypes = [ctypes.py_object]
# Prototype factory used for every binding and callback type in this module.
func_type = ctypes.CFUNCTYPE

# Load the eSpeak shared library: try 'espeak_lib' first and fall back to
# 'espeak_sapi'.  Only a failed load (OSError) triggers the fallback, so
# KeyboardInterrupt/SystemExit are no longer swallowed.  If the fallback
# fails as well, its OSError propagates to the importer.
try:
    c_module = ctypes.CDLL('espeak_lib')
    #~ c_module = ctypes.CDLL('espeak_lib_d')
except OSError:
    c_module = ctypes.CDLL('espeak_sapi')
class espeak_ID(ctypes.Union):
    """ctypes mirror of the ``id`` union stored in ``espeak_EVENT``.

    The three members overlay the same storage; which one is meaningful
    presumably depends on the event type (confirm against speak_lib.h).
    """

    _fields_ = [
        ('number', ctypes.c_int),       # C int view
        ('name', ctypes.c_char_p),      # pointer to a C string
        ('string', ctypes.c_char * 8),  # inline 8-byte character array
    ]
class espeak_EVENT(ctypes.Structure):
    """ctypes layout of one event record passed to the synthesis callback."""

    _fields_ = [
        ('type', ctypes.c_int),                # event kind (see the espeakEVENT_* constants)
        ('unique_identifier', ctypes.c_uint),
        ('text_position', ctypes.c_int),
        ('length', ctypes.c_int),
        ('audio_position', ctypes.c_int),
        ('sample', ctypes.c_int),
        ('user_data', ctypes.c_void_p),
        ('id', espeak_ID),                     # union, accessed as event.id.<member>
    ]
    #~ _anonymous_ = ('id',)
class espeak_VOICE(ctypes.Structure):
    """ctypes layout of a voice record as used by the voice-selection calls.

    The string members are C ``char *`` pointers, so they read back as byte
    strings (or ``None`` for NULL).
    """

    _fields_ = [
        ('name', ctypes.c_char_p),
        ('languages', ctypes.c_char_p),
        ('identifier', ctypes.c_char_p),
        ('gender', ctypes.c_ubyte),
        ('age', ctypes.c_ubyte),
        ('variant', ctypes.c_ubyte),
        ('xx1', ctypes.c_ubyte),
        ('score', ctypes.c_int),
        ('spare', ctypes.c_void_p),
    ]
def _bind(name, restype, *argtypes):
    """Return a callable for the function *name* exported by ``c_module``.

    The callable is created from a ``func_type`` prototype with result type
    *restype* and the given positional argument types.
    """
    prototype = func_type(restype, *argtypes)
    return prototype((name, c_module))


# --- initialisation and callbacks ---

# ESPEAK_API int espeak_Initialize(espeak_AUDIO_OUTPUT output, int buflength, const char *path, int options);
espeak_Initialize = _bind(
    'espeak_Initialize', ctypes.c_int,
    ctypes.c_int, ctypes.c_int, ctypes.c_char_p, ctypes.c_int)

# typedef int (t_espeak_callback)(short*, int, espeak_EVENT*);
# Alternative signatures left disabled by the author: 'wav' as c_char_p, or
# both pointers as c_void_p.
t_espeak_callback = func_type(
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_short), ctypes.c_int, ctypes.POINTER(espeak_EVENT))

# ESPEAK_API void espeak_SetSynthCallback(t_espeak_callback* SynthCallback);
espeak_SetSynthCallback = _bind('espeak_SetSynthCallback', None, t_espeak_callback)

# ESPEAK_API void espeak_SetUriCallback(int (*UriCallback)(int, const char*, const char*));
UriCallback = func_type(ctypes.c_int, ctypes.c_int, ctypes.c_char_p, ctypes.c_char_p)
espeak_SetUriCallback = _bind('espeak_SetUriCallback', None, UriCallback)

# --- synthesis ---

# ESPEAK_API espeak_ERROR espeak_Synth(const void *text, size_t size, unsigned int position, espeak_POSITION_TYPE position_type, unsigned int end_position, unsigned int flags, unsigned int* unique_identifier, void* user_data);
# (A variant declaring 'text' as c_char_p instead of c_void_p was left disabled by the author.)
espeak_Synth = _bind(
    'espeak_Synth', ctypes.c_int,
    ctypes.c_void_p, ctypes.c_size_t, ctypes.c_uint, ctypes.c_int,
    ctypes.c_uint, ctypes.c_uint, ctypes.POINTER(ctypes.c_uint), ctypes.c_void_p)

# ESPEAK_API espeak_ERROR espeak_Synth_Mark(const void *text, size_t size, const char *index_mark, unsigned int end_position, unsigned int flags, unsigned int* unique_identifier, void* user_data);
espeak_Synth_Mark = _bind(
    'espeak_Synth_Mark', ctypes.c_int,
    ctypes.c_void_p, ctypes.c_size_t, ctypes.c_char_p,
    ctypes.c_uint, ctypes.c_uint, ctypes.POINTER(ctypes.c_uint), ctypes.c_void_p)

# ESPEAK_API espeak_ERROR espeak_Key(const char *key_name);
espeak_Key = _bind('espeak_Key', ctypes.c_int, ctypes.c_char_p)

# ESPEAK_API espeak_ERROR espeak_Char(wchar_t character);
espeak_Char = _bind('espeak_Char', ctypes.c_int, ctypes.c_wchar)

# --- parameters ---

# ESPEAK_API espeak_ERROR espeak_SetParameter(espeak_PARAMETER parameter, int value, int relative);
espeak_SetParameter = _bind(
    'espeak_SetParameter', ctypes.c_int,
    ctypes.c_int, ctypes.c_int, ctypes.c_int)

# ESPEAK_API int espeak_GetParameter(espeak_PARAMETER parameter, int current);
espeak_GetParameter = _bind(
    'espeak_GetParameter', ctypes.c_int,
    ctypes.c_int, ctypes.c_int)

# ESPEAK_API espeak_ERROR espeak_SetPunctuationList(const wchar_t *punctlist);
espeak_SetPunctuationList = _bind('espeak_SetPunctuationList', ctypes.c_int, ctypes.c_wchar_p)

# ESPEAK_API void espeak_SetPhonemeTrace(int value, FILE *stream);
espeak_SetPhonemeTrace = _bind('espeak_SetPhonemeTrace', None, ctypes.c_int, FILE_ptr)

# ESPEAK_API void espeak_CompileDictionary(const char *path, FILE *log, int flags);
espeak_CompileDictionary = _bind(
    'espeak_CompileDictionary', None,
    ctypes.c_char_p, FILE_ptr, ctypes.c_int)

# --- voices ---

# ESPEAK_API const espeak_VOICE **espeak_ListVoices(espeak_VOICE *voice_spec);
espeak_ListVoices = _bind(
    'espeak_ListVoices', ctypes.POINTER(ctypes.POINTER(espeak_VOICE)),
    ctypes.POINTER(espeak_VOICE))

# ESPEAK_API espeak_ERROR espeak_SetVoiceByName(const char *name);
espeak_SetVoiceByName = _bind('espeak_SetVoiceByName', ctypes.c_int, ctypes.c_char_p)

# ESPEAK_API espeak_ERROR espeak_SetVoiceByProperties(espeak_VOICE *voice_spec);
espeak_SetVoiceByProperties = _bind(
    'espeak_SetVoiceByProperties', ctypes.c_int,
    ctypes.POINTER(espeak_VOICE))

# ESPEAK_API espeak_VOICE *espeak_GetCurrentVoice(void);
espeak_GetCurrentVoice = _bind('espeak_GetCurrentVoice', ctypes.POINTER(espeak_VOICE))

# --- control ---

# ESPEAK_API espeak_ERROR espeak_Cancel(void);
espeak_Cancel = _bind('espeak_Cancel', ctypes.c_int)

# ESPEAK_API int espeak_IsPlaying(void);
espeak_IsPlaying = _bind('espeak_IsPlaying', ctypes.c_int)

# ESPEAK_API espeak_ERROR espeak_Synchronize(void);
espeak_Synchronize = _bind('espeak_Synchronize', ctypes.c_int)

# ESPEAK_API espeak_ERROR espeak_Terminate(void);
espeak_Terminate = _bind('espeak_Terminate', ctypes.c_int)
# ESPEAK_API const char *espeak_Info(const char **path_data);
_espeak_Info_ = func_type(
    ctypes.c_char_p,
    ctypes.POINTER(ctypes.c_char_p),
)(('espeak_Info', c_module))


def espeak_Info():
    """Call the C ``espeak_Info`` and return ``(version, path_data)``.

    ``version`` is the string returned by the C function; ``path_data`` is
    the string the C function stores through its ``path_data`` out-parameter.
    Both come back as ``c_char_p`` values, i.e. byte strings or ``None``.
    """
    data_path = ctypes.c_char_p()
    return _espeak_Info_(ctypes.byref(data_path)), data_path.value
def as_ansi(value, encoding=LANG_ENCODING):
    """Return *value* as a byte string suitable for ``c_char_p`` parameters.

    Byte strings (``bytes`` on Python 3, ``str`` on Python 2) are returned
    unchanged, so already-encoded data can be passed through safely.  Text
    strings are encoded with *encoding*, which defaults to ``LANG_ENCODING``.

    Raises ``UnicodeEncodeError`` when the text cannot be represented in
    *encoding*.
    """
    # On Python 2 ``bytes`` is an alias of ``str``, so this one check covers
    # both major versions without consulting the interpreter version.
    if isinstance(value, bytes):
        return value
    return value.encode(encoding)
if __name__ == "__main__":
    # Demo / smoke test: initialise eSpeak, speak one character and a short
    # phrase, wait for completion and shut the library down.
    #~ output = AUDIO_OUTPUT_PLAYBACK
    #~ output = AUDIO_OUTPUT_SYNCHRONOUS
    output = AUDIO_OUTPUT_SYNCH_PLAYBACK
    print('pyespeak version %s eSpeak library %s' % (pyespeak_version, repr(c_module)))
    # c_char_p parameters require byte strings on Python 3, hence as_ansi().
    result = espeak_Initialize(output, 0, as_ansi('.'), 0)
    if result == EE_INTERNAL_ERROR:
        print('ERROR Initialize eSpeak')
    else:
        # Only continue when initialisation succeeded; on success the
        # return value is printed as the sample rate.
        print('sample rate in Hz %d' % result)
        print('eSpeak version %s' % repr(espeak_Info()))
        if output in (AUDIO_OUTPUT_SYNCHRONOUS, AUDIO_OUTPUT_SYNCH_PLAYBACK):
            def espeak_callback(wav, numsamples, events):
                '''int SynthCallback(short *wav, int numsamples, espeak_EVENT *events)
                wav: is the speech sound data which has been produced.
                NULL indicates that the synthesis has been completed.
                numsamples: is the number of entries in wav. This number may vary, may be less than
                the value implied by the buflength parameter given in espeak_Initialize, and may
                sometimes be zero (which does NOT indicate end of synthesis).
                events: an array of espeak_EVENT items which indicate word and sentence events, and
                also the occurrence of <mark> and <audio> elements within the text. The list of
                events is terminated by an event of type = 0.
                Callback returns: 0=continue synthesis, 1=abort synthesis'''
                # print(espeak_callback.__doc__)
                print('=== espeak_callback %s' % repr((wav, numsamples, events)))
                return 0
            # Keep a reference to the ctypes wrapper for as long as eSpeak may
            # call it; ctypes does not keep callback objects alive itself.
            SynthCallback = t_espeak_callback(espeak_callback)
            print('espeak_SetSynthCallback', espeak_SetSynthCallback(SynthCallback))
        print('espeak_SetVoiceByName %d' % espeak_SetVoiceByName(as_ansi('default')))
        print('espeak_Char %d' % espeak_Char(u'a'))
        #~ print('espeak_Char', espeak_Char(u'a'))
        synth_flags = espeakCHARS_AUTO | espeakPHONEMES | espeakENDPAUSE
        #~ synth_flags = espeakCHARS_8BIT | espeakPHONEMES | espeakENDPAUSE
        pytext = 'Hello world!'
        # Without an explicit size, create_string_buffer() appends the
        # terminating NUL, so sizeof(text) covers the text plus terminator
        # and the size passed to espeak_Synth never exceeds the real buffer.
        text = ctypes.create_string_buffer(as_ansi(pytext))
        print('espeak_Synth %d' % espeak_Synth(text, ctypes.sizeof(text), 0, POS_CHARACTER, 0, synth_flags, None, None))
        #~ print('espeak_Synth', espeak_Synth(pytext, len(pytext)+1, 0, POS_WORD, 0, synth_flags, None, 0))
        #~ print('espeak_Synth', espeak_Synth(pytext, len(pytext)+1, 0, POS_SENTENCE, 0, synth_flags, None, 0))
        print('espeak_Synchronize %d' % espeak_Synchronize())
        print('espeak_Terminate %d' % espeak_Terminate())