-
Notifications
You must be signed in to change notification settings - Fork 17
/
pahh.py
267 lines (210 loc) · 7.54 KB
/
pahh.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
#!/usr/bin/python
#Copyright (c) 2012, Eng Eder de Souza
#Accessing the Google API for speech recognition With Asterisk!
#Eng Eder de Souza
#date 15/01/2012
#http://ederwander.wordpress.com/2012/01/16/google-speech-python-asterisk/
#
# This program is free software, distributed under the terms of
# the GNU General Public License Version 2. See the COPYING file
# at the top of the source tree.
#
#Revision 0.3
#History:
#18/01/2012 bug fix in local variable declaration
#19/01/2012 support for old Python interpreters
#19/01/2012 removed matplotlib dependencies
#19/01/2012 Suppression of DeprecationWarning and UserWarning warnings
#27/10/2014 Update to Google speech-api v2
import warnings
warnings.simplefilter("ignore", DeprecationWarning)
warnings.simplefilter("ignore", UserWarning)
from scikits.audiolab import Format, Sndfile
from scipy.signal import firwin, lfilter
from tempfile import mkstemp
import numpy as np
import urllib2
import math
import sys
import re
import os
#Module configuration and shared state for the speech-detection script.
#Recognition language sent to the Google speech API (Brazilian Portuguese here).
Lang="pt-BR"
#For the English speech recognizer use instead:
#Lang="en-US"
#The old Google speech v1 endpoint no longer works:
#url = 'https://www.google.com/speech-api/v1/recognize?xjerr=1&client=chromium&lang='+Lang
#Google speech v2 endpoint, JSON output.
#NOTE(review): the API key is hard-coded in this URL; consider loading it from configuration.
url='https://www.google.com/speech-api/v2/recognize?output=json&key=AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw&lang='+Lang
#True while the main loop is still waiting for speech; cleared on detection.
silence=True
#AGI environment variables (agi_* keys) parsed from stdin.
env = {}
#Sample rate in Hz used for the filter design, the pitch estimate and the FLAC file.
RawRate=8000
#Number of bytes read per iteration while waiting for speech.
chunk=1024
#http://en.wikipedia.org/wiki/Vocal_range
#Pitch estimates above 75 Hz are treated as being in the vocal range.
VocalRange = 75.0
#Energy threshold: a block counts as speech when rms() exceeds this value.
#NOTE(review): rms() returns the normalized RMS scaled by 1000, so this is
#not a true dB figure despite the original "15 dB" wording - confirm.
Threshold = 15
#Maximum number of bytes to consume before giving up (compared with the
#running byte count "signal"):
#10 seconds x 8000 samples/second x ( 16 bits / 8 bits/byte ) = 160000 bytes
#160000/1024 = +/- 157 chunks
#157*1024 = 160768
TimeoutSignal = 160768
#Bytes read per block by RecordSpeech, roughly one second of audio:
#1 second x 8000 samples/second x 2 bytes/sample = 16000 bytes
#16000/1024 = 15.625, rounded up to 16 chunks
#16*1024 = 16384
Timeout_NoSpeaking=16384
#Scale factor mapping 16-bit sample values into [-1, 1) for the RMS calculation.
SHORT_NORMALIZE = (1.0/32768.0)
#Last block of samples seen by the main loop; empty until speech is detected.
LastBlock=''
#File descriptor on which Asterisk delivers the audio.
FD=3
#Open the audio descriptor for binary reads (note: the name shadows the Python 2 builtin "file").
file=os.fdopen(FD, 'rb')
#Running count of bytes read while waiting for speech.
signal=0
#Samples accumulated for the recording (note: the name shadows the builtin "all").
all=[]
#Read the AGI environment ("agi_key: value" lines) from stdin into env.
#Reading stops at the first blank line (or at end of input).
while 1:
    line = sys.stdin.readline().strip()
    if line == '':
        break
    #Split on the first ':' only, so a value that itself contains ':'
    #no longer raises ValueError during unpacking.
    parts = line.split(':', 1)
    key = parts[0].strip()
    #Lines without a ':' or without the agi_ prefix are reported and skipped.
    if len(parts) != 2 or key[:4] != 'agi_':
        sys.stderr.write("Did not work!\n")
        sys.stderr.flush()
        continue
    env[key] = parts[1].strip()
#Echo the parsed environment to stderr.
for key in env:
    sys.stderr.write(" -- %s = %s\n" % (key, env[key]))
sys.stderr.flush()
def SendSpeech(File):
    """Upload a FLAC recording to the speech API and report the transcript.

    The file is read fully, deleted, and POSTed to ``url``.  The transcript
    text is extracted from the response with a regular expression; when one
    is found it is stored in the ``GoogleUtterance`` channel variable and
    logged with a NOOP, otherwise a "speech not recognized" NOOP is sent.

    File -- path of the FLAC file to send; it is removed after being read.

    Errors raised by open/read, os.remove or urllib2 are not caught here.
    """
    result = ''
    #Read the whole recording, closing the handle even if the read fails.
    audio = open(File, "rb")
    try:
        flac = audio.read()
    finally:
        audio.close()
    os.remove(File)
    #Advertise the same sample rate the recording was written with.
    header = {'Content-Type' : 'audio/x-flac; rate=%d' % RawRate}
    req = urllib2.Request(url, flac, header)
    data = urllib2.urlopen(req)
    try:
        body = data.read()
    finally:
        data.close()
    #Greedy match: captures from the first transcript entry on a line up to
    #the last "}," on that line.
    find = re.findall('{"transcript":(.*)},', body)
    if find:
        result = find[0].replace('{"transcript":', '')
    else:
        sys.stdout.write('EXEC "NOOP" "speech not recognized ..." \n')
        sys.stdout.flush()
    if result:
        #Keep only the text after the last "}," and drop the JSON quotes.
        pieces = result.split('},')
        result = pieces[-1].replace('"', '')
        sys.stdout.write('SET VARIABLE GoogleUtterance "%s"\n' % str(result))
        sys.stdout.flush()
        #The closing quote was missing from this command in the original.
        sys.stdout.write('EXEC "NOOP" "%s" \n' % str(result))
        sys.stdout.flush()
def Filter(samps):
    """Run samps through a 200-tap Hamming-window FIR filter.

    The cutoff is 0.05 divided by the Nyquist frequency (half of RawRate),
    i.e. firwin receives it as a fraction of Nyquist.  Returns the output
    of lfilter for the given samples.
    """
    nyquist = 0.5*RawRate
    taps = firwin(200, cutoff=0.05/nyquist, window='hamming')
    #A denominator of 1 makes lfilter apply the taps as a pure FIR filter.
    return lfilter(taps, 1, samps)
def Pitch(signal, rate=None):
    """Estimate the dominant frequency of signal from its zero crossings.

    signal -- 1-D sequence of samples.
    rate   -- sample rate in Hz; defaults to the module-level RawRate.

    Returns the estimate in Hz as a float rounded to a whole number, or
    0.0 for an empty signal.

    Each sample is reduced to its sign (+1.0 / -1.0, zero counting as
    positive) on every interpreter version; the old pre-2.6 branch kept the
    raw sample values, so it did not count zero crossings.  The division is
    done in floating point so that round() is not fed a value Python 2
    integer division has already truncated.
    """
    if rate is None:
        rate = RawRate
    samples = np.asarray(signal, dtype=np.float64)
    if len(samples) == 0:
        #Nothing to analyse; avoids a division by zero below.
        return 0.0
    #Same mapping as math.copysign(1.0, s), applied to the whole array.
    signs = np.where(np.signbit(samples), -1.0, 1.0)
    crossings = len(np.nonzero(np.diff(signs))[0])
    #Two zero crossings per period.
    return float(round(crossings * float(rate) / (2.0 * len(samples))))
def rms(shorts, normalize=None):
    """Return the scaled RMS energy of a block of samples.

    shorts    -- sequence of sample values (the callers in this file pass
                 np.int16 arrays).
    normalize -- factor applied to every sample before squaring; defaults
                 to the module-level SHORT_NORMALIZE.

    Returns sqrt(sum((s*normalize)**2) / (len(shorts)/2)) * 1000 as a float,
    or 0.0 for an empty block (which previously raised ZeroDivisionError).

    NOTE(review): the divisor is len(shorts)/2 rather than len(shorts), so
    the result is sqrt(2) times the true RMS.  It is kept because Threshold
    is compared against values on this scale.
    """
    if normalize is None:
        normalize = SHORT_NORMALIZE
    #Float division: Python 2 integer division floored odd lengths and
    #turned a single-sample block into a zero divisor.
    count = len(shorts) / 2.0
    if count == 0:
        return 0.0
    scaled = np.asarray(shorts, dtype=np.float64) * normalize
    sum_squares = float(np.dot(scaled, scaled))
    return math.sqrt(sum_squares / count) * 1000
def speaking(data):
    """Return True when the energy of data, as measured by rms(), exceeds Threshold."""
    return rms(data) > Threshold
def VAD(SumFrequency, data2):
    """Voice-activity decision from an accumulated frequency and a sample block.

    SumFrequency is divided by Timeout_NoSpeaking+1; the result must exceed
    half of VocalRange AND speaking(data2) must be true for the function to
    return True.  In every other case it returns False.
    """
    mean_frequency = SumFrequency/(Timeout_NoSpeaking+1)
    #Guard clause: below the frequency floor the energy check is skipped.
    if not (mean_frequency > VocalRange/2):
        return False
    return bool(speaking(data2))
def RecordSpeech(TimeoutSignal, LastBlock, LastLastBlock):
    """Append audio to the module-level list ``all`` until the speech ends.

    TimeoutSignal -- maximum number of bytes to record before stopping.
    LastBlock     -- samples of the block in which speech was detected.
    LastLastBlock -- samples of the block before it (may be empty).

    The two blocks are stored first, then blocks of Timeout_NoSpeaking bytes
    are read from ``file``.  Recording stops when a block is not classified
    as speech by speaking(), when the byte budget is used up, or when the
    audio stream is closed.  An empty read used to reach rms() and crash
    with ZeroDivisionError; it is now treated as the end of the speech.
    """
    all.extend(LastLastBlock)
    all.extend(LastBlock)
    signal = 0
    while signal <= TimeoutSignal:
        RawSamps = file.read(Timeout_NoSpeaking)
        #Drop a trailing odd byte so the buffer holds whole 16-bit samples.
        RawSamps = RawSamps[:len(RawSamps) - (len(RawSamps) % 2)]
        if not RawSamps:
            #Stream closed: there is nothing more to record.
            sys.stdout.write('EXEC "NOOP" "End of the Speech..." \n')
            sys.stdout.flush()
            break
        samps = np.frombuffer(RawSamps, dtype=np.int16)
        all.extend(samps)
        signal = signal + Timeout_NoSpeaking
        if speaking(samps):
            sys.stdout.write('EXEC "NOOP" "Speech Found ..." \n')
            sys.stdout.flush()
        else:
            #First quiet block ends the recording.
            sys.stdout.write('EXEC "NOOP" "End of the Speech..." \n')
            sys.stdout.flush()
            break
def PlayStream (params):
    """Send a STREAM FILE command for params and consume one reply line.

    The command text is written to stderr first and then to stdout, each
    followed by a flush.  One line is then read from stdin and discarded.
    """
    command = "STREAM FILE %s \"\"\n" % str(params)
    for stream in (sys.stderr, sys.stdout):
        stream.write(command)
        stream.flush()
    #The reply line is read so it does not stay queued on stdin; its content is unused.
    sys.stdin.readline()
#Main flow: play a beep, wait for speech, record it, encode it as FLAC and
#send it to the recognizer.
sys.stdout.write('EXEC "NOOP" "Hello Waiting For Speech ..." \n')
sys.stdout.flush()
PlayStream("beep")
#Wait for the first chunk that is both loud enough and pitched like a voice.
while silence:
    #Real-time raw audio from Asterisk
    RawSamps = file.read(chunk)
    #Drop a trailing odd byte so the buffer holds whole 16-bit samples.
    RawSamps = RawSamps[:len(RawSamps) - (len(RawSamps) % 2)]
    if not RawSamps:
        #The stream was closed; an empty block used to crash in rms().
        sys.stderr.write("Audio stream closed before speech was detected\n")
        sys.stderr.flush()
        sys.exit()
    samps = np.frombuffer(RawSamps, dtype=np.int16)
    samps2 = Filter(samps)
    Frequency = Pitch(samps2)
    rms_value = rms(samps)
    signal = signal + chunk
    if (rms_value > Threshold) and (Frequency > VocalRange):
        silence = False
        #NOTE(review): LastBlock is only assigned here, so LastLastBlock
        #always receives its initial empty value and no block preceding the
        #detection is kept - confirm whether a pre-roll block was intended.
        LastLastBlock = LastBlock
        LastBlock = samps
        sys.stdout.write('EXEC "NOOP" "Speech Detected Recording..." \n')
        sys.stdout.flush()
    if (signal > TimeoutSignal):
        sys.stdout.write('EXEC "NOOP" "Time Out No Speech Detected ..." \n')
        sys.stdout.flush()
        sys.exit()
RecordSpeech(TimeoutSignal, LastBlock, LastLastBlock)
array = np.array(all)
fmt = Format('flac', 'pcm16')
nchannels = 1
cd, FileNameTmp = mkstemp('TmpSpeechFile.flac')
#mkstemp returns an already-open descriptor; the file is reopened by name
#below, so close the descriptor instead of leaking it.
os.close(cd)
# making the file .flac
afile = Sndfile(FileNameTmp, 'w', fmt, nchannels, RawRate)
try:
    #writing in the file
    afile.write_frames(array)
finally:
    #Close before uploading so the encoded data is completely on disk.
    afile.close()
SendSpeech(FileNameTmp)