-
Notifications
You must be signed in to change notification settings - Fork 0
/
voicerecognizer.py
121 lines (91 loc) · 4.38 KB
/
voicerecognizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#This is a standalone version of the MFCC-GMM voice recognizer, to be used for testing.
#This code loops through a folder of training .wavs and a folder of testing .wavs and sees whether it can recognize the testing items after being trained on the training data. Outputs accuracy.
#Call it like this in the command line:
#python voicerecognizer.py trainingdir testdir
#Where trainingdir is the directory of training .wavs and testdir is the directory of test .wavs.
#Store this script in the same folder that contains trainingdir and testdir (NOT inside either trainingdir or testdir).
import scikits.talkbox.features
import scikits.talkbox
import scipy.io.wavfile
import os, sys
import numpy as np
import sklearn
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn import mixture
from sklearn.externals import joblib
import warnings
warnings.filterwarnings("ignore")
#Extracts the MFCCs of a single .wav file
def extract_mfccCall(wavfilename):
    """Return the MFCCs computed from the audio stored in *wavfilename*.

    The file is read with scipy.io.wavfile.read, and the samples are passed
    to scikits.talkbox's mfcc() together with the sample rate that was read
    from the file. Only the first element of what mfcc() returns is kept
    (presumably the cepstral coefficients -- confirm against talkbox docs).
    """
    sample_rate, samples = scipy.io.wavfile.read(wavfilename)
    mfcc_outputs = scikits.talkbox.features.mfcc(samples, fs=sample_rate)
    return mfcc_outputs[0]
def extract_mfcc(wavdir):
    """Return a list holding the MFCCs of every entry in directory *wavdir*.

    Entries are visited in whatever order os.listdir() reports them, and
    each one is handed to extract_mfccCall() as a path inside *wavdir*.
    The resulting list therefore has one item per directory entry, in
    listing order.
    """
    return [
        extract_mfccCall(os.path.join(wavdir, entry))
        for entry in os.listdir(wavdir)
    ]
def _load_labelled_mfccs(wavdir):
    """Return (usernames, mfccs) for the files in *wavdir*, index-aligned.

    The directory is listed exactly once (and sorted) so that the username
    at position i is guaranteed to belong to the MFCCs at position i;
    os.listdir() makes no promise about ordering, so listing the directory
    twice and pairing the results by index is fragile.

    The username is the part of the file name before the first "_"
    (e.g. "alice" for "alice_train.wav").
    """
    filenames = sorted(os.listdir(wavdir))
    usernames = [filename.split("_")[0] for filename in filenames]
    mfccs = [extract_mfccCall(os.path.join(wavdir, filename))
             for filename in filenames]
    return usernames, mfccs


def _train_gmm(mfccs):
    """Fit and return a scaler + 3-component full-covariance GMM on *mfccs*."""
    features = np.array(mfccs)
    model = Pipeline([("scaler", StandardScaler()), ("gmix", mixture.GMM(n_components=3, covariance_type='full'))])
    model.fit(features)
    return model


def _mean_log_likelihood(model, features):
    """Return the mean of the scores *model* assigns to *features*."""
    #np.ravel copes with score() returning either an array or a scalar
    return np.mean(np.ravel(model.score(features)))


if __name__ == '__main__':
    if len(sys.argv) < 3:
        sys.exit("Usage: python voicerecognizer.py trainingdir testdir")
    trainingdir = sys.argv[1]
    testdir = sys.argv[2]

    #----------------------------#
    #----------Training----------#
    #----------------------------#

    #Get the usernames and MFCCs of the training data (same order in both lists).
    trainingUsernames, allTrainingMFCCs = _load_labelled_mfccs(trainingdir)
    if not allTrainingMFCCs:
        sys.exit("No training files found in " + trainingdir)

    #Make a GMM for each training data file's MFCCs. Store the GMMs in a list.
    trainingGMMs = [_train_gmm(trainingfileMFCCs)
                    for trainingfileMFCCs in allTrainingMFCCs]

    #---------------------------#
    #----------Testing----------#
    #---------------------------#

    #Get the usernames and MFCCs of the test data (same order in both lists).
    testUsernames, allTestMFCCs = _load_labelled_mfccs(testdir)
    if not allTestMFCCs:
        #Without this guard the percentage below would divide by zero.
        sys.exit("No test files found in " + testdir)

    #Prepare counters for percent calculating at the end.
    totalCounter = 0
    correctCounter = 0

    #Test each test item.
    for testUsername, testfileMFCCs in zip(testUsernames, allTestMFCCs):
        #Convert the list into a numpy array
        input2 = np.array(testfileMFCCs)

        #Score the test data against every trained GMM.
        accuracies = [_mean_log_likelihood(trainingGMM, input2)
                      for trainingGMM in trainingGMMs]

        #The highest likelihood in the list corresponds to the match GMM
        maximumIndex = accuracies.index(max(accuracies))
        matchUsername = trainingUsernames[maximumIndex] #Username of matched GMM

        #If the most likely training GMM belongs to the same user as this test file, the recognizer got it right.
        if testUsername == matchUsername:
            correctCounter = correctCounter + 1

        totalCounter = totalCounter + 1 #Increment with each test file processed
        print (totalCounter)

    #Get percentage success rate. float() forces true division; on Python 2
    #int / int truncates, which would report only 0 or 100.
    percentCorrect = (float(correctCounter) / totalCounter) * 100
    print("The recognizer recognizes correctly ")
    print(percentCorrect)
    print(" of the time.")