LAB5_assignment10.py
import numpy as np
import pandas as pd
import scipy.io.wavfile as wavfile
# Good Luck!
#
# INFO:
# Samples = Observations. Each audio file is a single sample
# in our dataset.
#
# Audio Samples = https://en.wikipedia.org/wiki/Sampling_(signal_processing)
# Each .wav file is actually just a bunch of numeric samples, "sampled"
# from the analog signal. Sampling is a type of discretization. When we
# mention 'samples', we mean observations. When we mention 'audio samples',
# we mean the actual "features" of the audio file.
#
#
# The goal of this lab is to use multi-target linear regression to
# generate, by extrapolation, the missing portion of the test audio file.
#
# Each missing audio_sample feature will be the output of an equation,
# which is a function of the provided portion of the audio_samples:
#
# missing_samples = f(provided_samples)
#
# You can experiment with how much of the audio you want to chop off
# and have the computer generate using the Provided_Portion parameter.
#
# TODO: Play with this. This is how much of the audio file will
# be provided, in percent. The remaining percent of the file will
# be generated via linear extrapolation.
Provided_Portion = 0.75
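# INFO (added illustration): a minimal sketch of the split, using a
# hypothetical 4000-sample stand-in clip. Everything before the cut index
# is what we hand the model; everything after it is what the model must
# generate:
demo_clip = np.arange(4000)                   # hypothetical stand-in clip
cut = int(Provided_Portion * len(demo_clip))  # 3000 when Provided_Portion=0.75
print(len(demo_clip[:cut]), len(demo_clip[cut:]))  # 3000 1000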
# INFO: You have to download the dataset (audio files) from the website:
# https://github.com/Jakobovski/free-spoken-digit-dataset
#https://github.com/anarayanan86/Microsoft_DAT210x/blob/master/Module-5---Data-Modeling/Module5LR/assignment10.py
#
# TODO: Create a regular ol' Python List called 'zero'
# Loop through the dataset and load up all 50 of the 0_jackson*.wav files
# For each audio file, simply append the audio data (not the sample_rate,
# just the data!) to your Python list 'zero':
#
zero = []
import os
recordings_dir = 'C:/Users/mckinns/Documents/GitHub/free-spoken-digit-dataset/recordings/'
for fname in os.listdir(recordings_dir):
    if fname.startswith('0_jackson'):
        sample_rate, audio_data = wavfile.read(os.path.join(recordings_dir, fname))
        zero.append(audio_data)
print(len(zero))
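# INFO (added sanity check): the clips are not length-normalized, which is
# why the hard chop below is needed. A quick look at the spread of lengths
# (the exact numbers depend on your copy of the dataset):
lengths = [len(clip) for clip in zero]
print(min(lengths), max(lengths))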
#
# TODO: Just for a second, convert zero into a DataFrame. When you do
# so, set the dtype to np.int16, since the input audio files are 16
# bits per sample. If you don't know how to do this, read up on the docs
# here:
# http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html
#
# Since these audio clips are unfortunately not length-normalized,
# we're going to have to just hard chop them to all be the same length.
# Since Pandas would have inserted NaNs at any spot to make zero a
# perfectly rectangular [n_observed_samples, n_audio_samples] array,
# do a dropna on the column axis here. Then, convert zero back into an
# NDArray using .values
#
zero = pd.DataFrame(data=zero, dtype=np.int16)
zero.dropna(axis=1, inplace=True)
zero = zero.values
print(type(zero))
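# INFO (added illustration): a tiny, made-up example of what the DataFrame
# round-trip above does. pandas NaN-pads the shorter row, and dropna(axis=1)
# then drops every column containing a NaN, truncating all rows to the
# length of the shortest clip:
demo = pd.DataFrame(data=[[1, 2, 3], [4, 5]])  # row 2 becomes [4, 5, NaN]
print(demo.dropna(axis=1))                     # only columns 0 and 1 survive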
#
# TODO: It's important to know how many audio_samples long the
# data is now. 'zero' is currently shaped [n_samples, n_audio_samples],
# so get the n_audio_samples count and store it in a variable called
# n_audio_samples
#
n_audio_samples = zero.shape[1]
print(n_audio_samples)  # 4087
#
# TODO: Create your linear regression model here and store it in a
# variable called 'model'. Don't actually train or do anything else
# with it yet:
#
from sklearn import linear_model
model = linear_model.LinearRegression()
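# INFO (added note): LinearRegression supports multi-target regression out
# of the box -- when y has shape [n_samples, n_targets], fit() solves an
# independent least-squares problem per target, and coef_ comes back shaped
# [n_targets, n_features]. A minimal sketch with made-up toy shapes:
toy = linear_model.LinearRegression()
toy.fit(np.random.rand(10, 4), np.random.rand(10, 3))
print(toy.coef_.shape)  # (3, 4): one coefficient row per target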
#
# INFO: There are 50 takes of each clip. You want to pull out just one
# of them, randomly, and that one will NOT be used in the training of
# your model. In other words, the one file we'll be testing / scoring
# on will be an unseen sample, independent of the rest of your
# training set:
from sklearn.utils.validation import check_random_state
rng = check_random_state(7) # Leave this alone until you've submitted your lab
random_idx = rng.randint(zero.shape[0])
test = zero[random_idx]
train = np.delete(zero, [random_idx], axis=0)
#
# TODO: Print out the shape of train, and the shape of test
# train will be shaped: [n_samples, n_audio_samples], where
# n_audio_samples are the 'features' of the audio file
# test will be shaped [n_audio_samples], since it is a single
# sample (audio file, i.e., observation).
#
print "Shapes of train and test, respectively:", train.shape, test.shape
#
# INFO: The test data will have two parts, X_test and y_test. X_test is
# going to be the first portion of the test audio file, which we will
# be providing the computer as input. y_test, the "label" if you will,
# is going to be the remaining portion of the audio file. This way,
# the computer will use linear regression to derive the missing
# portion of the sound file based on the training data it has received!
#
# Save the original 'test' clip, the one you're about to chop a
# portion off of, so that you can compare it to the 'patched' clip once
# you've generated it. HINT: you should have grabbed the sample_rate
# when you were loading up the .wav files:
wavfile.write('Original Test Clip.wav', sample_rate, test)
#
# TODO: Prepare the TEST data by creating a slice called X_test. It
# should have Provided_Portion * n_audio_samples audio sample features,
# taken from your test audio file, currently stored in the variable
# 'test'. In other words, grab the FIRST Provided_Portion *
# n_audio_samples audio features from test and store it in X_test.
#
X_test = test[:int(Provided_Portion * n_audio_samples)]
#
# TODO: If the first Provided_Portion * n_audio_samples features were
# stored in X_test, then we need to also grab the *remaining* audio
# features and store them in y_test. With the remaining features stored
# there, we will be able to compute an R^2 "score" of how well our
# algorithm did in completing the sound file.
#
y_test = test[int(Provided_Portion * n_audio_samples):]
#
# TODO: Duplicate the same process for X_train, y_train. The only
# differences being: 1) You will be getting your audio data from
# 'train' instead of from 'test', 2) Remember the shape of train that
# you printed out earlier? You want to do this slicing but for ALL
# samples (observations). For each observation, you want to slice
# the first Provided_Portion * n_audio_samples audio features into
# X_train, and the remaining ones go into y_train. All of this should be
# accomplishable using regular indexing, in two lines of code.
#
X_train = train[:, :int(Provided_Portion * n_audio_samples)]
y_train = train[:, int(Provided_Portion * n_audio_samples):]
#
# TODO: SciKit-Learn gets mad if you don't supply your training
# data in the form of 2D arrays: [n_samples, n_features].
#
# So if you only have one SAMPLE, as is the case with X_test
# and y_test, then by calling .reshape(1, -1), you can turn
# [n_features] into [1, n_features].
#
# On the other hand, if you only have one FEATURE, which currently
# doesn't apply, you can call .reshape(-1, 1) on your data to turn
# [n_samples] into [n_samples, 1]:
#
X_test = X_test.reshape(1, -1)
y_test = y_test.reshape(1, -1)
#
# TODO: Fit your model using your training data and label:
#
model.fit(X_train, y_train)
#
# TODO: Use your model to predict the 'label' of X_test. Store the
# resulting prediction in a variable called y_test_prediction
#
y_test_prediction = model.predict(X_test)
# INFO: SciKit-Learn will use float64 to generate your predictions
# so let's take those values back to int16:
y_test_prediction = y_test_prediction.astype(dtype=np.int16)
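# INFO (optional safeguard, not part of the original lab): casting float64
# straight to int16 overflows in a platform-dependent way for any value
# outside [-32768, 32767]. If the extrapolation overshoots, clipping BEFORE
# the cast avoids wraparound artifacts -- use this in place of the cast
# above if you hear them:
# y_test_prediction = np.clip(y_test_prediction,
#                             np.iinfo(np.int16).min,
#                             np.iinfo(np.int16).max).astype(np.int16)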
#
# TODO: Score how well your prediction would do for some good laughs,
# by passing in your test data and test label (y_test).
#
score = model.score(X_test, y_test)
print "Extrapolation R^2 Score: ", score
#
# First, take the first Provided_Portion portion of the test clip, the
# part you fed into your linear regression model. Then, stitch that
# together with the abomination the predictor model generated for you,
# and then save the completed audio clip:
completed_clip = np.hstack((X_test, y_test_prediction))
wavfile.write('Extrapolated Clip.wav', sample_rate, completed_clip[0])
#
# INFO: Congrats on making it to the end of this crazy lab =) !
#