# Opp_data_handler.py
import time

try:
    import cPickle as cp
except ImportError:  # Python 3 has no cPickle module; pickle is the replacement
    import pickle as cp

import numpy as np
# From: https://github.com/sussexwearlab/DeepConvLSTM
# Which is from http://www.johnvinyard.com/blog/?p=268
from numpy.lib.stride_tricks import as_strided as ast
def one_hot(y, n_classes=None):
    """convert label from dense to one hot

    e.g.: [[5], [0], [3]] --> [[0, 0, 0, 0, 0, 1], [1, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0]]

    argument:
        y: dense labels, shape: [sample_num] or [sample_num,1]. Values are
            truncated to integers before being used as column indices.
        n_classes: optional width of the result. When None (default) the
            width is inferred as max(y) + 1, so two label sets with
            different maxima give matrices of different widths.
    return:
        one_hot_label: ndarray of floats, shape: [sample_num,n_classes]
    raise:
        ValueError: if n_classes is smaller than max(y) + 1
    """
    labels = np.asarray(y)
    # Cast to a wide integer type first: the maximum of a uint8 array could
    # not be safely incremented, and a float maximum is rejected by np.eye.
    labels = labels.reshape(len(labels)).astype(np.int64)

    # An empty label set has no maximum; treat it as needing zero columns.
    needed = int(labels.max()) + 1 if labels.size else 0
    if n_classes is None:
        n_classes = needed
    elif n_classes < needed:
        raise ValueError(
            'n_classes=%d is too small for labels up to %d' % (n_classes, needed - 1))

    return np.eye(n_classes)[labels]  # Returns FLOATS
def norm_shape(shape):
    '''
    Express a numpy-style shape as a tuple, whether it was given as a
    single number or as a sequence.

    Parameters
        shape - an int, or a tuple of ints

    Returns
        a shape tuple

    Raises
        TypeError if shape is neither convertible by int() nor iterable
    '''
    # Try the scalar interpretation first, then the sequence one; a
    # TypeError from one converter simply means "try the next".
    converters = (lambda value: (int(value),), tuple)
    for convert in converters:
        try:
            return convert(shape)
        except TypeError:
            continue

    raise TypeError('shape must be an int, or a tuple of ints')
def sliding_window(a,ws,ss = None,flatten = True):
    '''
    Return a sliding window over a in any number of dimensions

    Parameters:
        a  - an n-dimensional numpy array
        ws - an int (a is 1D) or tuple (a is 2D or greater) representing the size
             of each dimension of the window
        ss - an int (a is 1D) or tuple (a is 2D or greater) representing the
             amount to slide the window in each dimension. If not specified, it
             defaults to ws.
        flatten - if True, all slices are flattened, otherwise, there is an
                  extra dimension for each dimension of the input.

    Returns
        an array containing each n-dimensional window from a. With
        flatten=True, every axis of length 1 is dropped from the result
        (including the leading "number of windows" axis when only one window
        fits). With flatten=False the raw as_strided result is returned;
        NOTE(review): treat it as read-only, see NumPy's as_strided warnings.

    Raises
        ValueError if a.shape, ws and ss differ in length, if ws exceeds a in
        any dimension, or if any step in ss is not positive.
    '''
    if ss is None:
        # ss was not provided. the windows will not overlap in any direction.
        ss = ws

    # convert ws, ss, and a.shape to numpy arrays so that we can do math in every
    # dimension at once.
    ws = np.array(norm_shape(ws))
    ss = np.array(norm_shape(ss))
    shape = np.array(a.shape)

    # ensure that ws, ss, and a.shape all have the same number of dimensions
    ls = [len(shape), len(ws), len(ss)]
    if len(set(ls)) != 1:
        raise ValueError(
            'a.shape, ws and ss must all have the same length. They were %s' % str(ls))

    # ensure that ws is smaller than a in every dimension
    if np.any(ws > shape):
        raise ValueError(
            'ws cannot be larger than a in any dimension. '
            'a.shape was %s and ws was %s' % (str(a.shape), str(ws)))

    # a zero step would divide by zero below and a negative one yields a
    # nonsensical window count, so reject both up front
    if np.any(ss <= 0):
        raise ValueError('ss must be positive in every dimension. ss was %s' % str(ss))

    # how many slices will there be in each dimension?
    counts = ((shape - ws) // ss) + 1
    # the shape of the strided array will be the number of slices in each dimension
    # plus the shape of the window (tuple addition). Plain ints are used so no
    # numpy array ever has to be squeezed through int().
    newshape = tuple(int(n) for n in counts) + tuple(int(w) for w in ws)
    # the strides tuple will be the array's strides multiplied by step size, plus
    # the array's strides (tuple addition)
    newstrides = tuple(int(s) for s in np.array(a.strides) * ss) + tuple(a.strides)
    strided = ast(a, shape = newshape, strides = newstrides)
    if not flatten:
        return strided

    # Collapse strided so that it has one more dimension than the window. I.e.,
    # the new array is a flat list of slices.
    meat = len(ws)
    # np.prod replaces np.product, which NumPy 2.0 removed
    firstdim = (int(np.prod(newshape[:-meat])),)
    dim = firstdim + newshape[-meat:]
    # remove any dimensions with size 1; built as a tuple because filter()
    # returns an iterator on Python 3, which reshape() does not accept
    dim = tuple(d for d in dim if d != 1)
    return strided.reshape(dim)
#--------------------------------------------
# Neural net's config.
#--------------------------------------------
class Config(object):
    """
    Plain container for the network's hyper-parameters.

    The constructor takes the training and testing feature matrices and
    derives the size-related settings from them; everything else is a
    fixed value set here.
    """

    def __init__(self, X_train, X_test):
        n_train, n_test = len(X_train), len(X_test)
        first_series = X_train[0]

        # ---- Data shaping ----
        self.train_count = n_train  # nb of training series
        self.test_data_count = n_test  # nb of testing series
        self.n_steps = len(first_series)  # nb of time_steps per series
        self.n_classes = 18  # Final output classes, one classification per series

        # ---- Training ----
        self.learning_rate = 0.001
        self.lambda_loss_amount = 0.005
        self.training_epochs = 100
        self.batch_size = 100
        self.clip_gradients = 15.0
        self.gradient_noise_scale = None
        # Dropout is added on inputs and after each stacked layer
        # (but not between residual layers). Alternative tried: ** (1/3.0)
        self.keep_prob_for_dropout = 0.85

        # ---- Linear+relu structure ----
        self.bias_mean = 0.3
        # Recommended between 0.1 and 1.0, or switch to a xavier initializer
        self.weights_stddev = 0.2

        ########
        # NOTE: if any of the parameters below are changed, it is best to
        #       re-tune every parameter in the "Training" section above so
        #       that architectures are only compared once optimised.
        ########

        # ---- LSTM structure ----
        self.n_inputs = len(first_series[0])  # Features count
        self.n_hidden = 28  # nb of neurons inside the neural network
        self.use_bidirectionnal_cells = True  # Use bidir in every LSTM cell, or not

        # ---- High-level deep architecture ----
        self.also_add_dropout_between_stacked_cells = False  # True
        # NOTE: a value of exactly 1 (int) for the 2 high-level parameters below
        #       totally disables them and results in only 1 starting LSTM.
        # self.n_layers_in_highway = 1  # Number of residual connections to the LSTMs (highway-style), done for each stacked block (inside them).
        # self.n_stacked_layers = 1  # Stack multiple blocks of residual layers.
#--------------------------------------------
# Dataset-specific constants and functions
#--------------------------------------------
# Sensor channel counts for the OPPORTUNITY challenge data (plain / with filtering)
NB_SENSOR_CHANNELS = 113
NB_SENSOR_CHANNELS_WITH_FILTERING = 149

# Number of classes in the gesture recognition problem
NUM_CLASSES = 18

# Sliding-window segmentation: window length in samples
SLIDING_WINDOW_LENGTH = 24

# Length of the input sequence after convolutional operations
FINAL_SEQUENCE_LENGTH = 8

# Sliding-window segmentation: step is half a window; the "short" step is
# currently the same value
SLIDING_WINDOW_STEP = SLIDING_WINDOW_LENGTH // 2
SLIDING_WINDOW_STEP_SHORT = SLIDING_WINDOW_STEP

# Batch size
BATCH_SIZE = 100

# Convolutional layers: number of filters and filter size
NUM_FILTERS = 64
FILTER_SIZE = 5

# Number of units in the long short-term recurrent layers
NUM_UNITS_LSTM = 128
def load_dataset(filename):
    """Load the pickled train/test split stored in `filename`.

    The pickle must hold a sequence whose first item is (X_train, y_train)
    and whose second item is (X_test, y_test), each an ndarray.

    argument:
        filename: path of the pickle file
    return:
        X_train, y_train, X_test, y_test, with the inputs cast to float32
        and the targets cast to uint8
    """
    import sys  # local import: only used to pick the pickle arguments below

    # Python 3 needs encoding='latin1' to unpickle NumPy arrays that were
    # pickled by Python 2; Python 2's cPickle.load accepts no such argument.
    load_kwargs = {'encoding': 'latin1'} if sys.version_info[0] >= 3 else {}

    # NOTE(security): unpickling can execute arbitrary code. Only load
    # dataset files that come from a trusted source.
    # open() replaces the Python-2-only file() builtin, and the with-block
    # closes the handle even when loading fails.
    with open(filename, 'rb') as f:
        data = cp.load(f, **load_kwargs)

    X_train, y_train = data[0]
    X_test, y_test = data[1]

    print(" ..from file {}".format(filename))
    print(" ..reading instances: train {0}, test {1}".format(X_train.shape, X_test.shape))

    X_train = X_train.astype(np.float32)
    X_test = X_test.astype(np.float32)

    # The targets are cast to uint8 for GPU compatibility.
    y_train = y_train.astype(np.uint8)
    y_test = y_test.astype(np.uint8)

    return X_train, y_train, X_test, y_test
print("Loading data...")
X_train, y_train, X_test, y_test = load_dataset('data/oppChallenge_gestures.data')

# The training inputs must have one of the two known channel counts.
assert X_train.shape[1] in (NB_SENSOR_CHANNELS_WITH_FILTERING, NB_SENSOR_CHANNELS)
def opp_sliding_window(data_x, data_y, ws, ss):
    """Segment inputs and targets into windows of ws samples, stepping by ss.

    argument:
        data_x: 2-D input array; every window spans all of its columns
        data_y: 1-D array of dense labels aligned with the rows of data_x
        ws: window length along the first axis
        ss: step between consecutive windows along the first axis
    return:
        data_x: the windowed inputs as float32
        data_y: one-hot targets, one row per window, taken from the label
            of the last sample in each window
    """
    data_x = sliding_window(data_x, (ws, data_x.shape[1]), (ss, 1))
    # Each window is labelled with the label of its last sample.
    data_y = np.asarray([[i[-1]] for i in sliding_window(data_y, ws, ss)])
    data_x = data_x.astype(np.float32)
    data_y = one_hot(data_y.reshape(len(data_y)).astype(np.uint8))
    # Report the arrays produced by this call. The previous message printed
    # the module-level X_test / y_test shapes and was labelled "(testing)"
    # even when the training split was being windowed.
    print(" ..after sliding window: inputs {0}, targets {1}".format(data_x.shape, data_y.shape))
    return data_x, data_y
#--------------------------------------------
# Loading dataset
#--------------------------------------------
# Sensor data is segmented using a sliding window mechanism
# The test split is windowed first, with the "short" step; the training split
# uses the regular step.
X_test, y_test = opp_sliding_window(X_test, y_test, SLIDING_WINDOW_LENGTH, SLIDING_WINDOW_STEP_SHORT)
X_train, y_train = opp_sliding_window(X_train, y_train, SLIDING_WINDOW_LENGTH, SLIDING_WINDOW_STEP)

# Length of the first windowed training series.
series_size = len(X_train[0])

for mat in [X_train, y_train, X_test, y_test]:
    # print() called with one argument emits the same text on Python 2 and 3;
    # the former `print mat.shape` statement is a SyntaxError on Python 3.
    print(mat.shape)
def get_Opp_data_with_series_size():
    """Return the module-level windowed data and series length.

    return:
        (X_train, y_train, X_test, y_test, series_size)
    """
    bundle = (X_train, y_train, X_test, y_test, series_size)
    return bundle
if __name__ == '__main__':
    # Running the module as a script prints the prepared dataset tuple.
    opp_data = get_Opp_data_with_series_size()
    print(opp_data)