-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMINST_kerbas.py
158 lines (126 loc) · 5.6 KB
/
MINST_kerbas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
from keras.datasets import mnist
from keras.models import Sequential, Model
from keras.layers import Dense
from keras.utils import np_utils
import matplotlib.pyplot as plt
import numpy as np
import keras
import random
# Load MNIST data #
(xtrain, ytrain), (xtest, ytest) = mnist.load_data()
# images: xtrain.shape = (60000, 28, 28), xtest.shape = (10000, 28, 28)
# labels: ytrain.shape = (60000,),        ytest.shape = (10000,)

# Vectorize image data and normalize features #
# Every image is flattened into one row of pixels, cast to float32 and
# scaled from the 0..255 byte range down to 0..1.
num_pixels = xtrain.shape[1] * xtrain.shape[2]
# num_pixels = 784
xtrain = xtrain.reshape(len(xtrain), num_pixels).astype('float32') / 255
# xtrain.shape = (60000, 784)
xtest = xtest.reshape(len(xtest), num_pixels).astype('float32') / 255
# xtest.shape = (10000, 784)

# Transform the vector of class labels into one-hot (binary) rows #
ytrain, ytest = np_utils.to_categorical(ytrain), np_utils.to_categorical(ytest)
# ytrain.shape = (60000, 10), ytest.shape = (10000, 10)

# One output unit per one-hot column
num_classes = ytest.shape[1]
# num_classes = 10
# Neural Networks #
## Numbers correspond to slack message from Justin #
## 0 ##
def three_layer_model():
    """Build the baseline network: two sigmoid hidden layers plus a softmax output.

    Both hidden layers have ``num_pixels`` units (784 for MNIST) and the output
    layer has ``num_classes`` units. The model is returned uncompiled.
    """
    layers = [
        # Hidden Layer 1 # (also declares the width of the input vector)
        Dense(units=num_pixels, input_dim=num_pixels, activation='sigmoid'),
        # Hidden Layer 2 #
        # sigmoid is 1/(1+e^-z): it squashes values into (0, 1), at the cost of
        # being prone to vanishing gradients
        Dense(units=num_pixels, activation='sigmoid'),
        # Output Layer #
        # softmax rescales the outputs into probabilities, one per category
        # the input image could belong to
        Dense(units=num_classes, activation='softmax'),
    ]
    return Sequential(layers)
## 1 ##
def three_layer_model_1000():
    """Build the baseline variant whose second hidden layer has 1000 sigmoid units.

    The model is returned uncompiled.

    NOTE(review): the first hidden layer still has ``num_pixels`` units, so the
    second layer's weight matrix is num_pixels x 1000. The original remark
    called this a "1000x1000 layer" -- confirm whether the first hidden layer
    was meant to be 1000 units wide as well.
    """
    net = Sequential()
    hidden_1 = Dense(units=num_pixels, input_dim=num_pixels, activation='sigmoid')
    hidden_2 = Dense(units=1000, activation='sigmoid')
    output = Dense(units=num_classes, activation='softmax')
    for layer in (hidden_1, hidden_2, output):
        net.add(layer)
    return net
## 2 ##
def three_layer_model_tanh():
    """Build the baseline architecture with tanh hidden activations.

    Two hidden layers of ``num_pixels`` units each, followed by a softmax
    output of ``num_classes`` units. The model is returned uncompiled.
    """
    return Sequential([
        # Hidden layers; the first one also declares the input width
        Dense(units=num_pixels, input_dim=num_pixels, activation='tanh'),
        Dense(units=num_pixels, activation='tanh'),
        # Output layer
        Dense(units=num_classes, activation='softmax'),
    ])
def three_layer_model_relu():
    """Build the baseline architecture with relu hidden activations.

    Two hidden layers of ``num_pixels`` units each, followed by a softmax
    output of ``num_classes`` units. The model is returned uncompiled.
    """
    net = Sequential()
    # relu does not saturate for positive inputs, which eases the vanishing
    # gradient problem seen with sigmoid; leaky relu would additionally keep
    # a small gradient for negative inputs
    first_hidden = Dense(units=num_pixels, input_dim=num_pixels, activation='relu')
    second_hidden = Dense(units=num_pixels, activation='relu')
    net.add(first_hidden)
    net.add(second_hidden)
    net.add(Dense(units=num_classes, activation='softmax'))
    return net
## 3 ##
def three_layer_model_residual(threshold=0.4):
    """Build a relu network whose second hidden layer is included at random.

    A single number is drawn uniformly from [0, 1] when the model is built.
    If the draw is greater than or equal to ``threshold`` the middle hidden
    layer is added; otherwise the network has only one hidden layer. The
    decision is made once per call and is fixed for the returned model.

    Despite the name, no skip (residual) connection is created -- the model is
    a plain ``Sequential`` stack.

    Args:
        threshold: Cut-off compared against the random draw. Defaults to 0.4,
            which includes the middle layer in roughly 60% of calls. Exposed
            as a parameter so it can be tuned for best results.

    Returns:
        An uncompiled ``Sequential`` model with one or two relu hidden layers
        of ``num_pixels`` units and a softmax output of ``num_classes`` units.
    """
    prob = random.uniform(0, 1)
    model = Sequential()
    model.add(Dense(units=num_pixels, input_dim=num_pixels, activation='relu'))
    if prob >= threshold:
        # input_dim is only meaningful on the first layer of a Sequential
        # model, so it is not repeated here
        model.add(Dense(units=num_pixels, activation='relu'))
    model.add(Dense(units=num_classes, activation='softmax'))
    return model
## 4 ##
def five_layer_model():
    """Build a deeper network: four sigmoid hidden layers plus a softmax output.

    Every hidden layer has ``num_pixels`` units; the output layer has
    ``num_classes`` units. The model is returned uncompiled.
    """
    net = Sequential()
    # Hidden Layer 1 # -- the only one that declares the input width
    net.add(Dense(num_pixels, input_dim=num_pixels, activation='sigmoid'))
    # Hidden Layers 2,3,4 # -- identical, so they are added in a loop
    for _ in range(3):
        net.add(Dense(num_pixels, activation='sigmoid'))
    # Output Layer #
    net.add(Dense(units=num_classes, activation='softmax'))
    return net
# Build one instance of every architecture up front so that any of them can
# be handed to evaluate() further down.
model3 = three_layer_model()
model1000 = three_layer_model_1000()
modeltanh = three_layer_model_tanh()
modelrelu = three_layer_model_relu()
modelresidual = three_layer_model_residual()
model5 = five_layer_model()

# Tracks which model is being used; evaluate() prints this value
i = 1
def evaluate(model, batch_size=128, epochs=5):
    """Compile, train and test ``model``, then plot its accuracy per epoch.

    The model is compiled with categorical cross-entropy loss and the SGD
    optimizer, fitted on ``xtrain``/``ytrain`` with 10% of the training data
    held out for validation, and finally scored on ``xtest``/``ytest``.
    Training and validation accuracy are plotted, and the run configuration,
    test loss and test accuracy are printed.

    Args:
        model: An uncompiled Keras model accepting the flattened images.
        batch_size: Batch size passed to ``model.fit``.
        epochs: Number of epochs passed to ``model.fit``.

    Returns:
        None. Results are reported through the plot and the console.
    """
    model.summary()
    # categorical_crossentropy: used because the categories are one-hot encoded.
    # sgd vs adam (author's observation from earlier runs): sgd gave lower
    # accuracy on the 5 layer model, while adam gave better results even there.
    model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
    history = model.fit(
        xtrain,
        ytrain,
        validation_split=0.1,
        epochs=epochs,
        batch_size=batch_size,
        verbose=False,
    )
    loss, accuracy = model.evaluate(xtest, ytest, verbose=False)

    # Depending on the Keras version the metric is recorded as 'acc' or as
    # 'accuracy'; use whichever key is present instead of raising KeyError.
    acc_key = 'acc' if 'acc' in history.history else 'accuracy'
    plt.plot(history.history[acc_key])
    plt.plot(history.history['val_' + acc_key])
    plt.title('model accuracy')
    plt.ylabel('accuracy')

    # Report the run configuration on the console. These prints used to be
    # passed as the arguments of plt.xlabel, which handed it three None values.
    print('epochs\n', epochs)
    print('\nbatch size\n', batch_size)
    print('\nmodel\n', i)
    plt.xlabel('epoch')

    plt.legend(['training', 'validation'], loc='best')
    plt.show()
    print()
    print(f'Test loss: {loss:.3}')
    print(f'Test accuracy: {accuracy:.3}')
## EDIT 'model3' BELOW TO TRY THE DIFFERENT NEURAL NETS ##
# choices: model3 / model1000 / modeltanh / modelrelu / modelresidual / model5
## EDIT 'batch_size' AND 'epochs' AT YOUR DISCRETION ##
# batch_size: number of training samples used for each gradient update
# epochs: number of full passes over the training data
evaluate(
    model=model3,
    batch_size=64,
    epochs=3,
)

# Optional sweep over the remaining models, left disabled.
# NOTE: zip pairs the two lists element-wise -- (32,3), (64,5), (128,7) --
# so this is three runs per model, not a full batch-size x epochs grid.
#models = [model1000,modeltanh,modelrelu,modelresidual,model5]
#batches = [32,64,128]
#epochs = [3,5,7]
#for m in models:
# for b,e in zip(batches,epochs):
# evaluate(model=m,batch_size=b,epochs=e)
#i=i+1