mnist.py
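
"""Train a 10-class softmax linear classifier on MNIST downscaled to 7x7.

Model, loss, and reverse-mode gradients are built on this repository's own
matrix, autodiff, and scalar modules; TensorFlow is used only to download
the dataset.
"""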
import math
import random
import time

import numpy as np
import skimage.transform
from tensorflow.examples.tutorials.mnist import input_data

import autodiff
import scalar
from matrix import Matrix

def load_mnist_dataset(train_xs, train_ys):
    # Optionally subsample for faster experiments:
    #train_xs = train_xs[:5000,:]
    #train_ys = train_ys[:5000,:]
    # Downscale each 28x28 image to 7x7 by averaging non-overlapping 4x4 blocks.
    resized = np.zeros((train_xs.shape[0], 7, 7))
    for i in range(train_xs.shape[0]):
        fullim = train_xs[i].copy()
        fullim.resize((28, 28))
        resized[i] = skimage.transform.downscale_local_mean(fullim, (4, 4))
    resized.resize(resized.shape[0], 7 * 7)
    train_xs = resized
    # Wrap the numpy arrays in this repo's Matrix type.
    train_xs = Matrix(train_xs.shape[0], train_xs.shape[1], train_xs.tolist())
    train_ys = Matrix(train_ys.shape[0], train_ys.shape[1], train_ys.tolist())
    return train_xs, train_ys

def load_mnist():
    # TF 1.x tutorial helper; downloads MNIST into MNIST_data/ if absent.
    mn = input_data.read_data_sets("MNIST_data/", one_hot=True)
    train_xs, train_ys = load_mnist_dataset(mn.train._images, mn.train._labels)
    test_xs, test_ys = load_mnist_dataset(mn.test._images, mn.test._labels)
    return train_xs, train_ys, test_xs, test_ys

def softmax(x):
    # Written as e**s rather than math.exp(s) so the scalar type's pow
    # overloading handles the operation and it stays differentiable
    # under autodiff.
    m = x.apply_copy(lambda s: math.e ** s)
    s = m.reduce_sum()
    return m / s
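
# Note: softmax is shift-invariant (softmax(x) == softmax(x - c)), so a more
# numerically robust variant subtracts the max entry before exponentiating.
# A sketch, assuming a hypothetical Matrix.reduce_max() analogous to
# reduce_sum() (not part of this repo's Matrix API):
#   mx = x.reduce_max()
#   m = x.apply_copy(lambda s: math.e ** (s - mx))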

def linear_model(params, x):
    # Reshape the flat parameter vector into a 10 x (input_dim) weight matrix.
    W = params.reshape(10, params.rows // 10)
    return softmax(W.matmul(x))

def error_linear_model(params, x, y_target):
    # Cross-entropy between the softmax output and the one-hot target;
    # 1e-8 guards the log against zero probabilities.
    y = linear_model(params, x)
    loss = 0
    for r in range(y.rows):
        loss += -y_target[r] * scalar.scalar_log(y[r] + 1e-8)
    return loss
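
# For reference: with logits z = W.matmul(x) and y = softmax(z), the gradient
# of this cross-entropy loss with respect to z reduces to (y - y_target), up
# to the 1e-8 smoothing term; reverse-mode autodiff computes the same quantity
# by traversing the recorded graph.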

def error_batch_linear_model(params, xs, ys):
    # Mean cross-entropy over the rows of the batch.
    total_error = 0
    for i in range(xs.rows):
        total_error += error_linear_model(params, xs[i].transpose(), ys[i].transpose())
    return total_error / xs.rows

def eval_model(params, xs, ys):
    # Classification accuracy: fraction of rows whose argmax prediction
    # matches the one-hot label.
    correct = 0
    for i in range(xs.rows):
        prediction = linear_model(params, xs[i].transpose())
        max_value = -1
        predicted_label = 0
        for j in range(prediction.rows):
            if prediction[j] > max_value:
                max_value = prediction[j]
                predicted_label = j
        if ys[i, predicted_label] == 1.0:
            correct += 1
    return correct / xs.rows
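
# Note: initializing max_value to -1 in eval_model is safe only because
# softmax outputs lie in (0, 1), so the first comparison always succeeds.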

def train_linear_model(xs, ys, test_xs, test_ys):
    params = Matrix(xs.cols * ys.cols, 1)
    batch_size = 100
    print('Initial accuracy', eval_model(params, test_xs, test_ys))
    batch_indices = [i for i in range(xs.rows)]
    # Sized to match params (the original used xs.cols, which would not match
    # f_grad's shape if the momentum update below were enabled).
    momentum = Matrix(params.rows, 1)
    step_size = 0.5
    for epoch in range(5):
        random.shuffle(batch_indices)
        num_batches = len(batch_indices) // batch_size
        start = time.time()
        for i in range(num_batches):
            batch = batch_indices[i * batch_size:(i * batch_size + batch_size)]
            batch_xs = xs.gather_rows(batch)
            batch_ys = ys.gather_rows(batch)
            f_val, f_grad, opcount = autodiff.compute_gradients(
                error_batch_linear_model, [params, batch_xs, batch_ys], 0,
                reverse_mode=True)
            #f_val, f_grad = autodiff.f_d(error_batch_linear_model, [params, batch_xs, batch_ys], 0)
            #momentum = 0.9*momentum + f_grad * -step_size
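            # Enabling classical momentum would pair the commented update
            # above with "params += momentum" in place of the plain gradient
            # step below (a sketch; the script as written uses plain SGD).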
            params += f_grad * -step_size
            if (i + 1) % 10 == 0:
                duration = time.time() - start
                start = time.time()
                print("Epoch %d, Batch %d (of %d) Error %f e_grad_norm=%f (opcount=%d) (%fs)"
                      % (epoch + 1, i + 1, num_batches, f_val,
                         f_grad.euclidean_norm(), opcount, duration))
        accuracy = eval_model(params, test_xs, test_ys)
        print("Epoch %d accuracy %f" % (epoch + 1, accuracy))

def main():
    print("Loading data...")
    train_xs, train_ys, test_xs, test_ys = load_mnist()
    print("Training")
    train_linear_model(train_xs, train_ys, test_xs, test_ys)


if __name__ == "__main__":
    main()
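
# Usage: python mnist.py
# Requires TensorFlow 1.x (tensorflow.examples.tutorials.mnist was removed in
# TF 2.x) plus numpy and scikit-image; the MNIST archives are downloaded into
# ./MNIST_data/ on first run.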