MiniNN_Ian_Gluesing #11

Open · wants to merge 22 commits into master
Commits
4c2e891
First changes to MiniNN.py
IanGluesing Nov 21, 2020
0772d3a
Added a few comments and removed code having to do with originally de…
IanGluesing Nov 21, 2020
d716180
Rearrange some code so that the Ws depend on the length of input and …
IanGluesing Nov 22, 2020
4256cdc
Update a few comments
IanGluesing Nov 22, 2020
ed45fa8
Training and test set for the MNIST dataset
IanGluesing Nov 29, 2020
00ba2b1
First updates to MiniNN for MNIST dataset
IanGluesing Nov 29, 2020
b1135ee
Updated file to read input from training file and get inputs and outp…
IanGluesing Nov 29, 2020
2614c8c
Updated command to read in all input information
IanGluesing Nov 29, 2020
005704f
Read in data and split the data into labels and x values
IanGluesing Nov 29, 2020
a1bbc9e
Add comments and create a list to store all of the input sample objects
IanGluesing Nov 29, 2020
ba3f4c3
Remove more code in the main method that was already given to us
IanGluesing Nov 29, 2020
d99d3ff
Added list to store the average gradients within the MiniNN class
IanGluesing Nov 29, 2020
087b2e4
Began to change the train method to train for more than one sample
IanGluesing Nov 29, 2020
0143bc6
Added a new method to add and remove predictions for each sample with…
IanGluesing Nov 29, 2020
0045a05
Updated a few things to use numpy arrays
IanGluesing Nov 30, 2020
62e6c09
First updates to backprop method, generalized to a sample instead of a…
IanGluesing Nov 30, 2020
38a98d0
Update more of backprop method
IanGluesing Nov 30, 2020
e7f5f86
Update backprop and update weights to use the average gradient of all…
IanGluesing Nov 30, 2020
9f8a59d
First run through testing with one sample
IanGluesing Nov 30, 2020
4b32f63
Update to now use all samples
IanGluesing Nov 30, 2020
088773d
Can now predict for a list of samples
IanGluesing Nov 30, 2020
2a3fdf5
Updated to change number of samples trained on
IanGluesing Nov 30, 2020
200 changes: 126 additions & 74 deletions 6_Neural_Networks/MiniNN.py
@@ -45,16 +45,24 @@ def logistic_psi(self, x):
"""
return x * (1-x)
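Aside: the identity behind logistic_psi can be checked numerically. Since phi(z) = 1/(1 + e^-z) and the method receives the already-computed activation output x = phi(z), the derivative is phi'(z) = x * (1 - x). A minimal standalone check (illustrative values, not part of the diff):

import numpy
z = numpy.array([-1.0, 0.0, 2.0])
x = 1 / (1 + numpy.exp(-z))                  # logistic output phi(z)
h = 1e-6                                     # finite-difference step size
numeric = (1 / (1 + numpy.exp(-(z + h))) - x) / h
print(numpy.allclose(numeric, x * (1 - x), atol=1e-4))  # True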

def __init__(self, Ws=None):
def __init__(self, Ws=None, SampleList=None):
"""Initialize an NN

Ws: list of transfer matrices; SampleList: list of Sample objects used for training
"""
self.samples = SampleList
self.Ws = Ws
self.AverageGradients = []
self.setAveGradientsZero(self.Ws)
self.L = len(Ws) # number of layers
self.phi = self.logistic # same activation function for all neurons
self.psi = self.logistic_psi

def setAveGradientsZero(self, Ws):
# Reset the gradient accumulators: one zero matrix per transfer matrix
self.AverageGradients = []
for W in Ws:
self.AverageGradients.append(numpy.zeros(W.shape))

def feedforward(self, x, W, phi):
"""feedforward from previou layer output x to next layer via W and Phi
return an augmented out where the first element is 1, the bias
@@ -72,19 +80,19 @@ def feedforward(self, x, W, phi):
) # end of phi
)) # end of concatenate
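The body of feedforward is mostly collapsed in this view; from the closing lines above and the W shapes used elsewhere, it presumably computes phi(x @ W) and prepends the bias slot. A sketch under that assumption (the name feedforward_sketch is illustrative, not in the PR):

import numpy

def feedforward_sketch(x, W, phi):
    # x: augmented output of the previous layer (leading 1 is the bias)
    # W: (previous_size + 1, next_size) transfer matrix
    return numpy.concatenate(([1.0], phi(numpy.matmul(x, W))))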

def predict(self, X_0):
def predict(self, sample):
"""make prediction, and log the output of all neurons for backpropagation later

sample: a Sample object; its x field is the input vector, AUGMENTED
"""
Xs = [X_0]; X=X_0
sample.clearLayers()
X = sample.getX()
sample.addValueLayer(X)
# print (self.Ws)
for W in self.Ws:
# print (W,X, self.phi)
X = self.feedforward(X, W, self.phi)
Xs.append(X)
self.Xs = Xs
self.oracle = X[1:] # it is safe because Python preserves variables used in for-loops
sample.addValueLayer(X)

def backpropagate(self, delta_next, W_now, psi, x_now):
"""make on step of backpropagation
@@ -98,67 +106,58 @@ def backpropagate(self, delta_next, W_now, psi, x_now):
"""
delta_next = delta_next[1:] # drop the derivative of error on bias term

# first propagate error to the output of previou layer
# first propagate error to the output of previous layer
delta_now = numpy.matmul(W_now, delta_next) # transfer backward
# then propagate thru the activation function at previous layer
delta_now *= self.psi(x_now)
# Hadamard product; this ONLY works when the activation function is logistic
return delta_now
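To make the two propagation steps concrete, here is a small numeric run of the same computation with made-up values (the bias slot is element 0 throughout):

import numpy
delta_next = numpy.array([0.0, 0.2, -0.1])  # delta from the next layer, bias slot first
W_now = numpy.array([[0.4, 0.6],
                     [0.7, -0.4],
                     [-0.2, 0.3]])          # rows: bias + 2 neurons of this layer
x_now = numpy.array([1.0, 0.5, 0.8])        # augmented output of this layer
d = delta_next[1:]                          # drop the derivative on the bias
delta_now = numpy.matmul(W_now, d) * (x_now * (1 - x_now))
print(delta_now)                            # element 0 (bias slot) is unused upstream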

def get_deltas(self, target):
def get_deltas(self, sample):
"""Produce deltas at every layer

sample: a Sample object; its y field is the 1-D target array
delta : 1-D numpy array, delta at current layer
"""
delta = self.oracle - target # delta at output layer is prediction minus target
# only when activation function is logistic
delta = numpy.concatenate(([0], delta)) # artificially prepend the delta on bias to match that in non-output layers.
self.Deltas = [delta] # log delta's at all layers
sample.clearDeltas()
delta = numpy.subtract(sample.getOutputPrediction()[1:], sample.getY()) # delta at output layer is prediction minus target
# only when activation function is logistic
delta = numpy.concatenate(([0], delta)) # artificially prepend the bias on delta to match that in non-output layers.
sample.getDeltas().insert(0, delta)

for l in range(len(self.Ws)-1, -1, -1): # propagate the error backward
# technically, no need to loop to l=0 the input layer. But we do it anyway
# l is the layer index
W, X = self.Ws[l], self.Xs[l]
W, X = self.Ws[l], sample.getLayer(l)
delta = self.backpropagate(delta, W, self.psi, X)
self.Deltas.insert(0, delta) # prepend, because BACK-propagate

def print_progress(self):
"""print Xs, Deltas, and gradients after a sample is feedforwarded and backpropagated
"""
print ("\n prediction: ", self.oracle)
for l in range(len(self.Ws)+1):
print ("layer", l)
print (" X:", self.Xs[l], "^T")
print (" delta:", self.Deltas[l], "^T")
if l < len(self.Ws): # last layer has not transfer matrix
print (' W:', numpy.array2string(self.Ws[l], prefix=' W: '))
try: # because in first feedforward round, no gradient computed yet
# also, last layer has no gradient
print(' gradient:', numpy.array2string(self.Grads[l], prefix=' gradient: '))
except:
pass
sample.getDeltas().insert(0, delta) # prepend, because BACK-propagate

def update_weights(self):
def update_AverageGradientWeights(self, sample):
""" Given a sequence of Deltas and a sequence of Xs, compute the gradient of error on each transform matrix and update it using gradient descent

Note that the first element of each delta is for the bias term. It should not be involved in computing the gradient on any weight because the bias term is not connected to the previous layer.
"""
self.Grads = []
for l in range(len(Ws)): # l is layer index
x = self.Xs[l]
delta = self.Deltas[l+1]

for l in range(len(self.AverageGradients)): # l is layer index
x = sample.getLayer(l)
delta = sample.getDeltas()[l + 1]
# print (l, x, delta)
gradient = numpy.outer(x, delta[1:])
self.Ws[l] -= 1 * gradient # descent!

self.Grads.append(gradient)
self.AverageGradients[l] = numpy.add(self.AverageGradients[l], gradient)

# show that the new prediction will be better to help debug
# self.predict(self.Xs[0])
# print ("new prediction:", self.oracle)

def train(self, x, y, max_iter=100, verbose=False):
def averageSumOfGradients(self, size):
# Divide the accumulated gradient sums by the batch size to get the mean gradient
for i in range(len(self.AverageGradients)):
self.AverageGradients[i] = numpy.true_divide(self.AverageGradients[i], size)

def update_weights(self):
for l in range(len(self.Ws)): # use the instance's Ws, not the module-level Ws
self.Ws[l] -= 1 * self.AverageGradients[l]

def train(self, max_iter=100):
"""feedforward, backpropagation, and update weights
The train function now updates the NN using the stored sample list: each epoch feeds a batch of samples forward, backpropagates each one, and applies the averaged gradient (it no longer takes a single x, y pair).
@@ -170,55 +169,108 @@

"""
for epoch in range(max_iter):
print ("epoch", epoch, end=":")
self.predict(x) # forward
print (self.oracle)
self.get_deltas(y) # backpropagate
if verbose:
self.print_progress()
self.update_weights() # update weights, and new prediction will be printed each epoch
print("epoch " , epoch)
self.setAveGradientsZero(self.Ws)

numTrainingSamples = 100 # train on only the first 100 samples each epoch

for s in self.samples[:numTrainingSamples]:
self.predict(s) # forward

self.get_deltas(s)

self.update_AverageGradientWeights(s)


self.averageSumOfGradients(numTrainingSamples)
self.update_weights()
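Each epoch therefore performs one batch gradient-descent step: the per-sample gradients are summed, divided by the batch size, and subtracted from the weights. The update rule in isolation, with toy numbers:

import numpy
grads = [numpy.array([[0.2, -0.4]]),
         numpy.array([[0.6, 0.0]])]     # gradients from two samples
W = numpy.array([[1.0, 1.0]])
W -= sum(grads) / len(grads)            # subtract the mean gradient (learning rate 1)
print(W)                                # [[0.6 1.2]]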

def predictAll(self, predictionSet):

for s in predictionSet:
self.predict(s)
print(s.getOutputPrediction())

class Sample:
"""One training sample: augmented input x, target y, plus the per-layer activations and deltas logged during feedforward and backpropagation."""
def __init__(self, x, y):
self.x = numpy.array(x)
self.y = numpy.array(y)
self.currentValues = []
self.currentValues.append(numpy.array(x))
self.deltas = []

def getX(self):
return self.x

def getY(self):
return self.y

def addValueLayer(self, x):
self.currentValues.append(numpy.array(x))

def clearLayers(self):
self.currentValues = []

def getLayer(self, index):
return self.currentValues[index]

def getOutputPrediction(self):
# The last logged layer holds the network's (augmented) output
return self.currentValues[-1]

def getDeltas(self):
return self.deltas

def clearDeltas(self):
self.deltas = []
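A brief usage sketch of the Sample container defined above (values are illustrative):

s = Sample([1.0, 0.2, 0.7], [0, 1])   # augmented x (leading bias 1) and target y
s.clearLayers()
s.addValueLayer(s.getX())             # log the input layer, as predict() does
print(s.getLayer(0))                  # [1.  0.2 0.7]
print(s.getOutputPrediction())        # same array, since only one layer is logged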


if __name__ == "__main__":

# Transfer matrix from input layer to hidden layer 1
W_0 = numpy.array(([[.4, .6,], # the first row maps the bias term to the two neurons of the next layer
[.7, -.4],
[-.2, .3]]))

# Transfer matrix from hidden layer 1 to hidden layer 2
W_1 = numpy.array(([[.4, .6,], # the first row maps the bias term to the two neurons of the next layer
[.7, -.4],
[-.2, .3]]))
print("Reading in data...")
# Read in all data from the file
inArray = numpy.genfromtxt('train.csv', delimiter=',')
# Drop the first row (the CSV header); it is not needed
inArray = numpy.delete(inArray, obj=0, axis=0)
print("Done reading data...")

# Transfer matrix from hidden layer 2 to output layer
W_2 = numpy.array(([[-.3], # the first row maps the bias term to the two neurons of the next layer
[.5],
[.1]] ))
samps = []

Ws = [W_0, W_1, W_2]
MNN = MiniNN(Ws=Ws) # initialize an NN with the transfer matrices given
# Like the example, the user can choose the hidden-layer sizes; the first and last entries must be the input length (len(x) - 1) and the output length (len(y))
nonBiasTerms = [inArray.shape[1] - 1, 15, 15, 10]

# The training sample
x_0 = numpy.array(([1., 1, 0])) # just one sample, augmented
y_0 = numpy.array(([1])) # We support only one dimension in the output
# this number must be between 0 and 1 because we used logistic activation and cross entropy loss.
# Go through each row in the input array and split each row into its label and x values
for row in inArray:
y = numpy.array([0,0,0,0,0,0,0,0,0,0])
# Set the value at the label's index to 1 (one-hot): e.g., a sample labeled 0 gets a 1 at index 0
y[int(row[0])] = 1
# The x values run from index 1 to the end of the row (784 values for MNIST)
xInputs = row[1:]
# Add the bias term to the beginning of the sample
xInputs = numpy.insert(xInputs, 0,1.)

# To use functions individually
MNN.predict(x_0)
MNN.get_deltas(y_0)
MNN.print_progress()
MNN.update_weights()
MNN.print_progress()
samps.append(Sample(xInputs, y))
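For instance, a row whose label is 3 yields this one-hot target:

import numpy
y = numpy.zeros(10, dtype=int)
y[3] = 1
print(y)   # [0 0 0 1 0 0 0 0 0 0]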

# Or a recursive training process
MNN = MiniNN(Ws=Ws) # re-init
MNN.train(x_0, y_0, max_iter=20, verbose=True)

# Counter to keep track of current element within the nonBiasTerms list
count = 0
# Initialize the array of Ws
Ws = []
for x in nonBiasTerms[:-1]:
# Append a W for each layer in the list except the last, assuming one bias term per layer
Ws.append(numpy.random.rand(x + 1, nonBiasTerms[count + 1]))
# Increase the counter to properly index the next term in the list
count += 1
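With nonBiasTerms = [784, 15, 15, 10], the loop above produces three weight matrices whose shapes chain correctly, with one extra row per layer for the bias. An equivalent, more compact check:

import numpy
nonBiasTerms = [784, 15, 15, 10]
Ws = [numpy.random.rand(a + 1, b) for a, b in zip(nonBiasTerms, nonBiasTerms[1:])]
print([W.shape for W in Ws])   # [(785, 15), (16, 15), (16, 10)]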


MNN = MiniNN(Ws=Ws, SampleList=samps) # initialize an NN with the given transfer matrices and the samples to train on
print("Training...")
MNN.train(max_iter=100000)

print("Predicting...")
MNN.predictAll(samps)


# In[ ]:


