Dropout layer #32

Merged · 6 commits · Apr 2, 2024
6 changes: 3 additions & 3 deletions TODO.md
@@ -3,20 +3,20 @@
- [ ] Implement batch norm
- [ ] Parallelize operations
- [ ] Read : https://arxiv.org/pdf/1412.6980.pdf
- [ ] Implement a dropout
- [ ] Add macos arm runner when available

## IN PROGRESS :

- [ ] Implement dropout
- [ ] Add gradient clipping
- [ ] Add default arguments to python bindings
- [ ] Add verbose argument for progress bar
- [ ] Interactive Python example
- [ ] Python tests
- [ ] Optimize `Catch2`'s build
- [ ] Add gradient clipping

## DONE :

- [x] Add verbose argument for progress bar
- [x] CI versioning
- [x] Docker image for quick start
- [x] Setup `clang-format`
4 changes: 2 additions & 2 deletions examples/train-predict-MNIST/helpers/event_handlers.py
@@ -12,7 +12,7 @@

NNP.models.Model.load_from_file("model.bin", network)

network.setup(optimizer=NNP.optimizers.SGD(0.001), loss=NNP.LOSS.MCE)
network.setup(optimizer=NNP.optimizers.SGD(0.1), loss=NNP.LOSS.MCE)

drawing = False # Track drawing
erasing = False # Track erasing
@@ -47,7 +47,7 @@ def handle_ui_button_pressed(context):
normalized_image = get_drawing(context)
target = float(ui_elements["dropdown"].selected_option)
loss = network.train([normalized_image], [target], 1, progBar=False)
ui_elements["guess_text"].append_html_text(f"I'm learning that it's a {int(target)}<br>loss : {loss}")
ui_elements["guess_text"].append_html_text(f"I'm learning that it's a {int(target)}<br>loss : {loss:.3f}")

if event.ui_element == ui_elements["clear_button"]:
ui_elements["drawing_surface"].fill(erasing_color)
7 changes: 4 additions & 3 deletions examples/train-predict-MNIST/main.py
@@ -11,7 +11,7 @@
from helpers.utils import *
from halo import Halo

NUM_TRAININGS = 10000
NUM_TRAININGS = 30000
NUM_PREDICTIONS = 1000
MNIST_DATASET_FILE = "./dataset/mnist.npz"

@@ -31,6 +31,7 @@
network = NNP.models.Network()

network.addLayer(NNP.layers.Flatten((28, 28)))
network.addLayer(NNP.layers.Dropout(0.8))
network.addLayer(NNP.layers.Dense(128, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE))
network.addLayer(NNP.layers.Dense(10, NNP.ACTIVATION.SOFTMAX, NNP.WEIGHT_INIT.LECUN))

@@ -53,9 +54,9 @@

trainingData.batch(128)

callbacks = [NNP.callbacks.EarlyStopping("LOSS", 0.01, 1), NNP.callbacks.CSVLogger("training.csv")]
callbacks = [NNP.callbacks.EarlyStopping("LOSS", 0.01), NNP.callbacks.CSVLogger("training.csv")]

network.train(trainingData, 5, callbacks)
network.train(trainingData, 10, callbacks)

f_x_test = [normalize_img(x) for x in x_test]

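
A note on the new layer: `network.addLayer(NNP.layers.Dropout(0.8))` places dropout between the flatten layer and the first dense layer. The diff alone does not say whether `0.8` is the keep probability or the drop probability; the `Network.cpp` hunks further down divide the previous layer's outputs by a `scaleRate` during backpropagation, which points at a keep-rate-style rescale. Below is a minimal sketch of that convention, assuming `rate` is the probability of keeping a unit — the names and values are illustrative only, not the library's API.

```cpp
// Illustrative only: how a dropout mask with keep probability `rate`
// zeroes activations and rescales the survivors so the expected value
// of each unit stays the same between training and inference.
#include <Eigen/Dense>
#include <iostream>

int main() {
  const double rate = 0.8;  // assumed keep probability, mirroring Dropout(0.8)

  Eigen::MatrixXd activations = Eigen::MatrixXd::Constant(2, 4, 1.0);

  // Uniform(0,1) samples -> Bernoulli(rate) keep mask.
  Eigen::MatrixXd uniform = Eigen::MatrixXd::Random(2, 4).cwiseAbs();
  Eigen::MatrixXd mask = (uniform.array() < rate).cast<double>().matrix();

  // Inverted dropout: divide kept units by the keep rate during training,
  // so inference can use the raw activations untouched.
  Eigen::MatrixXd dropped = activations.cwiseProduct(mask) / rate;

  std::cout << dropped << std::endl;
  return 0;
}
```

Under that reading, roughly 20 % of the flattened pixels are zeroed on each training step.
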
4 changes: 0 additions & 4 deletions examples/train-predict-MNIST/training.csv

This file was deleted.

61 changes: 41 additions & 20 deletions src/NeuralNet/Network.cpp
@@ -18,8 +18,8 @@ void Network::addLayer(std::shared_ptr<Layer> &layer) {
size_t numLayers = this->layers.size();
// Init layer with right amount of weights
if (numLayers > 0) {
std::shared_ptr<Layer> prevLayer = this->layers[this->layers.size() - 1];
layer->init(prevLayer->getNumNeurons());
const Layer &prevLayer = *this->layers[this->layers.size() - 1];
layer->init(prevLayer.getNumNeurons());
}

this->layers.push_back(layer);
@@ -137,7 +137,8 @@ double Network::miniBatchTraining(
{inputsSize, numOutputs});

// computing outputs from forward propagation
Eigen::MatrixXd o = this->forwardProp(trainingData.inputs.batches[b]);
Eigen::MatrixXd o =
this->forwardProp(trainingData.inputs.batches[b], true);
loss = this->cmpLoss(o, y) / inputsSize;
accuracy = computeAccuracy(o, y);
sumLoss += loss;
@@ -167,7 +168,7 @@ double Network::batchTraining(
for (cEpoch = 0; cEpoch < epochs; cEpoch++) {
trainingCheckpoint("onEpochBegin", callbacks);
TrainingGauge g(1, 0, epochs, (cEpoch + 1));
Eigen::MatrixXd o = this->forwardProp(trainingData.inputs.data);
Eigen::MatrixXd o = this->forwardProp(trainingData.inputs.data, true);

loss = this->cmpLoss(o, y);
accuracy = computeAccuracy(o, y);
@@ -200,7 +201,7 @@ double Network::onlineTraining(
trainingCheckpoint("onEpochBegin", callbacks);
TrainingGauge tg(inputs.size(), 0, epochs, (cEpoch + 1));
for (auto &input : inputs) {
Eigen::MatrixXd o = this->forwardProp(inputs);
Eigen::MatrixXd o = this->forwardProp(inputs, true);
loss = this->cmpLoss(o, y);
sumLoss += loss;
tCorrect += computeAccuracy(o, y);
@@ -231,39 +232,43 @@ Eigen::MatrixXd Network::predict(
/**
* Forward propagation
*/
Eigen::MatrixXd Network::feedForward(Eigen::MatrixXd inputs, int startIdx) {
Eigen::MatrixXd Network::feedForward(Eigen::MatrixXd inputs, int startIdx,
bool training) {
assert(startIdx < this->layers.size());
Eigen::MatrixXd prevLayerOutputs = inputs;

for (int l = startIdx; l < this->layers.size(); l++) {
prevLayerOutputs = this->layers[l]->feedInputs(prevLayerOutputs);
Layer &cLayer = *this->layers[l];
if (cLayer.trainingOnly && !training) continue;
prevLayerOutputs = cLayer.feedInputs(prevLayerOutputs);
}

return prevLayerOutputs;
}

Eigen::MatrixXd Network::forwardProp(
std::vector<std::vector<std::vector<double>>> &inputs) {
std::vector<std::vector<std::vector<double>>> &inputs, bool training) {
// Passing the inputs as outputs to the input layer
this->layers[0]->feedInputs(inputs);

Eigen::MatrixXd prevLayerOutputs = this->layers[0]->getOutputs();

return feedForward(prevLayerOutputs, 1);
return feedForward(prevLayerOutputs, 1, training);
}

Eigen::MatrixXd Network::forwardProp(std::vector<std::vector<double>> &inputs) {
Eigen::MatrixXd Network::forwardProp(std::vector<std::vector<double>> &inputs,
bool training) {
// Previous layer outputs
Eigen::MatrixXd prevLayerO = vectorToMatrixXd(inputs);

return feedForward(prevLayerO);
return feedForward(prevLayerO, 0, training);
}

Eigen::MatrixXd Network::forwardProp(Eigen::MatrixXd &inputs) {
Eigen::MatrixXd Network::forwardProp(Eigen::MatrixXd &inputs, bool training) {
// Previous layer outputs
Eigen::MatrixXd prevLayerO = inputs;

return feedForward(prevLayerO);
return feedForward(prevLayerO, 0, training);
}

void Network::backProp(Eigen::MatrixXd &outputs, Eigen::MatrixXd &y) {
@@ -272,24 +277,40 @@ void Network::backProp(Eigen::MatrixXd &outputs, Eigen::MatrixXd &y) {
int m = beta.rows();

for (size_t i = this->layers.size(); --i > 0;) {
std::shared_ptr<Layer> cLayer = this->layers[i];
std::shared_ptr<Layer> nLayer = this->layers[i - 1];
Layer &cLayer = *this->layers[i];
Layer &nLayer = *this->layers[i - 1];

Eigen::MatrixXd nLayerOutputs = nLayer.getOutputs();

Dense *cDense = dynamic_cast<Dense *>(&cLayer);

if (!cDense || !nLayerOutputs.cols() || !nLayerOutputs.rows()) continue;

if (nLayer.type == LayerType::DROPOUT) {
// dropout layer
Dropout *doLayer = dynamic_cast<Dropout *>(&nLayer);

assert(doLayer);
// rescale outputs
nLayerOutputs /= doLayer->scaleRate;
}

// a'(L)
Eigen::MatrixXd aDer = cLayer->diff(cLayer->outputs);
Eigen::MatrixXd aDer = cDense->diff(cDense->outputs);

// a(L - 1) . a'(L)
Eigen::MatrixXd delta = beta.array() * aDer.array();

Eigen::MatrixXd gradW = (1.0 / m) * (nLayer->outputs.transpose() * delta);
Eigen::MatrixXd gradW = (1.0 / m) * (nLayerOutputs.transpose() * delta);

Eigen::MatrixXd gradB = (1.0 / m) * delta.colwise().sum();

// dL/dA(l - 1)
beta = delta * cLayer->weights.transpose();
beta = delta * cDense->weights.transpose();

// updating weights and biases
this->optimizer->updateWeights(cLayer->weights, gradW);
this->optimizer->updateBiases(cLayer->biases, gradB);
this->optimizer->updateWeights(cDense->weights, gradW);
this->optimizer->updateBiases(cDense->biases, gradB);
}
}

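
The `Network.cpp` hunks above rely on three pieces of the new layer that this excerpt does not show: a `trainingOnly` flag checked in `feedForward`, a `LayerType::DROPOUT` tag checked in `backProp`, and a `scaleRate` member used to rescale the previous layer's outputs. The actual `layers/Dropout.hpp` added by this PR is not part of the excerpt, so the sketch below is only an assumption of what such a layer could look like; it applies the mask in the forward pass and leaves the `1/scaleRate` rescale to `backProp`, matching the hunk above.

```cpp
// Hypothetical sketch, not the PR's Dropout class: just the members that
// Network.cpp reads (trainingOnly, type, scaleRate, outputs, feedInputs).
#include <Eigen/Dense>

enum class LayerType { DEFAULT, DENSE, DROPOUT };  // assumed enum values

class DropoutSketch {
 public:
  bool trainingOnly = true;             // feedForward skips this layer when !training
  LayerType type = LayerType::DROPOUT;  // backProp uses this to trigger rescaling
  double rate;                          // assumed keep probability
  double scaleRate;                     // backProp divides the stored outputs by this
  Eigen::MatrixXd outputs;

  explicit DropoutSketch(double keepRate) : rate(keepRate), scaleRate(keepRate) {}

  Eigen::MatrixXd feedInputs(const Eigen::MatrixXd &inputs) {
    // Bernoulli(rate) mask: keep each unit with probability `rate`.
    Eigen::MatrixXd mask =
        (Eigen::MatrixXd::Random(inputs.rows(), inputs.cols()).cwiseAbs().array() < rate)
            .cast<double>()
            .matrix();
    outputs = inputs.cwiseProduct(mask);
    return outputs;
  }
};
```

Whether the real implementation also rescales in the forward pass, or interprets the constructor argument as the drop probability instead of the keep probability, cannot be determined from this diff.
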
16 changes: 11 additions & 5 deletions src/NeuralNet/Network.hpp
@@ -5,13 +5,15 @@
#include <cereal/types/vector.hpp>
#include <cstdlib>
#include <memory>
#include <variant>
#include <vector>

#include "Model.hpp"
#include "callbacks/Callback.hpp"
#include "data/Tensor.hpp"
#include "data/TrainingData.hpp"
#include "layers/Dense.hpp"
#include "layers/Dropout.hpp"
#include "layers/Flatten.hpp"
#include "layers/Layer.hpp"
#include "losses/losses.hpp"
@@ -178,7 +180,8 @@ class Network : public Model {
int cEpoch = 0; // Current epoch
double loss = 0, accuracy = 0;
std::vector<std::shared_ptr<Layer>> layers;
LOSS lossFunc; // Storing the loss function for serialization
LOSS lossFunc =
LOSS::QUADRATIC; // Storing the loss function for serialization
bool progBar = true;
double (*cmpLoss)(const Eigen::MatrixXd &, const Eigen::MatrixXd &);
Eigen::MatrixXd (*cmpLossGrad)(const Eigen::MatrixXd &,
@@ -303,7 +306,8 @@ class Network : public Model {
* @return The output of the network
*/
Eigen::MatrixXd forwardProp(
std::vector<std::vector<std::vector<double>>> &inputs);
std::vector<std::vector<std::vector<double>>> &inputs,
bool training = false);

/**
* @brief This method will pass the inputs through the network and return an
@@ -313,7 +317,8 @@
*
* @return The output of the network
*/
Eigen::MatrixXd forwardProp(std::vector<std::vector<double>> &inputs);
Eigen::MatrixXd forwardProp(std::vector<std::vector<double>> &inputs,
bool training = false);

/**
* @brief This method will pass the inputs through the network and return an
@@ -323,9 +328,10 @@
*
* @return The output of the network
*/
Eigen::MatrixXd forwardProp(Eigen::MatrixXd &inputs);
Eigen::MatrixXd forwardProp(Eigen::MatrixXd &inputs, bool training = false);

Eigen::MatrixXd feedForward(Eigen::MatrixXd inputs, int startIdx = 0);
Eigen::MatrixXd feedForward(Eigen::MatrixXd inputs, int startIdx = 0,
bool training = false);

/**
* @brief This method will compute the loss and backpropagate it through the
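
The header changes thread a `training` flag, defaulting to `false`, through every `forwardProp` overload and through `feedForward`. The training loops in `Network.cpp` pass `true`, so layers flagged `trainingOnly` (dropout) run only there, while `predict` and other existing callers presumably keep the old behaviour without any change at their call sites. A toy, self-contained illustration of that default-argument pattern follows; none of these names are the library's.

```cpp
// Toy illustration of the training-flag pattern added in this PR; the
// function and values are made up and are not part of the library.
#include <iostream>

double forwardStep(double activation, bool training = false) {
  // Stand-in for a training-only layer: active only when requested.
  if (training) return activation * 0.5;  // pretend half the units were dropped
  return activation;                      // inference path, untouched
}

int main() {
  std::cout << forwardStep(2.0, true) << "\n";  // training call site opts in
  std::cout << forwardStep(2.0) << "\n";        // existing call sites unchanged
  return 0;
}
```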