Dropout layer #32

Merged · 6 commits · Apr 2, 2024
6 changes: 3 additions & 3 deletions TODO.md
@@ -3,20 +3,20 @@
- [ ] Implement batch norm
- [ ] Parallelize operations
- [ ] Read : https://arxiv.org/pdf/1412.6980.pdf
- [ ] Implement a dropout
- [ ] Add macos arm runner when available

## IN PROGRESS :

- [ ] Implement dropout
- [ ] Add gradient clipping
- [ ] Add default arguments to python bindings
- [ ] Add verbose argument for progress bar
- [ ] Interactive Python example
- [ ] Python tests
- [ ] Optimize `Catch2`'s build
- [ ] Add gradient clipping

## DONE :

- [x] Add verbose argument for progress bar
- [x] CI versioning
- [x] Docker image for quick start
- [x] Setup `clang-format`
4 changes: 2 additions & 2 deletions examples/train-predict-MNIST/helpers/event_handlers.py
@@ -12,7 +12,7 @@

NNP.models.Model.load_from_file("model.bin", network)

network.setup(optimizer=NNP.optimizers.SGD(0.001), loss=NNP.LOSS.MCE)
network.setup(optimizer=NNP.optimizers.SGD(0.1), loss=NNP.LOSS.MCE)

drawing = False # Track drawing
erasing = False # Track erasing
@@ -47,7 +47,7 @@ def handle_ui_button_pressed(context):
normalized_image = get_drawing(context)
target = float(ui_elements["dropdown"].selected_option)
loss = network.train([normalized_image], [target], 1, progBar=False)
ui_elements["guess_text"].append_html_text(f"I'm learning that it's a {int(target)}<br>loss : {loss}")
ui_elements["guess_text"].append_html_text(f"I'm learning that it's a {int(target)}<br>loss : {loss:.3f}")

if event.ui_element == ui_elements["clear_button"]:
ui_elements["drawing_surface"].fill(erasing_color)
7 changes: 4 additions & 3 deletions examples/train-predict-MNIST/main.py
@@ -11,7 +11,7 @@
from helpers.utils import *
from halo import Halo

NUM_TRAININGS = 10000
NUM_TRAININGS = 30000
NUM_PREDICTIONS = 1000
MNIST_DATASET_FILE = "./dataset/mnist.npz"

@@ -31,6 +31,7 @@
network = NNP.models.Network()

network.addLayer(NNP.layers.Flatten((28, 28)))
network.addLayer(NNP.layers.Dropout(0.8))
network.addLayer(NNP.layers.Dense(128, NNP.ACTIVATION.RELU, NNP.WEIGHT_INIT.HE))
network.addLayer(NNP.layers.Dense(10, NNP.ACTIVATION.SOFTMAX, NNP.WEIGHT_INIT.LECUN))

@@ -53,9 +54,9 @@

trainingData.batch(128)

callbacks = [NNP.callbacks.EarlyStopping("LOSS", 0.01, 1), NNP.callbacks.CSVLogger("training.csv")]
callbacks = [NNP.callbacks.EarlyStopping("LOSS", 0.01), NNP.callbacks.CSVLogger("training.csv")]

network.train(trainingData, 5, callbacks)
network.train(trainingData, 10, callbacks)

f_x_test = [normalize_img(x) for x in x_test]

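
A note on the new layer: `network.addLayer(NNP.layers.Dropout(0.8))` places dropout between the flatten layer and the first dense layer. The diff alone does not say whether `0.8` is the keep probability or the drop probability; the `Network.cpp` hunks further down divide the previous layer's outputs by a `scaleRate` during backpropagation, which points at a keep-rate-style rescale. Below is a minimal sketch of that convention, assuming `rate` is the probability of keeping a unit — the names and values are illustrative only, not the library's API.

```cpp
// Illustrative only: how a dropout mask with keep probability `rate`
// zeroes activations and rescales the survivors so the expected value
// of each unit stays the same between training and inference.
#include <Eigen/Dense>
#include <iostream>

int main() {
  const double rate = 0.8;  // assumed keep probability, mirroring Dropout(0.8)

  Eigen::MatrixXd activations = Eigen::MatrixXd::Constant(2, 4, 1.0);

  // Uniform(0,1) samples -> Bernoulli(rate) keep mask.
  Eigen::MatrixXd uniform = Eigen::MatrixXd::Random(2, 4).cwiseAbs();
  Eigen::MatrixXd mask = (uniform.array() < rate).cast<double>().matrix();

  // Inverted dropout: divide kept units by the keep rate during training,
  // so inference can use the raw activations untouched.
  Eigen::MatrixXd dropped = activations.cwiseProduct(mask) / rate;

  std::cout << dropped << std::endl;
  return 0;
}
```

Under that reading, roughly 20 % of the flattened pixels are zeroed on each training step.
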
4 changes: 0 additions & 4 deletions examples/train-predict-MNIST/training.csv

This file was deleted.

61 changes: 41 additions & 20 deletions src/NeuralNet/Network.cpp
@@ -18,8 +18,8 @@ void Network::addLayer(std::shared_ptr<Layer> &layer) {
size_t numLayers = this->layers.size();
// Init layer with right amount of weights
if (numLayers > 0) {
std::shared_ptr<Layer> prevLayer = this->layers[this->layers.size() - 1];
layer->init(prevLayer->getNumNeurons());
const Layer &prevLayer = *this->layers[this->layers.size() - 1];
layer->init(prevLayer.getNumNeurons());
}

this->layers.push_back(layer);
@@ -137,7 +137,8 @@ double Network::miniBatchTraining(
{inputsSize, numOutputs});

// computing outputs from forward propagation
Eigen::MatrixXd o = this->forwardProp(trainingData.inputs.batches[b]);
Eigen::MatrixXd o =
this->forwardProp(trainingData.inputs.batches[b], true);
loss = this->cmpLoss(o, y) / inputsSize;
accuracy = computeAccuracy(o, y);
sumLoss += loss;
@@ -167,7 +168,7 @@ double Network::batchTraining(
for (cEpoch = 0; cEpoch < epochs; cEpoch++) {
trainingCheckpoint("onEpochBegin", callbacks);
TrainingGauge g(1, 0, epochs, (cEpoch + 1));
Eigen::MatrixXd o = this->forwardProp(trainingData.inputs.data);
Eigen::MatrixXd o = this->forwardProp(trainingData.inputs.data, true);

loss = this->cmpLoss(o, y);
accuracy = computeAccuracy(o, y);
@@ -200,7 +201,7 @@ double Network::onlineTraining(
trainingCheckpoint("onEpochBegin", callbacks);
TrainingGauge tg(inputs.size(), 0, epochs, (cEpoch + 1));
for (auto &input : inputs) {
Eigen::MatrixXd o = this->forwardProp(inputs);
Eigen::MatrixXd o = this->forwardProp(inputs, true);
loss = this->cmpLoss(o, y);
sumLoss += loss;
tCorrect += computeAccuracy(o, y);
@@ -231,39 +232,43 @@ Eigen::MatrixXd Network::predict(
/**
* Forward propagation
*/
Eigen::MatrixXd Network::feedForward(Eigen::MatrixXd inputs, int startIdx) {
Eigen::MatrixXd Network::feedForward(Eigen::MatrixXd inputs, int startIdx,
bool training) {
assert(startIdx < this->layers.size());
Eigen::MatrixXd prevLayerOutputs = inputs;

for (int l = startIdx; l < this->layers.size(); l++) {
prevLayerOutputs = this->layers[l]->feedInputs(prevLayerOutputs);
Layer &cLayer = *this->layers[l];
if (cLayer.trainingOnly && !training) continue;
prevLayerOutputs = cLayer.feedInputs(prevLayerOutputs);
}

return prevLayerOutputs;
}

Eigen::MatrixXd Network::forwardProp(
std::vector<std::vector<std::vector<double>>> &inputs) {
std::vector<std::vector<std::vector<double>>> &inputs, bool training) {
// Passing the inputs as outputs to the input layer
this->layers[0]->feedInputs(inputs);

Eigen::MatrixXd prevLayerOutputs = this->layers[0]->getOutputs();

return feedForward(prevLayerOutputs, 1);
return feedForward(prevLayerOutputs, 1, training);
}

Eigen::MatrixXd Network::forwardProp(std::vector<std::vector<double>> &inputs) {
Eigen::MatrixXd Network::forwardProp(std::vector<std::vector<double>> &inputs,
bool training) {
// Previous layer outputs
Eigen::MatrixXd prevLayerO = vectorToMatrixXd(inputs);

return feedForward(prevLayerO);
return feedForward(prevLayerO, 0, training);
}

Eigen::MatrixXd Network::forwardProp(Eigen::MatrixXd &inputs) {
Eigen::MatrixXd Network::forwardProp(Eigen::MatrixXd &inputs, bool training) {
// Previous layer outputs
Eigen::MatrixXd prevLayerO = inputs;

return feedForward(prevLayerO);
return feedForward(prevLayerO, 0, training);
}

void Network::backProp(Eigen::MatrixXd &outputs, Eigen::MatrixXd &y) {
@@ -272,24 +277,40 @@ void Network::backProp(Eigen::MatrixXd &outputs, Eigen::MatrixXd &y) {
int m = beta.rows();

for (size_t i = this->layers.size(); --i > 0;) {
std::shared_ptr<Layer> cLayer = this->layers[i];
std::shared_ptr<Layer> nLayer = this->layers[i - 1];
Layer &cLayer = *this->layers[i];
Layer &nLayer = *this->layers[i - 1];

Eigen::MatrixXd nLayerOutputs = nLayer.getOutputs();

Dense *cDense = dynamic_cast<Dense *>(&cLayer);

if (!cDense || !nLayerOutputs.cols() || !nLayerOutputs.rows()) continue;

if (nLayer.type == LayerType::DROPOUT) {
// dropout layer
Dropout *doLayer = dynamic_cast<Dropout *>(&nLayer);

assert(doLayer);
// rescale outputs
nLayerOutputs /= doLayer->scaleRate;
}

// a'(L)
Eigen::MatrixXd aDer = cLayer->diff(cLayer->outputs);
Eigen::MatrixXd aDer = cDense->diff(cDense->outputs);

// a(L - 1) . a'(L)
Eigen::MatrixXd delta = beta.array() * aDer.array();

Eigen::MatrixXd gradW = (1.0 / m) * (nLayer->outputs.transpose() * delta);
Eigen::MatrixXd gradW = (1.0 / m) * (nLayerOutputs.transpose() * delta);

Eigen::MatrixXd gradB = (1.0 / m) * delta.colwise().sum();

// dL/dA(l - 1)
beta = delta * cLayer->weights.transpose();
beta = delta * cDense->weights.transpose();

// updating weights and biases
this->optimizer->updateWeights(cLayer->weights, gradW);
this->optimizer->updateBiases(cLayer->biases, gradB);
this->optimizer->updateWeights(cDense->weights, gradW);
this->optimizer->updateBiases(cDense->biases, gradB);
}
}

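
The `Network.cpp` hunks above rely on three pieces of the new layer that this excerpt does not show: a `trainingOnly` flag checked in `feedForward`, a `LayerType::DROPOUT` tag checked in `backProp`, and a `scaleRate` member used to rescale the previous layer's outputs. The actual `layers/Dropout.hpp` added by this PR is not part of the excerpt, so the sketch below is only an assumption of what such a layer could look like; it applies the mask in the forward pass and leaves the `1/scaleRate` rescale to `backProp`, matching the hunk above.

```cpp
// Hypothetical sketch, not the PR's Dropout class: just the members that
// Network.cpp reads (trainingOnly, type, scaleRate, outputs, feedInputs).
#include <Eigen/Dense>

enum class LayerType { DEFAULT, DENSE, DROPOUT };  // assumed enum values

class DropoutSketch {
 public:
  bool trainingOnly = true;             // feedForward skips this layer when !training
  LayerType type = LayerType::DROPOUT;  // backProp uses this to trigger rescaling
  double rate;                          // assumed keep probability
  double scaleRate;                     // backProp divides the stored outputs by this
  Eigen::MatrixXd outputs;

  explicit DropoutSketch(double keepRate) : rate(keepRate), scaleRate(keepRate) {}

  Eigen::MatrixXd feedInputs(const Eigen::MatrixXd &inputs) {
    // Bernoulli(rate) mask: keep each unit with probability `rate`.
    Eigen::MatrixXd mask =
        (Eigen::MatrixXd::Random(inputs.rows(), inputs.cols()).cwiseAbs().array() < rate)
            .cast<double>()
            .matrix();
    outputs = inputs.cwiseProduct(mask);
    return outputs;
  }
};
```

Whether the real implementation also rescales in the forward pass, or interprets the constructor argument as the drop probability instead of the keep probability, cannot be determined from this diff.
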
16 changes: 11 additions & 5 deletions src/NeuralNet/Network.hpp
@@ -5,13 +5,15 @@
#include <cereal/types/vector.hpp>
#include <cstdlib>
#include <memory>
#include <variant>
#include <vector>

#include "Model.hpp"
#include "callbacks/Callback.hpp"
#include "data/Tensor.hpp"
#include "data/TrainingData.hpp"
#include "layers/Dense.hpp"
#include "layers/Dropout.hpp"
#include "layers/Flatten.hpp"
#include "layers/Layer.hpp"
#include "losses/losses.hpp"
@@ -178,7 +180,8 @@ class Network : public Model {
int cEpoch = 0; // Current epoch
double loss = 0, accuracy = 0;
std::vector<std::shared_ptr<Layer>> layers;
LOSS lossFunc; // Storing the loss function for serialization
LOSS lossFunc =
LOSS::QUADRATIC; // Storing the loss function for serialization
bool progBar = true;
double (*cmpLoss)(const Eigen::MatrixXd &, const Eigen::MatrixXd &);
Eigen::MatrixXd (*cmpLossGrad)(const Eigen::MatrixXd &,
@@ -303,7 +306,8 @@ class Network : public Model {
* @return The output of the network
*/
Eigen::MatrixXd forwardProp(
std::vector<std::vector<std::vector<double>>> &inputs);
std::vector<std::vector<std::vector<double>>> &inputs,
bool training = false);

/**
* @brief This method will pass the inputs through the network and return an
@@ -313,7 +317,8 @@
*
* @return The output of the network
*/
Eigen::MatrixXd forwardProp(std::vector<std::vector<double>> &inputs);
Eigen::MatrixXd forwardProp(std::vector<std::vector<double>> &inputs,
bool training = false);

/**
* @brief This method will pass the inputs through the network and return an
@@ -323,9 +328,10 @@
*
* @return The output of the network
*/
Eigen::MatrixXd forwardProp(Eigen::MatrixXd &inputs);
Eigen::MatrixXd forwardProp(Eigen::MatrixXd &inputs, bool training = false);

Eigen::MatrixXd feedForward(Eigen::MatrixXd inputs, int startIdx = 0);
Eigen::MatrixXd feedForward(Eigen::MatrixXd inputs, int startIdx = 0,
bool training = false);

/**
* @brief This method will compute the loss and backpropagate it through the
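
The header changes thread a `training` flag, defaulting to `false`, through every `forwardProp` overload and through `feedForward`. The training loops in `Network.cpp` pass `true`, so layers flagged `trainingOnly` (dropout) run only there, while `predict` and other existing callers presumably keep the old behaviour without any change at their call sites. A toy, self-contained illustration of that default-argument pattern follows; none of these names are the library's.

```cpp
// Toy illustration of the training-flag pattern added in this PR; the
// function and values are made up and are not part of the library.
#include <iostream>

double forwardStep(double activation, bool training = false) {
  // Stand-in for a training-only layer: active only when requested.
  if (training) return activation * 0.5;  // pretend half the units were dropped
  return activation;                      // inference path, untouched
}

int main() {
  std::cout << forwardStep(2.0, true) << "\n";  // training call site opts in
  std::cout << forwardStep(2.0) << "\n";        // existing call sites unchanged
  return 0;
}
```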