diff --git a/.gitignore b/.gitignore
index 6d67616..b2f9b8e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,7 @@
 venv
 __pycache__
 dataset/
+checkpoints/*.bin
 
 ### binaries ###
 *.bin
\ No newline at end of file
diff --git a/examples/train-predict-MNIST/checkpoints/gitKeep b/examples/train-predict-MNIST/checkpoints/gitKeep
new file mode 100644
index 0000000..e69de29
diff --git a/examples/train-predict-MNIST/main.py b/examples/train-predict-MNIST/main.py
index c8860bb..ebf02c0 100644
--- a/examples/train-predict-MNIST/main.py
+++ b/examples/train-predict-MNIST/main.py
@@ -15,15 +15,15 @@
 NUM_PREDICTIONS = 1000
 MNIST_DATASET_FILE = "./dataset/mnist.npz"
 
-# Adding the module path to the sys path
+# Adding the module path to the sys path
 so_dir = add_module_path_to_sys_path(__file__)
 
 import NeuralNetPy as NNP
 
 # If file doesn't exists create and download the data
 if not file_exists(MNIST_DATASET_FILE):
-    print("Mnist dataset not found")
-    get_MNIST_dataset(MNIST_DATASET_FILE)
+    print("Mnist dataset not found")
+    get_MNIST_dataset(MNIST_DATASET_FILE)
 
 # Otherwise load data from file
 (x_train, y_train), (x_test, y_test) = load_data(MNIST_DATASET_FILE)
@@ -38,13 +38,13 @@
 # Setting up the networks parameters
 network.setup(optimizer=NNP.optimizers.Adam(0.01), loss=NNP.LOSS.MCE)
 
-# combining the data with the labels for later shuffling
+# combining the data with the labels for later shuffling
 combined = list(zip(x_train, y_train))
 
-# shuffling the combined list
+# shuffling the combined list
 random.shuffle(combined)
 
-# separating them
+# separating them
 x_train, y_train = zip(*combined)
 
 # preparing the training data
@@ -54,7 +54,11 @@
 
 trainingData.batch(128)
 
-callbacks = [NNP.callbacks.EarlyStopping("LOSS", 0.01), NNP.callbacks.CSVLogger("training.csv"), NNP.callbacks.ModelCheckpoint("checkpoints", False, 2,verbose=True)]
+callbacks = [
+    NNP.callbacks.EarlyStopping("LOSS", 0.01),
+    NNP.callbacks.CSVLogger("training.csv"),
+    NNP.callbacks.ModelCheckpoint("checkpoints", True, verbose=True),
+]
 
 network.train(trainingData, 10, callbacks)
 
@@ -67,15 +71,15 @@
 (accuracy, n, correct) = get_accuracy(predicted_numbers, y_test)
 
-# Getting the prediction's accuracy
+# Getting the prediction's accuracy
 print(f"Num correct predictions : {correct}/{n} - accuracy {accuracy}")
 
 # Saving the trained model in a bin file
-NNP.models.Model.save_to_file('./model.bin', network)
+NNP.models.Model.save_to_file("./model.bin", network)
 
 saved_model = NNP.models.Network()
 
-NNP.models.Model.load_from_file('./model.bin', saved_model)
+NNP.models.Model.load_from_file("./model.bin", saved_model)
 
 # preparing the testing data
 predictions = saved_model.predict(f_x_test[:NUM_PREDICTIONS])
 
@@ -84,8 +88,8 @@
 
 (accuracy, n, correct) = get_accuracy(predicted_numbers, y_test)
 
-# Getting the prediction's accuracy
+# Getting the prediction's accuracy
 print(f"Num correct predictions : {correct}/{n} - accuracy {accuracy}")
 
 # Remove sys.path modification
-sys.path.remove(so_dir)
\ No newline at end of file
+sys.path.remove(so_dir)
diff --git a/src/NeuralNet/Network.cpp b/src/NeuralNet/Network.cpp
index 69cc333..f2921e3 100644
--- a/src/NeuralNet/Network.cpp
+++ b/src/NeuralNet/Network.cpp
@@ -131,9 +131,11 @@ double Network::miniBatchTraining(
   trainingCheckpoint("onTrainBegin", callbacks);
 
   for (cEpoch = 0; cEpoch < epochs; cEpoch++) {
+    double sumBatchLoss = 0;
+    int numBatches = trainingData.inputs.size();
     trainingCheckpoint("onEpochBegin", callbacks);
-    TrainingGauge g(trainingData.inputs.size(), 0, epochs, (cEpoch + 1));
-    for (int b = 0; b < trainingData.inputs.size(); b++) {
+    TrainingGauge g(numBatches, 0, epochs, (cEpoch + 1));
+    for (int b = 0; b < numBatches; b++) {
       trainingCheckpoint("onBatchBegin", callbacks);
       const int numOutputs = this->getOutputLayer()->getNumNeurons();
       const int inputsSize = trainingData.inputs.batches[b].size();
@@ -144,6 +146,7 @@ double Network::miniBatchTraining(
       Eigen::MatrixXd o = this->forwardProp(trainingData.inputs.batches[b], true);
 
       loss = this->cmpLoss(o, y) / inputsSize;
+      sumBatchLoss += loss;
       accuracy = computeAccuracy(o, y);
       sumLoss += loss;
       this->backProp(o, y);
@@ -151,6 +154,8 @@ double Network::miniBatchTraining(
       if (!this->progBar) continue;  // Skip when disabled
       g.printWithLAndA(loss, accuracy);
     }
+    // calculating current epoch avg loss
+    loss = sumBatchLoss / static_cast<double>(numBatches);
     trainingCheckpoint("onEpochEnd", callbacks);
   }
 
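For reference, the Network.cpp hunks change miniBatchTraining so the epoch-end loss is the mean of the per-batch losses rather than whichever loss the final batch happened to produce. Below is a minimal standalone sketch of that averaging pattern; the hard-coded batch losses are illustrative placeholders standing in for cmpLoss(o, y) / inputsSize, not values from the library.

#include <iostream>
#include <vector>

int main() {
  // Placeholder per-batch losses; in Network.cpp each value comes from
  // cmpLoss(o, y) / inputsSize inside the batch loop.
  std::vector<double> batchLosses = {0.92, 0.85, 0.78, 0.74};

  double sumBatchLoss = 0;
  for (double batchLoss : batchLosses) {
    sumBatchLoss += batchLoss;  // mirrors sumBatchLoss += loss; per batch
  }

  // Epoch loss = mean batch loss, as in the final hunk above.
  double epochLoss = sumBatchLoss / static_cast<double>(batchLosses.size());
  std::cout << "epoch avg loss: " << epochLoss << "\n";  // prints 0.8225
  return 0;
}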