From 9f076c5c6421f54bbe4cd06d6189362e77d23a05 Mon Sep 17 00:00:00 2001
From: Antonello Lobianco
Date: Wed, 6 Sep 2023 10:28:02 +0200
Subject: [PATCH] [breaking] NN optimizer update accounts for total number of
 epochs, more data and better info in training callback

1. Epochs run in previous training sessions are now accounted for in the
   optimizer update function (e.g. to reduce the step at each epoch) instead
   of restarting from 1 at each `fit!`.
2. The number of epochs already run and the whole dataset (x,y), not only
   (xbatch,ybatch), are now passed to the callback run at each update. The
   default callback (`fitting_info`) uses this information to report the
   whole-dataset loss at each epoch.
---
 src/Nn/Nn.jl | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/src/Nn/Nn.jl b/src/Nn/Nn.jl
index 253fa56b..ba3d0338 100644
--- a/src/Nn/Nn.jl
+++ b/src/Nn/Nn.jl
@@ -541,17 +541,20 @@ abstract type OptimisationAlgorithm end
 include("Nn_default_optalgs.jl")
 
 """
-    fitting_info(nn,x,y;n,batch_size,epochs,verbosity,n_epoch,n_batch)
+    fitting_info(nn,xbatch,ybatch,x,y;n,batch_size,epochs,epochs_ran,verbosity,n_epoch,n_batch)
 
 Default callback funtion to display information during training, depending on the verbosity level
 
 # Parameters:
 * `nn`: Worker network
-* `x`: Batch input to the network (batch_size,d)
-* `y`: Batch label input (batch_size,d)
+* `xbatch`: Batch input to the network (batch_size,din)
+* `ybatch`: Batch label input (batch_size,dout)
+* `x`: Full input to the network (n_records,din)
+* `y`: Full label input (n_records,dout)
 * `n`: Size of the full training set
 * `n_batches` : Number of baches per epoch
 * `epochs`: Number of epochs defined for the training
+* `epochs_ran`: Number of epochs already ran in previous training sessions
 * `verbosity`: Verbosity level defined for the training (NONE,LOW,STD,HIGH,FULL)
 * `n_epoch`: Counter of the current epoch
 * `n_batch`: Counter of the current batch
@@ -559,19 +562,18 @@ Default callback funtion to display information during th
 #Notes:
 * Reporting of the error (loss of the network) is expensive. Use `verbosity=NONE` for better performances
 """
-function fitting_info(nn,x,y;n,n_batches,epochs,verbosity,n_epoch,n_batch)
+function fitting_info(nn,xbatch,ybatch,x,y;n,n_batches,epochs,epochs_ran,verbosity,n_epoch,n_batch)
     if verbosity == NONE
         return false # doesn't stop the training
     end
 
     nMsgDict = Dict(LOW => 0, STD => 10,HIGH => 100, FULL => n)
     nMsgs = nMsgDict[verbosity]
-    batch_size = size(x,1)
 
     if verbosity == FULL || ( n_batch == n_batches && ( n_epoch == 1 || n_epoch % ceil(epochs/nMsgs) == 0))
 
         ϵ = loss(nn,x,y)
-        println("Training.. \t avg ϵ on (Epoch $n_epoch Batch $n_batch): \t $(ϵ)")
+        println("Training.. \t avg loss on epoch $n_epoch ($(n_epoch+epochs_ran)): \t $(ϵ)")
     end
     return false
 end
@@ -614,7 +616,7 @@ Low leval function that trains a neural network with the given x,y data.
 - The verbosity can be set to any of `NONE`,`LOW`,`STD`,`HIGH`,`FULL`.
 - The update is done computing the average gradient for each batch and then calling `single_update!` to let the optimisation algorithm perform the parameters update
 """
-function train!(nn::NN,x,y; epochs=100, batch_size=min(size(x,1),32), sequential=false, verbosity::Verbosity=STD, cb=fitting_info, opt_alg::OptimisationAlgorithm=ADAM(),rng = Random.GLOBAL_RNG)#, η=t -> 1/(1+t), λ=1, rShuffle=true, nMsgs=10, tol=0opt_alg::SD=SD())
+function train!(nn::NN,x,y; epochs=100, batch_size=min(size(x,1),32), sequential=false, nepochs_ran=0,verbosity::Verbosity=STD, cb=fitting_info, opt_alg::OptimisationAlgorithm=ADAM(),rng = Random.GLOBAL_RNG)#, η=t -> 1/(1+t), λ=1, rShuffle=true, nMsgs=10, tol=0opt_alg::SD=SD())
     if verbosity > STD
         @codelocation
     end
@@ -671,9 +673,9 @@ function train!(nn::NN,x,y; epochs=100, batch_size=min(size(x,1),32), sequential
 
             #println("****foooo")
             #println(▽)
-            res = single_update!(θ,▽;n_epoch=t,n_batch=i,n_batches=n_batches,xbatch=xbatch,ybatch=ybatch,opt_alg=opt_alg)
+            res = single_update!(θ,▽;n_epoch=t+nepochs_ran,n_batch=i,n_batches=n_batches,xbatch=xbatch,ybatch=ybatch,opt_alg=opt_alg)
             set_params!(nn,res.θ)
-            cbOut = cb(nn,xbatch,ybatch,n=d,n_batches=n_batches,epochs=epochs,verbosity=verbosity,n_epoch=t,n_batch=i)
+            cbOut = cb(nn,xbatch,ybatch,x,y,n=d,n_batches=n_batches,epochs=epochs,epochs_ran=nepochs_ran,verbosity=verbosity,n_epoch=t,n_batch=i)
             if(res.stop==true || cbOut==true)
                 nn.trained = true
                 return (epochs=t,ϵ_epochs=ϵ_epochs,θ_epochs=θ_epochs)
@@ -1062,7 +1064,7 @@ function fit!(m::NeuralNetworkEstimator,X,Y)
 
     nnstruct = m.par.nnstruct
 
-    out = train!(nnstruct,X,Y; epochs=epochs, batch_size=batch_size, sequential=!shuffle, verbosity=verbosity, cb=cb, opt_alg=opt_alg,rng = rng)
+    out = train!(nnstruct,X,Y; epochs=epochs, batch_size=batch_size, sequential=!shuffle, verbosity=verbosity, cb=cb, opt_alg=opt_alg,nepochs_ran=m.info["nepochs_ran"],rng = rng)
     m.info["nepochs_ran"] += out.epochs
     append!(m.info["loss_per_epoch"],out.ϵ_epochs)
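
Usage sketch (illustration only, not part of the diff above): the snippet
below shows how the new `nepochs_ran` keyword and the extended callback
signature can be used from client code. It is a minimal sketch assuming `nn`
is an already-built `BetaML.Nn.NN` network, `x`/`y` are the training data, and
`my_callback` is a hypothetical user-defined callback; the fully qualified
`BetaML.Nn.train!` and `BetaML.Nn.loss` refer to the functions touched by this
patch.

import BetaML

# Hypothetical callback matching the new calling convention: it receives the
# current batch (xbatch, ybatch), the full dataset (x, y) and, among the
# keyword arguments, the number of epochs already run in previous sessions.
function my_callback(nn, xbatch, ybatch, x, y;
                     n, n_batches, epochs, epochs_ran, verbosity, n_epoch, n_batch)
    if n_batch == n_batches                  # once per epoch, on the last batch
        ϵ = BetaML.Nn.loss(nn, x, y)         # whole-dataset loss, as in `fitting_info`
        println("Epoch $(n_epoch + epochs_ran): loss = $ϵ")
    end
    return false                             # returning true stops the training
end

# First session: epoch-dependent optimizer schedules start from epoch 1.
# train! returns a named tuple carrying, among others, the epochs actually run.
res1 = BetaML.Nn.train!(nn, x, y; epochs=50, cb=my_callback)

# Second session: passing the epochs already run keeps the optimizer update
# (and the reported epoch numbering) continuous instead of restarting from 1.
res2 = BetaML.Nn.train!(nn, x, y; epochs=50, nepochs_ran=res1.epochs, cb=my_callback)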