Added MLJ model AutoEncoderMLJ, pushed to v0.10.4
TODO: autotune of the AutoEncoder is still not working
sylvaticus committed Dec 29, 2023
1 parent f2b4e62 commit 40a71b8
Showing 5 changed files with 44 additions and 29 deletions.
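
For reference, the new wrapper is exercised end-to-end in the test added below in test/Utils_tests.jl. The following is a minimal usage sketch assembled from that test, assuming the model keeps the `AutoEncoderMLJ` name and the standard MLJ transformer protocol (`machine` / `fit!` / `transform` / `inverse_transform`):

using BetaML
import MLJBase
const Mlj = MLJBase

X, y        = Mlj.@load_iris                          # 150 × 4 feature table
model       = AutoEncoderMLJ(outdims=2)               # encode the 4 features into 2 latent dimensions
mach        = Mlj.machine(model, X)                   # unsupervised: only X is bound to the machine
Mlj.fit!(mach)
X_latent    = Mlj.transform(mach, X)                  # encoded (latent) representation
X_recovered = Mlj.inverse_transform(mach, X_latent)   # decoded back to the original 4 columns
relative_mean_error(Mlj.matrix(X), X_recovered)       # the added test expects this to stay below 0.05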
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "BetaML"
uuid = "024491cd-cc6b-443e-8034-08ea7eb7db2b"
authors = ["Antonello Lobianco <[email protected]>"]
version = "0.10.3"
version = "0.10.4"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
2 changes: 1 addition & 1 deletion src/BetaML.jl
@@ -58,7 +58,7 @@ const MLJ_TREES_MODELS = (DecisionTreeClassifier, DecisionTreeRegressor, Ra
const MLJ_CLUSTERING_MODELS = (KMeans, KMedoids, GaussianMixtureClusterer)
const MLJ_IMPUTERS_MODELS = (SimpleImputer, GaussianMixtureImputer, RandomForestImputer,GeneralImputer) # these are the names of the MLJ models, not the BetaML ones...
const MLJ_NN_MODELS = (NeuralNetworkRegressor,MultitargetNeuralNetworkRegressor, NeuralNetworkClassifier)
const MLJ_OTHER_MODELS = (GaussianMixtureRegressor,MultitargetGaussianMixtureRegressor)
const MLJ_OTHER_MODELS = (GaussianMixtureRegressor,MultitargetGaussianMixtureRegressor,AutoEncoderMLJ)
const MLJ_INTERFACED_MODELS = (MLJ_PERCEPTRON_MODELS..., MLJ_TREES_MODELS..., MLJ_CLUSTERING_MODELS..., MLJ_IMPUTERS_MODELS..., MLJ_NN_MODELS..., MLJ_OTHER_MODELS...)


28 changes: 14 additions & 14 deletions src/Nn/Nn_MLJ.jl
@@ -66,26 +66,26 @@ julia> hcat(y,ŷ)
Base.@kwdef mutable struct NeuralNetworkRegressor <: MMI.Deterministic
"Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers"
layers::Union{Array{AbstractLayer,1},Nothing} = nothing
"""Loss (cost) function [def: `squared_cost`]. Should always assume y and ŷ as matrices, even if the regression task is 1-D
"""Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as matrices, even if the regression task is 1-D
!!! warning
If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.
"""
loss::Union{Nothing,Function} = squared_cost
"Derivative of the loss function [def: `dsquared_cost`, i.e. use the derivative of the squared cost]. Use `nothing` for autodiff."
"Derivative of the loss function [def: `BetaML.dsquared_cost`, i.e. use the derivative of the squared cost]. Use `nothing` for autodiff."
dloss::Union{Function,Nothing} = dsquared_cost
"Number of epochs, i.e. passages trough the whole training sample [def: `200`]"
epochs::Int64 = 200
"Size of each individual batch [def: `16`]"
batch_size::Int64 = 16
"The optimisation algorithm to update the gradient at each batch [def: `ADAM()`]"
"The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers"
opt_alg::OptimisationAlgorithm = ADAM()
"Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]"
shuffle::Bool = true
"An optional title and/or description for this model"
descr::String = ""
"A call back function to provide information during training [def: `fitting_info`"
"A call back function to provide information during training [def: `fitting_info`]"
cb::Function=fitting_info
"Random Number Generator (see [`FIXEDSEED`](@ref)) [deafult: `Random.GLOBAL_RNG`]
"Random Number Generator (see [`BetaML.FIXEDSEED`](@ref)) [deafult: `Random.GLOBAL_RNG`]
"
rng::AbstractRNG = Random.GLOBAL_RNG
end
@@ -180,26 +180,26 @@ julia> hcat(ydouble,ŷdouble)
Base.@kwdef mutable struct MultitargetNeuralNetworkRegressor <: MMI.Deterministic
"Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers"
layers::Union{Array{AbstractLayer,1},Nothing} = nothing
"""Loss (cost) function [def: `squared_cost`]. Should always assume y and ŷ as matrices.
"""Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as matrices.
!!! warning
If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.
"""
loss::Union{Nothing,Function} = squared_cost
"Derivative of the loss function [def: `dsquared_cost`, i.e. use the derivative of the squared cost]. Use `nothing` for autodiff."
"Derivative of the loss function [def: `BetaML.dsquared_cost`, i.e. use the derivative of the squared cost]. Use `nothing` for autodiff."
dloss::Union{Function,Nothing} = dsquared_cost
"Number of epochs, i.e. passages trough the whole training sample [def: `300`]"
epochs::Int64 = 300
"Size of each individual batch [def: `16`]"
batch_size::Int64 = 16
"The optimisation algorithm to update the gradient at each batch [def: `ADAM()`]"
"The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers"
opt_alg::OptimisationAlgorithm = ADAM()
"Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]"
shuffle::Bool = true
"An optional title and/or description for this model"
descr::String = ""
"A call back function to provide information during training [def: `fitting_info`"
"A call back function to provide information during training [def: `BetaML.fitting_info`]"
cb::Function=fitting_info
"Random Number Generator (see [`FIXEDSEED`](@ref)) [deafult: `Random.GLOBAL_RNG`]
"Random Number Generator (see [`BetaML.FIXEDSEED`](@ref)) [deafult: `Random.GLOBAL_RNG`]
"
rng::AbstractRNG = Random.GLOBAL_RNG
end
@@ -288,24 +288,24 @@ julia> classes_est = predict(mach, X)
Base.@kwdef mutable struct NeuralNetworkClassifier <: MMI.Probabilistic
"Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers. The last \"softmax\" layer is automatically added."
layers::Union{Array{AbstractLayer,1},Nothing} = nothing
"""Loss (cost) function [def: `crossentropy`]. Should always assume y and ŷ as matrices.
"""Loss (cost) function [def: `BetaML.crossentropy`]. Should always assume y and ŷ as matrices.
!!! warning
If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.
"""
loss::Union{Nothing,Function} = crossentropy
"Derivative of the loss function [def: `dcrossentropy`, i.e. the derivative of the cross-entropy]. Use `nothing` for autodiff."
"Derivative of the loss function [def: `BetaML.dcrossentropy`, i.e. the derivative of the cross-entropy]. Use `nothing` for autodiff."
dloss::Union{Function,Nothing} = dcrossentropy
"Number of epochs, i.e. passages trough the whole training sample [def: `200`]"
epochs::Int64 = 200
"Size of each individual batch [def: `16`]"
batch_size::Int64 = 16
"The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]"
"The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers"
opt_alg::OptimisationAlgorithm = ADAM()
"Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]"
shuffle::Bool = true
"An optional title and/or description for this model"
descr::String = ""
"A call back function to provide information during training [def: `BetaML.fitting_info`"
"A call back function to provide information during training [def: `BetaML.fitting_info`]"
cb::Function=fitting_info
"The categories to represent as columns. [def: `nothing`, i.e. unique training values]."
categories::Union{Vector,Nothing} = nothing
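
A note on the loss-related docstrings updated above: they all state the same contract, namely that a custom `loss` must treat y and ŷ as matrices and that its derivative is either supplied via `dloss` or left to autodiff with `dloss=nothing`. A minimal sketch under those assumptions (`mae_loss` is a made-up loss used only for illustration, and the model constructor is assumed to be reachable directly after `using BetaML`, as in the package tests):

using BetaML
using Statistics

mae_loss(y, ŷ) = mean(abs.(ŷ .- y))    # y and ŷ arrive as matrices, even for a 1-D target

model = NeuralNetworkRegressor(
    loss       = mae_loss,
    dloss      = nothing,              # no analytical derivative supplied → autodiff is used
    epochs     = 200,
    batch_size = 16,
    opt_alg    = BetaML.ADAM(),
)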
25 changes: 12 additions & 13 deletions src/Utils/Utils_extra.jl
@@ -7,8 +7,6 @@ export AutoEncoder, AutoEncoderHyperParametersSet

import ..Nn: AbstractLayer, ADAM, SGD, NeuralNetworkEstimator, OptimisationAlgorithm, DenseLayer, NN

# ------------------------------------------------------------------------------
# WORK IN PROGRESS IN

"""
$(TYPEDEF)
@@ -24,9 +22,9 @@ Base.@kwdef mutable struct AutoEncoderHyperParametersSet <: BetaMLHyperParameter
e_layers::Union{Nothing,Vector{AbstractLayer}} = nothing
"The layers (vector of `AbstractLayer`s) responsable of the decoding of the data [def: `nothing`, i.e. two dense layers with the inner one of `innerdims`]"
d_layers::Union{Nothing,Vector{AbstractLayer}} = nothing
"The number of neurons (i.e. dimensions) of the encoded data. If the value is a float it is consiered a percentual (to be rounded) of the dimensionality of the data [def: `0.33`]"
"The number of neurons (i.e. dimensions) of the encoded data. If the value is a float it is considered a percentual (to be rounded) of the dimensionality of the data [def: `0.33`]"
outdims::Union{Float64,Int64} = 0.333
"Inner layer dimension (i.e. number of neurons). If the value is a float it is consiered a percentual (to be rounded) of the dimensionality of the data [def: `nothing` that applies a specific heuristic]. If `e_layers` or `d_layers` are specified, this parameter is ignored for the respective part."
"Inner layer dimension (i.e. number of neurons). If the value is a float it is consiered a percentual (to be rounded) of the dimensionality of the data [def: `nothing` that applies a specific heuristic]. Consider that the underlying neural network is trying to predict multiple values at the same times. Normally this requires many more neurons than a scalar prediction. If `e_layers` or `d_layers` are specified, this parameter is ignored for the respective part."
innerdims::Union{Int64,Float64,Nothing} = nothing
"""Loss (cost) function [def: `squared_cost`]
It must always assume y and ŷ as (n x d) matrices, using `dropdims` inside if needed.
@@ -175,21 +173,21 @@ function fit!(m::AutoEncoder,X)
outdims_actual = m.par.outdims_actual
fullnn = m.par.fullnn
else
typeof(outdims) <: Integer ? outdims_actual = outdims : outdims_actual = D * outdims
typeof(outdims) <: Integer ? outdims_actual = outdims : outdims_actual = max(1,Int(round(D * outdims)))
if isnothing(innerdims)
if D == 1
innerSize = 3
elseif D < 5
innerSize = Int(round(D*D))
innerSize = max(1,Int(round(D*D)))
elseif D < 10
innerSize = Int(round(D*1.3*D/3))
innerSize = max(1,Int(round(D*1.3*D/3)))
else
innerSize = Int(round(D*1.3*log(2,D)))
innerSize = max(1,Int(round(D*1.3*log(2,D))))
end
elseif typeof(innerdims) <: Integer
innerSize = innerdims
else
innerSize = Int(round(D*innerdims))
innerSize = max(1,Int(round(D*innerdims)) )
end

if isnothing(e_layers)
@@ -220,7 +218,7 @@ function fit!(m::AutoEncoder,X)
m.par.outdims_actual = outdims_actual
m.par.fullnn = fullnn
m.fitted=true
rme = relative_mean_error(X,x̂)
rme = cache ? relative_mean_error(X,x̂) : missing

m.info["nepochs_ran"] = info(fullnn)["nepochs_ran"]
m.info["loss_per_epoch"] = info(fullnn)["loss_per_epoch"]
@@ -262,8 +260,9 @@ function inverse_predict(m::AutoEncoder,X)
return xtemp|> makematrix
end

include("Utils_MLJ.jl") # Utility functions that depend on some BetaML functionality. Set them here to avoid recursive dependence

end


# WORK IN PROGRESS OUT
# ------------------------------------------------------------------------------

end
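
For readers skimming the hunk above, the sizing logic in `fit!` can be restated as a standalone sketch. `resolve_outdims` and `default_innersize` are hypothetical helper names used only for illustration; in the source the same logic is written inline:

# A float `outdims` (or `innerdims`) is read as a share of the data dimensionality D,
# rounded and clamped to at least one neuron; an integer is taken as-is.
resolve_outdims(outdims::Integer, D) = outdims
resolve_outdims(outdims::Float64, D) = max(1, Int(round(D * outdims)))

# Default inner-layer size when `innerdims` is `nothing`.
function default_innersize(D)
    D == 1 && return 3
    D < 5  && return max(1, Int(round(D * D)))
    D < 10 && return max(1, Int(round(D * 1.3 * D / 3)))
    return max(1, Int(round(D * 1.3 * log(2, D))))
end

resolve_outdims(0.333, 4)   # 1   (the default keeps roughly a third of the input dimensions)
default_innersize(4)        # 16
default_innersize(20)       # 112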
16 changes: 16 additions & 0 deletions test/Utils_tests.jl
@@ -712,6 +712,22 @@ s1 = silhouette(pd,[1,2,2,2])
s2 = silhouette(pd,[1,1,2,2])
@test s2 == [0.7846062151896173, 0.7590778795827623, 0.8860577617518799, 0.8833580446365146]


# MLJ Tests
# ==================================
# NEW TEST
println("Testing MLJ interface for Utils....")
import MLJBase
const Mlj = MLJBase

X, y = Mlj.@load_iris
model = AutoEncoderMLJ(outdims=2,rng=copy(TESTRNG))
ae = Mlj.machine(model, X)
Mlj.fit!(ae)
X_latent = Mlj.transform(ae, X)
X_recovered = Mlj.inverse_transform(ae,X_latent)
@test relative_mean_error(Mlj.matrix(X),X_recovered) < 0.05

#=
using Random, StableRNGs
rDiff(rngFunction,seedBase,seedDiff,repetitions) = norm(rand(rngFunction(seedBase),repetitions) .- rand(rngFunction(seedBase+seedDiff),repetitions))/repetitions
