diff --git a/Project.toml b/Project.toml index 8e1d1dc1..f8fb19fb 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "BetaML" uuid = "024491cd-cc6b-443e-8034-08ea7eb7db2b" authors = ["Antonello Lobianco "] -version = "0.11.0" +version = "0.11.1" [deps] AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" diff --git a/src/Bmlj/Utils_mlj.jl b/src/Bmlj/Utils_mlj.jl index 7f830c16..0d318800 100644 --- a/src/Bmlj/Utils_mlj.jl +++ b/src/Bmlj/Utils_mlj.jl @@ -36,7 +36,7 @@ julia> X, y = @load_iris; julia> modelType = @load AutoEncoder pkg = "BetaML" verbosity=0; -julia> model = modelType(outdims=2,innerdims=10); +julia> model = modelType(encoded_size=2,layers_size=10); julia> mach = machine(model, X) untrained Machine; caches model-specific representations of data @@ -94,14 +94,14 @@ julia> BetaML.relative_mean_error(MLJ.matrix(X),X_recovered) ``` """ Base.@kwdef mutable struct AutoEncoder <: MMI.Unsupervised - "The layers (vector of `AbstractLayer`s) responsable of the encoding of the data [def: `nothing`, i.e. two dense layers with the inner one of `innerdims`]. See `subtypes(BetaML.AbstractLayer)` for supported layers" + "The layers (vector of `AbstractLayer`s) responsable of the encoding of the data [def: `nothing`, i.e. two dense layers with the inner one of `layers_size`]. See `subtypes(BetaML.AbstractLayer)` for supported layers" e_layers::Union{Nothing,Vector{AbstractLayer}} = nothing - "The layers (vector of `AbstractLayer`s) responsable of the decoding of the data [def: `nothing`, i.e. two dense layers with the inner one of `innerdims`]. See `subtypes(BetaML.AbstractLayer)` for supported layers" + "The layers (vector of `AbstractLayer`s) responsable of the decoding of the data [def: `nothing`, i.e. two dense layers with the inner one of `layers_size`]. See `subtypes(BetaML.AbstractLayer)` for supported layers" d_layers::Union{Nothing,Vector{AbstractLayer}} = nothing "The number of neurons (i.e. dimensions) of the encoded data. If the value is a float it is consiered a percentual (to be rounded) of the dimensionality of the data [def: `0.33`]" - outdims::Union{Float64,Int64} = 0.333 + encoded_size::Union{Float64,Int64} = 0.333 "Inner layer dimension (i.e. number of neurons). If the value is a float it is considered a percentual (to be rounded) of the dimensionality of the data [def: `nothing` that applies a specific heuristic]. Consider that the underlying neural network is trying to predict multiple values at the same times. Normally this requires many more neurons than a scalar prediction. If `e_layers` or `d_layers` are specified, this parameter is ignored for the respective part." - innerdims::Union{Int64,Float64,Nothing} = nothing + layers_size::Union{Int64,Float64,Nothing} = nothing """Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as (n x d) matrices. !!! warning If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`. @@ -122,7 +122,7 @@ Base.@kwdef mutable struct AutoEncoder <: MMI.Unsupervised See [`SuccessiveHalvingSearch`](@ref) for the default method. To implement automatic hyperparameter tuning during the (first) `fit!` call simply set `autotune=true` and eventually change the default `tunemethod` options (including the parameter ranges, the resources to employ and the loss function to adopt). """ - tunemethod::AutoTuneMethod = BetaML.Utils.SuccessiveHalvingSearch(hpranges = Dict("epochs"=>[100,150,200],"batch_size"=>[8,16,32],"outdims"=>[0.2,0.3,0.5],"innerdims"=>[1.3,2.0,5.0]),multithreads=false) + tunemethod::AutoTuneMethod = BetaML.Utils.SuccessiveHalvingSearch(hpranges = Dict("epochs"=>[100,150,200],"batch_size"=>[8,16,32],"encoded_size"=>[0.2,0.3,0.5],"layers_size"=>[1.3,2.0,5.0]),multithreads=false) "An optional title and/or description for this model" descr::String = "" "Random Number Generator (see [`FIXEDSEED`](@ref)) [deafult: `Random.GLOBAL_RNG`] @@ -141,7 +141,7 @@ function MMI.fit(m::AutoEncoder, verbosity, X) typeof(verbosity) <: Integer || error("Verbosity must be a integer. Current \"steps\" are 0, 1, 2 and 3.") verbosity = mljverbosity_to_betaml_verbosity(verbosity) - mi = BetaML.Utils.AutoEncoder(;e_layers=m.e_layers,d_layers=m.d_layers,outdims=m.outdims,innerdims=m.innerdims,loss=m.loss, dloss=m.dloss, epochs=m.epochs, batch_size=m.batch_size, opt_alg=m.opt_alg,shuffle=m.shuffle, tunemethod=m.tunemethod, cache=false, descr=m.descr, rng=m.rng, verbosity=verbosity) + mi = BetaML.Utils.AutoEncoder(;e_layers=m.e_layers,d_layers=m.d_layers,encoded_size=m.encoded_size,layers_size=m.layers_size,loss=m.loss, dloss=m.dloss, epochs=m.epochs, batch_size=m.batch_size, opt_alg=m.opt_alg,shuffle=m.shuffle, tunemethod=m.tunemethod, cache=false, descr=m.descr, rng=m.rng, verbosity=verbosity) Api.fit!(mi,x) fitresults = mi cache = nothing diff --git a/src/Utils/Processing.jl b/src/Utils/Processing.jl index 3ad90bcd..4bde3c47 100644 --- a/src/Utils/Processing.jl +++ b/src/Utils/Processing.jl @@ -886,15 +886,15 @@ $(FIELDS) """ Base.@kwdef mutable struct PCAE_hp <: BetaMLHyperParametersSet - "The number of dimensions to maintain (with `outdims <= size(X,2)` ) [def: `nothing`, i.e. the number of output dimensions is determined from the parameter `max_unexplained_var`]" - outdims::Union{Nothing,Int64} = nothing - "The maximum proportion of variance that we are willing to accept when reducing the number of dimensions in our data [def: 0.05]. It doesn't have any effect when the output number of dimensions is explicitly chosen with the parameter `outdims`" + "The size, that is the number of dimensions, to maintain (with `encoded_size <= size(X,2)` ) [def: `nothing`, i.e. the number of output dimensions is determined from the parameter `max_unexplained_var`]" + encoded_size::Union{Nothing,Int64} = nothing + "The maximum proportion of variance that we are willing to accept when reducing the number of dimensions in our data [def: 0.05]. It doesn't have any effect when the output number of dimensions is explicitly chosen with the parameter `encoded_size`" max_unexplained_var::Float64 = 0.05 end Base.@kwdef mutable struct PCA_lp <: BetaMLLearnableParametersSet eigen_out::Union{Eigen,Nothing} =nothing - outdims_actual::Union{Int64,Nothing}=nothing + encoded_size_actual::Union{Int64,Nothing}=nothing end """ @@ -909,7 +909,7 @@ For the parameters see [`PCAE_hp`](@ref) and [`BML_options`](@ref) # Notes: - PCAEncoder doesn't automatically scale the data. It is suggested to apply the [`Scaler`](@ref) model before running it. - Missing data are not supported. Impute them first, see the [`Imputation`](@ref) module. -- If one doesn't know _a priori_ the maximum unexplained variance that he is willling to accept, nor the wished number of dimensions, he can run the model with all the dimensions in output (i.e. with `outdims=size(X,2)`), analise the proportions of explained cumulative variance by dimensions in `info(mod,""explained_var_by_dim")`, choose the number of dimensions K according to his needs and finally pick from the reprojected matrix only the number of dimensions required, i.e. `out.X[:,1:K]`. +- If one doesn't know _a priori_ the maximum unexplained variance that he is willling to accept, nor the wished number of dimensions, he can run the model with all the dimensions in output (i.e. with `encoded_size=size(X,2)`), analise the proportions of explained cumulative variance by dimensions in `info(mod,""explained_var_by_dim")`, choose the number of dimensions K according to his needs and finally pick from the reprojected matrix only the number of dimensions required, i.e. `out.X[:,1:K]`. # Example: @@ -966,7 +966,16 @@ function PCAEncoder(;kwargs...) found = true end end - found || error("Keyword \"$kw\" is not part of this model.") + # Correction for releasing without breaking.. to remove on v0.12 onward... + # found || error("Keyword \"$kw\" is not part of this model.") + if !found + if kw == :outdims + setproperty!(m.hpar,:encoded_size,kwv) + found = true + else + error("Keyword \"$kw\" is not part of this model.") + end + end end return m end @@ -976,7 +985,7 @@ end function fit!(m::PCAEncoder,X) # Parameter alias.. - outdims = m.hpar.outdims + encoded_size = m.hpar.encoded_size max_unexplained_var = m.hpar.max_unexplained_var cache = m.opt.cache verbosity = m.opt.verbosity @@ -987,35 +996,35 @@ function fit!(m::PCAEncoder,X) end (N,D) = size(X) - if !isnothing(outdims) && outdims > D - @error("The parameter `outdims` must be ≤ of the number of dimensions of the input data matrix") + if !isnothing(encoded_size) && encoded_size > D + @error("The parameter `encoded_size` must be ≤ of the number of dimensions of the input data matrix") end Σ = (1/N) * X'*(I-(1/N)*ones(N)*ones(N)')*X E = eigen(Σ) # eigenvalues are ordered from the smallest to the largest # Finding oudims_actual totvar = sum(E.values) explained_var_by_dim = cumsum(reverse(E.values)) ./ totvar - outdims_actual = isnothing(outdims) ? findfirst(x -> x >= (1-max_unexplained_var), explained_var_by_dim) : outdims + encoded_size_actual = isnothing(encoded_size) ? findfirst(x -> x >= (1-max_unexplained_var), explained_var_by_dim) : encoded_size m.par.eigen_out = E - m.par.outdims_actual = outdims_actual + m.par.encoded_size_actual = encoded_size_actual if cache - P = E.vectors[:,end:-1:D-outdims_actual+1] + P = E.vectors[:,end:-1:D-encoded_size_actual+1] m.cres = X*P end m.info["fitted_records"] = get(m.info,"fitted_records",0) + N m.info["xndims"] = D m.info["explained_var_by_dim"] = explained_var_by_dim - m.info["prop_explained_var"] = explained_var_by_dim[outdims_actual] - m.info["retained_dims"] = outdims_actual + m.info["prop_explained_var"] = explained_var_by_dim[encoded_size_actual] + m.info["retained_dims"] = encoded_size_actual m.fitted=true return cache ? m.cres : nothing end function predict(m::PCAEncoder,X) D = size(m.par.eigen_out.vectors,2) - P = m.par.eigen_out.vectors[:,end:-1:D-m.par.outdims_actual+1] + P = m.par.eigen_out.vectors[:,end:-1:D-m.par.encoded_size_actual+1] return X*P end diff --git a/src/Utils/Utils_extra.jl b/src/Utils/Utils_extra.jl index 5f932e52..b1f15d10 100644 --- a/src/Utils/Utils_extra.jl +++ b/src/Utils/Utils_extra.jl @@ -19,14 +19,14 @@ $(FIELDS) """ Base.@kwdef mutable struct AutoE_hp <: BetaMLHyperParametersSet - "The layers (vector of `AbstractLayer`s) responsable of the encoding of the data [def: `nothing`, i.e. two dense layers with the inner one of `innerdims`]" + "The layers (vector of `AbstractLayer`s) responsable of the encoding of the data [def: `nothing`, i.e. two dense layers with the inner one of `layers_size`]" e_layers::Union{Nothing,Vector{AbstractLayer}} = nothing - "The layers (vector of `AbstractLayer`s) responsable of the decoding of the data [def: `nothing`, i.e. two dense layers with the inner one of `innerdims`]" + "The layers (vector of `AbstractLayer`s) responsable of the decoding of the data [def: `nothing`, i.e. two dense layers with the inner one of `layers_size`]" d_layers::Union{Nothing,Vector{AbstractLayer}} = nothing - "The number of neurons (i.e. dimensions) of the encoded data. If the value is a float it is considered a percentual (to be rounded) of the dimensionality of the data [def: `0.33`]" - outdims::Union{Float64,Int64} = 0.333 - "Inner layer dimension (i.e. number of neurons). If the value is a float it is consiered a percentual (to be rounded) of the dimensionality of the data [def: `nothing` that applies a specific heuristic]. Consider that the underlying neural network is trying to predict multiple values at the same times. Normally this requires many more neurons than a scalar prediction. If `e_layers` or `d_layers` are specified, this parameter is ignored for the respective part." - innerdims::Union{Int64,Float64,Nothing} = nothing + "The desired size of the encoded data, that is the number of dimensions in output or the size of the latent space. This is the number of neurons of the layer sitting between the econding and decoding layers. If the value is a float it is considered a percentual (to be rounded) of the dimensionality of the data [def: `0.33`]" + encoded_size::Union{Float64,Int64} = 0.333 + "Inner layers dimension (i.e. number of neurons). If the value is a float it is considered a percentual (to be rounded) of the dimensionality of the data [def: `nothing` that applies a specific heuristic]. Consider that the underlying neural network is trying to predict multiple values at the same times. Normally this requires many more neurons than a scalar prediction. If `e_layers` or `d_layers` are specified, this parameter is ignored for the respective part." + layers_size::Union{Int64,Float64,Nothing} = nothing """Loss (cost) function [def: `squared_cost`] It must always assume y and ŷ as (n x d) matrices, eventually using `dropdims` inside. """ @@ -46,11 +46,11 @@ Base.@kwdef mutable struct AutoE_hp <: BetaMLHyperParametersSet See [`SuccessiveHalvingSearch`](@ref) for the default method. To implement automatic hyperparameter tuning during the (first) `fit!` call simply set `autotune=true` and eventually change the default `tunemethod` options (including the parameter ranges, the resources to employ and the loss function to adopt). """ - tunemethod::AutoTuneMethod = SuccessiveHalvingSearch(hpranges = Dict("epochs"=>[100,200,400],"batch_size"=>[8,16],"outdims"=>[0.2,0.3,0.5],"innerdims"=>[1.3,2.0,5.0,10.0,nothing]),multithreads=true) + tunemethod::AutoTuneMethod = SuccessiveHalvingSearch(hpranges = Dict("epochs"=>[100,200,400],"batch_size"=>[8,16],"encoded_size"=>[0.2,0.3,0.5],"layers_size"=>[1.3,2.0,5.0,10.0,nothing]),multithreads=true) end Base.@kwdef mutable struct AutoEncoder_lp <: BetaMLLearnableParametersSet - outdims_actual::Union{Int64,Nothing} = nothing + encoded_size_actual::Union{Int64,Nothing} = nothing fullnn::Union{NeuralNetworkEstimator,Nothing} = nothing n_el::Union{Nothing,Int64} = nothing n_dl::Union{Nothing,Int64} = nothing @@ -83,7 +83,7 @@ julia> x = [0.12 0.31 0.29 3.21 0.21; 0.35 0.93 0.91 10.04 0.71; 0.44 1.21 1.18 13.54 0.85]; -julia> m = AutoEncoder(outdims=1,epochs=400) +julia> m = AutoEncoder(encoded_size=1,epochs=400) A AutoEncoder BetaMLModel (unfitted) julia> x_reduced = fit!(m,x) @@ -141,7 +141,21 @@ function AutoEncoder(;kwargs...) found = true end end - found || error("Keyword \"$kw\" is not part of this model.") + # Correction for releasing without breaking.. to remove on v0.12 onward... + # found || error("Keyword \"$kw\" is not part of this model.") + if !found + if kw == :outdims + setproperty!(m.hpar,:encoded_size,kwv) + found = true + elseif kw == :innerdims + setproperty!(m.hpar,:layers_size,kwv) + found = true + else + error("Keyword \"$kw\" is not part of this model.") + end + end + + end return m end @@ -152,8 +166,8 @@ function fit!(m::AutoEncoder,X) # Parameter alias.. e_layers = m.hpar.e_layers d_layers = m.hpar.d_layers - outdims = m.hpar.outdims - innerdims = m.hpar.innerdims + encoded_size = m.hpar.encoded_size + layers_size = m.hpar.layers_size loss = m.hpar.loss dloss = m.hpar.dloss epochs = m.hpar.epochs @@ -171,13 +185,13 @@ function fit!(m::AutoEncoder,X) if fitted size(m.par.fullnn.par.nnstruct.layers[1])[1][1] == D || @error "The data used to re-fit the model have different dimensionality than the original data. [`reset!`](@ref) the model first." verbosity >= HIGH && @info "Re-fitting of the model on new data" - outdims_actual = m.par.outdims_actual + encoded_size_actual = m.par.encoded_size_actual fullnn = m.par.fullnn n_el = m.par.n_el n_dl = m.par.n_dl else - typeof(outdims) <: Integer ? outdims_actual = outdims : outdims_actual = max(1,Int(round(D * outdims))) - if isnothing(innerdims) + typeof(encoded_size) <: Integer ? encoded_size_actual = encoded_size : encoded_size_actual = max(1,Int(round(D * encoded_size))) + if isnothing(layers_size) if D == 1 innerSize = 3 elseif D < 5 @@ -187,22 +201,22 @@ function fit!(m::AutoEncoder,X) else innerSize = max(1,Int(round(D*1.3*log(2,D)))) end - elseif typeof(innerdims) <: Integer - innerSize = innerdims + elseif typeof(layers_size) <: Integer + innerSize = layers_size else - innerSize = max(1,Int(round(D*innerdims)) ) + innerSize = max(1,Int(round(D*layers_size)) ) end if isnothing(e_layers) l1 = DenseLayer(D,innerSize, f=relu, df=drelu, rng=rng) l2 = DenseLayer(innerSize,innerSize, f=relu, df=drelu, rng=rng) - l3 = DenseLayer(innerSize, outdims_actual, f=identity, df=didentity, rng=rng) + l3 = DenseLayer(innerSize, encoded_size_actual, f=identity, df=didentity, rng=rng) e_layers_actual = [l1,l2,l3] else e_layers_actual = copy(e_layers) end if isnothing(d_layers) - l1d = DenseLayer(outdims_actual,innerSize, f=relu, df=drelu, rng=rng) + l1d = DenseLayer(encoded_size_actual,innerSize, f=relu, df=drelu, rng=rng) l2d = DenseLayer(innerSize,innerSize, f=relu, df=drelu, rng=rng) l3d = DenseLayer(innerSize, D, f=identity, df=didentity, rng=rng) d_layers_actual = [l1d,l2d,l3d] @@ -217,7 +231,7 @@ function fit!(m::AutoEncoder,X) x̂ = fit!(fullnn,X,X) par = AutoEncoder_lp() - par.outdims_actual = outdims_actual + par.encoded_size_actual = encoded_size_actual par.fullnn = fullnn par.n_el = n_el par.n_dl = n_dl diff --git a/test/Utils_tests.jl b/test/Utils_tests.jl index 06137922..92c5a415 100644 --- a/test/Utils_tests.jl +++ b/test/Utils_tests.jl @@ -295,7 +295,7 @@ println("** Testing pca()...") X = [1 8; 4.5 5.5; 9.5 0.5] expectedX = [-4.58465 6.63182;-0.308999 7.09961; 6.75092 6.70262] -m = PCAEncoder(outdims=2) +m = PCAEncoder(encoded_size=2) fit!(m,X) ŷ = predict(m) @test isapprox(ŷ,expectedX,atol=0.00001) || isapprox(ŷ, (.- expectedX),atol=0.00001) @@ -323,9 +323,9 @@ tuning_method = SuccessiveHalvingSearch( res_shares = [0.2, 0.3], multithreads = true ) -m = AutoEncoder(epochs=400,outdims=2,autotune=true, +m = AutoEncoder(epochs=400,encoded_size=2,autotune=true, tunemethod=tuning_method, verbosity=NONE, rng=copy(TESTRNG) ) -#m = AutoEncoder(outdims=2,rng=copy(TESTRNG)) +#m = AutoEncoder(encoded_size=2,rng=copy(TESTRNG)) x2 = fit!(m,x) x2b = predict(m) x2c = predict(m,x) @@ -357,11 +357,13 @@ x = [0.12 0.31 0.29 3.21 0.21; 0.22 0.61 0.58 6.43 0.42; 0.12 0.31 0.29 3.21 0.21; 0.44 1.21 1.18 13.54 0.85]; -m = AutoEncoder(outdims=1,epochs=400,autotune=false) +m = AutoEncoder(encoded_size=2,layers_size=15,epochs=400,autotune=false) x_reduced = fit!(m,x) x̂ = inverse_predict(m,x_reduced) -info(m)["rme"] -hcat(x,x̂) +rme = info(m)["rme"] +@test size(x_reduced) == (10,2) +@test size(x̂ ) == (10,5) +@test rme < 0.1 # ================================== # New test @@ -741,7 +743,7 @@ import MLJBase const MLJ = MLJBase X, y = MLJ.@load_iris -model = BetaML.Bmlj.AutoEncoder(outdims=2,rng=copy(TESTRNG)) +model = BetaML.Bmlj.AutoEncoder(encoded_size=2,rng=copy(TESTRNG)) ae = MLJ.machine(model, X) MLJ.fit!(ae) X_latent = MLJ.transform(ae, X)