Adapted `l2loss_by_cv` to unsupervised models with `inverse_predict`
The fit is judged by the loss between the original data and its reconstruction via `inverse_predict`.
sylvaticus committed Dec 29, 2023
1 parent 40a71b8 commit a2417af
Showing 4 changed files with 60 additions and 30 deletions.
src/Utils/Measures.jl (34 additions, 18 deletions)
@@ -182,25 +182,41 @@ $(TYPEDEF)
 Compute the loss of a given model over a given (x,y) dataset running cross-validation
 """
 function l2loss_by_cv(m,data;nsplits=5,rng=Random.GLOBAL_RNG)
-    x,y = data[1],data[2]
-    sampler = KFold(nsplits=nsplits,rng=rng)
-    if (ndims(y) == 1)
-        ohm = OneHotEncoder(handle_unknown="infrequent",cache=false)
-        fit!(ohm,y)
-    end
-    (μ,σ) = cross_validation([x,y],sampler) do trainData,valData,rng
-        (xtrain,ytrain) = trainData; (xval,yval) = valData
-        fit!(m,xtrain,ytrain)
-        ŷval = predict(m,xval)
-        if (eltype(ŷval) <: Dict)
-            yval = predict(ohm,yval)
-            ŷval = predict(ohm,ŷval)
-        end
-        ϵ = norm(yval-ŷval)/size(yval,1)
-        reset!(m)
-        return ismissing(ϵ) ? Inf : ϵ
-    end
-    return μ
+    if length(data) == 2 # supervised model
+        x,y = data[1],data[2]
+        sampler = KFold(nsplits=nsplits,rng=rng)
+        if (ndims(y) == 1)
+            ohm = OneHotEncoder(handle_unknown="infrequent",cache=false)
+            fit!(ohm,y)
+        end
+        (μ,σ) = cross_validation([x,y],sampler) do trainData,valData,rng
+            (xtrain,ytrain) = trainData; (xval,yval) = valData
+            fit!(m,xtrain,ytrain)
+            ŷval = predict(m,xval)
+            if (eltype(ŷval) <: Dict)
+                yval = predict(ohm,yval)
+                ŷval = predict(ohm,ŷval)
+            end
+            ϵ = norm(yval-ŷval)/size(yval,1)
+            reset!(m)
+            return ismissing(ϵ) ? Inf : ϵ
+        end
+        return μ
+    elseif length(data) == 1 # unsupervised model with inverse_predict
+        x = data[1]
+        sampler = KFold(nsplits=nsplits,rng=rng)
+        (μ,σ) = cross_validation([x],sampler) do trainData,valData,rng
+            (xtrain,) = trainData; (xval,) = valData
+            fit!(m,xtrain)
+            x̂val = inverse_predict(m,xval)
+            ϵ = norm(xval .- x̂val)/size(xval,1)
+            reset!(m)
+            return ismissing(ϵ) ? Inf : ϵ
+        end
+        return μ
+    else
+        @error "Function `l2loss_by_cv` accepts only data of length 1 or 2, respectively for unsupervised and supervised models"
+    end
 end

 """ error(y,ŷ) - Categorical error with probabilistic prediction of a single datapoint (Int vs PMF). """
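With this change an unsupervised model exposing `inverse_predict` can be cross-validated on x alone: each fold is scored as ϵ = norm(xval .- x̂val)/size(xval,1), the distance between the held-out records and their reconstruction. A minimal usage sketch of the new one-element form, with made-up data and hyperparameters (the call mirrors how `tune!` passes data to the loss function):

using BetaML

x = rand(100,5)                                  # hypothetical unsupervised dataset
m = AutoEncoder(outdims=2, epochs=50, verbosity=NONE)
μ = l2loss_by_cv(m, (x,), nsplits=3)             # length-1 data selects the new branch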
src/Utils/Processing.jl (4 additions, 4 deletions)
@@ -1174,12 +1174,12 @@ Hyperparameter autotuning using the [`GridSearch`](@ref) method.
 """
 function tune!(m,method::GridSearch,data)
-    options(m).verbosity >= STD && println("Starting hp autotuning (could take a while..)")
+    options(m).verbosity >= STD && println("Starting hyper-parameters autotuning (this could take a while..)")
     options(m).verbosity >= HIGH && println(method)
     hpranges = method.hpranges
     candidates = _hpranges_2_candidates(hpranges)
     rng = options(m).rng
-    multithreads = method.multithreads && Threads.nthreads() > 1
+    multithreads = method.multithreads && Threads.nthreads() > 1
     compLock = ReentrantLock()
     best_candidate = Dict()
     lowest_loss = Inf

@@ -1228,7 +1228,7 @@ Hyperparameter autotuning using the [`SuccessiveHalvingSearch`](@ref) method.
 """
 function tune!(m,method::SuccessiveHalvingSearch,data)
-    options(m).verbosity >= STD && println("Starting hp autotuning (could take a while..)")
+    options(m).verbosity >= STD && println("Starting hyper-parameters autotuning (this could take a while..)")
     options(m).verbosity >= HIGH && println(method)
     hpranges = method.hpranges
     res_shares = method.res_shares

@@ -1250,7 +1250,7 @@ function tune!(m,method::SuccessiveHalvingSearch,data)
     epochdata = (collect([esubs[i][1] for i in 1:length(esubs)])...,)
     ncandidates_thisepoch = Int(round(ncandidates/shrinkfactor^(e-1)))
     ncandidates_tokeep = Int(round(ncandidates/shrinkfactor^e))
-    options(m).verbosity >= STD && println("(e $e / $epochs) N data / candidates / candidates to retain : $(n_orig * res_share) \t $ncandidates_thisepoch $ncandidates_tokeep")
+    options(m).verbosity >= STD && println("(e $e / $epochs) N data / n candidates / n candidates to retain : $(n_orig * res_share) \t $ncandidates_thisepoch $ncandidates_tokeep")
     scores = Vector{Tuple{Float64,Dict}}(undef,ncandidates_thisepoch)
     masterSeed = rand(rng,100:typemax(Int64))
     rngs = generate_parallel_rngs(rng,Threads.nthreads())
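The log line above reports a geometric schedule: each successive-halving epoch evaluates the surviving candidates on a larger data share and retains roughly a 1/shrinkfactor fraction of them. A toy trace of that bookkeeping, with made-up values for `ncandidates` and `shrinkfactor` (in `tune!` they are derived from the hyperparameter ranges and `res_shares`):

ncandidates  = 27
shrinkfactor = 3.0
epochs       = 3
for e in 1:epochs
    ncandidates_thisepoch = Int(round(ncandidates/shrinkfactor^(e-1)))  # 27, 9, 3
    ncandidates_tokeep    = Int(round(ncandidates/shrinkfactor^e))      # 9, 3, 1
    println("epoch $e: evaluate $ncandidates_thisepoch, retain $ncandidates_tokeep")
end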
src/Utils/Utils_extra.jl (11 additions, 5 deletions)
@@ -45,7 +45,7 @@ Base.@kwdef mutable struct AutoEncoderHyperParametersSet <: BetaMLHyperParametersSet
     See [`SuccessiveHalvingSearch`](@ref) for the default method.
     To implement automatic hyperparameter tuning during the (first) `fit!` call simply set `autotune=true` and eventually change the default `tunemethod` options (including the parameter ranges, the resources to employ and the loss function to adopt).
     """
-    tunemethod::AutoTuneMethod = SuccessiveHalvingSearch(hpranges = Dict("epochs"=>[100,150,200],"batch_size"=>[8,16,32],"outdims"=>[0.2,0.3,0.5],"innerdims"=>[1.3,2.0,5.0]),multithreads=false)
+    tunemethod::AutoTuneMethod = SuccessiveHalvingSearch(hpranges = Dict("epochs"=>[100,200,400],"batch_size"=>[8,16],"outdims"=>[0.2,0.3,0.5],"innerdims"=>[1.3,2.0,5.0,10.0,nothing]),multithreads=true)
 end

 Base.@kwdef mutable struct AutoEncoderLearnableParameters <: BetaMLLearnableParametersSet

@@ -172,6 +172,8 @@ function fit!(m::AutoEncoder,X)
         verbosity >= HIGH && @info "Re-fitting of the model on new data"
         outdims_actual = m.par.outdims_actual
         fullnn = m.par.fullnn
+        n_el = m.par.n_el
+        n_dl = m.par.n_dl
     else
         typeof(outdims) <: Integer ? outdims_actual = outdims : outdims_actual = max(1,Int(round(D * outdims)))
         if isnothing(innerdims)

@@ -209,15 +211,19 @@ function fit!(m::AutoEncoder,X)
         fullnn = NeuralNetworkEstimator(layers=[e_layers_actual...,d_layers_actual...],loss=loss,dloss=dloss,epochs=epochs,batch_size=batch_size,opt_alg=opt_alg,shuffle=shuffle,cache=cache,descr=descr,verbosity=verbosity,rng=rng )
+        n_el = length(e_layers_actual)
+        n_dl = length(d_layers_actual)
-        m.par.n_el = n_el
-        m.par.n_dl = n_dl
     end

     x̂ = fit!(fullnn,X,X)

-    m.par.outdims_actual = outdims_actual
-    m.par.fullnn = fullnn
+    par = AutoEncoderLearnableParameters()
+    par.outdims_actual = outdims_actual
+    par.fullnn = fullnn
+    par.n_el = n_el
+    par.n_dl = n_dl
+    m.par = par

     m.fitted=true

     rme = cache ? relative_mean_error(X,x̂) : missing

     m.info["nepochs_ran"] = info(fullnn)["nepochs_ran"]
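A sketch of the two `fit!` paths this diff touches, on hypothetical data: the first call builds `fullnn` and stores `outdims_actual`, `n_el` and `n_dl` (the encoder and decoder layer counts) in a fresh `AutoEncoderLearnableParameters`; a later call on new data reads them back instead of rebuilding the network.

using BetaML

x_first = rand(80,4); x_more = rand(20,4)   # hypothetical data batches
m = AutoEncoder(outdims=2, epochs=50, verbosity=NONE)
fit!(m, x_first)   # fresh fit: builds the network, caches n_el/n_dl in m.par
fit!(m, x_more)    # re-fit: reuses m.par.fullnn, m.par.n_el, m.par.n_dl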
test/Utils_tests.jl (11 additions, 3 deletions)
@@ -316,8 +316,16 @@ println("** Testing AutoEncoder...")
 iris = readdlm(joinpath(@__DIR__,"data","iris_shuffled.csv"),',',skipstart=1)
 x = convert(Array{Float64,2}, iris[:,1:4])
 y = convert(Array{String,1}, iris[:,5])
-
-m = AutoEncoder(outdims=2,rng=copy(TESTRNG))
+tuning_method = SuccessiveHalvingSearch(
+    hpranges = Dict(
+        "innerdims"=>[2.0,5.0,nothing]
+    ),
+    res_shares = [0.2, 0.3],
+    multithreads = true
+)
+m = AutoEncoder(epochs=400,outdims=2,autotune=true,
+    tunemethod=tuning_method, verbosity=NONE, rng=copy(TESTRNG) )
+#m = AutoEncoder(outdims=2,rng=copy(TESTRNG))
 x2 = fit!(m,x)
 x2b = predict(m)
 x2c = predict(m,x)

@@ -349,7 +357,7 @@ x = [0.12 0.31 0.29 3.21 0.21;
      0.22 0.61 0.58 6.43 0.42;
      0.12 0.31 0.29 3.21 0.21;
      0.44 1.21 1.18 13.54 0.85];
-m = AutoEncoder(outdims=1,epochs=400,autotune=false) #TODO: check why autotune is broken here
+m = AutoEncoder(outdims=1,epochs=400,autotune=false)
 x_reduced = fit!(m,x)
 x̂ = inverse_predict(m,x_reduced)
 info(m)["rme"]
