From 55353cb797717ff74ce2ab5641e065f0018ea832 Mon Sep 17 00:00:00 2001
From: Antonello Lobianco
Date: Sun, 21 Jan 2024 17:03:42 +0100
Subject: [PATCH] Moved all MLJ interface models to Bmlj module (not yet
 renamed), solved bug in GMMImputer and GMMRegressor1

1) All MLJ interface models have been moved to the `Bmlj` module, so that they
   can have the same names as the BetaML models; the actual renaming has not
   been performed yet
2) Solved a bug in `GMMImputer` and `GMMRegressor1` that prevented them from
   being initialised with the `mixtures` keyword given as a type
---
 src/BetaML.jl                                 |  10 +-
 src/Bmlj/Bmlj.jl                              |  22 +-
 .../Clustering_mlj.jl}                        |  13 +-
 src/{GMM/GMM_MLJ.jl => Bmlj/GMM_mlj.jl}       |  43 +-
 .../Imputation_mlj.jl}                        |  49 ++-
 src/Bmlj/Nn_mlj.jl                            | 370 ++++++++++++++++-
 src/Bmlj/Perceptron_mlj.jl                    | 325 +++++++++++++++
 src/{Trees/Trees_MLJ.jl => Bmlj/Trees_mlj.jl} |  35 +-
 src/Clustering/Clustering.jl                  |   2 -
 src/GMM/GMM.jl                                |   3 -
 src/GMM/GMM_regression.jl                     |   2 +-
 src/Imputation/Imputation.jl                  |   4 +-
 src/Nn/Nn.jl                                  |   5 -
 src/Nn/Nn_MLJ.jl                              | 374 ------------------
 src/Trees/Trees.jl                            |   1 -
 test/Clustering_tests.jl                      |   9 +-
 test/GMM_tests.jl                             |  14 +-
 test/Imputation_tests.jl                      |  28 +-
 test/Nn_tests.jl                              |   6 +-
 test/Trees_tests.jl                           |   8 +-
 20 files changed, 813 insertions(+), 510 deletions(-)
 rename src/{Clustering/Clustering_MLJ.jl => Bmlj/Clustering_mlj.jl} (92%)
 rename src/{GMM/GMM_MLJ.jl => Bmlj/GMM_mlj.jl} (92%)
 rename src/{Imputation/Imputation_MLJ.jl => Bmlj/Imputation_mlj.jl} (94%)
 create mode 100644 src/Bmlj/Perceptron_mlj.jl
 rename src/{Trees/Trees_MLJ.jl => Bmlj/Trees_mlj.jl} (89%)
 delete mode 100644 src/Nn/Nn_MLJ.jl

diff --git a/src/BetaML.jl b/src/BetaML.jl
index 900c850c..7abc4c54 100644
--- a/src/BetaML.jl
+++ b/src/BetaML.jl
@@ -56,11 +56,11 @@ import .Bmlj # some MLJ models have the same name as BetaML models, set them in
 # ------------------------------------------------------------------------------
 #MLJ interface...
 const MLJ_PERCEPTRON_MODELS = (Bmlj.LinearPerceptron, Bmlj.KernelPerceptron, Bmlj.Pegasos)
-const MLJ_TREES_MODELS      = (DecisionTreeClassifier, DecisionTreeRegressor, RandomForestClassifier, RandomForestRegressor)
-const MLJ_CLUSTERING_MODELS = (KMeans, KMedoids, GaussianMixtureClusterer)
-const MLJ_IMPUTERS_MODELS   = (SimpleImputer, GaussianMixtureImputer, RandomForestImputer,GeneralImputer) # these are the name of the MLJ models, not the BetaML ones...
-const MLJ_NN_MODELS         = (NeuralNetworkRegressor,MultitargetNeuralNetworkRegressor, NeuralNetworkClassifier)
-const MLJ_OTHER_MODELS      = (GaussianMixtureRegressor,MultitargetGaussianMixtureRegressor,Bmlj.AutoEncoder)
+const MLJ_TREES_MODELS      = (Bmlj.DecisionTreeClassifier, Bmlj.DecisionTreeRegressor, Bmlj.RandomForestClassifier, Bmlj.RandomForestRegressor)
+const MLJ_CLUSTERING_MODELS = (Bmlj.KMeans, Bmlj.KMedoids, Bmlj.GaussianMixtureClusterer)
+const MLJ_IMPUTERS_MODELS   = (Bmlj.SimpleImputer, Bmlj.GaussianMixtureImputer, Bmlj.RandomForestImputer, Bmlj.GeneralImputer) # these are the names of the MLJ models, not the BetaML ones...
+const MLJ_NN_MODELS         = (Bmlj.NeuralNetworkRegressor, Bmlj.MultitargetNeuralNetworkRegressor, Bmlj.NeuralNetworkClassifier)
+const MLJ_OTHER_MODELS      = (Bmlj.GaussianMixtureRegressor, Bmlj.MultitargetGaussianMixtureRegressor, Bmlj.AutoEncoder)
 const MLJ_INTERFACED_MODELS = (MLJ_PERCEPTRON_MODELS..., MLJ_TREES_MODELS..., MLJ_CLUSTERING_MODELS..., MLJ_IMPUTERS_MODELS..., MLJ_NN_MODELS..., MLJ_OTHER_MODELS...)
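Taken together, the two changes above can be exercised as follows. This is a minimal sketch assuming a post-patch checkout of BetaML: the fully qualified `BetaML.Bmlj.*` names follow the new `load_path` metadata set later in this patch, and the type-valued `mixtures` keyword relies on the bug fix described in point 2.

```julia
using BetaML

# The MLJ wrappers now live in the unexported `Bmlj` submodule, so they are
# reached with fully qualified names (they are no longer exported by BetaML):
imp = BetaML.Bmlj.GaussianMixtureImputer(n_classes=2)

# After the fix, `mixtures` can be given as a mixture *type*, which the
# constructor expands internally to `n_classes` fresh instances...
r1 = BetaML.Bmlj.GaussianMixtureRegressor(n_classes=2, mixtures=BetaML.GMM.SphericalGaussian)

# ...or, as before, as an explicit vector of mixture instances:
r2 = BetaML.Bmlj.GaussianMixtureRegressor(mixtures=[BetaML.GMM.DiagonalGaussian() for i in 1:3])
```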
diff --git a/src/Bmlj/Bmlj.jl b/src/Bmlj/Bmlj.jl
index dc4bfddd..0a4ee011 100644
--- a/src/Bmlj/Bmlj.jl
+++ b/src/Bmlj/Bmlj.jl
@@ -5,12 +5,15 @@
 # MLJ interface for BetaML models

 In this module we define the interface of several BetaML models. They can be used using the [MLJ framework](https://github.com/alan-turing-institute/MLJ.jl).
+
+Note that the MLJ models (whose names may coincide with those of the underlying BetaML models) are not exported. You can access them with `BetaML.Bmlj.ModelXYZ`.
+
 """
 module Bmlj

-mljverbosity_to_betaml_verbosity
+using Random, LinearAlgebra, Statistics
 using CategoricalArrays, DocStringExtensions
-using Random
+
 import MLJModelInterface       # It seems that having done this in the top module is not enought
 const MMI = MLJModelInterface  # We need to repeat it here
@@ -24,12 +27,11 @@ import ..BetaML
 import ..Utils # can't using it as it exports some same-name models
 import ..Perceptron
 import ..Nn: AbstractLayer, ADAM, SGD, NeuralNetworkEstimator, OptimisationAlgorithm, DenseLayer, NN
-import ..Utils: AbstractRNG, squared_cost, SuccessiveHalvingSearch, mljverbosity_to_betaml_verbosity
+import ..Utils: AbstractRNG, squared_cost, SuccessiveHalvingSearch

 export mljverbosity_to_betaml_verbosity

-
 """
 $(TYPEDSIGNATURES)

@@ -51,11 +53,11 @@ function mljverbosity_to_betaml_verbosity(i::Integer)
 end

 include("Perceptron_mlj.jl") # Perceptron-like algorithms
-#include("Trees_mlj.jl")      # Decision Trees and ensembles (Random Forests)
-#include("Clustering_mlj.jl") # Clustering (hard) algorithms
-#include("GMM_mlj.jl")        # GMM-based learners (clustering, fitter, regression)
-#include("Imputation_mlj.jl")
-#include("Nn_mlj.jl")
-include("Utils_mlj.jl")
+include("Trees_mlj.jl")      # Decision Trees and ensembles (Random Forests)
+include("Clustering_mlj.jl") # Clustering (hard) algorithms
+include("GMM_mlj.jl")        # GMM-based learners (clustering, fitter, regression)
+include("Imputation_mlj.jl") # Imputation models
+include("Nn_mlj.jl")         # Neural network models
+include("Utils_mlj.jl")      # Various transformers/encoders

 end
\ No newline at end of file
diff --git a/src/Clustering/Clustering_MLJ.jl b/src/Bmlj/Clustering_mlj.jl
similarity index 92%
rename from src/Clustering/Clustering_MLJ.jl
rename to src/Bmlj/Clustering_mlj.jl
index b6389181..b1dc2ca8 100644
--- a/src/Clustering/Clustering_MLJ.jl
+++ b/src/Bmlj/Clustering_mlj.jl
@@ -2,9 +2,6 @@

 # MLJ interface for hard clustering models

-import MLJModelInterface # It seems that having done this in the top module is not enought
-const MMI = MLJModelInterface # We need to repeat it here
-
 export KMeans, KMedoids

 # ------------------------------------------------------------------------------
@@ -165,11 +162,11 @@ function MMI.fit(m::Union{KMeans,KMedoids}, verbosity, X)
     x  = MMI.matrix(X) # convert table to matrix
     # Using low level API here. We could switch to APIV2...
     typeof(verbosity) <: Integer || error("Verbosity must be a integer.
Current \"steps\" are 0, 1, 2 and 3.") - verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity) + verbosity = mljverbosity_to_betaml_verbosity(verbosity) if typeof(m) == KMeans - (assignedClasses,representatives) = kmeans(x,m.n_classes,dist=m.dist,initialisation_strategy=m.initialisation_strategy,initial_representatives=m.initial_representatives,rng=m.rng,verbosity=verbosity) + (assignedClasses,representatives) = BetaML.Clustering.kmeans(x,m.n_classes,dist=m.dist,initialisation_strategy=m.initialisation_strategy,initial_representatives=m.initial_representatives,rng=m.rng,verbosity=verbosity) else - (assignedClasses,representatives) = kmedoids(x,m.n_classes,dist=m.dist,initialisation_strategy=m.initialisation_strategy,initial_representatives=m.initial_representatives,rng=m.rng, verbosity=verbosity) + (assignedClasses,representatives) = BetaML.Clustering.kmedoids(x,m.n_classes,dist=m.dist,initialisation_strategy=m.initialisation_strategy,initial_representatives=m.initial_representatives,rng=m.rng, verbosity=verbosity) end cache=nothing report=nothing @@ -216,7 +213,7 @@ MMI.metadata_model(KMeans, output_scitype = MMI.Table(MMI.Continuous), # scitype of the output of `transform` target_scitype = AbstractArray{<:MMI.Multiclass}, # scitype of the output of `predict` supports_weights = false, # does the model support sample weights? - load_path = "BetaML.Clustering.KMeans" + load_path = "BetaML.Bmlj.KMeans" ) MMI.metadata_model(KMedoids, @@ -224,5 +221,5 @@ MMI.metadata_model(KMedoids, output_scitype = MMI.Table(MMI.Continuous), # scitype of the output of `transform` target_scitype = AbstractArray{<:MMI.Multiclass}, # scitype of the output of `predict` supports_weights = false, # does the model support sample weights? - load_path = "BetaML.Clustering.KMedoids" + load_path = "BetaML.Bmlj.KMedoids" ) \ No newline at end of file diff --git a/src/GMM/GMM_MLJ.jl b/src/Bmlj/GMM_mlj.jl similarity index 92% rename from src/GMM/GMM_MLJ.jl rename to src/Bmlj/GMM_mlj.jl index d8f29773..90bdccc2 100644 --- a/src/GMM/GMM_MLJ.jl +++ b/src/Bmlj/GMM_mlj.jl @@ -1,9 +1,6 @@ "Part of [BetaML](https://github.com/sylvaticus/BetaML.jl). Licence is MIT." -# MLJ interface for clustering models - -import MLJModelInterface # It seems that having done this in the top module is not enought -const MMI = MLJModelInterface # We need to repeat it here +# MLJ interface for GMM based models export GaussianMixtureClusterer, GaussianMixtureRegressor, MultitargetGaussianMixtureRegressor @@ -68,7 +65,7 @@ mutable struct GaussianMixtureClusterer <: MMI.Unsupervised This parameter can also be given symply in term of a _type_. In this case it is automatically extended to a vector of `n_classes` mixtures of the specified type. Note that mixing of different mixture types is not currently supported. [def: `[DiagonalGaussian() for i in 1:n_classes]`]""" - mixtures::Union{Type,Vector{<: AbstractMixture}} + mixtures::Union{Type,Vector{<: BetaML.GMM.AbstractMixture}} "Tolerance to stop the algorithm [default: 10^(-6)]" tol::Float64 "Minimum variance for the mixtures [default: 0.05]" @@ -92,7 +89,7 @@ end function GaussianMixtureClusterer(; n_classes = 3, initial_probmixtures = Float64[], - mixtures = [DiagonalGaussian() for i in 1:n_classes], + mixtures = [BetaML.GMM.DiagonalGaussian() for i in 1:n_classes], tol = 10^(-6), minimum_variance = 0.05, minimum_covariance = 0.0, @@ -162,7 +159,7 @@ mutable struct GaussianMixtureRegressor <: MMI.Deterministic This parameter can also be given symply in term of a _type_. 
In this case it is automatically extended to a vector of `n_classes`` mixtures of the specified type.
 Note that mixing of different mixture types is not currently supported.
 [def: `[DiagonalGaussian() for i in 1:n_classes]`]"""
-    mixtures::Union{Type,Vector{<: AbstractMixture}}
+    mixtures::Union{Type,Vector{<: BetaML.GMM.AbstractMixture}}
     "Tolerance to stop the algorithm [default: 10^(-6)]"
     tol::Float64
     "Minimum variance for the mixtures [default: 0.05]"
@@ -186,7 +183,7 @@ end
 function GaussianMixtureRegressor(;
     n_classes             = 3,
     initial_probmixtures  = [],
-    mixtures              = [DiagonalGaussian() for i in 1:n_classes],
+    mixtures              = [BetaML.GMM.DiagonalGaussian() for i in 1:n_classes],
     tol                   = 10^(-6),
     minimum_variance      = 0.05,
     minimum_covariance    = 0.0,
@@ -195,7 +192,7 @@ function GaussianMixtureRegressor(;
     rng                   = Random.GLOBAL_RNG
 )
     if typeof(mixtures) <: UnionAll
         mixtures = [mixtures() for i in 1:n_classes]
     end
     return GaussianMixtureRegressor(n_classes,initial_probmixtures,mixtures,tol,minimum_variance,minimum_covariance,initialisation_strategy,maximum_iterations,rng)
 end
@@ -258,7 +255,7 @@ mutable struct MultitargetGaussianMixtureRegressor <: MMI.Deterministic
 This parameter can also be given symply in term of a _type_. In this case it is automatically extended to a vector of `n_classes`` mixtures of the specified type.
 Note that mixing of different mixture types is not currently supported.
 [def: `[DiagonalGaussian() for i in 1:n_classes]`]"""
-    mixtures::Union{Type,Vector{<: AbstractMixture}}
+    mixtures::Union{Type,Vector{<: BetaML.GMM.AbstractMixture}}
     "Tolerance to stop the algorithm [default: 10^(-6)]"
     tol::Float64
     "Minimum variance for the mixtures [default: 0.05]"
@@ -282,7 +279,7 @@ end
 function MultitargetGaussianMixtureRegressor(;
     n_classes             = 3,
     initial_probmixtures  = [],
-    mixtures              = [DiagonalGaussian() for i in 1:n_classes],
+    mixtures              = [BetaML.GMM.DiagonalGaussian() for i in 1:n_classes],
     tol                   = 10^(-6),
     minimum_variance      = 0.05,
     minimum_covariance    = 0.0,
@@ -314,9 +311,9 @@ function MMI.fit(m::GaussianMixtureClusterer, verbosity, X)
     end
     =#
     typeof(verbosity) <: Integer || error("Verbosity must be a integer. Current \"steps\" are 0, 1, 2 and 3.")
-    verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity)
+    verbosity = mljverbosity_to_betaml_verbosity(verbosity)
     mixtures = m.mixtures
-    res = gmm(x,m.n_classes,initial_probmixtures=deepcopy(m.initial_probmixtures),mixtures=mixtures, minimum_variance=m.minimum_variance, minimum_covariance=m.minimum_covariance,initialisation_strategy=m.initialisation_strategy,verbosity=verbosity,maximum_iterations=m.maximum_iterations,rng=m.rng)
+    res = BetaML.GMM.gmm(x,m.n_classes,initial_probmixtures=deepcopy(m.initial_probmixtures),mixtures=mixtures, minimum_variance=m.minimum_variance, minimum_covariance=m.minimum_covariance,initialisation_strategy=m.initialisation_strategy,verbosity=verbosity,maximum_iterations=m.maximum_iterations,rng=m.rng)
     fitResults = (pₖ=res.pₖ,mixtures=res.mixtures) # res.pₙₖ
     cache      = nothing
     report     = (res.ϵ,res.lL,res.BIC,res.AIC)
@@ -327,7 +324,7 @@ MMI.fitted_params(model::GaussianMixtureClusterer, fitresult) = (weights=fitresu
 function MMI.fit(m::GaussianMixtureRegressor, verbosity, X, y)
     x = MMI.matrix(X) # convert table to matrix
     typeof(verbosity) <: Integer || error("Verbosity must be a integer.
Current \"steps\" are 0, 1, 2 and 3.") - verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity) + verbosity = mljverbosity_to_betaml_verbosity(verbosity) ndims(y) < 2 || error("Trying to fit `GaussianMixtureRegressor` with a multidimensional target. Use `MultitargetGaussianMixtureRegressor` instead.") #= if typeof(y) <: AbstractMatrix @@ -345,7 +342,7 @@ function MMI.fit(m::GaussianMixtureRegressor, verbosity, X, y) end =# mixtures = m.mixtures - betamod = GMMRegressor2( + betamod = BetaML.GMM.GMMRegressor2( n_classes = m.n_classes, initial_probmixtures = m.initial_probmixtures, mixtures = mixtures, @@ -363,7 +360,7 @@ end function MMI.fit(m::MultitargetGaussianMixtureRegressor, verbosity, X, y) x = MMI.matrix(X) # convert table to matrix typeof(verbosity) <: Integer || error("Verbosity must be a integer. Current \"steps\" are 0, 1, 2 and 3.") - verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity) + verbosity = mljverbosity_to_betaml_verbosity(verbosity) ndims(y) >= 2 || @warn "Trying to fit `MultitargetGaussianMixtureRegressor` with a single-dimensional target. You may want to consider `GaussianMixtureRegressor` instead." #= if typeof(y) <: AbstractMatrix @@ -381,7 +378,7 @@ function MMI.fit(m::MultitargetGaussianMixtureRegressor, verbosity, X, y) end =# mixtures = m.mixtures - betamod = GMMRegressor2( + betamod = BetaML.GMM.GMMRegressor2( n_classes = m.n_classes, initial_probmixtures = m.initial_probmixtures, mixtures = mixtures, @@ -407,7 +404,7 @@ function MMI.predict(m::GaussianMixtureClusterer, fitResults, X) (pₖ,mixtures) = (fitResults.pₖ, fitResults.mixtures) nCl = length(pₖ) # Compute the probabilities that maximise the likelihood given existing mistures and a single iteration (i.e. doesn't update the mixtures) - thisOut = gmm(x,nCl,initial_probmixtures=pₖ,mixtures=mixtures,tol=m.tol,verbosity=NONE,minimum_variance=m.minimum_variance,minimum_covariance=m.minimum_covariance,initialisation_strategy="given",maximum_iterations=1,rng=m.rng) + thisOut = BetaML.GMM.gmm(x,nCl,initial_probmixtures=pₖ,mixtures=mixtures,tol=m.tol,verbosity=NONE,minimum_variance=m.minimum_variance,minimum_covariance=m.minimum_covariance,initialisation_strategy="given",maximum_iterations=1,rng=m.rng) classes = CategoricalArray(1:nCl) predictions = MMI.UnivariateFinite(classes, thisOut.pₙₖ) return predictions @@ -416,12 +413,12 @@ end function MMI.predict(m::GaussianMixtureRegressor, fitResults, X) x = MMI.matrix(X) # convert table to matrix betamod = fitResults - return dropdims(predict(betamod,x),dims=2) + return dropdims(BetaML.Api.predict(betamod,x),dims=2) end function MMI.predict(m::MultitargetGaussianMixtureRegressor, fitResults, X) x = MMI.matrix(X) # convert table to matrix betamod = fitResults - return predict(betamod,x) + return BetaML.Api.predict(betamod,x) end @@ -434,7 +431,7 @@ MMI.metadata_model(GaussianMixtureClusterer, target_scitype = AbstractArray{<:MMI.Multiclass}, # scitype of the output of `predict` #prediction_type = :probabilistic, # option not added to metadata_model function, need to do it separately supports_weights = false, # does the model support sample weights? - load_path = "BetaML.GMM.GaussianMixtureClusterer" + load_path = "BetaML.Bmlj.GaussianMixtureClusterer" ) MMI.prediction_type(::Type{<:GaussianMixtureClusterer}) = :probabilistic @@ -442,11 +439,11 @@ MMI.metadata_model(GaussianMixtureRegressor, input_scitype = MMI.Table(Union{MMI.Missing, MMI.Infinite}), target_scitype = AbstractVector{<: MMI.Continuous}, # for a supervised model, what target? 
supports_weights = false, # does the model support sample weights? - load_path = "BetaML.GMM.GaussianMixtureRegressor" + load_path = "BetaML.Bmlj.GaussianMixtureRegressor" ) MMI.metadata_model(MultitargetGaussianMixtureRegressor, input_scitype = MMI.Table(Union{MMI.Missing, MMI.Infinite}), target_scitype = AbstractMatrix{<: MMI.Continuous}, # for a supervised model, what target? supports_weights = false, # does the model support sample weights? - load_path = "BetaML.GMM.MultitargetGaussianMixtureRegressor" + load_path = "BetaML.Bmlj.MultitargetGaussianMixtureRegressor" ) diff --git a/src/Imputation/Imputation_MLJ.jl b/src/Bmlj/Imputation_mlj.jl similarity index 94% rename from src/Imputation/Imputation_MLJ.jl rename to src/Bmlj/Imputation_mlj.jl index f7f1723b..b825d3c6 100644 --- a/src/Imputation/Imputation_MLJ.jl +++ b/src/Bmlj/Imputation_mlj.jl @@ -2,9 +2,6 @@ # MLJ interface for imputers models -import MLJModelInterface # It seems that having done this in the top module is not enought -const MMI = MLJModelInterface # We need to repeat it here - export SimpleImputer,GaussianMixtureImputer, RandomForestImputer, GeneralImputer """ @@ -115,7 +112,7 @@ mutable struct GaussianMixtureImputer <: MMI.Unsupervised This parameter can also be given symply in term of a _type_. In this case it is automatically extended to a vector of `n_classes`` mixtures of the specified type. Note that mixing of different mixture types is not currently supported and that currently implemented mixtures are `SphericalGaussian`, `DiagonalGaussian` and `FullGaussian`. [def: `DiagonalGaussian`]""" - mixtures::Union{Type,Vector{<: AbstractMixture}} + mixtures::Union{Type,Vector{<: BetaML.GMM.AbstractMixture}} "Tolerance to stop the algorithm [default: 10^(-6)]" tol::Float64 "Minimum variance for the mixtures [default: 0.05]" @@ -137,7 +134,7 @@ end function GaussianMixtureImputer(; n_classes = 3, initial_probmixtures = Float64[], - mixtures = DiagonalGaussian, #[DiagonalGaussian() for i in 1:n_classes], + mixtures = BetaML.GMM.DiagonalGaussian, #[DiagonalGaussian() for i in 1:n_classes], tol = 10^(-6), minimum_variance = 0.05, minimum_covariance = 0.0, @@ -346,8 +343,8 @@ GeneralImputer(; function MMI.fit(m::SimpleImputer, verbosity, X) x = MMI.matrix(X) # convert table to matrix typeof(verbosity) <: Integer || error("Verbosity must be a integer. Current \"steps\" are 0, 1, 2 and 3.") - verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity) - mod = FeatureBasedImputer( + verbosity = mljverbosity_to_betaml_verbosity(verbosity) + mod = BetaML.Imputation.FeatureBasedImputer( statistic = m.statistic, norm = m.norm, verbosity = verbosity, @@ -356,14 +353,14 @@ function MMI.fit(m::SimpleImputer, verbosity, X) #fitResults = MMI.table(predict(mod)) fitResults = mod cache = nothing - report = info(mod) + report = BetaML.Api.info(mod) return (fitResults, cache, report) end function MMI.fit(m::GaussianMixtureImputer, verbosity, X) x = MMI.matrix(X) # convert table to matrix typeof(verbosity) <: Integer || error("Verbosity must be a integer. 
Current \"steps\" are 0, 1, 2 and 3.") - verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity) + verbosity = mljverbosity_to_betaml_verbosity(verbosity) #=if m.mixtures == :diag_gaussian mixtures = [DiagonalGaussian() for i in 1:m.n_classes] elseif m.mixtures == :full_gaussian @@ -375,7 +372,7 @@ function MMI.fit(m::GaussianMixtureImputer, verbosity, X) end =# - mod = GMMImputer( + mod = BetaML.Imputation.GMMImputer( n_classes = m.n_classes, initial_probmixtures = m.initial_probmixtures, mixtures = m.mixtures, @@ -390,7 +387,7 @@ function MMI.fit(m::GaussianMixtureImputer, verbosity, X) #fitResults = MMI.table(predict(mod)) fitResults = mod cache = nothing - report = info(mod) + report = BetaML.Api.info(mod) return (fitResults, cache, report) end @@ -398,8 +395,8 @@ end function MMI.fit(m::RandomForestImputer, verbosity, X) x = MMI.matrix(X) # convert table to matrix typeof(verbosity) <: Integer || error("Verbosity must be a integer. Current \"steps\" are 0, 1, 2 and 3.") - verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity) - mod = RFImputer( + verbosity = mljverbosity_to_betaml_verbosity(verbosity) + mod = BetaML.Imputation.RFImputer( n_trees = m.n_trees, max_depth = m.max_depth, min_gain = m.min_gain, @@ -412,7 +409,7 @@ function MMI.fit(m::RandomForestImputer, verbosity, X) #multiple_imputations = m.multiple_imputations, rng = m.rng, ) - fit!(mod,x) + BetaML.Api.fit!(mod,x) #if m.multiple_imputations == 1 # fitResults = MMI.table(predict(mod)) #else @@ -420,15 +417,15 @@ function MMI.fit(m::RandomForestImputer, verbosity, X) #end fitResults = mod cache = nothing - report = info(mod) + report = BetaML.Api.info(mod) return (fitResults, cache, report) end function MMI.fit(m::GeneralImputer, verbosity, X) x = MMI.matrix(X) # convert table to matrix typeof(verbosity) <: Integer || error("Verbosity must be a integer. Current \"steps\" are 0, 1, 2 and 3.") - verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity) - mod = UniversalImputer( + verbosity = mljverbosity_to_betaml_verbosity(verbosity) + mod = BetaML.Imputation.UniversalImputer( cols_to_impute = m.cols_to_impute, estimator = m.estimator, missing_supported = m.missing_supported, @@ -438,7 +435,7 @@ function MMI.fit(m::GeneralImputer, verbosity, X) rng = m.rng, verbosity = verbosity, ) - fit!(mod,x) + BetaML.Api.fit!(mod,x) #if m.multiple_imputations == 1 # fitResults = MMI.table(predict(mod)) #else @@ -446,7 +443,7 @@ function MMI.fit(m::GeneralImputer, verbosity, X) #end fitResults = mod cache = nothing - report = info(mod) + report = BetaML.Api.info(mod) return (fitResults, cache, report) end @@ -457,7 +454,7 @@ end function MMI.transform(m::Union{SimpleImputer,GaussianMixtureImputer,RandomForestImputer}, fitResults, X) x = MMI.matrix(X) # convert table to matrix mod = fitResults - return MMI.table(predict(mod,x)) + return MMI.table(BetaML.Api.predict(mod,x)) end @@ -474,10 +471,10 @@ function MMI.transform(m::GeneralImputer, fitResults, X) if fitResults.hpar.recursive_passages == 1 || all(missing_supported) x = MMI.matrix(X) # convert table to matrix mod = fitResults - return MMI.table(predict(mod,x)) + return MMI.table(BetaML.Api.predict(mod,x)) else mod = fitResults - return MMI.table(predict(mod)) + return MMI.table(BetaML.Api.predict(mod)) end end @@ -488,25 +485,25 @@ MMI.metadata_model(SimpleImputer, input_scitype = MMI.Table(Union{MMI.Continuous,MMI.Missing}), output_scitype = MMI.Table(MMI.Continuous), # for an unsupervised, what output? 
    supports_weights = false,                        # does the model support sample weights?
-   load_path        = "BetaML.Imputation.SimpleImputer"
+   load_path        = "BetaML.Bmlj.SimpleImputer"
)

MMI.metadata_model(GaussianMixtureImputer,
    input_scitype    = MMI.Table(Union{MMI.Continuous,MMI.Missing}),
    output_scitype   = MMI.Table(MMI.Continuous),    # for an unsupervised, what output?
    supports_weights = false,                        # does the model support sample weights?
-   load_path        = "BetaML.Imputation.GaussianMixtureImputer"
+   load_path        = "BetaML.Bmlj.GaussianMixtureImputer"
)

MMI.metadata_model(RandomForestImputer,
    input_scitype    = MMI.Table(Union{MMI.Missing, MMI.Known}),
    output_scitype   = MMI.Table(MMI.Known),         # for an unsupervised, what output?
    supports_weights = false,                        # does the model support sample weights?
-   load_path        = "BetaML.Imputation.RandomForestImputer"
+   load_path        = "BetaML.Bmlj.RandomForestImputer"
)
MMI.metadata_model(GeneralImputer,
    input_scitype    = MMI.Table(Union{MMI.Missing, MMI.Known}),
    output_scitype   = MMI.Table(MMI.Known),         # for an unsupervised, what output?
    supports_weights = false,                        # does the model support sample weights?
-   load_path        = "BetaML.Imputation.GeneralImputer"
+   load_path        = "BetaML.Bmlj.GeneralImputer"
)
diff --git a/src/Bmlj/Nn_mlj.jl b/src/Bmlj/Nn_mlj.jl
index 715c073a..9a10d6b6 100644
--- a/src/Bmlj/Nn_mlj.jl
+++ b/src/Bmlj/Nn_mlj.jl
@@ -1,6 +1,372 @@
 "Part of [BetaML](https://github.com/sylvaticus/BetaML.jl). Licence is MIT."

-# MLJ interface for the models of BetaML
+# MLJ interface for Neural Network models

+using CategoricalArrays

-export
\ No newline at end of file
+export NeuralNetworkRegressor, MultitargetNeuralNetworkRegressor, NeuralNetworkClassifier
+
+
+# Model Structure declarations..
+"""
+$(TYPEDEF)
+
+A simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for regression of a single-dimensional target.
+
+# Parameters:
+$(FIELDS)
+
+# Notes:
+- data must be numerical
+- the label should be a _n-records_ vector.
+ +# Example: +```julia +julia> using MLJ + +julia> X, y = @load_boston; + +julia> modelType = @load NeuralNetworkRegressor pkg = "BetaML" verbosity=0 +BetaML.Nn.NeuralNetworkRegressor + +julia> layers = [BetaML.DenseLayer(12,20,f=BetaML.relu),BetaML.DenseLayer(20,20,f=BetaML.relu),BetaML.DenseLayer(20,1,f=BetaML.relu)]; + +julia> model = modelType(layers=layers,opt_alg=BetaML.ADAM()); +NeuralNetworkRegressor( + layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.23249759178069676 -0.4125090172711131 … 0.41401934928739 -0.33017881111237535; -0.27912169279319965 0.270551221249931 … 0.19258414323473344 0.1703002982374256; … ; 0.31186742456482447 0.14776438287394805 … 0.3624993442655036 0.1438885872964824; 0.24363744610286758 -0.3221033024934767 … 0.14886090419299408 0.038411663101909355], [-0.42360286004241765, -0.34355377040029594, 0.11510963232946697, 0.29078650404397893, -0.04940236502546075, 0.05142849152316714, -0.177685375947775, 0.3857630523957018, -0.25454667127064756, -0.1726731848206195, 0.29832456225553444, -0.21138505291162835, -0.15763643112604903, -0.08477044513587562, -0.38436681165349196, 0.20538016429104916, -0.25008157754468335, 0.268681800562054, 0.10600581996650865, 0.4262194464325672], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.08534180387478185 0.19659398307677617 … -0.3413633217504578 -0.0484925247381256; 0.0024419192794883915 -0.14614102508129 … -0.21912059923003044 0.2680725396694708; … ; 0.25151545823147886 -0.27532269951606037 … 0.20739970895058063 0.2891938885916349; -0.1699020711688904 -0.1350423717084296 … 0.16947589410758873 0.3629006047373296], [0.2158116357688406, -0.3255582642532289, -0.057314442103850394, 0.29029696770539953, 0.24994080694366455, 0.3624239027782297, -0.30674318230919984, -0.3854738338935017, 0.10809721838554087, 0.16073511121016176, -0.005923262068960489, 0.3157147976348795, -0.10938918304264739, -0.24521229198853187, -0.307167732178712, 0.0808907777008302, -0.014577497150872254, -0.0011287181458157214, 0.07522282588658086, 0.043366500526073104], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.021367697115938555 -0.28326652172347155 … 0.05346175368370165 -0.26037328415871647], [-0.2313659199724562], BetaML.Utils.relu, BetaML.Utils.drelu)], + loss = BetaML.Utils.squared_cost, + dloss = BetaML.Utils.dsquared_cost, + epochs = 100, + batch_size = 32, + opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var"#90#93"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), + shuffle = true, + descr = "", + cb = BetaML.Nn.fitting_info, + rng = Random._GLOBAL_RNG()) + +julia> mach = machine(model, X, y); + +julia> fit!(mach); + +julia> ŷ = predict(mach, X); + +julia> hcat(y,ŷ) +506×2 Matrix{Float64}: + 24.0 30.7726 + 21.6 28.0811 + 34.7 31.3194 + ⋮ + 23.9 30.9032 + 22.0 29.49 + 11.9 27.2438 +``` +""" +Base.@kwdef mutable struct NeuralNetworkRegressor <: MMI.Deterministic + "Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers" + layers::Union{Array{BetaML.Nn.AbstractLayer,1},Nothing} = nothing + """Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as matrices, even if the regression task is 1-D + !!! warning + If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`. + """ + loss::Union{Nothing,Function} = BetaML.Utils.squared_cost + "Derivative of the loss function [def: `BetaML.dsquared_cost`, i.e. 
use the derivative of the squared cost]. Use `nothing` for autodiff."
+    dloss::Union{Function,Nothing} = BetaML.Utils.dsquared_cost
+    "Number of epochs, i.e. passages through the whole training sample [def: `200`]"
+    epochs::Int64 = 200
+    "Size of each individual batch [def: `16`]"
+    batch_size::Int64 = 16
+    "The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers"
+    opt_alg::OptimisationAlgorithm = BetaML.Nn.ADAM()
+    "Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]"
+    shuffle::Bool = true
+    "An optional title and/or description for this model"
+    descr::String = ""
+    "A callback function to provide information during training [def: `fitting_info`]"
+    cb::Function=BetaML.Nn.fitting_info
+    "Random Number Generator (see [`FIXEDSEED`](@ref)) [default: `Random.GLOBAL_RNG`]
+    "
+    rng::AbstractRNG = Random.GLOBAL_RNG
+end
+
+"""
+$(TYPEDSIGNATURES)
+
+For the `verbosity` parameter see [`Verbosity`](@ref)
+
+"""
+function MMI.fit(m::NeuralNetworkRegressor, verbosity, X, y)
+    x = MMI.matrix(X)                     # convert table to matrix
+    typeof(verbosity) <: Integer || error("Verbosity must be an integer. Current \"steps\" are 0, 1, 2 and 3.")
+    verbosity = mljverbosity_to_betaml_verbosity(verbosity)
+    ndims(y) > 1 && error("The label should have only 1 dimension. Use `MultitargetNeuralNetworkRegressor` or `NeuralNetworkClassifier` for multi-dimensional outputs.")
+    mi = BetaML.Nn.NeuralNetworkEstimator(;layers=m.layers, loss=m.loss, dloss=m.dloss, epochs=m.epochs, batch_size=m.batch_size, opt_alg=m.opt_alg, shuffle=m.shuffle, cache=false, descr=m.descr, cb=m.cb, rng=m.rng, verbosity=verbosity)
+    BetaML.Api.fit!(mi,x,y)
+    fitresults = mi
+    cache      = nothing
+    report     = nothing
+    return fitresults, cache, report
+ end
+
+ MMI.predict(m::NeuralNetworkRegressor, fitresult, Xnew) = BetaML.Api.predict(fitresult, MMI.matrix(Xnew))
+
+ MMI.metadata_model(NeuralNetworkRegressor,
+    input_scitype    = MMI.Table(Union{MMI.Continuous,MMI.Count}),
+    target_scitype   = AbstractVector{<: Union{MMI.Continuous,MMI.Count}},
+    supports_weights = false,
+    load_path        = "BetaML.Bmlj.NeuralNetworkRegressor"
+)
+
+# ------------------------------------------------------------------------------
+# Model Structure declarations..
+"""
+$(TYPEDEF)
+
+A simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for regression of multi-dimensional targets.
+ +# Parameters: +$(FIELDS) + +# Notes: +- data must be numerical +- the label should be a _n-records_ by _n-dimensions_ matrix + +# Example: +```julia +julia> using MLJ + +julia> X, y = @load_boston; + +julia> ydouble = hcat(y, y .*2 .+5); + +julia> modelType = @load MultitargetNeuralNetworkRegressor pkg = "BetaML" verbosity=0 +BetaML.Nn.MultitargetNeuralNetworkRegressor + +julia> layers = [BetaML.DenseLayer(12,50,f=BetaML.relu),BetaML.DenseLayer(50,50,f=BetaML.relu),BetaML.DenseLayer(50,50,f=BetaML.relu),BetaML.DenseLayer(50,2,f=BetaML.relu)]; + +julia> model = modelType(layers=layers,opt_alg=BetaML.ADAM(),epochs=500) +MultitargetNeuralNetworkRegressor( + layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.2591582523441157 -0.027962845131416225 … 0.16044535560124418 -0.12838827994676857; -0.30381834909561184 0.2405495243851402 … -0.2588144861880588 0.09538577909777807; … ; -0.017320292924711156 -0.14042266424603767 … 0.06366999105841187 -0.13419651752478906; 0.07393079961409338 0.24521350531110264 … 0.04256867886217541 -0.0895506802948175], [0.14249427336553644, 0.24719379413682485, -0.25595911822556566, 0.10034088778965933, -0.017086404878505712, 0.21932184025609347, -0.031413516834861266, -0.12569076082247596, -0.18080140982481183, 0.14551901873323253 … -0.13321995621967364, 0.2436582233332092, 0.0552222336976439, 0.07000814133633904, 0.2280064379660025, -0.28885681475734193, -0.07414214246290696, -0.06783184733650621, -0.055318068046308455, -0.2573488383282579], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.0395424111703751 -0.22531232360829911 … -0.04341228943744482 0.024336206858365517; -0.16481887432946268 0.17798073384748508 … -0.18594039305095766 0.051159225856547474; … ; -0.011639475293705043 -0.02347011206244673 … 0.20508869536159186 -0.1158382446274592; -0.19078069527757857 -0.007487540070740484 … -0.21341165344291158 -0.24158671316310726], [-0.04283623889330032, 0.14924461547060602, -0.17039563392959683, 0.00907774027816255, 0.21738885963113852, -0.06308040225941691, -0.14683286822101105, 0.21726892197970937, 0.19784321784707126, -0.0344988665714947 … -0.23643089430602846, -0.013560425201427584, 0.05323948910726356, -0.04644175812567475, -0.2350400292671211, 0.09628312383424742, 0.07016420995205697, -0.23266392927140334, -0.18823664451487, 0.2304486691429084], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.11504184627266828 0.08601794194664503 … 0.03843129724045469 -0.18417305624127284; 0.10181551438831654 0.13459759904443674 … 0.11094951365942118 -0.1549466590355218; … ; 0.15279817525427697 0.0846661196058916 … -0.07993619892911122 0.07145402617285884; -0.1614160186346092 -0.13032002335149 … -0.12310552194729624 -0.15915773071049827], [-0.03435885900946367, -0.1198543931290306, 0.008454985905194445, -0.17980887188986966, -0.03557204910359624, 0.19125847393334877, -0.10949700778538696, -0.09343206702591, -0.12229583511781811, -0.09123969069220564 … 0.22119233518322862, 0.2053873143308657, 0.12756489387198222, 0.11567243705173319, -0.20982445664020496, 0.1595157838386987, -0.02087331046544119, -0.20556423263489765, -0.1622837764237961, -0.019220998739847395], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.25796717031347993 0.17579536633402948 … -0.09992960168785256 -0.09426177454620635; -0.026436330246675632 0.18070899284865127 … -0.19310119102392206 -0.06904005900252091], [0.16133004882307822, -0.3061228721091248], BetaML.Utils.relu, BetaML.Utils.drelu)], + loss = BetaML.Utils.squared_cost, + dloss = 
BetaML.Utils.dsquared_cost,
+    epochs = 500,
+    batch_size = 32,
+    opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var"#90#93"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]),
+    shuffle = true,
+    descr = "",
+    cb = BetaML.Nn.fitting_info,
+    rng = Random._GLOBAL_RNG())
+
+julia> mach = machine(model, X, ydouble);
+
+julia> fit!(mach);
+
+julia> ŷdouble = predict(mach, X);
+
+julia> hcat(ydouble,ŷdouble)
+506×4 Matrix{Float64}:
+ 24.0  53.0  28.4624  62.8607
+ 21.6  48.2  22.665   49.7401
+ 34.7  74.4  31.5602  67.9433
+ 33.4  71.8  33.0869  72.4337
+  ⋮
+ 23.9  52.8  23.3573  50.654
+ 22.0  49.0  22.1141  48.5926
+ 11.9  28.8  19.9639  45.5823
+```
+
+"""
+Base.@kwdef mutable struct MultitargetNeuralNetworkRegressor <: MMI.Deterministic
+    "Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers"
+    layers::Union{Array{BetaML.Nn.AbstractLayer,1},Nothing} = nothing
+    """Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as matrices.
+    !!! warning
+        If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.
+    """
+    loss::Union{Nothing,Function} = BetaML.Utils.squared_cost
+    "Derivative of the loss function [def: `BetaML.dsquared_cost`, i.e. use the derivative of the squared cost]. Use `nothing` for autodiff."
+    dloss::Union{Function,Nothing} = BetaML.Utils.dsquared_cost
+    "Number of epochs, i.e. passages through the whole training sample [def: `300`]"
+    epochs::Int64 = 300
+    "Size of each individual batch [def: `16`]"
+    batch_size::Int64 = 16
+    "The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers"
+    opt_alg::OptimisationAlgorithm = BetaML.Nn.ADAM()
+    "Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]"
+    shuffle::Bool = true
+    "An optional title and/or description for this model"
+    descr::String = ""
+    "A callback function to provide information during training [def: `BetaML.fitting_info`]"
+    cb::Function=BetaML.Nn.fitting_info
+    "Random Number Generator (see [`FIXEDSEED`](@ref)) [default: `Random.GLOBAL_RNG`]
+    "
+    rng::AbstractRNG = Random.GLOBAL_RNG
+end
+"""
+$(TYPEDSIGNATURES)
+
+For the `verbosity` parameter see [`Verbosity`](@ref)
+
+"""
+function MMI.fit(m::MultitargetNeuralNetworkRegressor, verbosity, X, y)
+    x = MMI.matrix(X)                     # convert table to matrix
+    typeof(verbosity) <: Integer || error("Verbosity must be an integer. Current \"steps\" are 0, 1, 2 and 3.")
+    verbosity = mljverbosity_to_betaml_verbosity(verbosity)
+    ndims(y) > 1 || error("The label should have multiple dimensions.
Use `NeuralNetworkRegressor` for single-dimensional outputs.") + mi = BetaML.Nn.NeuralNetworkEstimator(;layers=m.layers,loss=m.loss, dloss=m.dloss, epochs=m.epochs, batch_size=m.batch_size, opt_alg=m.opt_alg,shuffle=m.shuffle, cache=false, descr=m.descr, cb=m.cb, rng=m.rng, verbosity=verbosity) + BetaML.Api.fit!(mi,x,y) + fitresults = mi + cache = nothing + report = nothing + return fitresults, cache, report + end + + MMI.predict(m::MultitargetNeuralNetworkRegressor, fitresult, Xnew) = BetaML.Api.predict(fitresult, MMI.matrix(Xnew)) + + MMI.metadata_model(MultitargetNeuralNetworkRegressor, + input_scitype = MMI.Table(Union{MMI.Continuous,MMI.Count}), + target_scitype = AbstractMatrix{<: Union{MMI.Continuous,MMI.Count}}, + supports_weights = false, + load_path = "BetaML.Bmlj.MultitargetNeuralNetworkRegressor" +) + +# ------------------------------------------------------------------------------ + +""" +$(TYPEDEF) + +A simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for classification problems. + +# Parameters: +$(FIELDS) + +# Notes: +- data must be numerical +- the label should be a _n-records_ by _n-dimensions_ matrix (e.g. a one-hot-encoded data for classification), where the output columns should be interpreted as the probabilities for each categories. + +# Example: +```julia +julia> using MLJ + +julia> X, y = @load_iris; + +julia> modelType = @load NeuralNetworkClassifier pkg = "BetaML" verbosity=0 +BetaML.Nn.NeuralNetworkClassifier + +julia> layers = [BetaML.DenseLayer(4,8,f=BetaML.relu),BetaML.DenseLayer(8,8,f=BetaML.relu),BetaML.DenseLayer(8,3,f=BetaML.relu),BetaML.VectorFunctionLayer(3,f=BetaML.softmax)]; + +julia> model = modelType(layers=layers,opt_alg=BetaML.ADAM()) +NeuralNetworkClassifier( + layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.376173352338049 0.7029289511758696 -0.5589563304592478 -0.21043274001651874; 0.044758889527899415 0.6687689636685921 0.4584331114653877 0.6820506583840453; … ; -0.26546358457167507 -0.28469736227283804 -0.164225549922154 -0.516785639164486; -0.5146043550684141 -0.0699113265130964 0.14959906603941908 -0.053706860039406834], [0.7003943613125758, -0.23990840466587576, -0.23823126271387746, 0.4018101580410387, 0.2274483050356888, -0.564975060667734, 0.1732063297031089, 0.11880299829896945], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.029467850439546583 0.4074661266592745 … 0.36775675246760053 -0.595524555448422; 0.42455597698371306 -0.2458082732997091 … -0.3324220683462514 0.44439454998610595; … ; -0.2890883863364267 -0.10109249362508033 … -0.0602680568207582 0.18177278845097555; -0.03432587226449335 -0.4301192922760063 … 0.5646018168286626 0.47269177680892693], [0.13777442835428688, 0.5473306726675433, 0.3781939472904011, 0.24021813428130567, -0.0714779477402877, -0.020386373530818958, 0.5465466618404464, -0.40339790713616525], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([0.6565120540082393 0.7139211611842745 … 0.07809812467915389 -0.49346311403373844; -0.4544472987041656 0.6502667641568863 … 0.43634608676548214 0.7213049952968921; 0.41212264783075303 -0.21993289366360613 … 0.25365007887755064 -0.5664469566269569], [-0.6911986792747682, -0.2149343209329364, -0.6347727539063817], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.VectorFunctionLayer{0}(fill(NaN), 3, 3, BetaML.Utils.softmax, BetaML.Utils.dsoftmax, nothing)], + loss = BetaML.Utils.crossentropy, + dloss = BetaML.Utils.dcrossentropy, + epochs = 100, + batch_size = 32, + opt_alg = 
BetaML.Nn.ADAM(BetaML.Nn.var"#90#93"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]),
+    shuffle = true,
+    descr = "",
+    cb = BetaML.Nn.fitting_info,
+    categories = nothing,
+    handle_unknown = "error",
+    other_categories_name = nothing,
+    rng = Random._GLOBAL_RNG())
+
+julia> mach = machine(model, X, y);
+
+julia> fit!(mach);
+
+julia> classes_est = predict(mach, X)
+150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt8, Float64}:
+ UnivariateFinite{Multiclass{3}}(setosa=>0.575, versicolor=>0.213, virginica=>0.213)
+ UnivariateFinite{Multiclass{3}}(setosa=>0.573, versicolor=>0.213, virginica=>0.213)
+ ⋮
+ UnivariateFinite{Multiclass{3}}(setosa=>0.236, versicolor=>0.236, virginica=>0.529)
+ UnivariateFinite{Multiclass{3}}(setosa=>0.254, versicolor=>0.254, virginica=>0.492)
+```
+"""
+Base.@kwdef mutable struct NeuralNetworkClassifier <: MMI.Probabilistic
+    "Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers. The last \"softmax\" layer is automatically added."
+    layers::Union{Array{BetaML.Nn.AbstractLayer,1},Nothing} = nothing
+    """Loss (cost) function [def: `BetaML.crossentropy`]. Should always assume y and ŷ as matrices.
+    !!! warning
+        If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.
+    """
+    loss::Union{Nothing,Function} = BetaML.Utils.crossentropy
+    "Derivative of the loss function [def: `BetaML.dcrossentropy`, i.e. the derivative of the cross-entropy]. Use `nothing` for autodiff."
+    dloss::Union{Function,Nothing} = BetaML.Utils.dcrossentropy
+    "Number of epochs, i.e. passages through the whole training sample [def: `200`]"
+    epochs::Int64 = 200
+    "Size of each individual batch [def: `16`]"
+    batch_size::Int64 = 16
+    "The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers"
+    opt_alg::OptimisationAlgorithm = BetaML.Nn.ADAM()
+    "Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]"
+    shuffle::Bool = true
+    "An optional title and/or description for this model"
+    descr::String = ""
+    "A callback function to provide information during training [def: `BetaML.fitting_info`]"
+    cb::Function=BetaML.Nn.fitting_info
+    "The categories to represent as columns. [def: `nothing`, i.e. unique training values]."
+    categories::Union{Vector,Nothing} = nothing
+    "How to handle categories not seen in training or not present in the provided `categories` array? \"error\" (default) raises an error, \"infrequent\" adds a specific column for these categories."
+    handle_unknown::String = "error"
+    "Which value during prediction to assign to this \"other\" category (i.e. categories not seen in training or not present in the provided `categories` array)? [def: `nothing`, i.e. typemax(Int64) for integer vectors and \"other\" for other types].
This setting is active only if `handle_unknown=\"infrequent\"` and in that case it MUST be specified if Y is neither integer nor string"
+    other_categories_name = nothing
+    "Random Number Generator [default: `Random.GLOBAL_RNG`]"
+    rng::AbstractRNG = Random.GLOBAL_RNG
+end
+
+"""
+MMI.fit(model::NeuralNetworkClassifier, verbosity, X, y)
+
+For the `verbosity` parameter see [`Verbosity`](@ref)
+
+"""
+function MMI.fit(m::NeuralNetworkClassifier, verbosity, X, y)
+    x = MMI.matrix(X)                     # convert table to matrix
+    typeof(verbosity) <: Integer || error("Verbosity must be an integer. Current \"steps\" are 0, 1, 2 and 3.")
+    verbosity = mljverbosity_to_betaml_verbosity(verbosity)
+    categories = deepcopy(m.categories)
+    if categories == nothing
+        #if occursin("CategoricalVector",string(typeof(y))) # to avoid dependency to CategoricalArrays or MLJBase
+        if typeof(y) <: CategoricalVector
+            categories = levels(y)
+        end
+    end
+
+    ohmod = BetaML.Utils.OneHotEncoder(categories=categories, handle_unknown=m.handle_unknown, other_categories_name=m.other_categories_name, verbosity=verbosity)
+    Y_oh  = BetaML.Api.fit!(ohmod,y)
+
+    nR,nD      = size(x)
+    (nRy,nDy)  = size(Y_oh)
+
+    nR == nRy || error("X and Y have different number of records (rows)")
+
+    if isnothing(m.layers)
+        layers = nothing
+    else
+        layers = deepcopy(m.layers)
+        push!(layers, BetaML.Nn.VectorFunctionLayer(nDy, f=BetaML.Utils.softmax))
+    end
+    mi = BetaML.Nn.NeuralNetworkEstimator(;layers=layers, loss=m.loss, dloss=m.dloss, epochs=m.epochs, batch_size=m.batch_size, opt_alg=m.opt_alg, shuffle=m.shuffle, cache=false, descr=m.descr, cb=m.cb, rng=m.rng, verbosity=verbosity)
+    BetaML.Api.fit!(mi,x,Y_oh)
+    fitresults = (mi,ohmod)
+    cache      = nothing
+    report     = nothing
+    return fitresults, cache, report
+ end
+
+function MMI.predict(m::NeuralNetworkClassifier, fitresult, Xnew)
+    nnmod, ohmod = fitresult
+    yhat    = BetaML.Api.predict(nnmod, MMI.matrix(Xnew))
+    classes = BetaML.Api.parameters(ohmod).categories_applied
+    predictions = MMI.UnivariateFinite(classes, yhat, pool=missing)
+    #return yhat
+    return predictions
+end
+
+ MMI.metadata_model(NeuralNetworkClassifier,
+    input_scitype    = MMI.Table(Union{MMI.Continuous,MMI.Count}),
+    target_scitype   = AbstractVector{<: Union{MMI.Multiclass,MMI.Finite,MMI.Count}},
+    supports_weights = false,
+    load_path        = "BetaML.Bmlj.NeuralNetworkClassifier"
+)
diff --git a/src/Bmlj/Perceptron_mlj.jl b/src/Bmlj/Perceptron_mlj.jl
new file mode 100644
index 00000000..36df7b3c
--- /dev/null
+++ b/src/Bmlj/Perceptron_mlj.jl
@@ -0,0 +1,325 @@
+"Part of [BetaML](https://github.com/sylvaticus/BetaML.jl). Licence is MIT."
+
+# MLJ interface for Perceptron-like models
+
+export LinearPerceptron, KernelPerceptron, Pegasos
+
+
+# ------------------------------------------------------------------------------
+# Model Structure declarations..
+"""
+$(TYPEDEF)
+
+The classical perceptron algorithm using one-vs-all for multiclass, from the Beta Machine Learning Toolkit (BetaML).
+
+# Hyperparameters:
+$(TYPEDFIELDS)
+
+# Example:
+```julia
+julia> using MLJ
+
+julia> X, y = @load_iris;
+
+julia> modelType = @load LinearPerceptron pkg = "BetaML"
+[ Info: For silent loading, specify `verbosity=0`.
+import BetaML ✔
+BetaML.Perceptron.LinearPerceptron
+
+julia> model = modelType()
+LinearPerceptron(
+    initial_coefficients = nothing,
+    initial_constant = nothing,
+    epochs = 1000,
+    shuffle = true,
+    force_origin = false,
+    return_mean_hyperplane = false,
+    rng = Random._GLOBAL_RNG())
+
+julia> mach = machine(model, X, y);
+
+julia> fit!(mach);
+[ Info: Training machine(LinearPerceptron(initial_coefficients = nothing, …), …).
+*** Avg. error after epoch 2 : 0.0 (all elements of the set has been correctly classified)
+julia> est_classes = predict(mach, X)
+150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt8, Float64}:
+ UnivariateFinite{Multiclass{3}}(setosa=>1.0, versicolor=>2.53e-34, virginica=>0.0)
+ UnivariateFinite{Multiclass{3}}(setosa=>1.0, versicolor=>1.27e-18, virginica=>1.86e-310)
+ ⋮
+ UnivariateFinite{Multiclass{3}}(setosa=>2.77e-57, versicolor=>1.1099999999999999e-82, virginica=>1.0)
+ UnivariateFinite{Multiclass{3}}(setosa=>3.09e-22, versicolor=>4.03e-25, virginica=>1.0)
+```
+
+"""
+mutable struct LinearPerceptron <: MMI.Probabilistic
+    "N-classes by D-dimensions matrix of initial linear coefficients [def: `nothing`, i.e. zeros]"
+    initial_coefficients::Union{Matrix{Float64},Nothing}
+    "N-classes vector of initial constant terms [def: `nothing`, i.e. zeros]"
+    initial_constant::Union{Vector{Float64},Nothing}
+    "Maximum number of epochs, i.e. passages through the whole training sample [def: `1000`]"
+    epochs::Int64
+    "Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]"
+    shuffle::Bool
+    "Whether to force the parameter associated with the constant term to remain zero [def: `false`]"
+    force_origin::Bool
+    "Whether to return the average hyperplane coefficients instead of the final ones [def: `false`]"
+    return_mean_hyperplane::Bool
+    "A Random Number Generator to be used in stochastic parts of the code [default: `Random.GLOBAL_RNG`]"
+    rng::AbstractRNG
+end
+LinearPerceptron(;
+    initial_coefficients=nothing,
+    initial_constant=nothing,
+    epochs=1000,
+    shuffle=true,
+    force_origin=false,
+    return_mean_hyperplane=false,
+    rng = Random.GLOBAL_RNG,
+    ) = LinearPerceptron(initial_coefficients,initial_constant,epochs,shuffle,force_origin,return_mean_hyperplane,rng)
+
+"""
+$(TYPEDEF)
+
+The kernel perceptron algorithm using one-vs-one for multiclass, from the Beta Machine Learning Toolkit (BetaML).
+
+# Hyperparameters:
+$(TYPEDFIELDS)
+
+# Example:
+```julia
+julia> using MLJ
+
+julia> X, y = @load_iris;
+
+julia> modelType = @load KernelPerceptron pkg = "BetaML"
+[ Info: For silent loading, specify `verbosity=0`.
+import BetaML ✔
+BetaML.Perceptron.KernelPerceptron
+
+julia> model = modelType()
+KernelPerceptron(
+    kernel = BetaML.Utils.radial_kernel,
+    epochs = 100,
+    initial_errors = nothing,
+    shuffle = true,
+    rng = Random._GLOBAL_RNG())
+
+julia> mach = machine(model, X, y);
+
+julia> fit!(mach);
+
+julia> est_classes = predict(mach, X)
+150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt8, Float64}:
+ UnivariateFinite{Multiclass{3}}(setosa=>0.665, versicolor=>0.245, virginica=>0.09)
+ UnivariateFinite{Multiclass{3}}(setosa=>0.665, versicolor=>0.245, virginica=>0.09)
+ ⋮
+ UnivariateFinite{Multiclass{3}}(setosa=>0.09, versicolor=>0.245, virginica=>0.665)
+ UnivariateFinite{Multiclass{3}}(setosa=>0.09, versicolor=>0.665, virginica=>0.245)
+```
+
+"""
+mutable struct KernelPerceptron <: MMI.Probabilistic
+    "Kernel function to employ.
See `?radial_kernel` or `?polynomial_kernel` (once loaded the BetaML package) for details or check `?BetaML.Utils` to verify if other kernels are defined (you can always define your own kernel) [def: [`radial_kernel`](@ref)]"
+    kernel::Function
+    "Maximum number of epochs, i.e. passages through the whole training sample [def: `100`]"
+    epochs::Int64
+    "Initial distribution of the number of errors [def: `nothing`, i.e. zeros]. If provided, this should be a nModels-length vector of nRecords-length integer vectors, where nModels is computed as `(n_classes * (n_classes - 1)) / 2`"
+    initial_errors::Union{Nothing,Vector{Vector{Int64}}}
+    "Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]"
+    shuffle::Bool
+    "A Random Number Generator to be used in stochastic parts of the code [default: `Random.GLOBAL_RNG`]"
+    rng::AbstractRNG
+end
+KernelPerceptron(;
+    kernel=BetaML.Utils.radial_kernel,
+    epochs=100,
+    initial_errors = nothing,
+    shuffle=true,
+    rng = Random.GLOBAL_RNG,
+    ) = KernelPerceptron(kernel,epochs,initial_errors,shuffle,rng)
+"""
+$(TYPEDEF)
+
+The gradient-based linear "pegasos" classifier using one-vs-all for multiclass, from the Beta Machine Learning Toolkit (BetaML).
+
+# Hyperparameters:
+$(TYPEDFIELDS)
+
+# Example:
+```julia
+julia> using MLJ
+
+julia> X, y = @load_iris;
+
+julia> modelType = @load Pegasos pkg = "BetaML" verbosity=0
+BetaML.Perceptron.Pegasos
+
+julia> model = modelType()
+Pegasos(
+    initial_coefficients = nothing,
+    initial_constant = nothing,
+    learning_rate = BetaML.Perceptron.var"#71#73"(),
+    learning_rate_multiplicative = 0.5,
+    epochs = 1000,
+    shuffle = true,
+    force_origin = false,
+    return_mean_hyperplane = false,
+    rng = Random._GLOBAL_RNG())
+
+julia> mach = machine(model, X, y);
+
+julia> fit!(mach);
+
+julia> est_classes = predict(mach, X)
+150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt8, Float64}:
+ UnivariateFinite{Multiclass{3}}(setosa=>0.817, versicolor=>0.153, virginica=>0.0301)
+ UnivariateFinite{Multiclass{3}}(setosa=>0.791, versicolor=>0.177, virginica=>0.0318)
+ ⋮
+ UnivariateFinite{Multiclass{3}}(setosa=>0.254, versicolor=>0.5, virginica=>0.246)
+ UnivariateFinite{Multiclass{3}}(setosa=>0.283, versicolor=>0.51, virginica=>0.207)
+```
+"""
+mutable struct Pegasos <: MMI.Probabilistic
+    "N-classes by D-dimensions matrix of initial linear coefficients [def: `nothing`, i.e. zeros]"
+    initial_coefficients::Union{Matrix{Float64},Nothing}
+    "N-classes vector of initial constant terms [def: `nothing`, i.e. zeros]"
+    initial_constant::Union{Vector{Float64},Nothing}
+    "Learning rate [def: (epoch -> 1/sqrt(epoch))]"
+    learning_rate::Function
+    "Multiplicative term of the learning rate [def: `0.5`]"
+    learning_rate_multiplicative::Float64
+    "Maximum number of epochs, i.e.
passages through the whole training sample [def: `1000`]"
+    epochs::Int64
+    "Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]"
+    shuffle::Bool
+    "Whether to force the parameter associated with the constant term to remain zero [def: `false`]"
+    force_origin::Bool
+    "Whether to return the average hyperplane coefficients instead of the final ones [def: `false`]"
+    return_mean_hyperplane::Bool
+    "A Random Number Generator to be used in stochastic parts of the code [default: `Random.GLOBAL_RNG`]"
+    rng::AbstractRNG
+end
+Pegasos(;
+    initial_coefficients=nothing,
+    initial_constant=nothing,
+    learning_rate = (t -> 1/sqrt(t)),
+    learning_rate_multiplicative = 0.5,
+    epochs=1000,
+    shuffle=true,
+    force_origin=false,
+    return_mean_hyperplane=false,
+    rng = Random.GLOBAL_RNG,
+    ) = Pegasos(initial_coefficients,initial_constant,learning_rate,learning_rate_multiplicative,epochs,shuffle,force_origin,return_mean_hyperplane,rng)
+
+# ------------------------------------------------------------------------------
+# Fit functions...
+
+function MMI.fit(model::LinearPerceptron, verbosity, X, y)
+    x = MMI.matrix(X)                     # convert table to matrix
+    allClasses = levels(y)
+    typeof(verbosity) <: Integer || error("Verbosity must be an integer. Current \"steps\" are 0, 1, 2 and 3.")
+    verbosity = mljverbosity_to_betaml_verbosity(verbosity)
+    #initial_coefficients = length(model.initial_coefficients) == 0 ? zeros(size(x,2)) : model.initial_coefficients
+    fitresult = BetaML.Perceptron.perceptron(x, y; θ=model.initial_coefficients, θ₀=model.initial_constant, T=model.epochs, nMsgs=0, shuffle=model.shuffle, force_origin=model.force_origin, return_mean_hyperplane=model.return_mean_hyperplane, rng=model.rng, verbosity=verbosity)
+    cache  = nothing
+    report = nothing
+    return (fitresult,allClasses), cache, report
+end
+
+function MMI.fit(model::KernelPerceptron, verbosity, X, y)
+    x = MMI.matrix(X)                     # convert table to matrix
+    allClasses = levels(y)
+    typeof(verbosity) <: Integer || error("Verbosity must be an integer. Current \"steps\" are 0, 1, 2 and 3.")
+    verbosity = mljverbosity_to_betaml_verbosity(verbosity)
+    #initial_errors = length(model.initial_errors) == 0 ? zeros(Int64,length(y)) : model.initial_errors
+    fitresult = BetaML.Perceptron.kernelPerceptron(x, y; K=model.kernel, T=model.epochs, α=model.initial_errors, nMsgs=0, shuffle=model.shuffle, rng=model.rng, verbosity=verbosity)
+    cache  = nothing
+    report = nothing
+    return (fitresult,allClasses), cache, report
+end
+
+function MMI.fit(model::Pegasos, verbosity, X, y)
+    x = MMI.matrix(X)                     # convert table to matrix
+    allClasses = levels(y)
+    typeof(verbosity) <: Integer || error("Verbosity must be an integer. Current \"steps\" are 0, 1, 2 and 3.")
+    verbosity = mljverbosity_to_betaml_verbosity(verbosity)
+    #initial_coefficients = length(model.initial_coefficients) == 0 ? zeros(size(x,2)) : model.initial_coefficients
+    fitresult = BetaML.Perceptron.pegasos(x, y; θ=model.initial_coefficients, θ₀=model.initial_constant, λ=model.learning_rate_multiplicative, η=model.learning_rate, T=model.epochs, nMsgs=0, shuffle=model.shuffle, force_origin=model.force_origin, return_mean_hyperplane=model.return_mean_hyperplane, rng=model.rng, verbosity=verbosity)
+    cache  = nothing
+    report = nothing
+    return (fitresult,allClasses), cache, report
+end
+
+# ------------------------------------------------------------------------------
+# Predict functions....
+function MMI.predict(model::Union{LinearPerceptron,Pegasos}, fitresult, Xnew)
+    fittedModel = fitresult[1]
+    #classes    = CategoricalVector(fittedModel.classes)
+    classes     = fittedModel.classes
+    allClasses  = fitresult[2] # as `classes` does not include classes unseen at training time
+    nLevels     = length(allClasses)
+    nRecords    = MMI.nrows(Xnew)
+    modelPredictions = BetaML.Perceptron.predict(MMI.matrix(Xnew), fittedModel.θ, fittedModel.θ₀, fittedModel.classes)
+    predMatrix  = zeros(Float64,(nRecords,nLevels))
+    # Transform the predictions from a vector of dictionaries to a matrix
+    # where the rows are the PMF of each record
+    for n in 1:nRecords
+        for (c,cl) in enumerate(allClasses)
+            predMatrix[n,c] = get(modelPredictions[n],cl,0.0)
+        end
+    end
+    #predictions = [MMI.UnivariateFinite(classes, predMatrix[i,:])
+    #              for i in 1:nRecords]
+    predictions = MMI.UnivariateFinite(allClasses,predMatrix,pool=missing)
+    return predictions
+end
+
+function MMI.predict(model::KernelPerceptron, fitresult, Xnew)
+    fittedModel = fitresult[1]
+    #classes    = CategoricalVector(fittedModel.classes)
+    classes     = fittedModel.classes
+    allClasses  = fitresult[2] # as `classes` does not include classes unseen at training time
+    nLevels     = length(allClasses)
+    nRecords    = MMI.nrows(Xnew)
+    #ŷtrain = Perceptron.predict([10 10; 2.2 2.5],model.x,model.y,model.α, model.classes,K=model.K)
+    modelPredictions = BetaML.Perceptron.predict(MMI.matrix(Xnew), fittedModel.x, fittedModel.y, fittedModel.α, fittedModel.classes, K=fittedModel.K)
+    predMatrix  = zeros(Float64,(nRecords,nLevels))
+    # Transform the predictions from a vector of dictionaries to a matrix
+    # where the rows are the PMF of each record
+    for n in 1:nRecords
+        for (c,cl) in enumerate(allClasses)
+            predMatrix[n,c] = get(modelPredictions[n],cl,0.0)
+        end
+    end
+    #predictions = [MMI.UnivariateFinite(classes, predMatrix[i,:])
+    #              for i in 1:nRecords]
+    #predictions = MMI.UnivariateFinite(classes, predMatrix)
+    predictions = MMI.UnivariateFinite(allClasses,predMatrix,pool=missing)
+    #predictions4 = MMI.UnivariateFinite(modelPredictions,pool=classes,ordered=false)
+    #predictions = MMI.UnivariateFinite(modelPredictions,pool=fittedModel.classes)
+    return predictions
+end
+
+# ------------------------------------------------------------------------------
+# Model metadata for registration in MLJ...
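+# The `load_path` entries below point at the (not exported) `Bmlj` submodule,
+# so that MLJ's model-loading machinery resolves these models at their new
+# location.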
+
+MMI.metadata_model(LinearPerceptron,
+    input_scitype = MMI.Table(MMI.Infinite),
+    target_scitype = AbstractVector{<: MMI.Finite},
+    supports_weights = false,
+    load_path = "BetaML.Bmlj.LinearPerceptron"
+)
+
+MMI.metadata_model(KernelPerceptron,
+    input_scitype = MMI.Table(MMI.Infinite),
+    target_scitype = AbstractVector{<: MMI.Finite},
+    supports_weights = false,
+    load_path = "BetaML.Bmlj.KernelPerceptron"
+)
+
+MMI.metadata_model(Pegasos,
+    input_scitype = MMI.Table(MMI.Infinite),
+    target_scitype = AbstractVector{<: MMI.Finite},
+    supports_weights = false,
+    load_path = "BetaML.Bmlj.Pegasos"
+)
diff --git a/src/Trees/Trees_MLJ.jl b/src/Bmlj/Trees_mlj.jl
similarity index 89%
rename from src/Trees/Trees_MLJ.jl
rename to src/Bmlj/Trees_mlj.jl
index cd970ac0..9330979f 100644
--- a/src/Trees/Trees_MLJ.jl
+++ b/src/Bmlj/Trees_mlj.jl
@@ -2,9 +2,6 @@
 
 # MLJ interface for Decision Trees/Random Forests models
 
-import MLJModelInterface # It seems that having done this in the top module is not enought
-const MMI = MLJModelInterface # We need to repoeat it here
-
 export DecisionTreeRegressor, RandomForestRegressor, DecisionTreeClassifier, RandomForestClassifier
 
@@ -76,7 +73,7 @@ DecisionTreeRegressor(;
     min_gain=0.0,
     min_records=2,
     max_features=0,
-    splitting_criterion=variance,
+    splitting_criterion=BetaML.Utils.variance,
     rng = Random.GLOBAL_RNG,
     ) = DecisionTreeRegressor(max_depth,min_gain,min_records,max_features,splitting_criterion,rng)
@@ -141,7 +138,7 @@ DecisionTreeClassifier(;
     min_gain=0.0,
     min_records=2,
     max_features=0,
-    splitting_criterion=gini,
+    splitting_criterion=BetaML.Utils.gini,
     rng = Random.GLOBAL_RNG,
     ) = DecisionTreeClassifier(max_depth,min_gain,min_records,max_features,splitting_criterion,rng)
@@ -216,7 +213,7 @@ RandomForestRegressor(;
     min_gain=0.0,
     min_records=2,
     max_features=0,
-    splitting_criterion=variance,
+    splitting_criterion=BetaML.Utils.variance,
     β=0.0,
     rng = Random.GLOBAL_RNG,
     ) = RandomForestRegressor(n_trees,max_depth,min_gain,min_records,max_features,splitting_criterion,β,rng)
@@ -286,7 +283,7 @@ RandomForestClassifier(;
     min_gain=0.0,
     min_records=2,
     max_features=0,
-    splitting_criterion=gini,
+    splitting_criterion=BetaML.Utils.gini,
     β=0.0,
     rng = Random.GLOBAL_RNG,
     ) = RandomForestClassifier(n_trees,max_depth,min_gain,min_records,max_features,splitting_criterion,β,rng)
@@ -314,15 +311,15 @@ MMI.hyperparameter_ranges(::Type{<:DecisionTreeRegressor}) = (
 function MMI.fit(model::Union{DecisionTreeRegressor,RandomForestRegressor}, verbosity, X, y)
     x = MMI.matrix(X) # convert table to matrix
     typeof(verbosity) <: Integer || error("Verbosity must be a integer. Current \"steps\" are 0, 1, 2 and 3.")
-    verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity)
+    verbosity = mljverbosity_to_betaml_verbosity(verbosity)
     max_depth = model.max_depth == 0 ? size(x,1) : model.max_depth
     # Using low level API here. We could switch to APIV2...
     if (typeof(model) == DecisionTreeRegressor)
         max_features = model.max_features == 0 ? size(x,2) : model.max_features
-        fitresult = buildTree(x, y, max_depth=max_depth, min_gain=model.min_gain, min_records=model.min_records, max_features=max_features, splitting_criterion=model.splitting_criterion,rng=model.rng, verbosity=verbosity)
+        fitresult = BetaML.Trees.buildTree(x, y, max_depth=max_depth, min_gain=model.min_gain, min_records=model.min_records, max_features=max_features, splitting_criterion=model.splitting_criterion,rng=model.rng, verbosity=verbosity)
     else
         max_features = model.max_features == 0 ? Int(round(sqrt(size(x,2)))) : model.max_features
-        fitresult = buildForest(x, y, model.n_trees, max_depth=max_depth, min_gain=model.min_gain, min_records=model.min_records, max_features=max_features, splitting_criterion=model.splitting_criterion, β=model.β,rng=model.rng,verbosity=verbosity)
+        fitresult = BetaML.Trees.buildForest(x, y, model.n_trees, max_depth=max_depth, min_gain=model.min_gain, min_records=model.min_records, max_features=max_features, splitting_criterion=model.splitting_criterion, β=model.β,rng=model.rng,verbosity=verbosity)
     end
     cache=nothing
     report=nothing
@@ -335,15 +332,15 @@ function MMI.fit(model::Union{DecisionTreeClassifier,RandomForestClassifier}, ve
     #y_plain = MMI.int(y) .- 1 # integer relabeling should start at 0
     yarray = convert(Vector{eltype(levels(y))},y) # convert to a simple Array{T}
     typeof(verbosity) <: Integer || error("Verbosity must be a integer. Current \"steps\" are 0, 1, 2 and 3.")
-    verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity)
+    verbosity = mljverbosity_to_betaml_verbosity(verbosity)
     max_depth = model.max_depth == 0 ? size(x,1) : model.max_depth
     # Using low level API here. We could switch to APIV2...
     if (typeof(model) == DecisionTreeClassifier)
         max_features = model.max_features == 0 ? size(x,2) : model.max_features
-        fittedmodel = buildTree(x, yarray, max_depth=max_depth, min_gain=model.min_gain, min_records=model.min_records, max_features=max_features, splitting_criterion=model.splitting_criterion, force_classification=true,rng=model.rng, verbosity=verbosity)
+        fittedmodel = BetaML.Trees.buildTree(x, yarray, max_depth=max_depth, min_gain=model.min_gain, min_records=model.min_records, max_features=max_features, splitting_criterion=model.splitting_criterion, force_classification=true,rng=model.rng, verbosity=verbosity)
     else
         max_features = model.max_features == 0 ? Int(round(sqrt(size(x,2)))) : model.max_features
-        fittedmodel = buildForest(x, yarray, model.n_trees, max_depth=max_depth, min_gain=model.min_gain, min_records=model.min_records, max_features=max_features, splitting_criterion=model.splitting_criterion, force_classification=true, β=model.β,rng=model.rng, verbosity=verbosity)
+        fittedmodel = BetaML.Trees.buildForest(x, yarray, model.n_trees, max_depth=max_depth, min_gain=model.min_gain, min_records=model.min_records, max_features=max_features, splitting_criterion=model.splitting_criterion, force_classification=true, β=model.β,rng=model.rng, verbosity=verbosity)
     end
     cache = nothing
     report = nothing
@@ -355,7 +352,7 @@ end
 
 # ------------------------------------------------------------------------------
 # Predict functions....
-MMI.predict(model::Union{DecisionTreeRegressor,RandomForestRegressor}, fitresult, Xnew) = Trees.predict(fitresult, MMI.matrix(Xnew))
+MMI.predict(model::Union{DecisionTreeRegressor,RandomForestRegressor}, fitresult, Xnew) = BetaML.Trees.predict(fitresult, MMI.matrix(Xnew))
 
 function MMI.predict(model::Union{DecisionTreeClassifier,RandomForestClassifier}, fitresult, Xnew)
     fittedModel = fitresult[1]
@@ -364,7 +361,7 @@ function MMI.predict(model::Union{DecisionTreeClassifier,RandomForestClassifier}
     classes = MMI.classes(a_target_element)
     nLevels = length(classes)
     nRecords = MMI.nrows(Xnew)
-    treePredictions = Trees.predict(fittedModel, MMI.matrix(Xnew),rng=model.rng)
+    treePredictions = BetaML.Trees.predict(fittedModel, MMI.matrix(Xnew),rng=model.rng)
     predMatrix = zeros(Float64,(nRecords,nLevels))
     # Transform the predictions from a vector of dictionaries to a matrix
     # where the rows are the PMF of each record
@@ -384,23 +381,23 @@ MMI.metadata_model(DecisionTreeRegressor,
     input_scitype = MMI.Table(Union{MMI.Missing, MMI.Known}),
     target_scitype = AbstractVector{<: MMI.Continuous}, # for a supervised model, what target?
     supports_weights = false, # does the model support sample weights?
-    load_path = "BetaML.Trees.DecisionTreeRegressor"
+    load_path = "BetaML.Bmlj.DecisionTreeRegressor"
 )
 MMI.metadata_model(RandomForestRegressor,
     input_scitype = MMI.Table(Union{MMI.Missing, MMI.Known}),
     target_scitype = AbstractVector{<: MMI.Continuous},
     supports_weights = false,
-    load_path = "BetaML.Trees.RandomForestRegressor"
+    load_path = "BetaML.Bmlj.RandomForestRegressor"
 )
 MMI.metadata_model(DecisionTreeClassifier,
     input_scitype = MMI.Table(Union{MMI.Missing, MMI.Known}),
     target_scitype = AbstractVector{<: Union{MMI.Missing,MMI.Finite}},
     supports_weights = false,
-    load_path = "BetaML.Trees.DecisionTreeClassifier"
+    load_path = "BetaML.Bmlj.DecisionTreeClassifier"
 )
 MMI.metadata_model(RandomForestClassifier,
     input_scitype = MMI.Table(Union{MMI.Missing, MMI.Known}),
     target_scitype = AbstractVector{<: Union{MMI.Missing,MMI.Finite}},
     supports_weights = false,
-    load_path = "BetaML.Trees.RandomForestClassifier"
+    load_path = "BetaML.Bmlj.RandomForestClassifier"
 )
diff --git a/src/Clustering/Clustering.jl b/src/Clustering/Clustering.jl
index e0f5ae17..cd8dba72 100644
--- a/src/Clustering/Clustering.jl
+++ b/src/Clustering/Clustering.jl
@@ -32,8 +32,6 @@ import Base.show
 export KMeansHyperParametersSet, KMedoidsHyperParametersSet, KMeansClusterer, KMedoidsClusterer
 
 include("Clustering_hard.jl") # K-means and k-medoids
 
-# MLJ interface
-include("Clustering_MLJ.jl")
 
 end
diff --git a/src/GMM/GMM.jl b/src/GMM/GMM.jl
index e6a0ed2f..b1f853a3 100644
--- a/src/GMM/GMM.jl
+++ b/src/GMM/GMM.jl
@@ -51,8 +51,5 @@ include("GMM_clustering.jl")
 include("Mixtures.jl")
 include("GMM_regression.jl")
 
-# MLJ interface
-include("GMM_MLJ.jl")
-
 
 end
diff --git a/src/GMM/GMM_regression.jl b/src/GMM/GMM_regression.jl
index bd534384..dc143d7a 100644
--- a/src/GMM/GMM_regression.jl
+++ b/src/GMM/GMM_regression.jl
@@ -6,7 +6,7 @@ import BetaML.Utils.allowmissing
 
 # GMMRegressor1
 Base.@kwdef mutable struct GMMRegressor1LearnableParameters <: BetaMLLearnableParametersSet
-    mixtures::Vector{AbstractMixture} = []
+    mixtures::Union{Type,Vector{<: AbstractMixture}} = DiagonalGaussian[] # a `Type` here is only temporary: it should always be replaced by actual mixture objects
     initial_probmixtures::Vector{Float64} = []
     #probRecords::Union{Nothing,Matrix{Float64}} = nothing
     meanYByMixture::Union{Nothing,Matrix{Float64}} = nothing
diff --git a/src/Imputation/Imputation.jl b/src/Imputation/Imputation.jl
index b8245edd..4d31ea6f 100644
--- a/src/Imputation/Imputation.jl
+++ b/src/Imputation/Imputation.jl
@@ -263,7 +263,7 @@ end
 # ------------------------------------------------------------------------------
 # GMMImputer
 Base.@kwdef mutable struct GMMImputerLearnableParameters <: BetaMLLearnableParametersSet
-    mixtures::Vector{AbstractMixture} = []
+    mixtures::Union{Type,Vector{<: AbstractMixture}} = DiagonalGaussian[] # a `Type` here is only temporary: it should always be replaced by actual mixture objects
     initial_probmixtures::Vector{Float64} = []
     probRecords::Union{Nothing,Matrix{Float64}} = nothing
     #imputedValues = nothing
@@ -1211,7 +1211,5 @@ function show(io::IO, m::UniversalImputer)
     end
 end
 
-# MLJ interface
-include("Imputation_MLJ.jl")
 
 end # end Imputation module
\ No newline at end of file
diff --git a/src/Nn/Nn.jl b/src/Nn/Nn.jl
index efcd14b5..4fec6cdb 100644
--- a/src/Nn/Nn.jl
+++ b/src/Nn/Nn.jl
@@ -1161,9 +1161,4 @@ function show(io::IO, m::NeuralNetworkEstimator)
         end
     end
 end
-
-
-# MLJ interface
-include("Nn_MLJ.jl")
-
 end # end module
diff --git a/src/Nn/Nn_MLJ.jl b/src/Nn/Nn_MLJ.jl
deleted file mode 100644
index c23b7404..00000000
--- a/src/Nn/Nn_MLJ.jl
+++ /dev/null
@@ -1,374 +0,0 @@
-"Part of [BetaML](https://github.com/sylvaticus/BetaML.jl). Licence is MIT."
-
-# MLJ interface for Neural Networks models
-
-import MLJModelInterface # It seems that having done this in the top module is not enought
-const MMI = MLJModelInterface # We need to repeat it here
-using CategoricalArrays
-
-export NeuralNetworkRegressor, MultitargetNeuralNetworkRegressor, NeuralNetworkClassifier
-
-
-# Model Structure declarations..
-"""
-$(TYPEDEF)
-
-A simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for regression of a single dimensional target.
-
-# Parameters:
-$(FIELDS)
-
-# Notes:
-- data must be numerical
-- the label should be be a _n-records_ vector.
- -# Example: -```julia -julia> using MLJ - -julia> X, y = @load_boston; - -julia> modelType = @load NeuralNetworkRegressor pkg = "BetaML" verbosity=0 -BetaML.Nn.NeuralNetworkRegressor - -julia> layers = [BetaML.DenseLayer(12,20,f=BetaML.relu),BetaML.DenseLayer(20,20,f=BetaML.relu),BetaML.DenseLayer(20,1,f=BetaML.relu)]; - -julia> model = modelType(layers=layers,opt_alg=BetaML.ADAM()); -NeuralNetworkRegressor( - layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.23249759178069676 -0.4125090172711131 … 0.41401934928739 -0.33017881111237535; -0.27912169279319965 0.270551221249931 … 0.19258414323473344 0.1703002982374256; … ; 0.31186742456482447 0.14776438287394805 … 0.3624993442655036 0.1438885872964824; 0.24363744610286758 -0.3221033024934767 … 0.14886090419299408 0.038411663101909355], [-0.42360286004241765, -0.34355377040029594, 0.11510963232946697, 0.29078650404397893, -0.04940236502546075, 0.05142849152316714, -0.177685375947775, 0.3857630523957018, -0.25454667127064756, -0.1726731848206195, 0.29832456225553444, -0.21138505291162835, -0.15763643112604903, -0.08477044513587562, -0.38436681165349196, 0.20538016429104916, -0.25008157754468335, 0.268681800562054, 0.10600581996650865, 0.4262194464325672], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.08534180387478185 0.19659398307677617 … -0.3413633217504578 -0.0484925247381256; 0.0024419192794883915 -0.14614102508129 … -0.21912059923003044 0.2680725396694708; … ; 0.25151545823147886 -0.27532269951606037 … 0.20739970895058063 0.2891938885916349; -0.1699020711688904 -0.1350423717084296 … 0.16947589410758873 0.3629006047373296], [0.2158116357688406, -0.3255582642532289, -0.057314442103850394, 0.29029696770539953, 0.24994080694366455, 0.3624239027782297, -0.30674318230919984, -0.3854738338935017, 0.10809721838554087, 0.16073511121016176, -0.005923262068960489, 0.3157147976348795, -0.10938918304264739, -0.24521229198853187, -0.307167732178712, 0.0808907777008302, -0.014577497150872254, -0.0011287181458157214, 0.07522282588658086, 0.043366500526073104], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.021367697115938555 -0.28326652172347155 … 0.05346175368370165 -0.26037328415871647], [-0.2313659199724562], BetaML.Utils.relu, BetaML.Utils.drelu)], - loss = BetaML.Utils.squared_cost, - dloss = BetaML.Utils.dsquared_cost, - epochs = 100, - batch_size = 32, - opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var"#90#93"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), - shuffle = true, - descr = "", - cb = BetaML.Nn.fitting_info, - rng = Random._GLOBAL_RNG()) - -julia> mach = machine(model, X, y); - -julia> fit!(mach); - -julia> ŷ = predict(mach, X); - -julia> hcat(y,ŷ) -506×2 Matrix{Float64}: - 24.0 30.7726 - 21.6 28.0811 - 34.7 31.3194 - ⋮ - 23.9 30.9032 - 22.0 29.49 - 11.9 27.2438 -``` -""" -Base.@kwdef mutable struct NeuralNetworkRegressor <: MMI.Deterministic - "Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers" - layers::Union{Array{AbstractLayer,1},Nothing} = nothing - """Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as matrices, even if the regression task is 1-D - !!! warning - If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`. - """ - loss::Union{Nothing,Function} = squared_cost - "Derivative of the loss function [def: `BetaML.dsquared_cost`, i.e. use the derivative of the squared cost]. 
Use `nothing` for autodiff." - dloss::Union{Function,Nothing} = dsquared_cost - "Number of epochs, i.e. passages trough the whole training sample [def: `200`]" - epochs::Int64 = 200 - "Size of each individual batch [def: `16`]" - batch_size::Int64 = 16 - "The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers" - opt_alg::OptimisationAlgorithm = ADAM() - "Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]" - shuffle::Bool = true - "An optional title and/or description for this model" - descr::String = "" - "A call back function to provide information during training [def: `fitting_info`]" - cb::Function=fitting_info - "Random Number Generator (see [`FIXEDSEED`](@ref)) [deafult: `Random.GLOBAL_RNG`] - " - rng::AbstractRNG = Random.GLOBAL_RNG -end - -""" -$(TYPEDSIGNATURES) - -For the `verbosity` parameter see [`Verbosity`](@ref)) - -""" -function MMI.fit(m::NeuralNetworkRegressor, verbosity, X, y) - x = MMI.matrix(X) # convert table to matrix - typeof(verbosity) <: Integer || error("Verbosity must be a integer. Current \"steps\" are 0, 1, 2 and 3.") - verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity) - ndims(y) > 1 && error("The label should have only 1 dimensions. Use `MultitargetNeuralNetworkRegressor` or `NeuralNetworkClassifier` for multi_dimensional outputs.") - mi = NeuralNetworkEstimator(;layers=m.layers,loss=m.loss, dloss=m.dloss, epochs=m.epochs, batch_size=m.batch_size, opt_alg=m.opt_alg,shuffle=m.shuffle, cache=false, descr=m.descr, cb=m.cb, rng=m.rng, verbosity=verbosity) - fit!(mi,x,y) - fitresults = mi - cache = nothing - report = nothing - return fitresults, cache, report - end - - MMI.predict(m::NeuralNetworkRegressor, fitresult, Xnew) = predict(fitresult, MMI.matrix(Xnew)) - - MMI.metadata_model(NeuralNetworkRegressor, - input_scitype = MMI.Table(Union{MMI.Continuous,MMI.Count}), - target_scitype = AbstractVector{<: Union{MMI.Continuous,MMI.Count}}, - supports_weights = false, - load_path = "BetaML.Nn.NeuralNetworkRegressor" -) - -# ------------------------------------------------------------------------------ -# Model Structure declarations.. -""" -$(TYPEDEF) - -A simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for regression of multiple dimensional targets. 
- -# Parameters: -$(FIELDS) - -# Notes: -- data must be numerical -- the label should be a _n-records_ by _n-dimensions_ matrix - -# Example: -```julia -julia> using MLJ - -julia> X, y = @load_boston; - -julia> ydouble = hcat(y, y .*2 .+5); - -julia> modelType = @load MultitargetNeuralNetworkRegressor pkg = "BetaML" verbosity=0 -BetaML.Nn.MultitargetNeuralNetworkRegressor - -julia> layers = [BetaML.DenseLayer(12,50,f=BetaML.relu),BetaML.DenseLayer(50,50,f=BetaML.relu),BetaML.DenseLayer(50,50,f=BetaML.relu),BetaML.DenseLayer(50,2,f=BetaML.relu)]; - -julia> model = modelType(layers=layers,opt_alg=BetaML.ADAM(),epochs=500) -MultitargetNeuralNetworkRegressor( - layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.2591582523441157 -0.027962845131416225 … 0.16044535560124418 -0.12838827994676857; -0.30381834909561184 0.2405495243851402 … -0.2588144861880588 0.09538577909777807; … ; -0.017320292924711156 -0.14042266424603767 … 0.06366999105841187 -0.13419651752478906; 0.07393079961409338 0.24521350531110264 … 0.04256867886217541 -0.0895506802948175], [0.14249427336553644, 0.24719379413682485, -0.25595911822556566, 0.10034088778965933, -0.017086404878505712, 0.21932184025609347, -0.031413516834861266, -0.12569076082247596, -0.18080140982481183, 0.14551901873323253 … -0.13321995621967364, 0.2436582233332092, 0.0552222336976439, 0.07000814133633904, 0.2280064379660025, -0.28885681475734193, -0.07414214246290696, -0.06783184733650621, -0.055318068046308455, -0.2573488383282579], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.0395424111703751 -0.22531232360829911 … -0.04341228943744482 0.024336206858365517; -0.16481887432946268 0.17798073384748508 … -0.18594039305095766 0.051159225856547474; … ; -0.011639475293705043 -0.02347011206244673 … 0.20508869536159186 -0.1158382446274592; -0.19078069527757857 -0.007487540070740484 … -0.21341165344291158 -0.24158671316310726], [-0.04283623889330032, 0.14924461547060602, -0.17039563392959683, 0.00907774027816255, 0.21738885963113852, -0.06308040225941691, -0.14683286822101105, 0.21726892197970937, 0.19784321784707126, -0.0344988665714947 … -0.23643089430602846, -0.013560425201427584, 0.05323948910726356, -0.04644175812567475, -0.2350400292671211, 0.09628312383424742, 0.07016420995205697, -0.23266392927140334, -0.18823664451487, 0.2304486691429084], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.11504184627266828 0.08601794194664503 … 0.03843129724045469 -0.18417305624127284; 0.10181551438831654 0.13459759904443674 … 0.11094951365942118 -0.1549466590355218; … ; 0.15279817525427697 0.0846661196058916 … -0.07993619892911122 0.07145402617285884; -0.1614160186346092 -0.13032002335149 … -0.12310552194729624 -0.15915773071049827], [-0.03435885900946367, -0.1198543931290306, 0.008454985905194445, -0.17980887188986966, -0.03557204910359624, 0.19125847393334877, -0.10949700778538696, -0.09343206702591, -0.12229583511781811, -0.09123969069220564 … 0.22119233518322862, 0.2053873143308657, 0.12756489387198222, 0.11567243705173319, -0.20982445664020496, 0.1595157838386987, -0.02087331046544119, -0.20556423263489765, -0.1622837764237961, -0.019220998739847395], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.25796717031347993 0.17579536633402948 … -0.09992960168785256 -0.09426177454620635; -0.026436330246675632 0.18070899284865127 … -0.19310119102392206 -0.06904005900252091], [0.16133004882307822, -0.3061228721091248], BetaML.Utils.relu, BetaML.Utils.drelu)], - loss = BetaML.Utils.squared_cost, - dloss = 
BetaML.Utils.dsquared_cost, - epochs = 500, - batch_size = 32, - opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var"#90#93"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), - shuffle = true, - descr = "", - cb = BetaML.Nn.fitting_info, - rng = Random._GLOBAL_RNG()) - -julia> mach = machine(model, X, ydouble); - -julia> fit!(mach); - -julia> ŷdouble = predict(mach, X); - -julia> hcat(ydouble,ŷdouble) -506×4 Matrix{Float64}: - 24.0 53.0 28.4624 62.8607 - 21.6 48.2 22.665 49.7401 - 34.7 74.4 31.5602 67.9433 - 33.4 71.8 33.0869 72.4337 - ⋮ - 23.9 52.8 23.3573 50.654 - 22.0 49.0 22.1141 48.5926 - 11.9 28.8 19.9639 45.5823 -``` - -""" -Base.@kwdef mutable struct MultitargetNeuralNetworkRegressor <: MMI.Deterministic - "Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers" - layers::Union{Array{AbstractLayer,1},Nothing} = nothing - """Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as matrices. - !!! warning - If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`. - """ - loss::Union{Nothing,Function} = squared_cost - "Derivative of the loss function [def: `BetaML.dsquared_cost`, i.e. use the derivative of the squared cost]. Use `nothing` for autodiff." - dloss::Union{Function,Nothing} = dsquared_cost - "Number of epochs, i.e. passages trough the whole training sample [def: `300`]" - epochs::Int64 = 300 - "Size of each individual batch [def: `16`]" - batch_size::Int64 = 16 - "The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers" - opt_alg::OptimisationAlgorithm = ADAM() - "Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]" - shuffle::Bool = true - "An optional title and/or description for this model" - descr::String = "" - "A call back function to provide information during training [def: `BetaML.fitting_info`]" - cb::Function=fitting_info - "Random Number Generator (see [`FIXEDSEED`](@ref)) [deafult: `Random.GLOBAL_RNG`] - " - rng::AbstractRNG = Random.GLOBAL_RNG -end -""" -$(TYPEDSIGNATURES) - -For the `verbosity` parameter see [`Verbosity`](@ref)) - -""" -function MMI.fit(m::MultitargetNeuralNetworkRegressor, verbosity, X, y) - x = MMI.matrix(X) # convert table to matrix - typeof(verbosity) <: Integer || error("Verbosity must be a integer. Current \"steps\" are 0, 1, 2 and 3.") - verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity) - ndims(y) > 1 || error("The label should have multiple dimensions. 
Use `NeuralNetworkRegressor` for single-dimensional outputs.") - mi = NeuralNetworkEstimator(;layers=m.layers,loss=m.loss, dloss=m.dloss, epochs=m.epochs, batch_size=m.batch_size, opt_alg=m.opt_alg,shuffle=m.shuffle, cache=false, descr=m.descr, cb=m.cb, rng=m.rng, verbosity=verbosity) - fit!(mi,x,y) - fitresults = mi - cache = nothing - report = nothing - return fitresults, cache, report - end - - MMI.predict(m::MultitargetNeuralNetworkRegressor, fitresult, Xnew) = predict(fitresult, MMI.matrix(Xnew)) - - MMI.metadata_model(MultitargetNeuralNetworkRegressor, - input_scitype = MMI.Table(Union{MMI.Continuous,MMI.Count}), - target_scitype = AbstractMatrix{<: Union{MMI.Continuous,MMI.Count}}, - supports_weights = false, - load_path = "BetaML.Nn.MultitargetNeuralNetworkRegressor" -) - -# ------------------------------------------------------------------------------ - -""" -$(TYPEDEF) - -A simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for classification problems. - -# Parameters: -$(FIELDS) - -# Notes: -- data must be numerical -- the label should be a _n-records_ by _n-dimensions_ matrix (e.g. a one-hot-encoded data for classification), where the output columns should be interpreted as the probabilities for each categories. - -# Example: -```julia -julia> using MLJ - -julia> X, y = @load_iris; - -julia> modelType = @load NeuralNetworkClassifier pkg = "BetaML" verbosity=0 -BetaML.Nn.NeuralNetworkClassifier - -julia> layers = [BetaML.DenseLayer(4,8,f=BetaML.relu),BetaML.DenseLayer(8,8,f=BetaML.relu),BetaML.DenseLayer(8,3,f=BetaML.relu),BetaML.VectorFunctionLayer(3,f=BetaML.softmax)]; - -julia> model = modelType(layers=layers,opt_alg=BetaML.ADAM()) -NeuralNetworkClassifier( - layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.376173352338049 0.7029289511758696 -0.5589563304592478 -0.21043274001651874; 0.044758889527899415 0.6687689636685921 0.4584331114653877 0.6820506583840453; … ; -0.26546358457167507 -0.28469736227283804 -0.164225549922154 -0.516785639164486; -0.5146043550684141 -0.0699113265130964 0.14959906603941908 -0.053706860039406834], [0.7003943613125758, -0.23990840466587576, -0.23823126271387746, 0.4018101580410387, 0.2274483050356888, -0.564975060667734, 0.1732063297031089, 0.11880299829896945], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.029467850439546583 0.4074661266592745 … 0.36775675246760053 -0.595524555448422; 0.42455597698371306 -0.2458082732997091 … -0.3324220683462514 0.44439454998610595; … ; -0.2890883863364267 -0.10109249362508033 … -0.0602680568207582 0.18177278845097555; -0.03432587226449335 -0.4301192922760063 … 0.5646018168286626 0.47269177680892693], [0.13777442835428688, 0.5473306726675433, 0.3781939472904011, 0.24021813428130567, -0.0714779477402877, -0.020386373530818958, 0.5465466618404464, -0.40339790713616525], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([0.6565120540082393 0.7139211611842745 … 0.07809812467915389 -0.49346311403373844; -0.4544472987041656 0.6502667641568863 … 0.43634608676548214 0.7213049952968921; 0.41212264783075303 -0.21993289366360613 … 0.25365007887755064 -0.5664469566269569], [-0.6911986792747682, -0.2149343209329364, -0.6347727539063817], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.VectorFunctionLayer{0}(fill(NaN), 3, 3, BetaML.Utils.softmax, BetaML.Utils.dsoftmax, nothing)], - loss = BetaML.Utils.crossentropy, - dloss = BetaML.Utils.dcrossentropy, - epochs = 100, - batch_size = 32, - opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var"#90#93"(), 1.0, 
0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), - shuffle = true, - descr = "", - cb = BetaML.Nn.fitting_info, - categories = nothing, - handle_unknown = "error", - other_categories_name = nothing, - rng = Random._GLOBAL_RNG()) - -julia> mach = machine(model, X, y); - -julia> fit!(mach); - -julia> classes_est = predict(mach, X) -150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt8, Float64}: - UnivariateFinite{Multiclass{3}}(setosa=>0.575, versicolor=>0.213, virginica=>0.213) - UnivariateFinite{Multiclass{3}}(setosa=>0.573, versicolor=>0.213, virginica=>0.213) - ⋮ - UnivariateFinite{Multiclass{3}}(setosa=>0.236, versicolor=>0.236, virginica=>0.529) - UnivariateFinite{Multiclass{3}}(setosa=>0.254, versicolor=>0.254, virginica=>0.492) -``` -""" -Base.@kwdef mutable struct NeuralNetworkClassifier <: MMI.Probabilistic - "Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers. The last \"softmax\" layer is automatically added." - layers::Union{Array{AbstractLayer,1},Nothing} = nothing - """Loss (cost) function [def: `BetaML.crossentropy`]. Should always assume y and ŷ as matrices. - !!! warning - If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`. - """ - loss::Union{Nothing,Function} = crossentropy - "Derivative of the loss function [def: `BetaML.dcrossentropy`, i.e. the derivative of the cross-entropy]. Use `nothing` for autodiff." - dloss::Union{Function,Nothing} = dcrossentropy - "Number of epochs, i.e. passages trough the whole training sample [def: `200`]" - epochs::Int64 = 200 - "Size of each individual batch [def: `16`]" - batch_size::Int64 = 16 - "The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers" - opt_alg::OptimisationAlgorithm = ADAM() - "Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]" - shuffle::Bool = true - "An optional title and/or description for this model" - descr::String = "" - "A call back function to provide information during training [def: `BetaML.fitting_info`]" - cb::Function=fitting_info - "The categories to represent as columns. [def: `nothing`, i.e. unique training values]." - categories::Union{Vector,Nothing} = nothing - "How to handle categories not seens in training or not present in the provided `categories` array? \"error\" (default) rises an error, \"infrequent\" adds a specific column for these categories." - handle_unknown::String = "error" - "Which value during prediction to assign to this \"other\" category (i.e. categories not seen on training or not present in the provided `categories` array? [def: ` nothing`, i.e. typemax(Int64) for integer vectors and \"other\" for other types]. This setting is active only if `handle_unknown=\"infrequent\"` and in that case it MUST be specified if Y is neither integer or strings" - other_categories_name = nothing - "Random Number Generator [deafult: `Random.GLOBAL_RNG`]" - rng::AbstractRNG = Random.GLOBAL_RNG -end - -""" -MMI.fit(model::NeuralNetworkClassifier, verbosity, X, y) - -For the `verbosity` parameter see [`Verbosity`](@ref)) - -""" -function MMI.fit(m::NeuralNetworkClassifier, verbosity, X, y) - x = MMI.matrix(X) # convert table to matrix - typeof(verbosity) <: Integer || error("Verbosity must be a integer. 
Current \"steps\" are 0, 1, 2 and 3.") - verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity) - categories = deepcopy(m.categories) - if categories == nothing - #if occursin("CategoricalVector",string(typeof(y))) # to avoid dependency to CategoricalArrays or MLJBase - if typeof(y) <: CategoricalVector - categories = levels(y) - end - end - - ohmod = OneHotEncoder(categories=categories,handle_unknown=m.handle_unknown,other_categories_name=m.other_categories_name, verbosity=verbosity) - Y_oh = fit!(ohmod,y) - - nR,nD = size(x) - (nRy,nDy) = size(Y_oh) - - nR == nRy || error("X and Y have different number of records (rows)") - - if isnothing(m.layers) - layers = nothing - else - layers = deepcopy(m.layers) - push!(layers,VectorFunctionLayer(nDy,f=softmax)) - end - mi = NeuralNetworkEstimator(;layers=layers,loss=m.loss, dloss=m.dloss, epochs=m.epochs, batch_size=m.batch_size, opt_alg=m.opt_alg,shuffle=m.shuffle, cache=false, descr=m.descr, cb=m.cb, rng=m.rng, verbosity=verbosity) - fit!(mi,x,Y_oh) - fitresults = (mi,ohmod) - cache = nothing - report = nothing - return fitresults, cache, report - end - -function MMI.predict(m::NeuralNetworkClassifier, fitresult, Xnew) - nnmod, ohmod = fitresult - yhat = predict(nnmod, MMI.matrix(Xnew)) - classes = parameters(ohmod).categories_applied - predictions = MMI.UnivariateFinite(classes, yhat,pool=missing) - #return yhat - return predictions -end - - MMI.metadata_model(NeuralNetworkClassifier, - input_scitype = MMI.Table(Union{MMI.Continuous,MMI.Count}), - target_scitype = AbstractVector{<: Union{MMI.Multiclass,MMI.Finite,MMI.Count}}, - supports_weights = false, - load_path = "BetaML.Nn.NeuralNetworkClassifier" -) diff --git a/src/Trees/Trees.jl b/src/Trees/Trees.jl index 06f81ffe..f0c4e458 100644 --- a/src/Trees/Trees.jl +++ b/src/Trees/Trees.jl @@ -46,7 +46,6 @@ export RandomForestEstimator, RFHyperParametersSet include("DecisionTrees.jl") # Decision Trees algorithm and API include("AbstractTrees_BetaML_interface.jl") # Code to allow plotting of a DecisionTree include("RandomForests.jl") # Random Forests algorithm and API -include("Trees_MLJ.jl") # MLJ interface end # end module diff --git a/test/Clustering_tests.jl b/test/Clustering_tests.jl index 1d9b3880..26d8c93b 100644 --- a/test/Clustering_tests.jl +++ b/test/Clustering_tests.jl @@ -1,7 +1,6 @@ using Test, DelimitedFiles -import MLJBase -const Mlj = MLJBase + using BetaML import BetaML.Clustering: init_representatives, kmeans, kmedoids @@ -86,9 +85,11 @@ s = mean(silhouette(pd,ŷ)) # ================================== # NEW TEST println("Testing MLJ interface for Clustering models....") +import MLJBase +const Mlj = MLJBase X, y = Mlj.@load_iris -model = KMeans(rng=copy(TESTRNG)) +model = BetaML.Bmlj.KMeans(rng=copy(TESTRNG)) modelMachine = Mlj.machine(model, X) (fitResults, cache, report) = Mlj.fit(model, 0, X) distances = Mlj.transform(model,fitResults,X) @@ -96,7 +97,7 @@ yhat = Mlj.predict(model, fitResults, X) acc = BetaML.accuracy(Mlj.levelcode.(yhat),Mlj.levelcode.(y),ignorelabels=true) @test acc > 0.8 -model = KMedoids(rng=copy(TESTRNG)) +model = BetaML.Bmlj.KMedoids(rng=copy(TESTRNG)) modelMachine = Mlj.machine(model, X) (fitResults, cache, report) = Mlj.fit(model, 0, X) distances = Mlj.transform(model,fitResults,X) diff --git a/test/GMM_tests.jl b/test/GMM_tests.jl index eadaf146..8b50374a 100644 --- a/test/GMM_tests.jl +++ b/test/GMM_tests.jl @@ -1,7 +1,5 @@ using Test -import MLJBase -const Mlj = MLJBase import Distributions using BetaML import BetaML.GMM: gmm, initVariances!, 
updateVariances! @@ -128,6 +126,10 @@ ŷtrain2db = predict(m) mreTrain2d = relative_mean_error(ytrain2d,ŷtrain2d,normrec=true) @test mreTrain2d <= 0.08 +m = GMMRegressor1(n_classes=2,rng=copy(TESTRNG), verbosity=NONE, mixtures= SphericalGaussian) +est = fit!(m,xtrain,ytrain2d) +@test typeof(est) == Matrix{Float64} + # Testing GMM Regressor 2 m = GMMRegressor2(n_classes=2,rng=copy(TESTRNG), verbosity=NONE) fit!(m,xtrain,ytrain) @@ -167,9 +169,11 @@ fit!(m,xtrain,ytrain) # ================================== # NEW TEST println("Testing MLJ interface for GMM models....") +import MLJBase +const Mlj = MLJBase X, y = Mlj.@load_iris -model = GaussianMixtureClusterer(mixtures=[DiagonalGaussian() for i in 1:3],rng=copy(TESTRNG)) +model = BetaML.Bmlj.GaussianMixtureClusterer(mixtures=[DiagonalGaussian() for i in 1:3],rng=copy(TESTRNG)) modelMachine = Mlj.machine(model, X) # DimensionMismatch (fitResults, cache, report) = Mlj.fit(model, 0, X) yhat_prob = Mlj.predict(model, fitResults, X) # Mlj.transform(model,fitResults,X) @@ -180,14 +184,14 @@ yhat_prob = Mlj.predict(model, fitResults, X) # Mlj.transfor println("Testing MLJ interface for GMMRegressor models....") X, y = Mlj.@load_boston -model_gmmr = GaussianMixtureRegressor(n_classes=20,rng=copy(TESTRNG)) +model_gmmr = BetaML.Bmlj.GaussianMixtureRegressor(n_classes=20,rng=copy(TESTRNG)) regressor_gmmr = Mlj.machine(model_gmmr, X, y) (fitresult_gmmr, cache, report) = Mlj.fit(model_gmmr, 0, X, y) yhat_gmmr = Mlj.predict(model_gmmr, fitresult_gmmr, X) @test relative_mean_error(y,yhat_gmmr,normrec=true) < 0.3 ydouble = hcat(y,y) -model_gmmr2 = MultitargetGaussianMixtureRegressor(n_classes=20,rng=copy(TESTRNG)) +model_gmmr2 = BetaML.Bmlj.MultitargetGaussianMixtureRegressor(n_classes=20,rng=copy(TESTRNG)) regressor_gmmr2 = Mlj.machine(model_gmmr2, X, ydouble) (fitresult_gmmr2, cache, report) = Mlj.fit(model_gmmr2, 0, X, ydouble) yhat_gmmr2 = Mlj.predict(model_gmmr2, fitresult_gmmr2, X) diff --git a/test/Imputation_tests.jl b/test/Imputation_tests.jl index 12768a00..4235c013 100644 --- a/test/Imputation_tests.jl +++ b/test/Imputation_tests.jl @@ -4,10 +4,6 @@ using Statistics, Random using BetaML import DecisionTree - -import MLJBase -const Mlj = MLJBase - TESTRNG = FIXEDRNG # This could change... 
@@ -101,6 +97,11 @@ X̂3 = predict(mod,X3) reset!(mod) #predict(mod,X3) +mod = GMMImputer(mixtures=DiagonalGaussian) +X2 = [3 6 9; 2000 missing 10000; 1 2 5; 1500 3000 9000; 1.5 3 6] +fit!(mod,X2) +X̂2 = predict(mod) +@test typeof(X̂2) == Matrix{Float64} # ------------------------------------------------------------------------------ @@ -229,15 +230,14 @@ Xfull2 = BetaML.fit!(mod2,X) println("Testing MLJ Interfaces...") -# ------------------------------------------------------------------------------ - - +import MLJBase +const Mlj = MLJBase println("Testing MLJ Interface for SimpleImputer...") X = [1 10.5;1.5 missing; 1.8 8; 1.7 15; 3.2 40; missing missing; 3.3 38; missing -2.3; 5.2 -2.4] Xt = Mlj.table(X) -model = SimpleImputer(norm=1) +model = BetaML.Bmlj.SimpleImputer(norm=1) modelMachine = Mlj.machine(model,Xt) (fitResults, cache, report) = Mlj.fit(model, 0, Xt) XM = Mlj.transform(model,fitResults,Xt) @@ -253,7 +253,7 @@ println("Testing MLJ Interface for GaussianMixtureImputer...") X = [1 10.5;1.5 missing; 1.8 8; 1.7 15; 3.2 40; missing missing; 3.3 38; missing -2.3; 5.2 -2.4] Xt = Mlj.table(X) -model = GaussianMixtureImputer(initialisation_strategy="grid",rng=copy(TESTRNG)) +model = BetaML.Bmlj.GaussianMixtureImputer(initialisation_strategy="grid",rng=copy(TESTRNG)) modelMachine = Mlj.machine(model,Xt) (fitResults, cache, report) = Mlj.fit(model, 0, Xt) XM = Mlj.transform(model,fitResults,Xt) @@ -264,12 +264,16 @@ Xnew_withMissing = Mlj.table([1.5 missing; missing 38; missing -2.3; XDNew = Mlj.transform(model,fitResults,Xnew_withMissing) XDMNew = Mlj.matrix(XDNew) @test isapprox(XDMNew[1,2],x̂[2,2]) +model = BetaML.Bmlj.GaussianMixtureImputer(initialisation_strategy="grid",rng=copy(TESTRNG), mixtures=BetaML.SphericalGaussian) +modelMachine = Mlj.machine(model,Xt) +(fitResults, cache, report) = Mlj.fit(model, 0, Xt) +@test report["AIC"] < 100000 println("Testing MLJ Interface for RandomForestImputer...") X = [1 10.5;1.5 missing; 1.8 8; 1.7 15; 3.2 40; missing missing; 3.3 38; missing -2.3; 5.2 -2.4] Xt = Mlj.table(X) -model = RandomForestImputer(n_trees=40,rng=copy(TESTRNG)) +model = BetaML.Bmlj.RandomForestImputer(n_trees=40,rng=copy(TESTRNG)) modelMachine = Mlj.machine(model,Xt) (fitResults, cache, report) = Mlj.fit(model, 0, Xt) XM = Mlj.transform(model,fitResults,Xt) @@ -286,7 +290,7 @@ println("Testing MLJ Interface for GeneralImputer...") X = [1 10.5;1.5 missing; 1.8 8; 1.7 15; 3.2 40; missing missing; 3.3 38; missing -2.3; 5.2 -2.4] Xt = Mlj.table(X) trng = copy(TESTRNG) -model = GeneralImputer(estimator=[GMMRegressor1(rng=copy(TESTRNG),verbosity=NONE),RandomForestEstimator(n_trees=40,rng=copy(TESTRNG),verbosity=NONE)],recursive_passages=2, missing_supported=true, rng = copy(TESTRNG)) +model = BetaML.Bmlj.GeneralImputer(estimator=[GMMRegressor1(rng=copy(TESTRNG),verbosity=NONE),RandomForestEstimator(n_trees=40,rng=copy(TESTRNG),verbosity=NONE)],recursive_passages=2, missing_supported=true, rng = copy(TESTRNG)) modelMachine = Mlj.machine(model,Xt) (fitResults, cache, report) = Mlj.fit(model, 0, Xt) XM = Mlj.transform(model,fitResults,Xt) @@ -311,7 +315,7 @@ X = [ 12 0.3 5 11; Xt = Mlj.table(X) trng = copy(TESTRNG) -model = GeneralImputer(estimator=DecisionTree.DecisionTreeRegressor(), fit_function=DecisionTree.fit!,predict_function=DecisionTree.predict,recursive_passages=10, rng = copy(TESTRNG)) +model = BetaML.Bmlj.GeneralImputer(estimator=DecisionTree.DecisionTreeRegressor(), fit_function=DecisionTree.fit!,predict_function=DecisionTree.predict,recursive_passages=10, rng = 
copy(TESTRNG)) modelMachine = Mlj.machine(model,Xt) (fitResults, cache, report) = Mlj.fit(model, 0, Xt) XM = Mlj.transform(model,fitResults,Xt) diff --git a/test/Nn_tests.jl b/test/Nn_tests.jl index e628c2aa..7e48b97b 100644 --- a/test/Nn_tests.jl +++ b/test/Nn_tests.jl @@ -623,7 +623,7 @@ import MLJBase const Mlj = MLJBase import StatisticalMeasures X, y = Mlj.@load_boston -model = NeuralNetworkRegressor(rng=copy(TESTRNG)) +model = BetaML.Bmlj.NeuralNetworkRegressor(rng=copy(TESTRNG)) regressor = Mlj.machine(model, X, y) (fitresult, cache, report) = Mlj.fit(model, -1, X, y) yhat = Mlj.predict(model, fitresult, X) @@ -631,14 +631,14 @@ yhat = Mlj.predict(model, fitresult, X) X, y = Mlj.@load_boston y2d = [y y] -model = MultitargetNeuralNetworkRegressor(rng=copy(TESTRNG)) +model = BetaML.Bmlj.MultitargetNeuralNetworkRegressor(rng=copy(TESTRNG)) regressor = Mlj.machine(model, X, y2d) (fitresult, cache, report) = Mlj.fit(model, -1, X, y2d) yhat = Mlj.predict(model, fitresult, X) @test relative_mean_error(y2d,yhat,normrec=true) < 0.2 X, y = Mlj.@load_iris -model = NeuralNetworkClassifier(rng=copy(TESTRNG),epochs=500,batch_size=64) +model = BetaML.Bmlj.NeuralNetworkClassifier(rng=copy(TESTRNG),epochs=500,batch_size=64) regressor = Mlj.machine(model, X, y) (fitresult, cache, report) = Mlj.fit(model, -1, X, y) yhat = Mlj.predict(model, fitresult, X) diff --git a/test/Trees_tests.jl b/test/Trees_tests.jl index 965a29a6..42529021 100644 --- a/test/Trees_tests.jl +++ b/test/Trees_tests.jl @@ -341,27 +341,27 @@ accβ = accuracy(ytest,ŷtestβ,rng=copy(TESTRNG)) # NEW TEST println("Testing MLJ interface for Trees models....") X, y = Mlj.@load_boston -model_dtr = DecisionTreeRegressor(rng=copy(TESTRNG)) +model_dtr = BetaML.Bmlj.DecisionTreeRegressor(rng=copy(TESTRNG)) regressor_dtr = Mlj.machine(model_dtr, X, y) (fitresult_dtr, cache, report) = Mlj.fit(model_dtr, 0, X, y) yhat_dtr = Mlj.predict(model_dtr, fitresult_dtr, X) @test relative_mean_error(y,yhat_dtr,normrec=true) < 0.02 -model_rfr = RandomForestRegressor(rng=copy(TESTRNG)) +model_rfr = BetaML.Bmlj.RandomForestRegressor(rng=copy(TESTRNG)) regressor_rfr = Mlj.machine(model_rfr, X, y) (fitresult_rfr, cache, report) = Mlj.fit(model_rfr, 0, X, y) yhat_rfr = Mlj.predict(model_rfr, fitresult_rfr, X) @test relative_mean_error(y,yhat_rfr,normrec=true) < 0.06 X, y = Mlj.@load_iris -model_dtc = DecisionTreeClassifier(rng=copy(TESTRNG)) +model_dtc = BetaML.Bmlj.DecisionTreeClassifier(rng=copy(TESTRNG)) regressor_dtc = Mlj.machine(model_dtc, X, y) (fitresult_dtc, cache, report) = Mlj.fit(model_dtc, 0, X, y) yhat_dtc = Mlj.predict(model_dtc, fitresult_dtc, X) @test Mlj.mean(StatisticalMeasures.LogLoss(tol=1e-4)(yhat_dtc, y)) < 0.0002 @test sum(Mlj.mode.(yhat_dtc) .== y)/length(y) == 1 -model_rfc = RandomForestClassifier(max_features=3,rng=copy(TESTRNG)) +model_rfc = BetaML.Bmlj.RandomForestClassifier(max_features=3,rng=copy(TESTRNG)) regressor_rfc = Mlj.machine(model_rfc, X, y) (fitresult_rfc, cache, report) = Mlj.fit(model_rfc, 0, X, y) yhat_rfc = Mlj.predict(model_rfc, fitresult_rfc, X)
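
For reference, a minimal sketch of the two behaviours exercised by the updated tests above: addressing the MLJ interface models through the (not exported) `Bmlj` submodule, and passing the `mixtures` hyperparameter as a mixture *type* rather than as a vector of mixture instances. The toy data and hyperparameter values below are illustrative assumptions, not part of the patch.

```julia
import MLJBase
const Mlj = MLJBase
using BetaML

# (1) MLJ interface models now live in the Bmlj submodule and are no longer
#     exported by BetaML, so they are reached as `BetaML.Bmlj.<ModelName>`
X, y  = Mlj.@load_iris
model = BetaML.Bmlj.KMeans()
mach  = Mlj.machine(model, X)
Mlj.fit!(mach)
ŷ = Mlj.predict(mach, X)

# (2) `mixtures` can now also be given as a type, instantiated internally,
#     instead of a vector of objects such as `[DiagonalGaussian() for i in 1:2]`
x = rand(30, 3)
t = hcat(2 .* x[:,1], x[:,2] .+ 1)          # illustrative 2-column target
m = GMMRegressor1(n_classes=2, mixtures=BetaML.SphericalGaussian)
t̂ = fit!(m, x, t)                           # fit! returns the in-sample predictions
```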