Skip to content

Commit

Permalink
Moved all MLJ interface models to Bmlj module (not yet renamed), solv…
Browse files Browse the repository at this point in the history
…ed bug in GMMImputer and GMMRegressor1

1) All MLJ interface models have been moved to the `Bmlj` module, so that they can have the same names as the BetaML models,
but the actual renaming has not been performed yet
2) Solved a bug in `GMMImputer` and `GMMRegressor1` where they could not be initialised with the `mixtures`
keyword being a type
  • Loading branch information
sylvaticus committed Jan 21, 2024
1 parent d89b42e commit 55353cb
Show file tree
Hide file tree
Showing 20 changed files with 813 additions and 510 deletions.
10 changes: 5 additions & 5 deletions src/BetaML.jl
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,11 @@ import .Bmlj # some MLJ models have the same name as BetaML models, set them in
# ------------------------------------------------------------------------------
#MLJ interface...
const MLJ_PERCEPTRON_MODELS = (Bmlj.LinearPerceptron, Bmlj.KernelPerceptron, Bmlj.Pegasos)
const MLJ_TREES_MODELS = (DecisionTreeClassifier, DecisionTreeRegressor, RandomForestClassifier, RandomForestRegressor)
const MLJ_CLUSTERING_MODELS = (KMeans, KMedoids, GaussianMixtureClusterer)
const MLJ_IMPUTERS_MODELS = (SimpleImputer, GaussianMixtureImputer, RandomForestImputer,GeneralImputer) # these are the name of the MLJ models, not the BetaML ones...
const MLJ_NN_MODELS = (NeuralNetworkRegressor,MultitargetNeuralNetworkRegressor, NeuralNetworkClassifier)
const MLJ_OTHER_MODELS = (GaussianMixtureRegressor,MultitargetGaussianMixtureRegressor,Bmlj.AutoEncoder)
const MLJ_TREES_MODELS = (Bmlj.DecisionTreeClassifier, Bmlj.DecisionTreeRegressor, Bmlj.RandomForestClassifier, Bmlj.RandomForestRegressor)
const MLJ_CLUSTERING_MODELS = (Bmlj.KMeans, Bmlj.KMedoids, Bmlj.GaussianMixtureClusterer)
const MLJ_IMPUTERS_MODELS = (Bmlj.SimpleImputer, Bmlj.GaussianMixtureImputer, Bmlj.RandomForestImputer,Bmlj.GeneralImputer) # these are the name of the MLJ models, not the BetaML ones...
const MLJ_NN_MODELS = (Bmlj.NeuralNetworkRegressor,Bmlj.MultitargetNeuralNetworkRegressor, Bmlj.NeuralNetworkClassifier)
const MLJ_OTHER_MODELS = (Bmlj.GaussianMixtureRegressor,Bmlj.MultitargetGaussianMixtureRegressor,Bmlj.AutoEncoder)
const MLJ_INTERFACED_MODELS = (MLJ_PERCEPTRON_MODELS..., MLJ_TREES_MODELS..., MLJ_CLUSTERING_MODELS..., MLJ_IMPUTERS_MODELS..., MLJ_NN_MODELS..., MLJ_OTHER_MODELS...)


Expand Down
22 changes: 12 additions & 10 deletions src/Bmlj/Bmlj.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,15 @@
# MLJ interface for BetaML models
In this module we define the interface of several BetaML models. They can be used using the [MLJ framework](https://github.com/alan-turing-institute/MLJ.jl).
Note that MLJ models (whose name could be the same as the underlying BetaML model) are not exported. You can access them with `BetaML.Bmlj.ModelXYZ`.
"""
module Bmlj
mljverbosity_to_betaml_verbosity

using Random, LinearAlgebra, Statistics
using CategoricalArrays, DocStringExtensions
using Random


import MLJModelInterface # It seems that having done this in the top module is not enough
const MMI = MLJModelInterface # We need to repeat it here
Expand All @@ -24,12 +27,11 @@ import ..BetaML
import ..Utils # can't using it as it exports some same-name models
import ..Perceptron
import ..Nn: AbstractLayer, ADAM, SGD, NeuralNetworkEstimator, OptimisationAlgorithm, DenseLayer, NN
import ..Utils: AbstractRNG, squared_cost, SuccessiveHalvingSearch, mljverbosity_to_betaml_verbosity
import ..Utils: AbstractRNG, squared_cost, SuccessiveHalvingSearch


export mljverbosity_to_betaml_verbosity


"""
$(TYPEDSIGNATURES)
Expand All @@ -51,11 +53,11 @@ function mljverbosity_to_betaml_verbosity(i::Integer)
end

include("Perceptron_mlj.jl") # Perceptron-like algorithms
#include("Trees_mlj.jl") # Decision Trees and ensembles (Random Forests)
#include("Clustering_mlj.jl") # Clustering (hard) algorithms
#include("GMM_mlj.jl") # GMM-based learners (clustering, fitter, regression)
#include("Imputation_mlj.jl")
#include("Nn_mlj.jl")
include("Utils_mlj.jl")
include("Trees_mlj.jl") # Decision Trees and ensembles (Random Forests)
include("Clustering_mlj.jl") # Clustering (hard) algorithms
include("GMM_mlj.jl") # GMM-based learners (clustering, fitter, regression)
include("Imputation_mlj.jl") # Imputation models
include("Nn_mlj.jl") # Neural network models
include("Utils_mlj.jl") # Various transformers/encoders

end
13 changes: 5 additions & 8 deletions src/Clustering/Clustering_MLJ.jl → src/Bmlj/Clustering_mlj.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@

# MLJ interface for hard clustering models

import MLJModelInterface # It seems that having done this in the top module is not enought
const MMI = MLJModelInterface # We need to repeat it here

export KMeans, KMedoids

# ------------------------------------------------------------------------------
Expand Down Expand Up @@ -165,11 +162,11 @@ function MMI.fit(m::Union{KMeans,KMedoids}, verbosity, X)
x = MMI.matrix(X) # convert table to matrix
# Using low level API here. We could switch to APIV2...
typeof(verbosity) <: Integer || error("Verbosity must be a integer. Current \"steps\" are 0, 1, 2 and 3.")
verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity)
verbosity = mljverbosity_to_betaml_verbosity(verbosity)
if typeof(m) == KMeans
(assignedClasses,representatives) = kmeans(x,m.n_classes,dist=m.dist,initialisation_strategy=m.initialisation_strategy,initial_representatives=m.initial_representatives,rng=m.rng,verbosity=verbosity)
(assignedClasses,representatives) = BetaML.Clustering.kmeans(x,m.n_classes,dist=m.dist,initialisation_strategy=m.initialisation_strategy,initial_representatives=m.initial_representatives,rng=m.rng,verbosity=verbosity)
else
(assignedClasses,representatives) = kmedoids(x,m.n_classes,dist=m.dist,initialisation_strategy=m.initialisation_strategy,initial_representatives=m.initial_representatives,rng=m.rng, verbosity=verbosity)
(assignedClasses,representatives) = BetaML.Clustering.kmedoids(x,m.n_classes,dist=m.dist,initialisation_strategy=m.initialisation_strategy,initial_representatives=m.initial_representatives,rng=m.rng, verbosity=verbosity)
end
cache=nothing
report=nothing
Expand Down Expand Up @@ -216,13 +213,13 @@ MMI.metadata_model(KMeans,
output_scitype = MMI.Table(MMI.Continuous), # scitype of the output of `transform`
target_scitype = AbstractArray{<:MMI.Multiclass}, # scitype of the output of `predict`
supports_weights = false, # does the model support sample weights?
load_path = "BetaML.Clustering.KMeans"
load_path = "BetaML.Bmlj.KMeans"
)

MMI.metadata_model(KMedoids,
input_scitype = MMI.Table(MMI.Continuous), # scitype of the inputs
output_scitype = MMI.Table(MMI.Continuous), # scitype of the output of `transform`
target_scitype = AbstractArray{<:MMI.Multiclass}, # scitype of the output of `predict`
supports_weights = false, # does the model support sample weights?
load_path = "BetaML.Clustering.KMedoids"
load_path = "BetaML.Bmlj.KMedoids"
)
43 changes: 20 additions & 23 deletions src/GMM/GMM_MLJ.jl → src/Bmlj/GMM_mlj.jl
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
"Part of [BetaML](https://github.com/sylvaticus/BetaML.jl). Licence is MIT."

# MLJ interface for clustering models

import MLJModelInterface # It seems that having done this in the top module is not enought
const MMI = MLJModelInterface # We need to repeat it here
# MLJ interface for GMM based models

export GaussianMixtureClusterer, GaussianMixtureRegressor, MultitargetGaussianMixtureRegressor

Expand Down Expand Up @@ -68,7 +65,7 @@ mutable struct GaussianMixtureClusterer <: MMI.Unsupervised
This parameter can also be given simply in terms of a _type_. In this case it is automatically extended to a vector of `n_classes` mixtures of the specified type.
Note that mixing of different mixture types is not currently supported.
[def: `[DiagonalGaussian() for i in 1:n_classes]`]"""
mixtures::Union{Type,Vector{<: AbstractMixture}}
mixtures::Union{Type,Vector{<: BetaML.GMM.AbstractMixture}}
"Tolerance to stop the algorithm [default: 10^(-6)]"
tol::Float64
"Minimum variance for the mixtures [default: 0.05]"
Expand All @@ -92,7 +89,7 @@ end
function GaussianMixtureClusterer(;
n_classes = 3,
initial_probmixtures = Float64[],
mixtures = [DiagonalGaussian() for i in 1:n_classes],
mixtures = [BetaML.GMM.DiagonalGaussian() for i in 1:n_classes],
tol = 10^(-6),
minimum_variance = 0.05,
minimum_covariance = 0.0,
Expand Down Expand Up @@ -162,7 +159,7 @@ mutable struct GaussianMixtureRegressor <: MMI.Deterministic
This parameter can also be given simply in terms of a _type_. In this case it is automatically extended to a vector of `n_classes` mixtures of the specified type.
Note that mixing of different mixture types is not currently supported.
[def: `[DiagonalGaussian() for i in 1:n_classes]`]"""
mixtures::Union{Type,Vector{<: AbstractMixture}}
mixtures::Union{Type,Vector{<: BetaML.GMM.AbstractMixture}}
"Tolerance to stop the algorithm [default: 10^(-6)]"
tol::Float64
"Minimum variance for the mixtures [default: 0.05]"
Expand All @@ -186,7 +183,7 @@ end
function GaussianMixtureRegressor(;
n_classes = 3,
initial_probmixtures = [],
mixtures = [DiagonalGaussian() for i in 1:n_classes],
mixtures = [BetaML.GMM.DiagonalGaussian() for i in 1:n_classes],
tol = 10^(-6),
minimum_variance = 0.05,
minimum_covariance = 0.0,
Expand All @@ -195,7 +192,7 @@ function GaussianMixtureRegressor(;
rng = Random.GLOBAL_RNG
)
# `mixtures` may be supplied as a mixture *type* instead of a vector of mixture
# instances; in that case expand it to `n_classes` default-constructed mixtures.
# The call must go through the local `mixtures` keyword value: writing
# `BetaML.GMM.mixtures()` would instead look up a binding named `mixtures` inside
# the `BetaML.GMM` module rather than calling the type held by the keyword argument.
if typeof(mixtures) <: UnionAll
mixtures = [mixtures() for i in 1:n_classes]
end
return GaussianMixtureRegressor(n_classes,initial_probmixtures,mixtures,tol,minimum_variance,minimum_covariance,initialisation_strategy,maximum_iterations,rng)
end
Expand Down Expand Up @@ -258,7 +255,7 @@ mutable struct MultitargetGaussianMixtureRegressor <: MMI.Deterministic
This parameter can also be given simply in terms of a _type_. In this case it is automatically extended to a vector of `n_classes` mixtures of the specified type.
Note that mixing of different mixture types is not currently supported.
[def: `[DiagonalGaussian() for i in 1:n_classes]`]"""
mixtures::Union{Type,Vector{<: AbstractMixture}}
mixtures::Union{Type,Vector{<: BetaML.GMM.AbstractMixture}}
"Tolerance to stop the algorithm [default: 10^(-6)]"
tol::Float64
"Minimum variance for the mixtures [default: 0.05]"
Expand All @@ -282,7 +279,7 @@ end
function MultitargetGaussianMixtureRegressor(;
n_classes = 3,
initial_probmixtures = [],
mixtures = [DiagonalGaussian() for i in 1:n_classes],
mixtures = [BetaML.GMM.DiagonalGaussian() for i in 1:n_classes],
tol = 10^(-6),
minimum_variance = 0.05,
minimum_covariance = 0.0,
Expand Down Expand Up @@ -314,9 +311,9 @@ function MMI.fit(m::GaussianMixtureClusterer, verbosity, X)
end
=#
typeof(verbosity) <: Integer || error("Verbosity must be a integer. Current \"steps\" are 0, 1, 2 and 3.")
verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity)
verbosity = mljverbosity_to_betaml_verbosity(verbosity)
mixtures = m.mixtures
res = gmm(x,m.n_classes,initial_probmixtures=deepcopy(m.initial_probmixtures),mixtures=mixtures, minimum_variance=m.minimum_variance, minimum_covariance=m.minimum_covariance,initialisation_strategy=m.initialisation_strategy,verbosity=verbosity,maximum_iterations=m.maximum_iterations,rng=m.rng)
res = BetaML.GMM.gmm(x,m.n_classes,initial_probmixtures=deepcopy(m.initial_probmixtures),mixtures=mixtures, minimum_variance=m.minimum_variance, minimum_covariance=m.minimum_covariance,initialisation_strategy=m.initialisation_strategy,verbosity=verbosity,maximum_iterations=m.maximum_iterations,rng=m.rng)
fitResults = (pₖ=res.pₖ,mixtures=res.mixtures) # res.pₙₖ
cache = nothing
report = (res.ϵ,res.lL,res.BIC,res.AIC)
Expand All @@ -327,7 +324,7 @@ MMI.fitted_params(model::GaussianMixtureClusterer, fitresult) = (weights=fitresu
function MMI.fit(m::GaussianMixtureRegressor, verbosity, X, y)
x = MMI.matrix(X) # convert table to matrix
typeof(verbosity) <: Integer || error("Verbosity must be a integer. Current \"steps\" are 0, 1, 2 and 3.")
verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity)
verbosity = mljverbosity_to_betaml_verbosity(verbosity)
ndims(y) < 2 || error("Trying to fit `GaussianMixtureRegressor` with a multidimensional target. Use `MultitargetGaussianMixtureRegressor` instead.")
#=
if typeof(y) <: AbstractMatrix
Expand All @@ -345,7 +342,7 @@ function MMI.fit(m::GaussianMixtureRegressor, verbosity, X, y)
end
=#
mixtures = m.mixtures
betamod = GMMRegressor2(
betamod = BetaML.GMM.GMMRegressor2(
n_classes = m.n_classes,
initial_probmixtures = m.initial_probmixtures,
mixtures = mixtures,
Expand All @@ -363,7 +360,7 @@ end
function MMI.fit(m::MultitargetGaussianMixtureRegressor, verbosity, X, y)
x = MMI.matrix(X) # convert table to matrix
typeof(verbosity) <: Integer || error("Verbosity must be a integer. Current \"steps\" are 0, 1, 2 and 3.")
verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity)
verbosity = mljverbosity_to_betaml_verbosity(verbosity)
ndims(y) >= 2 || @warn "Trying to fit `MultitargetGaussianMixtureRegressor` with a single-dimensional target. You may want to consider `GaussianMixtureRegressor` instead."
#=
if typeof(y) <: AbstractMatrix
Expand All @@ -381,7 +378,7 @@ function MMI.fit(m::MultitargetGaussianMixtureRegressor, verbosity, X, y)
end
=#
mixtures = m.mixtures
betamod = GMMRegressor2(
betamod = BetaML.GMM.GMMRegressor2(
n_classes = m.n_classes,
initial_probmixtures = m.initial_probmixtures,
mixtures = mixtures,
Expand All @@ -407,7 +404,7 @@ function MMI.predict(m::GaussianMixtureClusterer, fitResults, X)
(pₖ,mixtures) = (fitResults.pₖ, fitResults.mixtures)
nCl = length(pₖ)
# Compute the probabilities that maximise the likelihood given existing mixtures and a single iteration (i.e. doesn't update the mixtures)
thisOut = gmm(x,nCl,initial_probmixtures=pₖ,mixtures=mixtures,tol=m.tol,verbosity=NONE,minimum_variance=m.minimum_variance,minimum_covariance=m.minimum_covariance,initialisation_strategy="given",maximum_iterations=1,rng=m.rng)
thisOut = BetaML.GMM.gmm(x,nCl,initial_probmixtures=pₖ,mixtures=mixtures,tol=m.tol,verbosity=NONE,minimum_variance=m.minimum_variance,minimum_covariance=m.minimum_covariance,initialisation_strategy="given",maximum_iterations=1,rng=m.rng)
classes = CategoricalArray(1:nCl)
predictions = MMI.UnivariateFinite(classes, thisOut.pₙₖ)
return predictions
Expand All @@ -416,12 +413,12 @@ end
# MLJ `predict` method for `GaussianMixtureRegressor`: converts the table `X` to a
# matrix, calls `predict` on `fitResults` with it and drops the second dimension of
# the result.
# NOTE(review): the two `return` statements below look like the pre- and post-change
# forms of the same line shown together by the diff view; as written only the first
# one would execute — confirm that the committed file keeps the `BetaML.Api.predict` form.
function MMI.predict(m::GaussianMixtureRegressor, fitResults, X)
x = MMI.matrix(X) # convert table to matrix
betamod = fitResults
return dropdims(predict(betamod,x),dims=2)
return dropdims(BetaML.Api.predict(betamod,x),dims=2)
end
# MLJ `predict` method for `MultitargetGaussianMixtureRegressor`: converts the table
# `X` to a matrix and returns the result of calling `predict` on `fitResults` with it
# (no dimension is dropped here, unlike the single-target method).
# NOTE(review): the two `return` statements below look like the pre- and post-change
# forms of the same line shown together by the diff view; as written only the first
# one would execute — confirm that the committed file keeps the `BetaML.Api.predict` form.
function MMI.predict(m::MultitargetGaussianMixtureRegressor, fitResults, X)
x = MMI.matrix(X) # convert table to matrix
betamod = fitResults
return predict(betamod,x)
return BetaML.Api.predict(betamod,x)
end


Expand All @@ -434,19 +431,19 @@ MMI.metadata_model(GaussianMixtureClusterer,
target_scitype = AbstractArray{<:MMI.Multiclass}, # scitype of the output of `predict`
#prediction_type = :probabilistic, # option not added to metadata_model function, need to do it separately
supports_weights = false, # does the model support sample weights?
load_path = "BetaML.GMM.GaussianMixtureClusterer"
load_path = "BetaML.Bmlj.GaussianMixtureClusterer"
)
MMI.prediction_type(::Type{<:GaussianMixtureClusterer}) = :probabilistic

MMI.metadata_model(GaussianMixtureRegressor,
input_scitype = MMI.Table(Union{MMI.Missing, MMI.Infinite}),
target_scitype = AbstractVector{<: MMI.Continuous}, # for a supervised model, what target?
supports_weights = false, # does the model support sample weights?
load_path = "BetaML.GMM.GaussianMixtureRegressor"
load_path = "BetaML.Bmlj.GaussianMixtureRegressor"
)
MMI.metadata_model(MultitargetGaussianMixtureRegressor,
input_scitype = MMI.Table(Union{MMI.Missing, MMI.Infinite}),
target_scitype = AbstractMatrix{<: MMI.Continuous}, # for a supervised model, what target?
supports_weights = false, # does the model support sample weights?
load_path = "BetaML.GMM.MultitargetGaussianMixtureRegressor"
load_path = "BetaML.Bmlj.MultitargetGaussianMixtureRegressor"
)
Loading

0 comments on commit 55353cb

Please sign in to comment.