From 55353cb797717ff74ce2ab5641e065f0018ea832 Mon Sep 17 00:00:00 2001
From: Antonello Lobianco
Date: Sun, 21 Jan 2024 17:03:42 +0100
Subject: [PATCH] Moved all MLJ interface models to Bmlj module (not yet
 renamed), solved bug in GMMImputer and GMMRegressor1

1) All MLJ interface models have been moved to the `Bmlj` module, so that they
   can have the same names as the BetaML models; the actual renaming has not
   been performed yet
2) Solved a bug in `GMMImputer` and `GMMRegressor1` that prevented them from
   being initialised with the `mixtures` keyword given as a type
---
 src/BetaML.jl                                 |  10 +-
 src/Bmlj/Bmlj.jl                              |  22 +-
 .../Clustering_mlj.jl}                        |  13 +-
 src/{GMM/GMM_MLJ.jl => Bmlj/GMM_mlj.jl}       |  43 +-
 .../Imputation_mlj.jl}                        |  49 ++-
 src/Bmlj/Nn_mlj.jl                            | 370 ++++++++++++++++-
 src/Bmlj/Perceptron_mlj.jl                    | 325 +++++++++++++++
 src/{Trees/Trees_MLJ.jl => Bmlj/Trees_mlj.jl} |  35 +-
 src/Clustering/Clustering.jl                  |   2 -
 src/GMM/GMM.jl                                |   3 -
 src/GMM/GMM_regression.jl                     |   2 +-
 src/Imputation/Imputation.jl                  |   4 +-
 src/Nn/Nn.jl                                  |   5 -
 src/Nn/Nn_MLJ.jl                              | 374 ------------------
 src/Trees/Trees.jl                            |   1 -
 test/Clustering_tests.jl                      |   9 +-
 test/GMM_tests.jl                             |  14 +-
 test/Imputation_tests.jl                      |  28 +-
 test/Nn_tests.jl                              |   6 +-
 test/Trees_tests.jl                           |   8 +-
 20 files changed, 813 insertions(+), 510 deletions(-)
 rename src/{Clustering/Clustering_MLJ.jl => Bmlj/Clustering_mlj.jl} (92%)
 rename src/{GMM/GMM_MLJ.jl => Bmlj/GMM_mlj.jl} (92%)
 rename src/{Imputation/Imputation_MLJ.jl => Bmlj/Imputation_mlj.jl} (94%)
 create mode 100644 src/Bmlj/Perceptron_mlj.jl
 rename src/{Trees/Trees_MLJ.jl => Bmlj/Trees_mlj.jl} (89%)
 delete mode 100644 src/Nn/Nn_MLJ.jl

diff --git a/src/BetaML.jl b/src/BetaML.jl
index 900c850c..7abc4c54 100644
--- a/src/BetaML.jl
+++ b/src/BetaML.jl
@@ -56,11 +56,11 @@ import .Bmlj # some MLJ models have the same name as BetaML models, set them in
 # ------------------------------------------------------------------------------
 #MLJ interface...
 const MLJ_PERCEPTRON_MODELS = (Bmlj.LinearPerceptron, Bmlj.KernelPerceptron, Bmlj.Pegasos)
-const MLJ_TREES_MODELS      = (DecisionTreeClassifier, DecisionTreeRegressor, RandomForestClassifier, RandomForestRegressor)
-const MLJ_CLUSTERING_MODELS = (KMeans, KMedoids, GaussianMixtureClusterer)
-const MLJ_IMPUTERS_MODELS   = (SimpleImputer, GaussianMixtureImputer, RandomForestImputer,GeneralImputer) # these are the name of the MLJ models, not the BetaML ones...
-const MLJ_NN_MODELS         = (NeuralNetworkRegressor,MultitargetNeuralNetworkRegressor, NeuralNetworkClassifier)
-const MLJ_OTHER_MODELS      = (GaussianMixtureRegressor,MultitargetGaussianMixtureRegressor,Bmlj.AutoEncoder)
+const MLJ_TREES_MODELS      = (Bmlj.DecisionTreeClassifier, Bmlj.DecisionTreeRegressor, Bmlj.RandomForestClassifier, Bmlj.RandomForestRegressor)
+const MLJ_CLUSTERING_MODELS = (Bmlj.KMeans, Bmlj.KMedoids, Bmlj.GaussianMixtureClusterer)
+const MLJ_IMPUTERS_MODELS   = (Bmlj.SimpleImputer, Bmlj.GaussianMixtureImputer, Bmlj.RandomForestImputer, Bmlj.GeneralImputer) # these are the names of the MLJ models, not the BetaML ones...
+const MLJ_NN_MODELS         = (Bmlj.NeuralNetworkRegressor, Bmlj.MultitargetNeuralNetworkRegressor, Bmlj.NeuralNetworkClassifier)
+const MLJ_OTHER_MODELS      = (Bmlj.GaussianMixtureRegressor, Bmlj.MultitargetGaussianMixtureRegressor, Bmlj.AutoEncoder)
 const MLJ_INTERFACED_MODELS = (MLJ_PERCEPTRON_MODELS..., MLJ_TREES_MODELS..., MLJ_CLUSTERING_MODELS..., MLJ_IMPUTERS_MODELS..., MLJ_NN_MODELS..., MLJ_OTHER_MODELS...)
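Taken together, the two changes above can be exercised as follows. This is a minimal sketch assuming a post-patch checkout of BetaML: the fully qualified `BetaML.Bmlj.*` names follow the new `load_path` metadata set later in this patch, and the type-valued `mixtures` keyword relies on the bug fix described in point 2.

```julia
using BetaML

# The MLJ wrappers now live in the unexported `Bmlj` submodule, so they are
# reached with fully qualified names (they are no longer exported by BetaML):
imp = BetaML.Bmlj.GaussianMixtureImputer(n_classes=2)

# After the fix, `mixtures` can be given as a mixture *type*, which the
# constructor expands internally to `n_classes` fresh instances...
r1 = BetaML.Bmlj.GaussianMixtureRegressor(n_classes=2, mixtures=BetaML.GMM.SphericalGaussian)

# ...or, as before, as an explicit vector of mixture instances:
r2 = BetaML.Bmlj.GaussianMixtureRegressor(mixtures=[BetaML.GMM.DiagonalGaussian() for i in 1:3])
```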
diff --git a/src/Bmlj/Bmlj.jl b/src/Bmlj/Bmlj.jl
index dc4bfddd..0a4ee011 100644
--- a/src/Bmlj/Bmlj.jl
+++ b/src/Bmlj/Bmlj.jl
@@ -5,12 +5,15 @@
 # MLJ interface for BetaML models

 In this module we define the interface of several BetaML models. They can be used using the [MLJ framework](https://github.com/alan-turing-institute/MLJ.jl).
+
+Note that the MLJ models (whose names may coincide with those of the underlying BetaML models) are not exported. You can access them with `BetaML.Bmlj.ModelXYZ`.
+
 """
 module Bmlj

-mljverbosity_to_betaml_verbosity
+using Random, LinearAlgebra, Statistics
 using CategoricalArrays, DocStringExtensions
-using Random
+
 import MLJModelInterface       # It seems that having done this in the top module is not enought
 const MMI = MLJModelInterface  # We need to repeat it here
@@ -24,12 +27,11 @@ import ..BetaML
 import ..Utils # can't using it as it exports some same-name models
 import ..Perceptron
 import ..Nn: AbstractLayer, ADAM, SGD, NeuralNetworkEstimator, OptimisationAlgorithm, DenseLayer, NN
-import ..Utils: AbstractRNG, squared_cost, SuccessiveHalvingSearch, mljverbosity_to_betaml_verbosity
+import ..Utils: AbstractRNG, squared_cost, SuccessiveHalvingSearch

 export mljverbosity_to_betaml_verbosity

-
 """
 $(TYPEDSIGNATURES)

@@ -51,11 +53,11 @@ function mljverbosity_to_betaml_verbosity(i::Integer)
 end

 include("Perceptron_mlj.jl") # Perceptron-like algorithms
-#include("Trees_mlj.jl")      # Decision Trees and ensembles (Random Forests)
-#include("Clustering_mlj.jl") # Clustering (hard) algorithms
-#include("GMM_mlj.jl")        # GMM-based learners (clustering, fitter, regression)
-#include("Imputation_mlj.jl")
-#include("Nn_mlj.jl")
-include("Utils_mlj.jl")
+include("Trees_mlj.jl")      # Decision Trees and ensembles (Random Forests)
+include("Clustering_mlj.jl") # Clustering (hard) algorithms
+include("GMM_mlj.jl")        # GMM-based learners (clustering, fitter, regression)
+include("Imputation_mlj.jl") # Imputation models
+include("Nn_mlj.jl")         # Neural network models
+include("Utils_mlj.jl")      # Various transformers/encoders

 end
\ No newline at end of file
diff --git a/src/Clustering/Clustering_MLJ.jl b/src/Bmlj/Clustering_mlj.jl
similarity index 92%
rename from src/Clustering/Clustering_MLJ.jl
rename to src/Bmlj/Clustering_mlj.jl
index b6389181..b1dc2ca8 100644
--- a/src/Clustering/Clustering_MLJ.jl
+++ b/src/Bmlj/Clustering_mlj.jl
@@ -2,9 +2,6 @@

 # MLJ interface for hard clustering models

-import MLJModelInterface # It seems that having done this in the top module is not enought
-const MMI = MLJModelInterface # We need to repeat it here
-
 export KMeans, KMedoids

 # ------------------------------------------------------------------------------
@@ -165,11 +162,11 @@ function MMI.fit(m::Union{KMeans,KMedoids}, verbosity, X)
     x  = MMI.matrix(X) # convert table to matrix
     # Using low level API here. We could switch to APIV2...
     typeof(verbosity) <: Integer || error("Verbosity must be a integer.
Current \"steps\" are 0, 1, 2 and 3.") - verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity) + verbosity = mljverbosity_to_betaml_verbosity(verbosity) if typeof(m) == KMeans - (assignedClasses,representatives) = kmeans(x,m.n_classes,dist=m.dist,initialisation_strategy=m.initialisation_strategy,initial_representatives=m.initial_representatives,rng=m.rng,verbosity=verbosity) + (assignedClasses,representatives) = BetaML.Clustering.kmeans(x,m.n_classes,dist=m.dist,initialisation_strategy=m.initialisation_strategy,initial_representatives=m.initial_representatives,rng=m.rng,verbosity=verbosity) else - (assignedClasses,representatives) = kmedoids(x,m.n_classes,dist=m.dist,initialisation_strategy=m.initialisation_strategy,initial_representatives=m.initial_representatives,rng=m.rng, verbosity=verbosity) + (assignedClasses,representatives) = BetaML.Clustering.kmedoids(x,m.n_classes,dist=m.dist,initialisation_strategy=m.initialisation_strategy,initial_representatives=m.initial_representatives,rng=m.rng, verbosity=verbosity) end cache=nothing report=nothing @@ -216,7 +213,7 @@ MMI.metadata_model(KMeans, output_scitype = MMI.Table(MMI.Continuous), # scitype of the output of `transform` target_scitype = AbstractArray{<:MMI.Multiclass}, # scitype of the output of `predict` supports_weights = false, # does the model support sample weights? - load_path = "BetaML.Clustering.KMeans" + load_path = "BetaML.Bmlj.KMeans" ) MMI.metadata_model(KMedoids, @@ -224,5 +221,5 @@ MMI.metadata_model(KMedoids, output_scitype = MMI.Table(MMI.Continuous), # scitype of the output of `transform` target_scitype = AbstractArray{<:MMI.Multiclass}, # scitype of the output of `predict` supports_weights = false, # does the model support sample weights? - load_path = "BetaML.Clustering.KMedoids" + load_path = "BetaML.Bmlj.KMedoids" ) \ No newline at end of file diff --git a/src/GMM/GMM_MLJ.jl b/src/Bmlj/GMM_mlj.jl similarity index 92% rename from src/GMM/GMM_MLJ.jl rename to src/Bmlj/GMM_mlj.jl index d8f29773..90bdccc2 100644 --- a/src/GMM/GMM_MLJ.jl +++ b/src/Bmlj/GMM_mlj.jl @@ -1,9 +1,6 @@ "Part of [BetaML](https://github.com/sylvaticus/BetaML.jl). Licence is MIT." -# MLJ interface for clustering models - -import MLJModelInterface # It seems that having done this in the top module is not enought -const MMI = MLJModelInterface # We need to repeat it here +# MLJ interface for GMM based models export GaussianMixtureClusterer, GaussianMixtureRegressor, MultitargetGaussianMixtureRegressor @@ -68,7 +65,7 @@ mutable struct GaussianMixtureClusterer <: MMI.Unsupervised This parameter can also be given symply in term of a _type_. In this case it is automatically extended to a vector of `n_classes` mixtures of the specified type. Note that mixing of different mixture types is not currently supported. [def: `[DiagonalGaussian() for i in 1:n_classes]`]""" - mixtures::Union{Type,Vector{<: AbstractMixture}} + mixtures::Union{Type,Vector{<: BetaML.GMM.AbstractMixture}} "Tolerance to stop the algorithm [default: 10^(-6)]" tol::Float64 "Minimum variance for the mixtures [default: 0.05]" @@ -92,7 +89,7 @@ end function GaussianMixtureClusterer(; n_classes = 3, initial_probmixtures = Float64[], - mixtures = [DiagonalGaussian() for i in 1:n_classes], + mixtures = [BetaML.GMM.DiagonalGaussian() for i in 1:n_classes], tol = 10^(-6), minimum_variance = 0.05, minimum_covariance = 0.0, @@ -162,7 +159,7 @@ mutable struct GaussianMixtureRegressor <: MMI.Deterministic This parameter can also be given symply in term of a _type_. 
In this case it is automatically extended to a vector of `n_classes`` mixtures of the specified type.
 Note that mixing of different mixture types is not currently supported.
 [def: `[DiagonalGaussian() for i in 1:n_classes]`]"""
-    mixtures::Union{Type,Vector{<: AbstractMixture}}
+    mixtures::Union{Type,Vector{<: BetaML.GMM.AbstractMixture}}
     "Tolerance to stop the algorithm [default: 10^(-6)]"
     tol::Float64
     "Minimum variance for the mixtures [default: 0.05]"
@@ -186,7 +183,7 @@ end
 function GaussianMixtureRegressor(;
     n_classes             = 3,
     initial_probmixtures  = [],
-    mixtures              = [DiagonalGaussian() for i in 1:n_classes],
+    mixtures              = [BetaML.GMM.DiagonalGaussian() for i in 1:n_classes],
     tol                   = 10^(-6),
     minimum_variance      = 0.05,
     minimum_covariance    = 0.0,
@@ -195,7 +192,7 @@ function GaussianMixtureRegressor(;
     rng                   = Random.GLOBAL_RNG
 )
     if typeof(mixtures) <: UnionAll
         mixtures = [mixtures() for i in 1:n_classes]
     end
     return GaussianMixtureRegressor(n_classes,initial_probmixtures,mixtures,tol,minimum_variance,minimum_covariance,initialisation_strategy,maximum_iterations,rng)
 end
@@ -258,7 +255,7 @@ mutable struct MultitargetGaussianMixtureRegressor <: MMI.Deterministic
 This parameter can also be given symply in term of a _type_. In this case it is automatically extended to a vector of `n_classes`` mixtures of the specified type.
 Note that mixing of different mixture types is not currently supported.
 [def: `[DiagonalGaussian() for i in 1:n_classes]`]"""
-    mixtures::Union{Type,Vector{<: AbstractMixture}}
+    mixtures::Union{Type,Vector{<: BetaML.GMM.AbstractMixture}}
     "Tolerance to stop the algorithm [default: 10^(-6)]"
     tol::Float64
     "Minimum variance for the mixtures [default: 0.05]"
@@ -282,7 +279,7 @@ end
 function MultitargetGaussianMixtureRegressor(;
     n_classes             = 3,
     initial_probmixtures  = [],
-    mixtures              = [DiagonalGaussian() for i in 1:n_classes],
+    mixtures              = [BetaML.GMM.DiagonalGaussian() for i in 1:n_classes],
     tol                   = 10^(-6),
     minimum_variance      = 0.05,
     minimum_covariance    = 0.0,
@@ -314,9 +311,9 @@ function MMI.fit(m::GaussianMixtureClusterer, verbosity, X)
     end
     =#
     typeof(verbosity) <: Integer || error("Verbosity must be a integer. Current \"steps\" are 0, 1, 2 and 3.")
-    verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity)
+    verbosity = mljverbosity_to_betaml_verbosity(verbosity)
     mixtures = m.mixtures
-    res = gmm(x,m.n_classes,initial_probmixtures=deepcopy(m.initial_probmixtures),mixtures=mixtures, minimum_variance=m.minimum_variance, minimum_covariance=m.minimum_covariance,initialisation_strategy=m.initialisation_strategy,verbosity=verbosity,maximum_iterations=m.maximum_iterations,rng=m.rng)
+    res = BetaML.GMM.gmm(x,m.n_classes,initial_probmixtures=deepcopy(m.initial_probmixtures),mixtures=mixtures, minimum_variance=m.minimum_variance, minimum_covariance=m.minimum_covariance,initialisation_strategy=m.initialisation_strategy,verbosity=verbosity,maximum_iterations=m.maximum_iterations,rng=m.rng)
     fitResults = (pₖ=res.pₖ,mixtures=res.mixtures) # res.pₙₖ
     cache      = nothing
     report     = (res.ϵ,res.lL,res.BIC,res.AIC)
@@ -327,7 +324,7 @@ MMI.fitted_params(model::GaussianMixtureClusterer, fitresult) = (weights=fitresu
 function MMI.fit(m::GaussianMixtureRegressor, verbosity, X, y)
     x = MMI.matrix(X) # convert table to matrix
     typeof(verbosity) <: Integer || error("Verbosity must be a integer.
Current \"steps\" are 0, 1, 2 and 3.") - verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity) + verbosity = mljverbosity_to_betaml_verbosity(verbosity) ndims(y) < 2 || error("Trying to fit `GaussianMixtureRegressor` with a multidimensional target. Use `MultitargetGaussianMixtureRegressor` instead.") #= if typeof(y) <: AbstractMatrix @@ -345,7 +342,7 @@ function MMI.fit(m::GaussianMixtureRegressor, verbosity, X, y) end =# mixtures = m.mixtures - betamod = GMMRegressor2( + betamod = BetaML.GMM.GMMRegressor2( n_classes = m.n_classes, initial_probmixtures = m.initial_probmixtures, mixtures = mixtures, @@ -363,7 +360,7 @@ end function MMI.fit(m::MultitargetGaussianMixtureRegressor, verbosity, X, y) x = MMI.matrix(X) # convert table to matrix typeof(verbosity) <: Integer || error("Verbosity must be a integer. Current \"steps\" are 0, 1, 2 and 3.") - verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity) + verbosity = mljverbosity_to_betaml_verbosity(verbosity) ndims(y) >= 2 || @warn "Trying to fit `MultitargetGaussianMixtureRegressor` with a single-dimensional target. You may want to consider `GaussianMixtureRegressor` instead." #= if typeof(y) <: AbstractMatrix @@ -381,7 +378,7 @@ function MMI.fit(m::MultitargetGaussianMixtureRegressor, verbosity, X, y) end =# mixtures = m.mixtures - betamod = GMMRegressor2( + betamod = BetaML.GMM.GMMRegressor2( n_classes = m.n_classes, initial_probmixtures = m.initial_probmixtures, mixtures = mixtures, @@ -407,7 +404,7 @@ function MMI.predict(m::GaussianMixtureClusterer, fitResults, X) (pₖ,mixtures) = (fitResults.pₖ, fitResults.mixtures) nCl = length(pₖ) # Compute the probabilities that maximise the likelihood given existing mistures and a single iteration (i.e. doesn't update the mixtures) - thisOut = gmm(x,nCl,initial_probmixtures=pₖ,mixtures=mixtures,tol=m.tol,verbosity=NONE,minimum_variance=m.minimum_variance,minimum_covariance=m.minimum_covariance,initialisation_strategy="given",maximum_iterations=1,rng=m.rng) + thisOut = BetaML.GMM.gmm(x,nCl,initial_probmixtures=pₖ,mixtures=mixtures,tol=m.tol,verbosity=NONE,minimum_variance=m.minimum_variance,minimum_covariance=m.minimum_covariance,initialisation_strategy="given",maximum_iterations=1,rng=m.rng) classes = CategoricalArray(1:nCl) predictions = MMI.UnivariateFinite(classes, thisOut.pₙₖ) return predictions @@ -416,12 +413,12 @@ end function MMI.predict(m::GaussianMixtureRegressor, fitResults, X) x = MMI.matrix(X) # convert table to matrix betamod = fitResults - return dropdims(predict(betamod,x),dims=2) + return dropdims(BetaML.Api.predict(betamod,x),dims=2) end function MMI.predict(m::MultitargetGaussianMixtureRegressor, fitResults, X) x = MMI.matrix(X) # convert table to matrix betamod = fitResults - return predict(betamod,x) + return BetaML.Api.predict(betamod,x) end @@ -434,7 +431,7 @@ MMI.metadata_model(GaussianMixtureClusterer, target_scitype = AbstractArray{<:MMI.Multiclass}, # scitype of the output of `predict` #prediction_type = :probabilistic, # option not added to metadata_model function, need to do it separately supports_weights = false, # does the model support sample weights? - load_path = "BetaML.GMM.GaussianMixtureClusterer" + load_path = "BetaML.Bmlj.GaussianMixtureClusterer" ) MMI.prediction_type(::Type{<:GaussianMixtureClusterer}) = :probabilistic @@ -442,11 +439,11 @@ MMI.metadata_model(GaussianMixtureRegressor, input_scitype = MMI.Table(Union{MMI.Missing, MMI.Infinite}), target_scitype = AbstractVector{<: MMI.Continuous}, # for a supervised model, what target? 
supports_weights = false, # does the model support sample weights? - load_path = "BetaML.GMM.GaussianMixtureRegressor" + load_path = "BetaML.Bmlj.GaussianMixtureRegressor" ) MMI.metadata_model(MultitargetGaussianMixtureRegressor, input_scitype = MMI.Table(Union{MMI.Missing, MMI.Infinite}), target_scitype = AbstractMatrix{<: MMI.Continuous}, # for a supervised model, what target? supports_weights = false, # does the model support sample weights? - load_path = "BetaML.GMM.MultitargetGaussianMixtureRegressor" + load_path = "BetaML.Bmlj.MultitargetGaussianMixtureRegressor" ) diff --git a/src/Imputation/Imputation_MLJ.jl b/src/Bmlj/Imputation_mlj.jl similarity index 94% rename from src/Imputation/Imputation_MLJ.jl rename to src/Bmlj/Imputation_mlj.jl index f7f1723b..b825d3c6 100644 --- a/src/Imputation/Imputation_MLJ.jl +++ b/src/Bmlj/Imputation_mlj.jl @@ -2,9 +2,6 @@ # MLJ interface for imputers models -import MLJModelInterface # It seems that having done this in the top module is not enought -const MMI = MLJModelInterface # We need to repeat it here - export SimpleImputer,GaussianMixtureImputer, RandomForestImputer, GeneralImputer """ @@ -115,7 +112,7 @@ mutable struct GaussianMixtureImputer <: MMI.Unsupervised This parameter can also be given symply in term of a _type_. In this case it is automatically extended to a vector of `n_classes`` mixtures of the specified type. Note that mixing of different mixture types is not currently supported and that currently implemented mixtures are `SphericalGaussian`, `DiagonalGaussian` and `FullGaussian`. [def: `DiagonalGaussian`]""" - mixtures::Union{Type,Vector{<: AbstractMixture}} + mixtures::Union{Type,Vector{<: BetaML.GMM.AbstractMixture}} "Tolerance to stop the algorithm [default: 10^(-6)]" tol::Float64 "Minimum variance for the mixtures [default: 0.05]" @@ -137,7 +134,7 @@ end function GaussianMixtureImputer(; n_classes = 3, initial_probmixtures = Float64[], - mixtures = DiagonalGaussian, #[DiagonalGaussian() for i in 1:n_classes], + mixtures = BetaML.GMM.DiagonalGaussian, #[DiagonalGaussian() for i in 1:n_classes], tol = 10^(-6), minimum_variance = 0.05, minimum_covariance = 0.0, @@ -346,8 +343,8 @@ GeneralImputer(; function MMI.fit(m::SimpleImputer, verbosity, X) x = MMI.matrix(X) # convert table to matrix typeof(verbosity) <: Integer || error("Verbosity must be a integer. Current \"steps\" are 0, 1, 2 and 3.") - verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity) - mod = FeatureBasedImputer( + verbosity = mljverbosity_to_betaml_verbosity(verbosity) + mod = BetaML.Imputation.FeatureBasedImputer( statistic = m.statistic, norm = m.norm, verbosity = verbosity, @@ -356,14 +353,14 @@ function MMI.fit(m::SimpleImputer, verbosity, X) #fitResults = MMI.table(predict(mod)) fitResults = mod cache = nothing - report = info(mod) + report = BetaML.Api.info(mod) return (fitResults, cache, report) end function MMI.fit(m::GaussianMixtureImputer, verbosity, X) x = MMI.matrix(X) # convert table to matrix typeof(verbosity) <: Integer || error("Verbosity must be a integer. 
Current \"steps\" are 0, 1, 2 and 3.") - verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity) + verbosity = mljverbosity_to_betaml_verbosity(verbosity) #=if m.mixtures == :diag_gaussian mixtures = [DiagonalGaussian() for i in 1:m.n_classes] elseif m.mixtures == :full_gaussian @@ -375,7 +372,7 @@ function MMI.fit(m::GaussianMixtureImputer, verbosity, X) end =# - mod = GMMImputer( + mod = BetaML.Imputation.GMMImputer( n_classes = m.n_classes, initial_probmixtures = m.initial_probmixtures, mixtures = m.mixtures, @@ -390,7 +387,7 @@ function MMI.fit(m::GaussianMixtureImputer, verbosity, X) #fitResults = MMI.table(predict(mod)) fitResults = mod cache = nothing - report = info(mod) + report = BetaML.Api.info(mod) return (fitResults, cache, report) end @@ -398,8 +395,8 @@ end function MMI.fit(m::RandomForestImputer, verbosity, X) x = MMI.matrix(X) # convert table to matrix typeof(verbosity) <: Integer || error("Verbosity must be a integer. Current \"steps\" are 0, 1, 2 and 3.") - verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity) - mod = RFImputer( + verbosity = mljverbosity_to_betaml_verbosity(verbosity) + mod = BetaML.Imputation.RFImputer( n_trees = m.n_trees, max_depth = m.max_depth, min_gain = m.min_gain, @@ -412,7 +409,7 @@ function MMI.fit(m::RandomForestImputer, verbosity, X) #multiple_imputations = m.multiple_imputations, rng = m.rng, ) - fit!(mod,x) + BetaML.Api.fit!(mod,x) #if m.multiple_imputations == 1 # fitResults = MMI.table(predict(mod)) #else @@ -420,15 +417,15 @@ function MMI.fit(m::RandomForestImputer, verbosity, X) #end fitResults = mod cache = nothing - report = info(mod) + report = BetaML.Api.info(mod) return (fitResults, cache, report) end function MMI.fit(m::GeneralImputer, verbosity, X) x = MMI.matrix(X) # convert table to matrix typeof(verbosity) <: Integer || error("Verbosity must be a integer. Current \"steps\" are 0, 1, 2 and 3.") - verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity) - mod = UniversalImputer( + verbosity = mljverbosity_to_betaml_verbosity(verbosity) + mod = BetaML.Imputation.UniversalImputer( cols_to_impute = m.cols_to_impute, estimator = m.estimator, missing_supported = m.missing_supported, @@ -438,7 +435,7 @@ function MMI.fit(m::GeneralImputer, verbosity, X) rng = m.rng, verbosity = verbosity, ) - fit!(mod,x) + BetaML.Api.fit!(mod,x) #if m.multiple_imputations == 1 # fitResults = MMI.table(predict(mod)) #else @@ -446,7 +443,7 @@ function MMI.fit(m::GeneralImputer, verbosity, X) #end fitResults = mod cache = nothing - report = info(mod) + report = BetaML.Api.info(mod) return (fitResults, cache, report) end @@ -457,7 +454,7 @@ end function MMI.transform(m::Union{SimpleImputer,GaussianMixtureImputer,RandomForestImputer}, fitResults, X) x = MMI.matrix(X) # convert table to matrix mod = fitResults - return MMI.table(predict(mod,x)) + return MMI.table(BetaML.Api.predict(mod,x)) end @@ -474,10 +471,10 @@ function MMI.transform(m::GeneralImputer, fitResults, X) if fitResults.hpar.recursive_passages == 1 || all(missing_supported) x = MMI.matrix(X) # convert table to matrix mod = fitResults - return MMI.table(predict(mod,x)) + return MMI.table(BetaML.Api.predict(mod,x)) else mod = fitResults - return MMI.table(predict(mod)) + return MMI.table(BetaML.Api.predict(mod)) end end @@ -488,25 +485,25 @@ MMI.metadata_model(SimpleImputer, input_scitype = MMI.Table(Union{MMI.Continuous,MMI.Missing}), output_scitype = MMI.Table(MMI.Continuous), # for an unsupervised, what output? 
    supports_weights = false,                        # does the model support sample weights?
-   load_path        = "BetaML.Imputation.SimpleImputer"
+   load_path        = "BetaML.Bmlj.SimpleImputer"
)

MMI.metadata_model(GaussianMixtureImputer,
    input_scitype    = MMI.Table(Union{MMI.Continuous,MMI.Missing}),
    output_scitype   = MMI.Table(MMI.Continuous),    # for an unsupervised, what output?
    supports_weights = false,                        # does the model support sample weights?
-   load_path        = "BetaML.Imputation.GaussianMixtureImputer"
+   load_path        = "BetaML.Bmlj.GaussianMixtureImputer"
)

MMI.metadata_model(RandomForestImputer,
    input_scitype    = MMI.Table(Union{MMI.Missing, MMI.Known}),
    output_scitype   = MMI.Table(MMI.Known),         # for an unsupervised, what output?
    supports_weights = false,                        # does the model support sample weights?
-   load_path        = "BetaML.Imputation.RandomForestImputer"
+   load_path        = "BetaML.Bmlj.RandomForestImputer"
)
MMI.metadata_model(GeneralImputer,
    input_scitype    = MMI.Table(Union{MMI.Missing, MMI.Known}),
    output_scitype   = MMI.Table(MMI.Known),         # for an unsupervised, what output?
    supports_weights = false,                        # does the model support sample weights?
-   load_path        = "BetaML.Imputation.GeneralImputer"
+   load_path        = "BetaML.Bmlj.GeneralImputer"
)
diff --git a/src/Bmlj/Nn_mlj.jl b/src/Bmlj/Nn_mlj.jl
index 715c073a..9a10d6b6 100644
--- a/src/Bmlj/Nn_mlj.jl
+++ b/src/Bmlj/Nn_mlj.jl
@@ -1,6 +1,372 @@
 "Part of [BetaML](https://github.com/sylvaticus/BetaML.jl). Licence is MIT."

-# MLJ interface for the models of BetaML
+# MLJ interface for Neural Network models

+using CategoricalArrays

-export
\ No newline at end of file
+export NeuralNetworkRegressor, MultitargetNeuralNetworkRegressor, NeuralNetworkClassifier
+
+
+# Model Structure declarations..
+"""
+$(TYPEDEF)
+
+A simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for regression of a single-dimensional target.
+
+# Parameters:
+$(FIELDS)
+
+# Notes:
+- data must be numerical
+- the label should be a _n-records_ vector.
+ +# Example: +```julia +julia> using MLJ + +julia> X, y = @load_boston; + +julia> modelType = @load NeuralNetworkRegressor pkg = "BetaML" verbosity=0 +BetaML.Nn.NeuralNetworkRegressor + +julia> layers = [BetaML.DenseLayer(12,20,f=BetaML.relu),BetaML.DenseLayer(20,20,f=BetaML.relu),BetaML.DenseLayer(20,1,f=BetaML.relu)]; + +julia> model = modelType(layers=layers,opt_alg=BetaML.ADAM()); +NeuralNetworkRegressor( + layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.23249759178069676 -0.4125090172711131 … 0.41401934928739 -0.33017881111237535; -0.27912169279319965 0.270551221249931 … 0.19258414323473344 0.1703002982374256; … ; 0.31186742456482447 0.14776438287394805 … 0.3624993442655036 0.1438885872964824; 0.24363744610286758 -0.3221033024934767 … 0.14886090419299408 0.038411663101909355], [-0.42360286004241765, -0.34355377040029594, 0.11510963232946697, 0.29078650404397893, -0.04940236502546075, 0.05142849152316714, -0.177685375947775, 0.3857630523957018, -0.25454667127064756, -0.1726731848206195, 0.29832456225553444, -0.21138505291162835, -0.15763643112604903, -0.08477044513587562, -0.38436681165349196, 0.20538016429104916, -0.25008157754468335, 0.268681800562054, 0.10600581996650865, 0.4262194464325672], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.08534180387478185 0.19659398307677617 … -0.3413633217504578 -0.0484925247381256; 0.0024419192794883915 -0.14614102508129 … -0.21912059923003044 0.2680725396694708; … ; 0.25151545823147886 -0.27532269951606037 … 0.20739970895058063 0.2891938885916349; -0.1699020711688904 -0.1350423717084296 … 0.16947589410758873 0.3629006047373296], [0.2158116357688406, -0.3255582642532289, -0.057314442103850394, 0.29029696770539953, 0.24994080694366455, 0.3624239027782297, -0.30674318230919984, -0.3854738338935017, 0.10809721838554087, 0.16073511121016176, -0.005923262068960489, 0.3157147976348795, -0.10938918304264739, -0.24521229198853187, -0.307167732178712, 0.0808907777008302, -0.014577497150872254, -0.0011287181458157214, 0.07522282588658086, 0.043366500526073104], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.021367697115938555 -0.28326652172347155 … 0.05346175368370165 -0.26037328415871647], [-0.2313659199724562], BetaML.Utils.relu, BetaML.Utils.drelu)], + loss = BetaML.Utils.squared_cost, + dloss = BetaML.Utils.dsquared_cost, + epochs = 100, + batch_size = 32, + opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var"#90#93"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), + shuffle = true, + descr = "", + cb = BetaML.Nn.fitting_info, + rng = Random._GLOBAL_RNG()) + +julia> mach = machine(model, X, y); + +julia> fit!(mach); + +julia> ŷ = predict(mach, X); + +julia> hcat(y,ŷ) +506×2 Matrix{Float64}: + 24.0 30.7726 + 21.6 28.0811 + 34.7 31.3194 + ⋮ + 23.9 30.9032 + 22.0 29.49 + 11.9 27.2438 +``` +""" +Base.@kwdef mutable struct NeuralNetworkRegressor <: MMI.Deterministic + "Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers" + layers::Union{Array{BetaML.Nn.AbstractLayer,1},Nothing} = nothing + """Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as matrices, even if the regression task is 1-D + !!! warning + If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`. + """ + loss::Union{Nothing,Function} = BetaML.Utils.squared_cost + "Derivative of the loss function [def: `BetaML.dsquared_cost`, i.e. 
use the derivative of the squared cost]. Use `nothing` for autodiff."
+    dloss::Union{Function,Nothing} = BetaML.Utils.dsquared_cost
+    "Number of epochs, i.e. passages through the whole training sample [def: `200`]"
+    epochs::Int64 = 200
+    "Size of each individual batch [def: `16`]"
+    batch_size::Int64 = 16
+    "The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers"
+    opt_alg::OptimisationAlgorithm = BetaML.Nn.ADAM()
+    "Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]"
+    shuffle::Bool = true
+    "An optional title and/or description for this model"
+    descr::String = ""
+    "A callback function to provide information during training [def: `fitting_info`]"
+    cb::Function=BetaML.Nn.fitting_info
+    "Random Number Generator (see [`FIXEDSEED`](@ref)) [default: `Random.GLOBAL_RNG`]
+    "
+    rng::AbstractRNG = Random.GLOBAL_RNG
+end
+
+"""
+$(TYPEDSIGNATURES)
+
+For the `verbosity` parameter see [`Verbosity`](@ref)
+
+"""
+function MMI.fit(m::NeuralNetworkRegressor, verbosity, X, y)
+    x = MMI.matrix(X)                     # convert table to matrix
+    typeof(verbosity) <: Integer || error("Verbosity must be an integer. Current \"steps\" are 0, 1, 2 and 3.")
+    verbosity = mljverbosity_to_betaml_verbosity(verbosity)
+    ndims(y) > 1 && error("The label should have only 1 dimension. Use `MultitargetNeuralNetworkRegressor` or `NeuralNetworkClassifier` for multi-dimensional outputs.")
+    mi = BetaML.Nn.NeuralNetworkEstimator(;layers=m.layers, loss=m.loss, dloss=m.dloss, epochs=m.epochs, batch_size=m.batch_size, opt_alg=m.opt_alg, shuffle=m.shuffle, cache=false, descr=m.descr, cb=m.cb, rng=m.rng, verbosity=verbosity)
+    BetaML.Api.fit!(mi,x,y)
+    fitresults = mi
+    cache      = nothing
+    report     = nothing
+    return fitresults, cache, report
+ end
+
+ MMI.predict(m::NeuralNetworkRegressor, fitresult, Xnew) = BetaML.Api.predict(fitresult, MMI.matrix(Xnew))
+
+ MMI.metadata_model(NeuralNetworkRegressor,
+    input_scitype    = MMI.Table(Union{MMI.Continuous,MMI.Count}),
+    target_scitype   = AbstractVector{<: Union{MMI.Continuous,MMI.Count}},
+    supports_weights = false,
+    load_path        = "BetaML.Bmlj.NeuralNetworkRegressor"
+)
+
+# ------------------------------------------------------------------------------
+# Model Structure declarations..
+"""
+$(TYPEDEF)
+
+A simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for regression of multi-dimensional targets.
+ +# Parameters: +$(FIELDS) + +# Notes: +- data must be numerical +- the label should be a _n-records_ by _n-dimensions_ matrix + +# Example: +```julia +julia> using MLJ + +julia> X, y = @load_boston; + +julia> ydouble = hcat(y, y .*2 .+5); + +julia> modelType = @load MultitargetNeuralNetworkRegressor pkg = "BetaML" verbosity=0 +BetaML.Nn.MultitargetNeuralNetworkRegressor + +julia> layers = [BetaML.DenseLayer(12,50,f=BetaML.relu),BetaML.DenseLayer(50,50,f=BetaML.relu),BetaML.DenseLayer(50,50,f=BetaML.relu),BetaML.DenseLayer(50,2,f=BetaML.relu)]; + +julia> model = modelType(layers=layers,opt_alg=BetaML.ADAM(),epochs=500) +MultitargetNeuralNetworkRegressor( + layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.2591582523441157 -0.027962845131416225 … 0.16044535560124418 -0.12838827994676857; -0.30381834909561184 0.2405495243851402 … -0.2588144861880588 0.09538577909777807; … ; -0.017320292924711156 -0.14042266424603767 … 0.06366999105841187 -0.13419651752478906; 0.07393079961409338 0.24521350531110264 … 0.04256867886217541 -0.0895506802948175], [0.14249427336553644, 0.24719379413682485, -0.25595911822556566, 0.10034088778965933, -0.017086404878505712, 0.21932184025609347, -0.031413516834861266, -0.12569076082247596, -0.18080140982481183, 0.14551901873323253 … -0.13321995621967364, 0.2436582233332092, 0.0552222336976439, 0.07000814133633904, 0.2280064379660025, -0.28885681475734193, -0.07414214246290696, -0.06783184733650621, -0.055318068046308455, -0.2573488383282579], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.0395424111703751 -0.22531232360829911 … -0.04341228943744482 0.024336206858365517; -0.16481887432946268 0.17798073384748508 … -0.18594039305095766 0.051159225856547474; … ; -0.011639475293705043 -0.02347011206244673 … 0.20508869536159186 -0.1158382446274592; -0.19078069527757857 -0.007487540070740484 … -0.21341165344291158 -0.24158671316310726], [-0.04283623889330032, 0.14924461547060602, -0.17039563392959683, 0.00907774027816255, 0.21738885963113852, -0.06308040225941691, -0.14683286822101105, 0.21726892197970937, 0.19784321784707126, -0.0344988665714947 … -0.23643089430602846, -0.013560425201427584, 0.05323948910726356, -0.04644175812567475, -0.2350400292671211, 0.09628312383424742, 0.07016420995205697, -0.23266392927140334, -0.18823664451487, 0.2304486691429084], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.11504184627266828 0.08601794194664503 … 0.03843129724045469 -0.18417305624127284; 0.10181551438831654 0.13459759904443674 … 0.11094951365942118 -0.1549466590355218; … ; 0.15279817525427697 0.0846661196058916 … -0.07993619892911122 0.07145402617285884; -0.1614160186346092 -0.13032002335149 … -0.12310552194729624 -0.15915773071049827], [-0.03435885900946367, -0.1198543931290306, 0.008454985905194445, -0.17980887188986966, -0.03557204910359624, 0.19125847393334877, -0.10949700778538696, -0.09343206702591, -0.12229583511781811, -0.09123969069220564 … 0.22119233518322862, 0.2053873143308657, 0.12756489387198222, 0.11567243705173319, -0.20982445664020496, 0.1595157838386987, -0.02087331046544119, -0.20556423263489765, -0.1622837764237961, -0.019220998739847395], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.25796717031347993 0.17579536633402948 … -0.09992960168785256 -0.09426177454620635; -0.026436330246675632 0.18070899284865127 … -0.19310119102392206 -0.06904005900252091], [0.16133004882307822, -0.3061228721091248], BetaML.Utils.relu, BetaML.Utils.drelu)], + loss = BetaML.Utils.squared_cost, + dloss = 
BetaML.Utils.dsquared_cost,
+    epochs = 500,
+    batch_size = 32,
+    opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var"#90#93"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]),
+    shuffle = true,
+    descr = "",
+    cb = BetaML.Nn.fitting_info,
+    rng = Random._GLOBAL_RNG())
+
+julia> mach = machine(model, X, ydouble);
+
+julia> fit!(mach);
+
+julia> ŷdouble = predict(mach, X);
+
+julia> hcat(ydouble,ŷdouble)
+506×4 Matrix{Float64}:
+ 24.0  53.0  28.4624  62.8607
+ 21.6  48.2  22.665   49.7401
+ 34.7  74.4  31.5602  67.9433
+ 33.4  71.8  33.0869  72.4337
+  ⋮
+ 23.9  52.8  23.3573  50.654
+ 22.0  49.0  22.1141  48.5926
+ 11.9  28.8  19.9639  45.5823
+```
+
+"""
+Base.@kwdef mutable struct MultitargetNeuralNetworkRegressor <: MMI.Deterministic
+    "Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers"
+    layers::Union{Array{BetaML.Nn.AbstractLayer,1},Nothing} = nothing
+    """Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as matrices.
+    !!! warning
+        If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.
+    """
+    loss::Union{Nothing,Function} = BetaML.Utils.squared_cost
+    "Derivative of the loss function [def: `BetaML.dsquared_cost`, i.e. use the derivative of the squared cost]. Use `nothing` for autodiff."
+    dloss::Union{Function,Nothing} = BetaML.Utils.dsquared_cost
+    "Number of epochs, i.e. passages through the whole training sample [def: `300`]"
+    epochs::Int64 = 300
+    "Size of each individual batch [def: `16`]"
+    batch_size::Int64 = 16
+    "The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers"
+    opt_alg::OptimisationAlgorithm = BetaML.Nn.ADAM()
+    "Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]"
+    shuffle::Bool = true
+    "An optional title and/or description for this model"
+    descr::String = ""
+    "A callback function to provide information during training [def: `BetaML.fitting_info`]"
+    cb::Function=BetaML.Nn.fitting_info
+    "Random Number Generator (see [`FIXEDSEED`](@ref)) [default: `Random.GLOBAL_RNG`]
+    "
+    rng::AbstractRNG = Random.GLOBAL_RNG
+end
+"""
+$(TYPEDSIGNATURES)
+
+For the `verbosity` parameter see [`Verbosity`](@ref)
+
+"""
+function MMI.fit(m::MultitargetNeuralNetworkRegressor, verbosity, X, y)
+    x = MMI.matrix(X)                     # convert table to matrix
+    typeof(verbosity) <: Integer || error("Verbosity must be an integer. Current \"steps\" are 0, 1, 2 and 3.")
+    verbosity = mljverbosity_to_betaml_verbosity(verbosity)
+    ndims(y) > 1 || error("The label should have multiple dimensions.
Use `NeuralNetworkRegressor` for single-dimensional outputs.") + mi = BetaML.Nn.NeuralNetworkEstimator(;layers=m.layers,loss=m.loss, dloss=m.dloss, epochs=m.epochs, batch_size=m.batch_size, opt_alg=m.opt_alg,shuffle=m.shuffle, cache=false, descr=m.descr, cb=m.cb, rng=m.rng, verbosity=verbosity) + BetaML.Api.fit!(mi,x,y) + fitresults = mi + cache = nothing + report = nothing + return fitresults, cache, report + end + + MMI.predict(m::MultitargetNeuralNetworkRegressor, fitresult, Xnew) = BetaML.Api.predict(fitresult, MMI.matrix(Xnew)) + + MMI.metadata_model(MultitargetNeuralNetworkRegressor, + input_scitype = MMI.Table(Union{MMI.Continuous,MMI.Count}), + target_scitype = AbstractMatrix{<: Union{MMI.Continuous,MMI.Count}}, + supports_weights = false, + load_path = "BetaML.Bmlj.MultitargetNeuralNetworkRegressor" +) + +# ------------------------------------------------------------------------------ + +""" +$(TYPEDEF) + +A simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for classification problems. + +# Parameters: +$(FIELDS) + +# Notes: +- data must be numerical +- the label should be a _n-records_ by _n-dimensions_ matrix (e.g. a one-hot-encoded data for classification), where the output columns should be interpreted as the probabilities for each categories. + +# Example: +```julia +julia> using MLJ + +julia> X, y = @load_iris; + +julia> modelType = @load NeuralNetworkClassifier pkg = "BetaML" verbosity=0 +BetaML.Nn.NeuralNetworkClassifier + +julia> layers = [BetaML.DenseLayer(4,8,f=BetaML.relu),BetaML.DenseLayer(8,8,f=BetaML.relu),BetaML.DenseLayer(8,3,f=BetaML.relu),BetaML.VectorFunctionLayer(3,f=BetaML.softmax)]; + +julia> model = modelType(layers=layers,opt_alg=BetaML.ADAM()) +NeuralNetworkClassifier( + layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.376173352338049 0.7029289511758696 -0.5589563304592478 -0.21043274001651874; 0.044758889527899415 0.6687689636685921 0.4584331114653877 0.6820506583840453; … ; -0.26546358457167507 -0.28469736227283804 -0.164225549922154 -0.516785639164486; -0.5146043550684141 -0.0699113265130964 0.14959906603941908 -0.053706860039406834], [0.7003943613125758, -0.23990840466587576, -0.23823126271387746, 0.4018101580410387, 0.2274483050356888, -0.564975060667734, 0.1732063297031089, 0.11880299829896945], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.029467850439546583 0.4074661266592745 … 0.36775675246760053 -0.595524555448422; 0.42455597698371306 -0.2458082732997091 … -0.3324220683462514 0.44439454998610595; … ; -0.2890883863364267 -0.10109249362508033 … -0.0602680568207582 0.18177278845097555; -0.03432587226449335 -0.4301192922760063 … 0.5646018168286626 0.47269177680892693], [0.13777442835428688, 0.5473306726675433, 0.3781939472904011, 0.24021813428130567, -0.0714779477402877, -0.020386373530818958, 0.5465466618404464, -0.40339790713616525], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([0.6565120540082393 0.7139211611842745 … 0.07809812467915389 -0.49346311403373844; -0.4544472987041656 0.6502667641568863 … 0.43634608676548214 0.7213049952968921; 0.41212264783075303 -0.21993289366360613 … 0.25365007887755064 -0.5664469566269569], [-0.6911986792747682, -0.2149343209329364, -0.6347727539063817], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.VectorFunctionLayer{0}(fill(NaN), 3, 3, BetaML.Utils.softmax, BetaML.Utils.dsoftmax, nothing)], + loss = BetaML.Utils.crossentropy, + dloss = BetaML.Utils.dcrossentropy, + epochs = 100, + batch_size = 32, + opt_alg = 
BetaML.Nn.ADAM(BetaML.Nn.var"#90#93"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]),
+    shuffle = true,
+    descr = "",
+    cb = BetaML.Nn.fitting_info,
+    categories = nothing,
+    handle_unknown = "error",
+    other_categories_name = nothing,
+    rng = Random._GLOBAL_RNG())
+
+julia> mach = machine(model, X, y);
+
+julia> fit!(mach);
+
+julia> classes_est = predict(mach, X)
+150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt8, Float64}:
+ UnivariateFinite{Multiclass{3}}(setosa=>0.575, versicolor=>0.213, virginica=>0.213)
+ UnivariateFinite{Multiclass{3}}(setosa=>0.573, versicolor=>0.213, virginica=>0.213)
+ ⋮
+ UnivariateFinite{Multiclass{3}}(setosa=>0.236, versicolor=>0.236, virginica=>0.529)
+ UnivariateFinite{Multiclass{3}}(setosa=>0.254, versicolor=>0.254, virginica=>0.492)
+```
+"""
+Base.@kwdef mutable struct NeuralNetworkClassifier <: MMI.Probabilistic
+    "Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers. The last \"softmax\" layer is automatically added."
+    layers::Union{Array{BetaML.Nn.AbstractLayer,1},Nothing} = nothing
+    """Loss (cost) function [def: `BetaML.crossentropy`]. Should always assume y and ŷ as matrices.
+    !!! warning
+        If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.
+    """
+    loss::Union{Nothing,Function} = BetaML.Utils.crossentropy
+    "Derivative of the loss function [def: `BetaML.dcrossentropy`, i.e. the derivative of the cross-entropy]. Use `nothing` for autodiff."
+    dloss::Union{Function,Nothing} = BetaML.Utils.dcrossentropy
+    "Number of epochs, i.e. passages through the whole training sample [def: `200`]"
+    epochs::Int64 = 200
+    "Size of each individual batch [def: `16`]"
+    batch_size::Int64 = 16
+    "The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers"
+    opt_alg::OptimisationAlgorithm = BetaML.Nn.ADAM()
+    "Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]"
+    shuffle::Bool = true
+    "An optional title and/or description for this model"
+    descr::String = ""
+    "A callback function to provide information during training [def: `BetaML.fitting_info`]"
+    cb::Function=BetaML.Nn.fitting_info
+    "The categories to represent as columns. [def: `nothing`, i.e. unique training values]."
+    categories::Union{Vector,Nothing} = nothing
+    "How to handle categories not seen in training or not present in the provided `categories` array? \"error\" (default) raises an error, \"infrequent\" adds a specific column for these categories."
+    handle_unknown::String = "error"
+    "Which value during prediction to assign to this \"other\" category (i.e. categories not seen in training or not present in the provided `categories` array)? [def: `nothing`, i.e. typemax(Int64) for integer vectors and \"other\" for other types].
This setting is active only if `handle_unknown=\"infrequent\"` and in that case it MUST be specified if Y is neither integer nor string"
+    other_categories_name = nothing
+    "Random Number Generator [default: `Random.GLOBAL_RNG`]"
+    rng::AbstractRNG = Random.GLOBAL_RNG
+end
+
+"""
+MMI.fit(model::NeuralNetworkClassifier, verbosity, X, y)
+
+For the `verbosity` parameter see [`Verbosity`](@ref)
+
+"""
+function MMI.fit(m::NeuralNetworkClassifier, verbosity, X, y)
+    x = MMI.matrix(X)                     # convert table to matrix
+    typeof(verbosity) <: Integer || error("Verbosity must be an integer. Current \"steps\" are 0, 1, 2 and 3.")
+    verbosity = mljverbosity_to_betaml_verbosity(verbosity)
+    categories = deepcopy(m.categories)
+    if categories == nothing
+        #if occursin("CategoricalVector",string(typeof(y))) # to avoid dependency to CategoricalArrays or MLJBase
+        if typeof(y) <: CategoricalVector
+            categories = levels(y)
+        end
+    end
+
+    ohmod = BetaML.Utils.OneHotEncoder(categories=categories, handle_unknown=m.handle_unknown, other_categories_name=m.other_categories_name, verbosity=verbosity)
+    Y_oh  = BetaML.Api.fit!(ohmod,y)
+
+    nR,nD      = size(x)
+    (nRy,nDy)  = size(Y_oh)
+
+    nR == nRy || error("X and Y have different number of records (rows)")
+
+    if isnothing(m.layers)
+        layers = nothing
+    else
+        layers = deepcopy(m.layers)
+        push!(layers, BetaML.Nn.VectorFunctionLayer(nDy, f=BetaML.Utils.softmax))
+    end
+    mi = BetaML.Nn.NeuralNetworkEstimator(;layers=layers, loss=m.loss, dloss=m.dloss, epochs=m.epochs, batch_size=m.batch_size, opt_alg=m.opt_alg, shuffle=m.shuffle, cache=false, descr=m.descr, cb=m.cb, rng=m.rng, verbosity=verbosity)
+    BetaML.Api.fit!(mi,x,Y_oh)
+    fitresults = (mi,ohmod)
+    cache      = nothing
+    report     = nothing
+    return fitresults, cache, report
+ end
+
+function MMI.predict(m::NeuralNetworkClassifier, fitresult, Xnew)
+    nnmod, ohmod = fitresult
+    yhat    = BetaML.Api.predict(nnmod, MMI.matrix(Xnew))
+    classes = BetaML.Api.parameters(ohmod).categories_applied
+    predictions = MMI.UnivariateFinite(classes, yhat, pool=missing)
+    #return yhat
+    return predictions
+end
+
+ MMI.metadata_model(NeuralNetworkClassifier,
+    input_scitype    = MMI.Table(Union{MMI.Continuous,MMI.Count}),
+    target_scitype   = AbstractVector{<: Union{MMI.Multiclass,MMI.Finite,MMI.Count}},
+    supports_weights = false,
+    load_path        = "BetaML.Bmlj.NeuralNetworkClassifier"
+)
diff --git a/src/Bmlj/Perceptron_mlj.jl b/src/Bmlj/Perceptron_mlj.jl
new file mode 100644
index 00000000..36df7b3c
--- /dev/null
+++ b/src/Bmlj/Perceptron_mlj.jl
@@ -0,0 +1,325 @@
+"Part of [BetaML](https://github.com/sylvaticus/BetaML.jl). Licence is MIT."
+
+# MLJ interface for Perceptron-like models
+
+export LinearPerceptron, KernelPerceptron, Pegasos
+
+
+# ------------------------------------------------------------------------------
+# Model Structure declarations..
+"""
+$(TYPEDEF)
+
+The classical perceptron algorithm using one-vs-all for multiclass, from the Beta Machine Learning Toolkit (BetaML).
+
+# Hyperparameters:
+$(TYPEDFIELDS)
+
+# Example:
+```julia
+julia> using MLJ
+
+julia> X, y = @load_iris;
+
+julia> modelType = @load LinearPerceptron pkg = "BetaML"
+[ Info: For silent loading, specify `verbosity=0`.
+import BetaML ✔
+BetaML.Perceptron.LinearPerceptron
+
+julia> model = modelType()
+LinearPerceptron(
+    initial_coefficients = nothing,
+    initial_constant = nothing,
+    epochs = 1000,
+    shuffle = true,
+    force_origin = false,
+    return_mean_hyperplane = false,
+    rng = Random._GLOBAL_RNG())
+
+julia> mach = machine(model, X, y);
+
+julia> fit!(mach);
+[ Info: Training machine(LinearPerceptron(initial_coefficients = nothing, …), …).
+*** Avg. error after epoch 2 : 0.0 (all elements of the set has been correctly classified)
+julia> est_classes = predict(mach, X)
+150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt8, Float64}:
+ UnivariateFinite{Multiclass{3}}(setosa=>1.0, versicolor=>2.53e-34, virginica=>0.0)
+ UnivariateFinite{Multiclass{3}}(setosa=>1.0, versicolor=>1.27e-18, virginica=>1.86e-310)
+ ⋮
+ UnivariateFinite{Multiclass{3}}(setosa=>2.77e-57, versicolor=>1.1099999999999999e-82, virginica=>1.0)
+ UnivariateFinite{Multiclass{3}}(setosa=>3.09e-22, versicolor=>4.03e-25, virginica=>1.0)
+```
+
+"""
+mutable struct LinearPerceptron <: MMI.Probabilistic
+    "N-classes by D-dimensions matrix of initial linear coefficients [def: `nothing`, i.e. zeros]"
+    initial_coefficients::Union{Matrix{Float64},Nothing}
+    "N-classes vector of initial constant terms [def: `nothing`, i.e. zeros]"
+    initial_constant::Union{Vector{Float64},Nothing}
+    "Maximum number of epochs, i.e. passages through the whole training sample [def: `1000`]"
+    epochs::Int64
+    "Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]"
+    shuffle::Bool
+    "Whether to force the parameter associated with the constant term to remain zero [def: `false`]"
+    force_origin::Bool
+    "Whether to return the average hyperplane coefficients instead of the final ones [def: `false`]"
+    return_mean_hyperplane::Bool
+    "A Random Number Generator to be used in stochastic parts of the code [default: `Random.GLOBAL_RNG`]"
+    rng::AbstractRNG
+end
+LinearPerceptron(;
+    initial_coefficients=nothing,
+    initial_constant=nothing,
+    epochs=1000,
+    shuffle=true,
+    force_origin=false,
+    return_mean_hyperplane=false,
+    rng = Random.GLOBAL_RNG,
+    ) = LinearPerceptron(initial_coefficients,initial_constant,epochs,shuffle,force_origin,return_mean_hyperplane,rng)
+
+"""
+$(TYPEDEF)
+
+The kernel perceptron algorithm using one-vs-one for multiclass, from the Beta Machine Learning Toolkit (BetaML).
+
+# Hyperparameters:
+$(TYPEDFIELDS)
+
+# Example:
+```julia
+julia> using MLJ
+
+julia> X, y = @load_iris;
+
+julia> modelType = @load KernelPerceptron pkg = "BetaML"
+[ Info: For silent loading, specify `verbosity=0`.
+import BetaML ✔
+BetaML.Perceptron.KernelPerceptron
+
+julia> model = modelType()
+KernelPerceptron(
+    kernel = BetaML.Utils.radial_kernel,
+    epochs = 100,
+    initial_errors = nothing,
+    shuffle = true,
+    rng = Random._GLOBAL_RNG())
+
+julia> mach = machine(model, X, y);
+
+julia> fit!(mach);
+
+julia> est_classes = predict(mach, X)
+150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt8, Float64}:
+ UnivariateFinite{Multiclass{3}}(setosa=>0.665, versicolor=>0.245, virginica=>0.09)
+ UnivariateFinite{Multiclass{3}}(setosa=>0.665, versicolor=>0.245, virginica=>0.09)
+ ⋮
+ UnivariateFinite{Multiclass{3}}(setosa=>0.09, versicolor=>0.245, virginica=>0.665)
+ UnivariateFinite{Multiclass{3}}(setosa=>0.09, versicolor=>0.665, virginica=>0.245)
+```
+
+"""
+mutable struct KernelPerceptron <: MMI.Probabilistic
+    "Kernel function to employ.
See `?radial_kernel` or `?polynomial_kernel` (once loaded the BetaML package) for details or check `?BetaML.Utils` to verify if other kernels are defined (you can always define your own kernel) [def: [`radial_kernel`](@ref)]"
+    kernel::Function
+    "Maximum number of epochs, i.e. passages through the whole training sample [def: `100`]"
+    epochs::Int64
+    "Initial distribution of the number of errors [def: `nothing`, i.e. zeros]. If provided, this should be a nModels-length vector of nRecords-length integer vectors, where nModels is computed as `(n_classes * (n_classes - 1)) / 2`"
+    initial_errors::Union{Nothing,Vector{Vector{Int64}}}
+    "Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]"
+    shuffle::Bool
+    "A Random Number Generator to be used in stochastic parts of the code [default: `Random.GLOBAL_RNG`]"
+    rng::AbstractRNG
+end
+KernelPerceptron(;
+    kernel=BetaML.Utils.radial_kernel,
+    epochs=100,
+    initial_errors = nothing,
+    shuffle=true,
+    rng = Random.GLOBAL_RNG,
+    ) = KernelPerceptron(kernel,epochs,initial_errors,shuffle,rng)
+"""
+$(TYPEDEF)
+
+The gradient-based linear "pegasos" classifier using one-vs-all for multiclass, from the Beta Machine Learning Toolkit (BetaML).
+
+# Hyperparameters:
+$(TYPEDFIELDS)
+
+# Example:
+```julia
+julia> using MLJ
+
+julia> X, y = @load_iris;
+
+julia> modelType = @load Pegasos pkg = "BetaML" verbosity=0
+BetaML.Perceptron.Pegasos
+
+julia> model = modelType()
+Pegasos(
+    initial_coefficients = nothing,
+    initial_constant = nothing,
+    learning_rate = BetaML.Perceptron.var"#71#73"(),
+    learning_rate_multiplicative = 0.5,
+    epochs = 1000,
+    shuffle = true,
+    force_origin = false,
+    return_mean_hyperplane = false,
+    rng = Random._GLOBAL_RNG())
+
+julia> mach = machine(model, X, y);
+
+julia> fit!(mach);
+
+julia> est_classes = predict(mach, X)
+150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt8, Float64}:
+ UnivariateFinite{Multiclass{3}}(setosa=>0.817, versicolor=>0.153, virginica=>0.0301)
+ UnivariateFinite{Multiclass{3}}(setosa=>0.791, versicolor=>0.177, virginica=>0.0318)
+ ⋮
+ UnivariateFinite{Multiclass{3}}(setosa=>0.254, versicolor=>0.5, virginica=>0.246)
+ UnivariateFinite{Multiclass{3}}(setosa=>0.283, versicolor=>0.51, virginica=>0.207)
+```
+"""
+mutable struct Pegasos <: MMI.Probabilistic
+    "N-classes by D-dimensions matrix of initial linear coefficients [def: `nothing`, i.e. zeros]"
+    initial_coefficients::Union{Matrix{Float64},Nothing}
+    "N-classes vector of initial constant terms [def: `nothing`, i.e. zeros]"
+    initial_constant::Union{Vector{Float64},Nothing}
+    "Learning rate [def: (epoch -> 1/sqrt(epoch))]"
+    learning_rate::Function
+    "Multiplicative term of the learning rate [def: `0.5`]"
+    learning_rate_multiplicative::Float64
+    "Maximum number of epochs, i.e.
passages through the whole training sample [def: `1000`]"
+    epochs::Int64
+    "Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]"
+    shuffle::Bool
+    "Whether to force the parameter associated with the constant term to remain zero [def: `false`]"
+    force_origin::Bool
+    "Whether to return the average hyperplane coefficients instead of the final ones [def: `false`]"
+    return_mean_hyperplane::Bool
+    "A Random Number Generator to be used in stochastic parts of the code [default: `Random.GLOBAL_RNG`]"
+    rng::AbstractRNG
+end
+Pegasos(;
+    initial_coefficients=nothing,
+    initial_constant=nothing,
+    learning_rate = (t -> 1/sqrt(t)),
+    learning_rate_multiplicative = 0.5,
+    epochs=1000,
+    shuffle=true,
+    force_origin=false,
+    return_mean_hyperplane=false,
+    rng = Random.GLOBAL_RNG,
+    ) = Pegasos(initial_coefficients,initial_constant,learning_rate,learning_rate_multiplicative,epochs,shuffle,force_origin,return_mean_hyperplane,rng)
+
+# ------------------------------------------------------------------------------
+# Fit functions...
+
+function MMI.fit(model::LinearPerceptron, verbosity, X, y)
+    x = MMI.matrix(X)                     # convert table to matrix
+    allClasses = levels(y)
+    typeof(verbosity) <: Integer || error("Verbosity must be an integer. Current \"steps\" are 0, 1, 2 and 3.")
+    verbosity = mljverbosity_to_betaml_verbosity(verbosity)
+    #initial_coefficients = length(model.initial_coefficients) == 0 ? zeros(size(x,2)) : model.initial_coefficients
+    fitresult = BetaML.Perceptron.perceptron(x, y; θ=model.initial_coefficients, θ₀=model.initial_constant, T=model.epochs, nMsgs=0, shuffle=model.shuffle, force_origin=model.force_origin, return_mean_hyperplane=model.return_mean_hyperplane, rng=model.rng, verbosity=verbosity)
+    cache  = nothing
+    report = nothing
+    return (fitresult,allClasses), cache, report
+end
+
+function MMI.fit(model::KernelPerceptron, verbosity, X, y)
+    x = MMI.matrix(X)                     # convert table to matrix
+    allClasses = levels(y)
+    typeof(verbosity) <: Integer || error("Verbosity must be an integer. Current \"steps\" are 0, 1, 2 and 3.")
+    verbosity = mljverbosity_to_betaml_verbosity(verbosity)
+    #initial_errors = length(model.initial_errors) == 0 ? zeros(Int64,length(y)) : model.initial_errors
+    fitresult = BetaML.Perceptron.kernelPerceptron(x, y; K=model.kernel, T=model.epochs, α=model.initial_errors, nMsgs=0, shuffle=model.shuffle, rng=model.rng, verbosity=verbosity)
+    cache  = nothing
+    report = nothing
+    return (fitresult,allClasses), cache, report
+end
+
+function MMI.fit(model::Pegasos, verbosity, X, y)
+    x = MMI.matrix(X)                     # convert table to matrix
+    allClasses = levels(y)
+    typeof(verbosity) <: Integer || error("Verbosity must be an integer. Current \"steps\" are 0, 1, 2 and 3.")
+    verbosity = mljverbosity_to_betaml_verbosity(verbosity)
+    #initial_coefficients = length(model.initial_coefficients) == 0 ? zeros(size(x,2)) : model.initial_coefficients
+    fitresult = BetaML.Perceptron.pegasos(x, y; θ=model.initial_coefficients, θ₀=model.initial_constant, λ=model.learning_rate_multiplicative, η=model.learning_rate, T=model.epochs, nMsgs=0, shuffle=model.shuffle, force_origin=model.force_origin, return_mean_hyperplane=model.return_mean_hyperplane, rng=model.rng, verbosity=verbosity)
+    cache  = nothing
+    report = nothing
+    return (fitresult,allClasses), cache, report
+end
+
+# ------------------------------------------------------------------------------
+# Predict functions....
+function MMI.predict(model::Union{LinearPerceptron,Pegasos}, fitresult, Xnew)
+    fittedModel = fitresult[1]
+    #classes    = CategoricalVector(fittedModel.classes)
+    classes     = fittedModel.classes
+    allClasses  = fitresult[2] # as `classes` does not include classes unseen at training time
+    nLevels     = length(allClasses)
+    nRecords    = MMI.nrows(Xnew)
+    modelPredictions = BetaML.Perceptron.predict(MMI.matrix(Xnew), fittedModel.θ, fittedModel.θ₀, fittedModel.classes)
+    predMatrix  = zeros(Float64,(nRecords,nLevels))
+    # Transform the predictions from a vector of dictionaries to a matrix
+    # where the rows are the PMF of each record
+    for n in 1:nRecords
+        for (c,cl) in enumerate(allClasses)
+            predMatrix[n,c] = get(modelPredictions[n],cl,0.0)
+        end
+    end
+    #predictions = [MMI.UnivariateFinite(classes, predMatrix[i,:])
+    #              for i in 1:nRecords]
+    predictions = MMI.UnivariateFinite(allClasses,predMatrix,pool=missing)
+    return predictions
+end
+
+function MMI.predict(model::KernelPerceptron, fitresult, Xnew)
+    fittedModel = fitresult[1]
+    #classes    = CategoricalVector(fittedModel.classes)
+    classes     = fittedModel.classes
+    allClasses  = fitresult[2] # as `classes` does not include classes unseen at training time
+    nLevels     = length(allClasses)
+    nRecords    = MMI.nrows(Xnew)
+    #ŷtrain = Perceptron.predict([10 10; 2.2 2.5],model.x,model.y,model.α, model.classes,K=model.K)
+    modelPredictions = BetaML.Perceptron.predict(MMI.matrix(Xnew), fittedModel.x, fittedModel.y, fittedModel.α, fittedModel.classes, K=fittedModel.K)
+    predMatrix  = zeros(Float64,(nRecords,nLevels))
+    # Transform the predictions from a vector of dictionaries to a matrix
+    # where the rows are the PMF of each record
+    for n in 1:nRecords
+        for (c,cl) in enumerate(allClasses)
+            predMatrix[n,c] = get(modelPredictions[n],cl,0.0)
+        end
+    end
+    #predictions = [MMI.UnivariateFinite(classes, predMatrix[i,:])
+    #              for i in 1:nRecords]
+    #predictions = MMI.UnivariateFinite(classes, predMatrix)
+    predictions = MMI.UnivariateFinite(allClasses,predMatrix,pool=missing)
+    #predictions4 = MMI.UnivariateFinite(modelPredictions,pool=classes,ordered=false)
+    #predictions = MMI.UnivariateFinite(modelPredictions,pool=fittedModel.classes)
+    return predictions
+end
+
+# ------------------------------------------------------------------------------
+# Model metadata for registration in MLJ...
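+# The `load_path` entries below point at the (not exported) `Bmlj` submodule,
+# so that MLJ's model-loading machinery resolves these models at their new
+# location.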
+
+MMI.metadata_model(LinearPerceptron,
+    input_scitype = MMI.Table(MMI.Infinite),
+    target_scitype = AbstractVector{<: MMI.Finite},
+    supports_weights = false,
+    load_path = "BetaML.Bmlj.LinearPerceptron"
+)
+
+MMI.metadata_model(KernelPerceptron,
+    input_scitype = MMI.Table(MMI.Infinite),
+    target_scitype = AbstractVector{<: MMI.Finite},
+    supports_weights = false,
+    load_path = "BetaML.Bmlj.KernelPerceptron"
+)
+
+MMI.metadata_model(Pegasos,
+    input_scitype = MMI.Table(MMI.Infinite),
+    target_scitype = AbstractVector{<: MMI.Finite},
+    supports_weights = false,
+    load_path = "BetaML.Bmlj.Pegasos"
+)
diff --git a/src/Trees/Trees_MLJ.jl b/src/Bmlj/Trees_mlj.jl
similarity index 89%
rename from src/Trees/Trees_MLJ.jl
rename to src/Bmlj/Trees_mlj.jl
index cd970ac0..9330979f 100644
--- a/src/Trees/Trees_MLJ.jl
+++ b/src/Bmlj/Trees_mlj.jl
@@ -2,9 +2,6 @@
 
 # MLJ interface for Decision Trees/Random Forests models
 
-import MLJModelInterface # It seems that having done this in the top module is not enought
-const MMI = MLJModelInterface # We need to repoeat it here
-
 export DecisionTreeRegressor, RandomForestRegressor, DecisionTreeClassifier, RandomForestClassifier
 
@@ -76,7 +73,7 @@ DecisionTreeRegressor(;
     min_gain=0.0,
     min_records=2,
     max_features=0,
-    splitting_criterion=variance,
+    splitting_criterion=BetaML.Utils.variance,
     rng = Random.GLOBAL_RNG,
     ) = DecisionTreeRegressor(max_depth,min_gain,min_records,max_features,splitting_criterion,rng)
@@ -141,7 +138,7 @@ DecisionTreeClassifier(;
     min_gain=0.0,
     min_records=2,
     max_features=0,
-    splitting_criterion=gini,
+    splitting_criterion=BetaML.Utils.gini,
     rng = Random.GLOBAL_RNG,
     ) = DecisionTreeClassifier(max_depth,min_gain,min_records,max_features,splitting_criterion,rng)
@@ -216,7 +213,7 @@ RandomForestRegressor(;
     min_gain=0.0,
     min_records=2,
     max_features=0,
-    splitting_criterion=variance,
+    splitting_criterion=BetaML.Utils.variance,
     β=0.0,
     rng = Random.GLOBAL_RNG,
     ) = RandomForestRegressor(n_trees,max_depth,min_gain,min_records,max_features,splitting_criterion,β,rng)
@@ -286,7 +283,7 @@ RandomForestClassifier(;
     min_gain=0.0,
     min_records=2,
     max_features=0,
-    splitting_criterion=gini,
+    splitting_criterion=BetaML.Utils.gini,
     β=0.0,
     rng = Random.GLOBAL_RNG,
     ) = RandomForestClassifier(n_trees,max_depth,min_gain,min_records,max_features,splitting_criterion,β,rng)
@@ -314,15 +311,15 @@ MMI.hyperparameter_ranges(::Type{<:DecisionTreeRegressor}) = (
 function MMI.fit(model::Union{DecisionTreeRegressor,RandomForestRegressor}, verbosity, X, y)
     x = MMI.matrix(X) # convert table to matrix
     typeof(verbosity) <: Integer || error("Verbosity must be a integer. Current \"steps\" are 0, 1, 2 and 3.")
-    verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity)
+    verbosity = mljverbosity_to_betaml_verbosity(verbosity)
     max_depth = model.max_depth == 0 ? size(x,1) : model.max_depth
     # Using low level API here. We could switch to APIV2...
     if (typeof(model) == DecisionTreeRegressor)
         max_features = model.max_features == 0 ? size(x,2) : model.max_features
-        fitresult = buildTree(x, y, max_depth=max_depth, min_gain=model.min_gain, min_records=model.min_records, max_features=max_features, splitting_criterion=model.splitting_criterion,rng=model.rng, verbosity=verbosity)
+        fitresult = BetaML.Trees.buildTree(x, y, max_depth=max_depth, min_gain=model.min_gain, min_records=model.min_records, max_features=max_features, splitting_criterion=model.splitting_criterion,rng=model.rng, verbosity=verbosity)
     else
         max_features = model.max_features == 0 ? Int(round(sqrt(size(x,2)))) : model.max_features
-        fitresult = buildForest(x, y, model.n_trees, max_depth=max_depth, min_gain=model.min_gain, min_records=model.min_records, max_features=max_features, splitting_criterion=model.splitting_criterion, β=model.β,rng=model.rng,verbosity=verbosity)
+        fitresult = BetaML.Trees.buildForest(x, y, model.n_trees, max_depth=max_depth, min_gain=model.min_gain, min_records=model.min_records, max_features=max_features, splitting_criterion=model.splitting_criterion, β=model.β,rng=model.rng,verbosity=verbosity)
     end
     cache=nothing
     report=nothing
@@ -335,15 +332,15 @@ function MMI.fit(model::Union{DecisionTreeClassifier,RandomForestClassifier}, ve
     #y_plain = MMI.int(y) .- 1 # integer relabeling should start at 0
     yarray = convert(Vector{eltype(levels(y))},y) # convert to a simple Array{T}
     typeof(verbosity) <: Integer || error("Verbosity must be a integer. Current \"steps\" are 0, 1, 2 and 3.")
-    verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity)
+    verbosity = mljverbosity_to_betaml_verbosity(verbosity)
     max_depth = model.max_depth == 0 ? size(x,1) : model.max_depth
     # Using low level API here. We could switch to APIV2...
     if (typeof(model) == DecisionTreeClassifier)
         max_features = model.max_features == 0 ? size(x,2) : model.max_features
-        fittedmodel = buildTree(x, yarray, max_depth=max_depth, min_gain=model.min_gain, min_records=model.min_records, max_features=max_features, splitting_criterion=model.splitting_criterion, force_classification=true,rng=model.rng, verbosity=verbosity)
+        fittedmodel = BetaML.Trees.buildTree(x, yarray, max_depth=max_depth, min_gain=model.min_gain, min_records=model.min_records, max_features=max_features, splitting_criterion=model.splitting_criterion, force_classification=true,rng=model.rng, verbosity=verbosity)
     else
         max_features = model.max_features == 0 ? Int(round(sqrt(size(x,2)))) : model.max_features
-        fittedmodel = buildForest(x, yarray, model.n_trees, max_depth=max_depth, min_gain=model.min_gain, min_records=model.min_records, max_features=max_features, splitting_criterion=model.splitting_criterion, force_classification=true, β=model.β,rng=model.rng, verbosity=verbosity)
+        fittedmodel = BetaML.Trees.buildForest(x, yarray, model.n_trees, max_depth=max_depth, min_gain=model.min_gain, min_records=model.min_records, max_features=max_features, splitting_criterion=model.splitting_criterion, force_classification=true, β=model.β,rng=model.rng, verbosity=verbosity)
     end
     cache = nothing
     report = nothing
@@ -355,7 +352,7 @@ end
 
 # ------------------------------------------------------------------------------
 # Predict functions....
-MMI.predict(model::Union{DecisionTreeRegressor,RandomForestRegressor}, fitresult, Xnew) = Trees.predict(fitresult, MMI.matrix(Xnew))
+MMI.predict(model::Union{DecisionTreeRegressor,RandomForestRegressor}, fitresult, Xnew) = BetaML.Trees.predict(fitresult, MMI.matrix(Xnew))
 
 function MMI.predict(model::Union{DecisionTreeClassifier,RandomForestClassifier}, fitresult, Xnew)
     fittedModel = fitresult[1]
@@ -364,7 +361,7 @@ function MMI.predict(model::Union{DecisionTreeClassifier,RandomForestClassifier}
     classes = MMI.classes(a_target_element)
     nLevels = length(classes)
     nRecords = MMI.nrows(Xnew)
-    treePredictions = Trees.predict(fittedModel, MMI.matrix(Xnew),rng=model.rng)
+    treePredictions = BetaML.Trees.predict(fittedModel, MMI.matrix(Xnew),rng=model.rng)
     predMatrix = zeros(Float64,(nRecords,nLevels))
     # Transform the predictions from a vector of dictionaries to a matrix
     # where the rows are the PMF of each record
@@ -384,23 +381,23 @@ MMI.metadata_model(DecisionTreeRegressor,
     input_scitype = MMI.Table(Union{MMI.Missing, MMI.Known}),
     target_scitype = AbstractVector{<: MMI.Continuous}, # for a supervised model, what target?
     supports_weights = false, # does the model support sample weights?
-    load_path = "BetaML.Trees.DecisionTreeRegressor"
+    load_path = "BetaML.Bmlj.DecisionTreeRegressor"
 )
 MMI.metadata_model(RandomForestRegressor,
     input_scitype = MMI.Table(Union{MMI.Missing, MMI.Known}),
     target_scitype = AbstractVector{<: MMI.Continuous},
     supports_weights = false,
-    load_path = "BetaML.Trees.RandomForestRegressor"
+    load_path = "BetaML.Bmlj.RandomForestRegressor"
 )
 MMI.metadata_model(DecisionTreeClassifier,
     input_scitype = MMI.Table(Union{MMI.Missing, MMI.Known}),
     target_scitype = AbstractVector{<: Union{MMI.Missing,MMI.Finite}},
     supports_weights = false,
-    load_path = "BetaML.Trees.DecisionTreeClassifier"
+    load_path = "BetaML.Bmlj.DecisionTreeClassifier"
 )
 MMI.metadata_model(RandomForestClassifier,
     input_scitype = MMI.Table(Union{MMI.Missing, MMI.Known}),
     target_scitype = AbstractVector{<: Union{MMI.Missing,MMI.Finite}},
     supports_weights = false,
-    load_path = "BetaML.Trees.RandomForestClassifier"
+    load_path = "BetaML.Bmlj.RandomForestClassifier"
 )
diff --git a/src/Clustering/Clustering.jl b/src/Clustering/Clustering.jl
index e0f5ae17..cd8dba72 100644
--- a/src/Clustering/Clustering.jl
+++ b/src/Clustering/Clustering.jl
@@ -32,8 +32,6 @@ import Base.show
 export KMeansHyperParametersSet, KMedoidsHyperParametersSet, KMeansClusterer, KMedoidsClusterer
 
 include("Clustering_hard.jl") # K-means and k-medoids
 
-# MLJ interface
-include("Clustering_MLJ.jl")
 
 end
diff --git a/src/GMM/GMM.jl b/src/GMM/GMM.jl
index e6a0ed2f..b1f853a3 100644
--- a/src/GMM/GMM.jl
+++ b/src/GMM/GMM.jl
@@ -51,8 +51,5 @@ include("GMM_clustering.jl")
 include("Mixtures.jl")
 include("GMM_regression.jl")
 
-# MLJ interface
-include("GMM_MLJ.jl")
-
 
 end
diff --git a/src/GMM/GMM_regression.jl b/src/GMM/GMM_regression.jl
index bd534384..dc143d7a 100644
--- a/src/GMM/GMM_regression.jl
+++ b/src/GMM/GMM_regression.jl
@@ -6,7 +6,7 @@ import BetaML.Utils.allowmissing
 
 # GMMRegressor1
 Base.@kwdef mutable struct GMMRegressor1LearnableParameters <: BetaMLLearnableParametersSet
-    mixtures::Vector{AbstractMixture} = []
+    mixtures::Union{Type,Vector{<: AbstractMixture}} = DiagonalGaussian[] # a `Type` here is only temporary: it should always be replaced by actual mixture objects
     initial_probmixtures::Vector{Float64} = []
     #probRecords::Union{Nothing,Matrix{Float64}} = nothing
     meanYByMixture::Union{Nothing,Matrix{Float64}} = nothing
diff --git a/src/Imputation/Imputation.jl b/src/Imputation/Imputation.jl
index b8245edd..4d31ea6f 100644
--- a/src/Imputation/Imputation.jl
+++ b/src/Imputation/Imputation.jl
@@ -263,7 +263,7 @@ end
 # ------------------------------------------------------------------------------
 # GMMImputer
 Base.@kwdef mutable struct GMMImputerLearnableParameters <: BetaMLLearnableParametersSet
-    mixtures::Vector{AbstractMixture} = []
+    mixtures::Union{Type,Vector{<: AbstractMixture}} = DiagonalGaussian[] # a `Type` here is only temporary: it should always be replaced by actual mixture objects
     initial_probmixtures::Vector{Float64} = []
     probRecords::Union{Nothing,Matrix{Float64}} = nothing
     #imputedValues = nothing
@@ -1211,7 +1211,5 @@ function show(io::IO, m::UniversalImputer)
     end
 end
 
-# MLJ interface
-include("Imputation_MLJ.jl")
 
 end # end Imputation module
\ No newline at end of file
diff --git a/src/Nn/Nn.jl b/src/Nn/Nn.jl
index efcd14b5..4fec6cdb 100644
--- a/src/Nn/Nn.jl
+++ b/src/Nn/Nn.jl
@@ -1161,9 +1161,4 @@ function show(io::IO, m::NeuralNetworkEstimator)
         end
     end
 end
-
-
-# MLJ interface
-include("Nn_MLJ.jl")
-
 end # end module
diff --git a/src/Nn/Nn_MLJ.jl b/src/Nn/Nn_MLJ.jl
deleted file mode 100644
index c23b7404..00000000
--- a/src/Nn/Nn_MLJ.jl
+++ /dev/null
@@ -1,374 +0,0 @@
-"Part of [BetaML](https://github.com/sylvaticus/BetaML.jl). Licence is MIT."
-
-# MLJ interface for Neural Networks models
-
-import MLJModelInterface # It seems that having done this in the top module is not enought
-const MMI = MLJModelInterface # We need to repeat it here
-using CategoricalArrays
-
-export NeuralNetworkRegressor, MultitargetNeuralNetworkRegressor, NeuralNetworkClassifier
-
-
-# Model Structure declarations..
-"""
-$(TYPEDEF)
-
-A simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for regression of a single dimensional target.
-
-# Parameters:
-$(FIELDS)
-
-# Notes:
-- data must be numerical
-- the label should be be a _n-records_ vector.
- -# Example: -```julia -julia> using MLJ - -julia> X, y = @load_boston; - -julia> modelType = @load NeuralNetworkRegressor pkg = "BetaML" verbosity=0 -BetaML.Nn.NeuralNetworkRegressor - -julia> layers = [BetaML.DenseLayer(12,20,f=BetaML.relu),BetaML.DenseLayer(20,20,f=BetaML.relu),BetaML.DenseLayer(20,1,f=BetaML.relu)]; - -julia> model = modelType(layers=layers,opt_alg=BetaML.ADAM()); -NeuralNetworkRegressor( - layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.23249759178069676 -0.4125090172711131 … 0.41401934928739 -0.33017881111237535; -0.27912169279319965 0.270551221249931 … 0.19258414323473344 0.1703002982374256; … ; 0.31186742456482447 0.14776438287394805 … 0.3624993442655036 0.1438885872964824; 0.24363744610286758 -0.3221033024934767 … 0.14886090419299408 0.038411663101909355], [-0.42360286004241765, -0.34355377040029594, 0.11510963232946697, 0.29078650404397893, -0.04940236502546075, 0.05142849152316714, -0.177685375947775, 0.3857630523957018, -0.25454667127064756, -0.1726731848206195, 0.29832456225553444, -0.21138505291162835, -0.15763643112604903, -0.08477044513587562, -0.38436681165349196, 0.20538016429104916, -0.25008157754468335, 0.268681800562054, 0.10600581996650865, 0.4262194464325672], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.08534180387478185 0.19659398307677617 … -0.3413633217504578 -0.0484925247381256; 0.0024419192794883915 -0.14614102508129 … -0.21912059923003044 0.2680725396694708; … ; 0.25151545823147886 -0.27532269951606037 … 0.20739970895058063 0.2891938885916349; -0.1699020711688904 -0.1350423717084296 … 0.16947589410758873 0.3629006047373296], [0.2158116357688406, -0.3255582642532289, -0.057314442103850394, 0.29029696770539953, 0.24994080694366455, 0.3624239027782297, -0.30674318230919984, -0.3854738338935017, 0.10809721838554087, 0.16073511121016176, -0.005923262068960489, 0.3157147976348795, -0.10938918304264739, -0.24521229198853187, -0.307167732178712, 0.0808907777008302, -0.014577497150872254, -0.0011287181458157214, 0.07522282588658086, 0.043366500526073104], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.021367697115938555 -0.28326652172347155 … 0.05346175368370165 -0.26037328415871647], [-0.2313659199724562], BetaML.Utils.relu, BetaML.Utils.drelu)], - loss = BetaML.Utils.squared_cost, - dloss = BetaML.Utils.dsquared_cost, - epochs = 100, - batch_size = 32, - opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var"#90#93"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), - shuffle = true, - descr = "", - cb = BetaML.Nn.fitting_info, - rng = Random._GLOBAL_RNG()) - -julia> mach = machine(model, X, y); - -julia> fit!(mach); - -julia> ŷ = predict(mach, X); - -julia> hcat(y,ŷ) -506×2 Matrix{Float64}: - 24.0 30.7726 - 21.6 28.0811 - 34.7 31.3194 - ⋮ - 23.9 30.9032 - 22.0 29.49 - 11.9 27.2438 -``` -""" -Base.@kwdef mutable struct NeuralNetworkRegressor <: MMI.Deterministic - "Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers" - layers::Union{Array{AbstractLayer,1},Nothing} = nothing - """Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as matrices, even if the regression task is 1-D - !!! warning - If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`. - """ - loss::Union{Nothing,Function} = squared_cost - "Derivative of the loss function [def: `BetaML.dsquared_cost`, i.e. use the derivative of the squared cost]. 
Use `nothing` for autodiff." - dloss::Union{Function,Nothing} = dsquared_cost - "Number of epochs, i.e. passages trough the whole training sample [def: `200`]" - epochs::Int64 = 200 - "Size of each individual batch [def: `16`]" - batch_size::Int64 = 16 - "The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers" - opt_alg::OptimisationAlgorithm = ADAM() - "Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]" - shuffle::Bool = true - "An optional title and/or description for this model" - descr::String = "" - "A call back function to provide information during training [def: `fitting_info`]" - cb::Function=fitting_info - "Random Number Generator (see [`FIXEDSEED`](@ref)) [deafult: `Random.GLOBAL_RNG`] - " - rng::AbstractRNG = Random.GLOBAL_RNG -end - -""" -$(TYPEDSIGNATURES) - -For the `verbosity` parameter see [`Verbosity`](@ref)) - -""" -function MMI.fit(m::NeuralNetworkRegressor, verbosity, X, y) - x = MMI.matrix(X) # convert table to matrix - typeof(verbosity) <: Integer || error("Verbosity must be a integer. Current \"steps\" are 0, 1, 2 and 3.") - verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity) - ndims(y) > 1 && error("The label should have only 1 dimensions. Use `MultitargetNeuralNetworkRegressor` or `NeuralNetworkClassifier` for multi_dimensional outputs.") - mi = NeuralNetworkEstimator(;layers=m.layers,loss=m.loss, dloss=m.dloss, epochs=m.epochs, batch_size=m.batch_size, opt_alg=m.opt_alg,shuffle=m.shuffle, cache=false, descr=m.descr, cb=m.cb, rng=m.rng, verbosity=verbosity) - fit!(mi,x,y) - fitresults = mi - cache = nothing - report = nothing - return fitresults, cache, report - end - - MMI.predict(m::NeuralNetworkRegressor, fitresult, Xnew) = predict(fitresult, MMI.matrix(Xnew)) - - MMI.metadata_model(NeuralNetworkRegressor, - input_scitype = MMI.Table(Union{MMI.Continuous,MMI.Count}), - target_scitype = AbstractVector{<: Union{MMI.Continuous,MMI.Count}}, - supports_weights = false, - load_path = "BetaML.Nn.NeuralNetworkRegressor" -) - -# ------------------------------------------------------------------------------ -# Model Structure declarations.. -""" -$(TYPEDEF) - -A simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for regression of multiple dimensional targets. 
- -# Parameters: -$(FIELDS) - -# Notes: -- data must be numerical -- the label should be a _n-records_ by _n-dimensions_ matrix - -# Example: -```julia -julia> using MLJ - -julia> X, y = @load_boston; - -julia> ydouble = hcat(y, y .*2 .+5); - -julia> modelType = @load MultitargetNeuralNetworkRegressor pkg = "BetaML" verbosity=0 -BetaML.Nn.MultitargetNeuralNetworkRegressor - -julia> layers = [BetaML.DenseLayer(12,50,f=BetaML.relu),BetaML.DenseLayer(50,50,f=BetaML.relu),BetaML.DenseLayer(50,50,f=BetaML.relu),BetaML.DenseLayer(50,2,f=BetaML.relu)]; - -julia> model = modelType(layers=layers,opt_alg=BetaML.ADAM(),epochs=500) -MultitargetNeuralNetworkRegressor( - layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.2591582523441157 -0.027962845131416225 … 0.16044535560124418 -0.12838827994676857; -0.30381834909561184 0.2405495243851402 … -0.2588144861880588 0.09538577909777807; … ; -0.017320292924711156 -0.14042266424603767 … 0.06366999105841187 -0.13419651752478906; 0.07393079961409338 0.24521350531110264 … 0.04256867886217541 -0.0895506802948175], [0.14249427336553644, 0.24719379413682485, -0.25595911822556566, 0.10034088778965933, -0.017086404878505712, 0.21932184025609347, -0.031413516834861266, -0.12569076082247596, -0.18080140982481183, 0.14551901873323253 … -0.13321995621967364, 0.2436582233332092, 0.0552222336976439, 0.07000814133633904, 0.2280064379660025, -0.28885681475734193, -0.07414214246290696, -0.06783184733650621, -0.055318068046308455, -0.2573488383282579], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.0395424111703751 -0.22531232360829911 … -0.04341228943744482 0.024336206858365517; -0.16481887432946268 0.17798073384748508 … -0.18594039305095766 0.051159225856547474; … ; -0.011639475293705043 -0.02347011206244673 … 0.20508869536159186 -0.1158382446274592; -0.19078069527757857 -0.007487540070740484 … -0.21341165344291158 -0.24158671316310726], [-0.04283623889330032, 0.14924461547060602, -0.17039563392959683, 0.00907774027816255, 0.21738885963113852, -0.06308040225941691, -0.14683286822101105, 0.21726892197970937, 0.19784321784707126, -0.0344988665714947 … -0.23643089430602846, -0.013560425201427584, 0.05323948910726356, -0.04644175812567475, -0.2350400292671211, 0.09628312383424742, 0.07016420995205697, -0.23266392927140334, -0.18823664451487, 0.2304486691429084], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.11504184627266828 0.08601794194664503 … 0.03843129724045469 -0.18417305624127284; 0.10181551438831654 0.13459759904443674 … 0.11094951365942118 -0.1549466590355218; … ; 0.15279817525427697 0.0846661196058916 … -0.07993619892911122 0.07145402617285884; -0.1614160186346092 -0.13032002335149 … -0.12310552194729624 -0.15915773071049827], [-0.03435885900946367, -0.1198543931290306, 0.008454985905194445, -0.17980887188986966, -0.03557204910359624, 0.19125847393334877, -0.10949700778538696, -0.09343206702591, -0.12229583511781811, -0.09123969069220564 … 0.22119233518322862, 0.2053873143308657, 0.12756489387198222, 0.11567243705173319, -0.20982445664020496, 0.1595157838386987, -0.02087331046544119, -0.20556423263489765, -0.1622837764237961, -0.019220998739847395], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.25796717031347993 0.17579536633402948 … -0.09992960168785256 -0.09426177454620635; -0.026436330246675632 0.18070899284865127 … -0.19310119102392206 -0.06904005900252091], [0.16133004882307822, -0.3061228721091248], BetaML.Utils.relu, BetaML.Utils.drelu)], - loss = BetaML.Utils.squared_cost, - dloss = 
BetaML.Utils.dsquared_cost, - epochs = 500, - batch_size = 32, - opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var"#90#93"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), - shuffle = true, - descr = "", - cb = BetaML.Nn.fitting_info, - rng = Random._GLOBAL_RNG()) - -julia> mach = machine(model, X, ydouble); - -julia> fit!(mach); - -julia> ŷdouble = predict(mach, X); - -julia> hcat(ydouble,ŷdouble) -506×4 Matrix{Float64}: - 24.0 53.0 28.4624 62.8607 - 21.6 48.2 22.665 49.7401 - 34.7 74.4 31.5602 67.9433 - 33.4 71.8 33.0869 72.4337 - ⋮ - 23.9 52.8 23.3573 50.654 - 22.0 49.0 22.1141 48.5926 - 11.9 28.8 19.9639 45.5823 -``` - -""" -Base.@kwdef mutable struct MultitargetNeuralNetworkRegressor <: MMI.Deterministic - "Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers" - layers::Union{Array{AbstractLayer,1},Nothing} = nothing - """Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as matrices. - !!! warning - If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`. - """ - loss::Union{Nothing,Function} = squared_cost - "Derivative of the loss function [def: `BetaML.dsquared_cost`, i.e. use the derivative of the squared cost]. Use `nothing` for autodiff." - dloss::Union{Function,Nothing} = dsquared_cost - "Number of epochs, i.e. passages trough the whole training sample [def: `300`]" - epochs::Int64 = 300 - "Size of each individual batch [def: `16`]" - batch_size::Int64 = 16 - "The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers" - opt_alg::OptimisationAlgorithm = ADAM() - "Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]" - shuffle::Bool = true - "An optional title and/or description for this model" - descr::String = "" - "A call back function to provide information during training [def: `BetaML.fitting_info`]" - cb::Function=fitting_info - "Random Number Generator (see [`FIXEDSEED`](@ref)) [deafult: `Random.GLOBAL_RNG`] - " - rng::AbstractRNG = Random.GLOBAL_RNG -end -""" -$(TYPEDSIGNATURES) - -For the `verbosity` parameter see [`Verbosity`](@ref)) - -""" -function MMI.fit(m::MultitargetNeuralNetworkRegressor, verbosity, X, y) - x = MMI.matrix(X) # convert table to matrix - typeof(verbosity) <: Integer || error("Verbosity must be a integer. Current \"steps\" are 0, 1, 2 and 3.") - verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity) - ndims(y) > 1 || error("The label should have multiple dimensions. 
Use `NeuralNetworkRegressor` for single-dimensional outputs.") - mi = NeuralNetworkEstimator(;layers=m.layers,loss=m.loss, dloss=m.dloss, epochs=m.epochs, batch_size=m.batch_size, opt_alg=m.opt_alg,shuffle=m.shuffle, cache=false, descr=m.descr, cb=m.cb, rng=m.rng, verbosity=verbosity) - fit!(mi,x,y) - fitresults = mi - cache = nothing - report = nothing - return fitresults, cache, report - end - - MMI.predict(m::MultitargetNeuralNetworkRegressor, fitresult, Xnew) = predict(fitresult, MMI.matrix(Xnew)) - - MMI.metadata_model(MultitargetNeuralNetworkRegressor, - input_scitype = MMI.Table(Union{MMI.Continuous,MMI.Count}), - target_scitype = AbstractMatrix{<: Union{MMI.Continuous,MMI.Count}}, - supports_weights = false, - load_path = "BetaML.Nn.MultitargetNeuralNetworkRegressor" -) - -# ------------------------------------------------------------------------------ - -""" -$(TYPEDEF) - -A simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for classification problems. - -# Parameters: -$(FIELDS) - -# Notes: -- data must be numerical -- the label should be a _n-records_ by _n-dimensions_ matrix (e.g. a one-hot-encoded data for classification), where the output columns should be interpreted as the probabilities for each categories. - -# Example: -```julia -julia> using MLJ - -julia> X, y = @load_iris; - -julia> modelType = @load NeuralNetworkClassifier pkg = "BetaML" verbosity=0 -BetaML.Nn.NeuralNetworkClassifier - -julia> layers = [BetaML.DenseLayer(4,8,f=BetaML.relu),BetaML.DenseLayer(8,8,f=BetaML.relu),BetaML.DenseLayer(8,3,f=BetaML.relu),BetaML.VectorFunctionLayer(3,f=BetaML.softmax)]; - -julia> model = modelType(layers=layers,opt_alg=BetaML.ADAM()) -NeuralNetworkClassifier( - layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.376173352338049 0.7029289511758696 -0.5589563304592478 -0.21043274001651874; 0.044758889527899415 0.6687689636685921 0.4584331114653877 0.6820506583840453; … ; -0.26546358457167507 -0.28469736227283804 -0.164225549922154 -0.516785639164486; -0.5146043550684141 -0.0699113265130964 0.14959906603941908 -0.053706860039406834], [0.7003943613125758, -0.23990840466587576, -0.23823126271387746, 0.4018101580410387, 0.2274483050356888, -0.564975060667734, 0.1732063297031089, 0.11880299829896945], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.029467850439546583 0.4074661266592745 … 0.36775675246760053 -0.595524555448422; 0.42455597698371306 -0.2458082732997091 … -0.3324220683462514 0.44439454998610595; … ; -0.2890883863364267 -0.10109249362508033 … -0.0602680568207582 0.18177278845097555; -0.03432587226449335 -0.4301192922760063 … 0.5646018168286626 0.47269177680892693], [0.13777442835428688, 0.5473306726675433, 0.3781939472904011, 0.24021813428130567, -0.0714779477402877, -0.020386373530818958, 0.5465466618404464, -0.40339790713616525], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([0.6565120540082393 0.7139211611842745 … 0.07809812467915389 -0.49346311403373844; -0.4544472987041656 0.6502667641568863 … 0.43634608676548214 0.7213049952968921; 0.41212264783075303 -0.21993289366360613 … 0.25365007887755064 -0.5664469566269569], [-0.6911986792747682, -0.2149343209329364, -0.6347727539063817], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.VectorFunctionLayer{0}(fill(NaN), 3, 3, BetaML.Utils.softmax, BetaML.Utils.dsoftmax, nothing)], - loss = BetaML.Utils.crossentropy, - dloss = BetaML.Utils.dcrossentropy, - epochs = 100, - batch_size = 32, - opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var"#90#93"(), 1.0, 
0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), - shuffle = true, - descr = "", - cb = BetaML.Nn.fitting_info, - categories = nothing, - handle_unknown = "error", - other_categories_name = nothing, - rng = Random._GLOBAL_RNG()) - -julia> mach = machine(model, X, y); - -julia> fit!(mach); - -julia> classes_est = predict(mach, X) -150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt8, Float64}: - UnivariateFinite{Multiclass{3}}(setosa=>0.575, versicolor=>0.213, virginica=>0.213) - UnivariateFinite{Multiclass{3}}(setosa=>0.573, versicolor=>0.213, virginica=>0.213) - ⋮ - UnivariateFinite{Multiclass{3}}(setosa=>0.236, versicolor=>0.236, virginica=>0.529) - UnivariateFinite{Multiclass{3}}(setosa=>0.254, versicolor=>0.254, virginica=>0.492) -``` -""" -Base.@kwdef mutable struct NeuralNetworkClassifier <: MMI.Probabilistic - "Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers. The last \"softmax\" layer is automatically added." - layers::Union{Array{AbstractLayer,1},Nothing} = nothing - """Loss (cost) function [def: `BetaML.crossentropy`]. Should always assume y and ŷ as matrices. - !!! warning - If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`. - """ - loss::Union{Nothing,Function} = crossentropy - "Derivative of the loss function [def: `BetaML.dcrossentropy`, i.e. the derivative of the cross-entropy]. Use `nothing` for autodiff." - dloss::Union{Function,Nothing} = dcrossentropy - "Number of epochs, i.e. passages trough the whole training sample [def: `200`]" - epochs::Int64 = 200 - "Size of each individual batch [def: `16`]" - batch_size::Int64 = 16 - "The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers" - opt_alg::OptimisationAlgorithm = ADAM() - "Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]" - shuffle::Bool = true - "An optional title and/or description for this model" - descr::String = "" - "A call back function to provide information during training [def: `BetaML.fitting_info`]" - cb::Function=fitting_info - "The categories to represent as columns. [def: `nothing`, i.e. unique training values]." - categories::Union{Vector,Nothing} = nothing - "How to handle categories not seens in training or not present in the provided `categories` array? \"error\" (default) rises an error, \"infrequent\" adds a specific column for these categories." - handle_unknown::String = "error" - "Which value during prediction to assign to this \"other\" category (i.e. categories not seen on training or not present in the provided `categories` array? [def: ` nothing`, i.e. typemax(Int64) for integer vectors and \"other\" for other types]. This setting is active only if `handle_unknown=\"infrequent\"` and in that case it MUST be specified if Y is neither integer or strings" - other_categories_name = nothing - "Random Number Generator [deafult: `Random.GLOBAL_RNG`]" - rng::AbstractRNG = Random.GLOBAL_RNG -end - -""" -MMI.fit(model::NeuralNetworkClassifier, verbosity, X, y) - -For the `verbosity` parameter see [`Verbosity`](@ref)) - -""" -function MMI.fit(m::NeuralNetworkClassifier, verbosity, X, y) - x = MMI.matrix(X) # convert table to matrix - typeof(verbosity) <: Integer || error("Verbosity must be a integer. 
Current \"steps\" are 0, 1, 2 and 3.") - verbosity = Utils.mljverbosity_to_betaml_verbosity(verbosity) - categories = deepcopy(m.categories) - if categories == nothing - #if occursin("CategoricalVector",string(typeof(y))) # to avoid dependency to CategoricalArrays or MLJBase - if typeof(y) <: CategoricalVector - categories = levels(y) - end - end - - ohmod = OneHotEncoder(categories=categories,handle_unknown=m.handle_unknown,other_categories_name=m.other_categories_name, verbosity=verbosity) - Y_oh = fit!(ohmod,y) - - nR,nD = size(x) - (nRy,nDy) = size(Y_oh) - - nR == nRy || error("X and Y have different number of records (rows)") - - if isnothing(m.layers) - layers = nothing - else - layers = deepcopy(m.layers) - push!(layers,VectorFunctionLayer(nDy,f=softmax)) - end - mi = NeuralNetworkEstimator(;layers=layers,loss=m.loss, dloss=m.dloss, epochs=m.epochs, batch_size=m.batch_size, opt_alg=m.opt_alg,shuffle=m.shuffle, cache=false, descr=m.descr, cb=m.cb, rng=m.rng, verbosity=verbosity) - fit!(mi,x,Y_oh) - fitresults = (mi,ohmod) - cache = nothing - report = nothing - return fitresults, cache, report - end - -function MMI.predict(m::NeuralNetworkClassifier, fitresult, Xnew) - nnmod, ohmod = fitresult - yhat = predict(nnmod, MMI.matrix(Xnew)) - classes = parameters(ohmod).categories_applied - predictions = MMI.UnivariateFinite(classes, yhat,pool=missing) - #return yhat - return predictions -end - - MMI.metadata_model(NeuralNetworkClassifier, - input_scitype = MMI.Table(Union{MMI.Continuous,MMI.Count}), - target_scitype = AbstractVector{<: Union{MMI.Multiclass,MMI.Finite,MMI.Count}}, - supports_weights = false, - load_path = "BetaML.Nn.NeuralNetworkClassifier" -) diff --git a/src/Trees/Trees.jl b/src/Trees/Trees.jl index 06f81ffe..f0c4e458 100644 --- a/src/Trees/Trees.jl +++ b/src/Trees/Trees.jl @@ -46,7 +46,6 @@ export RandomForestEstimator, RFHyperParametersSet include("DecisionTrees.jl") # Decision Trees algorithm and API include("AbstractTrees_BetaML_interface.jl") # Code to allow plotting of a DecisionTree include("RandomForests.jl") # Random Forests algorithm and API -include("Trees_MLJ.jl") # MLJ interface end # end module diff --git a/test/Clustering_tests.jl b/test/Clustering_tests.jl index 1d9b3880..26d8c93b 100644 --- a/test/Clustering_tests.jl +++ b/test/Clustering_tests.jl @@ -1,7 +1,6 @@ using Test, DelimitedFiles -import MLJBase -const Mlj = MLJBase + using BetaML import BetaML.Clustering: init_representatives, kmeans, kmedoids @@ -86,9 +85,11 @@ s = mean(silhouette(pd,ŷ)) # ================================== # NEW TEST println("Testing MLJ interface for Clustering models....") +import MLJBase +const Mlj = MLJBase X, y = Mlj.@load_iris -model = KMeans(rng=copy(TESTRNG)) +model = BetaML.Bmlj.KMeans(rng=copy(TESTRNG)) modelMachine = Mlj.machine(model, X) (fitResults, cache, report) = Mlj.fit(model, 0, X) distances = Mlj.transform(model,fitResults,X) @@ -96,7 +97,7 @@ yhat = Mlj.predict(model, fitResults, X) acc = BetaML.accuracy(Mlj.levelcode.(yhat),Mlj.levelcode.(y),ignorelabels=true) @test acc > 0.8 -model = KMedoids(rng=copy(TESTRNG)) +model = BetaML.Bmlj.KMedoids(rng=copy(TESTRNG)) modelMachine = Mlj.machine(model, X) (fitResults, cache, report) = Mlj.fit(model, 0, X) distances = Mlj.transform(model,fitResults,X) diff --git a/test/GMM_tests.jl b/test/GMM_tests.jl index eadaf146..8b50374a 100644 --- a/test/GMM_tests.jl +++ b/test/GMM_tests.jl @@ -1,7 +1,5 @@ using Test -import MLJBase -const Mlj = MLJBase import Distributions using BetaML import BetaML.GMM: gmm, initVariances!, 
updateVariances! @@ -128,6 +126,10 @@ ŷtrain2db = predict(m) mreTrain2d = relative_mean_error(ytrain2d,ŷtrain2d,normrec=true) @test mreTrain2d <= 0.08 +m = GMMRegressor1(n_classes=2,rng=copy(TESTRNG), verbosity=NONE, mixtures= SphericalGaussian) +est = fit!(m,xtrain,ytrain2d) +@test typeof(est) == Matrix{Float64} + # Testing GMM Regressor 2 m = GMMRegressor2(n_classes=2,rng=copy(TESTRNG), verbosity=NONE) fit!(m,xtrain,ytrain) @@ -167,9 +169,11 @@ fit!(m,xtrain,ytrain) # ================================== # NEW TEST println("Testing MLJ interface for GMM models....") +import MLJBase +const Mlj = MLJBase X, y = Mlj.@load_iris -model = GaussianMixtureClusterer(mixtures=[DiagonalGaussian() for i in 1:3],rng=copy(TESTRNG)) +model = BetaML.Bmlj.GaussianMixtureClusterer(mixtures=[DiagonalGaussian() for i in 1:3],rng=copy(TESTRNG)) modelMachine = Mlj.machine(model, X) # DimensionMismatch (fitResults, cache, report) = Mlj.fit(model, 0, X) yhat_prob = Mlj.predict(model, fitResults, X) # Mlj.transform(model,fitResults,X) @@ -180,14 +184,14 @@ yhat_prob = Mlj.predict(model, fitResults, X) # Mlj.transfor println("Testing MLJ interface for GMMRegressor models....") X, y = Mlj.@load_boston -model_gmmr = GaussianMixtureRegressor(n_classes=20,rng=copy(TESTRNG)) +model_gmmr = BetaML.Bmlj.GaussianMixtureRegressor(n_classes=20,rng=copy(TESTRNG)) regressor_gmmr = Mlj.machine(model_gmmr, X, y) (fitresult_gmmr, cache, report) = Mlj.fit(model_gmmr, 0, X, y) yhat_gmmr = Mlj.predict(model_gmmr, fitresult_gmmr, X) @test relative_mean_error(y,yhat_gmmr,normrec=true) < 0.3 ydouble = hcat(y,y) -model_gmmr2 = MultitargetGaussianMixtureRegressor(n_classes=20,rng=copy(TESTRNG)) +model_gmmr2 = BetaML.Bmlj.MultitargetGaussianMixtureRegressor(n_classes=20,rng=copy(TESTRNG)) regressor_gmmr2 = Mlj.machine(model_gmmr2, X, ydouble) (fitresult_gmmr2, cache, report) = Mlj.fit(model_gmmr2, 0, X, ydouble) yhat_gmmr2 = Mlj.predict(model_gmmr2, fitresult_gmmr2, X) diff --git a/test/Imputation_tests.jl b/test/Imputation_tests.jl index 12768a00..4235c013 100644 --- a/test/Imputation_tests.jl +++ b/test/Imputation_tests.jl @@ -4,10 +4,6 @@ using Statistics, Random using BetaML import DecisionTree - -import MLJBase -const Mlj = MLJBase - TESTRNG = FIXEDRNG # This could change... 
@@ -101,6 +97,11 @@ X̂3 = predict(mod,X3) reset!(mod) #predict(mod,X3) +mod = GMMImputer(mixtures=DiagonalGaussian) +X2 = [3 6 9; 2000 missing 10000; 1 2 5; 1500 3000 9000; 1.5 3 6] +fit!(mod,X2) +X̂2 = predict(mod) +@test typeof(X̂2) == Matrix{Float64} # ------------------------------------------------------------------------------ @@ -229,15 +230,14 @@ Xfull2 = BetaML.fit!(mod2,X) println("Testing MLJ Interfaces...") -# ------------------------------------------------------------------------------ - - +import MLJBase +const Mlj = MLJBase println("Testing MLJ Interface for SimpleImputer...") X = [1 10.5;1.5 missing; 1.8 8; 1.7 15; 3.2 40; missing missing; 3.3 38; missing -2.3; 5.2 -2.4] Xt = Mlj.table(X) -model = SimpleImputer(norm=1) +model = BetaML.Bmlj.SimpleImputer(norm=1) modelMachine = Mlj.machine(model,Xt) (fitResults, cache, report) = Mlj.fit(model, 0, Xt) XM = Mlj.transform(model,fitResults,Xt) @@ -253,7 +253,7 @@ println("Testing MLJ Interface for GaussianMixtureImputer...") X = [1 10.5;1.5 missing; 1.8 8; 1.7 15; 3.2 40; missing missing; 3.3 38; missing -2.3; 5.2 -2.4] Xt = Mlj.table(X) -model = GaussianMixtureImputer(initialisation_strategy="grid",rng=copy(TESTRNG)) +model = BetaML.Bmlj.GaussianMixtureImputer(initialisation_strategy="grid",rng=copy(TESTRNG)) modelMachine = Mlj.machine(model,Xt) (fitResults, cache, report) = Mlj.fit(model, 0, Xt) XM = Mlj.transform(model,fitResults,Xt) @@ -264,12 +264,16 @@ Xnew_withMissing = Mlj.table([1.5 missing; missing 38; missing -2.3; XDNew = Mlj.transform(model,fitResults,Xnew_withMissing) XDMNew = Mlj.matrix(XDNew) @test isapprox(XDMNew[1,2],x̂[2,2]) +model = BetaML.Bmlj.GaussianMixtureImputer(initialisation_strategy="grid",rng=copy(TESTRNG), mixtures=BetaML.SphericalGaussian) +modelMachine = Mlj.machine(model,Xt) +(fitResults, cache, report) = Mlj.fit(model, 0, Xt) +@test report["AIC"] < 100000 println("Testing MLJ Interface for RandomForestImputer...") X = [1 10.5;1.5 missing; 1.8 8; 1.7 15; 3.2 40; missing missing; 3.3 38; missing -2.3; 5.2 -2.4] Xt = Mlj.table(X) -model = RandomForestImputer(n_trees=40,rng=copy(TESTRNG)) +model = BetaML.Bmlj.RandomForestImputer(n_trees=40,rng=copy(TESTRNG)) modelMachine = Mlj.machine(model,Xt) (fitResults, cache, report) = Mlj.fit(model, 0, Xt) XM = Mlj.transform(model,fitResults,Xt) @@ -286,7 +290,7 @@ println("Testing MLJ Interface for GeneralImputer...") X = [1 10.5;1.5 missing; 1.8 8; 1.7 15; 3.2 40; missing missing; 3.3 38; missing -2.3; 5.2 -2.4] Xt = Mlj.table(X) trng = copy(TESTRNG) -model = GeneralImputer(estimator=[GMMRegressor1(rng=copy(TESTRNG),verbosity=NONE),RandomForestEstimator(n_trees=40,rng=copy(TESTRNG),verbosity=NONE)],recursive_passages=2, missing_supported=true, rng = copy(TESTRNG)) +model = BetaML.Bmlj.GeneralImputer(estimator=[GMMRegressor1(rng=copy(TESTRNG),verbosity=NONE),RandomForestEstimator(n_trees=40,rng=copy(TESTRNG),verbosity=NONE)],recursive_passages=2, missing_supported=true, rng = copy(TESTRNG)) modelMachine = Mlj.machine(model,Xt) (fitResults, cache, report) = Mlj.fit(model, 0, Xt) XM = Mlj.transform(model,fitResults,Xt) @@ -311,7 +315,7 @@ X = [ 12 0.3 5 11; Xt = Mlj.table(X) trng = copy(TESTRNG) -model = GeneralImputer(estimator=DecisionTree.DecisionTreeRegressor(), fit_function=DecisionTree.fit!,predict_function=DecisionTree.predict,recursive_passages=10, rng = copy(TESTRNG)) +model = BetaML.Bmlj.GeneralImputer(estimator=DecisionTree.DecisionTreeRegressor(), fit_function=DecisionTree.fit!,predict_function=DecisionTree.predict,recursive_passages=10, rng = 
copy(TESTRNG)) modelMachine = Mlj.machine(model,Xt) (fitResults, cache, report) = Mlj.fit(model, 0, Xt) XM = Mlj.transform(model,fitResults,Xt) diff --git a/test/Nn_tests.jl b/test/Nn_tests.jl index e628c2aa..7e48b97b 100644 --- a/test/Nn_tests.jl +++ b/test/Nn_tests.jl @@ -623,7 +623,7 @@ import MLJBase const Mlj = MLJBase import StatisticalMeasures X, y = Mlj.@load_boston -model = NeuralNetworkRegressor(rng=copy(TESTRNG)) +model = BetaML.Bmlj.NeuralNetworkRegressor(rng=copy(TESTRNG)) regressor = Mlj.machine(model, X, y) (fitresult, cache, report) = Mlj.fit(model, -1, X, y) yhat = Mlj.predict(model, fitresult, X) @@ -631,14 +631,14 @@ yhat = Mlj.predict(model, fitresult, X) X, y = Mlj.@load_boston y2d = [y y] -model = MultitargetNeuralNetworkRegressor(rng=copy(TESTRNG)) +model = BetaML.Bmlj.MultitargetNeuralNetworkRegressor(rng=copy(TESTRNG)) regressor = Mlj.machine(model, X, y2d) (fitresult, cache, report) = Mlj.fit(model, -1, X, y2d) yhat = Mlj.predict(model, fitresult, X) @test relative_mean_error(y2d,yhat,normrec=true) < 0.2 X, y = Mlj.@load_iris -model = NeuralNetworkClassifier(rng=copy(TESTRNG),epochs=500,batch_size=64) +model = BetaML.Bmlj.NeuralNetworkClassifier(rng=copy(TESTRNG),epochs=500,batch_size=64) regressor = Mlj.machine(model, X, y) (fitresult, cache, report) = Mlj.fit(model, -1, X, y) yhat = Mlj.predict(model, fitresult, X) diff --git a/test/Trees_tests.jl b/test/Trees_tests.jl index 965a29a6..42529021 100644 --- a/test/Trees_tests.jl +++ b/test/Trees_tests.jl @@ -341,27 +341,27 @@ accβ = accuracy(ytest,ŷtestβ,rng=copy(TESTRNG)) # NEW TEST println("Testing MLJ interface for Trees models....") X, y = Mlj.@load_boston -model_dtr = DecisionTreeRegressor(rng=copy(TESTRNG)) +model_dtr = BetaML.Bmlj.DecisionTreeRegressor(rng=copy(TESTRNG)) regressor_dtr = Mlj.machine(model_dtr, X, y) (fitresult_dtr, cache, report) = Mlj.fit(model_dtr, 0, X, y) yhat_dtr = Mlj.predict(model_dtr, fitresult_dtr, X) @test relative_mean_error(y,yhat_dtr,normrec=true) < 0.02 -model_rfr = RandomForestRegressor(rng=copy(TESTRNG)) +model_rfr = BetaML.Bmlj.RandomForestRegressor(rng=copy(TESTRNG)) regressor_rfr = Mlj.machine(model_rfr, X, y) (fitresult_rfr, cache, report) = Mlj.fit(model_rfr, 0, X, y) yhat_rfr = Mlj.predict(model_rfr, fitresult_rfr, X) @test relative_mean_error(y,yhat_rfr,normrec=true) < 0.06 X, y = Mlj.@load_iris -model_dtc = DecisionTreeClassifier(rng=copy(TESTRNG)) +model_dtc = BetaML.Bmlj.DecisionTreeClassifier(rng=copy(TESTRNG)) regressor_dtc = Mlj.machine(model_dtc, X, y) (fitresult_dtc, cache, report) = Mlj.fit(model_dtc, 0, X, y) yhat_dtc = Mlj.predict(model_dtc, fitresult_dtc, X) @test Mlj.mean(StatisticalMeasures.LogLoss(tol=1e-4)(yhat_dtc, y)) < 0.0002 @test sum(Mlj.mode.(yhat_dtc) .== y)/length(y) == 1 -model_rfc = RandomForestClassifier(max_features=3,rng=copy(TESTRNG)) +model_rfc = BetaML.Bmlj.RandomForestClassifier(max_features=3,rng=copy(TESTRNG)) regressor_rfc = Mlj.machine(model_rfc, X, y) (fitresult_rfc, cache, report) = Mlj.fit(model_rfc, 0, X, y) yhat_rfc = Mlj.predict(model_rfc, fitresult_rfc, X)
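
For reference, a minimal sketch of the two behaviours exercised by the updated tests above: addressing the MLJ interface models through the (not exported) `Bmlj` submodule, and passing the `mixtures` hyperparameter as a mixture *type* rather than as a vector of mixture instances. The toy data and hyperparameter values below are illustrative assumptions, not part of the patch.

```julia
import MLJBase
const Mlj = MLJBase
using BetaML

# (1) MLJ interface models now live in the Bmlj submodule and are no longer
#     exported by BetaML, so they are reached as `BetaML.Bmlj.<ModelName>`
X, y  = Mlj.@load_iris
model = BetaML.Bmlj.KMeans()
mach  = Mlj.machine(model, X)
Mlj.fit!(mach)
ŷ = Mlj.predict(mach, X)

# (2) `mixtures` can now also be given as a type, instantiated internally,
#     instead of a vector of objects such as `[DiagonalGaussian() for i in 1:2]`
x = rand(30, 3)
t = hcat(2 .* x[:,1], x[:,2] .+ 1)          # illustrative 2-column target
m = GMMRegressor1(n_classes=2, mixtures=BetaML.SphericalGaussian)
t̂ = fit!(m, x, t)                           # fit! returns the in-sample predictions
```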