diff --git a/src/registry/Metadata.toml b/src/registry/Metadata.toml index 85956469..f78313ff 100644 --- a/src/registry/Metadata.toml +++ b/src/registry/Metadata.toml @@ -561,7 +561,7 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```julia\nmutable struct NeuralNetworkClassifier <: MLJModelInterface.Probabilistic\n```\n\nA simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for classification problems.\n\n# Parameters:\n\n * `layers`\n\n: Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers. The last \"softmax\" layer is automatically added.\n\n * `loss`\n\n: Loss (cost) function [def: `crossentropy`]. Should always assume y and ŷ as matrices. !!! warning If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.\n\n * `dloss`\n\n: Derivative of the loss function [def: `dcrossentropy`, i.e. the derivative of the cross-entropy]. Use `nothing` for autodiff.\n\n * `epochs`\n\n: Number of epochs, i.e. passages trough the whole training sample [def: `1000`]\n\n * `batch_size`\n\n: Size of each individual batch [def: `32`]\n\n * `opt_alg`\n\n: The optimisation algorithm to update the gradient at each batch [def: `ADAM()`]\n\n * `shuffle`\n\n: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n\n * `descr`\n\n: An optional title and/or description for this model\n\n * `cb`\n\n: A call back function to provide information during training [def: `fitting_info`\n\n * `categories`\n\n: The categories to represent as columns. [def: `nothing`, i.e. unique training values].\n\n * `handle_unknown`\n\n: How to handle categories not seens in training or not present in the provided `categories` array? \"error\" (default) rises an error, \"infrequent\" adds a specific column for these categories.\n\n * `other_categories_name`\n\n: Which value during prediction to assign to this \"other\" category (i.e. categories not seen on training or not present in the provided `categories` array? [def: `nothing`, i.e. typemax(Int64) for integer vectors and \"other\" for other types]. This setting is active only if `handle_unknown=\"infrequent\"` and in that case it MUST be specified if Y is neither integer or strings\n\n * `rng`\n\n: Random Number Generator [deafult: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n * data must be numerical\n * the label should be a *n-records* by *n-dimensions* matrix (e.g. a one-hot-encoded data for classification), where the output columns should be interpreted as the probabilities for each categories.\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_iris;\n\njulia> modelType = @load NeuralNetworkClassifier pkg = \"BetaML\"\n[ Info: For silent loading, specify `verbosity=0`. 
\nimport BetaML ✔\nBetaML.Nn.NeuralNetworkClassifier\n\njulia> layers = [BetaML.DenseLayer(4,8,f=BetaML.relu),BetaML.DenseLayer(8,8,f=BetaML.relu),BetaML.DenseLayer(8,3,f=BetaML.relu),BetaML.VectorFunctionLayer(3,f=BetaML.softmax)];\n\njulia> model = modelType(layers=layers,opt_alg=BetaML.ADAM())\nNeuralNetworkClassifier(\n layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.13065425957999977 0.3006718045454293 -0.14208182654389845 -0.010396909703178414; 0.048520032692515036 -0.015206389893573924 0.10185996867206404 0.3322496808168578; … ; -0.35259614611009477 0.6482620436066895 0.008337847389667918 -0.12305204287019345; 0.4658422589725906 0.6934957957952972 -0.3085357878320247 0.20222661286207866], [0.36174111580772195, -0.35269496628536656, 0.26811746239579826, 0.5528187653581791, -0.3510634981562191, 0.10825967870150688, 0.3022797568475024, 0.4981155176339185], BetaML.Utils.relu, nothing), BetaML.Nn.DenseLayer([-0.10421417572899494 -0.35499611903472195 … -0.3335182269175171 -0.3985778486065036; 0.23543572035878935 0.59952318489473 … 0.2795331413389591 -0.5720523377542953; … ; 0.2647745208772335 -0.3248093104701972 … 0.3974038426324087 -0.08540125672267229; 0.5192880535413722 0.484381279307307 … 0.5908202412047914 0.3565865691496263], [-0.43847147676332937, -0.0792557647479405, 0.28527379769156247, 0.472161396182901, 0.5499454540456155, -0.24120815998677952, 0.07292491907243237, 0.6046011380800786], BetaML.Utils.relu, nothing), BetaML.Nn.DenseLayer([0.07404458231451261 -0.6297338418338474 … -0.5203349840135756 0.2659245561353357; -0.03739230431842255 -0.7175051212845613 … 0.7131622720546834 -0.6340542706678468; -0.14453639566110688 0.38900994015838364 … 0.5074513955919556 0.34154609716155104], [-0.39346454660088837, -0.3091008284310222, -0.03586152622920202], BetaML.Utils.relu, nothing), BetaML.Nn.VectorFunctionLayer{0}(fill(NaN), 3, 3, BetaML.Utils.softmax, nothing, nothing)], \n loss = BetaML.Utils.crossentropy, \n dloss = BetaML.Utils.dcrossentropy, \n epochs = 100, \n batch_size = 32, \n opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var\"#69#72\"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), \n shuffle = true, \n descr = \"\", \n cb = BetaML.Nn.fitting_info, \n categories = nothing, \n handle_unknown = \"error\", \n other_categories_name = nothing, \n rng = Random._GLOBAL_RNG())\n\njulia> (fitResults, cache, report) = MLJ.fit(model, 0, X, y);\n\njulia> est_classes = predict(model, fitResults, X)\n150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt8, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.57, versicolor=>0.215, virginica=>0.215)\n UnivariateFinite{Multiclass{3}}(setosa=>0.565, versicolor=>0.217, virginica=>0.217)\n ⋮\n UnivariateFinite{Multiclass{3}}(setosa=>0.255, versicolor=>0.255, virginica=>0.49)\n UnivariateFinite{Multiclass{3}}(setosa=>0.254, versicolor=>0.254, virginica=>0.492)\n UnivariateFinite{Multiclass{3}}(setosa=>0.263, versicolor=>0.263, virginica=>0.473)\n```\n" +":docstring" = "```julia\nmutable struct NeuralNetworkClassifier <: MLJModelInterface.Probabilistic\n```\n\nA simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for classification problems.\n\n# Parameters:\n\n * `layers`\n\n: Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers. The last \"softmax\" layer is automatically added.\n\n * `loss`\n\n: Loss (cost) function [def: `crossentropy`]. 
It should always assume y and ŷ are matrices. !!! warning If you change the parameter `loss`, you need to either provide its derivative via the parameter `dloss` or use autodiff with `dloss=nothing`.\n\n * `dloss`\n\n: Derivative of the loss function [def: `dcrossentropy`, i.e. the derivative of the cross-entropy]. Use `nothing` for autodiff.\n\n * `epochs`\n\n: Number of epochs, i.e. passages through the whole training sample [def: `1000`]\n\n * `batch_size`\n\n: Size of each individual batch [def: `32`]\n\n * `opt_alg`\n\n: The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]\n\n * `shuffle`\n\n: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n\n * `descr`\n\n: An optional title and/or description for this model\n\n * `cb`\n\n: A callback function to provide information during training [def: `BetaML.fitting_info`]\n\n * `categories`\n\n: The categories to represent as columns. [def: `nothing`, i.e. unique training values].\n\n * `handle_unknown`\n\n: How to handle categories not seen in training or not present in the provided `categories` array? \"error\" (default) raises an error, \"infrequent\" adds a specific column for these categories.\n\n * `other_categories_name`\n\n: Which value during prediction to assign to this \"other\" category (i.e. categories not seen in training or not present in the provided `categories` array)? [def: `nothing`, i.e. typemax(Int64) for integer vectors and \"other\" for other types]. This setting is active only if `handle_unknown=\"infrequent\"`, and in that case it MUST be specified if Y is neither integer nor string\n\n * `rng`\n\n: Random Number Generator [default: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n * data must be numerical\n * the label should be a *n-records* by *n-dimensions* matrix (e.g. one-hot-encoded data for classification), where the output columns should be interpreted as the probabilities for each category.\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_iris;\n\njulia> modelType = @load NeuralNetworkClassifier pkg = \"BetaML\"\n[ Info: For silent loading, specify `verbosity=0`. 
\nimport BetaML ✔\nBetaML.Nn.NeuralNetworkClassifier\n\njulia> layers = [BetaML.DenseLayer(4,8,f=BetaML.relu),BetaML.DenseLayer(8,8,f=BetaML.relu),BetaML.DenseLayer(8,3,f=BetaML.relu),BetaML.VectorFunctionLayer(3,f=BetaML.softmax)];\n\njulia> model = modelType(layers=layers,opt_alg=BetaML.ADAM())\nNeuralNetworkClassifier(\n layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.13065425957999977 0.3006718045454293 -0.14208182654389845 -0.010396909703178414; 0.048520032692515036 -0.015206389893573924 0.10185996867206404 0.3322496808168578; … ; -0.35259614611009477 0.6482620436066895 0.008337847389667918 -0.12305204287019345; 0.4658422589725906 0.6934957957952972 -0.3085357878320247 0.20222661286207866], [0.36174111580772195, -0.35269496628536656, 0.26811746239579826, 0.5528187653581791, -0.3510634981562191, 0.10825967870150688, 0.3022797568475024, 0.4981155176339185], BetaML.Utils.relu, nothing), BetaML.Nn.DenseLayer([-0.10421417572899494 -0.35499611903472195 … -0.3335182269175171 -0.3985778486065036; 0.23543572035878935 0.59952318489473 … 0.2795331413389591 -0.5720523377542953; … ; 0.2647745208772335 -0.3248093104701972 … 0.3974038426324087 -0.08540125672267229; 0.5192880535413722 0.484381279307307 … 0.5908202412047914 0.3565865691496263], [-0.43847147676332937, -0.0792557647479405, 0.28527379769156247, 0.472161396182901, 0.5499454540456155, -0.24120815998677952, 0.07292491907243237, 0.6046011380800786], BetaML.Utils.relu, nothing), BetaML.Nn.DenseLayer([0.07404458231451261 -0.6297338418338474 … -0.5203349840135756 0.2659245561353357; -0.03739230431842255 -0.7175051212845613 … 0.7131622720546834 -0.6340542706678468; -0.14453639566110688 0.38900994015838364 … 0.5074513955919556 0.34154609716155104], [-0.39346454660088837, -0.3091008284310222, -0.03586152622920202], BetaML.Utils.relu, nothing), BetaML.Nn.VectorFunctionLayer{0}(fill(NaN), 3, 3, BetaML.Utils.softmax, nothing, nothing)], \n loss = BetaML.Utils.crossentropy, \n dloss = BetaML.Utils.dcrossentropy, \n epochs = 100, \n batch_size = 32, \n opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var\"#69#72\"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), \n shuffle = true, \n descr = \"\", \n cb = BetaML.Nn.fitting_info, \n categories = nothing, \n handle_unknown = \"error\", \n other_categories_name = nothing, \n rng = Random._GLOBAL_RNG())\n\njulia> (fitResults, cache, report) = MLJ.fit(model, 0, X, y);\n\njulia> est_classes = predict(model, fitResults, X)\n150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt8, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.57, versicolor=>0.215, virginica=>0.215)\n UnivariateFinite{Multiclass{3}}(setosa=>0.565, versicolor=>0.217, virginica=>0.217)\n ⋮\n UnivariateFinite{Multiclass{3}}(setosa=>0.255, versicolor=>0.255, virginica=>0.49)\n UnivariateFinite{Multiclass{3}}(setosa=>0.254, versicolor=>0.254, virginica=>0.492)\n UnivariateFinite{Multiclass{3}}(setosa=>0.263, versicolor=>0.263, virginica=>0.473)\n```\n" ":name" = "NeuralNetworkClassifier" ":human_name" = "neural network classifier" ":is_supervised" = "`true`" @@ -4369,7 +4369,7 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nDecisionTreeRegressor\n```\n\nA model type for constructing a CART decision tree regressor, based on [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nDecisionTreeRegressor = @load 
DecisionTreeRegressor pkg=DecisionTree\n```\n\nDo `model = DecisionTreeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `DecisionTreeRegressor(max_depth=...)`.\n\n`DecisionTreeRegressor` implements the [CART algorithm](https://en.wikipedia.org/wiki/Decision_tree_learning), originally published in Breiman, Leo; Friedman, J. H.; Olshen, R. A.; Stone, C. J. (1984): \"Classification and regression trees\". *Monterey, CA: Wadsworth & Brooks/Cole Advanced Books & Software.*.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `max_depth=-1`: max depth of the decision tree (-1=any)\n * `min_samples_leaf=1`: max number of samples each leaf needs to have\n * `min_samples_split=2`: min number of samples needed for a split\n * `min_purity_increase=0`: min purity needed for a split\n * `n_subfeatures=0`: number of features to select at random (0 for all, -1 for square root of number of features)\n * `post_prune=false`: set to `true` for post-fit pruning\n * `merge_purity_threshold=1.0`: (post-pruning) merge leaves having combined purity `>= merge_purity_threshold`\n * `feature_importance`: method to use for computing feature importances. One of `(:impurity, :split)`\n * `rng=Random.GLOBAL_RNG`: random number generator or seed\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `tree`: the tree or stump object returned by the core DecisionTree.jl algorithm\n\n# Report\n\n * `features`: the names of the features encountered in training\n\n# Examples\n\n```\nusing MLJ\nTree = @load DecisionTreeRegressor pkg=DecisionTree\ntree = Tree(max_depth=4, min_samples_split=3)\n\nX, y = make_regression(100, 2) # synthetic data\nmach = machine(tree, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) # new predictions\n\nfitted_params(mach).tree # raw tree or stump object from DecisionTree.jl\n```\n\nSee also [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl) and the unwrapped model type [`MLJDecisionTreeInterface.DecisionTree.DecisionTreeRegressor`](@ref).\n" +":docstring" = "```\nDecisionTreeRegressor\n```\n\nA model type for constructing a CART decision tree regressor, based on [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nDecisionTreeRegressor = @load DecisionTreeRegressor pkg=DecisionTree\n```\n\nDo `model = DecisionTreeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `DecisionTreeRegressor(max_depth=...)`.\n\n`DecisionTreeRegressor` implements the [CART algorithm](https://en.wikipedia.org/wiki/Decision_tree_learning), originally published in Breiman, Leo; Friedman, J. H.; Olshen, R. A.; Stone, C. J. 
(1984): \"Classification and regression trees\". *Monterey, CA: Wadsworth & Brooks/Cole Advanced Books & Software.*.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `max_depth=-1`: max depth of the decision tree (-1=any)\n * `min_samples_leaf=1`: max number of samples each leaf needs to have\n * `min_samples_split=2`: min number of samples needed for a split\n * `min_purity_increase=0`: min purity needed for a split\n * `n_subfeatures=0`: number of features to select at random (0 for all)\n * `post_prune=false`: set to `true` for post-fit pruning\n * `merge_purity_threshold=1.0`: (post-pruning) merge leaves having combined purity `>= merge_purity_threshold`\n * `feature_importance`: method to use for computing feature importances. One of `(:impurity, :split)`\n * `rng=Random.GLOBAL_RNG`: random number generator or seed\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `tree`: the tree or stump object returned by the core DecisionTree.jl algorithm\n\n# Report\n\n * `features`: the names of the features encountered in training\n\n# Examples\n\n```\nusing MLJ\nTree = @load DecisionTreeRegressor pkg=DecisionTree\ntree = Tree(max_depth=4, min_samples_split=3)\n\nX, y = make_regression(100, 2) # synthetic data\nmach = machine(tree, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) # new predictions\n\nfitted_params(mach).tree # raw tree or stump object from DecisionTree.jl\n```\n\nSee also [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl) and the unwrapped model type [`MLJDecisionTreeInterface.DecisionTree.DecisionTreeRegressor`](@ref).\n" ":name" = "DecisionTreeRegressor" ":human_name" = "CART decision tree regressor" ":is_supervised" = "`true`" @@ -4403,7 +4403,7 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nDecisionTreeClassifier\n```\n\nA model type for constructing a CART decision tree classifier, based on [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nDecisionTreeClassifier = @load DecisionTreeClassifier pkg=DecisionTree\n```\n\nDo `model = DecisionTreeClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `DecisionTreeClassifier(max_depth=...)`.\n\n`DecisionTreeClassifier` implements the [CART algorithm](https://en.wikipedia.org/wiki/Decision_tree_learning), originally published in Breiman, Leo; Friedman, J. H.; Olshen, R. A.; Stone, C. J. (1984): \"Classification and regression trees\". 
*Monterey, CA: Wadsworth & Brooks/Cole Advanced Books & Software.*.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `max_depth=-1`: max depth of the decision tree (-1=any)\n * `min_samples_leaf=1`: max number of samples each leaf needs to have\n * `min_samples_split=2`: min number of samples needed for a split\n * `min_purity_increase=0`: min purity needed for a split\n * `n_subfeatures=0`: number of features to select at random (0 for all, -1 for square root of number of features)\n * `post_prune=false`: set to `true` for post-fit pruning\n * `merge_purity_threshold=1.0`: (post-pruning) merge leaves having combined purity `>= merge_purity_threshold`\n * `display_depth=5`: max depth to show when displaying the tree\n * `feature_importance`: method to use for computing feature importances. One of `(:impurity, :split)`\n * `rng=Random.GLOBAL_RNG`: random number generator or seed\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic, but uncalibrated.\n * `predict_mode(mach, Xnew)`: instead return the mode of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `tree`: the tree or stump object returned by the core DecisionTree.jl algorithm\n * `encoding`: dictionary of target classes keyed on integers used internally by DecisionTree.jl; needed to interpret pretty printing of tree (obtained by calling `fit!(mach, verbosity=2)` or from report - see below)\n * `features`: the names of the features encountered in training, in an order consistent with the output of `print_tree` (see below)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `classes_seen`: list of target classes actually observed in training\n * `print_tree`: method to print a pretty representation of the fitted tree, with single argument the tree depth; interpretation requires internal integer-class encoding (see \"Fitted parameters\" above).\n * `features`: the names of the features encountered in training, in an order consistent with the output of `print_tree` (see below)\n\n# Examples\n\n```\nusing MLJ\nTree = @load DecisionTreeClassifier pkg=DecisionTree\ntree = Tree(max_depth=4, min_samples_split=3)\n\nX, y = @load_iris\nmach = machine(tree, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\nyhat = predict(mach, Xnew) # probabilistic predictions\npredict_mode(mach, Xnew) # point predictions\npdf.(yhat, \"virginica\") # probabilities for the \"verginica\" class\n\nfitted_params(mach).tree # raw tree or stump object from DecisionTrees.jl\n\njulia> report(mach).print_tree(3)\nFeature 4, Threshold 0.8\nL-> 1 : 50/50\nR-> Feature 4, Threshold 1.75\n L-> Feature 3, Threshold 4.95\n L->\n R->\n R-> Feature 3, Threshold 4.85\n L->\n R-> 3 : 43/43\n```\n\nTo interpret the internal class labelling:\n\n```\njulia> 
fitted_params(mach).encoding\nDict{CategoricalArrays.CategoricalValue{String, UInt32}, UInt32} with 3 entries:\n \"virginica\" => 0x00000003\n \"setosa\" => 0x00000001\n \"versicolor\" => 0x00000002\n```\n\nSee also [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl) and the unwrapped model type [`MLJDecisionTreeInterface.DecisionTree.DecisionTreeClassifier`](@ref).\n" +":docstring" = "```\nDecisionTreeClassifier\n```\n\nA model type for constructing a CART decision tree classifier, based on [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nDecisionTreeClassifier = @load DecisionTreeClassifier pkg=DecisionTree\n```\n\nDo `model = DecisionTreeClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `DecisionTreeClassifier(max_depth=...)`.\n\n`DecisionTreeClassifier` implements the [CART algorithm](https://en.wikipedia.org/wiki/Decision_tree_learning), originally published in Breiman, Leo; Friedman, J. H.; Olshen, R. A.; Stone, C. J. (1984): \"Classification and regression trees\". *Monterey, CA: Wadsworth & Brooks/Cole Advanced Books & Software*.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `max_depth=-1`: max depth of the decision tree (-1=any)\n * `min_samples_leaf=1`: min number of samples each leaf needs to have\n * `min_samples_split=2`: min number of samples needed for a split\n * `min_purity_increase=0`: min purity needed for a split\n * `n_subfeatures=0`: number of features to select at random (0 for all)\n * `post_prune=false`: set to `true` for post-fit pruning\n * `merge_purity_threshold=1.0`: (post-pruning) merge leaves having combined purity `>= merge_purity_threshold`\n * `display_depth=5`: max depth to show when displaying the tree\n * `feature_importance`: method to use for computing feature importances. One of `(:impurity, :split)`\n * `rng=Random.GLOBAL_RNG`: random number generator or seed\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. 
Predictions are probabilistic, but uncalibrated.\n * `predict_mode(mach, Xnew)`: instead return the mode of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `tree`: the tree or stump object returned by the core DecisionTree.jl algorithm\n * `encoding`: dictionary of target classes keyed on integers used internally by DecisionTree.jl; needed to interpret pretty printing of tree (obtained by calling `fit!(mach, verbosity=2)` or from report - see below)\n * `features`: the names of the features encountered in training, in an order consistent with the output of `print_tree` (see below)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `classes_seen`: list of target classes actually observed in training\n * `print_tree`: method to print a pretty representation of the fitted tree, with single argument the tree depth; interpretation requires internal integer-class encoding (see \"Fitted parameters\" above).\n * `features`: the names of the features encountered in training, in an order consistent with the output of `print_tree` (see below)\n\n# Examples\n\n```\nusing MLJ\nTree = @load DecisionTreeClassifier pkg=DecisionTree\ntree = Tree(max_depth=4, min_samples_split=3)\n\nX, y = @load_iris\nmach = machine(tree, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\nyhat = predict(mach, Xnew) # probabilistic predictions\npredict_mode(mach, Xnew) # point predictions\npdf.(yhat, \"virginica\") # probabilities for the \"virginica\" class\n\nfitted_params(mach).tree # raw tree or stump object from DecisionTree.jl\n\njulia> report(mach).print_tree(3)\nFeature 4, Threshold 0.8\nL-> 1 : 50/50\nR-> Feature 4, Threshold 1.75\n L-> Feature 3, Threshold 4.95\n L->\n R->\n R-> Feature 3, Threshold 4.85\n L->\n R-> 3 : 43/43\n```\n\nTo interpret the internal class labelling:\n\n```\njulia> fitted_params(mach).encoding\nDict{CategoricalArrays.CategoricalValue{String, UInt32}, UInt32} with 3 entries:\n \"virginica\" => 0x00000003\n \"setosa\" => 0x00000001\n \"versicolor\" => 0x00000002\n```\n\nSee also [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl) and the unwrapped model type [`MLJDecisionTreeInterface.DecisionTree.DecisionTreeClassifier`](@ref).\n" ":name" = "DecisionTreeClassifier" ":human_name" = "CART decision tree classifier" ":is_supervised" = "`true`" @@ -4641,16 +4641,16 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "Regression models for various taks: mse, logistic, poisson, gamma, tweedie." +":docstring" = "```\nEvoLinearRegressor(; kwargs...)\n```\n\nA model type for constructing an EvoLinearRegressor, based on [EvoLinear.jl](https://github.com/jeremiedb/EvoLinear.jl), and implementing both an internal API and the MLJ model interface.\n\n# Keyword arguments\n\n * `loss=:mse`: loss function to be minimised. Can be one of:\n\n * `:mse`\n * `:logistic`\n * `:poisson`\n * `:gamma`\n * `:tweedie`\n * `nrounds=10`: maximum number of training rounds.\n * `eta=1`: Learning rate. Typically in the range `[1e-2, 1]`.\n * `L1=0`: Regularization penalty applied by shrinking a weight update to 0 if the update is < L1. No penalty if update > L1. Results in sparse feature selection. Typically in the `[0, 1]` range on normalized features.\n * `L2=0`: Regularization penalty applied to the square of the weight update value. Restricts large parameter values. 
Typically in the `[0, 1]` range on normalized features.\n * `rng=123`: random seed. Not used at the moment.\n * `updater=:all`: training method. Only `:all` is supported at the moment. Gradients for each feature are computed simultaneously, then the bias is updated based on all feature updates.\n * `device=:cpu`: Only `:cpu` is supported at the moment.\n\n# Internal API\n\nDo `config = EvoLinearRegressor()` to construct a hyper-parameter struct with default hyper-parameters. Provide keyword arguments as listed above to override defaults, for example:\n\n```julia\nEvoLinearRegressor(loss=:logistic, L1=1e-3, L2=1e-2, nrounds=100)\n```\n\n## Training model\n\nA model is built using [`fit`](@ref):\n\n```julia\nconfig = EvoLinearRegressor()\nm = fit(config; x, y, w)\n```\n\n## Inference\n\nThe fitted result is an `EvoLinearModel`, which acts as a prediction function when passed a feature matrix as argument. \n\n```julia\npreds = m(x)\n```\n\n# MLJ Interface\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoLinearRegressor = @load EvoLinearRegressor pkg=EvoLinear\n```\n\nDo `model = EvoLinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoLinearRegressor(loss=...)`.\n\n## Training model\n\nIn MLJ or MLJBase, bind an instance `model` to data with `mach = machine(model, X, y)` where: \n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. 
Predictions are deterministic.\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: the `EvoLinearModel` object returned by EvoLinear.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:coef`: Vector of coefficients (βs) associated with each of the features.\n * `:bias`: Value of the bias.\n * `:names`: Names of each of the features.\n" ":name" = "EvoLinearRegressor" ":human_name" = "evo linear regressor" ":is_supervised" = "`true`" ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":fit", ":predict", ":reformat", ":selectrows", ":update"] -":hyperparameters" = "`(:loss, :updater, :nrounds, :eta, :L1, :L2, :rng, :device)`" -":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"AbstractFloat\", \"AbstractFloat\", \"AbstractFloat\", \"Any\", \"Symbol\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":implemented_methods" = [":fit", ":predict", ":update"] +":hyperparameters" = "`(:updater, :nrounds, :eta, :L1, :L2, :rng, :device)`" +":hyperparameter_types" = "`(\"Symbol\", \"Int64\", \"Any\", \"Any\", \"Any\", \"Any\", \"Symbol\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":nrounds" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" @@ -4929,108 +4929,6 @@ ":deep_properties" = "`()`" ":reporting_operations" = "`()`" -[OutlierDetectionNetworks.AEDetector] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`true`" -":package_name" = "OutlierDetectionNetworks" -":package_license" = "MIT" -":load_path" = "OutlierDetectionNetworks.AEDetector" -":package_uuid" = "c7f57e37-4fcb-4a0b-a36c-c2204bc839a7" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionNetworks.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "```\nAEDetector(encoder= Chain(),\n decoder = Chain(),\n batchsize= 32,\n epochs = 1,\n shuffle = false,\n partial = true,\n opt = Adam(),\n loss = mse)\n```\n\nCalculate the anomaly score of an instance based on the reconstruction loss of an autoencoder, see [1] for an explanation of auto encoders.\n\n## Parameters\n\n```\nencoder::Chain\n```\n\nTransforms the input data into a latent state with a fixed shape.\n\n```\ndecoder::Chain\n```\n\nTransforms the latent state back into the shape of the input data.\n\n```\nbatchsize::Integer\n```\n\nThe number of samples to work through before updating 
the internal model parameters.\n\n```\nepochs::Integer\n```\n\nThe number of passes of the entire training dataset the machine learning algorithm has completed. \n\n```\nshuffle::Bool\n```\n\nIf `shuffle=true`, shuffles the observations each time iterations are re-started, else no shuffling is performed.\n\n```\npartial::Bool\n```\n\nIf `partial=false`, drops the last mini-batch if it is smaller than the batchsize.\n\n```\nopt::Any\n```\n\nAny Flux-compatibale optimizer, typically a `struct` that holds all the optimiser parameters along with a definition of `apply!` that defines how to apply the update rule associated with the optimizer.\n\n```\nloss::Function\n```\n\nThe loss function used to calculate the reconstruction error, see [https://fluxml.ai/Flux.jl/stable/models/losses/](https://fluxml.ai/Flux.jl/stable/models/losses/) for examples.\n\n## Examples\n\n```julia\nusing OutlierDetection: AEDetector, fit, score\ndetector = AEDetector()\nX = rand(10, 100)\nresult = fit(detector, X)\ntest_scores = transform(detector, result.model, X)\n```\n\n## References\n\n[1] Aggarwal, Charu C. (2017): Outlier Analysis.\n" -":name" = "AEDetector" -":human_name" = "ae detector" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] -":hyperparameters" = "`(:encoder, :decoder, :batchsize, :epochs, :shuffle, :partial, :opt, :loss)`" -":hyperparameter_types" = "`(\"Flux.Chain\", \"Flux.Chain\", \"Integer\", \"Integer\", \"Bool\", \"Bool\", \"Any\", \"Function\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" - -[OutlierDetectionNetworks.DSADDetector] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`true`" -":package_name" = "OutlierDetectionNetworks" -":package_license" = "MIT" -":load_path" = "OutlierDetectionNetworks.DSADDetector" -":package_uuid" = "c7f57e37-4fcb-4a0b-a36c-c2204bc839a7" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionNetworks.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "```\nDSADDetector(encoder = Chain(),\n decoder = Chain(),\n batchsize = 32,\n epochs = 1,\n shuffle = true,\n partial = false,\n opt = Adam(),\n loss = mse,\n eta = 1,\n eps = 1e-6,\n callback = _ -> () -> ())\n```\n\nDeep Semi-Supervised Anomaly detection technique based on the distance to a hypersphere center as described in [1].\n\n## 
Parameters\n\n```\nencoder::Chain\n```\n\nTransforms the input data into a latent state with a fixed shape.\n\n```\ndecoder::Chain\n```\n\nTransforms the latent state back into the shape of the input data.\n\n```\nbatchsize::Integer\n```\n\nThe number of samples to work through before updating the internal model parameters.\n\n```\nepochs::Integer\n```\n\nThe number of passes of the entire training dataset the machine learning algorithm has completed. \n\n```\nshuffle::Bool\n```\n\nIf `shuffle=true`, shuffles the observations each time iterations are re-started, else no shuffling is performed.\n\n```\npartial::Bool\n```\n\nIf `partial=false`, drops the last mini-batch if it is smaller than the batchsize.\n\n```\nopt::Any\n```\n\nAny Flux-compatibale optimizer, typically a `struct` that holds all the optimiser parameters along with a definition of `apply!` that defines how to apply the update rule associated with the optimizer.\n\n```\nloss::Function\n```\n\nThe loss function used to calculate the reconstruction error, see [https://fluxml.ai/Flux.jl/stable/models/losses/](https://fluxml.ai/Flux.jl/stable/models/losses/) for examples.\n\n```\neta::Real\n```\n\nWeighting parameter for the labeled data; i.e. higher values of eta assign higher weight to labeled data in the svdd loss function. For a sensitivity analysis of this parameter, see [1].\n\n```\neps::Real\n```\n\nBecause the inverse distance used in the svdd loss can lead to division by zero, the parameters `eps` is added for numerical stability.\n\n```\ncallback::Function\n```\n\n*Experimental parameter that might change*. A function to be called after the model parameters have been updated that can call Flux's callback helpers, see [https://fluxml.ai/Flux.jl/stable/utilities/#Callback-Helpers-1](https://fluxml.ai/Flux.jl/stable/utilities/#Callback-Helpers-1).\n\n**Notice:** The parameters `batchsize`, `epochs`, `shuffle`, `partial`, `opt` and `callback` can also be tuples of size 2, specifying the corresponding values for (1) pretraining and (2) training; otherwise the same values are used for pretraining and training.\n\n## Examples\n\n```julia\nusing OutlierDetection: DSADDetector, fit, score\ndetector = DSADDetector()\nX = rand(10, 100)\ny = rand([-1,1], 100)\nmodel = fit(detector, X, y)\ntrain_scores, test_scores = score(detector, model, X)\n```\n\n## References\n\n[1] Ruff, Lukas; Vandermeulen, Robert A.; Görnitz, Nico; Binder, Alexander; Müller, Emmanuel; Müller, Klaus-Robert; Kloft, Marius (2019): Deep Semi-Supervised Anomaly Detection.\n" -":name" = "DSADDetector" -":human_name" = "dsad detector" -":is_supervised" = "`true`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.SupervisedDetector`" -":implemented_methods" = [":reformat", ":selectrows", ":fit", ":transform"] -":hyperparameters" = "`(:encoder, :decoder, :batchsize, :epochs, :shuffle, :partial, :opt, :loss, :eta, :eps, :callback)`" -":hyperparameter_types" = "`(\"Flux.Chain\", \"Flux.Chain\", \"Tuple{Integer, Integer}\", \"Tuple{Integer, Integer}\", \"Tuple{Bool, Bool}\", \"Tuple{Bool, Bool}\", \"Any\", \"Function\", \"Number\", \"Number\", \"Tuple{Function, Function}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" - -[OutlierDetectionNetworks.ESADDetector] 
-":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_pure_julia" = "`true`" -":package_name" = "OutlierDetectionNetworks" -":package_license" = "MIT" -":load_path" = "OutlierDetectionNetworks.ESADDetector" -":package_uuid" = "c7f57e37-4fcb-4a0b-a36c-c2204bc839a7" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionNetworks.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = "```\nESADDetector(encoder = Chain(),\n decoder = Chain(),\n batchsize = 32,\n epochs = 1,\n shuffle = false,\n partial = true,\n opt = Adam(),\n λ1 = 1,\n λ2 = 1,\n noise = identity)\n```\n\nEnd-to-End semi-supervised anomaly detection algorithm similar to DeepSAD, but without the pretraining phase. The algorithm was published by Huang et al., see [1].\n\n## Parameters\n\n```\nencoder::Chain\n```\n\nTransforms the input data into a latent state with a fixed shape.\n\n```\ndecoder::Chain\n```\n\nTransforms the latent state back into the shape of the input data.\n\n```\nbatchsize::Integer\n```\n\nThe number of samples to work through before updating the internal model parameters.\n\n```\nepochs::Integer\n```\n\nThe number of passes of the entire training dataset the machine learning algorithm has completed. 
\n\n```\nshuffle::Bool\n```\n\nIf `shuffle=true`, shuffles the observations each time iterations are re-started, else no shuffling is performed.\n\n```\npartial::Bool\n```\n\nIf `partial=false`, drops the last mini-batch if it is smaller than the batchsize.\n\n```\nopt::Any\n```\n\nAny Flux-compatibale optimizer, typically a `struct` that holds all the optimiser parameters along with a definition of `apply!` that defines how to apply the update rule associated with the optimizer.\n\n```\nλ1::Real\n```\n\nWeighting parameter of the norm loss, which minimizes the empirical variance and thus minimizes entropy.\n\n```\nλ2::Real\n```\n\nWeighting parameter of the assistent loss function to define the consistency between the two encoders.\n\n```\nnoise::Function (AbstractArray{T} -> AbstractArray{T})\n```\n\nA function to be applied to a batch of input data to add noise, see [1] for an explanation.\n\n## Examples\n\n```julia\nusing OutlierDetection: ESADDetector, fit, score\ndetector = ESADDetector()\nX = rand(10, 100)\ny = rand([-1,1], 100)\nmodel = fit(detector, X, y)\ntrain_scores, test_scores = score(detector, model, X)\n```\n\n## References\n\n[1] Huang, Chaoqin; Ye, Fei; Zhang, Ya; Wang, Yan-Feng; Tian, Qi (2020): ESAD: End-to-end Deep Semi-supervised Anomaly Detection.\n" -":name" = "ESADDetector" -":human_name" = "esad detector" -":is_supervised" = "`true`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.SupervisedDetector`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] -":hyperparameters" = "`(:encoder, :decoder, :batchsize, :epochs, :shuffle, :partial, :opt, :λ1, :λ2, :noise)`" -":hyperparameter_types" = "`(\"Flux.Chain\", \"Flux.Chain\", \"Integer\", \"Integer\", \"Bool\", \"Bool\", \"Any\", \"Number\", \"Number\", \"Function\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" - [EvoTrees.EvoTreeClassifier] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" @@ -5083,7 +4981,7 @@ ":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "EvoTreeGaussian(;kwargs...)\n\nA model type for constructing a EvoTreeGaussian, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API the MLJ model interface. EvoTreeGaussian is used to perform Gaussian probabilistic regression, fitting μ and σ parameters to maximize likelihood.\n\n# Hyper-parameters\n\n * `nrounds=10`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked.\n * `lambda::T=0.0`: L2 regularization term on weights. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain imprvement needed to perform a node split. Higher gamma can result in a more robust model.\n * `max_depth=5`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. 
Compute cost is proportional to 2^max_depth. Typical optimal values are in the 3 to 9 range.\n * `min_weight=0.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=32`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing). !Experimental feature: note that for Gaussian regression, constraints may not be enforce systematically.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n * `metric::Symbol=:none`: Metric that is to be tracked during the training process. One of: `:none`, `:gaussian`.\n * `device=\"cpu\"`: Hardware device to use for computations. Can be either `\"cpu\"` or `\"gpu\"`.\n\n# Internal API\n\nDo `config = EvoTreeGaussian()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeGaussian(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, 2]` where the second dimensions refer to `μ` and `σ` respectively:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeGaussian = @load EvoTreeGaussian pkg=EvoTrees\n```\n\nDo `model = EvoTreeGaussian()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeGaussian(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: returns a vector of Gaussian distributions given features `Xnew` having the same scitype as `X` above.\n\nPredictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n * `predict_mean(mach, Xnew)`\n * `predict_mode(mach, Xnew)`\n * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nparams = EvoTreeGaussian(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(params; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeGaussian = @load EvoTreeGaussian pkg=EvoTrees\nmodel = EvoTreeGaussian(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n```\n" +":docstring" = "EvoTreeGaussian(;kwargs...)\n\nA model type for constructing an EvoTreeGaussian, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API and the MLJ model interface. EvoTreeGaussian is used to perform Gaussian probabilistic regression, fitting μ and σ parameters to maximize likelihood.\n\n# Hyper-parameters\n\n * `nrounds=10`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked.\n * `lambda::T=0.0`: L2 regularization term on weights. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model.\n * `max_depth=5`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to 2^max_depth. Typical optimal values are in the 3 to 9 range.\n * `min_weight=0.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=32`: Number of bins into which each feature is quantized. 
Buckets are defined based on quantiles, hence resulting in equal weight bins.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing). !Experimental feature: note that for Gaussian regression, constraints may not be enforced systematically.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n * `device=\"cpu\"`: Hardware device to use for computations. Can be either `\"cpu\"` or `\"gpu\"`.\n\n# Internal API\n\nDo `config = EvoTreeGaussian()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeGaussian(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, 2]` where the second dimension refers to `μ` and `σ` respectively:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeGaussian = @load EvoTreeGaussian pkg=EvoTrees\n```\n\nDo `model = EvoTreeGaussian()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeGaussian(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: returns a vector of Gaussian distributions given features `Xnew` having the same scitype as `X` above.\n\nPredictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n * `predict_mean(mach, Xnew)`\n * `predict_mode(mach, Xnew)`\n * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nparams = EvoTreeGaussian(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(params; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeGaussian = @load EvoTreeGaussian pkg=EvoTrees\nmodel = EvoTreeGaussian(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n```\n" ":name" = "EvoTreeGaussian" ":human_name" = "evo tree gaussian" ":is_supervised" = "`true`"
":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "EvoTreeMLE(;kwargs...)\n\nA model type for constructing a EvoTreeMLE, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API the MLJ model interface. EvoTreeMLE performs maximum likelihood estimation. Assumed distributions is specified through `loss` kwargs. Both Normal/Gaussian and Logistic distributions are supported.\n\n# Hyper-parameters\n\n`loss=:gaussian`: Loss to be be minimized during training. One of:\n\n * `:normal`/ `:gaussian`\n * `:logistic`\n * `nrounds=10`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked.\n * `lambda::T=0.0`: L2 regularization term on weights. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain imprvement needed to perform a node split. Higher gamma can result in a more robust model.\n * `max_depth=5`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf.\n\nA complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to 2^max_depth. Typical optimal values are in the 3 to 9 range.\n\n * `min_weight=0.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=32`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing). !Experimental feature: note that for MLE regression, constraints may not be enforced systematically.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n * `metric::Symbol=:none`: Metric that is to be tracked during the training process. One of: `:none`, `:gaussian`, `:logistic`.\n * `device=\"cpu\"`: Hardware device to use for computations. Can be either `\"cpu\"` or `\"gpu\"`.\n\n# Internal API\n\nDo `config = EvoTreeMLE()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeMLE(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, nparams]` where the second dimensions refer to `μ` & `σ` for Normal/Gaussian and `μ` & `s` for Logistic.\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeMLE = @load EvoTreeMLE pkg=EvoTrees\n```\n\nDo `model = EvoTreeMLE()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeMLE(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: returns a vector of Gaussian or Logistic distributions (according to provided `loss`) given features `Xnew` having the same scitype as `X` above.\n\nPredictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n * `predict_mean(mach, Xnew)`\n * `predict_mode(mach, Xnew)`\n * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeMLE(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeMLE = @load EvoTreeMLE pkg=EvoTrees\nmodel = EvoTreeMLE(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n```\n" +":docstring" = "EvoTreeMLE(;kwargs...)\n\nA model type for constructing an EvoTreeMLE, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API and the MLJ model interface. EvoTreeMLE performs maximum likelihood estimation. The assumed distribution is specified through the `loss` kwarg. Both Gaussian and Logistic distributions are supported.\n\n# Hyper-parameters\n\n * `loss=:gaussian`: Loss to be minimized during training. One of: `:gaussian` / `:gaussian_mle`, `:logistic` / `:logistic_mle`.\n * `nrounds=10`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked.\n * `lambda::T=0.0`: L2 regularization term on weights. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model.\n * `max_depth=5`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf.\n\n   A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to `2^max_depth`. Typical optimal values are in the 3 to 9 range.\n\n * `min_weight=0.0`: Minimum weight needed in a node to perform a split. This is the number of observations by default, or the sum of the weights as provided by the `weights` vector.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree.
Should be in `]0, 1]`.\n * `nbins=32`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing). !Experimental feature: note that for MLE regression, constraints may not be enforced systematically.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n * `device=\"cpu\"`: Hardware device to use for computations. Can be either `\"cpu\"` or `\"gpu\"`.\n\n# Internal API\n\nDo `config = EvoTreeMLE()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeMLE(max_depth=...)`.\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref), which returns a `Matrix` of size `[nobs, nparams]` whose columns refer to `μ` & `σ` for Normal/Gaussian and `μ` & `s` for Logistic.\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, the model acts as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeMLE = @load EvoTreeMLE pkg=EvoTrees\n```\n\nDo `model = EvoTreeMLE()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeMLE(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: returns a vector of Gaussian or Logistic distributions (according to the provided `loss`) given features `Xnew` having the same scitype as `X` above.\n\nPredictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n * `predict_mean(mach, Xnew)`\n * `predict_mode(mach, Xnew)`\n * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by the EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeMLE(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeMLE = @load EvoTreeMLE pkg=EvoTrees\nmodel = EvoTreeMLE(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n```\n" ":name" = "EvoTreeMLE"
":human_name" = "evo tree mle" ":is_supervised" = "`true`" @@ -5711,6 +5609,40 @@ ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +[unknown.EvoSplineRegressor] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`false`" +":package_name" = "unknown" +":package_license" = "unknown" +":load_path" = "EvoLinear.EvoSplineRegressor" +":package_uuid" = "unknown" +":package_url" = "unknown" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "```\nEvoSplineRegressor(; kwargs...)\n```\n\nA model type for constructing a EvoSplineRegressor, based on [EvoLinear.jl](https://github.com/jeremiedb/EvoLinear.jl), and implementing both an internal API and the MLJ model interface.\n\n# Keyword arguments\n\n * `loss=:mse`: loss function to be minimised. Can be one of:\n\n * `:mse`\n * `:logistic`\n * `:poisson`\n * `:gamma`\n * `:tweedie`\n * `nrounds=10`: maximum number of training rounds.\n * `eta=1`: Learning rate. Typically in the range `[1e-2, 1]`.\n * `L1=0`: Regularization penalty applied by shrinking to 0 weight update if update is < L1. No penalty if update > L1. Results in sparse feature selection. Typically in the `[0, 1]` range on normalized features.\n * `L2=0`: Regularization penalty applied to the squared of the weight update value. Restricts large parameter values. Typically in the `[0, 1]` range on normalized features.\n * `rng=123`: random seed. Not used at the moment.\n * `updater=:all`: training method. Only `:all` is supported at the moment. Gradients for each feature are computed simultaneously, then bias is updated based on all features update.\n * `device=:cpu`: Only `:cpu` is supported at the moment.\n\n# Internal API\n\nDo `config = EvoSplineRegressor()` to construct an hyper-parameter struct with default hyper-parameters. Provide keyword arguments as listed above to override defaults, for example:\n\n```julia\nEvoSplineRegressor(loss=:logistic, L1=1e-3, L2=1e-2, nrounds=100)\n```\n\n## Training model\n\nA model is built using [`fit`](@ref):\n\n```julia\nconfig = EvoSplineRegressor()\nm = fit(config; x, y, w)\n```\n\n## Inference\n\nFitted results is an `EvoLinearModel` which acts as a prediction function when passed a features matrix as argument. \n\n```julia\npreds = m(x)\n```\n\n# MLJ Interface\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoSplineRegressor = @load EvoSplineRegressor pkg=EvoLinear\n```\n\nDo `model = EvoLinearRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `EvoSplineRegressor(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with `mach = machine(model, X, y)` where:\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\nPredictions are deterministic.\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: the `SplineModel` object returned by the EvoSplineRegressor fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:coef`: Vector of coefficients (βs) associated with each of the features.\n * `:bias`: Value of the bias.\n * `:names`: Names of each of the features.\n" +":name" = "EvoSplineRegressor" +":human_name" = "evo spline regressor" +":is_supervised" = "`true`" +":prediction_type" = ":deterministic" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":implemented_methods" = [":fit", ":predict", ":update"] +":hyperparameters" = "`(:nrounds, :opt, :batchsize, :act, :eta, :L2, :knots, :rng, :device)`" +":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Int64\", \"Symbol\", \"Any\", \"Any\", \"Union{Nothing, Dict}\", \"Any\", \"Symbol\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = ":nrounds" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" + [OutlierDetectionPython.MCDDetector] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" diff --git a/src/registry/Models.toml b/src/registry/Models.toml index 571b1866..ed46c5ca 100644 --- a/src/registry/Models.toml +++ b/src/registry/Models.toml @@ -13,9 +13,9 @@ EvoLinear = ["EvoLinearRegressor"] XGBoost = ["XGBoostCount", "XGBoostRegressor", "XGBoostClassifier"] LightGBM = ["LGBMClassifier", "LGBMRegressor"] MLJText = ["TfidfTransformer", "CountTransformer", "BM25Transformer"] -OutlierDetectionNetworks = ["AEDetector", "DSADDetector", "ESADDetector"] EvoTrees = ["EvoTreeClassifier", "EvoTreeGaussian", "EvoTreeMLE", "EvoTreeRegressor", "EvoTreeCount"] MLJModels = ["ConstantClassifier", "Standardizer", "DeterministicConstantClassifier", "UnivariateTimeTypeToContinuous", "OneHotEncoder", "ContinuousEncoder", "UnivariateBoxCoxTransformer", "InteractionTransformer", "ConstantRegressor", "FeatureSelector", "UnivariateDiscretizer", "FillImputer", "DeterministicConstantRegressor", "UnivariateStandardizer", "UnivariateFillImputer"] +unknown = ["EvoSplineRegressor"] OutlierDetectionPython = ["MCDDetector", "COPODDetector", "HBOSDetector", "IForestDetector", "SOSDetector", "ABODDetector", "LOFDetector", "PCADetector", "OCSVMDetector", "SODDetector", "LODADetector", "KNNDetector", "COFDetector", "CBLOFDetector", "LOCIDetector", "LMDDDetector", "RODDetector"] OneRule = ["OneRuleClassifier"] LIBSVM =
["EpsilonSVR", "LinearSVC", "NuSVR", "NuSVC", "SVC", "OneClassSVM"] diff --git a/src/registry/Project.toml b/src/registry/Project.toml index 696d4b12..e76b1068 100644 --- a/src/registry/Project.toml +++ b/src/registry/Project.toml @@ -22,7 +22,6 @@ MLJXGBoostInterface = "54119dfa-1dab-4055-a167-80440f4f7a91" NearestNeighborModels = "636a865e-7cf4-491e-846c-de09b730eb36" OneRule = "90484964-6d6a-4979-af09-8657dbed84ff" OutlierDetectionNeighbors = "51249a0a-cb36-4849-8e04-30c7f8d311bb" -OutlierDetectionNetworks = "c7f57e37-4fcb-4a0b-a36c-c2204bc839a7" OutlierDetectionPython = "2449c660-d36c-460e-a68b-92ab3c865b3e" ParallelKMeans = "42b8e9d4-006b-409a-8472-7f34b3fb58af" PartialLeastSquaresRegressor = "f4b1acfe-f311-436c-bb79-8483f53c17d5"