diff --git a/CHANGES.md b/CHANGES.md index fa01fbf..53e7b41 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,8 @@ +# version 0.22.3 + +- Add generic gradient boosting (`GenericBoostingRegressor`, `GenericBoostingClassifier`) for regression and classification. See: https://thierrymoudiki.github.io/blog/2024/10/06/python/r/genericboosting +and `examples/genboost*` + # version 0.18.2 - Gaussian weights in `LSBoostRegressor` and `LSBoostClassifier` randomized hidden layer diff --git a/examples/genboost_regressor2.py b/examples/genboost_regressor2.py new file mode 100644 index 0000000..dddd399 --- /dev/null +++ b/examples/genboost_regressor2.py @@ -0,0 +1,49 @@ +import subprocess +import sys +import os + +print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n") + + +subprocess.check_call([sys.executable, "-m", "pip", "install", "matplotlib"]) + +import mlsauce as ms +import numpy as np +import matplotlib.pyplot as plt +from sklearn.linear_model import Ridge, LinearRegression +from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score +from sklearn.tree import DecisionTreeRegressor +from time import time +from os import chdir +from sklearn import metrics +from sklearn.datasets import fetch_openml + +# Load the dataset from OpenML +boston = fetch_openml(name='boston', version=1, as_frame=True) + +# Get the features and target +X = boston.data +y = boston.target + +# Display the first few rows +print(X.head()) +print(y.head()) + +np.random.seed(15029) +X_train, X_test, y_train, y_test = train_test_split(X, y, + test_size=0.2) + +X_train = X_train.astype(np.float64) +X_test = X_test.astype(np.float64) +y_train = y_train.astype(np.float64) +y_test = y_test.astype(np.float64) + +obj = ms.GenericBoostingRegressor(col_sample=0.9, row_sample=0.9) +print(obj.get_params()) +start = time() +obj.fit(X_train, y_train) +print(time()-start) +start = time() +print(np.sqrt(np.mean(np.square(obj.predict(X_test) - y_test)))) +print(time()-start) +print(obj.obj['loss']) diff --git 
a/examples/lazy_booster_regression.py b/examples/lazy_booster_regression.py index 59ecfe7..fb6af72 100644 --- a/examples/lazy_booster_regression.py +++ b/examples/lazy_booster_regression.py @@ -1,5 +1,6 @@ import os import mlsauce as ms +import numpy as np from sklearn.datasets import load_diabetes from sklearn.datasets import fetch_california_housing from sklearn.model_selection import train_test_split @@ -27,3 +28,32 @@ models, predictioms = regr.fit(X_train, X_test, y_train, y_test) model_dictionary = regr.provide_models(X_train, X_test, y_train, y_test) print(models) + + +from sklearn.datasets import fetch_openml + +# Load the dataset from OpenML +boston = fetch_openml(name='boston', version=1, as_frame=True) + +# Get the features and target +X = boston.data +y = boston.target + +# Display the first few rows +print(X.head()) +print(y.head()) + +np.random.seed(1509) +X_train, X_test, y_train, y_test = train_test_split(X, y, + test_size=0.2) + +X_train = X_train.astype(np.float64) +X_test = X_test.astype(np.float64) +y_train = y_train.astype(np.float64) +y_test = y_test.astype(np.float64) + +regr = ms.LazyBoostingRegressor(verbose=0, ignore_warnings=True, #n_jobs=2, + custom_metric=None, preprocess=True) +models, predictioms = regr.fit(X_train, X_test, y_train, y_test) +model_dictionary = regr.provide_models(X_train, X_test, y_train, y_test) +print(models) diff --git a/mlsauce/booster/_booster_classifier.py b/mlsauce/booster/_booster_classifier.py index fb26667..9c69de6 100644 --- a/mlsauce/booster/_booster_classifier.py +++ b/mlsauce/booster/_booster_classifier.py @@ -4,6 +4,7 @@ import pandas as pd from sklearn.base import BaseEstimator from sklearn.base import ClassifierMixin +from sklearn.tree import ExtraTreeRegressor from sklearn.preprocessing import PolynomialFeatures try: @@ -554,7 +555,7 @@ class GenericBoostingClassifier(LSBoostClassifier): Attributes: base_model: object - base learner. + base learner (default is ExtraTreeRegressor) to be boosted. 
n_estimators: int number of boosting iterations. @@ -625,7 +626,7 @@ class GenericBoostingClassifier(LSBoostClassifier): def __init__( self, - base_model, + base_model=ExtraTreeRegressor(), n_estimators=100, learning_rate=0.1, n_hidden_features=5, diff --git a/mlsauce/booster/_booster_regressor.py b/mlsauce/booster/_booster_regressor.py index 5e5ee4a..71f8b71 100644 --- a/mlsauce/booster/_booster_regressor.py +++ b/mlsauce/booster/_booster_regressor.py @@ -4,6 +4,7 @@ import pandas as pd from sklearn.base import BaseEstimator from sklearn.base import RegressorMixin +from sklearn.tree import ExtraTreeRegressor from sklearn.preprocessing import PolynomialFeatures try: @@ -437,7 +438,7 @@ class GenericBoostingRegressor(LSBoostRegressor): Attributes: base_model: object - base learner. + base learner (default is ExtraTreeRegressor) to be boosted. n_estimators: int number of boosting iterations. @@ -517,7 +518,7 @@ class GenericBoostingRegressor(LSBoostRegressor): def __init__( self, - base_model, + base_model=ExtraTreeRegressor(), n_estimators=100, learning_rate=0.1, n_hidden_features=5, diff --git a/mlsauce/lazybooster/lazyboosterregression.py b/mlsauce/lazybooster/lazyboosterregression.py index 4304ee0..a75f6c5 100644 --- a/mlsauce/lazybooster/lazyboosterregression.py +++ b/mlsauce/lazybooster/lazyboosterregression.py @@ -289,7 +289,7 @@ def fit(self, X_train, X_test, y_train, y_test, **kwargs): adj_rsquared = adjusted_rsquared( r_squared, X_test.shape[0], X_test.shape[1] ) - rmse = mean_squared_error(y_test, y_pred, squared=False) + rmse = root_mean_squared_error(y_test, y_pred) names.append(name) R2.append(r_squared) diff --git a/setup.py b/setup.py index 8fe6b34..0c92809 100644 --- a/setup.py +++ b/setup.py @@ -37,7 +37,7 @@ MAINTAINER_EMAIL = 'thierry.moudiki@gmail.com' LICENSE = 'BSD3 Clause Clear' -__version__ = '0.22.2' +__version__ = '0.22.3' VERSION = __version__