diff --git a/CHANGELOG.md b/CHANGELOG.md index dcdc0f3..8b02e65 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ## Unreleased +## [0.2.1] - 2020-01-15 +### Changed +- Make stacking compatible with scikit-learn v0.22.1. (#52) + ## [0.2.0] - 2019-12-11 ### Added - Turn on Python 3.7 and 3.8 for Travis CI builds. (#50) diff --git a/README.rst b/README.rst index f6372e4..1edc332 100644 --- a/README.rst +++ b/README.rst @@ -31,21 +31,21 @@ example of using the ``StackedClassifier``: .. code-block:: python - >>> from sklearn.linear_model import LogisticRegression - >>> from sklearn.ensemble import RandomForestClassifier - >>> from civismlext.stacking import StackedClassifier - >>> # Note that the final estimator 'metalr' is the meta-estimator - >>> estlist = [('rf', RandomForestClassifier()), - >>> ('lr', LogisticRegression()), - >>> ('metalr', LogisticRegression())] - >>> mysm = StackedClassifier(estlist) - >>> # Set some parameters, if you didn't set them at instantiation - >>> mysm.set_params(rf__random_state=7, lr__random_state=8, - >>> metalr__random_state=9, metalr__C=10**7) - >>> # Fit - >>> mysm.fit(Xtrain, ytrain) - >>> # Predict! - >>> ypred = mysm.predict_proba(Xtest) + >>> from sklearn.linear_model import LogisticRegression + >>> from sklearn.ensemble import RandomForestClassifier + >>> from civismlext.stacking import StackedClassifier + >>> # Note that the final estimator 'metalr' is the meta-estimator + >>> estlist = [('rf', RandomForestClassifier()), + >>> ('lr', LogisticRegression()), + >>> ('metalr', LogisticRegression())] + >>> mysm = StackedClassifier(estlist) + >>> # Set some parameters, if you didn't set them at instantiation + >>> mysm.set_params(rf__random_state=7, lr__random_state=8, + >>> metalr__random_state=9, metalr__C=10**7) + >>> # Fit + >>> mysm.fit(Xtrain, ytrain) + >>> # Predict! + >>> ypred = mysm.predict_proba(Xtest) You can learn more about stacking and see an example use of the ``StackedRegressor`` and ``NonNegativeLinearRegression`` estimators in `a talk presented at PyData NYC`_ in November, 2017. diff --git a/civismlext/stacking.py b/civismlext/stacking.py index 8c04a47..b25686d 100644 --- a/civismlext/stacking.py +++ b/civismlext/stacking.py @@ -1,15 +1,37 @@ from __future__ import print_function from __future__ import division -import numpy as np from abc import ABCMeta, abstractmethod +import warnings + +import numpy as np import six from sklearn.base import BaseEstimator, clone from sklearn.utils.metaestimators import if_delegate_has_method from sklearn.model_selection import check_cv from sklearn.utils import tosequence, check_X_y from sklearn.externals.joblib import Parallel, delayed -from sklearn.model_selection._validation import _index_param_value + +try: + # TODO: Avoid using a private function from scikit-learn. + # _check_fit_params was added at sklearn 0.22.1 + from sklearn.utils.validation import _check_fit_params +except ImportError: + # _index_param_value was removed in sklearn 0.22.1 + # See: https://github.com/scikit-learn/scikit-learn/pull/15863 + from sklearn.model_selection._validation import _index_param_value + + warnings.warn( + 'Your civisml-extensions installation uses private functions from ' + 'scikit-learn < v0.22.1. Please upgrade scikit-learn to v0.22.1 ' + 'or beyond. A future version of civisml-extensions will no longer ' + 'be compatible with scikit-learn < v0.22.1.', + FutureWarning + ) + + def _check_fit_params(X, fit_params, train): + return {k: _index_param_value(X, v, train) + for k, v in fit_params.items()} def _fit_est(est, X, y, **fit_params): @@ -255,9 +277,9 @@ def _base_est_fit_predict(self, X, y, **fit_params): for name, est in self.estimator_list[:-1]: # adapted from sklearn.model_selection._fit_and_predict # Adjust length of sample weights - fit_params_est_adjusted = dict([ - (k, _index_param_value(X, v, train)) - for k, v in fit_params_ests[name].items()]) + fit_params_est_adjusted = _check_fit_params( + X, fit_params_ests[name], train + ) # Fit estimator on training set and score out-of-sample _jobs.append(delayed(_fit_predict)( diff --git a/setup.py b/setup.py index 8bcb288..329aa7e 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ def read(fname): return _in.read() -_VERSION = '0.2.0' +_VERSION = '0.2.1' setup(version=_VERSION, name="civisml-extensions", @@ -18,5 +18,6 @@ def read(fname): packages=find_packages(), install_requires=read('requirements.txt').splitlines(), long_description=read('README.rst'), + long_description_content_type='text/x-rst', include_package_data=True, license="BSD-3")