Make stacking compatible with sklearn 0.22.1; bump version to 0.2.1 (#52)

* DEP make stacking compatible with sklearn v0.22.1
* MAINT bump version to 0.2.1
* MAINT make twine happy with README
* MAINT update CHANGELOG
civisanalytics · Jan 14, 2020 · cfdf621
1 parent 7f9e7b1
commit cfdf621
Show file tree

Hide file tree

Showing 4 changed files with 48 additions and 21 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
 
 ## Unreleased
 
+## [0.2.1] - 2020-01-15
+### Changed
+- Make stacking compatible with scikit-learn v0.22.1. (#52)
+
 ## [0.2.0] - 2019-12-11
 ### Added
 - Turn on Python 3.7 and 3.8 for Travis CI builds. (#50)

diff --git a/README.rst b/README.rst
@@ -31,21 +31,21 @@ example of using the ``StackedClassifier``:
 
     .. code-block:: python
 
-    >>> from sklearn.linear_model import LogisticRegression
-    >>> from sklearn.ensemble import RandomForestClassifier
-    >>> from civismlext.stacking import StackedClassifier
-    >>> # Note that the final estimator 'metalr' is the meta-estimator
-    >>> estlist = [('rf', RandomForestClassifier()),
-    >>>            ('lr', LogisticRegression()),
-    >>>            ('metalr', LogisticRegression())]
-    >>> mysm = StackedClassifier(estlist)
-    >>> # Set some parameters, if you didn't set them at instantiation
-    >>> mysm.set_params(rf__random_state=7, lr__random_state=8,
-    >>>                 metalr__random_state=9, metalr__C=10**7)
-    >>> # Fit
-    >>> mysm.fit(Xtrain, ytrain)
-    >>> # Predict!
-    >>> ypred = mysm.predict_proba(Xtest)
+        >>> from sklearn.linear_model import LogisticRegression
+        >>> from sklearn.ensemble import RandomForestClassifier
+        >>> from civismlext.stacking import StackedClassifier
+        >>> # Note that the final estimator 'metalr' is the meta-estimator
+        >>> estlist = [('rf', RandomForestClassifier()),
+        >>>            ('lr', LogisticRegression()),
+        >>>            ('metalr', LogisticRegression())]
+        >>> mysm = StackedClassifier(estlist)
+        >>> # Set some parameters, if you didn't set them at instantiation
+        >>> mysm.set_params(rf__random_state=7, lr__random_state=8,
+        >>>                 metalr__random_state=9, metalr__C=10**7)
+        >>> # Fit
+        >>> mysm.fit(Xtrain, ytrain)
+        >>> # Predict!
+        >>> ypred = mysm.predict_proba(Xtest)
 
 You can learn more about stacking and see an example use of the  ``StackedRegressor`` and ``NonNegativeLinearRegression`` estimators in `a talk presented at PyData NYC`_ in November, 2017.
 

diff --git a/civismlext/stacking.py b/civismlext/stacking.py
@@ -1,15 +1,37 @@
 from __future__ import print_function
 from __future__ import division
 
-import numpy as np
 from abc import ABCMeta, abstractmethod
+import warnings
+
+import numpy as np
 import six
 from sklearn.base import BaseEstimator, clone
 from sklearn.utils.metaestimators import if_delegate_has_method
 from sklearn.model_selection import check_cv
 from sklearn.utils import tosequence, check_X_y
 from sklearn.externals.joblib import Parallel, delayed
-from sklearn.model_selection._validation import _index_param_value
+
+try:
+    # TODO: Avoid using a private function from scikit-learn.
+    #  _check_fit_params was added at sklearn 0.22.1
+    from sklearn.utils.validation import _check_fit_params
+except ImportError:
+    # _index_param_value was removed in sklearn 0.22.1
+    # See: https://github.com/scikit-learn/scikit-learn/pull/15863
+    from sklearn.model_selection._validation import _index_param_value
+
+    warnings.warn(
+        'Your civisml-extensions installation uses private functions from '
+        'scikit-learn < v0.22.1. Please upgrade scikit-learn to v0.22.1 '
+        'or beyond. A future version of civisml-extensions will no longer '
+        'be compatible with scikit-learn < v0.22.1.',
+        FutureWarning
+    )
+
+    def _check_fit_params(X, fit_params, train):
+        return {k: _index_param_value(X, v, train)
+                for k, v in fit_params.items()}
 
 
 def _fit_est(est, X, y, **fit_params):
@@ -255,9 +277,9 @@ def _base_est_fit_predict(self, X, y, **fit_params):
             for name, est in self.estimator_list[:-1]:
                 # adapted from sklearn.model_selection._fit_and_predict
                 # Adjust length of sample weights
-                fit_params_est_adjusted = dict([
-                    (k, _index_param_value(X, v, train))
-                    for k, v in fit_params_ests[name].items()])
+                fit_params_est_adjusted = _check_fit_params(
+                    X, fit_params_ests[name], train
+                )
 
                 # Fit estimator on training set and score out-of-sample
                 _jobs.append(delayed(_fit_predict)(

diff --git a/setup.py b/setup.py
@@ -7,7 +7,7 @@ def read(fname):
         return _in.read()
 
 
-_VERSION = '0.2.0'
+_VERSION = '0.2.1'
 
 setup(version=_VERSION,
       name="civisml-extensions",
@@ -18,5 +18,6 @@ def read(fname):
       packages=find_packages(),
       install_requires=read('requirements.txt').splitlines(),
       long_description=read('README.rst'),
+      long_description_content_type='text/x-rst',
       include_package_data=True,
       license="BSD-3")