diff --git a/.all-contributorsrc b/.all-contributorsrc
index dbe365847..af77c211e 100644
--- a/.all-contributorsrc
+++ b/.all-contributorsrc
@@ -176,6 +176,17 @@
"maintenance"
]
},
+ {
+ "login": "meraldoantonio",
+ "name": "Meraldo Antonio",
+ "avatar_url": "https://avatars.githubusercontent.com/u/37468543?v=4",
+ "profile": "https://github.com/meraldoantonio",
+ "contributions": [
+ "bug",
+ "code",
+ "doc"
+ ]
+ },
{
"login": "szepeviktor",
"name": "Viktor Szépe",
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index ddcf7616d..2f4dbf106 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -103,7 +103,7 @@ jobs:
run: make test
- name: Publish code coverage
- uses: codecov/codecov-action@v4
+ uses: codecov/codecov-action@v5
run-tests-all-extras:
needs: code-quality
@@ -142,4 +142,4 @@ jobs:
run: make test
- name: Publish code coverage
- uses: codecov/codecov-action@v4
+ uses: codecov/codecov-action@v5
diff --git a/CODEOWNERS b/CODEOWNERS
new file mode 100644
index 000000000..d024d0c1f
--- /dev/null
+++ b/CODEOWNERS
@@ -0,0 +1,14 @@
+# This file specifies framework-level core developers for automated review requests.
+#
+# Note: historically, this file has been used to maintain a list of
+# algorithm maintainers as specified in GOVERNANCE.md.
+# This is no longer the case; algorithm maintainers are now
+# specified directly in the estimator,
+# in the "maintainers" tag of the respective scikit-base object.
+#
+# Algorithm maintainers are programmatically queryable
+# via Estimator.get_class_tag("maintainers").
+# Further lookup such as "which algorithms does M maintain"
+# can be carried out using registry.all_estimators
+
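+# An illustrative sketch of such a lookup (assuming a sktime-style
+# all_estimators signature; the tag default and variable names are assumptions):
+#
+#     from skpro.registry import all_estimators
+#
+#     maintained_by_m = [
+#         name
+#         for name, est in all_estimators(return_names=True)
+#         if "M" in est.get_class_tag("maintainers", [])
+#     ]
+#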
+* @achieveordie @benheid @fkiraly @fnhirwa @geetu040 @pranavvp16 @sairevanth25 @XinyuWuu @yarnabrina
diff --git a/README.md b/README.md
index ba57a1b26..371c6f284 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
-:rocket: **Version 2.7.0 out now!** [Read the release notes here.](https://skpro.readthedocs.io/en/latest/changelog.html).
+:rocket: **Version 2.8.0 out now!** [Read the release notes here](https://skpro.readthedocs.io/en/latest/changelog.html).
`skpro` is a library for supervised probabilistic prediction in python.
It provides `scikit-learn`-like, `scikit-base` compatible interfaces to:
diff --git a/docs/source/_static/switcher.json b/docs/source/_static/switcher.json
index 904bbdd27..71963dad0 100644
--- a/docs/source/_static/switcher.json
+++ b/docs/source/_static/switcher.json
@@ -5,7 +5,12 @@
"url": "https://skpro.readthedocs.io/en/latest/"
},
{
- "name": "2.7.0 (stable)",
+ "name": "2.8.0 (stable)",
+ "version": "stable",
+ "url": "https://skpro.readthedocs.io/en/v2.8.0/"
+ },
+ {
+ "name": "2.7.0",
"version": "stable",
"url": "https://skpro.readthedocs.io/en/v2.7.0/"
},
diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index ffda1181f..bf8ad626a 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -15,6 +15,46 @@ You can also subscribe to ``skpro``'s
For planned changes and upcoming releases, see roadmap in the
`issue tracker <https://github.com/sktime/skpro/issues>`_.
+[2.8.0] - 2024-11-17
+====================
+
+Feature and maintenance release.
+
+Dependency changes
+~~~~~~~~~~~~~~~~~~
+
+* ``scikit-base`` bounds have been updated to ``>=0.6.1,<0.13.0``.
+* ``pymc`` is now a soft dependency, for probabilistic regressors.
+* ``polars`` (data container soft dependency) bounds have been updated to ``<1.14.0``.
+
+Enhancements
+~~~~~~~~~~~~
+
+* [ENH] Creating a new Bayesian Regressor with ``pymc`` as a backend (:pr:`358`) :user:`meraldoantonio`
+* [ENH] add suite test for docstring and ``get_test_params`` coverage (:pr:`482`) :user:`fkiraly`
+* [ENH] Synchronize dependency checker with ``sktime`` counterpart (:pr:`490`) :user:`meraldoantonio`
+
+Maintenance
+~~~~~~~~~~~
+
+* [MNT] fix failing ``code-quality`` CI step (:pr:`483`) :user:`fkiraly`
+* [MNT] [Dependabot](deps): Update ``scikit-base`` requirement from ``<0.12.0,>=0.6.1`` to ``>=0.6.1,<0.13.0`` (:pr:`483`) :user:`dependabot[bot]`
+* [MNT] [Dependabot](deps): Update ``sphinx-gallery`` requirement from ``<0.18.0`` to ``<0.19.0`` (:pr:`481`) :user:`dependabot[bot]`
+* [MNT] [Dependabot](deps): Update ``sphinx-issues`` requirement from ``<5.0.0`` to ``<6.0.0`` (:pr:`484`) :user:`dependabot[bot]`
+* [MNT] [Dependabot](deps): Update ``polars`` requirement from ``<1.10.0`` to ``<1.14.0`` (:pr:`491`) :user:`dependabot[bot]`
+* [MNT] [Dependabot](deps): Bump codecov/codecov-action from ``4`` to ``5`` (:pr:`494`) :user:`dependabot[bot]`
+
+Documentation
+~~~~~~~~~~~~~
+
+* [DOC] in docstring, rename ``Example`` to ``Examples`` sections (:pr:`487`) :user:`fkiraly`
+
+Contributors
+~~~~~~~~~~~~
+
+:user:`fkiraly`,
+:user:`meraldoantonio`
+
[2.7.0] - 2024-10-08
====================
diff --git a/pyproject.toml b/pyproject.toml
index bfe543c35..72173f8b5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "skpro"
-version = "2.7.0"
+version = "2.8.0"
description = "A unified framework for tabular probabilistic regression, time-to-event prediction, and probability distributions in python"
authors = [
{name = "skpro developers", email = "info@sktime.net"},
@@ -42,7 +42,7 @@ dependencies = [
"numpy>=1.21.0,<2.2",
"pandas>=1.1.0,<2.3.0",
"packaging",
- "scikit-base>=0.6.1,<0.12.0",
+ "scikit-base>=0.6.1,<0.13.0",
"scikit-learn>=0.24.0,<1.6.0",
"scipy<2.0.0,>=1.2.0",
]
@@ -57,6 +57,7 @@ all_extras = [
"ngboost<0.6.0; python_version < '3.13'",
"polars<1.14.0",
"pyarrow<14.0.0; python_version < '3.12'",
+ "pymc; python_version < '3.13'",
"scikit-survival<0.24.0; python_version < '3.13'",
"statsmodels>=0.12.1",
]
diff --git a/skpro/__init__.py b/skpro/__init__.py
index 62dc671ac..acc60bcc2 100644
--- a/skpro/__init__.py
+++ b/skpro/__init__.py
@@ -1,6 +1,6 @@
"""skpro."""
-__version__ = "2.7.0"
+__version__ = "2.8.0"
__all__ = ["show_versions"]
diff --git a/skpro/base/_base.py b/skpro/base/_base.py
index 08c9f78ca..093ac4550 100644
--- a/skpro/base/_base.py
+++ b/skpro/base/_base.py
@@ -28,30 +28,6 @@ class BaseObject(_CommonTags, _BaseObject):
def __init__(self):
super().__init__()
- import sys
- from warnings import warn
-
- from packaging.specifiers import SpecifierSet
-
- py39_or_higher = SpecifierSet(">=3.9")
- sys_version = sys.version.split(" ")[0]
-
- # todo 2.8.0 - check whether python 3.8 eol is reached.
- # If yes, remove this msg.
- if sys_version not in py39_or_higher:
- warn(
- f"From skpro 2.5.0, skpro requires Python version >=3.9, "
- f"but found {sys_version}. "
- "The package can still be installed, until 3.8 end of life "
- "is reached, "
- "but some functionality may not work as test coverage is dropped."
- "Kindly note for context: python 3.8 will reach end of life "
- "in October 2024, and multiple skpro core dependencies, "
- "including scikit-learn, have already dropped support for 3.8. ",
- category=DeprecationWarning,
- stacklevel=2,
- )
-
class BaseEstimator(_CommonTags, _BaseEstimator):
"""Base class for fittable objects."""
diff --git a/skpro/regression/bayesian.py b/skpro/regression/bayesian.py
new file mode 100644
index 000000000..f5648702f
--- /dev/null
+++ b/skpro/regression/bayesian.py
@@ -0,0 +1,581 @@
+"""
+Simple Bayesian Linear Regressor.
+
+Bayesian Linear Regression defined with user-specified priors or defaults for slopes,
+intercept, and noise; implemented using the pymc backend.
+"""
+
+# copyright: skpro developers
+__author__ = ["meraldoantonio"]
+
+from skpro.regression.base import BaseProbaRegressor
+from skpro.utils.validation._dependencies import _check_soft_dependencies
+
+
+class BayesianLinearRegressor(BaseProbaRegressor):
+ """
+ Bayesian Linear Regression class with MCMC sampling.
+
+ Defined with user-specified priors or defaults for slopes, intercept,
+ and noise; implemented using the pymc backend.
+
+ Parameters
+ ----------
+    prior_config : dict, optional
+        Dictionary of priors.
+        Class default defined by the ``default_prior_config`` property.
+    sampler_config : dict, optional
+        Dictionary of parameters that initialise the sampler configuration.
+        Class default defined by the ``default_sampler_config`` property.
+
+    Examples
+    --------
+ >>> from skpro.regression.bayesian import BayesianLinearRegressor
+ >>> from sklearn.datasets import load_diabetes # doctest: +SKIP
+ >>> from sklearn.model_selection import train_test_split # doctest: +SKIP
+ >>> X, y = load_diabetes(return_X_y=True, as_frame=True) # doctest: +SKIP
+ >>> X_train, X_test, y_train, y_test = train_test_split(X, y) # doctest: +SKIP
+
+ >>> bayes_model = BayesianLinearRegressor() # doctest: +SKIP
+ >>> bayes_model.fit(X_train, y_train) # doctest: +SKIP
+ >>> y_test_pred_proba = bayes_model.predict_proba(X_test) # doctest: +SKIP
+ >>> y_test_pred = bayes_model.predict(X_test) # doctest: +SKIP
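+
+    Custom priors and sampler settings can be passed at construction; a sketch,
+    assuming ``pymc_marketing`` is installed (mirrors ``get_test_params`` below):
+
+    >>> from pymc_marketing.prior import Prior  # doctest: +SKIP
+    >>> custom_model = BayesianLinearRegressor(
+    ...     prior_config={"intercept": Prior("Normal", mu=0, sigma=10)},
+    ...     sampler_config={"draws": 500, "chains": 4},
+    ... )  # doctest: +SKIP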
+ """
+
+ _tags = {
+ # packaging info
+ # --------------
+ "authors": ["meraldoantonio"],
+ "python_version": ">=3.10",
+ "python_dependencies": [
+ "pymc",
+ "pymc_marketing",
+ "arviz>=0.18.0",
+ ],
+ # estimator tags
+ # --------------
+ "capability:multioutput": False, # can the estimator handle multi-output data?
+ "capability:missing": True, # can the estimator handle missing data?
+ "X_inner_mtype": "pd_DataFrame_Table", # type seen in internal _fit, _predict
+ "y_inner_mtype": "pd_DataFrame_Table", # type seen in internal _fit
+ }
+
+ def __init__(self, prior_config=None, sampler_config=None):
+ if sampler_config is None:
+ sampler_config = {}
+ if prior_config is None:
+ prior_config = {} # configuration for priors
+ self.sampler_config = {**self.default_sampler_config, **sampler_config}
+ self.prior_config = {**self.default_prior_config, **prior_config}
+ self.model = None # generated during fitting
+ self.idata = None # generated during fitting
+ self._predict_done = False # a flag indicating if a prediction has been done
+
+ print( # noqa: T201
+ f"instantiated {self.__class__.__name__} with the following priors:"
+ )
+
+ for key, value in self.prior_config.items():
+ print(f" - {key}: {value}") # noqa: T201
+
+ super().__init__()
+
+ @property
+ def default_prior_config(self):
+ """Return a dictionary of prior defaults."""
+ from pymc_marketing.prior import Prior
+
+ print( # noqa: T201
+            "The model assumes that the intercept and slopes are independent.\n"
+            "Modify the model if this assumption doesn't apply!"
+ )
+ default_prior_config = {
+ "intercept": Prior(
+ "Normal", mu=0, sigma=100
+ ), # Weakly informative normal prior with large sigma
+ "slopes": Prior(
+ "Normal", mu=0, sigma=100, dims=("pred_id",)
+ ), # Same for slopes
+ "noise_var": Prior(
+ "HalfCauchy", beta=5
+ ), # Weakly informative Half-Cauchy prior for noise variance
+ }
+ return default_prior_config
+
+ @property
+ def default_sampler_config(self):
+ """Return a class default sampler configuration dictionary."""
+ default_sampler_config = {
+ "draws": 1000,
+ "tune": 1000,
+ "chains": 2,
+ "target_accept": 0.95,
+ "random_seed": 123,
+ "progressbar": True,
+ }
+ return default_sampler_config
+
+ def _fit(self, X, y):
+ """Fit regressor to training data.
+
+ Writes to self:
+ Sets fitted model attributes ending in "_".
+
+ Parameters
+ ----------
+ X : pandas DataFrame
+ feature instances to fit regressor to
+ y : pandas DataFrame, must be same length as X
+ labels to fit regressor to
+
+ Returns
+ -------
+ self : reference to self
+ """
+ import warnings
+
+ import pandas as pd
+ import pymc as pm
+
+ assert len(y.columns) == 1, "y must have only one column!"
+ self._X = X
+ self._y = y
+ self._y_vals = y.values[
+ :, 0
+ ] # we need a 1-dimensional array for compatibility with pymc
+
+ # Model construction and posterior sampling
+ with pm.Model(coords={"obs_id": X.index, "pred_id": X.columns}) as self.model:
+ # Mutable data containers for X and y
+ X_data = pm.Data("X", X, dims=("obs_id", "pred_id"))
+ y_data = pm.Data("y", self._y_vals, dims=("obs_id"))
+
+ # Priors for model parameters, taken from self.prior_config
+ self.intercept = self.prior_config["intercept"].create_variable("intercept")
+ self.slopes = self.prior_config["slopes"].create_variable("slopes")
+ self.noise_var = self.prior_config["noise_var"].create_variable("noise_var")
+ self.noise = pm.Deterministic("noise", self.noise_var**0.5)
+
+ # Expected value of the target variable
+ self.mu = pm.Deterministic(
+ "mu", self.intercept + pm.math.dot(X_data, self.slopes)
+ )
+
+ # Likelihood of observations
+ y_obs = pm.Normal( # noqa: F841
+ "y_obs", mu=self.mu, sigma=self.noise, observed=y_data, dims=("obs_id")
+ )
+
+ # Constructing the posterior
+ self.idata = pm.sample(**self.sampler_config)
+
+ # Incorporation of training_data as a new group in self.idata
+ training_data = pd.concat([X, y], axis=1)
+ with warnings.catch_warnings():
+ warnings.filterwarnings(
+ "ignore",
+ category=UserWarning,
+ )
+ self.idata.add_groups(training_data=training_data.to_xarray())
+ return self
+
+ def visualize_model(self, **kwargs):
+ """Use Graphviz to visualize the model flow."""
+ _check_soft_dependencies(
+ "graphviz", msg="You need to install Graphviz to use this method!"
+ )
+ import pymc as pm
+
+ assert self._is_fitted, "You need to fit the model before visualizing it!"
+
+ return pm.model_to_graphviz(self.model, **kwargs)
+
+ def _sample_dataset(self, group_name, return_type=None):
+ """
+ General method to sample from a specified group in the idata object.
+
+ Extracts samples from a specified group (e.g., 'prior') in the idata object and
+ returns them in the required format
+        returns them in the required format.
+ Parameters
+ ----------
+ group_name : str
+ The name of the group in the idata object to sample from (e.g., 'prior').
+
+ return_type : str or None, optional (default=None)
+ The format in which to return the sampled distributions.
+ Accepted values are:
+ - "xarray": Returns an xarray.Dataset
+ - "numpy": Returns a dictionary of NumPy arrays
+ - "dataframe": Returns a pandas DataFrame
+ - "skpro": Returns an `Empirical` distribution from the skpro library.
+ - None: Does not return any sampled data but performs the sampling
+ and updates the 'idata' attribute.
+
+ Returns
+ -------
+        xarray.Dataset or dict or pd.DataFrame or skpro.distributions.Empirical or None
+            The sampled distributions in the specified format,
+            or None if return_type is None.
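+
+        A sketch of intended usage (``reg`` is a hypothetical fitted instance):
+
+        >>> reg.sample_prior()  # doctest: +SKIP
+        >>> df = reg._sample_dataset("prior", return_type="dataframe")  # doctest: +SKIP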
+ """
+ import pandas as pd
+
+ # Validate the return_type
+ assert return_type in [
+ "xarray",
+ "numpy",
+ "dataframe",
+ "skpro",
+ None,
+ ], "return_type must be one of 'xarray', 'numpy', 'dataframe', 'skpro', or None"
+
+ # Validate that the group_name exists in idata
+ assert hasattr(
+ self.idata, group_name
+ ), f"{group_name} group does not exist in the idata object."
+
+ # Get the specified group from idata
+ group = getattr(self.idata, group_name)
+ # prediction-specific groups which focus on posterior predictive
+ is_predictive = group_name in ["predictions", "posterior_predictive"]
+ # as opposed to ["prior", "posterior"] which focus on prior/posterior
+
+ if is_predictive:
+ variables = ["y_obs"]
+ else:
+ variables = ["intercept", "slopes", "noise_var", "noise"]
+
+ if return_type is None:
+ return None
+ elif return_type == "xarray":
+ return group
+ else:
+ data_dict = {}
+
+ for var in variables:
+ # Check if the variable has a `pred_id` dimension
+ if var in group and "pred_id" in group[var].dims:
+ # Iterate through each feature (e.g., 'feature1', 'feature2')
+ for feature in group[var].pred_id.values:
+ # Select the slope for the current feature and flatten it
+ feature_key = f"{var}_{feature}"
+ data_dict[feature_key] = (
+ group[var]
+ .sel(pred_id=feature)
+ .stack({"sample": ("chain", "draw")})
+ .values.squeeze()
+ )
+ else:
+ if var in group:
+ data_dict[var] = (
+ group[var]
+ .stack({"sample": ("chain", "draw")})
+ .values.squeeze()
+ )
+
+ if return_type == "numpy":
+ return data_dict
+
+ elif return_type == "dataframe":
+ if is_predictive:
+ return pd.DataFrame(data_dict["y_obs"]).T
+ else:
+ return pd.DataFrame(data_dict)
+
+ elif return_type == "skpro":
+ from skpro.distributions import Empirical
+
+ if not is_predictive:
+ df = pd.DataFrame(data_dict)
+ reshaped_df = df.stack()
+ reshaped_df = reshaped_df.reset_index(name="value")
+ reshaped_df.set_index(["level_0", "level_1"], inplace=True)
+ reshaped_df.index.names = ["obs_id", "variable"]
+ return Empirical(spl=reshaped_df)
+ else:
+ # Extract posterior predictive distributions as an xarray DataArray
+ pred_proba_xarray = group["y_obs"]
+
+ # Convert data to pd.DataFrame and format it appropriately for
+ # subsequent conversion into a skpro Empirical distribution
+ pred_proba_df = pred_proba_xarray.to_dataframe()
+ pred_proba_df = pred_proba_df.reset_index()
+
+ # Create a new 'sample_id' column by
+ # combining the 'chain' and 'draw' columns
+ pred_proba_df["sample_id"] = (
+ pred_proba_df["chain"] * self.sampler_config["draws"]
+ + pred_proba_df["draw"]
+ )
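+                # worked example (hypothetical values): with draws=1000,
+                # chain=1 and draw=5 map to sample_id = 1 * 1000 + 5 = 1005,
+                # so each (chain, draw) pair gets a unique sample index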
+ pred_proba_df = pred_proba_df[["obs_id", "sample_id", "y_obs"]]
+ pred_proba_df = pred_proba_df.rename(
+ columns={"y_obs": self._y.columns[0]}
+ )
+ pred_proba_df = pred_proba_df.set_index(["sample_id", "obs_id"])
+
+ # Convert data to skpro Empirical distribution
+ pred_proba_dist = Empirical(
+ spl=pred_proba_df, columns=self._y.columns
+ )
+ return pred_proba_dist
+
+ def _get_dataset_summary(self, group_name, var_names=None, **kwargs):
+ """
+ Get the summary statistics of a specified group in the idata object.
+
+ Parameters
+ ----------
+ group_name : str
+ The name of the group in the idata object to summarize (e.g., 'prior').
+
+ var_names : list, optional (default=None)
+ A list of variable names to include in the summary.
+ If None, all variables in the group are included.
+
+ **kwargs :
+ Additional keyword arguments to pass to `arviz.summary`.
+
+ Returns
+ -------
+ az.data.inference_data.Summary
+ The summary statistics for the specified group and variables.
+ """
+ import arviz as az
+
+ # Check if the specified group exists in the idata object
+ if group_name not in self.idata.groups():
+ if group_name == "prior":
+ self.sample_prior()
+ elif group_name == "posterior":
+ self.sample_posterior()
+ else:
+ raise ValueError(
+ f"Group '{group_name}' does not exist in the idata object."
+ )
+
+ # Get the summary statistics with optional kwargs
+ return az.summary(
+ getattr(self.idata, group_name), var_names=var_names, **kwargs
+ )
+
+ def sample_prior(self, return_type=None):
+ """
+ Sample from the prior distributions.
+
+ Samples from the prior distributions and returns
+        them in the required format.
+
+        If return_type is None, the method updates the 'idata' attribute
+        by adding the 'prior' group but does not return any samples.
+
+        Parameters
+        ----------
+        return_type : str or None, optional (default=None)
+ The format in which to return the sampled distributions.
+ Accepted values are:
+ - "xarray": Returns an xarray.Dataset
+ - "numpy": Returns a dictionary of NumPy arrays
+ - "dataframe": Returns a pandas DataFrame
+ - "skpro": Returns an `Empirical` distribution from the skpro library.
+ - None: Does not return any sampled data but performs the sampling
+ and updates the 'idata' attribute.
+
+ Returns
+ -------
+ xarray.Dataset or dict or pd.DataFrame or skpro.distributions.Empirical or None
+ The sampled distributions in the specified format,
+ or None if return_type is None.
+ """
+ import pymc as pm
+
+ assert (
+ self.is_fitted
+ ), "Model needs to be fitted before you can sample from prior"
+
+ with self.model:
+ # if we've previously used the model for prediction,
+ # we need to reset the reference of 'X' to X used for training
+ if self._predict_done:
+ pm.set_data(
+ {"X": self._X},
+ coords={"obs_id": self._X.index, "pred_id": self._X.columns},
+ )
+ self.idata.extend(
+ pm.sample_prior_predictive(
+ samples=self.sampler_config["draws"],
+ random_seed=self.sampler_config["random_seed"],
+ )
+ ) # todo: the keyword 'samples' will be changed to 'draws'
+ # in pymc 5.16
+
+ return self._sample_dataset(
+ group_name="prior",
+ return_type=return_type,
+ )
+
+ def get_prior_summary(self, **kwargs):
+ """
+ Get the summary statistics of prior distributions.
+
+ Parameters
+ ----------
+ **kwargs :
+ Additional keyword arguments to pass to `arviz.summary`.
+
+ Returns
+ -------
+ az.data.inference_data.Summary
+ The summary statistics for the prior distributions.
+ """
+ return self._get_dataset_summary(
+ group_name="prior",
+ var_names=["intercept", "slopes", "noise_var", "noise"],
+ **kwargs,
+ )
+
+ def sample_posterior(self, return_type=None):
+ """
+ Sample from the posterior distributions.
+
+ Samples from the posterior distributions and returns
+        them in the required format.
+
+        If return_type is None, the method updates the 'idata' attribute
+        by adding the 'posterior' group but does not return any samples.
+
+        Parameters
+        ----------
+        return_type : str or None, optional (default=None)
+ The format in which to return the sampled distributions.
+ Accepted values are:
+ - "xarray": Returns an xarray.Dataset
+ - "numpy": Returns a dictionary of NumPy arrays
+ - "dataframe": Returns a pandas DataFrame
+ - "skpro": Returns an `Empirical` distribution from the skpro library.
+ - None: Does not return any sampled data but performs the sampling
+ and updates the 'idata' attribute.
+
+ Returns
+ -------
+ xarray.Dataset or dict or pd.DataFrame or skpro.distributions.Empirical or None
+ The sampled distributions in the specified format,
+ or None if return_type is None.
+ """
+ assert (
+ self.is_fitted
+ ), "The model must be fitted before posterior can be returned."
+ return self._sample_dataset(
+ group_name="posterior",
+ return_type=return_type,
+ )
+
+ def get_posterior_summary(self, **kwargs):
+ """
+ Get the summary statistics of the posterior distributions.
+
+ Parameters
+ ----------
+ **kwargs :
+ Additional keyword arguments to pass to `arviz.summary`.
+
+ Returns
+ -------
+ az.data.inference_data.Summary
+ The summary statistics for the posterior distributions.
+ """
+ return self._get_dataset_summary(
+ group_name="posterior",
+ var_names=["intercept", "slopes", "noise_var", "noise"],
+ **kwargs,
+ )
+
+ def sample_in_sample_posterior_predictive(self, return_type=None):
+ """Perform in-sample predictions and sample from it."""
+ import pymc as pm
+
+ with self.model:
+ # if we've previously used the model for prediction,
+ # we need to reset the reference of 'X' to X_train (i.e. self._X)
+ if self._predict_done:
+ pm.set_data(
+ {"X": self._X},
+ coords={"obs_id": self._X.index, "pred_id": self._X.columns},
+ )
+ self.idata.extend(
+ pm.sample_posterior_predictive(self.idata, predictions=False)
+ )
+
+ return self._sample_dataset(
+ group_name="posterior_predictive", return_type=return_type
+ )
+
+ def plot_ppc(self, **kwargs):
+ """Plot the posterior predictive check."""
+ import arviz as az
+
+ if "posterior_predictive" not in self.idata:
+ self.sample_in_sample_posterior_predictive()
+
+ return az.plot_ppc(self.idata, **kwargs)
+
+ def _predict_proba(self, X):
+ """
+ Predict distribution over labels for data from features.
+
+ State required:
+ Requires state to be "fitted".
+
+ Accesses in self:
+ Fitted model attributes ending in "_"
+
+ Parameters
+ ----------
+ X : pandas DataFrame, must have same columns as X in `fit`
+ data to predict labels for
+
+ Returns
+ -------
+ pred_proba_dist : skpro BaseDistribution, same length as `X`
+ labels predicted for `X`
+ """
+ import pymc as pm
+
+ with self.model:
+ if "predictions" in self.idata.groups():
+ del self.idata.predictions
+
+ # Set the X to be the new 'X' variable and then sample posterior predictive
+ pm.set_data({"X": X}, coords={"obs_id": X.index, "pred_id": X.columns})
+ self.idata.extend(
+ pm.sample_posterior_predictive(
+ self.idata,
+ predictions=True,
+ )
+ )
+ self._predict_done = True # a flag indicating prediction has been done
+
+ return self._sample_dataset(group_name="predictions", return_type="skpro")
+
+ # todo: return default parameters, so that a test instance can be created
+ # required for automated unit and integration testing of estimator
+ @classmethod
+ def get_test_params(cls, parameter_set="default"):
+ """Return testing parameter settings for the estimator.
+
+ Parameters
+ ----------
+ parameter_set : str, default="default"
+ Name of the set of test parameters to return, for use in tests. If no
+ special parameters are defined for a value, will return `"default"` set.
+
+ Returns
+ -------
+ params : dict or list of dict, default = {}
+ Parameters to create testing instances of the class
+        Each dict contains parameters to construct an "interesting" test instance, i.e.,
+ `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
+ `create_test_instance` uses the first (or only) dictionary in `params`
+ """
+ from pymc_marketing.prior import Prior
+
+ params1 = {}
+ params2 = {"prior_config": {"intercept": Prior("Normal", mu=0, sigma=10)}}
+
+ return [params1, params2]
diff --git a/skpro/tests/_config.py b/skpro/tests/_config.py
index 31c08b3fe..0775ff69f 100644
--- a/skpro/tests/_config.py
+++ b/skpro/tests/_config.py
@@ -2,4 +2,10 @@
# list of str, names of estimators to exclude from testing
# WARNING: tests for these estimators will be skipped
-EXCLUDE_ESTIMATORS = ["DummySkipped"]
+EXCLUDE_ESTIMATORS = [
+ "DummySkipped",
+ "ClassName", # exclude classes from extension templates
+]
+
+
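+# dict of tests to exclude per estimator
+# keys are class names of estimators, values are lists of test names to skip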
+EXCLUDED_TESTS = {"GLMRegressor": ["test_online_update"]}
diff --git a/skpro/tests/test_all_estimators.py b/skpro/tests/test_all_estimators.py
index f06263c6c..1874c9166 100644
--- a/skpro/tests/test_all_estimators.py
+++ b/skpro/tests/test_all_estimators.py
@@ -13,6 +13,7 @@
from skbase.testing.utils.inspect import _get_args
from skpro.registry import OBJECT_TAG_LIST, all_objects
+from skpro.tests._config import EXCLUDE_ESTIMATORS, EXCLUDED_TESTS
from skpro.tests.scenarios.scenarios_getter import retrieve_scenarios
from skpro.tests.test_switch import run_test_for_class
from skpro.utils.deep_equals import deep_equals
@@ -35,7 +36,12 @@ class PackageConfig:
# list of object types (class names) to exclude
# expected type: list of str, str are class names
- exclude_objects = ["ClassName"] # exclude classes from extension templates
+ exclude_objects = EXCLUDE_ESTIMATORS
+
+ # list of tests to exclude
+ # expected type: dict of lists, key:str, value: List[str]
+ # keys are class names of estimators, values are lists of test names to exclude
+ excluded_tests = EXCLUDED_TESTS
# list of valid tags
# expected type: list of str, str are tag names
diff --git a/skpro/utils/_maint/_show_versions.py b/skpro/utils/_maint/_show_versions.py
index 542c5b9bd..cc2c621ce 100644
--- a/skpro/utils/_maint/_show_versions.py
+++ b/skpro/utils/_maint/_show_versions.py
@@ -1,9 +1,9 @@
#!/usr/bin/env python3 -u
# License: BSD 3 clause
-
"""Utility methods to print system info for debugging.
-adapted from :func:`sklearn.show_versions`
+adapted from
+:func:`sklearn.show_versions`
"""
__author__ = ["mloning", "fkiraly"]
@@ -15,8 +15,7 @@
def _get_sys_info():
- """
- System information.
+ """System information.
Return
------
@@ -37,28 +36,33 @@ def _get_sys_info():
# dependencies to print versions of, by default
DEFAULT_DEPS_TO_SHOW = [
"pip",
- "skpro",
- "skbase",
+ "sktime",
"sklearn",
+ "skbase",
"numpy",
"scipy",
"pandas",
"matplotlib",
"joblib",
"numba",
- "tabulate",
- "uncertainties",
+ "statsmodels",
+ "pmdarima",
+ "statsforecast",
+ "tsfresh",
+ "tslearn",
+ "torch",
+ "tensorflow",
+ "tensorflow_probability",
]
def _get_deps_info(deps=None):
- """
- Overview of the installed version of main dependencies.
+ """Overview of the installed version of main dependencies.
Parameters
----------
deps : optional, list of strings with import names
- if None, behaves as deps = ["skpro"]
+ if None, behaves as deps = ["sktime"]
Returns
-------
@@ -68,10 +72,10 @@ def _get_deps_info(deps=None):
of the import as present in the current python environment
"""
if deps is None:
- deps = ["skpro"]
+ deps = ["sktime"]
def get_version(module):
- return module.__version__
+ return getattr(module, "__version__", None)
deps_info = {}
@@ -81,16 +85,17 @@ def get_version(module):
mod = sys.modules[modname]
else:
mod = importlib.import_module(modname)
- ver = get_version(mod)
- deps_info[modname] = ver
except ImportError:
deps_info[modname] = None
+ else:
+ ver = get_version(mod)
+ deps_info[modname] = ver
return deps_info
def show_versions():
- """Print python version, OS version, skpro version, selected dependency versions.
+ """Print python version, OS version, sktime version, selected dependency versions.
Pretty prints:
diff --git a/skpro/utils/_maint/tests/test_show_versions.py b/skpro/utils/_maint/tests/test_show_versions.py
index 928017df1..67c7f1877 100644
--- a/skpro/utils/_maint/tests/test_show_versions.py
+++ b/skpro/utils/_maint/tests/test_show_versions.py
@@ -1,4 +1,6 @@
"""Tests for the show_versions utility."""
+import pathlib
+import uuid
from skpro.utils._maint._show_versions import (
DEFAULT_DEPS_TO_SHOW,
@@ -18,16 +20,39 @@ def test_deps_info():
"""Test that _get_deps_info returns package/version dict as per contract."""
deps_info = _get_deps_info()
assert isinstance(deps_info, dict)
- assert set(deps_info.keys()) == {"skpro"}
+ assert set(deps_info.keys()) == {"sktime"}
deps_info_default = _get_deps_info(DEFAULT_DEPS_TO_SHOW)
assert isinstance(deps_info_default, dict)
assert set(deps_info_default.keys()) == set(DEFAULT_DEPS_TO_SHOW)
+ PKG_IMPORT_ALIAS = {"scikit-learn": "sklearn", "scikit-base": "skbase"}
+ KEY_ALIAS = {"sklearn": "scikit-learn", "skbase": "scikit-base"}
+
for key in DEFAULT_DEPS_TO_SHOW:
- key_is_available = _check_soft_dependencies(key, severity="none")
+ pkg_name = KEY_ALIAS.get(key, key)
+ key_is_available = _check_soft_dependencies(
+ pkg_name,
+ severity="none",
+ package_import_alias=PKG_IMPORT_ALIAS,
+ )
assert (deps_info_default[key] is None) != key_is_available
if key_is_available:
- assert _check_soft_dependencies(f"{key}=={deps_info_default[key]}")
+ assert _check_soft_dependencies(
+ f"{pkg_name}=={deps_info_default[key]}",
+ package_import_alias=PKG_IMPORT_ALIAS,
+ )
deps_single_key = _get_deps_info([key])
assert set(deps_single_key.keys()) == {key}
+
+
+def test_deps_info_deps_missing_package_present_directory():
+ """Test that _get_deps_info does not fail if a dependency is missing."""
+ dummy_package_name = uuid.uuid4().hex
+
+ dummy_folder_path = pathlib.Path(dummy_package_name)
+ dummy_folder_path.mkdir()
+
+ assert _get_deps_info([dummy_package_name]) == {dummy_package_name: None}
+
+ dummy_folder_path.rmdir()
diff --git a/skpro/utils/validation/_dependencies.py b/skpro/utils/validation/_dependencies.py
index f7b24fb04..b303924ea 100644
--- a/skpro/utils/validation/_dependencies.py
+++ b/skpro/utils/validation/_dependencies.py
@@ -2,23 +2,26 @@
__author__ = ["fkiraly", "mloning"]
-import io
import sys
import warnings
-from importlib import import_module
+from importlib.metadata import PackageNotFoundError, version
+from importlib.util import find_spec
from inspect import isclass
+from packaging.markers import InvalidMarker, Marker
from packaging.requirements import InvalidRequirement, Requirement
-from packaging.specifiers import InvalidSpecifier, SpecifierSet
+from packaging.specifiers import InvalidSpecifier, Specifier, SpecifierSet
+from packaging.version import InvalidVersion, Version
+# todo 0.32.0: remove suppress_import_stdout argument
def _check_soft_dependencies(
*packages,
package_import_alias=None,
severity="error",
obj=None,
msg=None,
- suppress_import_stdout=False,
+ suppress_import_stdout="deprecated",
):
"""Check if required soft dependencies are installed and raise error or warning.
@@ -40,7 +43,7 @@ def _check_soft_dependencies(
should be provided if import name differs from package name
severity : str, "error" (default), "warning", "none"
behaviour for raising errors or warnings
- "error" - raises a `ModuleNotFoundException` if one of packages is not installed
+ "error" - raises a `ModuleNotFoundError` if one of packages is not installed
"warning" - raises a warning if one of packages is not installed
function returns False if one of packages is not installed, otherwise True
"none" - does not raise exception or warning
@@ -52,8 +55,6 @@ def _check_soft_dependencies(
if str is passed, will be used as name of the class/object or module
msg : str, or None, default=None
if str, will override the error message or warning shown with msg
- suppress_import_stdout : bool, optional. Default=False
- whether to suppress stdout printout upon import.
Raises
------
@@ -64,6 +65,22 @@ def _check_soft_dependencies(
-------
boolean - whether all packages are installed, only if no exception is raised
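+
+    Examples
+    --------
+    An illustrative sketch; package and version strings are examples only:
+
+    >>> _check_soft_dependencies("pandas>=1.1.0")  # doctest: +SKIP
+    True
+    >>> _check_soft_dependencies("pymc", severity="none")  # doctest: +SKIP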
"""
+ # todo 0.32.0: remove this warning
+ if suppress_import_stdout != "deprecated":
+ warnings.warn(
+ "In sktime _check_soft_dependencies, the suppress_import_stdout argument "
+ "is deprecated and no longer has any effect. "
+ "The argument will be removed in version 0.32.0, so users of the "
+ "_check_soft_dependencies utility should not pass this argument anymore. "
+ "The _check_soft_dependencies utility also no longer causes imports, "
+ "hence no stdout "
+ "output is created from imports, for any setting of the "
+ "suppress_import_stdout argument. If you wish to import packages "
+ "and make use of stdout prints, import the package directly instead.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+
if len(packages) == 1 and isinstance(packages[0], (tuple, list)):
packages = packages[0]
if not all(isinstance(x, str) for x in packages):
@@ -110,6 +127,7 @@ def _check_soft_dependencies(
for package in packages:
try:
req = Requirement(package)
+ req = _normalize_requirement(req)
except InvalidRequirement:
msg_version = (
f"wrong format for package requirement string "
@@ -127,40 +145,44 @@ def _check_soft_dependencies(
package_import_name = package_import_alias[package_name]
else:
package_import_name = package_name
- # attempt import - if not possible, we know we need to raise warning/exception
- try:
- if suppress_import_stdout:
- # setup text trap, import, then restore
- sys.stdout = io.StringIO()
- pkg_ref = import_module(package_import_name)
- sys.stdout = sys.__stdout__
- else:
- pkg_ref = import_module(package_import_name)
- # if package cannot be imported, make the user aware of installation requirement
- except ModuleNotFoundError as e:
+
+ # optimized branching to check presence of import
+ # and presence of package distribution
+ # first we check import, then we check distribution
+ # because try/except consumes more runtime
+ pkg_spec = find_spec(package_import_name)
+ if pkg_spec is not None:
+ try:
+ pkg_env_version = Version(version(package_name))
+ except (InvalidVersion, PackageNotFoundError):
+ pkg_spec = None
+
+ # if package not present, make the user aware of installation reqs
+ if pkg_spec is None:
if obj is None and msg is None:
msg = (
-                    f"{e}. '{package}' is a soft dependency and not included in the "
+                    f"'{package}' not found. "
+                    f"'{package}' is a soft dependency and not included in the "
                     f"base skpro installation. Please run: `pip install {package}` to "
f"install the {package} package. "
f"To install all soft dependencies, run: `pip install "
                    f"skpro[all_extras]`"
)
elif msg is None: # obj is not None, msg is None
msg = (
f"{class_name} requires package '{package}' to be present "
f"in the python environment, but '{package}' was not found. "
f"'{package}' is a soft dependency and not included in the base "
                    f"skpro installation. Please run: `pip install {package}` to "
f"install the {package} package. "
f"To install all soft dependencies, run: `pip install "
                    f"skpro[all_extras]`"
)
# if msg is not None, none of the above is executed,
# so if msg is passed it overrides the default messages
if severity == "error":
- raise ModuleNotFoundError(msg) from e
+ raise ModuleNotFoundError(msg)
elif severity == "warning":
warnings.warn(msg, stacklevel=2)
return False
@@ -175,8 +197,6 @@ def _check_soft_dependencies(
# now we check compatibility with the version specifier if non-empty
if package_version_req != SpecifierSet(""):
- pkg_env_version = pkg_ref.__version__
-
msg = (
f"{class_name} requires package '{package}' to be present "
f"in the python environment, with version {package_version_req}, "
@@ -184,7 +204,7 @@ def _check_soft_dependencies(
)
if obj is not None:
msg = msg + (
-                    f"This version requirement is not one by skpro, but specific "
+                    f"This version requirement is not set by skpro, but is specific "
f"to the module, class or object with name {obj}."
)
@@ -207,12 +227,94 @@ def _check_soft_dependencies(
return True
+def _check_dl_dependencies(msg=None, severity="error"):
+ """Check if deep learning dependencies are installed.
+
+ Parameters
+ ----------
+ msg : str, optional, default= default message (msg below)
+ error message to be returned in the `ModuleNotFoundError`, overrides default
+ severity : str, "error" (default), "warning", "none"
+ behaviour for raising errors or warnings
+ "error" - raises a ModuleNotFoundError if one of packages is not installed
+ "warning" - raises a warning if one of packages is not installed
+ function returns False if one of packages is not installed, otherwise True
+ "none" - does not raise exception or warning
+ function returns False if one of packages is not installed, otherwise True
+
+ Raises
+ ------
+ ModuleNotFoundError
+ User friendly error with suggested action to install deep learning dependencies
+
+ Returns
+ -------
+ boolean - whether all packages are installed, only if no exception is raised
+ """
+ if not isinstance(msg, str):
+ msg = (
+ "tensorflow is required for deep learning functionality in `sktime`. "
+ "To install these dependencies, run: `pip install sktime[dl]`"
+ )
+ if find_spec("tensorflow") is not None:
+ return True
+ else:
+ if severity == "error":
+ raise ModuleNotFoundError(msg)
+ elif severity == "warning":
+ warnings.warn(msg, stacklevel=2)
+ return False
+ elif severity == "none":
+ return False
+ else:
+ raise RuntimeError(
+ "Error in calling _check_dl_dependencies, severity "
+ f'argument must be "error", "warning", or "none", found "{severity}".'
+ )
+
+
+def _check_mlflow_dependencies(msg=None, severity="error"):
+ """Check if `mlflow` and its dependencies are installed.
+
+ Parameters
+ ----------
+ msg: str, optional, default= default message (msg below)
+ error message to be returned when `ModuleNotFoundError` is raised.
+ severity: str, either of "error", "warning" or "none"
+ behaviour for raising errors or warnings
+        "error" - raises a `ModuleNotFoundError` if mlflow-related packages are not
+            found.
+        "warning" - raises a warning message if any mlflow-related package is not
+            installed, and also returns False. In case all packages are present,
+            returns True.
+        "none" - does not raise any exception or warning, and simply returns True
+            if all packages are installed, otherwise returns False.
+
+    Raises
+    ------
+    ModuleNotFoundError
+        User friendly error with a suggested action to install mlflow dependencies
+
+ Returns
+ -------
+ boolean - whether all mlflow-related packages are installed.
+ """
+ if not isinstance(msg, str):
+ msg = (
+ "`mlflow` is an extra dependency and is not included "
+ "in the base sktime installation. "
+ "Please run `pip install mlflow` "
+ "or `pip install sktime[mlflow]` to install the package."
+ )
+
+ return _check_soft_dependencies("mlflow", msg=msg, severity=severity)
+
+
def _check_python_version(obj, package=None, msg=None, severity="error"):
"""Check if system python version is compatible with requirements of obj.
Parameters
----------
    obj : skpro estimator, BaseObject descendant
used to check python version
package : str, default = None
if given, will be used in error message as package name
@@ -253,6 +355,7 @@ def _check_python_version(obj, package=None, msg=None, severity="error"):
if sys_version in est_specifier:
return True
# now we know that est_version is not compatible with sys_version
+
if isclass(obj):
class_name = obj.__name__
else:
@@ -260,7 +363,6 @@ def _check_python_version(obj, package=None, msg=None, severity="error"):
if not isinstance(msg, str):
msg = (
- f"{type(obj).__name__} requires python version to be {est_specifier},"
f"{class_name} requires python version to be {est_specifier},"
f" but system python version is {sys.version}."
)
@@ -284,6 +386,81 @@ def _check_python_version(obj, package=None, msg=None, severity="error"):
return True
+def _check_env_marker(obj, package=None, msg=None, severity="error"):
+    """Check if the packaging marker tag of obj is compatible with the environment.
+
+ Parameters
+ ----------
+    obj : skpro object, BaseObject descendant
+        used to check the env_marker tag
+ package : str, default = None
+ if given, will be used in error message as package name
+ msg : str, optional, default = default message (msg below)
+ error message to be returned in the `ModuleNotFoundError`, overrides default
+ severity : str, "error" (default), "warning", or "none"
+ whether the check should raise an error, a warning, or nothing
+
+ Returns
+ -------
+    compatible : bool, whether obj is compatible with the python environment
+        check uses the env_marker tag of obj
+
+ Raises
+ ------
+ InvalidMarker
+ User friendly error if obj has env_marker tag that is not a
+ packaging compatible marker string
+ ModuleNotFoundError
+ User friendly error if obj has an env_marker tag that is
+ incompatible with the python environment. If package is given,
+ error message gives package as the reason for incompatibility.
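+
+    Examples
+    --------
+    A sketch; the estimator and its tag value are hypothetical::
+
+        class MyEstimator(BaseObject):
+            _tags = {"env_marker": 'platform_system!="Windows"'}
+
+        _check_env_marker(MyEstimator, severity="none")  # True except on Windows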
+ """
+ est_marker_tag = obj.get_class_tag("env_marker", tag_value_default="None")
+ if est_marker_tag in ["None", None]:
+ return True
+
+ try:
+ est_marker = Marker(est_marker_tag)
+ except InvalidMarker:
+ msg_version = (
+ f"wrong format for env_marker tag, "
+ f"must be PEP 508 compatible specifier string, e.g., "
+ f'platform_system!="windows", but found "{est_marker_tag}"'
+ )
+ raise InvalidMarker(msg_version)
+
+ if est_marker.evaluate():
+ return True
+ # now we know that est_marker is not compatible with the environment
+
+ if isclass(obj):
+ class_name = obj.__name__
+ else:
+ class_name = type(obj).__name__
+
+ if not isinstance(msg, str):
+ msg = (
+ f"{class_name} requires an environment to satisfy "
+            f"packaging marker spec {est_marker}, but environment does not satisfy it."
+ )
+
+ if package is not None:
+ msg += f" This is due to requirements of the {package} package."
+
+ if severity == "error":
+ raise ModuleNotFoundError(msg)
+ elif severity == "warning":
+ warnings.warn(msg, stacklevel=2)
+ elif severity == "none":
+ return False
+ else:
+ raise RuntimeError(
+ "Error in calling _check_env_marker, severity "
+ f'argument must be "error", "warning", or "none", found "{severity}".'
+ )
+ return True
+
+
def _check_estimator_deps(obj, msg=None, severity="error"):
"""Check if object/estimator's package & python requirements are met by python env.
@@ -295,13 +472,13 @@ def _check_estimator_deps(obj, msg=None, severity="error"):
Parameters
----------
    obj : `skpro` object, `BaseObject` descendant, or list/tuple thereof
object(s) that this function checks compatibility of, with the python env
msg : str, optional, default = default message (msg below)
error message to be returned in the `ModuleNotFoundError`, overrides default
severity : str, "error" (default), "warning", or "none"
behaviour for raising errors or warnings
- "error" - raises a ModuleNotFoundException if environment is incompatible
+ "error" - raises a `ModuleNotFoundError` if environment is incompatible
"warning" - raises a warning if environment is incompatible
function returns False if environment is incompatible, otherwise True
"none" - does not raise exception or warning
@@ -334,6 +511,7 @@ def _check_estimator_deps(obj, msg=None, severity="error"):
return compatible
compatible = compatible and _check_python_version(obj, severity=severity)
+ compatible = compatible and _check_env_marker(obj, severity=severity)
pkg_deps = obj.get_class_tag("python_dependencies", None)
pck_alias = obj.get_class_tag("python_dependencies_alias", None)
@@ -346,3 +524,37 @@ def _check_estimator_deps(obj, msg=None, severity="error"):
compatible = compatible and pkg_deps_ok
return compatible
+
+
+def _normalize_requirement(req):
+ """Normalize packaging Requirement by removing build metadata from versions.
+
+ Parameters
+ ----------
+ req : packaging.requirements.Requirement
+        requirement to normalize, e.g., Requirement("pandas==1.2.3+foobar")
+
+ Returns
+ -------
+ normalized_req : packaging.requirements.Requirement
+ normalized requirement object with build metadata removed from versions,
+        e.g., Requirement("pandas==1.2.3")
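+
+    Examples
+    --------
+    A minimal sketch; note that PEP 440 allows local version labels such as
+    ``+foobar`` only with the ``==`` and ``!=`` operators:
+
+    >>> from packaging.requirements import Requirement
+    >>> str(_normalize_requirement(Requirement("pandas==1.2.3+foobar")))
+    'pandas==1.2.3'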
+ """
+ # Process each specifier in the requirement
+ normalized_specs = []
+ for spec in req.specifier:
+ # Parse the version and remove the build metadata
+ spec_v = Version(spec.version)
+ version_wo_build_metadata = f"{spec_v.major}.{spec_v.minor}.{spec_v.micro}"
+
+ # Create a new specifier without the build metadata
+ normalized_spec = Specifier(f"{spec.operator}{version_wo_build_metadata}")
+ normalized_specs.append(normalized_spec)
+
+ # Reconstruct the specifier set
+ normalized_specifier_set = SpecifierSet(",".join(str(s) for s in normalized_specs))
+
+ # Create a new Requirement object with the normalized specifiers
+ normalized_req = Requirement(f"{req.name}{normalized_specifier_set}")
+
+ return normalized_req