From c04677a9159dc7f4fc24dea79de8c0bf58e7ffc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 19 Apr 2024 20:53:04 +0100 Subject: [PATCH] [ENH] Fisk distribution aka log-logistic distribution (#259) This PR adds a Fisk distribution aka log-logistic distribution. Required for interfacing log-logistic AFT from `lifelines`. --- docs/source/api_reference/distributions.rst | 1 + skpro/distributions/__init__.py | 2 + skpro/distributions/fisk.py | 126 ++++++++++++++++++ skpro/survival/ensemble/_survforest_sksurv.py | 2 +- 4 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 skpro/distributions/fisk.py diff --git a/docs/source/api_reference/distributions.rst b/docs/source/api_reference/distributions.rst index abcec7a9e..557b8a46f 100644 --- a/docs/source/api_reference/distributions.rst +++ b/docs/source/api_reference/distributions.rst @@ -35,6 +35,7 @@ Continuous support :toctree: auto_generated/ :template: class.rst + Fisk Laplace Logistic Normal diff --git a/skpro/distributions/__init__.py b/skpro/distributions/__init__.py index 21e6e97ed..910a70e3e 100644 --- a/skpro/distributions/__init__.py +++ b/skpro/distributions/__init__.py @@ -4,6 +4,7 @@ __all__ = [ "Empirical", + "Fisk", "Laplace", "Logistic", "LogNormal", @@ -19,6 +20,7 @@ ] from skpro.distributions.empirical import Empirical +from skpro.distributions.fisk import Fisk from skpro.distributions.laplace import Laplace from skpro.distributions.logistic import Logistic from skpro.distributions.lognormal import LogNormal diff --git a/skpro/distributions/fisk.py b/skpro/distributions/fisk.py new file mode 100644 index 000000000..aa292e48b --- /dev/null +++ b/skpro/distributions/fisk.py @@ -0,0 +1,126 @@ +# copyright: skpro developers, BSD-3-Clause License (see LICENSE file) +"""Log-logistic aka Fisk probability distribution.""" + +__author__ = ["fkiraly"] + +import pandas as pd +from scipy.stats import fisk + +from skpro.distributions.base import BaseDistribution + + +class 
Fisk(BaseDistribution): + r"""Fisk distribution, aka log-logistic distribution. + + The Fisk distribution is parametrized by a scale parameter :math:`\alpha` + and a shape parameter :math:`\beta`, such that the cumulative distribution + function (CDF) is given by: + + .. math:: F(x) = \left(1 + \left(\frac{x}{\alpha}\right)^{-\beta}\right)^{-1} + + Parameters + ---------- + alpha : float or array of float (1D or 2D), must be positive + scale parameter of the distribution + beta : float or array of float (1D or 2D), must be positive + shape parameter of the distribution + index : pd.Index, optional, default = RangeIndex + columns : pd.Index, optional, default = RangeIndex + + Example + ------- + >>> from skpro.distributions.fisk import Fisk + + >>> d = Fisk(beta=[[1, 1], [2, 3], [4, 5]], alpha=2) + """ + + _tags = { + "capabilities:approx": ["energy", "pdfnorm"], + "capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf", "ppf"], + "distr:measuretype": "continuous", + } + + def __init__(self, alpha=1, beta=1, index=None, columns=None): + self.alpha = alpha + self.beta = beta + self.index = index + self.columns = columns + + # todo: untangle index handling + # and broadcast of parameters. + # move this functionality to the base class + # important: if only one argument, it is a length-1-tuple, deal with this + self._alpha, self._beta = self._get_bc_params(self.alpha, self.beta) + shape = self._alpha.shape + + if index is None: + index = pd.RangeIndex(shape[0]) + + if columns is None: + columns = pd.RangeIndex(shape[1]) + + super().__init__(index=index, columns=columns) + + def mean(self): + r"""Return expected value of the distribution. + + Let :math:`X` be a random variable with the distribution of `self`. 
+ Returns the expectation :math:`\mathbb{E}[X]` + + Returns + ------- + pd.DataFrame with same rows, columns as `self` + expected value of distribution (entry-wise) + """ + mean_arr = fisk.mean(scale=self._alpha, c=self._beta) + return pd.DataFrame(mean_arr, index=self.index, columns=self.columns) + + def var(self): + r"""Return element/entry-wise variance of the distribution. + + Let :math:`X` be a random variable with the distribution of `self`. + Returns :math:`\mathbb{V}[X] = \mathbb{E}\left(X - \mathbb{E}[X]\right)^2` + + Returns + ------- + pd.DataFrame with same rows, columns as `self` + variance of distribution (entry-wise) + """ + var_arr = fisk.var(scale=self._alpha, c=self._beta) + return pd.DataFrame(var_arr, index=self.index, columns=self.columns) + + def pdf(self, x): + """Probability density function.""" + d = self.loc[x.index, x.columns] + pdf_arr = fisk.pdf(x.values, scale=d.alpha, c=d.beta) + return pd.DataFrame(pdf_arr, index=x.index, columns=x.columns) + + def log_pdf(self, x): + """Logarithmic probability density function.""" + d = self.loc[x.index, x.columns] + lpdf_arr = fisk.logpdf(x.values, scale=d.alpha, c=d.beta) + return pd.DataFrame(lpdf_arr, index=x.index, columns=x.columns) + + def cdf(self, x): + """Cumulative distribution function.""" + d = self.loc[x.index, x.columns] + cdf_arr = fisk.cdf(x.values, scale=d.alpha, c=d.beta) + return pd.DataFrame(cdf_arr, index=x.index, columns=x.columns) + + def ppf(self, p): + """Quantile function = percent point function = inverse cdf.""" + d = self.loc[p.index, p.columns] + icdf_arr = fisk.ppf(p.values, scale=d.alpha, c=d.beta) + return pd.DataFrame(icdf_arr, index=p.index, columns=p.columns) + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator.""" + params1 = {"alpha": [[1, 1], [2, 3], [4, 5]], "beta": 3} + params2 = { + "alpha": 2, + "beta": 3, + "index": pd.Index([1, 2, 5]), + "columns": pd.Index(["a", "b"]), + } + 
return [params1, params2] diff --git a/skpro/survival/ensemble/_survforest_sksurv.py b/skpro/survival/ensemble/_survforest_sksurv.py index 638316db8..2afcc602d 100644 --- a/skpro/survival/ensemble/_survforest_sksurv.py +++ b/skpro/survival/ensemble/_survforest_sksurv.py @@ -8,7 +8,7 @@ class SurvivalForestSkSurv(_SksurvAdapter, BaseSurvReg): - """Random survival forestm from scikit-survival. + """Random survival forest from scikit-survival. Direct interface to ``sksurv.ensemble.RandomSurvivalForest``, by ``sebp``.