sktime · fkiraly · Apr 25, 2024 · Mar 23, 2024 · Mar 23, 2024 · Mar 23, 2024
@@ -11,6 +11,7 @@
     "QPD_S",
     "QPD_B",
     "QPD_U",
+    "ChiSquared"
 ]
 
 from skpro.distributions.empirical import Empirical
@@ -19,3 +20,4 @@
 from skpro.distributions.normal import Normal
 from skpro.distributions.qpd import QPD_B, QPD_S, QPD_U
 from skpro.distributions.t import TDistribution
+from skpro.distributions.chi_squared import ChiSquared
@@ -0,0 +1,122 @@
+# copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
+"""Chi-Squared probability distribution.
+
+Defines ``ChiSquared``, a ``BaseDistribution`` subclass parameterised by its
+degrees of freedom.
+"""
+
+# GitHub handle(s) of the module author(s)
+__author__ = ["sukjingitsit"]
+
+import numpy as np
+import pandas as pd
+from scipy.special import chdtr, chdtriv, gamma, gammainc, gammaincinv, gammaln
+
+from skpro.distributions.base import BaseDistribution
+
+
+class ChiSquared(BaseDistribution):
+    r"""Chi-Squared distribution (skpro native).
+
+    For :math:`k` degrees of freedom, the probability density implemented
+    here is
+    :math:`f(x) = \frac{x^{k/2 - 1} e^{-x/2}}{2^{k/2} \Gamma(k/2)}`.
+
+    Parameters
+    ----------
+    dof : float or array of float (1D or 2D)
+        degrees of freedom of the chi-squared distribution
+    index : pd.Index, optional, default = RangeIndex
+    columns : pd.Index, optional, default = RangeIndex
+
+    Example
+    -------
+    >>> from skpro.distributions.chi_squared import ChiSquared
+
+    >>> chi = ChiSquared(dof=[[1, 2], [3, 4], [5, 6]])
+    """
+
+    _tags = {
+        "capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf", "ppf"],
+        "distr:measuretype": "continuous",
+    }
+
+    def __init__(self, dof, index=None, columns=None):
+        # constructor arguments are stored unmodified under their own names
+        self.dof = dof
+        self.index = index
+        self.columns = columns
+
+        # todo: untangle index handling
+        # and broadcast of parameters.
+        # move this functionality to the base class
+        # _dof is the broadcast form of dof; its 2D shape determines the
+        # default row and column indices below
+        self._dof = self._get_bc_params(self.dof)[0]
+        shape = self._dof.shape
+        if index is None:
+            index = pd.RangeIndex(shape[0])
+        if columns is None:
+            columns = pd.RangeIndex(shape[1])
+        super().__init__(index=index, columns=columns)
+
+    # TODO: energy is not implemented natively yet. The maths is (mostly)
+    # worked out for the case where x is a pandas DataFrame; the self-energy
+    # case is more involved and may need an approximation.
+
+    def mean(self):
+        r"""Return expected value of the distribution.
+
+        Let :math:`X` be a random variable with the distribution of `self`.
+        Returns the expectation :math:`\mathbb{E}[X]`
+
+        Returns
+        -------
+        pd.DataFrame with same rows, columns as `self`
+        expected value of distribution (entry-wise)
+        """
+        # the mean of a chi-squared distribution equals its degrees of freedom
+        mean_arr = self._dof
+        return pd.DataFrame(mean_arr, index=self.index, columns=self.columns)
+
+    def var(self):
+        r"""Return element/entry-wise variance of the distribution.
+
+        Let :math:`X` be a random variable with the distribution of `self`.
+        Returns :math:`\mathbb{V}[X] = \mathbb{E}\left(X - \mathbb{E}[X]\right)^2`
+
+        Returns
+        -------
+        pd.DataFrame with same rows, columns as `self`
+        variance of distribution (entry-wise)
+        """
+        # the variance of a chi-squared distribution is twice its dof
+        var_arr = 2 * self._dof
+        return pd.DataFrame(var_arr, index=self.index, columns=self.columns)
+
+    def pdf(self, x):
+        r"""Probability density function.
+
+        Evaluates, entry-wise,
+        :math:`f(x) = \frac{x^{k/2 - 1} e^{-x/2}}{2^{k/2} \Gamma(k/2)}`
+        where :math:`k` is the degrees of freedom of the matching entry.
+
+        Parameters
+        ----------
+        x : pd.DataFrame
+            points to evaluate the density at; its index and columns are
+            used to select the matching entries of `self`
+
+        Returns
+        -------
+        pd.DataFrame with same rows, columns as `x`
+        """
+        d = self.loc[x.index, x.columns]
+        half_dof = d.dof / 2
+        x_arr = x.to_numpy()
+        pdf_arr = np.exp(-x_arr / 2) * np.power(x_arr, half_dof - 1)
+        pdf_arr = pdf_arr / (np.power(2.0, half_dof) * gamma(half_dof))
+        return pd.DataFrame(pdf_arr, index=x.index, columns=x.columns)
+
+    def log_pdf(self, x):
+        r"""Logarithmic probability density function.
+
+        Evaluates, entry-wise,
+        :math:`-x/2 + (k/2 - 1) \log x - (k/2) \log 2 - \log \Gamma(k/2)`
+        where :math:`k` is the degrees of freedom of the matching entry.
+
+        Parameters
+        ----------
+        x : pd.DataFrame
+            points to evaluate the log-density at; its index and columns are
+            used to select the matching entries of `self`
+
+        Returns
+        -------
+        pd.DataFrame with same rows, columns as `x`
+        """
+        d = self.loc[x.index, x.columns]
+        half_dof = d.dof / 2
+        x_arr = x.to_numpy()
+        lpdf_arr = -x_arr / 2 + (half_dof - 1) * np.log(x_arr)
+        # gammaln evaluates log(Gamma(.)) directly; log(gamma(.)) overflows
+        # to inf once Gamma(k/2) exceeds the float range
+        lpdf_arr = lpdf_arr - (half_dof * np.log(2) + gammaln(half_dof))
+        return pd.DataFrame(lpdf_arr, index=x.index, columns=x.columns)
+
+    def cdf(self, x):
+        r"""Cumulative distribution function.
+
+        Evaluates, entry-wise, :math:`F(x) = P(k/2, x/2)`, where :math:`P` is
+        the regularized lower incomplete gamma function and :math:`k` is the
+        degrees of freedom of the matching entry.
+
+        Parameters
+        ----------
+        x : pd.DataFrame
+            points to evaluate the cdf at; its index and columns are
+            used to select the matching entries of `self`
+
+        Returns
+        -------
+        pd.DataFrame with same rows, columns as `x`
+        """
+        d = self.loc[x.index, x.columns]
+        # scipy's gammainc is already regularized (divided by Gamma(k/2)),
+        # so it is the cdf as-is and must not be normalised again
+        cdf_arr = gammainc(d.dof / 2, x.to_numpy() / 2)
+        return pd.DataFrame(cdf_arr, index=x.index, columns=x.columns)
+
+    def ppf(self, p):
+        r"""Quantile function = percent point function = inverse cdf.
+
+        Evaluates, entry-wise, :math:`F^{-1}(p) = 2 P^{-1}(k/2, p)`, where
+        :math:`P^{-1}` is the inverse of the regularized lower incomplete
+        gamma function in its second argument.
+
+        Parameters
+        ----------
+        p : pd.DataFrame
+            probabilities to evaluate the quantile function at; its index
+            and columns are used to select the matching entries of `self`
+
+        Returns
+        -------
+        pd.DataFrame with same rows, columns as `p`
+        """
+        d = self.loc[p.index, p.columns]
+        # invert cdf(x) = gammainc(k/2, x/2) for x; chdtriv would instead
+        # solve for the degrees of freedom, not for x
+        icdf_arr = 2 * gammaincinv(d.dof / 2, p.to_numpy())
+        return pd.DataFrame(icdf_arr, index=p.index, columns=p.columns)
+
+    @classmethod
+    def get_test_params(cls, parameter_set="default"):
+        """Return testing parameter settings for the estimator.
+
+        Parameters
+        ----------
+        parameter_set : str, default="default"
+            not used by this implementation; the same settings are
+            returned for every value
+
+        Returns
+        -------
+        list of dict
+            two constructor keyword-argument settings: one with a 2D `dof`
+            and default index/columns, one with a scalar `dof` and explicit
+            index/columns
+        """
+        params1 = {"dof": [[1, 2], [3, 4], [5, 6]]}
+        params2 = {
+            "dof": 10,
+            "index": pd.Index([1, 2, 5]),
+            "columns": pd.Index(["a", "b"]),
+        }
+        return [params1, params2]