Skip to content

Commit

Permalink
linlogloss
Browse files Browse the repository at this point in the history
  • Loading branch information
fkiraly committed Sep 10, 2023
1 parent a375be4 commit 5ad9a39
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 0 deletions.
2 changes: 2 additions & 0 deletions skpro/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@
"ConstraintViolation",
"CRPS",
"LogLoss",
"LinearizedLogLoss",
"SquaredDistrLoss",
]

from skpro.metrics._classes import (
CRPS,
ConstraintViolation,
EmpiricalCoverage,
LinearizedLogLoss,
LogLoss,
PinballLoss,
SquaredDistrLoss,
Expand Down
63 changes: 63 additions & 0 deletions skpro/metrics/_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,69 @@ def _evaluate_by_index(self, y_true, y_pred, **kwargs):
return res


class LinearizedLogLoss(BaseDistrMetric):
    r"""Linearized logarithmic loss for distributional predictions.

    For a predictive distribution :math:`d` with pdf :math:`p_d`
    and a ground truth value :math:`y`, the linearized logarithmic loss is
    defined as :math:`L(y, d) := -\log p_d(y)` if :math:`p_d(y) \geq r`,
    and :math:`L(y, d) := -\log r + 1 - \frac{1}{r} p_d(y)` otherwise,
    i.e., the continuation of :math:`-\log` by its tangent at :math:`r`.

    `evaluate` computes the average test sample loss.
    `evaluate_by_index` produces the loss sample by test data point.
    `multivariate` controls averaging over variables.

    Parameters
    ----------
    range : positive float, optional, default=1
        range of linearization, i.e., where to linearize the log-loss
        for pdf values smaller than range, the log-loss is linearized
    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
        (n_outputs,), default='uniform_average'
        Defines whether and how to aggregate metric for across variables.
        If 'uniform_average' (default), errors are mean-averaged across variables.
        If array-like, errors are weighted averaged across variables, values as weights.
        If 'raw_values', does not average errors across variables, columns are retained.
    multivariate : bool, optional, default=False
        if True, behaves as multivariate log-loss
        log-loss is computed for entire row, results one score per row
        if False, is univariate log-loss
        log-loss is computed per variable marginal, results in many scores per row
    """

    def __init__(self, range=1, multioutput="uniform_average", multivariate=False):
        self.range = range
        self.multivariate = multivariate
        super().__init__(multioutput=multioutput)

    def _evaluate_by_index(self, y_true, y_pred, **kwargs):
        """Return the linearized log-loss evaluated at each test sample.

        Parameters
        ----------
        y_true : ground truth values, passed to ``y_pred.pdf`` / ``log_pdf``
        y_pred : predictive distribution object exposing ``pdf`` and ``log_pdf``

        Returns
        -------
        pd.DataFrame with one row per test sample; one column ("density")
        if ``self.multivariate``, otherwise one column per variable.
        """
        # local alias; avoids shadowing the builtin ``range``
        rng = self.range

        pdf = y_pred.pdf(y_true)
        # masks selecting the linearized vs the plain log-loss regime
        below_range = pdf < rng
        at_or_above_range = pdf >= rng

        logloss = -y_pred.log_pdf(y_true)
        # tangent of -log(p) at p=rng: -log(rng) - (p - rng)/rng = -log(rng) + 1 - p/rng
        linear = (-1 / rng) * pdf - np.log(rng) + 1

        # masks are disjoint and exhaustive, so this selects per entry
        res = below_range * linear + at_or_above_range * logloss

        # replace this by multivariate log_pdf once distr implements
        # i.e., pass multivariate on to log_pdf
        if self.multivariate:
            return pd.DataFrame(res.mean(axis=1), columns=["density"])
        else:
            return res

    @classmethod
    def get_test_params(cls):
        """Test parameter settings."""
        params1 = {}
        params2 = {"range": 0.1}
        return [params1, params2]


class SquaredDistrLoss(BaseDistrMetric):
r"""Squared loss for distributional predictions.
Expand Down

0 comments on commit 5ad9a39

Please sign in to comment.