Skip to content

Commit

Permalink
linlogloss
Browse files Browse the repository at this point in the history
  • Loading branch information
fkiraly committed Sep 10, 2023
1 parent a375be4 commit 5ad9a39
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 0 deletions.
2 changes: 2 additions & 0 deletions skpro/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@
"ConstraintViolation",
"CRPS",
"LogLoss",
"LinearizedLogLoss",
"SquaredDistrLoss",
]

from skpro.metrics._classes import (
CRPS,
ConstraintViolation,
EmpiricalCoverage,
LinearizedLogLoss,
LogLoss,
PinballLoss,
SquaredDistrLoss,
Expand Down
63 changes: 63 additions & 0 deletions skpro/metrics/_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,69 @@ def _evaluate_by_index(self, y_true, y_pred, **kwargs):
return res


class LinearizedLogLoss(BaseDistrMetric):
    r"""Linearized logarithmic loss for distributional predictions.

    For a predictive distribution :math:`d` with pdf :math:`p_d`
    and a ground truth value :math:`y`, the linearized logarithmic loss is
    defined as :math:`L(y, d) := -\log p_d(y)` if :math:`p_d(y) \geq r`,
    and :math:`L(y, d) := -\log r + 1 - \frac{1}{r} p_d(y)` otherwise,
    i.e., the continuation of :math:`-\log` by its tangent at :math:`r`.

    `evaluate` computes the average test sample loss.
    `evaluate_by_index` produces the loss sample by test data point.
    `multivariate` controls averaging over variables.

    Parameters
    ----------
    range : positive float, optional, default=1
        range of linearization, i.e., where to linearize the log-loss
        for pdf values smaller than range, the log-loss is linearized
    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
        (n_outputs,), default='uniform_average'
        Defines whether and how to aggregate metric for across variables.
        If 'uniform_average' (default), errors are mean-averaged across variables.
        If array-like, errors are weighted averaged across variables, values as weights.
        If 'raw_values', does not average errors across variables, columns are retained.
    multivariate : bool, optional, default=False
        if True, behaves as multivariate log-loss
        log-loss is computed for entire row, results one score per row
        if False, is univariate log-loss
        log-loss is computed per variable marginal, results in many scores per row
    """

    def __init__(self, range=1, multioutput="uniform_average", multivariate=False):
        self.range = range
        self.multivariate = multivariate
        super().__init__(multioutput=multioutput)

    def _evaluate_by_index(self, y_true, y_pred, **kwargs):
        """Return the linearized log-loss evaluated at each test sample.

        Parameters
        ----------
        y_true : ground truth values, passed to ``y_pred.pdf`` / ``log_pdf``
        y_pred : predictive distribution object exposing ``pdf`` and ``log_pdf``

        Returns
        -------
        pd.DataFrame with one row per test sample; one column ("density")
        if ``self.multivariate``, otherwise one column per variable.
        """
        # local alias; avoids shadowing the builtin ``range``
        rng = self.range

        pdf = y_pred.pdf(y_true)
        # masks selecting the linearized vs the plain log-loss regime
        below_range = pdf < rng
        at_or_above_range = pdf >= rng

        logloss = -y_pred.log_pdf(y_true)
        # tangent of -log(p) at p=rng: -log(rng) - (p - rng)/rng = -log(rng) + 1 - p/rng
        linear = (-1 / rng) * pdf - np.log(rng) + 1

        # masks are disjoint and exhaustive, so this selects per entry
        res = below_range * linear + at_or_above_range * logloss

        # replace this by multivariate log_pdf once distr implements
        # i.e., pass multivariate on to log_pdf
        if self.multivariate:
            return pd.DataFrame(res.mean(axis=1), columns=["density"])
        else:
            return res

    @classmethod
    def get_test_params(cls):
        """Test parameter settings."""
        params1 = {}
        params2 = {"range": 0.1}
        return [params1, params2]


class SquaredDistrLoss(BaseDistrMetric):
r"""Squared loss for distributional predictions.
Expand Down

0 comments on commit 5ad9a39

Please sign in to comment.