Skip to content

Commit

Permalink
[ENH] Add performance metrics for anomaly detection (from TimeEval) (#1938)
Browse files Browse the repository at this point in the history

* feat: add TSAD metrics and tests from TimeEval

* fix: prts soft dependency specification

* feat: update CODEOWNERS

* feat: add to docs

* fix: prts version specification until CompML/PRTS#77 is merged and add skiptests conditions if not installed

* feat: handle str-object-names in check_soft_dependencies correctly

* fix: check_soft_dependencies test case for str-object reference

* fix: address PR review change requests
  • Loading branch information
Sebastian Schmidl authored Sep 2, 2024
1 parent 5aaa4ba commit 2b011bc
Show file tree
Hide file tree
Showing 14 changed files with 1,575 additions and 2 deletions.
1 change: 1 addition & 0 deletions CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ aeon/forecasting/ @aiwalter @guzalbulatova @ltsaprounis
aeon/networks/ @hadifawaz1999

aeon/performance_metrics/forecasting/ @aiwalter
aeon/performance_metrics/anomaly_detection/ @codelionx @MatthewMiddlehurst

aeon/pipeline/ @aiwalter

Expand Down
37 changes: 37 additions & 0 deletions aeon/performance_metrics/anomaly_detection/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""Metrics for anomaly detection."""

# Metrics operating on binary anomaly predictions.
from aeon.performance_metrics.anomaly_detection._binary import (
    range_f_score,
    range_precision,
    range_recall,
)

# Metrics operating on continuous anomaly scores.
from aeon.performance_metrics.anomaly_detection._continuous import (
    f_score_at_k_points,
    f_score_at_k_ranges,
    pr_auc_score,
    roc_auc_score,
    rp_rr_auc_score,
)

# Range-based AUC / volume-under-surface (VUS) metrics.
from aeon.performance_metrics.anomaly_detection._vus_metrics import (
    range_pr_auc_score,
    range_pr_roc_auc_support,
    range_pr_vus_score,
    range_roc_auc_score,
    range_roc_vus_score,
)

__all__ = [
    "range_precision",
    "range_recall",
    "range_f_score",
    "roc_auc_score",
    "pr_auc_score",
    "rp_rr_auc_score",
    "f_score_at_k_points",
    "f_score_at_k_ranges",
    "range_pr_roc_auc_support",
    "range_roc_auc_score",
    "range_pr_auc_score",
    "range_pr_vus_score",
    "range_roc_vus_score",
]
209 changes: 209 additions & 0 deletions aeon/performance_metrics/anomaly_detection/_binary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
"""Metrics on binary predictions for anomaly detection."""

# Maintainer tag used by the aeon project for code ownership / review routing.
__maintainer__ = ["CodeLionX"]
# Public API of this module; all three wrap the `prts` soft-dependency.
__all__ = ["range_precision", "range_recall", "range_f_score"]

import warnings

import numpy as np

from aeon.performance_metrics.anomaly_detection._util import check_y
from aeon.utils.validation._dependencies import _check_soft_dependencies


def range_precision(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    alpha: float = 0,
    cardinality: str = "reciprocal",
    bias: str = "flat",
) -> float:
    """Compute the range-based precision metric.

    Range-based metrics were introduced by Tatbul et al. at NeurIPS 2018 [1]_.
    This implementation uses the community package
    `prts <https://pypi.org/project/prts/>`_ as a soft-dependency.

    Range precision is the average precision of each predicted anomaly range.
    For each predicted continuous anomaly range the overlap size, position,
    and cardinality is considered. For more details, please refer to the
    paper [1]_.

    Parameters
    ----------
    y_true : np.ndarray
        True binary labels of shape (n_instances,).
    y_pred : np.ndarray
        Anomaly scores for each point of the time series of shape
        (n_instances,).
    alpha : float
        Weight of the existence reward. Because precision by definition
        emphasizes on prediction quality, there is no need for an existence
        reward and this value should always be set to 0.
    cardinality : {'reciprocal', 'one', 'udf_gamma'}
        Cardinality type.
    bias : {'flat', 'front', 'middle', 'back'}
        Positional bias type.

    Returns
    -------
    float
        Range-based precision

    References
    ----------
    .. [1] Tatbul, Nesime, Tae Jun Lee, Stan Zdonik, Mejbah Alam, and Justin
       Gottschlich. "Precision and Recall for Time Series." In Proceedings of
       the International Conference on Neural Information Processing Systems
       (NeurIPS), 1920-30. 2018.
       http://papers.nips.cc/paper/7462-precision-and-recall-for-time-series.pdf.
    """
    # Raise an informative error early if the `prts` soft-dependency is absent.
    _check_soft_dependencies("prts", obj="range_precision", suppress_import_stdout=True)

    from prts import ts_precision

    y_true, y_pred = check_y(y_true, y_pred, force_y_pred_continuous=False)

    # A constant prediction contains no anomaly ranges to score against.
    unique_predictions = np.unique(y_pred)
    if len(unique_predictions) == 1:
        warnings.warn(
            "Cannot compute metric for a constant value in y_score, returning 0.0!",
            stacklevel=2,
        )
        return 0.0

    return ts_precision(y_true, y_pred, alpha=alpha, cardinality=cardinality, bias=bias)


def range_recall(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    alpha: float = 0,
    cardinality: str = "reciprocal",
    bias: str = "flat",
) -> float:
    """Compute the range-based recall metric.

    Range-based metrics were introduced by Tatbul et al. at NeurIPS 2018 [1]_.
    This implementation uses the community package
    `prts <https://pypi.org/project/prts/>`_ as a soft-dependency.

    Range recall is the average recall of each real anomaly range. For each
    real anomaly range the overlap size, position, and cardinality with
    predicted anomaly ranges are considered. In addition, an existence reward
    can be given that boosts the recall even if just a single point of the
    real anomaly is in the predicted ranges. For more details, please refer
    to the paper [1]_.

    Parameters
    ----------
    y_true : np.ndarray
        True binary labels of shape (n_instances,).
    y_pred : np.ndarray
        Anomaly scores for each point of the time series of shape
        (n_instances,).
    alpha : float
        Weight of the existence reward. If 0: no existence reward, if 1: only
        existence reward. The existence reward is given if the real anomaly
        range has overlap with even a single point of the predicted anomaly
        range.
    cardinality : {'reciprocal', 'one', 'udf_gamma'}
        Cardinality type.
    bias : {'flat', 'front', 'middle', 'back'}
        Positional bias type.

    Returns
    -------
    float
        Range-based recall

    References
    ----------
    .. [1] Tatbul, Nesime, Tae Jun Lee, Stan Zdonik, Mejbah Alam, and Justin
       Gottschlich. "Precision and Recall for Time Series." In Proceedings of
       the International Conference on Neural Information Processing Systems
       (NeurIPS), 1920-30. 2018.
       http://papers.nips.cc/paper/7462-precision-and-recall-for-time-series.pdf.
    """
    # Raise an informative error early if the `prts` soft-dependency is absent.
    _check_soft_dependencies("prts", obj="range_recall", suppress_import_stdout=True)

    from prts import ts_recall

    y_true, y_pred = check_y(y_true, y_pred, force_y_pred_continuous=False)

    # A constant prediction contains no anomaly ranges to score against.
    unique_predictions = np.unique(y_pred)
    if len(unique_predictions) == 1:
        warnings.warn(
            "Cannot compute metric for a constant value in y_score, returning 0.0!",
            stacklevel=2,
        )
        return 0.0

    return ts_recall(y_true, y_pred, alpha=alpha, cardinality=cardinality, bias=bias)


def range_f_score(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    beta: float = 1,
    p_alpha: float = 0,
    r_alpha: float = 0.5,
    cardinality: str = "reciprocal",
    p_bias: str = "flat",
    r_bias: str = "flat",
) -> float:
    """Compute the F-score using the range-based recall and precision metrics.

    Range-based metrics were introduced by Tatbul et al. at NeurIPS 2018 [1]_.
    This implementation uses the community package
    `prts <https://pypi.org/project/prts/>`_ as a soft-dependency.

    The F-beta score is the weighted harmonic mean of precision and recall,
    reaching its optimal value at 1 and its worst value at 0. This
    implementation uses the range-based precision and range-based recall as
    basis.

    Parameters
    ----------
    y_true : np.ndarray
        True binary labels of shape (n_instances,).
    y_pred : np.ndarray
        Anomaly scores for each point of the time series of shape
        (n_instances,).
    beta : float
        F-score beta determines the weight of recall in the combined score.
        beta < 1 lends more weight to precision, while beta > 1 favors recall.
    p_alpha : float
        Weight of the existence reward for the range-based precision. For
        most - when not all - cases, `p_alpha` should be set to 0.
    r_alpha : float
        Weight of the existence reward. If 0: no existence reward, if 1: only
        existence reward.
    cardinality : {'reciprocal', 'one', 'udf_gamma'}
        Cardinality type.
    p_bias : {'flat', 'front', 'middle', 'back'}
        Positional bias type.
    r_bias : {'flat', 'front', 'middle', 'back'}
        Positional bias type.

    Returns
    -------
    float
        Range-based F-score

    References
    ----------
    .. [1] Tatbul, Nesime, Tae Jun Lee, Stan Zdonik, Mejbah Alam, and Justin
       Gottschlich. "Precision and Recall for Time Series." In Proceedings of
       the International Conference on Neural Information Processing Systems
       (NeurIPS), 1920-30. 2018.
       http://papers.nips.cc/paper/7462-precision-and-recall-for-time-series.pdf.
    """
    # Fix: previously this passed obj="range_recall", so a missing `prts`
    # dependency raised an error naming the wrong function. It must name
    # this function so users get an accurate message.
    _check_soft_dependencies("prts", obj="range_f_score", suppress_import_stdout=True)

    from prts import ts_fscore

    y_true, y_pred = check_y(y_true, y_pred, force_y_pred_continuous=False)
    # A constant prediction contains no anomaly ranges to score against.
    if np.unique(y_pred).shape[0] == 1:
        warnings.warn(
            "Cannot compute metric for a constant value in y_score, returning 0.0!",
            stacklevel=2,
        )
        return 0.0
    return ts_fscore(
        y_true,
        y_pred,
        beta=beta,
        p_alpha=p_alpha,
        r_alpha=r_alpha,
        cardinality=cardinality,
        p_bias=p_bias,
        r_bias=r_bias,
    )
Loading

0 comments on commit 2b011bc

Please sign in to comment.