Average precision and AUROC update (#374)

* add realized perf AP metric

* add CBPE BC AP implementation

* update CBPE metrics _common_cleaning and estimate_auroc/accuracy

* ap sampling error update

* update docs

---------

Co-authored-by: Niels <[email protected]>
Co-authored-by: Niels Nuyttens <[email protected]>
3 people authored Mar 8, 2024
1 parent 0ec1fc8 commit 70aecce
Showing 17 changed files with 1,252 additions and 1,613 deletions.
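For orientation, here is a minimal sketch of how the newly supported `average_precision` metric can be requested alongside the existing realized-performance metrics. It mirrors the docstring example updated in this commit; the dataset loader and the column names (`repaid`, `y_pred_proba`, `y_pred`, `timestamp`) are assumptions based on NannyML's synthetic car-loan example, not part of this diff.

```python
import nannyml as nml

# Assumed helper dataset; any reference/analysis split with predictions
# and predicted probabilities works the same way.
reference_df, analysis_df, _ = nml.load_synthetic_car_loan_dataset()

calc = nml.PerformanceCalculator(
    y_pred_proba='y_pred_proba',
    y_pred='y_pred',
    y_true='repaid',
    timestamp_column_name='timestamp',
    problem_type='classification_binary',
    metrics=['roc_auc', 'f1', 'precision', 'recall', 'specificity',
             'accuracy', 'average_precision'],  # 'average_precision' is new in this commit
    chunk_size=5000,
)

calc.fit(reference_df)                  # fit on labelled reference data
results = calc.calculate(analysis_df)   # realized performance per chunk
```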

@@ -9,6 +9,7 @@ We currently support the following **standard** metrics for binary classification
* **roc_auc**
* **f1**
* **precision**
* **average_precision**
* **recall**
* **specificity**
* **accuracy**
@@ -9,6 +9,7 @@ We currently support the following **standard** metrics for binary classification
* **roc_auc**
* **f1**
* **precision**
* **average_precision**
* **recall**
* **specificity**
* **accuracy**
2 changes: 1 addition & 1 deletion nannyml/drift/__init__.py
@@ -23,7 +23,7 @@
- Domain Classifier: detects drift by looking at how performant a domain classifier is at distinguishing
between the reference and the chunk datasets.
"""
from .multivariate.domain_classifier import DomainClassifierCalculator
from .multivariate.data_reconstruction import DataReconstructionDriftCalculator
from .multivariate.domain_classifier import DomainClassifierCalculator
from .ranker import AlertCountRanker, CorrelationRanker
from .univariate import FeatureType, Method, MethodFactory, UnivariateDriftCalculator
1 change: 1 addition & 0 deletions nannyml/performance_calculation/__init__.py
@@ -19,6 +19,7 @@
'accuracy',
'confusion_matrix',
'business_value',
'average_precision',
]

SUPPORTED_REGRESSION_METRIC_VALUES = [
6 changes: 4 additions & 2 deletions nannyml/performance_calculation/calculator.py
@@ -28,7 +28,7 @@
... y_true='repaid',
... timestamp_column_name='timestamp',
... problem_type='classification_binary',
... metrics=['roc_auc', 'f1', 'precision', 'recall', 'specificity', 'accuracy'],
... metrics=['roc_auc', 'f1', 'precision', 'recall', 'specificity', 'accuracy', 'average_precision'],
... chunk_size=5000)
>>> calc.fit(reference_df)
>>> results = calc.calculate(analysis_df)
@@ -62,6 +62,7 @@
'roc_auc': StandardDeviationThreshold(),
'f1': StandardDeviationThreshold(),
'precision': StandardDeviationThreshold(),
'average_precision': StandardDeviationThreshold(),
'recall': StandardDeviationThreshold(),
'specificity': StandardDeviationThreshold(),
'accuracy': StandardDeviationThreshold(),
@@ -128,6 +129,7 @@ def __init__(
'roc_auc': StandardDeviationThreshold(),
'f1': StandardDeviationThreshold(),
'precision': StandardDeviationThreshold(),
'average_precision': StandardDeviationThreshold(),
'recall': StandardDeviationThreshold(),
'specificity': StandardDeviationThreshold(),
'accuracy': StandardDeviationThreshold(),
@@ -187,7 +189,7 @@ def __init__(
... y_true='repaid',
... timestamp_column_name='timestamp',
... problem_type='classification_binary',
... metrics=['roc_auc', 'f1', 'precision', 'recall', 'specificity', 'accuracy'],
... metrics=['roc_auc', 'f1', 'precision', 'recall', 'specificity', 'accuracy', 'average_precision'],
... chunk_size=5000)
>>> calc.fit(reference_df)
>>> results = calc.calculate(analysis_df)
99 changes: 97 additions & 2 deletions nannyml/performance_calculation/metrics/binary_classification.py
@@ -1,12 +1,20 @@
# Author: Niels Nuyttens <[email protected]>
#
# License: Apache Software License 2.0
"""Module containing implemenations for binary classification metrics and utilities."""
import warnings
from typing import Any, Dict, List, Optional, Tuple, Union

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score, roc_auc_score
from sklearn.metrics import (
average_precision_score,
confusion_matrix,
f1_score,
precision_score,
recall_score,
roc_auc_score,
)

from nannyml._typing import ProblemType
from nannyml.base import _list_missing, _remove_nans
@@ -16,6 +24,8 @@
from nannyml.sampling_error.binary_classification import (
accuracy_sampling_error,
accuracy_sampling_error_components,
ap_sampling_error,
ap_sampling_error_components,
auroc_sampling_error,
auroc_sampling_error_components,
business_value_sampling_error,
@@ -64,7 +74,7 @@ def __init__(
The Threshold instance that determines how the lower and upper threshold values will be calculated.
y_pred_proba: Optional[str], default=None
Name(s) of the column(s) containing your model output. For binary classification, pass a single string
refering to the model output column.
referring to the model output column.
"""
super().__init__(
name='roc_auc',
@@ -81,9 +91,11 @@ def __init__(
self._sampling_error_components: Tuple = ()

def __str__(self):
"""Metric string."""
return "roc_auc"

def _fit(self, reference_data: pd.DataFrame):
"""Metric _fit implementation on reference data."""
_list_missing([self.y_true, self.y_pred_proba], list(reference_data.columns))
self._sampling_error_components = auroc_sampling_error_components(
y_true_reference=reference_data[self.y_true],
@@ -111,6 +123,88 @@ def _sampling_error(self, data: pd.DataFrame) -> float:
return auroc_sampling_error(self._sampling_error_components, data)


@MetricFactory.register(metric='average_precision', use_case=ProblemType.CLASSIFICATION_BINARY)
class BinaryClassificationAP(Metric):
"""Average Precision metric.
https://scikit-learn.org/stable/modules/generated/sklearn.metrics.average_precision_score.html
"""

def __init__(
self,
y_true: str,
y_pred: str,
threshold: Threshold,
y_pred_proba: Optional[str] = None,
**kwargs,
):
"""Creates a new AP instance.
Parameters
----------
y_true: str
The name of the column containing target values.
y_pred: str
The name of the column containing your model predictions.
threshold: Threshold
The Threshold instance that determines how the lower and upper threshold values will be calculated.
y_pred_proba: Optional[str], default=None
Name(s) of the column(s) containing your model output. For binary classification, pass a single string
referring to the model output column.
"""
super().__init__(
name='average_precision',
y_true=y_true,
y_pred=y_pred,
threshold=threshold,
y_pred_proba=y_pred_proba,
lower_threshold_limit=0,
upper_threshold_limit=1,
components=[('Average Precision', 'average_precision')],
)

# sampling error
self._sampling_error_components: Tuple = ()

def __str__(self):
"""Metric string."""
return "average_precision"

def _fit(self, reference_data: pd.DataFrame):
"""Metric _fit implementation on reference data."""
_list_missing([self.y_true, self.y_pred_proba], list(reference_data.columns))
# we don't want to count missing rows for sampling error
reference_data = _remove_nans(reference_data, (self.y_true, self.y_pred))

if 1 not in reference_data[self.y_true].unique():
self._sampling_error_components = np.NaN, 0
else:
self._sampling_error_components = ap_sampling_error_components(
y_true_reference=reference_data[self.y_true],
y_pred_proba_reference=reference_data[self.y_pred_proba],
)

def _calculate(self, data: pd.DataFrame):
"""Redefine to handle NaNs and edge cases."""
_list_missing([self.y_true, self.y_pred_proba], list(data.columns))
data = _remove_nans(data, (self.y_true, self.y_pred))

y_true = data[self.y_true]
y_pred_proba = data[self.y_pred_proba]

if 1 not in y_true.unique():
warnings.warn(
f"'{self.y_true}' does not contain positive class for chunk, cannot calculate {self.display_name}. "
f"Returning NaN."
)
return np.NaN
else:
return average_precision_score(y_true, y_pred_proba)

def _sampling_error(self, data: pd.DataFrame) -> float:
return ap_sampling_error(self._sampling_error_components, data)


@MetricFactory.register(metric='f1', use_case=ProblemType.CLASSIFICATION_BINARY)
class BinaryClassificationF1(Metric):
"""F1 score metric."""
@@ -156,6 +250,7 @@ def __str__(self):

def _fit(self, reference_data: pd.DataFrame):
_list_missing([self.y_true, self.y_pred], list(reference_data.columns))
# TODO: maybe handle data quality issues here and pass clean data to sampling error calculation?
self._sampling_error_components = f1_sampling_error_components(
y_true_reference=reference_data[self.y_true],
y_pred_reference=reference_data[self.y_pred],
@@ -24,6 +24,7 @@
SUPPORTED_METRIC_VALUES = [
'roc_auc',
'f1',
'average_precision',
'precision',
'recall',
'specificity',
1 change: 1 addition & 0 deletions nannyml/performance_estimation/confidence_based/cbpe.py
@@ -49,6 +49,7 @@
'accuracy': StandardDeviationThreshold(),
'confusion_matrix': StandardDeviationThreshold(),
'business_value': StandardDeviationThreshold(),
'average_precision': StandardDeviationThreshold(),
}
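The commit also wires `average_precision` into CBPE (the default thresholds above, plus the "CBPE BC AP implementation" mentioned in the commit message). A rough sketch of estimating it without analysis targets follows; the `nml.CBPE` entry point, dataset loader, and column names are assumptions based on typical CBPE usage rather than lines from this diff.

```python
import nannyml as nml

# Assumed dataset loader and column names; substitute your own data.
reference_df, analysis_df, _ = nml.load_synthetic_car_loan_dataset()

estimator = nml.CBPE(
    y_pred_proba='y_pred_proba',
    y_pred='y_pred',
    y_true='repaid',
    timestamp_column_name='timestamp',
    problem_type='classification_binary',
    metrics=['roc_auc', 'average_precision'],  # AP estimation added by this commit
    chunk_size=5000,
)

estimator.fit(reference_df)                       # calibrates on labelled reference data
estimated_results = estimator.estimate(analysis_df)  # no targets required for the analysis period
```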


