Merge pull request roboflow#1609 from roboflow/feat/metrics-precision-recall

Feat/metrics precision recall
LinasKo authored Oct 18, 2024
2 parents bda4003 + 3e8a88a commit ea776b5
Showing 10 changed files with 1,359 additions and 31 deletions.
20 changes: 20 additions & 0 deletions docs/metrics/common_values.md
@@ -0,0 +1,20 @@
---
comments: true
status: new
---

# Common Values

This page contains supplementary values, types and enums that metrics use.

<div class="md-typeset">
<h2><a href="#supervision.metrics.core.MetricTarget">MetricTarget</a></h2>
</div>

:::supervision.metrics.core.MetricTarget

<div class="md-typeset">
<h2><a href="#supervision.metrics.core.AveragingMethod">AveragingMethod</a></h2>
</div>

:::supervision.metrics.core.AveragingMethod
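
As a usage sketch (not part of the committed docs), these enums are what you pass when constructing a metric from this PR. The constructor arguments are taken from the `F1Score` changes below; the `supervision.metrics.core` import path follows the mkdocstrings identifiers above and is an assumption about the public import location.

```python
from supervision.metrics import F1Score
from supervision.metrics.core import AveragingMethod, MetricTarget

# Evaluate axis-aligned boxes and average per-class scores without weighting.
# The enum import path is assumed from the doc identifiers above.
f1_metric = F1Score(
    metric_target=MetricTarget.BOXES,
    averaging_method=AveragingMethod.MACRO,
)
```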
18 changes: 18 additions & 0 deletions docs/metrics/precision.md
@@ -0,0 +1,18 @@
---
comments: true
status: new
---

# Precision

<div class="md-typeset">
<h2><a href="#supervision.metrics.precision.Precision">Precision</a></h2>
</div>

:::supervision.metrics.precision.Precision

<div class="md-typeset">
<h2><a href="#supervision.metrics.precision.PrecisionResult">PrecisionResult</a></h2>
</div>

:::supervision.metrics.precision.PrecisionResult
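
The doc stub above only pulls in API reference, so here is a hedged usage sketch. It assumes `Precision` follows the same `update()`/`compute()` pattern as the `F1Score` and `MeanAveragePrecision` classes touched in this PR; the attribute layout of `PrecisionResult` is not visible in this diff, so only the printed summary is shown.

```python
import supervision as sv
from supervision.metrics import Precision

predictions = sv.Detections(...)  # model outputs (placeholder, as in the F1Score example)
targets = sv.Detections(...)      # ground truth (placeholder)

precision_metric = Precision()
precision_result = precision_metric.update(predictions, targets).compute()
print(precision_result)
```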
18 changes: 18 additions & 0 deletions docs/metrics/recall.md
@@ -0,0 +1,18 @@
---
comments: true
status: new
---

# Recall

<div class="md-typeset">
<h2><a href="#supervision.metrics.recall.Recall">Recall</a></h2>
</div>

:::supervision.metrics.recall.Recall

<div class="md-typeset">
<h2><a href="#supervision.metrics.recall.RecallResult">RecallResult</a></h2>
</div>

:::supervision.metrics.recall.RecallResult
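
Likewise for recall, a minimal sketch mirroring the example above. Passing lists of `Detections` is an assumption carried over from the `Union[Detections, List[Detections]]` signature shown for `F1Score.update()` below.

```python
import supervision as sv
from supervision.metrics import Recall

# One entry per image; list support is assumed from the F1Score.update() signature.
predictions = [sv.Detections(...), sv.Detections(...)]
targets = [sv.Detections(...), sv.Detections(...)]

recall_metric = Recall()
recall_result = recall_metric.update(predictions, targets).compute()
print(recall_result)
```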
3 changes: 3 additions & 0 deletions mkdocs.yml
@@ -66,7 +66,10 @@ nav:
- Utils: datasets/utils.md
- Metrics:
- mAP: metrics/mean_average_precision.md
- Precision: metrics/precision.md
- Recall: metrics/recall.md
- F1 Score: metrics/f1_score.md
- Common Values: metrics/common_values.md
- Legacy Metrics: detection/metrics.md
- Utils:
- Video: utils/video.md
2 changes: 2 additions & 0 deletions supervision/metrics/__init__.py
@@ -8,6 +8,8 @@
MeanAveragePrecision,
MeanAveragePrecisionResult,
)
from supervision.metrics.precision import Precision, PrecisionResult
from supervision.metrics.recall import Recall, RecallResult
from supervision.metrics.utils.object_size import (
ObjectSizeCategory,
get_detection_size_category,
26 changes: 14 additions & 12 deletions supervision/metrics/core.py
@@ -37,9 +37,10 @@ class MetricTarget(Enum):
"""
Specifies what type of detection is used to compute the metric.
* BOXES: xyxy bounding boxes
* MASKS: Binary masks
* ORIENTED_BOUNDING_BOXES: Oriented bounding boxes (OBB)
Attributes:
BOXES: xyxy bounding boxes
MASKS: Binary masks
ORIENTED_BOUNDING_BOXES: Oriented bounding boxes (OBB)
"""

BOXES = "boxes"
@@ -54,15 +55,16 @@ class AveragingMethod(Enum):
Suppose, before returning the final result, a metric is computed for each class.
How do you combine those to get the final number?
* MACRO: Calculate the metric for each class and average the results. The simplest
averaging method, but it does not take class imbalance into account.
* MICRO: Calculate the metric globally by counting the total true positives, false
positives, and false negatives. Micro averaging is useful when you want to give
more importance to classes with more samples. It's also more appropriate if you
have an imbalance in the number of instances per class.
* WEIGHTED: Calculate the metric for each class and average the results, weighted by
the number of true instances of each class. Use weighted averaging if you want
to take class imbalance into account.
Attributes:
MACRO: Calculate the metric for each class and average the results. The simplest
averaging method, but it does not take class imbalance into account.
MICRO: Calculate the metric globally by counting the total true positives, false
positives, and false negatives. Micro averaging is useful when you want to
give more importance to classes with more samples. It's also more
appropriate if you have an imbalance in the number of instances per class.
WEIGHTED: Calculate the metric for each class and average the results, weighted
by the number of true instances of each class. Use weighted averaging if
you want to take class imbalance into account.
"""

MACRO = "macro"
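
The `AveragingMethod` docstring above describes three ways of reducing per-class scores to one number. The snippet below is an illustration in plain Python with made-up counts, not the library's internal implementation, showing how the three reductions differ for precision:

```python
# Hypothetical per-class counts, chosen so the three averages diverge.
tp = {"car": 90, "person": 45, "dog": 5}         # true positives
fp = {"car": 10, "person": 15, "dog": 15}        # false positives
support = {"car": 100, "person": 60, "dog": 10}  # ground-truth instances

per_class = {c: tp[c] / (tp[c] + fp[c]) for c in tp}

# MACRO: unweighted mean of per-class precision.
macro = sum(per_class.values()) / len(per_class)

# MICRO: pool the counts first, then compute precision once.
micro = sum(tp.values()) / (sum(tp.values()) + sum(fp.values()))

# WEIGHTED: per-class precision weighted by the number of true instances.
weighted = sum(per_class[c] * support[c] for c in per_class) / sum(support.values())

print(f"macro={macro:.3f} micro={micro:.3f} weighted={weighted:.3f}")
# macro=0.633 micro=0.778 weighted=0.809
```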
55 changes: 54 additions & 1 deletion supervision/metrics/f1_score.py
@@ -23,11 +23,45 @@


class F1Score(Metric):
"""
F1 Score is a metric used to evaluate object detection models. It is the harmonic
mean of precision and recall, calculated at different IoU thresholds.
In simple terms, F1 Score is a measure of a model's balance between precision and
recall (accuracy and completeness), calculated as:
`F1 = 2 * (precision * recall) / (precision + recall)`
Example:
```python
import supervision as sv
from supervision.metrics import F1Score
predictions = sv.Detections(...)
targets = sv.Detections(...)
f1_metric = F1Score()
f1_result = f1_metric.update(predictions, targets).compute()
print(f1_result)
print(f1_result.f1_50)
print(f1_result.small_objects.f1_50)
```
"""

def __init__(
self,
metric_target: MetricTarget = MetricTarget.BOXES,
averaging_method: AveragingMethod = AveragingMethod.WEIGHTED,
):
"""
Initialize the F1Score metric.
Args:
metric_target (MetricTarget): The type of detection data to use.
averaging_method (AveragingMethod): The averaging method used to compute the
F1 scores. Determines how the F1 scores are aggregated across classes.
"""
self._metric_target = metric_target
if self._metric_target == MetricTarget.ORIENTED_BOUNDING_BOXES:
raise NotImplementedError(
@@ -40,6 +74,9 @@ def __init__(
self._targets_list: List[Detections] = []

def reset(self) -> None:
"""
Reset the metric to its initial state, clearing all stored data.
"""
self._predictions_list = []
self._targets_list = []

@@ -48,6 +85,16 @@ def update(
predictions: Union[Detections, List[Detections]],
targets: Union[Detections, List[Detections]],
) -> F1Score:
"""
Add new predictions and targets to the metric, but do not compute the result.
Args:
predictions (Union[Detections, List[Detections]]): The predicted detections.
targets (Union[Detections, List[Detections]]): The target detections.
Returns:
(F1Score): The updated metric instance.
"""
if not isinstance(predictions, list):
predictions = [predictions]
if not isinstance(targets, list):
@@ -65,6 +112,13 @@
return self

def compute(self) -> F1ScoreResult:
"""
Calculate the F1 score metric based on the stored predictions and ground-truth
data, at different IoU thresholds.
Returns:
(F1ScoreResult): The F1 score metric result.
"""
result = self._compute(self._predictions_list, self._targets_list)

small_predictions, small_targets = self._filter_predictions_and_targets_by_size(
@@ -373,7 +427,6 @@ class F1ScoreResult:
The results of the F1 score metric calculation.
Defaults to `0` if no detections or targets were provided.
Provides a custom `__str__` method for pretty printing.
Attributes:
metric_target (MetricTarget): the type of data used for the metric -
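
Because `update()` only stores detections and `compute()` does the aggregation, one metric instance can accumulate an entire dataset before computing once. A sketch of that pattern, where `dataset_batches` is a hypothetical iterable of per-image `(predictions, targets)` pairs of `sv.Detections`:

```python
from supervision.metrics import F1Score

f1_metric = F1Score()

# `dataset_batches` is a placeholder: any iterable yielding one
# (predictions, targets) pair of sv.Detections per image.
for predictions, targets in dataset_batches:
    f1_metric.update(predictions, targets)

f1_result = f1_metric.compute()
print(f1_result.f1_50)  # attribute shown in the F1Score docstring above

f1_metric.reset()       # clear stored detections before the next evaluation run
```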
44 changes: 26 additions & 18 deletions supervision/metrics/mean_average_precision.py
@@ -23,6 +23,27 @@


class MeanAveragePrecision(Metric):
"""
Mean Average Precision (mAP) is a metric used to evaluate object detection models.
It is the average of the precision-recall curves at different IoU thresholds.
Example:
```python
import supervision as sv
from supervision.metrics import MeanAveragePrecision
predictions = sv.Detections(...)
targets = sv.Detections(...)
map_metric = MeanAveragePrecision()
map_result = map_metric.update(predictions, targets).compute()
print(map_result)
print(map_result.map50_95)
map_result.plot()
```
"""

def __init__(
self,
metric_target: MetricTarget = MetricTarget.BOXES,
@@ -47,6 +68,9 @@ def __init__(
self._targets_list: List[Detections] = []

def reset(self) -> None:
"""
Reset the metric to its initial state, clearing all stored data.
"""
self._predictions_list = []
self._targets_list = []

@@ -95,26 +119,10 @@ def compute(
) -> MeanAveragePrecisionResult:
"""
Calculate Mean Average Precision based on predicted and ground-truth
detections at different thresholds.
detections at different thresholds.
Returns:
(MeanAveragePrecisionResult): New instance of MeanAveragePrecision.
Example:
```python
import supervision as sv
from supervision.metrics import MeanAveragePrecision
predictions = sv.Detections(...)
targets = sv.Detections(...)
map_metric = MeanAveragePrecision()
map_result = map_metric.update(predictions, targets).compute()
print(map_result)
print(map_result.map50_95)
map_result.plot()
```
(MeanAveragePrecisionResult): The Mean Average Precision result.
"""
result = self._compute(self._predictions_list, self._targets_list)
