Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Validate] Add metadata filtering for evaluation functions to metrics #268

Open
wants to merge 33 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
e65a451
Pass eval_func_arguments to backend with EvaluationCriteria
gatli Feb 25, 2022
d3231e3
Add better error message for scenario_test misconfiguration and argum…
gatli Feb 25, 2022
9b9f68d
Update defaults to match metrics
gatli Mar 1, 2022
d86cef2
Address @phil-scale comments!
gatli Mar 1, 2022
2d1e738
Add examples to configuration functions and clear up class naming
gatli Mar 1, 2022
4b4ffee
Fix rebase errors
gatli Mar 30, 2022
442eaf8
Another rebasing error bites the dust
gatli Mar 30, 2022
203d0f2
Refactor a lot of segmentation local upload and async logic (#256)
ardila Mar 16, 2022
577d4fc
Update pyproject.toml
ardila Mar 18, 2022
940741c
fix camera_model initialization (#264)
sasha-scale Mar 24, 2022
e6a9058
Validate feature: setting baseline models (#266)
sasha-scale Mar 29, 2022
e18b124
Add better error message for scenario_test misconfiguration and argum…
gatli Feb 25, 2022
58c66eb
Address @phil-scale comments!
gatli Mar 1, 2022
c955984
flake fix
Anirudh-Scale Mar 16, 2022
94949ec
lint
Anirudh-Scale Mar 16, 2022
9065ebe
linting for circle ci
Anirudh-Scale Mar 17, 2022
3e9f7da
version
Anirudh-Scale Mar 17, 2022
a494ccb
used native polygon
Anirudh-Scale Mar 17, 2022
29d0483
adding shapely
Anirudh-Scale Mar 17, 2022
677c777
adding shapely
Anirudh-Scale Mar 17, 2022
d8b7c34
changing shapely
Anirudh-Scale Mar 17, 2022
ac6f542
changing shapely
Anirudh-Scale Mar 17, 2022
9f5b6bc
updating shapely
Anirudh-Scale Mar 17, 2022
e945089
poetry added shapely
Anirudh-Scale Mar 17, 2022
155f270
edge case
Anirudh-Scale Mar 23, 2022
003127d
np type
Anirudh-Scale Mar 23, 2022
213bafa
CuboidMetrics can filter metadata
gatli Mar 30, 2022
72f3c0d
Add Cuboid configs
gatli Mar 30, 2022
f0c7399
Fix mypy errors
gatli Mar 30, 2022
d72c565
Add field filters
gatli Mar 30, 2022
3ffffd0
Add tests for filtering functions and move them to separate module
gatli Mar 31, 2022
8948945
Add in and not in statements
gatli Mar 31, 2022
a980d4c
Fix rebase error with conftest.py
gatli Mar 31, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions nucleus/metrics/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
from .base import Metric, ScalarResult
from .categorization_metrics import CategorizationF1
from .cuboid_metrics import CuboidIOU, CuboidPrecision, CuboidRecall
from .filtering import (
FieldFilter,
ListOfOrAndFilters,
MetadataFilter,
apply_filters,
)
from .polygon_metrics import (
PolygonAveragePrecision,
PolygonIOU,
Expand Down
3 changes: 2 additions & 1 deletion nucleus/metrics/categorization_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,8 @@ def __init__(
):
"""
Args:
confidence_threshold: minimum confidence threshold for predictions to be taken into account for evaluation. Must be in [0, 1]. Default 0.0
confidence_threshold: minimum confidence threshold for predictions to be taken into account for evaluation.
Must be in [0, 1]. Default 0.0
f1_method: {'micro', 'macro', 'samples','weighted', 'binary'}, \
default='macro'
This parameter is required for multiclass/multilabel targets.
Expand Down
274 changes: 274 additions & 0 deletions nucleus/metrics/cuboid_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,274 @@
import sys
from abc import abstractmethod
from typing import List, Optional, Union

from nucleus.annotation import AnnotationList, CuboidAnnotation
from nucleus.prediction import CuboidPrediction, PredictionList

from .base import Metric, ScalarResult
from .cuboid_utils import detection_iou, label_match_wrapper, recall_precision
from .filtering import ListOfAndFilters, ListOfOrAndFilters, apply_filters
from .filters import confidence_filter


class CuboidMetric(Metric):
    """Abstract base class for metrics computed over cuboids.

    Incoming annotations and predictions are narrowed to cuboid types
    only. Predictions scoring below ``confidence_threshold`` are dropped
    before evaluation, and the optional DNF filter lists are applied to
    annotations and predictions independently. When
    ``enforce_label_match`` is True, annotations and predictions are
    only matched if they share a label.

    Concrete subclasses override `eval` with the logic that scores the
    filtered cuboid annotations against the filtered cuboid predictions.
    """

    def __init__(
        self,
        enforce_label_match: bool = False,
        confidence_threshold: float = 0.0,
        annotation_filters: Optional[
            Union[ListOfOrAndFilters, ListOfAndFilters]
        ] = None,
        prediction_filters: Optional[
            Union[ListOfOrAndFilters, ListOfAndFilters]
        ] = None,
    ):
        """Initializes CuboidMetric abstract object.

        Args:
            enforce_label_match: whether to enforce that annotation and prediction labels must match. Default False
            confidence_threshold: minimum confidence threshold for predictions. Must be in [0, 1]. Default 0.0
            annotation_filters: MetadataFilter predicates expressed in disjunctive normal form (DNF), e.g.
                [[MetadataFilter('x', '==', 0), FieldFilter('label', '==', 'pedestrian')], ...].
                Each inner list of predicates is a conjunction (AND), forming a more selective,
                multi-field predicate; the outermost list joins these conjunctions as a
                disjunction (OR).
            prediction_filters: MetadataFilter predicates expressed in disjunctive normal form (DNF),
                with the same structure as ``annotation_filters``, applied to predictions instead.
        """
        self.enforce_label_match = enforce_label_match
        # Threshold outside [0, 1] is a caller bug; fail fast.
        assert 0 <= confidence_threshold <= 1
        self.confidence_threshold = confidence_threshold
        self.annotation_filters = annotation_filters
        self.prediction_filters = prediction_filters

    @abstractmethod
    def eval(
        self,
        annotations: List[CuboidAnnotation],
        predictions: List[CuboidPrediction],
    ) -> ScalarResult:
        """Main evaluation function that subclasses must override."""
        pass

    def aggregate_score(self, results: List[ScalarResult]) -> ScalarResult:  # type: ignore[override]
        """Combines per-item results into a single weighted scalar."""
        return ScalarResult.aggregate(results)

    def __call__(
        self, annotations: AnnotationList, predictions: PredictionList
    ) -> ScalarResult:
        """Filters the inputs down to cuboids, then delegates to `eval`."""
        if self.confidence_threshold > 0:
            predictions = confidence_filter(
                predictions, self.confidence_threshold
            )
        cuboid_annotations: List[CuboidAnnotation] = list(
            annotations.cuboid_annotations
        )
        cuboid_predictions: List[CuboidPrediction] = list(
            predictions.cuboid_predictions
        )
        cuboid_annotations = apply_filters(
            cuboid_annotations, self.annotation_filters  # type: ignore
        )
        cuboid_predictions = apply_filters(
            cuboid_predictions, self.prediction_filters  # type: ignore
        )
        # Wrapper enforces per-label matching when requested.
        return label_match_wrapper(self.eval)(
            cuboid_annotations,
            cuboid_predictions,
            enforce_label_match=self.enforce_label_match,
        )


class CuboidIOU(CuboidMetric):
    """Computes the mean IOU (3D, or BEV 2D) between cuboid annotations and predictions."""

    # TODO: Remove defaults once these are surfaced more cleanly to users.
    def __init__(
        self,
        enforce_label_match: bool = True,
        iou_threshold: float = 0.0,
        confidence_threshold: float = 0.0,
        iou_2d: bool = False,
        annotation_filters: Optional[
            Union[ListOfOrAndFilters, ListOfAndFilters]
        ] = None,
        prediction_filters: Optional[
            Union[ListOfOrAndFilters, ListOfAndFilters]
        ] = None,
    ):
        """Initializes CuboidIOU object.

        Args:
            enforce_label_match: whether to enforce that annotation and prediction labels must match. Defaults to True
            iou_threshold: IOU threshold to consider detection as valid. Must be in [0, 1]. Default 0.0
            confidence_threshold: minimum confidence threshold for predictions. Must be in [0, 1]. Default 0.0
            iou_2d: whether to return the BEV 2D IOU if true, or the 3D IOU if false.
            annotation_filters: MetadataFilter predicates expressed in disjunctive normal form (DNF), e.g.
                [[MetadataFilter('x', '=', 0), ...], ...]. Each inner list of single-field predicates
                is a conjunction (AND); the outermost list joins these conjunctions as a disjunction (OR).
            prediction_filters: MetadataFilter predicates expressed in disjunctive normal form (DNF),
                with the same structure as ``annotation_filters``, applied to predictions instead.
        """
        assert (
            0 <= iou_threshold <= 1
        ), "IoU threshold must be between 0 and 1."
        self.iou_threshold = iou_threshold
        self.iou_2d = iou_2d
        super().__init__(
            enforce_label_match=enforce_label_match,
            confidence_threshold=confidence_threshold,
            annotation_filters=annotation_filters,
            prediction_filters=prediction_filters,
        )

    def eval(
        self,
        annotations: List[CuboidAnnotation],
        predictions: List[CuboidPrediction],
    ) -> ScalarResult:
        """Returns the average IOU weighted by max(#annotations, #predictions)."""
        iou_3d_metric, iou_2d_metric = detection_iou(
            predictions,
            annotations,
            threshold_in_overlap_ratio=self.iou_threshold,
        )

        weight = max(len(annotations), len(predictions))
        chosen_iou = iou_2d_metric if self.iou_2d else iou_3d_metric
        # epsilon guards against division by zero when both lists are empty
        avg_iou = chosen_iou.sum() / max(weight, sys.float_info.epsilon)

        return ScalarResult(avg_iou, weight)


class CuboidPrecision(CuboidMetric):
    """Calculates the average precision between cuboid annotations and predictions."""

    # TODO: Remove defaults once these are surfaced more cleanly to users.
    def __init__(
        self,
        enforce_label_match: bool = True,
        iou_threshold: float = 0.0,
        confidence_threshold: float = 0.0,
        annotation_filters: Optional[
            Union[ListOfOrAndFilters, ListOfAndFilters]
        ] = None,
        prediction_filters: Optional[
            Union[ListOfOrAndFilters, ListOfAndFilters]
        ] = None,
    ):
        """Initializes CuboidPrecision object.

        Args:
            enforce_label_match: whether to enforce that annotation and prediction labels must match. Defaults to True
            iou_threshold: IOU threshold to consider detection as valid. Must be in [0, 1]. Default 0.0
            confidence_threshold: minimum confidence threshold for predictions. Must be in [0, 1]. Default 0.0
            annotation_filters: MetadataFilter predicates. Predicates are expressed in disjunctive normal form (DNF), like
                [[MetadataFilter('x', '==', 0), ...], ...]. DNF allows arbitrary boolean logical combinations of single field
                predicates. The innermost structures each describe a single column predicate. The list of inner predicates is
                interpreted as a conjunction (AND), forming a more selective and multiple column predicate.
                Finally, the most outer list combines these filters as a disjunction (OR).
            prediction_filters: MetadataFilter predicates. Predicates are expressed in disjunctive normal form (DNF), like
                [[MetadataFilter('x', '==', 0), ...], ...]. DNF allows arbitrary boolean logical combinations of single field
                predicates. The innermost structures each describe a single column predicate. The list of inner predicates is
                interpreted as a conjunction (AND), forming a more selective and multiple column predicate.
                Finally, the most outer list combines these filters as a disjunction (OR).
        """
        assert (
            0 <= iou_threshold <= 1
        ), "IoU threshold must be between 0 and 1."
        self.iou_threshold = iou_threshold
        super().__init__(
            enforce_label_match=enforce_label_match,
            confidence_threshold=confidence_threshold,
            annotation_filters=annotation_filters,
            prediction_filters=prediction_filters,
        )

    def eval(
        self,
        annotations: List[CuboidAnnotation],
        predictions: List[CuboidPrediction],
    ) -> ScalarResult:
        """Computes precision = TP / (TP + FP), weighted by the number of predictions."""
        stats = recall_precision(
            predictions,
            annotations,
            threshold_in_overlap_ratio=self.iou_threshold,
        )
        weight = stats["tp_sum"] + stats["fp_sum"]
        # epsilon guards against division by zero when there are no predictions
        precision = stats["tp_sum"] / max(weight, sys.float_info.epsilon)
        return ScalarResult(precision, weight)


class CuboidRecall(CuboidMetric):
    """Calculates the average recall between cuboid annotations and predictions."""

    # TODO: Remove defaults once these are surfaced more cleanly to users.
    def __init__(
        self,
        enforce_label_match: bool = True,
        iou_threshold: float = 0.0,
        confidence_threshold: float = 0.0,
        annotation_filters: Optional[
            Union[ListOfOrAndFilters, ListOfAndFilters]
        ] = None,
        prediction_filters: Optional[
            Union[ListOfOrAndFilters, ListOfAndFilters]
        ] = None,
    ):
        """Initializes CuboidRecall object.

        Args:
            enforce_label_match: whether to enforce that annotation and prediction labels must match. Defaults to True
            iou_threshold: IOU threshold to consider detection as valid. Must be in [0, 1]. Default 0.0
            confidence_threshold: minimum confidence threshold for predictions. Must be in [0, 1]. Default 0.0
            annotation_filters: MetadataFilter predicates. Predicates are expressed in disjunctive normal form (DNF), like
                [[MetadataFilter('x', '==', 0), ...], ...]. DNF allows arbitrary boolean logical combinations of single field
                predicates. The innermost structures each describe a single column predicate. The list of inner predicates is
                interpreted as a conjunction (AND), forming a more selective and multiple column predicate.
                Finally, the most outer list combines these filters as a disjunction (OR).
            prediction_filters: MetadataFilter predicates. Predicates are expressed in disjunctive normal form (DNF), like
                [[MetadataFilter('x', '==', 0), ...], ...]. DNF allows arbitrary boolean logical combinations of single field
                predicates. The innermost structures each describe a single column predicate. The list of inner predicates is
                interpreted as a conjunction (AND), forming a more selective and multiple column predicate.
                Finally, the most outer list combines these filters as a disjunction (OR).
        """
        assert (
            0 <= iou_threshold <= 1
        ), "IoU threshold must be between 0 and 1."
        self.iou_threshold = iou_threshold
        super().__init__(
            enforce_label_match=enforce_label_match,
            confidence_threshold=confidence_threshold,
            annotation_filters=annotation_filters,
            prediction_filters=prediction_filters,
        )

    def eval(
        self,
        annotations: List[CuboidAnnotation],
        predictions: List[CuboidPrediction],
    ) -> ScalarResult:
        """Computes recall = TP / (TP + FN), weighted by the number of annotations."""
        stats = recall_precision(
            predictions,
            annotations,
            threshold_in_overlap_ratio=self.iou_threshold,
        )
        weight = stats["tp_sum"] + stats["fn_sum"]
        # epsilon guards against division by zero when there are no annotations
        recall = stats["tp_sum"] / max(weight, sys.float_info.epsilon)
        return ScalarResult(recall, weight)
Loading