From 02fc27a23463127321b0dc7b8ba4901b527e3853 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+borda@users.noreply.github.com> Date: Wed, 23 Oct 2024 14:21:51 +0100 Subject: [PATCH 01/17] ci: fix missed testing with oldest (#2803) (cherry picked from commit d87aff75bb2c67df4ebef9ab289afe5467d87aaa) --- .github/workflows/ci-integrate.yml | 6 ++-- .github/workflows/ci-tests.yml | 2 +- .github/workflows/docs-build.yml | 1 - src/conftest.py | 2 +- src/torchmetrics/collections.py | 28 +++++++++++++++++-- .../test_generalized_dice_score.py | 4 ++- 6 files changed, 35 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci-integrate.yml b/.github/workflows/ci-integrate.yml index a01bd076cb2..ca50ea9a2c4 100644 --- a/.github/workflows/ci-integrate.yml +++ b/.github/workflows/ci-integrate.yml @@ -30,8 +30,8 @@ jobs: python-version: ["3.8", "3.10"] requires: ["oldest", "latest"] exclude: - - { python-version: "3.10", requires: "oldest" } - - { python-version: "3.10", os: "windows" } # todo: https://discuss.pytorch.org/t/numpy-is-not-available-error/146192 + - { python-version: "3.11", requires: "oldest" } + - { python-version: "3.11", os: "windows" } # todo: https://discuss.pytorch.org/t/numpy-is-not-available-error/146192 include: - { python-version: "3.10", requires: "latest", os: "ubuntu-22.04" } # - { python-version: "3.10", requires: "latest", os: "macOS-14" } # M1 machine # todo: crashing for MPS out of memory @@ -53,6 +53,8 @@ jobs: - name: source cashing uses: ./.github/actions/pull-caches + with: + requires: ${{ matrix.requires }} - name: set oldest if/only for integrations if: matrix.requires == 'oldest' run: python .github/assistant.py set-oldest-versions --req_files='["requirements/_integrate.txt"]' diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml index 0f91e84ed6f..f6e618cfb6e 100644 --- a/.github/workflows/ci-tests.yml +++ b/.github/workflows/ci-tests.yml @@ -46,7 +46,7 @@ jobs: - "2.5.0" include: # cover additional python and PT combinations - - { os: "ubuntu-22.04", python-version: "3.8", pytorch-version: "1.13.1" } + - { os: "ubuntu-20.04", python-version: "3.8", pytorch-version: "1.13.1", requires: "oldest" } - { os: "ubuntu-22.04", python-version: "3.10", pytorch-version: "2.0.1" } - { os: "ubuntu-22.04", python-version: "3.10", pytorch-version: "2.2.2" } - { os: "ubuntu-22.04", python-version: "3.11", pytorch-version: "2.3.1" } diff --git a/.github/workflows/docs-build.yml b/.github/workflows/docs-build.yml index be5e39fc09a..dce0f0192a2 100644 --- a/.github/workflows/docs-build.yml +++ b/.github/workflows/docs-build.yml @@ -44,7 +44,6 @@ jobs: - name: source cashing uses: ./.github/actions/pull-caches with: - requires: ${{ matrix.requires }} pytorch-version: ${{ matrix.pytorch-version }} pypi-dir: ${{ env.PYPI_CACHE }} diff --git a/src/conftest.py b/src/conftest.py index c988c1784c5..5f4a26123d3 100644 --- a/src/conftest.py +++ b/src/conftest.py @@ -36,5 +36,5 @@ def collect(self) -> GeneratorExit: def pytest_collect_file(parent: Path, path: Path) -> Optional[DoctestModule]: """Collect doctests and add the reset_random_seed fixture.""" if path.ext == ".py": - return DoctestModule.from_parent(parent, fspath=path) + return DoctestModule.from_parent(parent, path=Path(path)) return None diff --git a/src/torchmetrics/collections.py b/src/torchmetrics/collections.py index 9fe0bb40761..0b7f927deb9 100644 --- a/src/torchmetrics/collections.py +++ b/src/torchmetrics/collections.py @@ -31,6 +31,30 @@ __doctest_skip__ = ["MetricCollection.plot", 
"MetricCollection.plot_all"] +def _remove_prefix(string: str, prefix: str) -> str: + """Patch for older version with missing method `removeprefix`. + + >>> _remove_prefix("prefix_string", "prefix_") + 'string' + >>> _remove_prefix("not_prefix_string", "prefix_") + 'not_prefix_string' + + """ + return string[len(prefix) :] if string.startswith(prefix) else string + + +def _remove_suffix(string: str, suffix: str) -> str: + """Patch for older version with missing method `removesuffix`. + + >>> _remove_suffix("string_suffix", "_suffix") + 'string' + >>> _remove_suffix("string_suffix_missing", "_suffix") + 'string_suffix_missing' + + """ + return string[: -len(suffix)] if string.endswith(suffix) else string + + class MetricCollection(ModuleDict): """MetricCollection class can be used to chain metrics that have the same call pattern into one single class. @@ -558,9 +582,9 @@ def __getitem__(self, key: str, copy_state: bool = True) -> Metric: """ self._compute_groups_create_state_ref(copy_state) if self.prefix: - key = key.removeprefix(self.prefix) + key = _remove_prefix(key, self.prefix) if self.postfix: - key = key.removesuffix(self.postfix) + key = _remove_suffix(key, self.postfix) return self._modules[key] @staticmethod diff --git a/tests/unittests/segmentation/test_generalized_dice_score.py b/tests/unittests/segmentation/test_generalized_dice_score.py index 3f8acec842a..ef05face380 100644 --- a/tests/unittests/segmentation/test_generalized_dice_score.py +++ b/tests/unittests/segmentation/test_generalized_dice_score.py @@ -16,6 +16,7 @@ import pytest import torch +from lightning_utilities.core.imports import RequirementCache from monai.metrics.generalized_dice import compute_generalized_dice from torchmetrics.functional.segmentation.generalized_dice import generalized_dice_score from torchmetrics.segmentation.generalized_dice import GeneralizedDiceScore @@ -51,7 +52,8 @@ def _reference_generalized_dice( if input_format == "index": preds = torch.nn.functional.one_hot(preds, num_classes=NUM_CLASSES).movedim(-1, 1) target = torch.nn.functional.one_hot(target, num_classes=NUM_CLASSES).movedim(-1, 1) - val = compute_generalized_dice(preds, target, include_background=include_background) + monai_extra_arg = {"sum_over_classes": True} if RequirementCache("monai>=1.4.0") else {} + val = compute_generalized_dice(preds, target, include_background=include_background, **monai_extra_arg) if reduce: val = val.mean() return val From f7c88f6816129c941f6e58c99f52a69c9e1bf3f2 Mon Sep 17 00:00:00 2001 From: "Adam J. 
Stewart" Date: Thu, 24 Oct 2024 11:16:11 +0200 Subject: [PATCH 02/17] Re-adding `numpy` 2+ support (#2804) (cherry picked from commit 4c753696200cf56b601b7970c9e9f8a999e2e753) --- requirements/audio.txt | 1 + requirements/base.txt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements/audio.txt b/requirements/audio.txt index dcfc5b05740..6b08e0cb684 100644 --- a/requirements/audio.txt +++ b/requirements/audio.txt @@ -3,6 +3,7 @@ # this need to be the same as used inside speechmetrics pesq >=0.0.4, <0.0.5 +numpy <2.0 # strict, for compatibility reasons pystoi >=0.4.0, <0.5.0 torchaudio >=0.10.0, <2.6.0 gammatone >=1.0.0, <1.1.0 diff --git a/requirements/base.txt b/requirements/base.txt index 35f1b4406c7..9fc7a5e3fa8 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,7 +1,7 @@ # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment -numpy >1.20.0, <2.0 # strict, for compatibility reasons +numpy >1.20.0 packaging >17.1 torch >=1.10.0, <2.6.0 typing-extensions; python_version < '3.9' From 779fb7200eee6879b7e933e227d203cb62c8c726 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 29 Oct 2024 11:15:03 +0100 Subject: [PATCH 03/17] build(deps): update transformers requirement from <4.46.0,>4.4.0 to >4.4.0,<4.47.0 in /requirements (#2807) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 9fa882d00a2c0bf1be8dffca219f43409a22984f) --- requirements/multimodal.txt | 2 +- requirements/text.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/multimodal.txt b/requirements/multimodal.txt index 1a034aa3a1f..2f5c6749bd9 100644 --- a/requirements/multimodal.txt +++ b/requirements/multimodal.txt @@ -1,5 +1,5 @@ # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment -transformers >=4.42.3, <4.46.0 +transformers >=4.42.3, <4.47.0 piq <=0.8.0 diff --git a/requirements/text.txt b/requirements/text.txt index 62007c3e127..208b87ca34c 100644 --- a/requirements/text.txt +++ b/requirements/text.txt @@ -4,7 +4,7 @@ nltk >3.8.1, <=3.9.1 tqdm <4.67.0 regex >=2021.9.24, <=2024.9.11 -transformers >4.4.0, <4.46.0 +transformers >4.4.0, <4.47.0 mecab-python3 >=1.0.6, <1.1.0 ipadic >=1.0.0, <1.1.0 sentencepiece >=0.2.0, <0.3.0 From 940f6f39a433832936459f63b814f6622fa55d31 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 29 Oct 2024 11:34:30 +0100 Subject: [PATCH 04/17] build(deps): bump cachier from 3.0.1 to 3.1.2 in /requirements (#2809) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit c572289e5ea0ded6601f41f45bcc1377ec42055c) --- requirements/_tests.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/_tests.txt b/requirements/_tests.txt index a2910fdc2d2..2205d8ae1f1 100644 --- a/requirements/_tests.txt +++ b/requirements/_tests.txt @@ -18,4 +18,4 @@ fire ==0.7.* cloudpickle >1.3, <=3.1.0 scikit-learn ==1.2.*; python_version < "3.9" scikit-learn ==1.5.*; python_version > "3.8" # we do not use `> =` 
because of oldest replcement -cachier ==3.0.1 +cachier ==3.1.2 From 5b83f4016b58fa92776a9f055b1564d902faf3b2 Mon Sep 17 00:00:00 2001 From: Nicki Skafte Detlefsen Date: Tue, 29 Oct 2024 12:07:30 +0000 Subject: [PATCH 05/17] Bugfix for empty preds or target in iou scores (#2806) Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> Co-authored-by: Jirka B (cherry picked from commit fbc7877e7d0819c63c2e5129503c7500d9acf58e) --- CHANGELOG.md | 9 +++++ src/torchmetrics/detection/iou.py | 13 ++++--- src/torchmetrics/functional/detection/ciou.py | 5 +++ src/torchmetrics/functional/detection/diou.py | 5 +++ src/torchmetrics/functional/detection/giou.py | 5 +++ src/torchmetrics/functional/detection/iou.py | 5 +++ .../unittests/detection/test_intersection.py | 37 +++++++++++++++++++ 7 files changed, 74 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f97f05c16b..62afc2d6bb6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 **Note: we move fast, but still we preserve 0.1 version (one feature release) back compatibility.** +--- + +## [UnReleased] - 2024-MM-DD + +### Fixed + +- Fixed iou scores in detection for either empty predictions/targets leading to wrong scores ([#2805](https://github.com/Lightning-AI/torchmetrics/pull/2805)) + + --- ## [1.5.1] - 2024-10-22 diff --git a/src/torchmetrics/detection/iou.py b/src/torchmetrics/detection/iou.py index e809b27ce6a..f828889153d 100644 --- a/src/torchmetrics/detection/iou.py +++ b/src/torchmetrics/detection/iou.py @@ -182,14 +182,17 @@ def update(self, preds: List[Dict[str, Tensor]], target: List[Dict[str, Tensor]] """Update state with predictions and targets.""" _input_validator(preds, target, ignore_score=True) - for p, t in zip(preds, target): - det_boxes = self._get_safe_item_values(p["boxes"]) - gt_boxes = self._get_safe_item_values(t["boxes"]) - self.groundtruth_labels.append(t["labels"]) + for p_i, t_i in zip(preds, target): + det_boxes = self._get_safe_item_values(p_i["boxes"]) + gt_boxes = self._get_safe_item_values(t_i["boxes"]) + self.groundtruth_labels.append(t_i["labels"]) iou_matrix = self._iou_update_fn(det_boxes, gt_boxes, self.iou_threshold, self._invalid_val) # N x M if self.respect_labels: - label_eq = p["labels"].unsqueeze(1) == t["labels"].unsqueeze(0) # N x M + if det_boxes.numel() > 0 and gt_boxes.numel() > 0: + label_eq = p_i["labels"].unsqueeze(1) == t_i["labels"].unsqueeze(0) # N x M + else: + label_eq = torch.eye(iou_matrix.shape[0], dtype=bool, device=iou_matrix.device) # type: ignore[call-overload] iou_matrix[~label_eq] = self._invalid_val self.iou_matrix.append(iou_matrix) diff --git a/src/torchmetrics/functional/detection/ciou.py b/src/torchmetrics/functional/detection/ciou.py index 9669029ba73..55560241aec 100644 --- a/src/torchmetrics/functional/detection/ciou.py +++ b/src/torchmetrics/functional/detection/ciou.py @@ -31,6 +31,11 @@ def _ciou_update( from torchvision.ops import complete_box_iou + if preds.numel() == 0: # if no boxes are predicted + return torch.zeros(target.shape[0], target.shape[0], device=target.device, dtype=torch.float32) + if target.numel() == 0: # if no boxes are true + return torch.zeros(preds.shape[0], preds.shape[0], device=preds.device, dtype=torch.float32) + iou = complete_box_iou(preds, target) if iou_threshold is not None: iou[iou < iou_threshold] = replacement_val diff --git a/src/torchmetrics/functional/detection/diou.py 
b/src/torchmetrics/functional/detection/diou.py index 13fb0071fed..42492675c98 100644 --- a/src/torchmetrics/functional/detection/diou.py +++ b/src/torchmetrics/functional/detection/diou.py @@ -31,6 +31,11 @@ def _diou_update( from torchvision.ops import distance_box_iou + if preds.numel() == 0: # if no boxes are predicted + return torch.zeros(target.shape[0], target.shape[0], device=target.device, dtype=torch.float32) + if target.numel() == 0: # if no boxes are true + return torch.zeros(preds.shape[0], preds.shape[0], device=preds.device, dtype=torch.float32) + iou = distance_box_iou(preds, target) if iou_threshold is not None: iou[iou < iou_threshold] = replacement_val diff --git a/src/torchmetrics/functional/detection/giou.py b/src/torchmetrics/functional/detection/giou.py index cc39f813b41..5fd8f873286 100644 --- a/src/torchmetrics/functional/detection/giou.py +++ b/src/torchmetrics/functional/detection/giou.py @@ -31,6 +31,11 @@ def _giou_update( from torchvision.ops import generalized_box_iou + if preds.numel() == 0: # if no boxes are predicted + return torch.zeros(target.shape[0], target.shape[0], device=target.device, dtype=torch.float32) + if target.numel() == 0: # if no boxes are true + return torch.zeros(preds.shape[0], preds.shape[0], device=preds.device, dtype=torch.float32) + iou = generalized_box_iou(preds, target) if iou_threshold is not None: iou[iou < iou_threshold] = replacement_val diff --git a/src/torchmetrics/functional/detection/iou.py b/src/torchmetrics/functional/detection/iou.py index 3d3cef26bb2..6558af5cd37 100644 --- a/src/torchmetrics/functional/detection/iou.py +++ b/src/torchmetrics/functional/detection/iou.py @@ -32,6 +32,11 @@ def _iou_update( from torchvision.ops import box_iou + if preds.numel() == 0: # if no boxes are predicted + return torch.zeros(target.shape[0], target.shape[0], device=target.device, dtype=torch.float32) + if target.numel() == 0: # if no boxes are true + return torch.zeros(preds.shape[0], preds.shape[0], device=preds.device, dtype=torch.float32) + iou = box_iou(preds, target) if iou_threshold is not None: iou[iou < iou_threshold] = replacement_val diff --git a/tests/unittests/detection/test_intersection.py b/tests/unittests/detection/test_intersection.py index a028a014ea1..f44c6017e6c 100644 --- a/tests/unittests/detection/test_intersection.py +++ b/tests/unittests/detection/test_intersection.py @@ -355,6 +355,43 @@ def test_corner_case_only_one_empty_prediction(self, class_metric, functional_me for val in res.values(): assert val == torch.tensor(0.0) + def test_empty_preds_and_target(self, class_metric, functional_metric, reference_metric): + """Check that for either empty preds and targets that the metric returns 0 in these cases before averaging.""" + x = [ + { + "boxes": torch.empty(size=(0, 4), dtype=torch.float32), + "labels": torch.tensor([], dtype=torch.long), + }, + { + "boxes": torch.FloatTensor([[0.1, 0.1, 0.2, 0.2], [0.3, 0.3, 0.4, 0.4]]), + "labels": torch.LongTensor([1, 2]), + }, + ] + + y = [ + { + "boxes": torch.FloatTensor([[0.1, 0.1, 0.2, 0.2], [0.3, 0.3, 0.4, 0.4]]), + "labels": torch.LongTensor([1, 2]), + "scores": torch.FloatTensor([0.9, 0.8]), + }, + { + "boxes": torch.FloatTensor([[0.1, 0.1, 0.2, 0.2], [0.3, 0.3, 0.4, 0.4]]), + "labels": torch.LongTensor([1, 2]), + "scores": torch.FloatTensor([0.9, 0.8]), + }, + ] + metric = class_metric() + metric.update(x, y) + res = metric.compute() + for val in res.values(): + assert val == torch.tensor(0.5) + + metric = class_metric() + metric.update(y, x) + res = 
metric.compute() + for val in res.values(): + assert val == torch.tensor(0.5) + def test_corner_case(): """See issue: https://github.com/Lightning-AI/torchmetrics/issues/1921.""" From c1ded2ce40ca3839a66e146522b7b5ab0224fb25 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 29 Oct 2024 13:15:48 +0100 Subject: [PATCH 06/17] build(deps): bump mypy from 1.11.2 to 1.13.0 in /requirements (#2808) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Jirka B Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> (cherry picked from commit 421e028b9715636ca9e325ad692e21b5220e4f56) --- requirements/typing.txt | 2 +- src/torchmetrics/detection/_mean_ap.py | 6 +++--- src/torchmetrics/functional/audio/pit.py | 2 +- src/torchmetrics/wrappers/tracker.py | 11 ++++++----- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/requirements/typing.txt b/requirements/typing.txt index 01c6897fa9c..449ff685fa6 100644 --- a/requirements/typing.txt +++ b/requirements/typing.txt @@ -1,4 +1,4 @@ -mypy ==1.11.2 +mypy ==1.13.0 torch ==2.5.0 types-PyYAML diff --git a/src/torchmetrics/detection/_mean_ap.py b/src/torchmetrics/detection/_mean_ap.py index fd342608360..73df4a41d91 100644 --- a/src/torchmetrics/detection/_mean_ap.py +++ b/src/torchmetrics/detection/_mean_ap.py @@ -849,9 +849,9 @@ def __calculate_recall_precision_scores( inds = torch.searchsorted(rc, rec_thresholds.to(rc.device), right=False) num_inds = inds.argmax() if inds.max() >= tp_len else num_rec_thrs - inds = inds[:num_inds] # type: ignore[misc] - prec[:num_inds] = pr[inds] # type: ignore[misc] - score[:num_inds] = det_scores_sorted[inds] # type: ignore[misc] + inds = inds[:num_inds] + prec[:num_inds] = pr[inds] + score[:num_inds] = det_scores_sorted[inds] precision[idx, :, idx_cls, idx_bbox_area, idx_max_det_thresholds] = prec scores[idx, :, idx_cls, idx_bbox_area, idx_max_det_thresholds] = score diff --git a/src/torchmetrics/functional/audio/pit.py b/src/torchmetrics/functional/audio/pit.py index 90c4afb01f9..8fa186299f9 100644 --- a/src/torchmetrics/functional/audio/pit.py +++ b/src/torchmetrics/functional/audio/pit.py @@ -182,7 +182,7 @@ def permutation_invariant_training( metric_of_ps = metric_func(ppreds, ptarget) metric_of_ps = torch.mean(metric_of_ps.reshape(batch_size, len(perms), -1), dim=-1) # find the best metric and best permutation - best_metric, best_indexes = eval_op(metric_of_ps, dim=1) # type: ignore[call-overload] + best_metric, best_indexes = eval_op(metric_of_ps, dim=1) best_indexes = best_indexes.detach() best_perm = perms[best_indexes, :] return best_metric, best_perm diff --git a/src/torchmetrics/wrappers/tracker.py b/src/torchmetrics/wrappers/tracker.py index 554902d1092..c1fe9957b1b 100644 --- a/src/torchmetrics/wrappers/tracker.py +++ b/src/torchmetrics/wrappers/tracker.py @@ -219,7 +219,8 @@ def best_metric( ) -> Union[ None, float, - Tuple[float, int], + Tensor, + Tuple[Union[int, float, Tensor], Union[int, float, Tensor]], Tuple[None, None], Dict[str, Union[float, None]], Tuple[Dict[str, Union[float, None]], Dict[str, Union[int, None]]], @@ -260,7 +261,7 @@ def best_metric( if isinstance(self._base_metric, Metric): fn = torch.max if self.maximize else torch.min try: - value, idx = fn(res, 0) # type: ignore[call-overload] + value, idx = fn(res, 0) if return_step: return value.item(), idx.item() return value.item() @@ -277,11 +278,11 @@ def best_metric( else: # this is a metric collection 
maximize = self.maximize if isinstance(self.maximize, list) else len(res) * [self.maximize] - value, idx = {}, {} + value, idx = {}, {} # type: ignore[assignment] for i, (k, v) in enumerate(res.items()): try: fn = torch.max if maximize[i] else torch.min - out = fn(v, 0) # type: ignore[call-overload] + out = fn(v, 0) value[k], idx[k] = out[0].item(), out[1].item() except (ValueError, RuntimeError) as error: # noqa: PERF203 # todo rank_zero_warn( @@ -290,7 +291,7 @@ def best_metric( "Returning `None` instead.", UserWarning, ) - value[k], idx[k] = None, None + value[k], idx[k] = None, None # type: ignore[assignment] if return_step: return value, idx From 9e6c2e6cf490003dc82d8c541eab80f7d45ea3b3 Mon Sep 17 00:00:00 2001 From: Alexander Gaponov <67455670+StalkerShurik@users.noreply.github.com> Date: Wed, 30 Oct 2024 17:06:11 +0300 Subject: [PATCH 07/17] fix assert in pit.py (#2811) (cherry picked from commit 429556b6583deb49a045686a27c0bda8c830f675) --- src/torchmetrics/functional/audio/pit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/torchmetrics/functional/audio/pit.py b/src/torchmetrics/functional/audio/pit.py index 8fa186299f9..a7cc72d48f7 100644 --- a/src/torchmetrics/functional/audio/pit.py +++ b/src/torchmetrics/functional/audio/pit.py @@ -161,7 +161,7 @@ def permutation_invariant_training( if eval_func not in ["max", "min"]: raise ValueError(f'eval_func can only be "max" or "min" but got {eval_func}') if mode not in ["speaker-wise", "permutation-wise"]: - raise ValueError(f'mode can only be "speaker-wise" or "permutation-wise" but got {eval_func}') + raise ValueError(f'mode can only be "speaker-wise" or "permutation-wise" but got {mode}') if target.ndim < 2: raise ValueError(f"Inputs must be of shape [batch, spk, ...], got {target.shape} and {preds.shape} instead") From 94715adc9fa27f050ef165c5b4cc43d7f290f0ba Mon Sep 17 00:00:00 2001 From: Nicki Skafte Detlefsen Date: Thu, 31 Oct 2024 12:58:30 +0100 Subject: [PATCH 08/17] Fix `MetricCollection` compatibility with `torch.jit.script` (#2813) (cherry picked from commit abdd2c4e6cbf72741b162dd6361a6c1024df69a2) --- CHANGELOG.md | 3 +++ src/torchmetrics/collections.py | 3 ++- tests/unittests/bases/test_collections.py | 9 +++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 62afc2d6bb6..e93b4155e54 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fixed iou scores in detection for either empty predictions/targets leading to wrong scores ([#2805](https://github.com/Lightning-AI/torchmetrics/pull/2805)) +- Fixed `MetricCollection` compatibility with `torch.jit.script` ([#2813](https://github.com/Lightning-AI/torchmetrics/pull/2813)) + + --- ## [1.5.1] - 2024-10-22 diff --git a/src/torchmetrics/collections.py b/src/torchmetrics/collections.py index 0b7f927deb9..baa5b5ae9a6 100644 --- a/src/torchmetrics/collections.py +++ b/src/torchmetrics/collections.py @@ -14,7 +14,7 @@ # this is just a bypass for this module name collision with built-in one from collections import OrderedDict from copy import deepcopy -from typing import Any, Dict, Hashable, Iterable, Iterator, List, Mapping, Optional, Sequence, Tuple, Union +from typing import Any, ClassVar, Dict, Hashable, Iterable, Iterator, List, Mapping, Optional, Sequence, Tuple, Union import torch from torch import Tensor @@ -192,6 +192,7 @@ class name of the metric: _modules: Dict[str, Metric] # type: ignore[assignment] 
_groups: Dict[int, List[str]] + __jit_unused_properties__: ClassVar[List[str]] = ["metric_state"] def __init__( self, diff --git a/tests/unittests/bases/test_collections.py b/tests/unittests/bases/test_collections.py index 55062ccbe29..f674e76b376 100644 --- a/tests/unittests/bases/test_collections.py +++ b/tests/unittests/bases/test_collections.py @@ -41,6 +41,15 @@ seed_all(42) +def test_metric_collection_jit_script(): + """Test that the MetricCollection can be scripted and jitted.""" + m1 = DummyMetricSum() + m2 = DummyMetricDiff() + metric_collection = MetricCollection([m1, m2]) + scripted = torch.jit.script(metric_collection) + assert isinstance(scripted, torch.jit.ScriptModule) + + def test_metric_collection(tmpdir): """Test that updating the metric collection is equal to individually updating metrics in the collection.""" m1 = DummyMetricSum() From 9b16c250a2498d0aeeb9be85c7da1354cc8a4357 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+borda@users.noreply.github.com> Date: Thu, 31 Oct 2024 19:51:06 +0000 Subject: [PATCH 09/17] docs: specify directives (#2814) (cherry picked from commit d52cb4866116734b522b8f9260efcf52cd93da67) --- .github/CONTRIBUTING.md | 2 +- docs/source/pages/implement.rst | 10 +++++----- docs/source/pages/lightning.rst | 8 ++++---- docs/source/pages/overview.rst | 8 ++++---- src/torchmetrics/audio/dnsmos.py | 6 ++++-- src/torchmetrics/audio/pesq.py | 6 ++++-- src/torchmetrics/audio/srmr.py | 5 +++-- src/torchmetrics/audio/stoi.py | 3 ++- .../classification/calibration_error.py | 2 +- src/torchmetrics/classification/cohen_kappa.py | 4 ++-- src/torchmetrics/classification/dice.py | 2 +- src/torchmetrics/classification/hinge.py | 4 ++-- src/torchmetrics/classification/jaccard.py | 6 +++--- .../classification/matthews_corrcoef.py | 6 +++--- .../classification/precision_fixed_recall.py | 6 +++--- .../classification/precision_recall_curve.py | 6 +++--- src/torchmetrics/classification/ranking.py | 6 +++--- .../classification/recall_fixed_precision.py | 6 +++--- src/torchmetrics/classification/roc.py | 18 +++++++++--------- src/torchmetrics/collections.py | 4 ++-- src/torchmetrics/detection/_mean_ap.py | 10 +++++----- src/torchmetrics/detection/mean_ap.py | 10 +++++----- src/torchmetrics/functional/audio/dnsmos.py | 3 ++- src/torchmetrics/functional/audio/pesq.py | 3 ++- src/torchmetrics/functional/audio/srmr.py | 5 +++-- src/torchmetrics/functional/audio/stoi.py | 3 ++- .../functional/classification/dice.py | 6 ++++-- src/torchmetrics/functional/image/gradients.py | 3 ++- src/torchmetrics/functional/image/psnr.py | 4 ++-- .../functional/multimodal/clip_iqa.py | 3 ++- .../functional/multimodal/clip_score.py | 3 ++- .../functional/regression/log_mse.py | 4 ++-- src/torchmetrics/image/fid.py | 5 ++--- src/torchmetrics/image/inception.py | 3 ++- src/torchmetrics/image/kid.py | 3 ++- src/torchmetrics/image/lpip.py | 6 ++---- src/torchmetrics/image/mifid.py | 6 ++++-- .../image/perceptual_path_length.py | 3 ++- src/torchmetrics/image/psnrb.py | 2 +- src/torchmetrics/metric.py | 4 ++-- src/torchmetrics/multimodal/clip_iqa.py | 3 ++- src/torchmetrics/multimodal/clip_score.py | 3 ++- src/torchmetrics/regression/kl_divergence.py | 4 ++-- src/torchmetrics/regression/log_mse.py | 4 ++-- src/torchmetrics/retrieval/base.py | 7 ++++--- .../retrieval/precision_recall_curve.py | 5 +++-- src/torchmetrics/wrappers/multitask.py | 4 ++-- 47 files changed, 129 insertions(+), 108 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 7b7211b9d61..b923d6ea262 
100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -103,7 +103,7 @@ def my_func(param_a: int, param_b: Optional[float] = None) -> str: >>> my_func(1, 2) 3 - .. note:: If you want to add something. + .. hint:: If you want to add something. """ p = param_b if param_b else 0 return str(param_a + p) diff --git a/docs/source/pages/implement.rst b/docs/source/pages/implement.rst index 1620ce29cd9..5ab044e1be1 100644 --- a/docs/source/pages/implement.rst +++ b/docs/source/pages/implement.rst @@ -257,7 +257,7 @@ and tests gets formatted in the following way: 3. ``new_metric(...)``: essentially wraps the ``_update`` and ``_compute`` private functions into one public function that makes up the functional interface for the metric. - .. note:: + .. hint:: The `functional mean squared error `_ metric is a is a great example of how to divide the logic. @@ -270,9 +270,9 @@ and tests gets formatted in the following way: ``_new_metric_compute(...)`` function in its ``compute``. No logic should really be implemented in the module interface. We do this to not have duplicate code to maintain. - .. note:: - The module `MeanSquaredError `_ - metric that corresponds to the above functional example showcases these steps. + .. note:: + The module `MeanSquaredError `_ + metric that corresponds to the above functional example showcases these steps. 4. Remember to add binding to the different relevant ``__init__`` files. @@ -291,7 +291,7 @@ and tests gets formatted in the following way: so that different combinations of inputs and parameters get tested. 5. (optional) If your metric raises any exception, please add tests that showcase this. - .. note:: + .. hint:: The `test file for MSE `_ metric shows how to implement such tests. diff --git a/docs/source/pages/lightning.rst b/docs/source/pages/lightning.rst index a96196396b8..0fd27a4d474 100644 --- a/docs/source/pages/lightning.rst +++ b/docs/source/pages/lightning.rst @@ -13,7 +13,7 @@ TorchMetrics in PyTorch Lightning TorchMetrics was originally created as part of `PyTorch Lightning `_, a powerful deep learning research framework designed for scaling models without boilerplate. -.. note:: +.. caution:: TorchMetrics always offers compatibility with the last 2 major PyTorch Lightning versions, but we recommend always keeping both frameworks up-to-date for the best experience. @@ -69,9 +69,9 @@ LightningModule `self.log `_. - .. note:: using this metric requires you to have ``librosa``, ``onnxruntime`` and ``requests`` installed. Install + .. hint:: + Using this metric requires you to have ``librosa``, ``onnxruntime`` and ``requests`` installed. Install as ``pip install torchmetrics['audio']`` or alternatively ``pip install librosa onnxruntime-gpu requests`` (if you do not have GPU enabled machine install ``onnxruntime`` instead of ``onnxruntime-gpu``) diff --git a/src/torchmetrics/functional/audio/pesq.py b/src/torchmetrics/functional/audio/pesq.py index 516014434bf..04cf1ebace2 100644 --- a/src/torchmetrics/functional/audio/pesq.py +++ b/src/torchmetrics/functional/audio/pesq.py @@ -40,7 +40,8 @@ def perceptual_evaluation_speech_quality( This metric is a wrapper for the `pesq package`_. Note that input will be moved to `cpu` to perform the metric calculation. - .. note:: using this metrics requires you to have ``pesq`` install. Either install as ``pip install + .. hint:: + Usingsing this metrics requires you to have ``pesq`` install. Either install as ``pip install torchmetrics[audio]`` or ``pip install pesq``. 
Note that ``pesq`` will compile with your currently installed version of numpy, meaning that if you upgrade numpy at some point in the future you will most likely have to reinstall ``pesq``. diff --git a/src/torchmetrics/functional/audio/srmr.py b/src/torchmetrics/functional/audio/srmr.py index 79f710a6a31..5ca00de55f3 100644 --- a/src/torchmetrics/functional/audio/srmr.py +++ b/src/torchmetrics/functional/audio/srmr.py @@ -203,11 +203,12 @@ def speech_reverberation_modulation_energy_ratio( Note: this argument is inherited from `SRMRpy`_. As the translated code is based to pytorch, setting `fast=True` may slow down the speed for calculating this metric on GPU. - .. note:: using this metrics requires you to have ``gammatone`` and ``torchaudio`` installed. + .. hint:: + Usingsing this metrics requires you to have ``gammatone`` and ``torchaudio`` installed. Either install as ``pip install torchmetrics[audio]`` or ``pip install torchaudio`` and ``pip install git+https://github.com/detly/gammatone``. - .. note:: + .. attention:: This implementation is experimental, and might not be consistent with the matlab implementation `SRMRToolbox`_, especially the fast implementation. The slow versions, a) fast=False, norm=False, max_cf=128, b) fast=False, norm=True, max_cf=30, have diff --git a/src/torchmetrics/functional/audio/stoi.py b/src/torchmetrics/functional/audio/stoi.py index 48e9e78510b..e029b721ef8 100644 --- a/src/torchmetrics/functional/audio/stoi.py +++ b/src/torchmetrics/functional/audio/stoi.py @@ -39,7 +39,8 @@ def short_time_objective_intelligibility( calculations on CPU, all input will automatically be moved to CPU to perform the metric calculation before being moved back to the original device. - .. note:: using this metrics requires you to have ``pystoi`` install. Either install as ``pip install + .. hint:: + Usingsing this metrics requires you to have ``pystoi`` install. Either install as ``pip install torchmetrics[audio]`` or ``pip install pystoi`` Args: diff --git a/src/torchmetrics/functional/classification/dice.py b/src/torchmetrics/functional/classification/dice.py index 49d66ea9361..78f58fafbb4 100644 --- a/src/torchmetrics/functional/classification/dice.py +++ b/src/torchmetrics/functional/classification/dice.py @@ -106,10 +106,12 @@ def dice( - ``'samples'``: Calculate the metric for each sample, and average the metrics across samples (with equal weights for each sample). - .. note:: What is considered a sample in the multi-dimensional multi-class case + .. tip:: + What is considered a sample in the multi-dimensional multi-class case depends on the value of ``mdmc_average``. - .. note:: If ``'none'`` and a given class doesn't occur in the ``preds`` or ``target``, + .. hint:: + If ``'none'`` and a given class doesn't occur in the ``preds`` or ``target``, the value for the class will be ``nan``. mdmc_average: diff --git a/src/torchmetrics/functional/image/gradients.py b/src/torchmetrics/functional/image/gradients.py index 03f751f0870..e87d4a75198 100644 --- a/src/torchmetrics/functional/image/gradients.py +++ b/src/torchmetrics/functional/image/gradients.py @@ -70,7 +70,8 @@ def image_gradients(img: Tensor) -> Tuple[Tensor, Tensor]: [5., 5., 5., 5., 5.], [0., 0., 0., 0., 0.]]) - .. note:: The implementation follows the 1-step finite difference method as followed + .. note:: + The implementation follows the 1-step finite difference method as followed by the TF implementation. 
The values are organized such that the gradient of [I(x+1, y)-[I(x, y)]] are at the (x, y) location diff --git a/src/torchmetrics/functional/image/psnr.py b/src/torchmetrics/functional/image/psnr.py index adb80e2bc77..7bd93ba94e1 100644 --- a/src/torchmetrics/functional/image/psnr.py +++ b/src/torchmetrics/functional/image/psnr.py @@ -134,8 +134,8 @@ def peak_signal_noise_ratio( >>> peak_signal_noise_ratio(pred, target) tensor(2.5527) - .. note:: - Half precision is only support on GPU for this metric + .. attention:: + Half precision is only support on GPU for this metric. """ if dim is None and reduction != "elementwise_mean": diff --git a/src/torchmetrics/functional/multimodal/clip_iqa.py b/src/torchmetrics/functional/multimodal/clip_iqa.py index 49e710e248a..659c667fc52 100644 --- a/src/torchmetrics/functional/multimodal/clip_iqa.py +++ b/src/torchmetrics/functional/multimodal/clip_iqa.py @@ -271,7 +271,8 @@ def clip_image_quality_assessment( available prompts (see above). If tuple is provided, it must be of length 2 and the first string must be a positive prompt and the second string must be a negative prompt. - .. note:: If using the default `clip_iqa` model, the package `piq` must be installed. Either install with + .. hint:: + If using the default `clip_iqa` model, the package `piq` must be installed. Either install with `pip install piq` or `pip install torchmetrics[multimodal]`. Returns: diff --git a/src/torchmetrics/functional/multimodal/clip_score.py b/src/torchmetrics/functional/multimodal/clip_score.py index 920eb6972e6..f70f37d534b 100644 --- a/src/torchmetrics/functional/multimodal/clip_score.py +++ b/src/torchmetrics/functional/multimodal/clip_score.py @@ -135,7 +135,8 @@ def clip_score( textual CLIP embedding :math:`E_C` for an caption :math:`C`. The score is bound between 0 and 100 and the closer to 100 the better. - .. note:: Metric is not scriptable + .. caution:: + Metric is not scriptable Args: images: Either a single [N, C, H, W] tensor or a list of [C, H, W] tensors diff --git a/src/torchmetrics/functional/regression/log_mse.py b/src/torchmetrics/functional/regression/log_mse.py index 96d2938a8ee..3ba27ab9e99 100644 --- a/src/torchmetrics/functional/regression/log_mse.py +++ b/src/torchmetrics/functional/regression/log_mse.py @@ -68,8 +68,8 @@ def mean_squared_log_error(preds: Tensor, target: Tensor) -> Tensor: >>> mean_squared_log_error(x, y) tensor(0.0207) - .. note:: - Half precision is only support on GPU for this metric + .. attention:: + Half precision is only support on GPU for this metric. """ sum_squared_log_error, num_obs = _mean_squared_log_error_update(preds, target) diff --git a/src/torchmetrics/image/fid.py b/src/torchmetrics/image/fid.py index 8c2e5d3cf76..ac559ed0c68 100644 --- a/src/torchmetrics/image/fid.py +++ b/src/torchmetrics/image/fid.py @@ -211,9 +211,8 @@ class FrechetInceptionDistance(Metric): that you calculate using `torch.float64` (default is `torch.float32`) which can be set using the `.set_dtype` method of the metric. - .. note:: using this metrics requires you to have torch 1.9 or higher installed - - .. note:: using this metric with the default feature extractor requires that ``torch-fidelity`` + .. hint:: + Using this metric with the default feature extractor requires that ``torch-fidelity`` is installed. 
Either install as ``pip install torchmetrics[image]`` or ``pip install torch-fidelity`` As input to ``forward`` and ``update`` the metric accepts the following input diff --git a/src/torchmetrics/image/inception.py b/src/torchmetrics/image/inception.py index 2b125542a17..20d53d10f2b 100644 --- a/src/torchmetrics/image/inception.py +++ b/src/torchmetrics/image/inception.py @@ -49,7 +49,8 @@ class InceptionScore(Metric): ``normalize`` is set to ``False`` images are expected to have dtype uint8 and take values in the ``[0, 255]`` range. All images will be resized to 299 x 299 which is the size of the original training data. - .. note:: using this metric with the default feature extractor requires that ``torch-fidelity`` + .. hint:: + Using this metric with the default feature extractor requires that ``torch-fidelity`` is installed. Either install as ``pip install torchmetrics[image]`` or ``pip install torch-fidelity`` diff --git a/src/torchmetrics/image/kid.py b/src/torchmetrics/image/kid.py index 975edf800eb..018fc7a7511 100644 --- a/src/torchmetrics/image/kid.py +++ b/src/torchmetrics/image/kid.py @@ -97,7 +97,8 @@ class KernelInceptionDistance(Metric): effect and update method expects to have the tensor given to `imgs` argument to be in the correct shape and type that is compatible to the custom feature extractor. - .. note:: using this metric with the default feature extractor requires that ``torch-fidelity`` + .. hint:: + Using this metric with the default feature extractor requires that ``torch-fidelity`` is installed. Either install as ``pip install torchmetrics[image]`` or ``pip install torch-fidelity`` diff --git a/src/torchmetrics/image/lpip.py b/src/torchmetrics/image/lpip.py index f1adb73648a..1893fb734ba 100644 --- a/src/torchmetrics/image/lpip.py +++ b/src/torchmetrics/image/lpip.py @@ -47,12 +47,10 @@ class LearnedPerceptualImagePatchSimilarity(Metric): Both input image patches are expected to have shape ``(N, 3, H, W)``. The minimum size of `H, W` depends on the chosen backbone (see `net_type` arg). - .. note:: using this metrics requires you to have ``torchvision`` package installed. Either install as + .. hint:: + Using this metrics requires you to have ``torchvision`` package installed. Either install as ``pip install torchmetrics[image]`` or ``pip install torchvision``. - .. note:: this metric is not scriptable when using ``torch<1.8``. Please update your pytorch installation - if this is a issue. - As input to ``forward`` and ``update`` the metric accepts the following input - ``img1`` (:class:`~torch.Tensor`): tensor with images of shape ``(N, 3, H, W)`` diff --git a/src/torchmetrics/image/mifid.py b/src/torchmetrics/image/mifid.py index 7d7237b7416..a1e2d2f4c0a 100644 --- a/src/torchmetrics/image/mifid.py +++ b/src/torchmetrics/image/mifid.py @@ -84,10 +84,12 @@ class MemorizationInformedFrechetInceptionDistance(Metric): flag ``real`` determines if the images should update the statistics of the real distribution or the fake distribution. - .. note:: using this metrics requires you to have ``scipy`` install. Either install as ``pip install + .. hint:: + Using this metrics requires you to have ``scipy`` install. Either install as ``pip install torchmetrics[image]`` or ``pip install scipy`` - .. note:: using this metric with the default feature extractor requires that ``torch-fidelity`` + .. hint:: + Using this metric with the default feature extractor requires that ``torch-fidelity`` is installed. 
Either install as ``pip install torchmetrics[image]`` or ``pip install torch-fidelity`` diff --git a/src/torchmetrics/image/perceptual_path_length.py b/src/torchmetrics/image/perceptual_path_length.py index b2255941250..24f701e5371 100644 --- a/src/torchmetrics/image/perceptual_path_length.py +++ b/src/torchmetrics/image/perceptual_path_length.py @@ -51,7 +51,8 @@ class PerceptualPathLength(Metric): if `conditional=False`, and `forward(z: Tensor, labels: Tensor) -> Tensor` if `conditional=True`. The returned tensor should have shape `(num_samples, C, H, W)` and be scaled to the range [0, 255]. - .. note:: using this metric with the default feature extractor requires that ``torchvision`` is installed. + .. hint:: + Using this metric with the default feature extractor requires that ``torchvision`` is installed. Either install as ``pip install torchmetrics[image]`` or ``pip install torchvision`` As input to ``forward`` and ``update`` the metric accepts the following input diff --git a/src/torchmetrics/image/psnrb.py b/src/torchmetrics/image/psnrb.py index bac58b84e46..c9fb157d6ff 100644 --- a/src/torchmetrics/image/psnrb.py +++ b/src/torchmetrics/image/psnrb.py @@ -34,7 +34,7 @@ class PeakSignalNoiseRatioWithBlockedEffect(Metric): Where :math:`\text{MSE}` denotes the `mean-squared-error`_ function. This metric is a modified version of PSNR that better supports evaluation of images with blocked artifacts, that oftens occur in compressed images. - .. note:: + .. attention:: Metric only supports grayscale images. If you have RGB images, please convert them to grayscale first. As input to ``forward`` and ``update`` the metric accepts the following input diff --git a/src/torchmetrics/metric.py b/src/torchmetrics/metric.py index 940e393c6d1..a1119cf9d35 100644 --- a/src/torchmetrics/metric.py +++ b/src/torchmetrics/metric.py @@ -236,11 +236,11 @@ def add_state( - If the metric state is a ``list``, the synced value will be a ``list`` containing the combined elements from all processes. - Note: + .. important:: When passing a custom function to ``dist_reduce_fx``, expect the synchronized metric state to follow the format discussed in the above note. - Note: + .. caution:: The values inserted into a list state are deleted whenever :meth:`~Metric.reset` is called. This allows device memory to be automatically reallocated, but may produce unexpected effects when referencing list states. To retain such values after :meth:`~Metric.reset` is called, you must first copy them to another diff --git a/src/torchmetrics/multimodal/clip_iqa.py b/src/torchmetrics/multimodal/clip_iqa.py index 7a66de4ae3e..f49113e297f 100644 --- a/src/torchmetrics/multimodal/clip_iqa.py +++ b/src/torchmetrics/multimodal/clip_iqa.py @@ -112,7 +112,8 @@ class CLIPImageQualityAssessment(Metric): positive prompt and the second string must be a negative prompt. kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. - .. note:: If using the default `clip_iqa` model, the package `piq` must be installed. Either install with + .. hint:: + If using the default `clip_iqa` model, the package `piq` must be installed. Either install with `pip install piq` or `pip install torchmetrics[image]`. 
Raises: diff --git a/src/torchmetrics/multimodal/clip_score.py b/src/torchmetrics/multimodal/clip_score.py index f385fbc145d..92ca7ad6b4f 100644 --- a/src/torchmetrics/multimodal/clip_score.py +++ b/src/torchmetrics/multimodal/clip_score.py @@ -54,7 +54,8 @@ class CLIPScore(Metric): textual CLIP embedding :math:`E_C` for an caption :math:`C`. The score is bound between 0 and 100 and the closer to 100 the better. - .. note:: Metric is not scriptable + .. caution:: + Metric is not scriptable As input to ``forward`` and ``update`` the metric accepts the following input diff --git a/src/torchmetrics/regression/kl_divergence.py b/src/torchmetrics/regression/kl_divergence.py index 8c5c41d3d57..368cdef0adf 100644 --- a/src/torchmetrics/regression/kl_divergence.py +++ b/src/torchmetrics/regression/kl_divergence.py @@ -73,8 +73,8 @@ class KLDivergence(Metric): ValueError: If ``reduction`` is not one of ``'mean'``, ``'sum'``, ``'none'`` or ``None``. - .. note:: - Half precision is only support on GPU for this metric + .. attention:: + Half precision is only support on GPU for this metric. Example: >>> from torch import tensor diff --git a/src/torchmetrics/regression/log_mse.py b/src/torchmetrics/regression/log_mse.py index 85e01dba314..da190016844 100644 --- a/src/torchmetrics/regression/log_mse.py +++ b/src/torchmetrics/regression/log_mse.py @@ -52,8 +52,8 @@ class MeanSquaredLogError(Metric): >>> mean_squared_log_error(preds, target) tensor(0.0397) - .. note:: - Half precision is only support on GPU for this metric + .. attention:: + Half precision is only support on GPU for this metric. """ diff --git a/src/torchmetrics/retrieval/base.py b/src/torchmetrics/retrieval/base.py index 5446d8668b1..f9a0a4f8cc4 100644 --- a/src/torchmetrics/retrieval/base.py +++ b/src/torchmetrics/retrieval/base.py @@ -50,10 +50,11 @@ class RetrievalMetric(Metric, ABC): - ``indexes`` (:class:`~torch.Tensor`): A long tensor of shape ``(N, ...)`` which indicate to which query a prediction belongs - .. note:: ``indexes``, ``preds`` and ``target`` must have the same dimension and will be flatten - to single dimension once provided. + .. hint:: + The ``indexes``, ``preds`` and ``target`` must have the same dimension and will be flattened + to single dimension once provided. - .. note:: + .. attention:: Predictions will be first grouped by ``indexes`` and then the real metric, defined by overriding the `_metric` method, will be computed as the mean of the scores over each query. diff --git a/src/torchmetrics/retrieval/precision_recall_curve.py b/src/torchmetrics/retrieval/precision_recall_curve.py index 701e89b1433..9b4f4d38d0e 100644 --- a/src/torchmetrics/retrieval/precision_recall_curve.py +++ b/src/torchmetrics/retrieval/precision_recall_curve.py @@ -303,9 +303,10 @@ class RetrievalRecallAtFixedPrecision(RetrievalPrecisionRecallCurve): - ``indexes`` (:class:`~torch.Tensor`): A long tensor of shape ``(N, ...)`` which indicate to which query a prediction belongs - .. note:: All ``indexes``, ``preds`` and ``target`` must have the same dimension. + .. important:: + All ``indexes``, ``preds`` and ``target`` must have the same dimension. - .. note:: + .. attention:: Predictions will be first grouped by ``indexes`` and then `RetrievalRecallAtFixedPrecision` will be computed as the mean of the `RetrievalRecallAtFixedPrecision` over each query. 
diff --git a/src/torchmetrics/wrappers/multitask.py b/src/torchmetrics/wrappers/multitask.py index fa2f04db97d..04ddd87ad71 100644 --- a/src/torchmetrics/wrappers/multitask.py +++ b/src/torchmetrics/wrappers/multitask.py @@ -43,8 +43,8 @@ class MultitaskWrapper(WrapperMetric): postfix: A string to append after the keys of the output dict. If not provided, will default to an empty string. - .. note:: - The use pre prefix and postfix allows for easily creating task wrappers for training, validation and test. + .. tip:: + The use prefix and postfix allows for easily creating task wrappers for training, validation and test. The arguments are only changing the output keys of the computed metrics and not the input keys. This means that a ``MultitaskWrapper`` initialized as ``MultitaskWrapper({"task": Metric()}, prefix="train_")`` will still expect the input to be a dictionary with the key "task", but the output will be a dictionary with the key From ba0976b3740c0a6d18b8e89e102921b44077e566 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Thu, 31 Oct 2024 22:01:08 +0100 Subject: [PATCH 10/17] tests: cleaning classif. (#2815) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> (cherry picked from commit dfc9e33cc7fd06b535d29aae2886da10aaf054ba) --- src/torchmetrics/utilities/compute.py | 26 +++++++++++++- tests/unittests/_helpers/testers.py | 11 +++++- .../unittests/classification/test_accuracy.py | 16 ++++----- tests/unittests/classification/test_auroc.py | 4 +-- .../classification/test_average_precision.py | 4 +-- .../classification/test_calibration_error.py | 4 +-- .../classification/test_cohen_kappa.py | 4 +-- .../classification/test_confusion_matrix.py | 6 ++-- tests/unittests/classification/test_f_beta.py | 16 ++++----- .../classification/test_hamming_distance.py | 16 ++++----- tests/unittests/classification/test_hinge.py | 4 +-- .../unittests/classification/test_jaccard.py | 6 ++-- .../classification/test_matthews_corrcoef.py | 6 ++-- .../test_precision_fixed_recall.py | 4 +-- .../classification/test_precision_recall.py | 34 ++++++++++--------- .../test_precision_recall_curve.py | 4 +-- .../test_recall_fixed_precision.py | 4 +-- tests/unittests/classification/test_roc.py | 4 +-- .../test_sensitivity_specificity.py | 4 +-- .../test_specificity_sensitivity.py | 4 +-- .../classification/test_stat_scores.py | 12 +++---- 21 files changed, 114 insertions(+), 79 deletions(-) diff --git a/src/torchmetrics/utilities/compute.py b/src/torchmetrics/utilities/compute.py index ee11a36136f..e526ecc8456 100644 --- a/src/torchmetrics/utilities/compute.py +++ b/src/torchmetrics/utilities/compute.py @@ -53,6 +53,13 @@ def _safe_divide(num: Tensor, denom: Tensor, zero_division: float = 0.0) -> Tens denom: denominator tensor, which may contain zeros zero_division: value to replace elements divided by zero + Example: + >>> import torch + >>> num = torch.tensor([1.0, 2.0, 3.0]) + >>> denom = torch.tensor([0.0, 1.0, 2.0]) + >>> _safe_divide(num, denom) + tensor([0.0000, 2.0000, 1.5000]) + """ num = num if num.is_floating_point() else num.float() denom = denom if denom.is_floating_point() else denom.float() @@ -102,6 +109,16 @@ def _auc_compute_without_check(x: Tensor, y: Tensor, direction: float, axis: int def _auc_compute(x: Tensor, y: Tensor, reorder: bool = False) -> Tensor: + """Compute area under the curve using the trapezoidal rule. 
+ + Example: + >>> import torch + >>> x = torch.tensor([1, 2, 3, 4]) + >>> y = torch.tensor([1, 2, 3, 4]) + >>> _auc_compute(x, y) + tensor(7.5000) + + """ with torch.no_grad(): if reorder: x, x_idx = torch.sort(x, stable=True) @@ -139,7 +156,7 @@ def auc(x: Tensor, y: Tensor, reorder: bool = False) -> Tensor: def interp(x: Tensor, xp: Tensor, fp: Tensor) -> Tensor: """One-dimensional linear interpolation for monotonically increasing sample points. - Returns the one-dimensional piecewise linear interpolant to a function with + Returns the one-dimensional piecewise linear interpolation to a function with given discrete data points :math:`(xp, fp)`, evaluated at :math:`x`. Adjusted version of this https://github.com/pytorch/pytorch/issues/50334#issuecomment-1000917964 @@ -152,6 +169,13 @@ def interp(x: Tensor, xp: Tensor, fp: Tensor) -> Tensor: Returns: the interpolated values, same size as `x`. + Example: + >>> x = torch.tensor([0.5, 1.5, 2.5]) + >>> xp = torch.tensor([1, 2, 3]) + >>> fp = torch.tensor([1, 2, 3]) + >>> interp(x, xp, fp) + tensor([0.5000, 1.5000, 2.5000]) + """ m = _safe_divide(fp[1:] - fp[:-1], xp[1:] - xp[:-1]) b = fp[:-1] - (m * xp[:-1]) diff --git a/tests/unittests/_helpers/testers.py b/tests/unittests/_helpers/testers.py index c5a69077f3c..9412fa79d75 100644 --- a/tests/unittests/_helpers/testers.py +++ b/tests/unittests/_helpers/testers.py @@ -685,7 +685,16 @@ def inject_ignore_index(x: Tensor, ignore_index: int) -> Tensor: def remove_ignore_index(target: Tensor, preds: Tensor, ignore_index: Optional[int]) -> Tuple[Tensor, Tensor]: - """Remove samples that are equal to the ignore_index in comparison functions.""" + """Remove samples that are equal to the ignore_index in comparison functions. + + Example: + >>> target = torch.tensor([0, 1, 2, 3, 4]) + >>> preds = torch.tensor([0, 1, 2, 3, 4]) + >>> ignore_index = 2 + >>> remove_ignore_index(target, preds, ignore_index) + (tensor([0, 1, 3, 4]), tensor([0, 1, 3, 4])) + + """ if ignore_index is not None: idx = target == ignore_index target, preds = deepcopy(target[~idx]), deepcopy(preds[~idx]) diff --git a/tests/unittests/classification/test_accuracy.py b/tests/unittests/classification/test_accuracy.py index 30d4a473a84..65e42c00b07 100644 --- a/tests/unittests/classification/test_accuracy.py +++ b/tests/unittests/classification/test_accuracy.py @@ -55,14 +55,14 @@ def _reference_sklearn_accuracy_binary(preds, target, ignore_index, multidim_ave preds = (preds >= THRESHOLD).astype(np.uint8) if multidim_average == "global": - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) return _reference_sklearn_accuracy(target, preds) res = [] for pred, true in zip(preds, target): pred = pred.flatten() true = true.flatten() - true, pred = remove_ignore_index(true, pred, ignore_index) + true, pred = remove_ignore_index(target=true, preds=pred, ignore_index=ignore_index) res.append(_reference_sklearn_accuracy(true, pred)) return np.stack(res) @@ -185,7 +185,7 @@ def _reference_sklearn_accuracy_multiclass(preds, target, ignore_index, multidim if multidim_average == "global": preds = preds.numpy().flatten() target = target.numpy().flatten() - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) if average == "micro": return _reference_sklearn_accuracy(target, preds) confmat = sk_confusion_matrix(target, preds, 
labels=list(range(NUM_CLASSES))) @@ -207,7 +207,7 @@ def _reference_sklearn_accuracy_multiclass(preds, target, ignore_index, multidim for pred, true in zip(preds, target): pred = pred.flatten() true = true.flatten() - true, pred = remove_ignore_index(true, pred, ignore_index) + true, pred = remove_ignore_index(target=true, preds=pred, ignore_index=ignore_index) if average == "micro": res.append(_reference_sklearn_accuracy(true, pred)) else: @@ -445,13 +445,13 @@ def _reference_sklearn_accuracy_multilabel(preds, target, ignore_index, multidim if average == "micro": preds = preds.flatten() target = target.flatten() - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) return _reference_sklearn_accuracy(target, preds) accuracy, weights = [], [] for i in range(preds.shape[1]): pred, true = preds[:, i].flatten(), target[:, i].flatten() - true, pred = remove_ignore_index(true, pred, ignore_index) + true, pred = remove_ignore_index(target=true, preds=pred, ignore_index=ignore_index) confmat = sk_confusion_matrix(true, pred, labels=[0, 1]) accuracy.append(_reference_sklearn_accuracy(true, pred)) weights.append(confmat[1, 1] + confmat[1, 0]) @@ -472,7 +472,7 @@ def _reference_sklearn_accuracy_multilabel(preds, target, ignore_index, multidim for i in range(preds.shape[0]): if average == "micro": pred, true = preds[i].flatten(), target[i].flatten() - true, pred = remove_ignore_index(true, pred, ignore_index) + true, pred = remove_ignore_index(target=true, preds=pred, ignore_index=ignore_index) accuracy.append(_reference_sklearn_accuracy(true, pred)) confmat = sk_confusion_matrix(true, pred, labels=[0, 1]) weights.append(confmat[1, 1] + confmat[1, 0]) @@ -480,7 +480,7 @@ def _reference_sklearn_accuracy_multilabel(preds, target, ignore_index, multidim scores, w = [], [] for j in range(preds.shape[1]): pred, true = preds[i, j], target[i, j] - true, pred = remove_ignore_index(true, pred, ignore_index) + true, pred = remove_ignore_index(target=true, preds=pred, ignore_index=ignore_index) scores.append(_reference_sklearn_accuracy(true, pred)) confmat = sk_confusion_matrix(true, pred, labels=[0, 1]) w.append(confmat[1, 1] + confmat[1, 0]) diff --git a/tests/unittests/classification/test_auroc.py b/tests/unittests/classification/test_auroc.py index 3691c7305b7..30d4acb470c 100644 --- a/tests/unittests/classification/test_auroc.py +++ b/tests/unittests/classification/test_auroc.py @@ -38,7 +38,7 @@ def _reference_sklearn_auroc_binary(preds, target, max_fpr=None, ignore_index=No target = target.flatten().numpy() if not ((preds > 0) & (preds < 1)).all(): preds = sigmoid(preds) - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) return sk_roc_auc_score(target, preds, max_fpr=max_fpr) @@ -144,7 +144,7 @@ def _reference_sklearn_auroc_multiclass(preds, target, average="macro", ignore_i target = target.numpy().flatten() if not ((preds > 0) & (preds < 1)).all(): preds = softmax(preds, 1) - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) return sk_roc_auc_score(target, preds, average=average, multi_class="ovr", labels=list(range(NUM_CLASSES))) diff --git a/tests/unittests/classification/test_average_precision.py b/tests/unittests/classification/test_average_precision.py index 51d40839642..da0dc2f56b6 
100644 --- a/tests/unittests/classification/test_average_precision.py +++ b/tests/unittests/classification/test_average_precision.py @@ -47,7 +47,7 @@ def _reference_sklearn_avg_precision_binary(preds, target, ignore_index=None): target = target.flatten().numpy() if np.issubdtype(preds.dtype, np.floating) and not ((preds > 0) & (preds < 1)).all(): preds = sigmoid(preds) - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) return sk_average_precision_score(target, preds) @@ -156,7 +156,7 @@ def _reference_sklearn_avg_precision_multiclass(preds, target, average="macro", target = target.numpy().flatten() if not ((preds > 0) & (preds < 1)).all(): preds = softmax(preds, 1) - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) res = [] for i in range(NUM_CLASSES): diff --git a/tests/unittests/classification/test_calibration_error.py b/tests/unittests/classification/test_calibration_error.py index a2000cc984e..f4fc0703881 100644 --- a/tests/unittests/classification/test_calibration_error.py +++ b/tests/unittests/classification/test_calibration_error.py @@ -44,7 +44,7 @@ def _reference_netcal_binary_calibration_error(preds, target, n_bins, norm, igno target = target.numpy().flatten() if not ((preds > 0) & (preds < 1)).all(): preds = sigmoid(preds) - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) metric = ECE if norm == "l1" else MCE return metric(n_bins).measure(preds, target) @@ -154,7 +154,7 @@ def _reference_netcal_multiclass_calibration_error(preds, target, n_bins, norm, if not ((preds > 0) & (preds < 1)).all(): preds = softmax(preds, 1) preds = np.moveaxis(preds, 1, -1).reshape((-1, preds.shape[1])) - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) metric = ECE if norm == "l1" else MCE return metric(n_bins).measure(preds, target) diff --git a/tests/unittests/classification/test_cohen_kappa.py b/tests/unittests/classification/test_cohen_kappa.py index 40ebbc028bd..1f2585372bd 100644 --- a/tests/unittests/classification/test_cohen_kappa.py +++ b/tests/unittests/classification/test_cohen_kappa.py @@ -37,7 +37,7 @@ def _reference_sklearn_cohen_kappa_binary(preds, target, weights=None, ignore_in if not ((preds > 0) & (preds < 1)).all(): preds = sigmoid(preds) preds = (preds >= THRESHOLD).astype(np.uint8) - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) return sk_cohen_kappa(y1=target, y2=preds, weights=weights) @@ -136,7 +136,7 @@ def _reference_sklearn_cohen_kappa_multiclass(preds, target, weights, ignore_ind preds = np.argmax(preds, axis=1) preds = preds.flatten() target = target.flatten() - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) return sk_cohen_kappa(y1=target, y2=preds, weights=weights) diff --git a/tests/unittests/classification/test_confusion_matrix.py b/tests/unittests/classification/test_confusion_matrix.py index 12f21451949..4d27dfc2069 100644 --- a/tests/unittests/classification/test_confusion_matrix.py +++ 
b/tests/unittests/classification/test_confusion_matrix.py @@ -46,7 +46,7 @@ def _reference_sklearn_confusion_matrix_binary(preds, target, normalize=None, ig if not ((preds > 0) & (preds < 1)).all(): preds = sigmoid(preds) preds = (preds >= THRESHOLD).astype(np.uint8) - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) return sk_confusion_matrix(y_true=target, y_pred=preds, labels=[0, 1], normalize=normalize) @@ -147,7 +147,7 @@ def _reference_sklearn_confusion_matrix_multiclass(preds, target, normalize=None preds = np.argmax(preds, axis=1) preds = preds.flatten() target = target.flatten() - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) return sk_confusion_matrix(y_true=target, y_pred=preds, normalize=normalize, labels=list(range(NUM_CLASSES))) @@ -298,7 +298,7 @@ def _reference_sklearn_confusion_matrix_multilabel(preds, target, normalize=None confmat = [] for i in range(preds.shape[1]): pred, true = preds[:, i], target[:, i] - true, pred = remove_ignore_index(true, pred, ignore_index) + true, pred = remove_ignore_index(target=true, preds=pred, ignore_index=ignore_index) confmat.append(sk_confusion_matrix(true, pred, normalize=normalize, labels=[0, 1])) return np.stack(confmat, axis=0) diff --git a/tests/unittests/classification/test_f_beta.py b/tests/unittests/classification/test_f_beta.py index 03c39d336fe..075e37cc699 100644 --- a/tests/unittests/classification/test_f_beta.py +++ b/tests/unittests/classification/test_f_beta.py @@ -63,14 +63,14 @@ def _reference_sklearn_fbeta_score_binary(preds, target, sk_fn, ignore_index, mu preds = (preds >= THRESHOLD).astype(np.uint8) if multidim_average == "global": - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) return sk_fn(target, preds, zero_division=zero_division) res = [] for pred, true in zip(preds, target): pred = pred.flatten() true = true.flatten() - true, pred = remove_ignore_index(true, pred, ignore_index) + true, pred = remove_ignore_index(target=true, preds=pred, ignore_index=ignore_index) res.append(sk_fn(true, pred, zero_division=zero_division)) return np.stack(res) @@ -205,7 +205,7 @@ def _reference_sklearn_fbeta_score_multiclass( if multidim_average == "global": preds = preds.numpy().flatten() target = target.numpy().flatten() - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) return sk_fn( target, preds, @@ -220,7 +220,7 @@ def _reference_sklearn_fbeta_score_multiclass( for pred, true in zip(preds, target): pred = pred.flatten() true = true.flatten() - true, pred = remove_ignore_index(true, pred, ignore_index) + true, pred = remove_ignore_index(target=true, preds=pred, ignore_index=ignore_index) if len(pred) == 0 and average == "weighted": # The result of sk_fn([], [], labels=None, average="weighted", zero_division=zero_division) @@ -417,13 +417,13 @@ def _reference_sklearn_fbeta_score_multilabel_global(preds, target, sk_fn, ignor if average == "micro": preds = preds.flatten() target = target.flatten() - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) return sk_fn(target, preds, 
zero_division=zero_division) fbeta_score, weights = [], [] for i in range(preds.shape[1]): pred, true = preds[:, i].flatten(), target[:, i].flatten() - true, pred = remove_ignore_index(true, pred, ignore_index) + true, pred = remove_ignore_index(target=true, preds=pred, ignore_index=ignore_index) fbeta_score.append(sk_fn(true, pred, zero_division=zero_division)) confmat = sk_confusion_matrix(true, pred, labels=[0, 1]) weights.append(confmat[1, 1] + confmat[1, 0]) @@ -446,7 +446,7 @@ def _reference_sklearn_fbeta_score_multilabel_local(preds, target, sk_fn, ignore for i in range(preds.shape[0]): if average == "micro": pred, true = preds[i].flatten(), target[i].flatten() - true, pred = remove_ignore_index(true, pred, ignore_index) + true, pred = remove_ignore_index(target=true, preds=pred, ignore_index=ignore_index) fbeta_score.append(sk_fn(true, pred, zero_division=zero_division)) confmat = sk_confusion_matrix(true, pred, labels=[0, 1]) weights.append(confmat[1, 1] + confmat[1, 0]) @@ -454,7 +454,7 @@ def _reference_sklearn_fbeta_score_multilabel_local(preds, target, sk_fn, ignore scores, w = [], [] for j in range(preds.shape[1]): pred, true = preds[i, j], target[i, j] - true, pred = remove_ignore_index(true, pred, ignore_index) + true, pred = remove_ignore_index(target=true, preds=pred, ignore_index=ignore_index) scores.append(sk_fn(true, pred, zero_division=zero_division)) confmat = sk_confusion_matrix(true, pred, labels=[0, 1]) w.append(confmat[1, 1] + confmat[1, 0]) diff --git a/tests/unittests/classification/test_hamming_distance.py b/tests/unittests/classification/test_hamming_distance.py index f7f3686c73b..a7a42db61b0 100644 --- a/tests/unittests/classification/test_hamming_distance.py +++ b/tests/unittests/classification/test_hamming_distance.py @@ -59,14 +59,14 @@ def _reference_sklearn_hamming_distance_binary(preds, target, ignore_index, mult preds = (preds >= THRESHOLD).astype(np.uint8) if multidim_average == "global": - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) return _reference_sklearn_hamming_loss(target, preds) res = [] for pred, true in zip(preds, target): pred = pred.flatten() true = true.flatten() - true, pred = remove_ignore_index(true, pred, ignore_index) + true, pred = remove_ignore_index(target=true, preds=pred, ignore_index=ignore_index) res.append(_reference_sklearn_hamming_loss(true, pred)) return np.stack(res) @@ -167,7 +167,7 @@ def test_binary_hamming_distance_dtype_gpu(self, inputs, dtype): def _reference_sklearn_hamming_distance_multiclass_global(preds, target, ignore_index, average): preds = preds.numpy().flatten() target = target.numpy().flatten() - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) if average == "micro": return _reference_sklearn_hamming_loss(target, preds) confmat = sk_confusion_matrix(y_true=target, y_pred=preds, labels=list(range(NUM_CLASSES))) @@ -191,7 +191,7 @@ def _reference_sklearn_hamming_distance_multiclass_local(preds, target, ignore_i for pred, true in zip(preds, target): pred = pred.flatten() true = true.flatten() - true, pred = remove_ignore_index(true, pred, ignore_index) + true, pred = remove_ignore_index(target=true, preds=pred, ignore_index=ignore_index) if average == "micro": res.append(_reference_sklearn_hamming_loss(true, pred)) else: @@ -331,13 +331,13 @@ def 
_reference_sklearn_hamming_distance_multilabel_global(preds, target, ignore_ if average == "micro": preds = preds.flatten() target = target.flatten() - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) return _reference_sklearn_hamming_loss(target, preds) hamming, weights = [], [] for i in range(preds.shape[1]): pred, true = preds[:, i].flatten(), target[:, i].flatten() - true, pred = remove_ignore_index(true, pred, ignore_index) + true, pred = remove_ignore_index(target=true, preds=pred, ignore_index=ignore_index) confmat = sk_confusion_matrix(true, pred, labels=[0, 1]) hamming.append(_reference_sklearn_hamming_loss(true, pred)) weights.append(confmat[1, 1] + confmat[1, 0]) @@ -360,13 +360,13 @@ def _reference_sklearn_hamming_distance_multilabel_local(preds, target, ignore_i for i in range(preds.shape[0]): if average == "micro": pred, true = preds[i].flatten(), target[i].flatten() - true, pred = remove_ignore_index(true, pred, ignore_index) + true, pred = remove_ignore_index(target=true, preds=pred, ignore_index=ignore_index) hamming.append(_reference_sklearn_hamming_loss(true, pred)) else: scores, w = [], [] for j in range(preds.shape[1]): pred, true = preds[i, j], target[i, j] - true, pred = remove_ignore_index(true, pred, ignore_index) + true, pred = remove_ignore_index(target=true, preds=pred, ignore_index=ignore_index) scores.append(_reference_sklearn_hamming_loss(true, pred)) confmat = sk_confusion_matrix(true, pred, labels=[0, 1]) w.append(confmat[1, 1] + confmat[1, 0]) diff --git a/tests/unittests/classification/test_hinge.py b/tests/unittests/classification/test_hinge.py index fb0c62838cc..8963177d7a0 100644 --- a/tests/unittests/classification/test_hinge.py +++ b/tests/unittests/classification/test_hinge.py @@ -38,7 +38,7 @@ def _reference_sklearn_binary_hinge_loss(preds, target, ignore_index): if not ((preds > 0) & (preds < 1)).all(): preds = sigmoid(preds) - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) target = 2 * target - 1 return sk_hinge(target, preds) @@ -125,7 +125,7 @@ def _reference_sklearn_multiclass_hinge_loss(preds, target, multiclass_mode, ign if not ((preds > 0) & (preds < 1)).all(): preds = softmax(preds, 1) preds = np.moveaxis(preds, 1, -1).reshape((-1, preds.shape[1])) - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) if multiclass_mode == "one-vs-all": enc = OneHotEncoder() diff --git a/tests/unittests/classification/test_jaccard.py b/tests/unittests/classification/test_jaccard.py index e7afdb557a6..0a20a2e458a 100644 --- a/tests/unittests/classification/test_jaccard.py +++ b/tests/unittests/classification/test_jaccard.py @@ -45,7 +45,7 @@ def _reference_sklearn_jaccard_index_binary(preds, target, ignore_index=None, ze if not ((preds > 0) & (preds < 1)).all(): preds = sigmoid(preds) preds = (preds >= THRESHOLD).astype(np.uint8) - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) return sk_jaccard_index(y_true=target, y_pred=preds, zero_division=zero_division) @@ -141,7 +141,7 @@ def _reference_sklearn_jaccard_index_multiclass(preds, target, ignore_index=None preds = np.argmax(preds, axis=1) preds = preds.flatten() target = 
target.flatten() - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) if average is None: return sk_jaccard_index( y_true=target, y_pred=preds, average=average, labels=list(range(NUM_CLASSES)), zero_division=zero_division @@ -269,7 +269,7 @@ def _reference_sklearn_jaccard_index_multilabel(preds, target, ignore_index=None scores, weights = [], [] for i in range(preds.shape[1]): pred, true = preds[:, i], target[:, i] - true, pred = remove_ignore_index(true, pred, ignore_index) + true, pred = remove_ignore_index(target=true, preds=pred, ignore_index=ignore_index) confmat = sk_confusion_matrix(true, pred, labels=[0, 1]) scores.append(sk_jaccard_index(true, pred, zero_division=zero_division)) weights.append(confmat[1, 0] + confmat[1, 1]) diff --git a/tests/unittests/classification/test_matthews_corrcoef.py b/tests/unittests/classification/test_matthews_corrcoef.py index 2f881604d09..b340db8d713 100644 --- a/tests/unittests/classification/test_matthews_corrcoef.py +++ b/tests/unittests/classification/test_matthews_corrcoef.py @@ -46,7 +46,7 @@ def _reference_sklearn_matthews_corrcoef_binary(preds, target, ignore_index=None if not ((preds > 0) & (preds < 1)).all(): preds = sigmoid(preds) preds = (preds >= THRESHOLD).astype(np.uint8) - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) return sk_matthews_corrcoef(y_true=target, y_pred=preds) @@ -138,7 +138,7 @@ def _reference_sklearn_matthews_corrcoef_multiclass(preds, target, ignore_index= preds = np.argmax(preds, axis=1) preds = preds.flatten() target = target.flatten() - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) return sk_matthews_corrcoef(y_true=target, y_pred=preds) @@ -228,7 +228,7 @@ def _reference_sklearn_matthews_corrcoef_multilabel(preds, target, ignore_index= if not ((preds > 0) & (preds < 1)).all(): preds = sigmoid(preds) preds = (preds >= THRESHOLD).astype(np.uint8) - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) return sk_matthews_corrcoef(y_true=target, y_pred=preds) diff --git a/tests/unittests/classification/test_precision_fixed_recall.py b/tests/unittests/classification/test_precision_fixed_recall.py index f320d2cf1e9..b6649ad869d 100644 --- a/tests/unittests/classification/test_precision_fixed_recall.py +++ b/tests/unittests/classification/test_precision_fixed_recall.py @@ -58,7 +58,7 @@ def _reference_sklearn_precision_at_fixed_recall_binary(preds, target, min_recal target = target.flatten().numpy() if np.issubdtype(preds.dtype, np.floating) and not ((preds > 0) & (preds < 1)).all(): preds = sigmoid(preds) - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) return _precision_at_recall_x_multilabel(preds, target, min_recall) @@ -169,7 +169,7 @@ def _reference_sklearn_precision_at_fixed_recall_multiclass(preds, target, min_r target = target.numpy().flatten() if not ((preds > 0) & (preds < 1)).all(): preds = softmax(preds, 1) - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) 
precision, thresholds = [], [] for i in range(NUM_CLASSES): diff --git a/tests/unittests/classification/test_precision_recall.py b/tests/unittests/classification/test_precision_recall.py index 00eee202cc0..7717ffa5b0d 100644 --- a/tests/unittests/classification/test_precision_recall.py +++ b/tests/unittests/classification/test_precision_recall.py @@ -49,7 +49,9 @@ seed_all(42) -def _reference_sklearn_precision_recall_binary(preds, target, sk_fn, ignore_index, multidim_average, zero_division=0): +def _reference_sklearn_precision_recall_binary( + preds, target, sk_fn, ignore_index, multidim_average, zero_division=0, prob_threshold: float = THRESHOLD +): if multidim_average == "global": preds = preds.view(-1).numpy() target = target.view(-1).numpy() @@ -60,17 +62,17 @@ def _reference_sklearn_precision_recall_binary(preds, target, sk_fn, ignore_inde if np.issubdtype(preds.dtype, np.floating): if not ((preds > 0) & (preds < 1)).all(): preds = sigmoid(preds) - preds = (preds >= THRESHOLD).astype(np.uint8) + preds = (preds >= prob_threshold).astype(np.uint8) if multidim_average == "global": - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) return sk_fn(target, preds, zero_division=zero_division) res = [] for pred, true in zip(preds, target): pred = pred.flatten() true = true.flatten() - true, pred = remove_ignore_index(true, pred, ignore_index) + true, pred = remove_ignore_index(target=true, preds=pred, ignore_index=ignore_index) res.append(sk_fn(true, pred, zero_division=zero_division)) return np.stack(res) @@ -197,7 +199,7 @@ def test_binary_precision_recall_half_gpu(self, inputs, module, functional, comp def _reference_sklearn_precision_recall_multiclass( - preds, target, sk_fn, ignore_index, multidim_average, average, zero_division=0 + preds, target, sk_fn, ignore_index, multidim_average, average, zero_division=0, num_classes: int = NUM_CLASSES ): if preds.ndim == target.ndim + 1: preds = torch.argmax(preds, 1) @@ -205,12 +207,12 @@ def _reference_sklearn_precision_recall_multiclass( if multidim_average == "global": preds = preds.numpy().flatten() target = target.numpy().flatten() - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) return sk_fn( target, preds, average=average, - labels=list(range(NUM_CLASSES)) if average is None else None, + labels=list(range(num_classes)) if average is None else None, zero_division=zero_division, ) @@ -220,7 +222,7 @@ def _reference_sklearn_precision_recall_multiclass( for pred, true in zip(preds, target): pred = pred.flatten() true = true.flatten() - true, pred = remove_ignore_index(true, pred, ignore_index) + true, pred = remove_ignore_index(target=true, preds=pred, ignore_index=ignore_index) if len(pred) == 0 and average == "weighted": # The result of sk_fn([], [], labels=None, average="weighted", zero_division=zero_division) # varies depending on the sklearn version: @@ -235,7 +237,7 @@ def _reference_sklearn_precision_recall_multiclass( true, pred, average=average, - labels=list(range(NUM_CLASSES)) if average is None else None, + labels=list(range(num_classes)) if average is None else None, zero_division=zero_division, ) res.append(0.0 if np.isnan(r).any() else r) @@ -422,13 +424,13 @@ def _reference_sklearn_precision_recall_multilabel_global(preds, target, sk_fn, if average == "micro": preds = preds.flatten() target = target.flatten() - 
target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) return sk_fn(target, preds, zero_division=zero_division) precision_recall, weights = [], [] for i in range(preds.shape[1]): pred, true = preds[:, i].flatten(), target[:, i].flatten() - true, pred = remove_ignore_index(true, pred, ignore_index) + true, pred = remove_ignore_index(target=true, preds=pred, ignore_index=ignore_index) precision_recall.append(sk_fn(true, pred, zero_division=zero_division)) confmat = sk_confusion_matrix(true, pred, labels=[0, 1]) weights.append(confmat[1, 1] + confmat[1, 0]) @@ -451,7 +453,7 @@ def _reference_sklearn_precision_recall_multilabel_local(preds, target, sk_fn, i for i in range(preds.shape[0]): if average == "micro": pred, true = preds[i].flatten(), target[i].flatten() - true, pred = remove_ignore_index(true, pred, ignore_index) + true, pred = remove_ignore_index(target=true, preds=pred, ignore_index=ignore_index) precision_recall.append(sk_fn(true, pred, zero_division=zero_division)) confmat = sk_confusion_matrix(true, pred, labels=[0, 1]) weights.append(confmat[1, 1] + confmat[1, 0]) @@ -459,7 +461,7 @@ def _reference_sklearn_precision_recall_multilabel_local(preds, target, sk_fn, i scores, w = [], [] for j in range(preds.shape[1]): pred, true = preds[i, j], target[i, j] - true, pred = remove_ignore_index(true, pred, ignore_index) + true, pred = remove_ignore_index(target=true, preds=pred, ignore_index=ignore_index) scores.append(sk_fn(true, pred, zero_division=zero_division)) confmat = sk_confusion_matrix(true, pred, labels=[0, 1]) w.append(confmat[1, 1] + confmat[1, 0]) @@ -481,7 +483,7 @@ def _reference_sklearn_precision_recall_multilabel_local(preds, target, sk_fn, i def _reference_sklearn_precision_recall_multilabel( - preds, target, sk_fn, ignore_index, multidim_average, average, zero_division=0 + preds, target, sk_fn, ignore_index, multidim_average, average, zero_division=0, num_classes: int = NUM_CLASSES ): preds = preds.numpy() target = target.numpy() @@ -493,8 +495,8 @@ def _reference_sklearn_precision_recall_multilabel( target = target.reshape(*target.shape[:2], -1) if ignore_index is None and multidim_average == "global": return sk_fn( - target.transpose(0, 2, 1).reshape(-1, NUM_CLASSES), - preds.transpose(0, 2, 1).reshape(-1, NUM_CLASSES), + target.transpose(0, 2, 1).reshape(-1, num_classes), + preds.transpose(0, 2, 1).reshape(-1, num_classes), average=average, zero_division=zero_division, ) diff --git a/tests/unittests/classification/test_precision_recall_curve.py b/tests/unittests/classification/test_precision_recall_curve.py index 6f78438007e..7c034c528e6 100644 --- a/tests/unittests/classification/test_precision_recall_curve.py +++ b/tests/unittests/classification/test_precision_recall_curve.py @@ -46,7 +46,7 @@ def _reference_sklearn_precision_recall_curve_binary(preds, target, ignore_index target = target.flatten().numpy() if np.issubdtype(preds.dtype, np.floating) and not ((preds > 0) & (preds < 1)).all(): preds = sigmoid(preds) - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) return sk_precision_recall_curve(target, preds) @@ -159,7 +159,7 @@ def _reference_sklearn_precision_recall_curve_multiclass(preds, target, ignore_i target = target.numpy().flatten() if not ((preds > 0) & (preds < 1)).all(): preds = softmax(preds, 1) - target, preds = remove_ignore_index(target, 
preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) precision, recall, thresholds = [], [], [] for i in range(NUM_CLASSES): diff --git a/tests/unittests/classification/test_recall_fixed_precision.py b/tests/unittests/classification/test_recall_fixed_precision.py index 9bdca8950bd..2d73d64f264 100644 --- a/tests/unittests/classification/test_recall_fixed_precision.py +++ b/tests/unittests/classification/test_recall_fixed_precision.py @@ -58,7 +58,7 @@ def _reference_sklearn_recall_at_fixed_precision_binary(preds, target, min_preci target = target.flatten().numpy() if np.issubdtype(preds.dtype, np.floating) and not ((preds > 0) & (preds < 1)).all(): preds = sigmoid(preds) - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) return _recall_at_precision_x_multilabel(preds, target, min_precision) @@ -173,7 +173,7 @@ def _reference_sklearn_recall_at_fixed_precision_multiclass(preds, target, min_p target = target.numpy().flatten() if not ((preds > 0) & (preds < 1)).all(): preds = softmax(preds, 1) - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) recall, thresholds = [], [] for i in range(NUM_CLASSES): diff --git a/tests/unittests/classification/test_roc.py b/tests/unittests/classification/test_roc.py index 167ad4876f0..f6cbd173128 100644 --- a/tests/unittests/classification/test_roc.py +++ b/tests/unittests/classification/test_roc.py @@ -37,7 +37,7 @@ def _reference_sklearn_roc_binary(preds, target, ignore_index=None): target = target.flatten().numpy() if np.issubdtype(preds.dtype, np.floating) and not ((preds > 0) & (preds < 1)).all(): preds = sigmoid(preds) - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) fpr, tpr, thresholds = sk_roc_curve(target, preds, drop_intermediate=False) thresholds[0] = 1.0 return [np.nan_to_num(x, nan=0.0) for x in [fpr, tpr, thresholds]] @@ -140,7 +140,7 @@ def _reference_sklearn_roc_multiclass(preds, target, ignore_index=None): target = target.numpy().flatten() if not ((preds > 0) & (preds < 1)).all(): preds = softmax(preds, 1) - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) fpr, tpr, thresholds = [], [], [] for i in range(NUM_CLASSES): diff --git a/tests/unittests/classification/test_sensitivity_specificity.py b/tests/unittests/classification/test_sensitivity_specificity.py index df01b67e4ab..e170365c1fb 100644 --- a/tests/unittests/classification/test_sensitivity_specificity.py +++ b/tests/unittests/classification/test_sensitivity_specificity.py @@ -79,7 +79,7 @@ def _reference_sklearn_sensitivity_at_specificity_binary(preds, target, min_spec target = target.flatten().numpy() if np.issubdtype(preds.dtype, np.floating) and not ((preds > 0) & (preds < 1)).all(): preds = sigmoid(preds) - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) return _sensitivity_at_specificity_x_multilabel(preds, target, min_specificity) @@ -198,7 +198,7 @@ def _reference_sklearn_sensitivity_at_specificity_multiclass(preds, target, min_ target = target.numpy().flatten() if not ((preds > 0) 
& (preds < 1)).all(): preds = softmax(preds, 1) - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) sensitivity, thresholds = [], [] for i in range(NUM_CLASSES): diff --git a/tests/unittests/classification/test_specificity_sensitivity.py b/tests/unittests/classification/test_specificity_sensitivity.py index 0bafdfe55ea..934d669678a 100644 --- a/tests/unittests/classification/test_specificity_sensitivity.py +++ b/tests/unittests/classification/test_specificity_sensitivity.py @@ -77,7 +77,7 @@ def _reference_sklearn_specificity_at_sensitivity_binary(preds, target, min_sens target = target.flatten().numpy() if np.issubdtype(preds.dtype, np.floating) and not ((preds > 0) & (preds < 1)).all(): preds = sigmoid(preds) - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) return _specificity_at_sensitivity_x_multilabel(preds, target, min_sensitivity) @@ -192,7 +192,7 @@ def _reference_sklearn_specificity_at_sensitivity_multiclass(preds, target, min_ target = target.numpy().flatten() if not ((preds > 0) & (preds < 1)).all(): preds = softmax(preds, 1) - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) specificity, thresholds = [], [] for i in range(NUM_CLASSES): diff --git a/tests/unittests/classification/test_stat_scores.py b/tests/unittests/classification/test_stat_scores.py index 5ea4c206bc0..2a5a53bb8aa 100644 --- a/tests/unittests/classification/test_stat_scores.py +++ b/tests/unittests/classification/test_stat_scores.py @@ -54,7 +54,7 @@ def _reference_sklearn_stat_scores_binary(preds, target, ignore_index, multidim_ preds = (preds >= THRESHOLD).astype(np.uint8) if multidim_average == "global": - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) tn, fp, fn, tp = sk_confusion_matrix(y_true=target, y_pred=preds, labels=[0, 1]).ravel() return np.array([tp, fp, tn, fn, tp + fn]) @@ -62,7 +62,7 @@ def _reference_sklearn_stat_scores_binary(preds, target, ignore_index, multidim_ for pred, true in zip(preds, target): pred = pred.flatten() true = true.flatten() - true, pred = remove_ignore_index(true, pred, ignore_index) + true, pred = remove_ignore_index(target=true, preds=pred, ignore_index=ignore_index) tn, fp, fn, tp = sk_confusion_matrix(y_true=true, y_pred=pred, labels=[0, 1]).ravel() res.append(np.array([tp, fp, tn, fn, tp + fn])) return np.stack(res) @@ -164,7 +164,7 @@ def test_binary_stat_scores_dtype_gpu(self, inputs, dtype): def _reference_sklearn_stat_scores_multiclass_global(preds, target, ignore_index, average): preds = preds.numpy().flatten() target = target.numpy().flatten() - target, preds = remove_ignore_index(target, preds, ignore_index) + target, preds = remove_ignore_index(target=target, preds=preds, ignore_index=ignore_index) confmat = sk_confusion_matrix(y_true=target, y_pred=preds, labels=list(range(NUM_CLASSES))) tp = np.diag(confmat) fp = confmat.sum(0) - tp @@ -192,7 +192,7 @@ def _reference_sklearn_stat_scores_multiclass_local(preds, target, ignore_index, for pred, true in zip(preds, target): pred = pred.flatten() true = true.flatten() - true, pred = remove_ignore_index(true, pred, ignore_index) + true, pred = remove_ignore_index(target=true, 
preds=pred, ignore_index=ignore_index) confmat = sk_confusion_matrix(y_true=true, y_pred=pred, labels=list(range(NUM_CLASSES))) tp = np.diag(confmat) fp = confmat.sum(0) - tp @@ -431,7 +431,7 @@ def _reference_sklearn_stat_scores_multilabel(preds, target, ignore_index, multi stat_scores = [] for i in range(preds.shape[1]): pred, true = preds[:, i].flatten(), target[:, i].flatten() - true, pred = remove_ignore_index(true, pred, ignore_index) + true, pred = remove_ignore_index(target=true, preds=pred, ignore_index=ignore_index) tn, fp, fn, tp = sk_confusion_matrix(true, pred, labels=[0, 1]).ravel() stat_scores.append(np.array([tp, fp, tn, fn, tp + fn])) res = np.stack(stat_scores, axis=0) @@ -452,7 +452,7 @@ def _reference_sklearn_stat_scores_multilabel(preds, target, ignore_index, multi scores = [] for j in range(preds.shape[1]): pred, true = preds[i, j], target[i, j] - true, pred = remove_ignore_index(true, pred, ignore_index) + true, pred = remove_ignore_index(target=true, preds=pred, ignore_index=ignore_index) tn, fp, fn, tp = sk_confusion_matrix(true, pred, labels=[0, 1]).ravel() scores.append(np.array([tp, fp, tn, fn, tp + fn])) stat_scores.append(np.stack(scores, 1)) From 700552590f87d76174a2509d055b04c3218c88c0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 2 Nov 2024 20:53:39 +0100 Subject: [PATCH 11/17] build(deps): bump pypa/gh-action-pypi-publish from 1.10.2 to 1.11.0 (#2817) Bumps [pypa/gh-action-pypi-publish](https://github.com/pypa/gh-action-pypi-publish) from 1.10.2 to 1.11.0. - [Release notes](https://github.com/pypa/gh-action-pypi-publish/releases) - [Commits](https://github.com/pypa/gh-action-pypi-publish/compare/v1.10.2...v1.11.0) --- updated-dependencies: - dependency-name: pypa/gh-action-pypi-publish dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 5e8e5e645bc8b6ac8761b43c091f639b21c42856) --- .github/workflows/publish-pkg.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/publish-pkg.yml b/.github/workflows/publish-pkg.yml index 7affbea96c2..f7b3f46997f 100644 --- a/.github/workflows/publish-pkg.yml +++ b/.github/workflows/publish-pkg.yml @@ -67,7 +67,7 @@ jobs: - run: ls -lh dist/ # We do this, since failures on test.pypi aren't that bad - name: Publish to Test PyPI - uses: pypa/gh-action-pypi-publish@v1.10.2 + uses: pypa/gh-action-pypi-publish@v1.11.0 with: user: __token__ password: ${{ secrets.test_pypi_password }} @@ -94,7 +94,7 @@ jobs: path: dist - run: ls -lh dist/ - name: Publish distribution 📦 to PyPI - uses: pypa/gh-action-pypi-publish@v1.10.2 + uses: pypa/gh-action-pypi-publish@v1.11.0 with: user: __token__ password: ${{ secrets.pypi_password }} From 48e0906c5ef9834ed7c113425e9e079f1a58c79d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 3 Nov 2024 17:10:52 +0100 Subject: [PATCH 12/17] build(deps): bump Lightning-AI/utilities from 0.11.7 to 0.11.8 (#2818) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> (cherry picked from commit 9afbac7835d13b047dc3aecf6eb3876f09584e99) --- .github/workflows/ci-checks.yml | 12 ++++++------ .github/workflows/clear-cache.yml | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/ci-checks.yml b/.github/workflows/ci-checks.yml index 3c7c3eb69f1..46acca95c7a 100644 --- a/.github/workflows/ci-checks.yml +++ b/.github/workflows/ci-checks.yml @@ -13,19 +13,19 @@ concurrency: jobs: check-code: - uses: Lightning-AI/utilities/.github/workflows/check-typing.yml@v0.11.7 + uses: Lightning-AI/utilities/.github/workflows/check-typing.yml@v0.11.8 with: - actions-ref: v0.11.7 + actions-ref: v0.11.8 extra-typing: "typing" check-schema: - uses: Lightning-AI/utilities/.github/workflows/check-schema.yml@v0.11.7 + uses: Lightning-AI/utilities/.github/workflows/check-schema.yml@v0.11.8 check-package: if: github.event.pull_request.draft == false - uses: Lightning-AI/utilities/.github/workflows/check-package.yml@v0.11.7 + uses: Lightning-AI/utilities/.github/workflows/check-package.yml@v0.11.8 with: - actions-ref: v0.11.7 + actions-ref: v0.11.8 artifact-name: dist-packages-${{ github.sha }} import-name: "torchmetrics" testing-matrix: | @@ -35,7 +35,7 @@ jobs: } check-md-links: - uses: Lightning-AI/utilities/.github/workflows/check-md-links.yml@v0.11.7 + uses: Lightning-AI/utilities/.github/workflows/check-md-links.yml@v0.11.8 with: base-branch: master config-file: ".github/markdown-links-config.json" diff --git a/.github/workflows/clear-cache.yml b/.github/workflows/clear-cache.yml index ecd7c6e3ff3..057827d611c 100644 --- a/.github/workflows/clear-cache.yml +++ b/.github/workflows/clear-cache.yml @@ -23,7 +23,7 @@ on: jobs: cron-clear: if: github.event_name == 'schedule' || github.event_name == 'pull_request' - uses: Lightning-AI/utilities/.github/workflows/cleanup-caches.yml@v0.11.7 + uses: Lightning-AI/utilities/.github/workflows/cleanup-caches.yml@v0.11.8 with: scripts-ref: v0.11.7 dry-run: ${{ github.event_name == 'pull_request' }} @@ -32,9 +32,9 @@ jobs: direct-clear: if: github.event_name == 'workflow_dispatch' || 
github.event_name == 'pull_request' - uses: Lightning-AI/utilities/.github/workflows/cleanup-caches.yml@v0.11.7 + uses: Lightning-AI/utilities/.github/workflows/cleanup-caches.yml@v0.11.8 with: - scripts-ref: v0.11.7 + scripts-ref: v0.11.8 dry-run: ${{ github.event_name == 'pull_request' }} pattern: ${{ inputs.pattern || 'pypi_wheels' }} # setting str in case of PR / debugging age-days: ${{ fromJSON(inputs.age-days) || 0 }} # setting 0 in case of PR / debugging From ee323cb573de58ef98622be192c17c723e5b3cd1 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Mon, 4 Nov 2024 17:23:29 +0000 Subject: [PATCH 13/17] ci: bump deprecated mac 12 to 13 (#2820) (cherry picked from commit fc6b2356ae071c7ed3e459376015bc300d7680e4) --- .github/workflows/ci-checks.yml | 2 +- .github/workflows/ci-integrate.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-checks.yml b/.github/workflows/ci-checks.yml index 46acca95c7a..d7abe9bb569 100644 --- a/.github/workflows/ci-checks.yml +++ b/.github/workflows/ci-checks.yml @@ -30,7 +30,7 @@ jobs: import-name: "torchmetrics" testing-matrix: | { - "os": ["ubuntu-22.04", "macos-12", "windows-2022"], + "os": ["ubuntu-22.04", "macos-13", "windows-2022"], "python-version": ["3.8", "3.11"] } diff --git a/.github/workflows/ci-integrate.yml b/.github/workflows/ci-integrate.yml index ca50ea9a2c4..f904e0a84b8 100644 --- a/.github/workflows/ci-integrate.yml +++ b/.github/workflows/ci-integrate.yml @@ -26,7 +26,7 @@ jobs: strategy: fail-fast: false matrix: - os: ["ubuntu-22.04", "macOS-12", "windows-2022"] + os: ["ubuntu-22.04", "macOS-13", "windows-2022"] python-version: ["3.8", "3.10"] requires: ["oldest", "latest"] exclude: From 7e0f13b2f0ee6c50fe44cbd27385ff9866b63398 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 5 Nov 2024 10:54:20 +0000 Subject: [PATCH 14/17] build(deps): bump torch from 2.5.0 to 2.5.1 in /requirements (#2824) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit c5e9ac405639baa177e946008b8c500b60ead7eb) --- requirements/typing.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/typing.txt b/requirements/typing.txt index 449ff685fa6..271a9dfd690 100644 --- a/requirements/typing.txt +++ b/requirements/typing.txt @@ -1,5 +1,5 @@ mypy ==1.13.0 -torch ==2.5.0 +torch ==2.5.1 types-PyYAML types-emoji From 3402a4e460cd4c13f54a4fe3f43852f555b86898 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 5 Nov 2024 11:56:09 +0000 Subject: [PATCH 15/17] build(deps): update onnxruntime requirement from <1.20,>=1.12.0 to >=1.12.0,<1.21 in /requirements (#2823) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 650e753cad076d9993658ab541f2a8eba712103e) --- requirements/audio.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/audio.txt b/requirements/audio.txt index 6b08e0cb684..d970b2373cb 100644 --- a/requirements/audio.txt +++ b/requirements/audio.txt @@ -8,5 +8,5 @@ pystoi >=0.4.0, <0.5.0 torchaudio >=0.10.0, <2.6.0 gammatone >=1.0.0, <1.1.0 librosa >=0.9.0, <0.11.0 -onnxruntime >=1.12.0, <1.20 # installing onnxruntime_gpu-gpu failed on macos +onnxruntime >=1.12.0, <1.21 # installing onnxruntime_gpu-gpu failed on macos requests >=2.19.0, <2.33.0 From 
545296aac4a9524495f0824894c6a72bb8d06a95 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Tue, 5 Nov 2024 13:00:05 +0000 Subject: [PATCH 16/17] ci: bump integration test to 3.9+ & patch `np.Inf` (#2826) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> (cherry picked from commit 06c005f6ceeb88a36e3bb7f6a1337c4f8f8e5645) --- .github/workflows/ci-integrate.yml | 2 +- src/torchmetrics/__init__.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-integrate.yml b/.github/workflows/ci-integrate.yml index f904e0a84b8..11bbe401f83 100644 --- a/.github/workflows/ci-integrate.yml +++ b/.github/workflows/ci-integrate.yml @@ -27,7 +27,7 @@ jobs: fail-fast: false matrix: os: ["ubuntu-22.04", "macOS-13", "windows-2022"] - python-version: ["3.8", "3.10"] + python-version: ["3.9", "3.11"] requires: ["oldest", "latest"] exclude: - { python-version: "3.11", requires: "oldest" } diff --git a/src/torchmetrics/__init__.py b/src/torchmetrics/__init__.py index 2fa370cb1c9..15f7be3ae90 100644 --- a/src/torchmetrics/__init__.py +++ b/src/torchmetrics/__init__.py @@ -14,6 +14,13 @@ _PACKAGE_ROOT = os.path.dirname(__file__) _PROJECT_ROOT = os.path.dirname(_PACKAGE_ROOT) +if package_available("numpy"): + # compatibility for AttributeError: `np.Inf` was removed in the NumPy 2.0 release. Use `np.inf` instead + import numpy + + numpy.Inf = numpy.inf + + if package_available("PIL"): import PIL From 6cfc3e2c8d18471602ddd5654c977f5443d78ee2 Mon Sep 17 00:00:00 2001 From: Jirka B Date: Thu, 7 Nov 2024 19:40:54 +0000 Subject: [PATCH 17/17] releasing `1.5.2` --- CHANGELOG.md | 10 +++++++--- src/torchmetrics/__about__.py | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e93b4155e54..636e346940e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,14 +8,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 --- -## [UnReleased] - 2024-MM-DD +## [1.5.2] - 2024-11-07 -### Fixed +### Changed -- Fixed iou scores in detection for either empty predictions/targets leading to wrong scores ([#2805](https://github.com/Lightning-AI/torchmetrics/pull/2805)) +- Re-adding `numpy` 2+ support ([#2804](https://github.com/Lightning-AI/torchmetrics/pull/2804)) ### Fixed +- Fixed iou scores in detection for either empty predictions/targets leading to wrong scores ([#2805](https://github.com/Lightning-AI/torchmetrics/pull/2805)) - Fixed `MetricCollection` compatibility with `torch.jit.script` ([#2813](https://github.com/Lightning-AI/torchmetrics/pull/2813)) - Fixed assert in PIT ([#2811](https://github.com/Lightning-AI/torchmetrics/pull/2811)) - Patched `np.Inf` for `numpy` 2.0+ ([#2826](https://github.com/Lightning-AI/torchmetrics/pull/2826)) --- diff --git a/src/torchmetrics/__about__.py b/src/torchmetrics/__about__.py index 8ae351f52f5..e4a11de08c5 100644 --- a/src/torchmetrics/__about__.py +++ b/src/torchmetrics/__about__.py @@ -1,4 +1,4 @@ -__version__ = "1.5.1" +__version__ = "1.5.2" __author__ = "Lightning-AI et al." __author_email__ = "name@pytorchlightning.ai" __license__ = "Apache-2.0"