From 3d0bce83488453eb5f454e506bb4555591735151 Mon Sep 17 00:00:00 2001 From: valosekj Date: Tue, 27 Feb 2024 12:14:54 -0500 Subject: [PATCH 01/27] Add python script and quick start guide for MetricsReloaded --- compute_metrics/compute_metrics_reloaded.py | 130 ++++++++++++++++++ .../MetricsReloaded_quick_start_guide.md | 64 +++++++++ 2 files changed, 194 insertions(+) create mode 100644 compute_metrics/compute_metrics_reloaded.py create mode 100644 quick_start_guides/MetricsReloaded_quick_start_guide.md diff --git a/compute_metrics/compute_metrics_reloaded.py b/compute_metrics/compute_metrics_reloaded.py new file mode 100644 index 0000000..469af44 --- /dev/null +++ b/compute_metrics/compute_metrics_reloaded.py @@ -0,0 +1,130 @@ +""" +Compute MetricsReloaded metrics for segmentation tasks. +Details: https://github.com/Project-MONAI/MetricsReloaded/tree/main + +Example usage: + python compute_metrics_reloaded.py + -reference sub-001_T2w_seg.nii.gz + -prediction sub-001_T2w_prediction.nii.gz + +Default metrics (semantic segmentation): + - Dice similarity coefficient (DSC) + - Normalized surface distance (NSD) +(for details, see Fig. 2, Fig. 11, and Fig. 12 in https://arxiv.org/abs/2206.01653v5) + +The script is compatible with both binary and multi-class segmentation tasks (e.g., nnunet region-based). +The metrics are computed for each unique label (class) in the reference (ground truth) image. +The output is saved to a JSON file, for example: + +{ + "reference": "sub-001_T2w_seg.nii.gz", + "prediction": "sub-001_T2w_prediction.nii.gz", + "1.0": { + "dsc": 0.8195991091314031, + "nsd": 0.9455782312925171 + }, + "2.0": { + "dsc": 0.8042553191489362, + "nsd": 0.9580573951434879 + } + +} + +Authors: Jan Valosek +""" + + +import os +import argparse +import json +import numpy as np +import nibabel as nib + +from MetricsReloaded.metrics.pairwise_measures import BinaryPairwiseMeasures as BPM + + +def get_parser(): + # parse command line arguments + parser = argparse.ArgumentParser(description='Compute MetricsReloaded metrics for segmentation tasks.') + + # Arguments for model, data, and training + parser.add_argument('-prediction', required=True, type=str, + help='Path to the nifti image of test prediction.') + parser.add_argument('-reference', required=True, type=str, + help='Path to the nifti image of reference (ground truth) label.') + parser.add_argument('-metrics', nargs='+', default=['dsc', 'nsd'], required=False, + help='List of metrics to compute. For details, ' + 'see: https://metricsreloaded.readthedocs.io/en/latest/reference/metrics/metrics.html. ' + 'Default: dsc, nsd') + parser.add_argument('-output', type=str, default='metrics.json', required=False, + help='Path to the output JSON file to save the metrics. Default: metrics.json') + + return parser + + +def load_nifti_image(file_path): + """ + Construct absolute path to the nifti image, check if it exists, and load the image data. 
+ :param file_path: path to the nifti image + :return: nifti image data + """ + file_path = os.path.expanduser(file_path) # resolve '~' in the path + file_path = os.path.abspath(file_path) + if not os.path.exists(file_path): + raise FileNotFoundError(f'File {file_path} does not exist.') + nifti_image = nib.load(file_path) + return nifti_image.get_fdata() + + +def main(): + + # parse command line arguments + parser = get_parser() + args = parser.parse_args() + + # load nifti images + prediction_data = load_nifti_image(args.prediction) + reference_data = load_nifti_image(args.reference) + + # check whether the images have the same shape and orientation + if prediction_data.shape != reference_data.shape: + raise ValueError(f'The prediction and reference (ground truth) images must have the same shape. ' + f'The prediction image has shape {prediction_data.shape} and the ground truth image has ' + f'shape {reference_data.shape}.') + + # get all unique labels (classes) + # for example, for nnunet region-based segmentation, spinal cord has label 1, and lesions have label 2 + unique_labels_reference = np.unique(reference_data) + unique_labels_reference = unique_labels_reference[unique_labels_reference != 0] # remove background label + + # create dictionary to store the metrics + output_dict = {'reference': args.reference, 'prediction': args.prediction} + + # loop over all unique labels + for label in unique_labels_reference: + # create binary masks for the current label + print(f'Processing label {label}') + predidction_data_label = np.array(prediction_data == label, dtype=float) + reference_data_label = np.array(reference_data == label, dtype=float) + + # Dice similarity coefficient (DSC): + # Fig. 65 in https://arxiv.org/pdf/2206.01653v5.pdf + # https://metricsreloaded.readthedocs.io/en/latest/reference/metrics/pairwise_measures.html#MetricsReloaded.metrics.pairwise_measures.BinaryPairwiseMeasures.dsc + # Normalized surface distance (NSD): + # Fig. 86 in https://arxiv.org/pdf/2206.01653v5.pdf + # https://metricsreloaded.readthedocs.io/en/latest/reference/metrics/pairwise_measures.html#MetricsReloaded.metrics.pairwise_measures.BinaryPairwiseMeasures.normalised_surface_distance + bpm = BPM(predidction_data_label, reference_data_label, measures=args.metrics) + dict_seg = bpm.to_dict_meas() + + # add the metrics to the output dictionary + output_dict[label] = dict_seg + + # save dict as json + fname_output = os.path.abspath(args.output) + with open(fname_output, 'w') as f: + json.dump(output_dict, f, indent=4) + print(f'Saved metrics to {fname_output}.') + + +if __name__ == '__main__': + main() diff --git a/quick_start_guides/MetricsReloaded_quick_start_guide.md b/quick_start_guides/MetricsReloaded_quick_start_guide.md new file mode 100644 index 0000000..c0f6e3a --- /dev/null +++ b/quick_start_guides/MetricsReloaded_quick_start_guide.md @@ -0,0 +1,64 @@ +# MetricsReloaded quick-start guide + +Useful links: +- [MetricsReloaded GitHub page](https://github.com/Project-MONAI/MetricsReloaded) +- [MetricsReloaded documentation](https://metricsreloaded.readthedocs.io/en/latest/) +- [MetricsReloaded publication](https://www.nature.com/articles/s41592-023-02151-z) + +## Installation + +Official installation instructions are available [here](https://github.com/Project-MONAI/MetricsReloaded?tab=readme-ov-file#installation). + +> **Note** +> Always install MetricsReloaded inside a virtual environment. 
+ +``` +# Create and activate a new conda environment +conda create -n metrics_reloaded python=3.10 pip +conda activate metrics_reloaded + +# Clone the repository +cd ~/code +git clone https://github.com/csudre/MetricsReloaded.git +cd MetricsReloaded + +# Install the package +python -m pip install . +# You can alternatively install the package in editable mode: +python -m pip install -e . +``` + +## Usage + +You can use the [compute_metrics_reloaded.py](../compute_metrics/compute_metrics_reloaded.py) script to compute metrics using the MetricsReloaded package. + +```commandline +python compute_metrics_reloaded.py -reference sub-001_T2w_seg.nii.gz -prediction sub-001_T2w_prediction.nii.gz +``` + +Default metrics (semantic segmentation): + - Dice similarity coefficient (DSC) + - Normalized surface distance (NSD) +(for details, see Fig. 2, Fig. 11, and Fig. 12 in https://arxiv.org/abs/2206.01653v5) + +The script is compatible with both binary and multi-class segmentation tasks (e.g., nnunet region-based). + +The metrics are computed for each unique label (class) in the reference (ground truth) image. + +The output is saved to a JSON file, for example: + +```json +{ + "reference": "sub-001_T2w_seg.nii.gz", + "prediction": "sub-001_T2w_prediction.nii.gz", + "1.0": { + "dsc": 0.8195991091314031, + "nsd": 0.9455782312925171 + }, + "2.0": { + "dsc": 0.8042553191489362, + "nsd": 0.9580573951434879 + } + +} +``` \ No newline at end of file From b13a20a3939ae5b2fd133b62bbb1ad7a6aad273b Mon Sep 17 00:00:00 2001 From: valosekj Date: Tue, 27 Feb 2024 12:34:43 -0500 Subject: [PATCH 02/27] Add preprint to the quick start guide --- quick_start_guides/MetricsReloaded_quick_start_guide.md | 1 + 1 file changed, 1 insertion(+) diff --git a/quick_start_guides/MetricsReloaded_quick_start_guide.md b/quick_start_guides/MetricsReloaded_quick_start_guide.md index c0f6e3a..eaea102 100644 --- a/quick_start_guides/MetricsReloaded_quick_start_guide.md +++ b/quick_start_guides/MetricsReloaded_quick_start_guide.md @@ -4,6 +4,7 @@ Useful links: - [MetricsReloaded GitHub page](https://github.com/Project-MONAI/MetricsReloaded) - [MetricsReloaded documentation](https://metricsreloaded.readthedocs.io/en/latest/) - [MetricsReloaded publication](https://www.nature.com/articles/s41592-023-02151-z) +- [MetricsReloaded preprint](https://arxiv.org/pdf/2206.01653v5.pdf) - preprint contains more figures than the publication ## Installation From 75ea95fdd4ea395db2ba0f1384505fdaf38cbf90 Mon Sep 17 00:00:00 2001 From: valosekj Date: Tue, 27 Feb 2024 13:51:23 -0500 Subject: [PATCH 03/27] fix typo --- compute_metrics/compute_metrics_reloaded.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compute_metrics/compute_metrics_reloaded.py b/compute_metrics/compute_metrics_reloaded.py index 469af44..4dd7e26 100644 --- a/compute_metrics/compute_metrics_reloaded.py +++ b/compute_metrics/compute_metrics_reloaded.py @@ -104,7 +104,7 @@ def main(): for label in unique_labels_reference: # create binary masks for the current label print(f'Processing label {label}') - predidction_data_label = np.array(prediction_data == label, dtype=float) + prediction_data_label = np.array(prediction_data == label, dtype=float) reference_data_label = np.array(reference_data == label, dtype=float) # Dice similarity coefficient (DSC): @@ -113,7 +113,7 @@ def main(): # Normalized surface distance (NSD): # Fig. 
86 in https://arxiv.org/pdf/2206.01653v5.pdf # https://metricsreloaded.readthedocs.io/en/latest/reference/metrics/pairwise_measures.html#MetricsReloaded.metrics.pairwise_measures.BinaryPairwiseMeasures.normalised_surface_distance - bpm = BPM(predidction_data_label, reference_data_label, measures=args.metrics) + bpm = BPM(prediction_data_label, reference_data_label, measures=args.metrics) dict_seg = bpm.to_dict_meas() # add the metrics to the output dictionary From d7ea9c7f28b98c010a8e62a545d7c935ac6fb6db Mon Sep 17 00:00:00 2001 From: valosekj Date: Tue, 27 Feb 2024 13:52:00 -0500 Subject: [PATCH 04/27] Handle empty reference, empty prediction, or both empty --- compute_metrics/compute_metrics_reloaded.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/compute_metrics/compute_metrics_reloaded.py b/compute_metrics/compute_metrics_reloaded.py index 4dd7e26..aaddaf4 100644 --- a/compute_metrics/compute_metrics_reloaded.py +++ b/compute_metrics/compute_metrics_reloaded.py @@ -96,12 +96,20 @@ def main(): # for example, for nnunet region-based segmentation, spinal cord has label 1, and lesions have label 2 unique_labels_reference = np.unique(reference_data) unique_labels_reference = unique_labels_reference[unique_labels_reference != 0] # remove background label + unique_labels_prediction = np.unique(prediction_data) + unique_labels_prediction = unique_labels_prediction[unique_labels_prediction != 0] # remove background label + + # Get the unique labels that are present in the reference OR prediction images + unique_labels = np.unique(np.concatenate((unique_labels_reference, unique_labels_prediction))) + # If both the reference and prediction images are empty, the unique_labels will be [0] + if len(unique_labels) == 0: + unique_labels = [0] # create dictionary to store the metrics output_dict = {'reference': args.reference, 'prediction': args.prediction} # loop over all unique labels - for label in unique_labels_reference: + for label in unique_labels: # create binary masks for the current label print(f'Processing label {label}') prediction_data_label = np.array(prediction_data == label, dtype=float) @@ -115,6 +123,10 @@ def main(): # https://metricsreloaded.readthedocs.io/en/latest/reference/metrics/pairwise_measures.html#MetricsReloaded.metrics.pairwise_measures.BinaryPairwiseMeasures.normalised_surface_distance bpm = BPM(prediction_data_label, reference_data_label, measures=args.metrics) dict_seg = bpm.to_dict_meas() + # Note: + # - if the reference is NOT empty and the prediction is empty the DSC and NSD are set to 0.0 + # - if the reference is empty and the prediction is NOT empty the DSC and NSD are set to 0.0 + # - if the reference and the prediction are both empty the DSC and NSD are set to 1.0 # add the metrics to the output dictionary output_dict[label] = dict_seg From eaaf3a29ca314e0fbc2b36779ec17f90d25c0085 Mon Sep 17 00:00:00 2001 From: valosekj Date: Wed, 28 Feb 2024 10:19:21 -0500 Subject: [PATCH 05/27] Move metrics description --- compute_metrics/compute_metrics_reloaded.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/compute_metrics/compute_metrics_reloaded.py b/compute_metrics/compute_metrics_reloaded.py index aaddaf4..5f7e357 100644 --- a/compute_metrics/compute_metrics_reloaded.py +++ b/compute_metrics/compute_metrics_reloaded.py @@ -12,6 +12,13 @@ - Normalized surface distance (NSD) (for details, see Fig. 2, Fig. 11, and Fig. 
12 in https://arxiv.org/abs/2206.01653v5) +Dice similarity coefficient (DSC): +- Fig. 65 in https://arxiv.org/pdf/2206.01653v5.pdf +- https://metricsreloaded.readthedocs.io/en/latest/reference/metrics/pairwise_measures.html#MetricsReloaded.metrics.pairwise_measures.BinaryPairwiseMeasures.dsc +Normalized surface distance (NSD): +- Fig. 86 in https://arxiv.org/pdf/2206.01653v5.pdf +- https://metricsreloaded.readthedocs.io/en/latest/reference/metrics/pairwise_measures.html#MetricsReloaded.metrics.pairwise_measures.BinaryPairwiseMeasures.normalised_surface_distance + The script is compatible with both binary and multi-class segmentation tasks (e.g., nnunet region-based). The metrics are computed for each unique label (class) in the reference (ground truth) image. The output is saved to a JSON file, for example: @@ -115,12 +122,6 @@ def main(): prediction_data_label = np.array(prediction_data == label, dtype=float) reference_data_label = np.array(reference_data == label, dtype=float) - # Dice similarity coefficient (DSC): - # Fig. 65 in https://arxiv.org/pdf/2206.01653v5.pdf - # https://metricsreloaded.readthedocs.io/en/latest/reference/metrics/pairwise_measures.html#MetricsReloaded.metrics.pairwise_measures.BinaryPairwiseMeasures.dsc - # Normalized surface distance (NSD): - # Fig. 86 in https://arxiv.org/pdf/2206.01653v5.pdf - # https://metricsreloaded.readthedocs.io/en/latest/reference/metrics/pairwise_measures.html#MetricsReloaded.metrics.pairwise_measures.BinaryPairwiseMeasures.normalised_surface_distance bpm = BPM(prediction_data_label, reference_data_label, measures=args.metrics) dict_seg = bpm.to_dict_meas() # Note: From 60bcb7acd9e8b541c5406d5e3ed709b9ab44c9d1 Mon Sep 17 00:00:00 2001 From: valosekj Date: Wed, 28 Feb 2024 10:19:52 -0500 Subject: [PATCH 06/27] Update comments --- compute_metrics/compute_metrics_reloaded.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compute_metrics/compute_metrics_reloaded.py b/compute_metrics/compute_metrics_reloaded.py index 5f7e357..1ab1823 100644 --- a/compute_metrics/compute_metrics_reloaded.py +++ b/compute_metrics/compute_metrics_reloaded.py @@ -102,9 +102,9 @@ def main(): # get all unique labels (classes) # for example, for nnunet region-based segmentation, spinal cord has label 1, and lesions have label 2 unique_labels_reference = np.unique(reference_data) - unique_labels_reference = unique_labels_reference[unique_labels_reference != 0] # remove background label + unique_labels_reference = unique_labels_reference[unique_labels_reference != 0] # remove background unique_labels_prediction = np.unique(prediction_data) - unique_labels_prediction = unique_labels_prediction[unique_labels_prediction != 0] # remove background label + unique_labels_prediction = unique_labels_prediction[unique_labels_prediction != 0] # remove background # Get the unique labels that are present in the reference OR prediction images unique_labels = np.unique(np.concatenate((unique_labels_reference, unique_labels_prediction))) From 434f153abec7448f7c937e6cf5042f8adee16556 Mon Sep 17 00:00:00 2001 From: valosekj Date: Wed, 28 Feb 2024 10:20:49 -0500 Subject: [PATCH 07/27] Store info whether the reference or prediction is empty in the output JSON --- compute_metrics/compute_metrics_reloaded.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/compute_metrics/compute_metrics_reloaded.py b/compute_metrics/compute_metrics_reloaded.py index 1ab1823..af5469c 100644 --- a/compute_metrics/compute_metrics_reloaded.py +++ 
b/compute_metrics/compute_metrics_reloaded.py @@ -128,6 +128,11 @@ def main(): # - if the reference is NOT empty and the prediction is empty the DSC and NSD are set to 0.0 # - if the reference is empty and the prediction is NOT empty the DSC and NSD are set to 0.0 # - if the reference and the prediction are both empty the DSC and NSD are set to 1.0 + # Store info whether the reference or prediction is empty + dict_seg['EmptyRef'] = bpm.flag_empty_ref + dict_seg['EmptyPred'] = bpm.flag_empty_pred + # add the metrics to the output dictionary + output_dict[label] = dict_seg # add the metrics to the output dictionary output_dict[label] = dict_seg From 57f854ff4803359ffc97209d79a912b3cad13df2 Mon Sep 17 00:00:00 2001 From: valosekj Date: Wed, 28 Feb 2024 10:21:00 -0500 Subject: [PATCH 08/27] Remove note --- compute_metrics/compute_metrics_reloaded.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/compute_metrics/compute_metrics_reloaded.py b/compute_metrics/compute_metrics_reloaded.py index af5469c..5b4c98d 100644 --- a/compute_metrics/compute_metrics_reloaded.py +++ b/compute_metrics/compute_metrics_reloaded.py @@ -124,10 +124,6 @@ def main(): bpm = BPM(prediction_data_label, reference_data_label, measures=args.metrics) dict_seg = bpm.to_dict_meas() - # Note: - # - if the reference is NOT empty and the prediction is empty the DSC and NSD are set to 0.0 - # - if the reference is empty and the prediction is NOT empty the DSC and NSD are set to 0.0 - # - if the reference and the prediction are both empty the DSC and NSD are set to 1.0 # Store info whether the reference or prediction is empty dict_seg['EmptyRef'] = bpm.flag_empty_ref dict_seg['EmptyPred'] = bpm.flag_empty_pred From e26e2303dc6793e00d3c7538fa83dc7f6895d3f2 Mon Sep 17 00:00:00 2001 From: valosekj Date: Wed, 28 Feb 2024 10:21:28 -0500 Subject: [PATCH 09/27] Properly handle special case when both the reference and prediction images are empty --- compute_metrics/compute_metrics_reloaded.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/compute_metrics/compute_metrics_reloaded.py b/compute_metrics/compute_metrics_reloaded.py index 5b4c98d..3fee9fa 100644 --- a/compute_metrics/compute_metrics_reloaded.py +++ b/compute_metrics/compute_metrics_reloaded.py @@ -108,9 +108,6 @@ def main(): # Get the unique labels that are present in the reference OR prediction images unique_labels = np.unique(np.concatenate((unique_labels_reference, unique_labels_prediction))) - # If both the reference and prediction images are empty, the unique_labels will be [0] - if len(unique_labels) == 0: - unique_labels = [0] # create dictionary to store the metrics output_dict = {'reference': args.reference, 'prediction': args.prediction} @@ -130,6 +127,18 @@ def main(): # add the metrics to the output dictionary output_dict[label] = dict_seg + if label == max(unique_labels): + break # break to loop to avoid processing the background label ("else" block) + # Special case when both the reference and prediction images are empty + else: + label = 0 + print(f'Processing label {label} -- both the reference and prediction are empty') + bpm = BPM(prediction_data, reference_data, measures=args.metrics) + dict_seg = bpm.to_dict_meas() + + # Store info whether the reference or prediction is empty + dict_seg['EmptyRef'] = bpm.flag_empty_ref + dict_seg['EmptyPred'] = bpm.flag_empty_pred # add the metrics to the output dictionary output_dict[label] = dict_seg From b40f5227bf3329328b068747281aaf1f3a364141 Mon Sep 17 00:00:00 2001 From: 
valosekj Date: Thu, 29 Feb 2024 07:00:12 -0500 Subject: [PATCH 10/27] Move code to compute single subject metrics into a function -- it will allow easier code refactoring --- compute_metrics/compute_metrics_reloaded.py | 111 +++++++++++++++++--- 1 file changed, 97 insertions(+), 14 deletions(-) diff --git a/compute_metrics/compute_metrics_reloaded.py b/compute_metrics/compute_metrics_reloaded.py index 3fee9fa..765109f 100644 --- a/compute_metrics/compute_metrics_reloaded.py +++ b/compute_metrics/compute_metrics_reloaded.py @@ -83,15 +83,39 @@ def load_nifti_image(file_path): return nifti_image.get_fdata() -def main(): - - # parse command line arguments - parser = get_parser() - args = parser.parse_args() - +def get_images_in_folder(prediction, reference): + """ + Get all files (predictions and references/ground truths) in the input directories + :param prediction: path to the directory with prediction files + :param reference: path to the directory with reference (ground truth) files + :return: list of prediction files, list of reference/ground truth files + """ + # Get all files in the directories + prediction_files = [os.path.join(prediction, f) for f in os.listdir(prediction) if f.endswith('.nii.gz')] + reference_files = [os.path.join(reference, f) for f in os.listdir(reference) if f.endswith('.nii.gz')] + # Check if the number of files in the directories is the same + if len(prediction_files) != len(reference_files): + raise ValueError(f'The number of files in the directories is different. ' + f'Prediction files: {len(prediction_files)}, Reference files: {len(reference_files)}') + print(f'Found {len(prediction_files)} files in the directories.') + # Sort the files + # NOTE: Hopefully, the files are named in the same order in both directories + prediction_files.sort() + reference_files.sort() + + return prediction_files, reference_files + + +def compute_metrics_single_subject(prediction, reference, metrics): + """ + Compute MetricsReloaded metrics for a single subject + :param prediction: path to the nifti image with the prediction + :param reference: path to the nifti image with the reference (ground truth) + :param metrics: list of metrics to compute + """ # load nifti images - prediction_data = load_nifti_image(args.prediction) - reference_data = load_nifti_image(args.reference) + prediction_data = load_nifti_image(prediction) + reference_data = load_nifti_image(reference) # check whether the images have the same shape and orientation if prediction_data.shape != reference_data.shape: @@ -109,8 +133,8 @@ def main(): # Get the unique labels that are present in the reference OR prediction images unique_labels = np.unique(np.concatenate((unique_labels_reference, unique_labels_prediction))) - # create dictionary to store the metrics - output_dict = {'reference': args.reference, 'prediction': args.prediction} + # append entry into the output_list to store the metrics for the current subject + metrics_dict = {'reference': reference, 'prediction': prediction} # loop over all unique labels for label in unique_labels: @@ -119,13 +143,13 @@ def main(): prediction_data_label = np.array(prediction_data == label, dtype=float) reference_data_label = np.array(reference_data == label, dtype=float) - bpm = BPM(prediction_data_label, reference_data_label, measures=args.metrics) + bpm = BPM(prediction_data_label, reference_data_label, measures=metrics) dict_seg = bpm.to_dict_meas() # Store info whether the reference or prediction is empty dict_seg['EmptyRef'] = bpm.flag_empty_ref dict_seg['EmptyPred'] = 
bpm.flag_empty_pred # add the metrics to the output dictionary - output_dict[label] = dict_seg + metrics_dict[label] = dict_seg if label == max(unique_labels): break # break to loop to avoid processing the background label ("else" block) @@ -133,14 +157,73 @@ def main(): else: label = 0 print(f'Processing label {label} -- both the reference and prediction are empty') - bpm = BPM(prediction_data, reference_data, measures=args.metrics) + bpm = BPM(prediction_data, reference_data, measures=metrics) dict_seg = bpm.to_dict_meas() # Store info whether the reference or prediction is empty dict_seg['EmptyRef'] = bpm.flag_empty_ref dict_seg['EmptyPred'] = bpm.flag_empty_pred # add the metrics to the output dictionary - output_dict[label] = dict_seg + metrics_dict[label] = dict_seg + + return metrics_dict + + +def build_output_dataframe(output_list): + """ + Convert JSON data to pandas DataFrame + :param output_list: list of dictionaries with metrics + :return: pandas DataFrame + """ + rows = [] + for item in output_list: + # Extract all keys except 'reference' and 'prediction' to get labels (e.g. 1.0, 2.0, etc.) dynamically + labels = [key for key in item.keys() if key not in ['reference', 'prediction']] + for label in labels: + metrics = item[label] # Get the dictionary of metrics + # Dynamically add all metrics for the label + row = { + "reference": item["reference"], + "prediction": item["prediction"], + "label": label, + } + # Update row with all metrics dynamically + row.update(metrics) + rows.append(row) + + df = pd.DataFrame(rows) + + return df + + +def main(): + + # parse command line arguments + parser = get_parser() + args = parser.parse_args() + + # Initialize the output JSON + output_list = list() + + # Args.prediction and args.reference are paths to folders with multiple nii.gz files (i.e., multiple subjects) + if os.path.isdir(args.prediction) and os.path.isdir(args.reference): + # Get all files in the directories + prediction_files, reference_files = get_images_in_folder(args.prediction, args.reference) + # Loop over the subjects + for i in range(len(prediction_files)): + # Compute metrics for each subject + metrics_dict = compute_metrics_single_subject(prediction_files[i], reference_files[i], args.metrics) + # Append the output dictionary (representing a single reference-prediction pair per subject) to the + # output_list + output_list.append(metrics_dict) + # Args.prediction and args.reference are paths nii.gz files from a single subject + else: + metrics_dict = compute_metrics_single_subject(args.prediction, args.reference, args.metrics) + # Append the output dictionary (representing a single reference-prediction pair per subject) to the output_list + output_list.append(metrics_dict) + + # Convert JSON data to pandas DataFrame + df = build_output_dataframe(output_list) # save dict as json fname_output = os.path.abspath(args.output) From 6747782e4494adf23d6c12803effd9b012000f7d Mon Sep 17 00:00:00 2001 From: valosekj Date: Thu, 29 Feb 2024 07:02:09 -0500 Subject: [PATCH 11/27] Update input args --- compute_metrics/compute_metrics_reloaded.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/compute_metrics/compute_metrics_reloaded.py b/compute_metrics/compute_metrics_reloaded.py index 765109f..2353b0a 100644 --- a/compute_metrics/compute_metrics_reloaded.py +++ b/compute_metrics/compute_metrics_reloaded.py @@ -56,10 +56,13 @@ def get_parser(): # Arguments for model, data, and training parser.add_argument('-prediction', required=True, type=str, - help='Path 
to the nifti image of test prediction.') + help='Path to the folder with nifti images of test predictions or path to a single nifti image ' + 'of test prediction.') parser.add_argument('-reference', required=True, type=str, - help='Path to the nifti image of reference (ground truth) label.') - parser.add_argument('-metrics', nargs='+', default=['dsc', 'nsd'], required=False, + help='Path to the folder with nifti images of reference (ground truth) or path to a single ' + 'nifti image of reference (ground truth).') + parser.add_argument('-metrics', nargs='+', default=['dsc', 'fbeta', 'nsd', 'vol_diff', 'rel_vol_diff'], + required=False, help='List of metrics to compute. For details, ' 'see: https://metricsreloaded.readthedocs.io/en/latest/reference/metrics/metrics.html. ' 'Default: dsc, nsd') From 1905b5764768f8980f773cc27441a3e895c72289 Mon Sep 17 00:00:00 2001 From: valosekj Date: Thu, 29 Feb 2024 07:02:35 -0500 Subject: [PATCH 12/27] Save output as CSV --- compute_metrics/compute_metrics_reloaded.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/compute_metrics/compute_metrics_reloaded.py b/compute_metrics/compute_metrics_reloaded.py index 2353b0a..4b26083 100644 --- a/compute_metrics/compute_metrics_reloaded.py +++ b/compute_metrics/compute_metrics_reloaded.py @@ -46,6 +46,7 @@ import json import numpy as np import nibabel as nib +import pandas as pd from MetricsReloaded.metrics.pairwise_measures import BinaryPairwiseMeasures as BPM @@ -66,8 +67,10 @@ def get_parser(): help='List of metrics to compute. For details, ' 'see: https://metricsreloaded.readthedocs.io/en/latest/reference/metrics/metrics.html. ' 'Default: dsc, nsd') - parser.add_argument('-output', type=str, default='metrics.json', required=False, - help='Path to the output JSON file to save the metrics. Default: metrics.json') + parser.add_argument('-output', type=str, default='metrics.csv', required=False, + help='Path to the output CSV file to save the metrics. Default: metrics.csv' + 'Metrics are additionally saved to a JSON file with the same name but with .json ' + 'extension.') return parser @@ -228,10 +231,15 @@ def main(): # Convert JSON data to pandas DataFrame df = build_output_dataframe(output_list) - # save dict as json - fname_output = os.path.abspath(args.output) + # save as CSV + fname_output_csv = os.path.abspath(args.output) + df.to_csv(fname_output_csv, index=False) + print(f'Saved metrics to {fname_output_csv}.') + + # save as JSON + fname_output = fname_output_csv.replace('.csv', '.json') with open(fname_output, 'w') as f: - json.dump(output_dict, f, indent=4) + json.dump(output_list, f, indent=4) print(f'Saved metrics to {fname_output}.') From b7a4b7ab61fcb70418000474fbfeb0c4c8787a51 Mon Sep 17 00:00:00 2001 From: valosekj Date: Thu, 29 Feb 2024 11:19:04 -0500 Subject: [PATCH 13/27] Keep only CSV output --- compute_metrics/compute_metrics_reloaded.py | 33 +++++---------------- 1 file changed, 7 insertions(+), 26 deletions(-) diff --git a/compute_metrics/compute_metrics_reloaded.py b/compute_metrics/compute_metrics_reloaded.py index 4b26083..94c5713 100644 --- a/compute_metrics/compute_metrics_reloaded.py +++ b/compute_metrics/compute_metrics_reloaded.py @@ -21,21 +21,11 @@ The script is compatible with both binary and multi-class segmentation tasks (e.g., nnunet region-based). The metrics are computed for each unique label (class) in the reference (ground truth) image. 
-The output is saved to a JSON file, for example: - -{ - "reference": "sub-001_T2w_seg.nii.gz", - "prediction": "sub-001_T2w_prediction.nii.gz", - "1.0": { - "dsc": 0.8195991091314031, - "nsd": 0.9455782312925171 - }, - "2.0": { - "dsc": 0.8042553191489362, - "nsd": 0.9580573951434879 - } - -} +The output is saved to a CSV file, for example: + +reference prediction label dsc fbeta nsd vol_diff rel_vol_diff EmptyRef EmptyPred +seg.nii.gz pred.nii.gz 1.0 0.819 0.819 0.945 0.105 -10.548 False False +seg.nii.gz pred.nii.gz 2.0 0.743 0.743 0.923 0.121 -11.423 False False Authors: Jan Valosek """ @@ -43,7 +33,6 @@ import os import argparse -import json import numpy as np import nibabel as nib import pandas as pd @@ -68,9 +57,7 @@ def get_parser(): 'see: https://metricsreloaded.readthedocs.io/en/latest/reference/metrics/metrics.html. ' 'Default: dsc, nsd') parser.add_argument('-output', type=str, default='metrics.csv', required=False, - help='Path to the output CSV file to save the metrics. Default: metrics.csv' - 'Metrics are additionally saved to a JSON file with the same name but with .json ' - 'extension.') + help='Path to the output CSV file to save the metrics. Default: metrics.csv') return parser @@ -208,7 +195,7 @@ def main(): parser = get_parser() args = parser.parse_args() - # Initialize the output JSON + # Initialize a list to store the output dictionaries (representing a single reference-prediction pair per subject) output_list = list() # Args.prediction and args.reference are paths to folders with multiple nii.gz files (i.e., multiple subjects) @@ -236,12 +223,6 @@ def main(): df.to_csv(fname_output_csv, index=False) print(f'Saved metrics to {fname_output_csv}.') - # save as JSON - fname_output = fname_output_csv.replace('.csv', '.json') - with open(fname_output, 'w') as f: - json.dump(output_list, f, indent=4) - print(f'Saved metrics to {fname_output}.') - if __name__ == '__main__': main() From 03a1e271a21f201100f0f6c6cdf46f97348efed2 Mon Sep 17 00:00:00 2001 From: valosekj Date: Thu, 29 Feb 2024 11:25:25 -0500 Subject: [PATCH 14/27] Compute mean and standard deviation of metrics across all subjects --- compute_metrics/compute_metrics_reloaded.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/compute_metrics/compute_metrics_reloaded.py b/compute_metrics/compute_metrics_reloaded.py index 94c5713..7a6d6df 100644 --- a/compute_metrics/compute_metrics_reloaded.py +++ b/compute_metrics/compute_metrics_reloaded.py @@ -223,6 +223,15 @@ def main(): df.to_csv(fname_output_csv, index=False) print(f'Saved metrics to {fname_output_csv}.') + # Compute mean and standard deviation of metrics across all subjects + df_mean = (df.drop(columns=['reference', 'prediction', 'EmptyRef', 'EmptyPred']).groupby('label'). 
+ agg(['mean', 'std']).reset_index()) + + # save as CSV + fname_output_csv_mean = os.path.abspath(args.output.replace('.csv', '_mean.csv')) + df_mean.to_csv(fname_output_csv_mean, index=False) + print(f'Saved mean and standard deviation of metrics across all subjects to {fname_output_csv_mean}.') + if __name__ == '__main__': main() From abe99d2c889d1f955dc22b5268532665ac751016 Mon Sep 17 00:00:00 2001 From: valosekj Date: Thu, 29 Feb 2024 11:27:49 -0500 Subject: [PATCH 15/27] Add example usage --- compute_metrics/compute_metrics_reloaded.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/compute_metrics/compute_metrics_reloaded.py b/compute_metrics/compute_metrics_reloaded.py index 7a6d6df..75edd6a 100644 --- a/compute_metrics/compute_metrics_reloaded.py +++ b/compute_metrics/compute_metrics_reloaded.py @@ -2,11 +2,16 @@ Compute MetricsReloaded metrics for segmentation tasks. Details: https://github.com/Project-MONAI/MetricsReloaded/tree/main -Example usage: +Example usage (single reference-prediction pair): python compute_metrics_reloaded.py -reference sub-001_T2w_seg.nii.gz -prediction sub-001_T2w_prediction.nii.gz +Example usage (multiple reference-prediction pairs): + python compute_metrics_reloaded.py + -reference /path/to/reference + -prediction /path/to/prediction + Default metrics (semantic segmentation): - Dice similarity coefficient (DSC) - Normalized surface distance (NSD) From 07098caf55163747edc70435bd831f8185d01433 Mon Sep 17 00:00:00 2001 From: valosekj Date: Thu, 29 Feb 2024 11:28:28 -0500 Subject: [PATCH 16/27] Keep only 'dsc', 'nsd' as default metrics --- compute_metrics/compute_metrics_reloaded.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compute_metrics/compute_metrics_reloaded.py b/compute_metrics/compute_metrics_reloaded.py index 75edd6a..9c83575 100644 --- a/compute_metrics/compute_metrics_reloaded.py +++ b/compute_metrics/compute_metrics_reloaded.py @@ -56,7 +56,7 @@ def get_parser(): parser.add_argument('-reference', required=True, type=str, help='Path to the folder with nifti images of reference (ground truth) or path to a single ' 'nifti image of reference (ground truth).') - parser.add_argument('-metrics', nargs='+', default=['dsc', 'fbeta', 'nsd', 'vol_diff', 'rel_vol_diff'], + parser.add_argument('-metrics', nargs='+', default=['dsc', 'nsd'], required=False, help='List of metrics to compute. For details, ' 'see: https://metricsreloaded.readthedocs.io/en/latest/reference/metrics/metrics.html. 
' From f1faffa2275a5299cd6885213c036a9e3c992c79 Mon Sep 17 00:00:00 2001 From: valosekj Date: Thu, 29 Feb 2024 11:43:47 -0500 Subject: [PATCH 17/27] Use full metric names when saving CSV --- compute_metrics/compute_metrics_reloaded.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/compute_metrics/compute_metrics_reloaded.py b/compute_metrics/compute_metrics_reloaded.py index 9c83575..ce7050b 100644 --- a/compute_metrics/compute_metrics_reloaded.py +++ b/compute_metrics/compute_metrics_reloaded.py @@ -45,6 +45,16 @@ from MetricsReloaded.metrics.pairwise_measures import BinaryPairwiseMeasures as BPM +METRICS_TO_NAME = { + 'dsc': 'Dice similarity coefficient (DSC)', + 'hd': 'Hausdorff distance', + 'fbeta': 'FBeta score', + 'nsd': 'Normalized surface distance (NSD)', + 'vol_diff': 'Volume difference', + 'rel_vol_diff': 'Relative volume error (RVE)', +} + + def get_parser(): # parse command line arguments parser = argparse.ArgumentParser(description='Compute MetricsReloaded metrics for segmentation tasks.') @@ -223,6 +233,9 @@ def main(): # Convert JSON data to pandas DataFrame df = build_output_dataframe(output_list) + # Rename columns + df.rename(columns={metric: METRICS_TO_NAME[metric] for metric in METRICS_TO_NAME}, inplace=True) + # save as CSV fname_output_csv = os.path.abspath(args.output) df.to_csv(fname_output_csv, index=False) From a23b81bbf6020397098c5b450b082a1ef7b23896 Mon Sep 17 00:00:00 2001 From: valosekj Date: Thu, 29 Feb 2024 16:21:56 -0500 Subject: [PATCH 18/27] Change 'label = 0' to 'label = 1' when both the reference and prediction images are empty. The original 'label = 0' caused that the metrics were corresponding to background and were not easy to aggregate across subjects. Now, with 'label = 1', even cases with both empty reference and prediction are considered when computing group mean and std. --- compute_metrics/compute_metrics_reloaded.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compute_metrics/compute_metrics_reloaded.py b/compute_metrics/compute_metrics_reloaded.py index ce7050b..1e3a50b 100644 --- a/compute_metrics/compute_metrics_reloaded.py +++ b/compute_metrics/compute_metrics_reloaded.py @@ -163,7 +163,7 @@ def compute_metrics_single_subject(prediction, reference, metrics): break # break to loop to avoid processing the background label ("else" block) # Special case when both the reference and prediction images are empty else: - label = 0 + label = 1 print(f'Processing label {label} -- both the reference and prediction are empty') bpm = BPM(prediction_data, reference_data, measures=metrics) dict_seg = bpm.to_dict_meas() From cfa7f0ff490d4b680a352872343187b936e89933 Mon Sep 17 00:00:00 2001 From: valosekj Date: Tue, 5 Mar 2024 20:36:24 -0500 Subject: [PATCH 19/27] Update JSON to CSV --- .../MetricsReloaded_quick_start_guide.md | 20 +++++-------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/quick_start_guides/MetricsReloaded_quick_start_guide.md b/quick_start_guides/MetricsReloaded_quick_start_guide.md index eaea102..fb28cc3 100644 --- a/quick_start_guides/MetricsReloaded_quick_start_guide.md +++ b/quick_start_guides/MetricsReloaded_quick_start_guide.md @@ -46,20 +46,10 @@ The script is compatible with both binary and multi-class segmentation tasks (e. The metrics are computed for each unique label (class) in the reference (ground truth) image. 
-The output is saved to a JSON file, for example: +The output is saved to a CSV file, for example: -```json -{ - "reference": "sub-001_T2w_seg.nii.gz", - "prediction": "sub-001_T2w_prediction.nii.gz", - "1.0": { - "dsc": 0.8195991091314031, - "nsd": 0.9455782312925171 - }, - "2.0": { - "dsc": 0.8042553191489362, - "nsd": 0.9580573951434879 - } - -} +```csv +reference prediction label dsc fbeta nsd vol_diff rel_vol_diff EmptyRef EmptyPred +seg.nii.gz pred.nii.gz 1.0 0.819 0.819 0.945 0.105 -10.548 False False +seg.nii.gz pred.nii.gz 2.0 0.743 0.743 0.923 0.121 -11.423 False False ``` \ No newline at end of file From 0dbd013589f92d79b258e5c1fb4701a5598f181c Mon Sep 17 00:00:00 2001 From: valosekj Date: Tue, 5 Mar 2024 20:37:03 -0500 Subject: [PATCH 20/27] Add unittests for compute_metrics_reloaded.py --- .github/workflows/ci.yml | 7 +- tests/test_compute_metrics_reloaded.py | 147 +++++++++++++++++++++++++ 2 files changed, 153 insertions(+), 1 deletion(-) create mode 100644 tests/test_compute_metrics_reloaded.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6d11199..3fc0506 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,7 +34,12 @@ jobs: run: | python -m pip install --upgrade pip pip install -r dataset_conversion/requirements.txt + git clone https://github.com/valosekj/MetricsReloaded.git + cd MetricsReloaded + git checkout jv/add_rel_vol_error_metric + python -m pip install . - name: Run tests with unittest run: | - python -m unittest tests/test_convert_bids_to_nnUNetV2.py \ No newline at end of file + python -m unittest tests/test_convert_bids_to_nnUNetV2.py + python -m unittest tests/test_compute_metrics_reloaded.py \ No newline at end of file diff --git a/tests/test_compute_metrics_reloaded.py b/tests/test_compute_metrics_reloaded.py new file mode 100644 index 0000000..37a19be --- /dev/null +++ b/tests/test_compute_metrics_reloaded.py @@ -0,0 +1,147 @@ +####################################################################### +# +# Tests for the `compute_metrics/compute_metrics_reloaded.py` script +# +# RUN BY: +# python -m unittest tests/test_compute_metrics_reloaded.py +####################################################################### + +import unittest +import os +import numpy as np +import nibabel as nib +from compute_metrics.compute_metrics_reloaded import compute_metrics_single_subject +import tempfile + +METRICS = ['dsc', 'fbeta', 'nsd', 'vol_diff', 'rel_vol_error'] + + +class TestComputeMetricsReloaded(unittest.TestCase): + def setUp(self): + # Use tempfile.NamedTemporaryFile to create temporary nifti files + self.ref_file = tempfile.NamedTemporaryFile(suffix=".nii.gz", delete=False) + self.pred_file = tempfile.NamedTemporaryFile(suffix=".nii.gz", delete=False) + self.metrics = METRICS + + def create_dummy_nii(self, file_obj, data): + img = nib.Nifti1Image(data, np.eye(4)) + nib.save(img, file_obj.name) + file_obj.seek(0) # Move back to the beginning of the file + + def tearDown(self): + # Close and remove temporary files + self.ref_file.close() + os.unlink(self.ref_file.name) + self.pred_file.close() + os.unlink(self.pred_file.name) + + def assert_metrics(self, metrics_dict, expected_metrics): + for metric in self.metrics: + # if value is nan, use np.isnan to check + if np.isnan(expected_metrics[metric]): + self.assertTrue(np.isnan(metrics_dict[1][metric])) + # if value is inf, use np.isinf to check + elif np.isinf(expected_metrics[metric]): + self.assertTrue(np.isinf(metrics_dict[1][metric])) + else: + 
self.assertAlmostEqual(metrics_dict[1][metric], expected_metrics[metric]) + + def test_empty_ref_and_pred(self): + """ + Empty reference and empty prediction + """ + + expected_metrics = {'EmptyPred': True, + 'EmptyRef': True, + 'dsc': 1, + 'fbeta': 1, + 'nsd': np.nan, + 'rel_vol_error': 0, + 'vol_diff': np.nan} + + # Create empty reference + self.create_dummy_nii(self.ref_file, np.zeros((10, 10, 10))) + # Create empty prediction + self.create_dummy_nii(self.pred_file, np.zeros((10, 10, 10))) + # Compute metrics + metrics_dict = compute_metrics_single_subject(self.pred_file.name, self.ref_file.name, self.metrics) + # Assert metrics + self.assert_metrics(metrics_dict, expected_metrics) + + def test_empty_ref(self): + """ + Empty reference and non-empty prediction + """ + + expected_metrics = {'EmptyPred': False, + 'EmptyRef': True, + 'dsc': 0.0, + 'fbeta': 0, + 'nsd': 0.0, + 'rel_vol_error': 100, + 'vol_diff': np.inf} + + # Create empty reference + self.create_dummy_nii(self.ref_file, np.zeros((10, 10, 10))) + # Create non-empty prediction + pred = np.zeros((10, 10, 10)) + pred[5:7, 2:5] = 1 + self.create_dummy_nii(self.pred_file, pred) + # Compute metrics + metrics_dict = compute_metrics_single_subject(self.pred_file.name, self.ref_file.name, self.metrics) + # Assert metrics + self.assert_metrics(metrics_dict, expected_metrics) + + def test_empty_pred(self): + """ + Non-empty reference and empty prediction + """ + + expected_metrics = {'EmptyPred': True, + 'EmptyRef': False, + 'dsc': 0.0, + 'fbeta': 0, + 'nsd': 0.0, + 'rel_vol_error': -100.0, + 'vol_diff': 1.0} + + # Create non-empty reference + ref = np.zeros((10, 10, 10)) + ref[5:7, 2:5] = 1 + self.create_dummy_nii(self.ref_file, ref) + # Create empty prediction + self.create_dummy_nii(self.pred_file, np.zeros((10, 10, 10))) + # Compute metrics + metrics_dict = compute_metrics_single_subject(self.pred_file.name, self.ref_file.name, self.metrics) + # Assert metrics + self.assert_metrics(metrics_dict, expected_metrics) + + def test_non_empty_ref_and_pred(self): + """ + Non-empty reference and non-empty prediction + """ + + expected_metrics = {'EmptyPred': False, + 'EmptyRef': False, + 'dsc': 0.26666666666666666, + 'fbeta': 0.26666667461395266, + 'nsd': 0.5373134328358209, + 'rel_vol_error': 300.0, + 'vol_diff': 3.0} + + # Create non-empty reference + ref = np.zeros((10, 10, 10)) + ref[4:5, 3:6] = 1 + self.create_dummy_nii(self.ref_file, ref) + # Create non-empty prediction + pred = np.zeros((10, 10, 10)) + pred[4:8, 2:5] = 1 + self.create_dummy_nii(self.pred_file, pred) + # Compute metrics + metrics_dict = compute_metrics_single_subject(self.pred_file.name, self.ref_file.name, self.metrics) + # Assert metrics + self.assert_metrics(metrics_dict, expected_metrics) + + +if __name__ == '__main__': + unittest.main() From 63b7913000814f1db3a3b4185711051ae192cdf1 Mon Sep 17 00:00:00 2001 From: valosekj Date: Tue, 5 Mar 2024 21:33:09 -0500 Subject: [PATCH 21/27] Add unittest for non-empty reference and non-empty prediction with full overlap --- tests/test_compute_metrics_reloaded.py | 28 +++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/tests/test_compute_metrics_reloaded.py b/tests/test_compute_metrics_reloaded.py index 37a19be..16afe45 100644 --- a/tests/test_compute_metrics_reloaded.py +++ b/tests/test_compute_metrics_reloaded.py @@ -118,7 +118,7 @@ def test_empty_pred(self): def test_non_empty_ref_and_pred(self): """ - Non-empty reference and non-empty prediction + Non-empty reference and non-empty 
prediction with partial overlap """ expected_metrics = {'EmptyPred': False, @@ -142,6 +142,32 @@ def test_non_empty_ref_and_pred(self): # Assert metrics self.assert_metrics(metrics_dict, expected_metrics) + def test_non_empty_ref_and_pred_with_full_overlap(self): + """ + Non-empty reference and non-empty prediction with full overlap + """ + + expected_metrics = {'EmptyPred': False, + 'EmptyRef': False, + 'dsc': 1.0, + 'fbeta': 1.0, + 'nsd': 1.0, + 'rel_vol_error': 0.0, + 'vol_diff': 0.0} + + # Create non-empty reference + ref = np.zeros((10, 10, 10)) + ref[4:8, 2:5] = 1 + self.create_dummy_nii(self.ref_file, ref) + # Create non-empty prediction + pred = np.zeros((10, 10, 10)) + pred[4:8, 2:5] = 1 + self.create_dummy_nii(self.pred_file, pred) + # Compute metrics + metrics_dict = compute_metrics_single_subject(self.pred_file.name, self.ref_file.name, self.metrics) + # Assert metrics + self.assert_metrics(metrics_dict, expected_metrics) + if __name__ == '__main__': unittest.main() From 038d10bc0859885375f1ae0ad00857d54a8a24ba Mon Sep 17 00:00:00 2001 From: valosekj Date: Wed, 6 Mar 2024 07:21:59 -0500 Subject: [PATCH 22/27] Add unittests for multi-label (multi-class) masks --- tests/test_compute_metrics_reloaded.py | 54 ++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 8 deletions(-) diff --git a/tests/test_compute_metrics_reloaded.py b/tests/test_compute_metrics_reloaded.py index 16afe45..c45618f 100644 --- a/tests/test_compute_metrics_reloaded.py +++ b/tests/test_compute_metrics_reloaded.py @@ -37,14 +37,16 @@ def tearDown(self): def assert_metrics(self, metrics_dict, expected_metrics): for metric in self.metrics: - # if value is nan, use np.isnan to check - if np.isnan(expected_metrics[metric]): - self.assertTrue(np.isnan(metrics_dict[1][metric])) - # if value is inf, use np.isinf to check - elif np.isinf(expected_metrics[metric]): - self.assertTrue(np.isinf(metrics_dict[1][metric])) - else: - self.assertAlmostEqual(metrics_dict[1][metric], expected_metrics[metric]) + # Loop over labels/classes (e.g., 1, 2, ...) 
+ for label in expected_metrics.keys(): + # if value is nan, use np.isnan to check + if np.isnan(expected_metrics[label][metric]): + self.assertTrue(np.isnan(metrics_dict[label][metric])) + # if value is inf, use np.isinf to check + elif np.isinf(expected_metrics[label][metric]): + self.assertTrue(np.isinf(metrics_dict[label][metric])) + else: + self.assertAlmostEqual(metrics_dict[label][metric], expected_metrics[label][metric]) def test_empty_ref_and_pred(self): """ @@ -142,6 +144,42 @@ def test_non_empty_ref_and_pred(self): # Assert metrics self.assert_metrics(metrics_dict, expected_metrics) + def test_non_empty_ref_and_pred_multi_class(self): + """ + Non-empty reference and non-empty prediction with partial overlap + Multi-class (i.e., voxels with values 1 and 2, e.g., region-based nnUNet training) + """ + + expected_metrics = {1.0: {'dsc': 0.25, + 'fbeta': 0.2500000055879354, + 'nsd': 0.5, + 'vol_diff': 2.0, + 'rel_vol_error': 200.0, + 'EmptyRef': False, + 'EmptyPred': False}, + 2.0: {'dsc': 0.26666666666666666, + 'fbeta': 0.26666667461395266, + 'nsd': 0.5373134328358209, + 'vol_diff': 3.0, + 'rel_vol_error': 300.0, + 'EmptyRef': False, + 'EmptyPred': False}} + + # Create non-empty reference + ref = np.zeros((10, 10, 10)) + ref[4:5, 3:10] = 1 + ref[4:5, 3:6] = 2 # e.g., lesion within spinal cord + self.create_dummy_nii(self.ref_file, ref) + # Create non-empty prediction + pred = np.zeros((10, 10, 10)) + pred[4:8, 2:8] = 1 + pred[4:8, 2:5] = 2 # e.g., lesion within spinal cord + self.create_dummy_nii(self.pred_file, pred) + # Compute metrics + metrics_dict = compute_metrics_single_subject(self.pred_file.name, self.ref_file.name, self.metrics) + # Assert metrics + self.assert_metrics(metrics_dict, expected_metrics) + def test_non_empty_ref_and_pred_with_full_overlap(self): """ Non-empty reference and non-empty prediction with full overlap From e921b1925551b80f2f280f24936746db3ea5d348 Mon Sep 17 00:00:00 2001 From: valosekj Date: Wed, 6 Mar 2024 07:22:41 -0500 Subject: [PATCH 23/27] Update all tests to be compatible with multi-class 'assert_metrics' function --- tests/test_compute_metrics_reloaded.py | 70 +++++++++++++------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/tests/test_compute_metrics_reloaded.py b/tests/test_compute_metrics_reloaded.py index c45618f..4cc96fe 100644 --- a/tests/test_compute_metrics_reloaded.py +++ b/tests/test_compute_metrics_reloaded.py @@ -53,13 +53,13 @@ def test_empty_ref_and_pred(self): Empty reference and empty prediction """ - expected_metrics = {'EmptyPred': True, - 'EmptyRef': True, - 'dsc': 1, - 'fbeta': 1, - 'nsd': np.nan, - 'rel_vol_error': 0, - 'vol_diff': np.nan} + expected_metrics = {1.0: {'EmptyPred': True, + 'EmptyRef': True, + 'dsc': 1, + 'fbeta': 1, + 'nsd': np.nan, + 'rel_vol_error': 0, + 'vol_diff': np.nan}} # Create empty reference self.create_dummy_nii(self.ref_file, np.zeros((10, 10, 10))) @@ -75,13 +75,13 @@ def test_empty_ref(self): Empty reference and non-empty prediction """ - expected_metrics = {'EmptyPred': False, - 'EmptyRef': True, - 'dsc': 0.0, - 'fbeta': 0, - 'nsd': 0.0, - 'rel_vol_error': 100, - 'vol_diff': np.inf} + expected_metrics = {1.0: {'EmptyPred': False, + 'EmptyRef': True, + 'dsc': 0.0, + 'fbeta': 0, + 'nsd': 0.0, + 'rel_vol_error': 100, + 'vol_diff': np.inf}} # Create empty reference self.create_dummy_nii(self.ref_file, np.zeros((10, 10, 10))) @@ -99,13 +99,13 @@ def test_empty_pred(self): Non-empty reference and empty prediction """ - expected_metrics = {'EmptyPred': True, - 'EmptyRef': 
False, - 'dsc': 0.0, - 'fbeta': 0, - 'nsd': 0.0, - 'rel_vol_error': -100.0, - 'vol_diff': 1.0} + expected_metrics = {1.0: {'EmptyPred': True, + 'EmptyRef': False, + 'dsc': 0.0, + 'fbeta': 0, + 'nsd': 0.0, + 'rel_vol_error': -100.0, + 'vol_diff': 1.0}} # Create non-empty reference ref = np.zeros((10, 10, 10)) @@ -123,13 +123,13 @@ def test_non_empty_ref_and_pred(self): Non-empty reference and non-empty prediction with partial overlap """ - expected_metrics = {'EmptyPred': False, - 'EmptyRef': False, - 'dsc': 0.26666666666666666, - 'fbeta': 0.26666667461395266, - 'nsd': 0.5373134328358209, - 'rel_vol_error': 300.0, - 'vol_diff': 3.0} + expected_metrics = {1.0: {'EmptyPred': False, + 'EmptyRef': False, + 'dsc': 0.26666666666666666, + 'fbeta': 0.26666667461395266, + 'nsd': 0.5373134328358209, + 'rel_vol_error': 300.0, + 'vol_diff': 3.0}} # Create non-empty reference ref = np.zeros((10, 10, 10)) @@ -185,13 +185,13 @@ def test_non_empty_ref_and_pred_with_full_overlap(self): Non-empty reference and non-empty prediction with full overlap """ - expected_metrics = {'EmptyPred': False, - 'EmptyRef': False, - 'dsc': 1.0, - 'fbeta': 1.0, - 'nsd': 1.0, - 'rel_vol_error': 0.0, - 'vol_diff': 0.0} + expected_metrics = {1.0: {'EmptyPred': False, + 'EmptyRef': False, + 'dsc': 1.0, + 'fbeta': 1.0, + 'nsd': 1.0, + 'rel_vol_error': 0.0, + 'vol_diff': 0.0}} # Create non-empty reference ref = np.zeros((10, 10, 10)) From da8ad7f6f120df7c5fcd7b106ea6bed95e8faaa1 Mon Sep 17 00:00:00 2001 From: valosekj Date: Tue, 16 Apr 2024 16:13:54 -0400 Subject: [PATCH 24/27] Update to reflecct the ivadomed/MetricsReloaded fork --- compute_metrics/compute_metrics_reloaded.py | 2 +- quick_start_guides/MetricsReloaded_quick_start_guide.md | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/compute_metrics/compute_metrics_reloaded.py b/compute_metrics/compute_metrics_reloaded.py index 1e3a50b..1d2ce1a 100644 --- a/compute_metrics/compute_metrics_reloaded.py +++ b/compute_metrics/compute_metrics_reloaded.py @@ -1,6 +1,6 @@ """ Compute MetricsReloaded metrics for segmentation tasks. -Details: https://github.com/Project-MONAI/MetricsReloaded/tree/main +Details: https://github.com/ivadomed/MetricsReloaded Example usage (single reference-prediction pair): python compute_metrics_reloaded.py diff --git a/quick_start_guides/MetricsReloaded_quick_start_guide.md b/quick_start_guides/MetricsReloaded_quick_start_guide.md index fb28cc3..63127f7 100644 --- a/quick_start_guides/MetricsReloaded_quick_start_guide.md +++ b/quick_start_guides/MetricsReloaded_quick_start_guide.md @@ -1,14 +1,17 @@ # MetricsReloaded quick-start guide Useful links: -- [MetricsReloaded GitHub page](https://github.com/Project-MONAI/MetricsReloaded) - [MetricsReloaded documentation](https://metricsreloaded.readthedocs.io/en/latest/) - [MetricsReloaded publication](https://www.nature.com/articles/s41592-023-02151-z) - [MetricsReloaded preprint](https://arxiv.org/pdf/2206.01653v5.pdf) - preprint contains more figures than the publication ## Installation -Official installation instructions are available [here](https://github.com/Project-MONAI/MetricsReloaded?tab=readme-ov-file#installation). +The installation instructions are available [here](https://github.com/ivadomed/MetricsReloaded?tab=readme-ov-file#installation). + +> **Note** +> Note that we use an ivadomed fork. + > **Note** > Always install MetricsReloaded inside a virtual environment. 
@@ -20,7 +23,7 @@ conda activate metrics_reloaded # Clone the repository cd ~/code -git clone https://github.com/csudre/MetricsReloaded.git +git clone https://github.com/ivadomed/MetricsReloaded cd MetricsReloaded # Install the package From 440f1a910dad31e137d75896d952ec2ffd424317 Mon Sep 17 00:00:00 2001 From: valosekj Date: Tue, 16 Apr 2024 16:15:04 -0400 Subject: [PATCH 25/27] Update CI to reflecct the ivadomed/MetricsReloaded fork --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3fc0506..1d86790 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,7 +34,7 @@ jobs: run: | python -m pip install --upgrade pip pip install -r dataset_conversion/requirements.txt - git clone https://github.com/valosekj/MetricsReloaded.git + git clone https://github.com/ivadomed/MetricsReloaded.git cd MetricsReloaded git checkout jv/add_rel_vol_error_metric python -m pip install . From 034b2c80ff4283b88bb0101dc7a6ac9a33807e43 Mon Sep 17 00:00:00 2001 From: valosekj Date: Tue, 30 Apr 2024 18:54:25 +0200 Subject: [PATCH 26/27] Update CI after RVE PR merge (https://github.com/ivadomed/MetricsReloaded/pull/2) --- .github/workflows/ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1d86790..f1f2a89 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,7 +36,6 @@ jobs: pip install -r dataset_conversion/requirements.txt git clone https://github.com/ivadomed/MetricsReloaded.git cd MetricsReloaded - git checkout jv/add_rel_vol_error_metric python -m pip install . - name: Run tests with unittest From 23723aca32e9ef8001523318e406906bb540ed42 Mon Sep 17 00:00:00 2001 From: valosekj Date: Tue, 30 Apr 2024 18:54:45 +0200 Subject: [PATCH 27/27] Extend default metrics --- compute_metrics/compute_metrics_reloaded.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compute_metrics/compute_metrics_reloaded.py b/compute_metrics/compute_metrics_reloaded.py index 1d2ce1a..b859d24 100644 --- a/compute_metrics/compute_metrics_reloaded.py +++ b/compute_metrics/compute_metrics_reloaded.py @@ -66,11 +66,11 @@ def get_parser(): parser.add_argument('-reference', required=True, type=str, help='Path to the folder with nifti images of reference (ground truth) or path to a single ' 'nifti image of reference (ground truth).') - parser.add_argument('-metrics', nargs='+', default=['dsc', 'nsd'], + parser.add_argument('-metrics', nargs='+', default=['dsc', 'fbeta', 'nsd', 'vol_diff', 'rel_vol_error'], required=False, help='List of metrics to compute. For details, ' 'see: https://metricsreloaded.readthedocs.io/en/latest/reference/metrics/metrics.html. ' - 'Default: dsc, nsd') + 'Default: dsc, fbeta, nsd, vol_diff, rel_vol_error') parser.add_argument('-output', type=str, default='metrics.csv', required=False, help='Path to the output CSV file to save the metrics. Default: metrics.csv')