Skip to content

Commit

Permalink
Match the prediction and reference files based on the participant_id,…
Browse files Browse the repository at this point in the history
… acq_id, and run_id.
  • Loading branch information
valosekj committed Dec 9, 2024
1 parent 76dbb55 commit ce16ec8
Showing 1 changed file with 50 additions and 10 deletions.
60 changes: 50 additions & 10 deletions compute_metrics_reloaded.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
python compute_metrics_reloaded.py
-reference /path/to/reference
-prediction /path/to/prediction
NOTE: The prediction and reference files are matched based on the participant_id, acq_id, and run_id.
The metrics to be computed can be specified using the `-metrics` argument. For example, to compute only the Dice
similarity coefficient (DSC) and Normalized surface distance (NSD), use:
Expand All @@ -37,6 +38,7 @@


import os
import re
import argparse
import numpy as np
import nibabel as nib
Expand Down Expand Up @@ -103,25 +105,63 @@ def load_nifti_image(file_path):
return nifti_image.get_fdata()


def fetch_participant_id_acq_id_run_id(filename_path, prefix='sub-'):
    """
    Get participant_id, acq_id, and run_id from the input BIDS-compatible filename or file path.
    The function works both on absolute file paths as well as filenames.

    Each entity is searched in the filename itself first, so that a parent directory whose name happens to
    contain 'sub-', 'acq-' or 'run-' cannot shadow the entity of the file. The full path is used only as a
    fallback when the filename does not contain the entity.

    An entity ends at the next underscore or slash, or right before the file extension / at the end of the
    string (e.g., 'sub-001_run-01.nii.gz' yields the run_id 'run-01').

    :param filename_path: input nifti filename (e.g., sub-001_ses-01_T1w.nii.gz) or file path
    (e.g., /home/user/bids/sub-001/ses-01/anat/sub-001_ses-01_T1w.nii.gz)
    :param prefix: prefix of the participant ID in the filename (default: 'sub-')
    :return: participant_id: participant ID (e.g., sub-001); empty string if not found
    :return: acq_id: acquisition ID (e.g., acq-01); empty string if not found
    :return: run_id: run ID (e.g., run-01); empty string if not found
    """
    filename = os.path.basename(filename_path)

    def _find_entity(entity_prefix):
        # REGEX explanation
        # re.escape          - the prefix is matched literally, even if it contains regex metacharacters
        # .*?                - match any character (except newline) as few times as possible
        # (?=[_/]|...)       - stop right before an underscore or a slash (the separator is not consumed) ...
        # (?:\.[A-Za-z0-9]+)*\Z - ... or right before the file extension(s) / at the very end of the string
        pattern = re.escape(entity_prefix) + r'.*?(?=[_/]|(?:\.[A-Za-z0-9]+)*\Z)'
        # The filename takes precedence over the directories of the path
        for text in (filename, filename_path):
            match = re.search(pattern, text)
            if match:
                return match.group(0)
        return ""

    participant_id = _find_entity(prefix)
    acq_id = _find_entity('acq-')
    run_id = _find_entity('run-')

    return participant_id, acq_id, run_id


def get_images_in_folder(prediction, reference):
    """
    Get all files (predictions and references/ground truths) in the input directories.
    The prediction and reference files are matched based on the participant_id, acq_id, and run_id.

    Files without a counterpart in the other directory are reported and skipped, so the two directories do
    not need to contain the same number of files. The returned lists are aligned: the i-th prediction file
    belongs to the i-th reference file.

    :param prediction: path to the directory with prediction files
    :param reference: path to the directory with reference (ground truth) files
    :return: list of prediction files, list of reference/ground truth files
    :raises ValueError: if several files in one directory share the same participant_id, acq_id, and run_id
    (the pairing would be ambiguous)
    """
    keys = ['participant_id', 'acq_id', 'run_id']

    df_pred = _index_nifti_files(prediction)
    df_ref = _index_nifti_files(reference)

    # A key occurring more than once on either side would make the merge below produce a cross product,
    # i.e., pair predictions with the wrong references. Fail loudly instead.
    for label, frame in (('prediction', df_pred), ('reference', df_ref)):
        ambiguous = frame.loc[frame.duplicated(subset=keys, keep=False), 'filename'].tolist()
        if ambiguous:
            raise ValueError(f'Several {label} files share the same participant_id, acq_id, and run_id; '
                             f'cannot match them unambiguously: {ambiguous}')

    # Merge the two dataframes on participant_id, acq_id, run_id
    # indicator=True adds the '_merge' column telling whether a row was found on both sides or only on one
    df = pd.merge(df_pred, df_ref, on=keys, how='outer', suffixes=('_pred', '_ref'), indicator=True)

    # Report the files that have no counterpart in the other directory
    for side, column in (('left_only', 'filename_pred'), ('right_only', 'filename_ref')):
        for filename in df.loc[df['_merge'] == side, column]:
            print(f'WARNING: No matching file found for {filename}. Skipping it.')

    # Keep only the rows with both a prediction and a reference file
    df = df[df['_merge'] == 'both']
    print(f'Found {len(df)} matched prediction-reference pairs.')

    prediction_files = df['filename_pred'].tolist()
    reference_files = df['filename_ref'].tolist()

    return prediction_files, reference_files


def _index_nifti_files(directory):
    """
    List the '.nii.gz' files of a directory together with their participant_id, acq_id, and run_id.

    :param directory: path to the directory to list
    :return: dataframe with the columns 'filename' (directory joined with the filename), 'participant_id',
    'acq_id', and 'run_id'; the dataframe has these columns even when no file is found
    """
    rows = []
    # Sorted so that the result does not depend on the arbitrary order of os.listdir
    for name in sorted(os.listdir(directory)):
        if name.endswith('.nii.gz'):
            # The IDs are fetched from the bare filename so that the name of the directory cannot leak into them
            rows.append((os.path.join(directory, name), *fetch_participant_id_acq_id_run_id(name)))
    return pd.DataFrame(rows, columns=['filename', 'participant_id', 'acq_id', 'run_id'])

Expand Down

0 comments on commit ce16ec8

Please sign in to comment.