ivadomed · plbenveniste · Mar 11, 2024 · Mar 11, 2024 · Mar 11, 2024 · Mar 12, 2024
diff --git a/monai/average_tta_performance.py b/monai/average_tta_performance.py
@@ -0,0 +1,84 @@
+"""
+This file is used to get all the dice_scores_X.txt files in a directory and average them.
+
+Input:
+    - Path to the directory containing the dice_scores_X.txt files
+
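+Output:
+    None (the averaged scores and their standard deviations are written to dice_scores.txt and std.txt in the input directory)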
+
+Example:
+    python average_tta_performance.py --pred-dir-path /path/to/dice_scores
+
+Author: Pierre-Louis Benveniste
+"""
+
+import os
+import argparse
+import numpy as np
+import pandas as pd
+from pathlib import Path
+
+
+def get_parser():
+    """
+    This function returns the parser for the command line arguments.
+    """
+    parser = argparse.ArgumentParser(description="Average the performance of the model")
+    parser.add_argument("--pred-dir-path", help="Path to the directory containing the dice_scores_X.txt files", required=True)
+    return parser
+
+
+def main():
+    """
+    This function is used to average the performance of the model on the test set.
+
+    Args:
+        None
+
+    Returns:
+        None
+    """
+    # Get the parser
+    parser = get_parser()
+    args = parser.parse_args()
+
+    # Path to the dice_scores
+    path_to_outputs = args.pred_dir_path
+
+    # Get all the dice_scores_X.txt files using rglob
+    dice_score_files = [str(file) for file in Path(path_to_outputs).rglob("dice_scores_*.txt")]
+
+    # Dict to store the dice scores
+    dice_scores = {}
+
+    # Loop over the dice_scores_X.txt files
+    for dice_score_file in dice_score_files:
+        # Open dice results (they are txt files)
+        with open(os.path.join(path_to_outputs, dice_score_file), 'r') as file:
+            for line in file:
+                key, value = line.strip().split(':')
+                if key in dice_scores:
+                    dice_scores[key].append(float(value))
+                else:
+                    dice_scores[key] = [float(value)]
+
+    # Average the dice scores ang get standard deviation
+    std = {}
+    for key in dice_scores:
+        std[key] = np.std(dice_scores[key])
+        dice_scores[key] = np.mean(dice_scores[key])
+
+    # Save the averaged dice scores
+    with open(os.path.join(path_to_outputs, "dice_scores.txt"), 'w') as file:
+        for key in dice_scores:
+            file.write(f"{key}: {dice_scores[key]}\n")
+
+    # Save the standard deviation
+    with open(os.path.join(path_to_outputs, "std.txt"), 'w') as file:
+        for key in std:
+            file.write(f"{key}: {std[key]}\n")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/monai/compute_performance_tta_sum.py b/monai/compute_performance_tta_sum.py
@@ -0,0 +1,130 @@
+"""
+This script sums all the predictions of the same subject, thresholds the sum at 0.5 and then computes the dice score.
+
+Input:
+    --path-pred: Path to the directory containing the predictions
+    --path-json: Path to the json file containing the data split
+    --split: Data split to use (train, validation, test)
+    --output-dir: Output directory to save the dice scores
+
+Output:
+    None (the dice scores are written to dice_scores.txt in the output directory)
+
+Example:
+    python compute_performance_tta_sum.py --path-pred /path/to/predictions --path-json /path/to/data.json --split test --output-dir /path/to/output
+
+Author: Pierre-Louis Benveniste
+"""
+
+import os
+import numpy as np
+import argparse
+from pathlib import Path
+import json
+import nibabel as nib
+from tqdm import tqdm
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--path-pred", type=str, required=True, help="Path to the directory containing the predictions")
+    parser.add_argument("--path-json", type=str, required=True, help="Path to the json file containing the data split")
+    parser.add_argument("--split", type=str, required=True, help="Data split to use (train, validation, test)")
+    parser.add_argument("--output-dir", type=str, required=True, help="Output directory to save the dice scores")
+    return parser.parse_args()
+
+
+def dice_score(prediction, groundtruth, smooth=1.):
+    numer = (prediction * groundtruth).sum()
+    denor = (prediction + groundtruth).sum()
+    # loss = (2 * numer + self.smooth) / (denor + self.smooth)
+    dice = (2 * numer + smooth) / (denor + smooth)
+    return dice
+
+
+def main():
+
+    # Parse arguments
+    args = parse_args()
+    path_pred = args.path_pred
+    path_json = args.path_json
+    split = args.split
+    output_dir = args.output_dir
+
+    # Create the output directory
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+
+    # Get all the predictions (with rglob)
+    predictions = list(Path(path_pred).rglob("*.nii.gz"))
+
+    # List of subjects
+    subjects = [pred.name for pred in predictions]
+
+    n_tta = 10
+
+    for i in range(n_tta):
+        # Remove the _pred_0, _pred_1 ... _pred_9 at the end of the name
+        subjects = [sub.replace(f"_pred_{i}", "") for sub in subjects]
+
+    # Open the conversion dictionary (its a json file)
+    with open(path_json, "r") as f:
+        conversion_dict = json.load(f)
+    conversion_dict = conversion_dict[split]
+
+    # Dict of dice score
+    dice_scores = {}
+
+    # Iterate over the subjects in the predictions
+    for subject in subjects:
+        print(f"Processing subject {subject}")
+
+        # Get all predictions corresponding to the subject
+        subject_predictions = [str(pred) for pred in predictions if subject.replace(".nii.gz", "") in pred.name]
+        # print(subject_predictions)
+
+        # Find the corresponding label from the conversion dict
+
+        image_dict = [data for data in conversion_dict if subject in data["image"]]
+        label = image_dict[0]["label"]
+        image = image_dict[0]["image"]
+
+        # We now sum all the predictions
+        summed_prediction = None
+        for pred in subject_predictions:
+            pred_data = nib.load(pred).get_fdata()
+            if summed_prediction is None:
+                summed_prediction = pred_data
+            else:
+                summed_prediction += pred_data
+
+        # Threshold the summed prediction
+        summed_prediction[summed_prediction >= 0.5] = 1
+        summed_prediction[summed_prediction < 0.5] = 0
+
+        # Load the label
+        label_data = nib.load(label).get_fdata()
+
+        # Compute dice score
+        dice = dice_score(summed_prediction, label_data)
+        # print(f"Dice score for summed prediction: {dice}")
+
+        # Compare the dice score with the individual predictions
+        for pred in subject_predictions:
+            pred_data = nib.load(pred).get_fdata()
+            dice_pred = dice_score(pred_data, label_data)
+            # print(f"Dice score for {pred}: {dice_pred}")
+
+        # Save the dice score
+        dice_scores[image] = dice
+
+    # Save the results  
+    with open(os.path.join(output_dir, "dice_scores.txt"), "w") as f:
+        for key, value in dice_scores.items():
+            f.write(f"{key}: {value}\n")
+
+    return None
+
+
+if __name__ == "__main__":
+    main()
diff --git a/monai/config.yml b/monai/config.yml
@@ -0,0 +1,50 @@
+# Description: Configuration file for the UNETR model
+
+# Path to the data json file
+# data: /home/GRAMES.POLYMTL.CA/p119007/ms_lesion_agnostic/data/monai_data/fake.json
+# data: /home/GRAMES.POLYMTL.CA/p119007/ms_lesion_agnostic/data/monai_data/fake_lesion_sc.json
+# data: /home/GRAMES.POLYMTL.CA/p119007/ms_lesion_agnostic/data/monai_data/fake_10_each.json
+# data: /home/GRAMES.POLYMTL.CA/p119007/ms_lesion_agnostic/data/monai_data/fake_sc.json
+# data: /home/GRAMES.POLYMTL.CA/p119007/ms_lesion_agnostic/data/monai_data/dataset_2024-03-13_seed42_canproco.json
+# data: /home/GRAMES.POLYMTL.CA/p119007/ms_lesion_agnostic/data/monai_data/dataset_2024-04-05_seed42_lesionOnly.json
+# data: /home/GRAMES.POLYMTL.CA/p119007/ms_lesion_agnostic/data/monai_data/dataset_2024-04-17_seed42_lesionOnly.json
+data: /home/plbenveniste/net/ms-lesion-agnostic/msd_data/dataset_2024-06-26_seed42_lesionOnly.json
+# data: /home/plbenveniste/net/ms-lesion-agnostic/msd_data/dataset_2024-08-13_seed42_lesionOnly.json
+# data: /home/plbenveniste/net/ms-lesion-agnostic/msd_data/fake.json
+
+# Resampling resolution
+# pixdim : [1.0, 1.0, 1.0]
+pixdim : [0.7, 0.7, 0.7]
+# pixdim : [0.5, 0.5, 0.5]
+
+# Spatial size of the input data
+spatial_size : [64, 128, 128] # RL, AP, IS
+batch_size : 4 # smaller batch sizes lead to better generalization (https://arxiv.org/abs/1609.04836) but take longer to train
+
+# Augmentation parameters
+DA_probability : 0.2
+
+# Optimizer parameters
+lr : 0.0001
+weight_decay: 0.00001
+early_stopping_patience : 50
+
+# Training parameters
+max_iterations : 250
+eval_num : 2
+
+# Outputs
+# output_path : /home/GRAMES.POLYMTL.CA/p119007/ms_lesion_agnostic/results/
+output_path : /home/plbenveniste/net/ms-lesion-agnostic/results/
+# output_path : /home/plbenveniste/net/ms-lesion-agnostic/results_cropped_head/
+
+# Seed
+seed : 42
+
+# UNET model parameters
+unet_channels : [32, 64, 128, 256, 512, 1024]
+unet_strides : [2, 2, 2, 2, 2, 2, 2]
+
+# AttentionUnet
+attention_unet_channels : [32, 64, 128, 256, 512]
+attention_unet_strides : [2, 2, 2, 2, 2]
diff --git a/monai/config_test.yml b/monai/config_test.yml
@@ -0,0 +1,21 @@
+# dataset :  /home/GRAMES.POLYMTL.CA/p119007/ms_lesion_agnostic/data/monai_data/dataset_2024-04-17_seed42_lesionOnly.json
+# dataset : /home/GRAMES.POLYMTL.CA/p119007/ms_lesion_agnostic/data/monai_data/dataset_2024-04-05_seed42_lesionOnly.json
+dataset : /home/plbenveniste/net/ms-lesion-agnostic/msd_data/dataset_2024-06-26_seed42_lesionOnly.json
+# dataset : /home/plbenveniste/net/ms-lesion-agnostic/msd_data/dataset_2024-08-13_seed42_lesionOnly.json
+# dataset : /home/plbenveniste/net/ms-lesion-agnostic/msd_data/dataset_optThresh.json
+# dataset : /home/plbenveniste/net/ms-lesion-agnostic/msd_data/fake.json
+
+pixdim : [0.7, 0.7, 0.7]
+spatial_size : [64, 128, 128]
+attention_unet_channels : [32, 64, 128, 256, 512]
+attention_unet_strides : [2, 2, 2, 2, 2]
+
+# path_to_model : /home/GRAMES.POLYMTL.CA/p119007/ms_lesion_agnostic/results/2024-04-21_16:06:04.890513/best_model.pth/best_model.ckpt
+# path_to_model : /home/plbenveniste/net/ms-lesion-agnostic/tta_exp/best_model.pth/best_model.ckpt
+path_to_model : /home/plbenveniste/net/ms-lesion-agnostic/results/2024-07-18_10:46:21.634514/best_model.pth/best_model.ckpt
+# path_to_model : /home/plbenveniste/net/ms-lesion-agnostic/results/2024-09-02_12:14:28.124188/best_model.pth/best_model.ckpt
+
+# output_dir : /home/GRAMES.POLYMTL.CA/p119007/ms_lesion_agnostic/results/2024-04-21_16:06:04.890513/
+# output_dir : /home/plbenveniste/net/ms-lesion-agnostic/tta_exp
+output_dir : /home/plbenveniste/net/ms-lesion-agnostic/results/2024-07-18_10:46:21.634514/
+# output_dir : /home/plbenveniste/net/ms-lesion-agnostic/results/2024-09-02_12:14:28.124188/
diff --git a/monai/plot_optThresh.py b/monai/plot_optThresh.py
@@ -0,0 +1,85 @@
+"""
+This script plots the performance of the model based on the threshold applied to the predictions.
+
+Input:
+    --path-scores: Path to the directory containing the dice_scores_X.txt files
+
+Output:
+    None (the plot is saved as dice_scores_contrast.png in the input directory)
+
+Example:
+    python plot_optThresh.py --path-scores /path/to/dice_scores
+
+Author: Pierre-Louis Benveniste
+"""
+
+import os
+import argparse
+import numpy as np
+from pathlib import Path
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+
+def get_parser():
+    """
+    This function returns the parser for the command line arguments.
+    """
+    parser = argparse.ArgumentParser(description="Plot the optimal threshold")
+    parser.add_argument("--path-scores", help="Path to the directory containing the dice_scores_X.txt files", required=True)
+    return parser
+
+
+def main():
+
+    # Get the parser
+    parser = get_parser()
+    args = parser.parse_args()
+
+    # Path to the dice_scores
+    path_to_outputs = args.path_scores
+
+    # Get all the dice_scores_X.txt files using rglob
+    dice_score_files = [str(file) for file in Path(path_to_outputs).rglob("dice_scores_*.txt")]
+
+    # Create a list to store the dataframes
+    test_dice_results_list = [None] * len(dice_score_files)
+
+    # For each file, get the threshold and the dice score
+    for i, dice_score_file in enumerate(dice_score_files):
+        test_dice_results = {}
+        with open(dice_score_file, 'r') as file:
+            for line in file:
+                key, value = line.strip().split(':')
+                test_dice_results[key] = float(value)
+        # convert to a df with name and dice score
+        test_dice_results_list[i] = pd.DataFrame(list(test_dice_results.items()), columns=['name', 'dice_score'])
+        # Create a column which stores the threshold
+        test_dice_results_list[i]['threshold'] = str(Path(dice_score_file).name).replace('dice_scores_', '').replace('.txt', '').replace('_', '.')
+
+    # Concatenate all the dataframes
+    test_dice_results = pd.concat(test_dice_results_list)
+
+    # Plot
+    plt.figure(figsize=(20, 10))
+    plt.grid(True)
+    sns.violinplot(x='threshold', y='dice_score', data=test_dice_results)
+    # y ranges from -0.2 to 1.2
+    plt.ylim(-0.2, 1.2)
+    plt.title('Dice scores per threshold')
+    plt.show()
+
+    # Save the plot
+    plt.savefig(path_to_outputs + '/dice_scores_contrast.png')
+    print(f"Saved the dice_scores plot in {path_to_outputs}")
+
+    # Print the average dice score per threshold
+    for thresh in test_dice_results['threshold'].unique():
+        print(f"Threshold: {thresh} - Average dice score: {test_dice_results[test_dice_results['threshold'] == thresh]['dice_score'].mean()}")
+
+    return None
+
+
+if __name__ == "__main__":
+    main()