Merge pull request #60 from DIAGNijmegen/m-gc-picai-baseline

MHub / GC - Add PICAI baseline model/algorithm
MHubAI · Jan 10, 2024 · 56efeef · 56efeef
2 parents efbd003 + ed79ebd
commit 56efeef
Show file tree

Hide file tree

Showing 7 changed files with 419 additions and 0 deletions.
diff --git a/models/gc_picai_baseline/config/default.yml b/models/gc_picai_baseline/config/default.yml
@@ -0,0 +1,34 @@
+general:
+  data_base_dir: /app/data
+  version: 1.0
+  description: Prostate MRI classification default (dicom to json)
+
+execute:
+- FileStructureImporter
+- MhaConverter
+- PicaiBaselineRunner
+- ReportExporter
+- DataOrganizer
+
+modules:
+  FileStructureImporter:
+    input_dir: input_data
+    structures:
+      - $sid@instance/$type@dicom:mod=mr
+    import_id: sid
+
+  MhaConverter:
+    engine: panimg
+    allow_multi_input: true
+
+  ReportExporter:
+    format: compact
+    includes:
+      - data: prostate_cancer_likelihood
+        label: prostate_cancer_likelihood
+        value: value
+
+  DataOrganizer:
+    targets:
+      - json:mod=report-->[i:sid]/cspca-case-level-likelihood.json
+      - mha:mod=dm-->[i:sid]/cspca-detection-map.mha
diff --git a/models/gc_picai_baseline/config/mha-pipeline.yml b/models/gc_picai_baseline/config/mha-pipeline.yml
@@ -0,0 +1,31 @@
+general:
+  data_base_dir: /app/data
+  version: 1.0
+  description: Prostate MRI classification MHA pipeline (mha to json)
+
+execute:
+- FileStructureImporter
+- PicaiBaselineRunner
+- ReportExporter
+- DataOrganizer
+
+modules:
+  FileStructureImporter:
+    input_dir: input_data
+    structures:
+      - $sid@instance/images/transverse-adc-prostate-mri/adc.mha@mha:mod=mradc
+      - $sid/images/transverse-t2-prostate-mri/t2w.mha@mha:mod=mrt2
+      - $sid/images/transverse-hbv-prostate-mri/hbv.mha@mha:mod=mrhbv
+    import_id: sid
+
+  ReportExporter:
+    format: compact
+    includes:
+      - data: prostate_cancer_likelihood
+        label: prostate_cancer_likelihood
+        value: value
+
+  DataOrganizer:
+    targets:
+      - json:mod=report-->[i:sid]/cspca-case-level-likelihood.json
+      - mha:mod=hm-->[i:sid]/cspca-detection-map.mha
diff --git a/models/gc_picai_baseline/dockerfiles/Dockerfile b/models/gc_picai_baseline/dockerfiles/Dockerfile
@@ -0,0 +1,55 @@
+FROM mhubai/base:latest
+
+# Specify/override authors label
+LABEL authors="[email protected]"
+
+# Install PyTorch 2.0.1 (CUDA enabled)
+RUN pip3 install --no-cache-dir torch==2.0.1+cu118 -f https://download.pytorch.org/whl/torch_stable.html
+
+# Install git-lfs (required for unpacking model weights)
+RUN apt update && \
+    apt install -y --no-install-recommends git-lfs && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install PICAI baseline algorithm and model weights
+#   - Git clone the algorithm repository for v2.1.2 (fixed to v2.1.2 tag)
+#   - We remove unnecessary files for a compacter docker layer
+#   - Subsequently we remove the .git directory to procuce a compacter docker layer
+RUN git clone --depth 1 --branch v2.1.2 https://github.com/DIAGNijmegen/picai_nnunet_semi_supervised_gc_algorithm.git /opt/algorithm && \
+    rm -rf /opt/algorithm/test && \
+    rm -rf /opt/algorithm/.git
+
+# Set this environment variable as a shortcut to avoid nnunet==1.7.0 crashing the build
+# by pulling sklearn instead of scikit-learn
+# N.B. this is a known issue:
+# https://github.com/MIC-DKFZ/nnUNet/issues/1281
+# https://github.com/MIC-DKFZ/nnUNet/pull/1209
+ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True
+
+# Install additional PICAI requirements
+RUN pip3 install --no-cache-dir -r /opt/algorithm/requirements.txt
+
+# Extend the nnUNet installation with custom trainers
+RUN SITE_PKG=`pip3 show nnunet | grep "Location:" | awk '{print $2}'` && \
+    mv /opt/algorithm/nnUNetTrainerV2_focalLoss.py "$SITE_PKG/nnunet/training/network_training/nnUNet_variants/loss_function/nnUNetTrainerV2_focalLoss.py"
+RUN SITE_PKG=`pip3 show nnunet | grep "Location:" | awk '{print $2}'` && \
+    mv /opt/algorithm/nnUNetTrainerV2_Loss_CE_checkpoints.py "$SITE_PKG/nnunet/training/network_training/nnUNetTrainerV2_Loss_CE_checkpoints.py"
+RUN SITE_PKG=`pip3 show nnunet | grep "Location:" | awk '{print $2}'` && \
+    mv /opt/algorithm/nnUNetTrainerV2_Loss_FL_and_CE.py "$SITE_PKG/nnunet/training/network_training/nnUNetTrainerV2_Loss_FL_and_CE.py"
+
+# Two code edits to the __init__ method of the algorithm class in process.py to prevent some of its default behavior
+# 1. Skip forced error caused by using a different input locations than expected (we don't use the GC dirs)
+# 2. Prevent unnecessary folder creation before input directories have been set (we will set the correct directory later)
+RUN sed -i "s|file_paths = list(Path(folder).glob(scan_glob_format))|return|g" /opt/algorithm/process.py && \
+    sed -i "s|self.cspca_detection_map_path.parent.mkdir(exist_ok=True, parents=True)||g" /opt/algorithm/process.py
+
+# Import the MHub model definiton
+ARG MHUB_MODELS_REPO
+RUN buildutils/import_mhub_model.sh gc_picai_baseline ${MHUB_MODELS_REPO}
+
+# Add lobe segmentation code base to python path
+ENV PYTHONPATH="/app:/opt/algorithm"
+
+# Default entrypoint
+ENTRYPOINT ["python3", "-m", "mhubio.run"]
+CMD ["--config", "/app/models/gc_picai_baseline/config/default.yml"]
diff --git a/models/gc_picai_baseline/meta.json b/models/gc_picai_baseline/meta.json
@@ -0,0 +1,179 @@
+{
+  "id": "c5f886fb-9f54-4555-a954-da02b22d6d3f",
+  "name": "gc_picai_baseline",
+  "title": "PI-CAI challenge baseline",
+  "summary": {
+    "description": "This algorithm predicts a detection map for the likelihood of clinically significant prostate cancer (csPCa) using biparametric MRI (bpMRI). The algorithm ensembles 5-fold cross-validation models that were trained on the PI-CAI: Public Training and Development Dataset v2.0. The detection map is at the same spatial resolution and physical dimensions as the input axial T2-weighted image. This model algorithm was used as a baseline for the PI-CAI challenge hosted on Grand Challenge.",
+    "inputs": [
+      {
+        "label": "Transverse T2-weighted prostate biparametric MRI",
+        "description": "Transverse T2-weighted prostate biparametric MRI exam.",
+        "format": "DICOM",
+        "modality": "MR",
+        "bodypartexamined": "Prostate",
+        "slicethickness": "0.5 x 0.5 x 3.0 mm",
+        "non-contrast": false,
+        "contrast": false
+      },
+      {
+        "label": "Transverse high b-value diffusion-weighted maps of the prostate",
+        "description": "Transverse high b-value diffusion-weighted (DWI) maps, with b-value of 1400 or 2000, either acquired or vendor-calculated.",
+        "format": "DICOM",
+        "modality": "MR",
+        "bodypartexamined": "Prostate",
+        "slicethickness": "0.5 x 0.5 x 3.0 mm",
+        "non-contrast": false,
+        "contrast": false
+      },
+      {
+        "label": "Transverse apparent diffusion coefficient map of the prostate",
+        "description": "Transverse apparent diffusion coefficient (ADC) prostate MRI map.",
+        "format": "DICOM",
+        "modality": "MR",
+        "bodypartexamined": "Prostate",
+        "slicethickness": "0.5 x 0.5 x 3.0 mm",
+        "non-contrast": false,
+        "contrast": false
+      }
+    ],
+    "outputs": [
+      {
+        "type": "Prediction",
+        "valueType": "Likelihood",
+        "label": "Prostate cancer likelihood",
+        "description": "Case-level likelihood of harboring clinically significant prostate cancer, in range [0,1].",
+        "classes": []
+      },
+      {
+        "type": "Prediction",
+        "valueType": "Likelihood map",
+        "label": "Transverse cancer detection map",
+        "description": "Detection map of clinically significant prostate cancer lesions in 3D, where each voxel represents a floating point in range [0,1]. This map is at the same spatial resolution and physical dimensions as the input transversal T2-weighted image.",
+        "classes": []
+      }
+    ],
+    "model": {
+      "architecture": "3d fullres nnUNet",
+      "training": "semi-supervised",
+      "cmpapproach": "3D"
+    },
+    "data": {
+      "training": {
+        "vol_samples": 1500
+      },
+      "evaluation": {
+        "vol_samples": 1000
+      },
+      "public": false,
+      "external": false
+    }
+  },
+  "details": {
+    "name": "PI-CAI challenge baseline",
+    "version": "v2.1.1",
+    "devteam": "Diagnostic Image Analysis Group, Radboud University Medical Center, Nijmegen, The Netherlands",
+    "type": "Prediction",
+    "date": {
+      "weights": "2022-06-22",
+      "code": "2022-09-05",
+      "pub": ""
+    },
+    "cite": "J. S. Bosma, A. Saha, M. Hosseinzadeh, I. Slootweg, M. de Rooij, and H. Huisman, \"Semisupervised Learning with Report-guided Pseudo Labels for Deep Learning–based Prostate Cancer Detection Using Biparametric MRI\", Radiology: Artificial Intelligence, 230031, 2023. DOI: 10.1148/ryai.230031",
+    "license": {
+      "code": "Apache 2.0",
+      "weights": "CC-BY-NC-SA-4.0"
+    },
+    "publications": [
+      {
+        "uri": "https://doi.org/10.5281/zenodo.6667655",
+        "title": "Artificial Intelligence and Radiologists at Prostate Cancer Detection in MRI: The PI-CAI Challenge (Study Protocol)"
+      },
+      {
+        "uri": "https://pubs.rsna.org/doi/10.1148/ryai.230031",
+        "title": "Semisupervised Learning with Report-guided Pseudo Labels for Deep Learning–based Prostate Cancer Detection Using Biparametric MRI"
+      }
+    ],
+    "github": "https://github.com/DIAGNijmegen/picai_nnunet_semi_supervised_gc_algorithm",
+    "zenodo": "",
+    "colab": "",
+    "slicer": false
+  },
+  "info": {
+    "use": {
+      "title": "Intended use",
+      "text": "This algorithm is a deep learning-based detection/diagnosis model, which ensembles 5 independent nnU-Net models (5-fold cross-validation). To predict the likelihood of harboring clinically significant prostate cancer (csPCa), the transversal T2-weighted, apparent diffusion coefficient (ADC) and high b-value diffusion weighted maps are required. The input sequences should be co-registered or aligned reasonably well and the prostate gland should be localized within a volume of 460 cm³ from the centre coordinate. The nnU-Net framework will internally resample all input scans to 0.5 x 0.5 x 3.0 mm. Per case the input data should be put into the following folder structure: `case1/adc`, `case1/hbv`, `case1/t2w`, corresponding respectively with the ADC, high b-value DWI, and the T2 weighted MR inputs for a case called `case1`.",
+      "references": [
+        {
+          "label": "PI-CAI baseline algorithm on grand-challenge",
+          "uri": "https://grand-challenge.org/algorithms/pi-cai-baseline-nnu-net-semi-supervised/"
+        }
+      ],
+      "tables": []
+    },
+    "analyses": {
+      "title": "Evaluation",
+      "text": "Patient-level diagnosis performance is evaluated using the Area Under Receiver Operating Characteristic (AUROC) metric. Lesion-level detection performance is evaluated using the Average Precision (AP) metric.",
+      "references": [
+        {
+          "label": "PI-CAI AI challenge details",
+          "uri": "https://pi-cai.grand-challenge.org/AI/"
+        },
+        {
+          "label": "PI-CAI baseline algorithm evaluation results on grand-challenge.",
+          "uri": "https://pi-cai.grand-challenge.org/evaluation/fe187cdb-cb61-4cbb-ab63-2de483a52d60/"
+        }
+      ],
+      "tables": [
+        {
+          "label": "Evaluation results on the PI-CAI testing cohort of 1000 cases.",
+          "entries": {
+              "AUROC": "0.865",
+              "AP": "0.576"
+          }
+        }
+      ]
+    },
+    "evaluation": {
+      "title": "Evaluation data",
+      "text": "The PI-CAI Hidden Testing Cohort (1000 cases) includes internal testing data (unseen cases from seen centers) and external testing data (unseen cases from an unseen center).",
+      "references": [
+        {
+          "label": "PI-CAI data section",
+          "uri": "https://pi-cai.grand-challenge.org/DATA/"
+        }
+      ],
+      "tables": []
+    },
+    "training": {
+      "title": "Training data",
+      "text": "The publicly available PI-CAI training and development dataset of 1500 biparametric MRI exams was used for training [1]. AI-derived annotations were created for cases without manual annotations [2]. This model was trained using a custom preprocessing step followed by the standard nnU-Net pipeline. The default nnU-Net loss-function was changed to Cross-Entropy + Focal loss [3].",
+      "references": [
+        {
+          "label": "PI-CAI publicly available training and development dataset",
+          "uri": "https://zenodo.org/record/6624726"
+        },
+        {
+          "label": "Method to obtain AI-derived annotations",
+          "uri": "https://fastmri.eu/research/bosma22a.html"
+        },
+        {
+          "label": "Detailed description of training method",
+          "uri": "https://github.com/DIAGNijmegen/picai_baseline/blob/main/nnunet_baseline.md"
+        }
+      ],
+      "tables": []
+    },
+    "ethics": {
+      "title": "",
+      "text": "",
+      "references": [],
+      "tables": []
+    },
+    "limitations": {
+      "title": "Limitations",
+      "text": "This algorithm was developed for research purposes only.",
+      "references": [],
+      "tables": []
+    }
+  }
+}
diff --git a/models/gc_picai_baseline/utils/PicaiBaselineRunner.py b/models/gc_picai_baseline/utils/PicaiBaselineRunner.py
@@ -0,0 +1,65 @@
+"""
+---------------------------------------------------------
+Mhub / DIAG - Run Module for the PICAI baseline Algorithm
+---------------------------------------------------------
+
+---------------------------------------------------------
+Author: Sil van de Leemput
+Email:  [email protected]
+---------------------------------------------------------
+"""
+
+import json
+import sys
+from pathlib import Path
+
+from mhubio.core import Instance, InstanceData, IO, Module, ValueOutput, ClassOutput, Meta
+
+
+CLI_PATH = Path(__file__).parent / "cli.py"
+
+
+@ValueOutput.Name('prostate_cancer_likelihood')
+@ValueOutput.Label('ProstateCancerLikelihood')
+@ValueOutput.Type(float)
+@ValueOutput.Description('Likelihood of case-level prostate cancer.')
+class ProstateCancerLikelihood(ValueOutput):
+    pass
+
+
+class PicaiBaselineRunner(Module):
+
+    @IO.Instance()
+    @IO.Input('in_data_t2', 'mha:mod=mr:type=t2w', the='input T2 weighted prostate MR image')
+    @IO.Input('in_data_adc', 'mha:mod=mr:type=adc', the='input ADC prostate MR image')
+    @IO.Input('in_data_hbv', 'mha:mod=mr:type=hbv', the='input HBV prostate MR image')
+    @IO.Output('cancer_likelihood_json', 'cspca-case-level-likelihood.json', "json", bundle='model', the='output JSON file with PICAI baseline prostate cancer likelihood')
+    @IO.Output('cancer_lesion_detection_map', 'cspca-detection-map.mha', "mha:mod=dm", bundle='model', the='output detection map of clinically significant prostate cancer lesions in 3D, where each voxel represents a floating point in range [0,1]')
+    @IO.OutputData('cancer_likelihood', ProstateCancerLikelihood, the='PICAI baseline prostate cancer likelihood')
+    def task(self, instance: Instance, in_data_t2: InstanceData, in_data_adc: InstanceData, in_data_hbv: InstanceData, cancer_likelihood_json: InstanceData, cancer_lesion_detection_map: InstanceData, cancer_likelihood: ProstateCancerLikelihood) -> None:
+        # build command (order matters!)
+        cmd = [
+            sys.executable,
+            str(CLI_PATH),
+            in_data_t2.abspath,
+            in_data_adc.abspath,
+            in_data_hbv.abspath,
+            cancer_likelihood_json.abspath,
+            cancer_lesion_detection_map.abspath,
+        ]
+
+        # run the command as subprocess
+        self.subprocess(cmd, text=True)
+
+        # Extract cancer likelihood value from cancer_likelihood_file
+        if not Path(cancer_likelihood_json.abspath).is_file():
+            raise FileNotFoundError(f"Output file {cancer_likelihood_json.abspath} could not be found!")
+
+        with open(cancer_likelihood_json.abspath, "r") as f:
+            cancer_lh = float(json.load(f))
+
+        if not (isinstance(cancer_lh, (float, int)) and (0.0 <= cancer_lh <= 1.0)):
+            raise ValueError(f"Cancer likelihood value should be between 0 and 1, found: {cancer_lh}")
+
+        # Output the predicted values
+        cancer_likelihood.value = cancer_lh
diff --git a/models/gc_picai_baseline/utils/__init__.py b/models/gc_picai_baseline/utils/__init__.py
@@ -0,0 +1 @@
+from .PicaiBaselineRunner import *