Skip to content

Commit

Permalink
Merge pull request #60 from DIAGNijmegen/m-gc-picai-baseline
Browse files Browse the repository at this point in the history
MHub / GC - Add PICAI baseline model/algorithm
  • Loading branch information
LennyN95 authored Jan 10, 2024
2 parents efbd003 + ed79ebd commit 56efeef
Show file tree
Hide file tree
Showing 7 changed files with 419 additions and 0 deletions.
34 changes: 34 additions & 0 deletions models/gc_picai_baseline/config/default.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Default workflow: import DICOM MR series, convert them to MHA, run the
# PICAI baseline runner and export the case-level report and detection map.
general:
  data_base_dir: /app/data
  version: 1.0
  description: Prostate MRI classification default (dicom to json)

# Modules are listed in the order in which they are executed.
execute:
  - FileStructureImporter
  - MhaConverter
  - PicaiBaselineRunner
  - ReportExporter
  - DataOrganizer

modules:
  FileStructureImporter:
    input_dir: input_data
    structures:
      # One instance per $sid folder; each sub-folder name is captured as $type.
      - '$sid@instance/$type@dicom:mod=mr'
    import_id: sid

  MhaConverter:
    engine: panimg
    allow_multi_input: true

  ReportExporter:
    format: compact
    includes:
      # Refers to the value output named `prostate_cancer_likelihood`.
      - data: prostate_cancer_likelihood
        label: prostate_cancer_likelihood
        value: value

  DataOrganizer:
    targets:
      # selector-->output path, relative to the organizer's output directory
      - 'json:mod=report-->[i:sid]/cspca-case-level-likelihood.json'
      - 'mha:mod=dm-->[i:sid]/cspca-detection-map.mha'
31 changes: 31 additions & 0 deletions models/gc_picai_baseline/config/mha-pipeline.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# MHA workflow: import pre-converted MHA images, run the PICAI baseline
# runner and export the case-level report and detection map.
general:
  data_base_dir: /app/data
  version: 1.0
  description: Prostate MRI classification MHA pipeline (mha to json)

# Modules are listed in the order in which they are executed.
execute:
  - FileStructureImporter
  - PicaiBaselineRunner
  - ReportExporter
  - DataOrganizer

modules:
  FileStructureImporter:
    input_dir: input_data
    structures:
      # Each image is tagged `mod=mr` plus a `type`, because PicaiBaselineRunner
      # selects its inputs with `mha:mod=mr:type=adc|t2w|hbv`.
      # NOTE(review): only the first structure carries `@instance`; kept as in
      # the original - confirm this is intended for the other two entries.
      - '$sid@instance/images/transverse-adc-prostate-mri/adc.mha@mha:mod=mr:type=adc'
      - '$sid/images/transverse-t2-prostate-mri/t2w.mha@mha:mod=mr:type=t2w'
      - '$sid/images/transverse-hbv-prostate-mri/hbv.mha@mha:mod=mr:type=hbv'
    import_id: sid

  ReportExporter:
    format: compact
    includes:
      # Refers to the value output named `prostate_cancer_likelihood`.
      - data: prostate_cancer_likelihood
        label: prostate_cancer_likelihood
        value: value

  DataOrganizer:
    targets:
      # selector-->output path, relative to the organizer's output directory
      - 'json:mod=report-->[i:sid]/cspca-case-level-likelihood.json'
      # `mod=dm` matches the detection-map output declared by PicaiBaselineRunner.
      - 'mha:mod=dm-->[i:sid]/cspca-detection-map.mha'
55 changes: 55 additions & 0 deletions models/gc_picai_baseline/dockerfiles/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
FROM mhubai/base:latest

# Specify/override authors label
LABEL authors="[email protected]"

# Install PyTorch 2.0.1 (CUDA enabled, cu118 build)
RUN pip3 install --no-cache-dir torch==2.0.1+cu118 -f https://download.pytorch.org/whl/torch_stable.html

# Install git-lfs (required for unpacking model weights)
# apt-get is used rather than apt, whose command line interface is not meant for scripts.
RUN apt-get update && \
    apt-get install -y --no-install-recommends git-lfs && \
    rm -rf /var/lib/apt/lists/*

# Install PICAI baseline algorithm and model weights
#   - Shallow-clone the algorithm repository, pinned to the v2.1.2 tag
#   - Remove the test directory and the .git directory in the same RUN step
#     to produce a more compact docker layer
# NOTE(review): meta.json lists details.version "v2.1.1" while this clone is
# pinned to v2.1.2 - confirm which of the two is intended.
RUN git clone --depth 1 --branch v2.1.2 https://github.com/DIAGNijmegen/picai_nnunet_semi_supervised_gc_algorithm.git /opt/algorithm && \
    rm -rf /opt/algorithm/test && \
    rm -rf /opt/algorithm/.git

# Set this environment variable as a shortcut to avoid nnunet==1.7.0 crashing the build
# by pulling sklearn instead of scikit-learn
# N.B. this is a known issue:
#   https://github.com/MIC-DKFZ/nnUNet/issues/1281
#   https://github.com/MIC-DKFZ/nnUNet/pull/1209
ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True

# Install additional PICAI requirements
RUN pip3 install --no-cache-dir -r /opt/algorithm/requirements.txt

# Extend the nnUNet installation with custom trainers
# The nnunet install location is resolved once and all three trainer files are
# moved in a single layer.
RUN SITE_PKG=$(pip3 show nnunet | grep "Location:" | awk '{print $2}') && \
    TRAINING_DIR="$SITE_PKG/nnunet/training/network_training" && \
    mv /opt/algorithm/nnUNetTrainerV2_focalLoss.py "$TRAINING_DIR/nnUNet_variants/loss_function/nnUNetTrainerV2_focalLoss.py" && \
    mv /opt/algorithm/nnUNetTrainerV2_Loss_CE_checkpoints.py "$TRAINING_DIR/nnUNetTrainerV2_Loss_CE_checkpoints.py" && \
    mv /opt/algorithm/nnUNetTrainerV2_Loss_FL_and_CE.py "$TRAINING_DIR/nnUNetTrainerV2_Loss_FL_and_CE.py"

# Two code edits to the __init__ method of the algorithm class in process.py to prevent some of its default behavior
#   1. Skip forced error caused by using different input locations than expected (we don't use the GC dirs)
#   2. Prevent unnecessary folder creation before input directories have been set (we will set the correct directory later)
RUN sed -i "s|file_paths = list(Path(folder).glob(scan_glob_format))|return|g" /opt/algorithm/process.py && \
    sed -i "s|self.cspca_detection_map_path.parent.mkdir(exist_ok=True, parents=True)||g" /opt/algorithm/process.py

# Import the MHub model definition
ARG MHUB_MODELS_REPO
RUN buildutils/import_mhub_model.sh gc_picai_baseline ${MHUB_MODELS_REPO}

# Add /app and the PICAI baseline algorithm code base (/opt/algorithm) to the Python path
ENV PYTHONPATH="/app:/opt/algorithm"

# Default entrypoint
ENTRYPOINT ["python3", "-m", "mhubio.run"]
CMD ["--config", "/app/models/gc_picai_baseline/config/default.yml"]
179 changes: 179 additions & 0 deletions models/gc_picai_baseline/meta.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
{
"id": "c5f886fb-9f54-4555-a954-da02b22d6d3f",
"name": "gc_picai_baseline",
"title": "PI-CAI challenge baseline",
"summary": {
"description": "This algorithm predicts a detection map for the likelihood of clinically significant prostate cancer (csPCa) using biparametric MRI (bpMRI). The algorithm ensembles 5-fold cross-validation models that were trained on the PI-CAI: Public Training and Development Dataset v2.0. The detection map is at the same spatial resolution and physical dimensions as the input axial T2-weighted image. This model algorithm was used as a baseline for the PI-CAI challenge hosted on Grand Challenge.",
"inputs": [
{
"label": "Transverse T2-weighted prostate biparametric MRI",
"description": "Transverse T2-weighted prostate biparametric MRI exam.",
"format": "DICOM",
"modality": "MR",
"bodypartexamined": "Prostate",
"slicethickness": "0.5 x 0.5 x 3.0 mm",
"non-contrast": false,
"contrast": false
},
{
"label": "Transverse high b-value diffusion-weighted maps of the prostate",
"description": "Transverse high b-value diffusion-weighted (DWI) maps, with b-value of 1400 or 2000, either acquired or vendor-calculated.",
"format": "DICOM",
"modality": "MR",
"bodypartexamined": "Prostate",
"slicethickness": "0.5 x 0.5 x 3.0 mm",
"non-contrast": false,
"contrast": false
},
{
"label": "Transverse apparent diffusion coefficient map of the prostate",
"description": "Transverse apparent diffusion coefficient (ADC) prostate MRI map.",
"format": "DICOM",
"modality": "MR",
"bodypartexamined": "Prostate",
"slicethickness": "0.5 x 0.5 x 3.0 mm",
"non-contrast": false,
"contrast": false
}
],
"outputs": [
{
"type": "Prediction",
"valueType": "Likelihood",
"label": "Prostate cancer likelihood",
"description": "Case-level likelihood of harboring clinically significant prostate cancer, in range [0,1].",
"classes": []
},
{
"type": "Prediction",
"valueType": "Likelihood map",
"label": "Transverse cancer detection map",
"description": "Detection map of clinically significant prostate cancer lesions in 3D, where each voxel represents a floating point in range [0,1]. This map is at the same spatial resolution and physical dimensions as the input transversal T2-weighted image.",
"classes": []
}
],
"model": {
"architecture": "3d fullres nnUNet",
"training": "semi-supervised",
"cmpapproach": "3D"
},
"data": {
"training": {
"vol_samples": 1500
},
"evaluation": {
"vol_samples": 1000
},
"public": false,
"external": false
}
},
"details": {
"name": "PI-CAI challenge baseline",
"version": "v2.1.1",
"devteam": "Diagnostic Image Analysis Group, Radboud University Medical Center, Nijmegen, The Netherlands",
"type": "Prediction",
"date": {
"weights": "2022-06-22",
"code": "2022-09-05",
"pub": ""
},
"cite": "J. S. Bosma, A. Saha, M. Hosseinzadeh, I. Slootweg, M. de Rooij, and H. Huisman, \"Semisupervised Learning with Report-guided Pseudo Labels for Deep Learning–based Prostate Cancer Detection Using Biparametric MRI\", Radiology: Artificial Intelligence, 230031, 2023. DOI: 10.1148/ryai.230031",
"license": {
"code": "Apache 2.0",
"weights": "CC-BY-NC-SA-4.0"
},
"publications": [
{
"uri": "https://doi.org/10.5281/zenodo.6667655",
"title": "Artificial Intelligence and Radiologists at Prostate Cancer Detection in MRI: The PI-CAI Challenge (Study Protocol)"
},
{
"uri": "https://pubs.rsna.org/doi/10.1148/ryai.230031",
"title": "Semisupervised Learning with Report-guided Pseudo Labels for Deep Learning–based Prostate Cancer Detection Using Biparametric MRI"
}
],
"github": "https://github.com/DIAGNijmegen/picai_nnunet_semi_supervised_gc_algorithm",
"zenodo": "",
"colab": "",
"slicer": false
},
"info": {
"use": {
"title": "Intended use",
"text": "This algorithm is a deep learning-based detection/diagnosis model, which ensembles 5 independent nnU-Net models (5-fold cross-validation). To predict the likelihood of harboring clinically significant prostate cancer (csPCa), the transversal T2-weighted, apparent diffusion coefficient (ADC) and high b-value diffusion weighted maps are required. The input sequences should be co-registered or aligned reasonably well and the prostate gland should be localized within a volume of 460 cm³ from the centre coordinate. The nnU-Net framework will internally resample all input scans to 0.5 x 0.5 x 3.0 mm. Per case the input data should be put into the following folder structure: `case1/adc`, `case1/hbv`, `case1/t2w`, corresponding respectively with the ADC, high b-value DWI, and the T2 weighted MR inputs for a case called `case1`.",
"references": [
{
"label": "PI-CAI baseline algorithm on grand-challenge",
"uri": "https://grand-challenge.org/algorithms/pi-cai-baseline-nnu-net-semi-supervised/"
}
],
"tables": []
},
"analyses": {
"title": "Evaluation",
"text": "Patient-level diagnosis performance is evaluated using the Area Under Receiver Operating Characteristic (AUROC) metric. Lesion-level detection performance is evaluated using the Average Precision (AP) metric.",
"references": [
{
"label": "PI-CAI AI challenge details",
"uri": "https://pi-cai.grand-challenge.org/AI/"
},
{
"label": "PI-CAI baseline algorithm evaluation results on grand-challenge.",
"uri": "https://pi-cai.grand-challenge.org/evaluation/fe187cdb-cb61-4cbb-ab63-2de483a52d60/"
}
],
"tables": [
{
"label": "Evaluation results on the PI-CAI testing cohort of 1000 cases.",
"entries": {
"AUROC": "0.865",
"AP": "0.576"
}
}
]
},
"evaluation": {
"title": "Evaluation data",
"text": "The PI-CAI Hidden Testing Cohort (1000 cases) includes internal testing data (unseen cases from seen centers) and external testing data (unseen cases from an unseen center).",
"references": [
{
"label": "PI-CAI data section",
"uri": "https://pi-cai.grand-challenge.org/DATA/"
}
],
"tables": []
},
"training": {
"title": "Training data",
"text": "The publicly available PI-CAI training and development dataset of 1500 biparametric MRI exams was used for training [1]. AI-derived annotations were created for cases without manual annotations [2]. This model was trained using a custom preprocessing step followed by the standard nnU-Net pipeline. The default nnU-Net loss-function was changed to Cross-Entropy + Focal loss [3].",
"references": [
{
"label": "PI-CAI publicly available training and development dataset",
"uri": "https://zenodo.org/record/6624726"
},
{
"label": "Method to obtain AI-derived annotations",
"uri": "https://fastmri.eu/research/bosma22a.html"
},
{
"label": "Detailed description of training method",
"uri": "https://github.com/DIAGNijmegen/picai_baseline/blob/main/nnunet_baseline.md"
}
],
"tables": []
},
"ethics": {
"title": "",
"text": "",
"references": [],
"tables": []
},
"limitations": {
"title": "Limitations",
"text": "This algorithm was developed for research purposes only.",
"references": [],
"tables": []
}
}
}
65 changes: 65 additions & 0 deletions models/gc_picai_baseline/utils/PicaiBaselineRunner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""
---------------------------------------------------------
Mhub / DIAG - Run Module for the PICAI baseline Algorithm
---------------------------------------------------------
---------------------------------------------------------
Author: Sil van de Leemput
Email: [email protected]
---------------------------------------------------------
"""

import json
import sys
from pathlib import Path

from mhubio.core import Instance, InstanceData, IO, Module, ValueOutput, ClassOutput, Meta


CLI_PATH = Path(__file__).parent / "cli.py"


# Value output type for the case-level prostate cancer likelihood.
# Name, label, value type (float) and description are all declared through
# the decorators below; the class body itself adds nothing.
# NOTE(review): the name 'prostate_cancer_likelihood' is presumably what the
# ReportExporter `data:` entries in the configs refer to - confirm.
@ValueOutput.Name('prostate_cancer_likelihood')
@ValueOutput.Label('ProstateCancerLikelihood')
@ValueOutput.Type(float)
@ValueOutput.Description('Likelihood of case-level prostate cancer.')
class ProstateCancerLikelihood(ValueOutput):
    pass


class PicaiBaselineRunner(Module):
    """MHub module that runs the PICAI baseline algorithm through the ``cli.py``
    script located next to this file and collects the case-level likelihood."""

    @IO.Instance()
    @IO.Input('in_data_t2', 'mha:mod=mr:type=t2w', the='input T2 weighted prostate MR image')
    @IO.Input('in_data_adc', 'mha:mod=mr:type=adc', the='input ADC prostate MR image')
    @IO.Input('in_data_hbv', 'mha:mod=mr:type=hbv', the='input HBV prostate MR image')
    @IO.Output('cancer_likelihood_json', 'cspca-case-level-likelihood.json', "json", bundle='model', the='output JSON file with PICAI baseline prostate cancer likelihood')
    @IO.Output('cancer_lesion_detection_map', 'cspca-detection-map.mha', "mha:mod=dm", bundle='model', the='output detection map of clinically significant prostate cancer lesions in 3D, where each voxel represents a floating point in range [0,1]')
    @IO.OutputData('cancer_likelihood', ProstateCancerLikelihood, the='PICAI baseline prostate cancer likelihood')
    def task(self, instance: Instance, in_data_t2: InstanceData, in_data_adc: InstanceData, in_data_hbv: InstanceData, cancer_likelihood_json: InstanceData, cancer_lesion_detection_map: InstanceData, cancer_likelihood: ProstateCancerLikelihood) -> None:
        """Run the CLI script on one instance and store the resulting likelihood.

        Args:
            instance: the instance being processed (not used directly here).
            in_data_t2: T2 weighted MHA image.
            in_data_adc: ADC MHA image.
            in_data_hbv: HBV MHA image.
            cancer_likelihood_json: output JSON file passed to the CLI script.
            cancer_lesion_detection_map: output MHA file passed to the CLI script.
            cancer_likelihood: value output that receives the parsed likelihood.

        Raises:
            FileNotFoundError: if the likelihood JSON does not exist after the run.
            ValueError: if the parsed likelihood is not within [0, 1].
        """
        # Positional arguments of the CLI script; the order matters!
        positional_args = [
            in_data_t2.abspath,
            in_data_adc.abspath,
            in_data_hbv.abspath,
            cancer_likelihood_json.abspath,
            cancer_lesion_detection_map.abspath,
        ]

        # Execute the script with the current interpreter as a subprocess
        self.subprocess([sys.executable, str(CLI_PATH), *positional_args], text=True)

        # The script is expected to have written the likelihood JSON file
        likelihood_file = cancer_likelihood_json.abspath
        if not Path(likelihood_file).is_file():
            raise FileNotFoundError(f"Output file {likelihood_file} could not be found!")

        # The JSON document is converted to a single float
        with open(likelihood_file, "r") as json_handle:
            likelihood = float(json.load(json_handle))

        # Chained comparison on purpose: NaN fails it and is rejected as well
        is_number = isinstance(likelihood, (float, int))
        if not (is_number and (0.0 <= likelihood <= 1.0)):
            raise ValueError(f"Cancer likelihood value should be between 0 and 1, found: {likelihood}")

        # Publish the predicted value
        cancer_likelihood.value = likelihood
1 change: 1 addition & 0 deletions models/gc_picai_baseline/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .PicaiBaselineRunner import *
Loading

0 comments on commit 56efeef

Please sign in to comment.