Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BAMF NNUnet Lung and Nodules (v2) #92

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions models/bamf_nnunet_ct_lungnodules/config/default.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Default MHub workflow configuration for the BAMF nnU-Net lung and lung-nodule
# CT segmentation model (DICOM in, DICOM SEG out).
general:
data_base_dir: /app/data
version: 1.0
description: default configuration for 3D semantic image segmentation of the lung and lung nodules from ct scan (dicom to dicom)

# Modules of the workflow, listed in pipeline order.
# NOTE(review): presumably executed top to bottom by the MHub runner - confirm.
execute:
- DicomImporter
- NiftiConverter
- NNUnetRunnerV2
- BamfProcessorRunner
- DsegConverter
- DataOrganizer

# Per-module settings. NiftiConverter and BamfProcessorRunner have no entry
# here, so they presumably run with their built-in defaults.
modules:
DicomImporter:
source_dir: input_data
import_dir: sorted_data
sort_data: true
meta:
# tag imported series with their DICOM Modality so later modules can select `mod=ct`
mod: '%Modality'

NNUnetRunnerV2:
# run inference on the CT volume converted to NIfTI
in_data: nifti:mod=ct

DsegConverter:
model_name: bamf_ct_lung_nodule
target_dicom: dicom:mod=ct
# consume the post-processed segmentation (tagged `processor=bamf` by
# BamfProcessorRunner), not the raw nnU-Net output
source_segs: nifti:mod=seg:processor=bamf
skip_empty_slices: True

DataOrganizer:
targets:
# one DICOM SEG file per instance
# NOTE(review): `[i:sid]` looks like an instance/series id placeholder - confirm
- dicomseg-->[i:sid]/bamf_nnunet_ct_lungnodules.seg.dcm
31 changes: 31 additions & 0 deletions models/bamf_nnunet_ct_lungnodules/dockerfiles/Dockerfile
LennyN95 marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Container image for the BAMF nnU-Net (v2) lung and lung-nodule CT model.
FROM mhubai/base:latest

# FIXME: set this environment variable as a shortcut to avoid nnunet crashing the build
# by pulling sklearn instead of scikit-learn
# N.B. this is a known issue:
# https://github.com/MIC-DKFZ/nnUNet/issues/1281
# https://github.com/MIC-DKFZ/nnUNet/pull/1209
ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True

# Install nnunet version 2
RUN pip3 install --no-cache-dir nnunetv2==2.0

# Clone the main branch of MHubAI/models
ARG MHUB_MODELS_REPO
RUN buildutils/import_mhub_model.sh bamf_nnunet_ct_lungnodules ${MHUB_MODELS_REPO}

# Pull weights into the container.
# Download, extract and delete the archive in a single RUN instruction: every
# RUN creates its own image layer, so removing the zip in a later layer would
# not shrink the image - the archive would still be stored in the earlier one.
ENV WEIGHTS_DIR=/root/.nnunet/nnUNet_models/nnUNet/
ENV WEIGHTS_FN=Dataset007_Nodules.zip
ENV WEIGHTS_URL=https://zenodo.org/record/11582738/files/$WEIGHTS_FN
RUN mkdir -p ${WEIGHTS_DIR} \
    && wget --directory-prefix ${WEIGHTS_DIR} ${WEIGHTS_URL} \
    && unzip ${WEIGHTS_DIR}${WEIGHTS_FN} -d ${WEIGHTS_DIR} \
    && rm ${WEIGHTS_DIR}${WEIGHTS_FN}

# specify nnunet specific environment variables
ENV WEIGHTS_FOLDER=$WEIGHTS_DIR

# Default run script
ENTRYPOINT ["mhub.run"]
CMD ["--config", "/app/models/bamf_nnunet_ct_lungnodules/config/default.yml"]
149 changes: 149 additions & 0 deletions models/bamf_nnunet_ct_lungnodules/meta.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
{
"id": "",
"name": "bamf_nnunet_ct_lungnodules",
"title": "AIMI CT Lung and Nodules",
"summary": {
"description": "An nnU-Net based model to segment Lung and Nodules (3mm-30mm) from CT scans",
"inputs": [
{
"label": "Input Image",
"description": "The CT scan of a patient.",
"format": "DICOM",
"modality": "CT",
"bodypartexamined": "LUNG",
"slicethickness": "10mm",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to confirm, the model supports CT scans with a slice thickness of up to 1 cm?

"non-contrast": true,
"contrast": false
}
],
"outputs": [
{
"label": "Segmentation",
"type": "Segmentation",
"description": "Lung and Nodules (3mm-30mm) from CT scans",
"classes": [
"LUNG",
"LUNG+NODULE"
]
}
],
"model": {
"architecture": "U-net",
"training": "supervised",
"cmpapproach": "3D"
},
"data": {
"training": {
"vol_samples": 1299
},
"evaluation": {
"vol_samples": 114
},
"public": true,
"external": true
}
},
"details": {
"name": "AIMI CT Lung and Nodule",
"version": "2.0.0",
"devteam": "BAMF Health",
"authors": [
"Soni, Rahul",
"McCrumb, Diana",
"Murugesan, Gowtham Krishnan",
"Van Oss, Jeff"
],
"type": "nnU-Net (U-Net structure, optimized by data-driven heuristics)",
"date": {
"code": "17.10.2023",
"weights": "28.08.2023",
"pub": "23.10.2023"
},
"cite": "Murugesan, Gowtham Krishnan, Diana McCrumb, Mariam Aboian, Tej Verma, Rahul Soni, Fatima Memon, and Jeff Van Oss. The AIMI Initiative: AI-Generated Annotations for Imaging Data Commons Collections. arXiv preprint arXiv:2310.14897 (2023).",
"license": {
"code": "MIT",
"weights": "CC BY-NC 4.0"
},
"publications": [
{
"title": "The AIMI Initiative: AI-Generated Annotations in IDC Collections",
"uri": "https://arxiv.org/abs/2310.14897"
}
],
"github": "https://github.com/bamf-health/aimi-lung2-ct"
},
"info": {
"use": {
"title": "Intended Use",
"text": "This model is designed for analyzing thoracic CT scans to segment lung structures and nodules. It requires input images from CT scans, which are processed using deep learning methods like U-Net. The model identifies and delineates lung regions and nodules, assisting in lung cancer screening and diagnostics. "
},
"analyses": {
"title": "Quantitative Analyses",
"text": "The model's performance was assessed using the Dice Coefficient, Hausdorff distance and NSD. Source radiological images from publicly available NCI IDC collections were filtered to match the modality and region requirements. To ensure the quality of AI-generated annotations, 10% of these annotations were evaluated by radiologists. "
},
"evaluation": {
"title": "Evaluation Data",
"text": "Quantitative metrics between AI and Radiologists annotations. The model was used to segment 1157 cases from the QIN LUNG CT [1], SPIE-AAPM Lung CT Challenge [2] and NLST [3] collections. 114 of those cases were randomly selected to be reviewed and corrected by a board-certified radiologist.",
"tables": [
{
"label": "Dice Score",
"entries": {
"Lung": "1.0±0.0",
"Nodules": "0.78±0.28"
}
},
{
"label": "95% Hausdorff Distance",
"entries": {
"Lung": "0.00±0.00",
"Nodules": "62.07±10.54"
}
},
{
"label": "Normalized surface distance ",
"entries": {
"Lung": "0.02±0.11",
"Nodules": "10.54±14.43"
}
}
],
"references": [
{
"label": "QIN LUNG CT",
"uri": "https://www.cancerimagingarchive.net/collection/qin-lung-ct/"
},
{
"label": "SPIE-AAPM Lung CT Challenge",
"uri": "https://www.cancerimagingarchive.net/collection/spie-aapm-lung-ct-challenge/"
},
{
"label": "NLST",
"uri": "https://www.cancerimagingarchive.net/collection/nlst/"
}

]
},
"training": {
"title": "Training Data",
"text": "416 CT cases from NSCLC-Radiomics [2] and 883 CT cases from DICOM-LIDC-IDRI-Nodules [1] were used to train the model. Annotations for the lung regions in the training dataset were generated utilizing TotalSegmentator [3].",
"references": [
{
"label": "DICOM-LIDC-IDRI-Nodules",
"uri": "https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=44499647"
},
{
"label": "NSCLC-Radiomics",
"uri": "https://www.cancerimagingarchive.net/collection/nsclc-radiomics/"
},
{
"label": "Totalsegmentator",
"uri": "https://mhub.ai/models/totalsegmentator"
}
]
},
"limitations": {
"title": "Limitations",
"text": "The model has been trained and tested on scans acquired during clinical care of patients, so it might not be suited for a healthy population. The generalization capabilities of the model on a range of ages, genders, and ethnicities are unknown."
}
}
}
152 changes: 152 additions & 0 deletions models/bamf_nnunet_ct_lungnodules/utils/BamfProcessorRunner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
"""
-------------------------------------------------
MHub - Run Module to perform postprocessing logic on segmentations.
-------------------------------------------------
-------------------------------------------------
Author: Jithendra Kumar
Email: [email protected]
-------------------------------------------------
"""

from mhubio.core import Instance, InstanceData
from mhubio.core import Module, IO
from skimage import measure
import SimpleITK as sitk
import numpy as np


class BamfProcessorRunner(Module):
    """Post-process an nnU-Net lung / lung-nodule segmentation.

    The input label image is expected to use 1 for lung and 2 for nodule
    (see ``filter_nodules``). Nodule components smaller than a minimum
    in-plane size are merged back into the lung label, and only the largest
    one or two connected foreground regions are kept.
    """

    def max_planar_dimension(self, label_img: sitk.Image, label_cnt: int) -> float:
        """
        Calculate the maximum planar dimension of a specific label in a 3D label image.

        The measure is the larger of the two in-plane bounding-box extents
        (in voxels) multiplied by the in-plane voxel spacing.

        Args:
            label_img (sitk.Image): The 3D label image.
            label_cnt (int): The label number to analyze. It must be present in
                the image, otherwise the bounding-box lookup fails.

        Returns:
            float: The maximum size of the label in millimeters (mm) across the most planar dimension.

        Raises:
            ValueError: If the image is not three-dimensional.
        """
        # binary image: 1 where the requested label is, 0 elsewhere
        tumor = label_img == label_cnt

        if tumor.GetDimension() != 3:
            raise ValueError(
                f"expected a 3D label image, got {tumor.GetDimension()} dimensions"
            )

        # Guess the through-plane axis as the one whose spacing differs from
        # the other two. Spacings are floats read from file headers, so they
        # are compared with a tolerance instead of exact equality.
        spacing = tumor.GetSpacing()
        xy_equal = bool(np.isclose(spacing[0], spacing[1]))
        yz_equal = bool(np.isclose(spacing[1], spacing[2]))
        if xy_equal and not yz_equal:
            axis = 2
            plane_space = spacing[0]
        elif yz_equal and not xy_equal:
            axis = 0
            plane_space = spacing[1]
        else:
            # fallback, also taken for isotropic or fully anisotropic spacing
            axis = 1
            plane_space = spacing[2]

        lsif = sitk.LabelShapeStatisticsImageFilter()
        lsif.Execute(tumor)

        # bounding box is (x, y, z, size_x, size_y, size_z); keep the sizes
        # and drop the through-plane one
        bounding_box = np.array(lsif.GetBoundingBox(1))
        sizes = bounding_box[3:].tolist()
        del sizes[axis]
        return plane_space * max(sizes)  # mm

    def filter_nodules(self, label_img: sitk.Image, min_size: float = 3) -> sitk.Image:
        """
        Filter lung nodules based on their size and re-label them accordingly.

        Each connected component of the nodule label is measured with
        ``max_planar_dimension``. Components smaller than ``min_size`` are
        relabeled as lung; all other components keep the nodule label.
        Finally ``n_connected`` removes small disconnected foreground regions.

        Args:
            label_img (sitk.Image): The 3D label image containing lung (1) and nodule (2) labels.
            min_size (float): Minimum planar size (in mm) to retain a nodule.

        Returns:
            sitk.Image: The processed label image with nodules filtered by size.
        """
        label_val_lung = 1
        label_val_nodule = 2

        nodule_components = sitk.ConnectedComponent(label_img == label_val_nodule)
        n_components = int(sitk.GetArrayFromImage(nodule_components).max())

        # Component ids live in `nodule_components` only. They must never be
        # used as keys for relabeling `label_img`, whose values are semantic
        # classes (component 1 would otherwise be confused with the lung).
        nodules_to_remove = [
            lbl
            for lbl in range(1, n_components + 1)
            if self.max_planar_dimension(nodule_components, lbl) < min_size
        ]

        # Turn every nodule voxel into lung first, then paint the retained
        # components back with the nodule label.
        label_img = sitk.ChangeLabel(label_img, {label_val_nodule: label_val_lung})
        big_nodules = sitk.ChangeLabel(nodule_components, {lbl: 0 for lbl in nodules_to_remove})
        # sitk.Mask(image, mask, outsideValue, maskingValue): voxels whose mask
        # value equals maskingValue (1 = retained nodule) are set to
        # outsideValue (the nodule label); all others are passed through.
        label_img = sitk.Mask(label_img, big_nodules > 0, label_val_nodule, label_val_lung)

        return self.n_connected(label_img)

    def n_connected(self, img: sitk.Image) -> sitk.Image:
        """
        Retain the largest connected foreground components of a label image.

        All non-zero voxels are treated as foreground. The largest connected
        component is always kept; the second largest is kept only if it has
        more than 20% of the voxels of the largest. Voxels outside the kept
        components are set to 0, kept voxels retain their original label.

        Args:
            img (sitk.Image): The input label image.

        Returns:
            sitk.Image: The processed image with only the largest connected components retained.
        """
        img_data = sitk.GetArrayFromImage(img)
        blobs_labels = measure.label(img_data > 0, background=0)

        # Rank foreground components by voxel count. The background (label 0)
        # is excluded explicitly so the result does not depend on the
        # background being the largest region.
        labels, counts = np.unique(blobs_labels[blobs_labels > 0], return_counts=True)
        ranked = sorted(zip(labels, counts), key=lambda item: item[1], reverse=True)

        keep = np.zeros(img_data.shape, dtype=bool)
        if ranked:
            largest_label, largest_size = ranked[0]
            keep |= blobs_labels == largest_label
            if len(ranked) > 1:
                second_label, second_size = ranked[1]
                if second_size > largest_size * 0.2:
                    keep |= blobs_labels == second_label

        img_data[~keep] = 0
        img_masked = sitk.GetImageFromArray(img_data)
        # restore spacing, origin and direction lost in the numpy round trip
        img_masked.CopyInformation(img)
        return img_masked

    @IO.Instance()
    @IO.Input('in_data', 'nifti:mod=seg:model=nnunet', the='input segmentations')
    @IO.Output('out_data', 'bamf_processed.nii.gz', 'nifti:mod=seg:processor=bamf:roi=LUNG,LUNG+NODULE', data='in_data', the="lung and filtered nodules segmentation")
    def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None:
        """
        Main task function that processes the input lung and nodule segmentations,
        filters nodules based on their size, and writes the output image.

        Args:
            instance (Instance): The MHub instance for processing.
            in_data (InstanceData): Input data containing the segmentation.
            out_data (InstanceData): Output data path to save the processed image.
        """
        # Log bamf runner info
        self.log("Running BamfProcessor on....")
        self.log(f" > input data: {in_data.abspath}")
        self.log(f" > output data: {out_data.abspath}")

        label_img = sitk.ReadImage(in_data.abspath)
        filtered_label_img = self.filter_nodules(label_img, min_size=3)
        sitk.WriteImage(filtered_label_img, out_data.abspath)
Loading
Loading