From 5ce512a37c11c7e4f03b8d4a87e305c833c67e55 Mon Sep 17 00:00:00 2001 From: jithenece Date: Tue, 9 Jul 2024 16:52:44 +0530 Subject: [PATCH 01/15] version 2 model for lung/nodules --- models/bamf_nnunet_ct_lung/config/default.yml | 36 +++ .../dockerfiles/Dockerfile | 31 +++ models/bamf_nnunet_ct_lung/meta.json | 114 ++++++++++ .../utils/BamfProcessorRunner.py | 115 ++++++++++ .../utils/NNUnetRunnerV2.py | 211 ++++++++++++++++++ models/bamf_nnunet_ct_lung/utils/__init__.py | 1 + 6 files changed, 508 insertions(+) create mode 100644 models/bamf_nnunet_ct_lung/config/default.yml create mode 100644 models/bamf_nnunet_ct_lung/dockerfiles/Dockerfile create mode 100644 models/bamf_nnunet_ct_lung/meta.json create mode 100644 models/bamf_nnunet_ct_lung/utils/BamfProcessorRunner.py create mode 100644 models/bamf_nnunet_ct_lung/utils/NNUnetRunnerV2.py create mode 100644 models/bamf_nnunet_ct_lung/utils/__init__.py diff --git a/models/bamf_nnunet_ct_lung/config/default.yml b/models/bamf_nnunet_ct_lung/config/default.yml new file mode 100644 index 00000000..19d149b3 --- /dev/null +++ b/models/bamf_nnunet_ct_lung/config/default.yml @@ -0,0 +1,36 @@ +general: + data_base_dir: /app/data + version: 1.0 + description: default configuration for 3D semantic image segmentation of the lung and lung nodules from ct scan + +execute: +- DicomImporter +- NiftiConverter +- NNUnetRunnerV2 +- BamfProcessorRunner +- DsegConverter +- DataOrganizer + +modules: + DicomImporter: + source_dir: input_data + import_dir: sorted_data + sort_data: true + meta: + mod: '%Modality' + + NNUnetRunnerV2: + in_data: nifti:mod=ct + nnunet_dataset: Dataset007_Nodules + nnunet_config: 3d_fullres + roi: LUNG,LUNG+NODULE + + DsegConverter: + model_name: bamf_ct_lung_nodule + target_dicom: dicom:mod=ct + source_segs: nifti:mod=seg:processor=bamf + skip_empty_slices: True + + DataOrganizer: + targets: + - dicomseg-->[i:sid]/bamf_ct_lung_nodule.seg.dcm diff --git a/models/bamf_nnunet_ct_lung/dockerfiles/Dockerfile b/models/bamf_nnunet_ct_lung/dockerfiles/Dockerfile new file mode 100644 index 00000000..f8d234d0 --- /dev/null +++ b/models/bamf_nnunet_ct_lung/dockerfiles/Dockerfile @@ -0,0 +1,31 @@ +FROM mhubai/base:latest + +# FIXME: set this environment variable as a shortcut to avoid nnunet crashing the build +# by pulling sklearn instead of scikit-learn +# N.B. 
this is a known issue: +# https://github.com/MIC-DKFZ/nnUNet/issues/1281 +# https://github.com/MIC-DKFZ/nnUNet/pull/1209 +ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True + +# Install nnunet version 2 +RUN pip3 install --no-cache-dir nnunetv2==2.0 + +# Clone the main branch of MHubAI/models +ARG MHUB_MODELS_REPO +RUN buildutils/import_mhub_model.sh bamf_ct_lung_nodule ${MHUB_MODELS_REPO} + +# Pull weights into the container +ENV WEIGHTS_DIR=/root/.nnunet/nnUNet_models/nnUNet/ +RUN mkdir -p $WEIGHTS_DIR +ENV WEIGHTS_FN=Dataset007_Nodules.zip +ENV WEIGHTS_URL=https://zenodo.org/record/11582738/files/$WEIGHTS_FN +RUN wget --directory-prefix ${WEIGHTS_DIR} ${WEIGHTS_URL} +RUN unzip ${WEIGHTS_DIR}${WEIGHTS_FN} -d ${WEIGHTS_DIR} +RUN rm ${WEIGHTS_DIR}${WEIGHTS_FN} + +# specify nnunet specific environment variables +ENV WEIGHTS_FOLDER=$WEIGHTS_DIR + +# Default run script +ENTRYPOINT ["mhub.run"] +CMD ["--config", "/app/models/bamf_ct_lung_nodule/config/default.yml"] diff --git a/models/bamf_nnunet_ct_lung/meta.json b/models/bamf_nnunet_ct_lung/meta.json new file mode 100644 index 00000000..ba224c71 --- /dev/null +++ b/models/bamf_nnunet_ct_lung/meta.json @@ -0,0 +1,114 @@ +{ + "id": "", + "name": "bamf_nnunet_ct_lung", + "title": "AIMI CT Lung and Nodules", + "summary": { + "description": "An nnU-Net based model to segment Lung and Nodules (3mm-30mm) from CT scans", + "inputs": [ + { + "label": "Input Image", + "description": "The CT scan of a patient.", + "format": "DICOM", + "modality": "CT", + "bodypartexamined": "LUNG", + "slicethickness": "10mm", + "non-contrast": true, + "contrast": false + } + ], + "outputs": [ + { + "label": "Segmentation", + "type": "Segmentation", + "description": "Lung and Nodules (3mm-30mm) from CT scans", + "classes": [ + "LUNG", + "LUNG+NODULE" + ] + } + ], + "model": { + "architecture": "U-net", + "training": "supervised", + "cmpapproach": "3D" + }, + "data": { + "training": { + "vol_samples": 1299 + }, + "evaluation": { + "vol_samples": 9 + }, + "public": true, + "external": true + } + }, + "details": { + "name": "AIMI CT Lung and Nodule", + "version": "2.0.0", + "devteam": "BAMF Health", + "authors": [ + "Soni, Rahul", + "McCrumb, Diana", + "Murugesan, Gowtham Krishnan", + "Van Oss, Jeff" + ], + "type": "nnU-Net (U-Net structure, optimized by data-driven heuristics)", + "date": { + "code": "17.10.2023", + "weights": "28.08.2023", + "pub": "23.10.2023" + }, + "cite": "Murugesan, Gowtham Krishnan, Diana McCrumb, Mariam Aboian, Tej Verma, Rahul Soni, Fatima Memon, and Jeff Van Oss. The AIMI Initiative: AI-Generated Annotations for Imaging Data Commons Collections. arXiv preprint arXiv:2310.14897 (2023).", + "license": { + "code": "MIT", + "weights": "CC BY-NC 4.0" + }, + "publications": [ + { + "title": "The AIMI Initiative: AI-Generated Annotations in IDC Collections", + "uri": "https://arxiv.org/abs/2310.14897" + } + ], + "github": "https://github.com/MHubAI/models/tree/main/models/bamf_nnunet_ct_lung" + }, + "info": { + "use": { + "title": "Intended Use", + "text": "This model is intended to perform lung and nodule segmentation in CT scans. The model has been trained and tested on scans aquired during clinical care of patients, so it might not be suited for a healthy population. The generalization capabilities of the model on a range of ages, genders, and ethnicities are unknown." 
+ }, + "evaluation": { + "title": "Evaluation Data", + "text": "The model was used to segment cases 1157 from the QIN LUNG CT [1], SPIE-AAPM Lung CT Challenge [2] and NLST [3] collection. 112 of those cases were randomly selected to be reviewed and corrected by a board-certified radiologist.", + "references": [ + { + "label": "QIN LUNG CT", + "uri": "https://www.cancerimagingarchive.net/collection/qin-lung-ct/" + }, + { + "label": "SPIE-AAPM Lung CT Challenge", + "uri": "https://www.cancerimagingarchive.net/collection/spie-aapm-lung-ct-challenge/" + }, + { + "label": "NLST", + "uri": "https://www.cancerimagingarchive.net/collection/nlst/" + } + + ] + }, + "training": { + "title": "Training Data", + "text": "", + "references": [ + { + "label": "DICOM-LIDC-IDRI-Nodules", + "uri": "https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=44499647" + }, + { + "label": "NSCLC-Radiomics", + "uri": "https://www.cancerimagingarchive.net/collection/nsclc-radiomics/" + } + ] + } + } +} \ No newline at end of file diff --git a/models/bamf_nnunet_ct_lung/utils/BamfProcessorRunner.py b/models/bamf_nnunet_ct_lung/utils/BamfProcessorRunner.py new file mode 100644 index 00000000..617b33ef --- /dev/null +++ b/models/bamf_nnunet_ct_lung/utils/BamfProcessorRunner.py @@ -0,0 +1,115 @@ +""" +------------------------------------------------- +MHub - Run Module for perform postprocessing logic on segmentations. +------------------------------------------------- +------------------------------------------------- +Author: Jithendra Kumar +Email: jithendra.kumar@bamfhealth.com +------------------------------------------------- +""" + +from mhubio.core import Instance, InstanceData +from mhubio.core import Module, IO +from skimage import measure +import SimpleITK as sitk +import numpy as np + + +class BamfProcessorRunner(Module): + + def max_planar_dimension(self, label_img, label_cnt): + tumor = label_img == label_cnt + + assert tumor.GetDimension() == 3 + spacing = tumor.GetSpacing() + if spacing[0] == spacing[1] and spacing[1] != spacing[2]: + axis = 2 + plane_space = spacing[0] + elif spacing[0] != spacing[1] and spacing[1] == spacing[2]: + axis = 0 + plane_space = spacing[1] + else: + axis = 1 + plane_space = spacing[2] + + lsif = sitk.LabelShapeStatisticsImageFilter() + lsif.Execute(tumor) + + boundingBox = np.array(lsif.GetBoundingBox(1)) + sizes = boundingBox[3:].tolist() + del sizes[axis] + max_planar_size = plane_space * max(sizes) # mm + return max_planar_size + + def filter_nodules(self, label_img, min_size=3): + label_val_lung = 1 + label_val_nodule = 2 + label_val_large_nodule = 3 + + nodules_img = label_img == label_val_nodule + nodule_components = sitk.ConnectedComponent(nodules_img) + + nodules_to_remove = [] + + for lbl in range(1, sitk.GetArrayFromImage(nodule_components).max() + 1): + max_size = self.max_planar_dimension(nodule_components, lbl) + + if max_size < min_size: + nodules_to_remove.append(lbl) + # print("Removing label", lbl, "with size", max_size) + elif 3 <= max_size <= 30: + label_img = sitk.ChangeLabel(label_img, {lbl: label_val_nodule}) + # print("Marking label", lbl, "as Nodule (label 2) with size", max_size) + else: + label_img = sitk.ChangeLabel(label_img, {lbl: label_val_large_nodule}) + # print("Marking label", lbl, "as Large Nodule (label 3) with size", max_size) + + label_img = sitk.ChangeLabel(label_img, {label_val_nodule: label_val_lung}) + big_nodules = sitk.ChangeLabel(nodule_components, {x: 0 for x in nodules_to_remove}) + label_img = sitk.Mask(label_img, 
big_nodules > 0, label_val_nodule, label_val_lung) + label_img = self.n_connected(label_img) + + return label_img + + @IO.Instance() + @IO.Input('in_data', 'nifti:mod=seg:model=nnunet', the='input segmentations') + @IO.Output('out_data', 'bamf_processed.nii.gz', 'nifti:mod=seg:processor=bamf:roi=LUNG,LUNG+NODULE', data='in_data', the="lung and filtered nodules segmentation") + def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None: + + # Log bamf runner info + self.log("Running BamfProcessor on....") + self.log(f" > input data: {in_data.abspath}") + self.log(f" > output data: {out_data.abspath}") + + label_img = sitk.ReadImage(in_data.abspath) + filtered_label_img = self.filter_nodules(label_img, min_size=3) + sitk.WriteImage(filtered_label_img, out_data.abspath) + + + def n_connected(self, img): + img_data = sitk.GetArrayFromImage(img) + img_data_mask = np.zeros(img_data.shape) + img_data_mask[img_data > 0] = 1 + img_filtered = np.zeros(img_data_mask.shape) + blobs_labels = measure.label(img_data_mask, background=0) + lbl, counts = np.unique(blobs_labels, return_counts=True) + lbl_dict = {} + for i, j in zip(lbl, counts): + lbl_dict[i] = j + sorted_dict = dict(sorted(lbl_dict.items(), key=lambda x: x[1], reverse=True)) + count = 0 + + for key, value in sorted_dict.items(): + if count >= 1 and count <= 2: + if count == 1: + val = value + img_filtered[blobs_labels == key] = 1 + if count == 2 and value > (val * 0.2): + img_filtered[blobs_labels == key] = 1 + + count += 1 + + img_data[img_filtered != 1] = 0 + img_masked = sitk.GetImageFromArray(img_data) + img_masked.CopyInformation(img) + return img_masked diff --git a/models/bamf_nnunet_ct_lung/utils/NNUnetRunnerV2.py b/models/bamf_nnunet_ct_lung/utils/NNUnetRunnerV2.py new file mode 100644 index 00000000..6f02d820 --- /dev/null +++ b/models/bamf_nnunet_ct_lung/utils/NNUnetRunnerV2.py @@ -0,0 +1,211 @@ +""" +------------------------------------------------- +MHub - NNU-Net Runner v2 + Runner for pre-trained nnunet v2 models. 
+------------------------------------------------- + +------------------------------------------------- +Author: Rahul Soni +Email: rahul.soni@bamfhealth.com +------------------------------------------------- +""" + + +from typing import List, Optional +import os, subprocess, shutil +import SimpleITK as sitk, numpy as np +from mhubio.core import Module, Instance, InstanceData, DataType, FileType, IO + + +nnunet_dataset_name_regex = r"Dataset[0-9]{3}_[a-zA-Z0-9_]+" + +@IO.ConfigInput('in_data', 'nifti:mod=mr', the="input data to run nnunet on") +@IO.Config('nnunet_dataset', str, None, the='nnunet dataset name') +@IO.Config('nnunet_config', str, None, the='nnunet model name (2d, 3d_lowres, 3d_fullres, 3d_cascade_fullres)') +@IO.Config('folds', int, None, the='number of folds to run nnunet on') +@IO.Config('use_tta', bool, True, the='flag to enable test time augmentation') +@IO.Config('export_prob_maps', bool, False, the='flag to export probability maps') +@IO.Config('prob_map_segments', list, [], the='segment labels for probability maps') +@IO.Config('roi', str, None, the='roi or comma separated list of roi the nnunet segments') +class NNUnetRunnerV2(Module): + + nnunet_dataset: str + nnunet_config: str + input_data_type: DataType + folds: int # TODO: support optional config attributes + use_tta: bool + export_prob_maps: bool + prob_map_segments: list + roi: str + + def export_prob_mask(self, nnunet_out_dir: str, ref_file: InstanceData, output_dtype: str = 'float32', structure_list: Optional[List[str]] = None): + """ + Convert softmax probability maps to NRRD. For simplicity, the probability maps + are converted by default to UInt8 + Arguments: + model_output_folder : required - path to the folder where the inferred segmentation masks should be stored. + ref_file : required - InstanceData object of the generated segmentation mask used as reference file. + output_dtype : optional - output data type. Data type float16 is not supported by the NRRD standard, + so the choice should be between uint8, uint16 or float32. + structure_list : optional - list of the structures whose probability maps are stored in the + first channel of the `.npz` file (output from the nnU-Net pipeline + when `export_prob_maps` is set to True). + Outputs: + This function [...] 
+ """ + + # initialize structure list + if structure_list is None: + if self.roi is not None: + structure_list = self.roi.split(',') + else: + structure_list = [] + + # sanity check user inputs + assert(output_dtype in ["uint8", "uint16", "float32"]) + + # input file containing the raw information + pred_softmax_fn = 'VOLUME_001.npz' + pred_softmax_path = os.path.join(nnunet_out_dir, pred_softmax_fn) + + # parse NRRD file - we will make use of if to populate the header of the + # NRRD mask we are going to get from the inferred segmentation mask + sitk_ct = sitk.ReadImage(ref_file.abspath) + + # generate bundle for prob masks + # TODO: we really have to create folders (or add this as an option that defaults to true) automatically + prob_masks_bundle = ref_file.getDataBundle('prob_masks') + if not os.path.isdir(prob_masks_bundle.abspath): + os.mkdir(prob_masks_bundle.abspath) + + # load softmax probability maps + pred_softmax_all = np.load(pred_softmax_path)["softmax"] + + # iterate all channels + for channel in range(0, len(pred_softmax_all)): + + structure = structure_list[channel] if channel < len(structure_list) else f"structure_{channel}" + pred_softmax_segmask = pred_softmax_all[channel].astype(dtype = np.float32) + + if output_dtype == "float32": + # no rescale needed - the values will be between 0 and 1 + # set SITK image dtype to Float32 + sitk_dtype = sitk.sitkFloat32 + + elif output_dtype == "uint8": + # rescale between 0 and 255, quantize + pred_softmax_segmask = (255*pred_softmax_segmask).astype(np.int32) + # set SITK image dtype to UInt8 + sitk_dtype = sitk.sitkUInt8 + + elif output_dtype == "uint16": + # rescale between 0 and 65536 + pred_softmax_segmask = (65536*pred_softmax_segmask).astype(np.int32) + # set SITK image dtype to UInt16 + sitk_dtype = sitk.sitkUInt16 + else: + raise ValueError("Invalid output data type. Please choose between uint8, uint16 or float32.") + + pred_softmax_segmask_sitk = sitk.GetImageFromArray(pred_softmax_segmask) + pred_softmax_segmask_sitk.CopyInformation(sitk_ct) + pred_softmax_segmask_sitk = sitk.Cast(pred_softmax_segmask_sitk, sitk_dtype) + + # generate data + prob_mask = InstanceData(f'{structure}.nrrd', DataType(FileType.NRRD, {'mod': 'prob_mask', 'structure': structure}), bundle=prob_masks_bundle) + + # export file + writer = sitk.ImageFileWriter() + writer.UseCompressionOn() + writer.SetFileName(prob_mask.abspath) + writer.Execute(pred_softmax_segmask_sitk) + + # check if the file was written + if os.path.isfile(prob_mask.abspath): + self.v(f" > prob mask for {structure} saved to {prob_mask.abspath}") + prob_mask.confirm() + + @IO.Instance() + @IO.Input("in_data", the="input data to run nnunet on") + @IO.Output("out_data", 'VOLUME_001.nii.gz', 'nifti:mod=seg:model=nnunet', data='in_data', the="output data from nnunet") + def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None: + + # get the nnunet model to run + self.v("Running nnUNet_predict.") + self.v(f" > dataset: {self.nnunet_dataset}") + self.v(f" > config: {self.nnunet_config}") + self.v(f" > input data: {in_data.abspath}") + self.v(f" > output data: {out_data.abspath}") + + # download weights if not found + # NOTE: only for testing / debugging. For productiio always provide the weights in the Docker container. 
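+        # NOTE: `nnUNet_download_pretrained_model` is the nnU-Net v1 entry point and may not
+        #       exist in an nnunetv2-only install; since the Dockerfile already pulls the
+        #       Dataset007_Nodules weights into the image, this fallback should normally never run.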
+ if not os.path.isdir(os.path.join(os.environ["WEIGHTS_FOLDER"], '')): + print("Downloading nnUNet model weights...") + bash_command = ["nnUNet_download_pretrained_model", self.nnunet_dataset] + self.subprocess(bash_command, text=True) + + # bring input data in nnunet specific format + # NOTE: only for nifti data as we hardcode the nnunet-formatted-filename (and extension) for now. + assert in_data.type.ftype == FileType.NIFTI + assert in_data.abspath.endswith('.nii.gz') + inp_dir = self.config.data.requestTempDir(label="nnunet-model-inp") + inp_file = f'VOLUME_001_0000.nii.gz' + shutil.copyfile(in_data.abspath, os.path.join(inp_dir, inp_file)) + + # define output folder (temp dir) and also override environment variable for nnunet + out_dir = self.config.data.requestTempDir(label="nnunet-model-out") + os.environ['nnUNet_results'] = out_dir + + # symlink nnunet input folder to the input data with python + # create symlink in python + # NOTE: this is a workaround for the nnunet bash script that expects the input data to be in a specific folder + # structure. This is not the case for the mhub data structure. So we create a symlink to the input data + # in the nnunet input folder structure. + os.symlink(os.path.join(os.environ['WEIGHTS_FOLDER'], self.nnunet_dataset), os.path.join(out_dir, self.nnunet_dataset)) + + # NOTE: instead of running from commandline this could also be done in a pythonic way: + # `nnUNet/nnunet/inference/predict.py` - but it would require + # to set manually all the arguments that the user is not intended + # to fiddle with; so stick with the bash executable + + # construct nnunet inference command + bash_command = ["nnUNetv2_predict"] + bash_command += ["-i", str(inp_dir)] + bash_command += ["-o", str(out_dir)] + bash_command += ["-d", self.nnunet_dataset] + bash_command += ["-c", self.nnunet_config] + + # add optional arguments + if self.folds is not None: + bash_command += ["-f", str(self.folds)] + + if not self.use_tta: + bash_command += ["--disable_tta"] + + if self.export_prob_maps: + bash_command += ["--save_probabilities"] + + self.v(f" > bash_command: {bash_command}") + # run command + self.subprocess(bash_command, text=True) + + # output meta + meta = { + "model": "nnunet", + "nnunet_dataset": self.nnunet_dataset, + "nnunet_config": self.nnunet_config, + "roi": self.roi + } + + # get output data + out_file = f'VOLUME_001.nii.gz' + out_path = os.path.join(out_dir, out_file) + + # copy output data to instance + shutil.copyfile(out_path, out_data.abspath) + + # export probabiliy maps if requested as dynamic data + if self.export_prob_maps: + self.export_prob_mask(str(out_dir), out_data, 'float32', self.prob_map_segments) + + # update meta dynamically + out_data.type.meta += meta diff --git a/models/bamf_nnunet_ct_lung/utils/__init__.py b/models/bamf_nnunet_ct_lung/utils/__init__.py new file mode 100644 index 00000000..d6522730 --- /dev/null +++ b/models/bamf_nnunet_ct_lung/utils/__init__.py @@ -0,0 +1 @@ +from .BamfProcessorRunner import * \ No newline at end of file From 1d77d3a1a212a8553b1467ae4af29b9e2d8b9b21 Mon Sep 17 00:00:00 2001 From: jithenece Date: Wed, 31 Jul 2024 01:13:20 +0530 Subject: [PATCH 02/15] add stats --- models/bamf_nnunet_ct_lung/meta.json | 25 ++++- .../utils/NNUnetRunnerV2.py | 94 ------------------- 2 files changed, 24 insertions(+), 95 deletions(-) diff --git a/models/bamf_nnunet_ct_lung/meta.json b/models/bamf_nnunet_ct_lung/meta.json index ba224c71..0a7e255c 100644 --- a/models/bamf_nnunet_ct_lung/meta.json +++ 
b/models/bamf_nnunet_ct_lung/meta.json @@ -79,7 +79,30 @@ }, "evaluation": { "title": "Evaluation Data", - "text": "The model was used to segment cases 1157 from the QIN LUNG CT [1], SPIE-AAPM Lung CT Challenge [2] and NLST [3] collection. 112 of those cases were randomly selected to be reviewed and corrected by a board-certified radiologist.", + "text": "Quantitative metrics between AI and Radiologists annotations. The model was used to segment cases 1157 from the QIN LUNG CT [1], SPIE-AAPM Lung CT Challenge [2] and NLST [3] collection. 112 of those cases were randomly selected to be reviewed and corrected by a board-certified radiologist.", + "tables": [ + { + "label": "Dice Score", + "entries": { + "Lung": "1.0±0.0", + "Nodules": "0.78±0.28" + } + }, + { + "label": "95% Hausdorff Distance", + "entries": { + "Lung": "0.00±0.00", + "Nodules": "62.07±10.54" + } + }, + { + "label": "Normalized surface distance ", + "entries": { + "Lung": "0.02±0.11", + "Nodules": "10.54±14.43" + } + } + ], "references": [ { "label": "QIN LUNG CT", diff --git a/models/bamf_nnunet_ct_lung/utils/NNUnetRunnerV2.py b/models/bamf_nnunet_ct_lung/utils/NNUnetRunnerV2.py index 6f02d820..b18e3cec 100644 --- a/models/bamf_nnunet_ct_lung/utils/NNUnetRunnerV2.py +++ b/models/bamf_nnunet_ct_lung/utils/NNUnetRunnerV2.py @@ -24,8 +24,6 @@ @IO.Config('nnunet_config', str, None, the='nnunet model name (2d, 3d_lowres, 3d_fullres, 3d_cascade_fullres)') @IO.Config('folds', int, None, the='number of folds to run nnunet on') @IO.Config('use_tta', bool, True, the='flag to enable test time augmentation') -@IO.Config('export_prob_maps', bool, False, the='flag to export probability maps') -@IO.Config('prob_map_segments', list, [], the='segment labels for probability maps') @IO.Config('roi', str, None, the='roi or comma separated list of roi the nnunet segments') class NNUnetRunnerV2(Module): @@ -34,96 +32,8 @@ class NNUnetRunnerV2(Module): input_data_type: DataType folds: int # TODO: support optional config attributes use_tta: bool - export_prob_maps: bool - prob_map_segments: list roi: str - def export_prob_mask(self, nnunet_out_dir: str, ref_file: InstanceData, output_dtype: str = 'float32', structure_list: Optional[List[str]] = None): - """ - Convert softmax probability maps to NRRD. For simplicity, the probability maps - are converted by default to UInt8 - Arguments: - model_output_folder : required - path to the folder where the inferred segmentation masks should be stored. - ref_file : required - InstanceData object of the generated segmentation mask used as reference file. - output_dtype : optional - output data type. Data type float16 is not supported by the NRRD standard, - so the choice should be between uint8, uint16 or float32. - structure_list : optional - list of the structures whose probability maps are stored in the - first channel of the `.npz` file (output from the nnU-Net pipeline - when `export_prob_maps` is set to True). - Outputs: - This function [...] 
- """ - - # initialize structure list - if structure_list is None: - if self.roi is not None: - structure_list = self.roi.split(',') - else: - structure_list = [] - - # sanity check user inputs - assert(output_dtype in ["uint8", "uint16", "float32"]) - - # input file containing the raw information - pred_softmax_fn = 'VOLUME_001.npz' - pred_softmax_path = os.path.join(nnunet_out_dir, pred_softmax_fn) - - # parse NRRD file - we will make use of if to populate the header of the - # NRRD mask we are going to get from the inferred segmentation mask - sitk_ct = sitk.ReadImage(ref_file.abspath) - - # generate bundle for prob masks - # TODO: we really have to create folders (or add this as an option that defaults to true) automatically - prob_masks_bundle = ref_file.getDataBundle('prob_masks') - if not os.path.isdir(prob_masks_bundle.abspath): - os.mkdir(prob_masks_bundle.abspath) - - # load softmax probability maps - pred_softmax_all = np.load(pred_softmax_path)["softmax"] - - # iterate all channels - for channel in range(0, len(pred_softmax_all)): - - structure = structure_list[channel] if channel < len(structure_list) else f"structure_{channel}" - pred_softmax_segmask = pred_softmax_all[channel].astype(dtype = np.float32) - - if output_dtype == "float32": - # no rescale needed - the values will be between 0 and 1 - # set SITK image dtype to Float32 - sitk_dtype = sitk.sitkFloat32 - - elif output_dtype == "uint8": - # rescale between 0 and 255, quantize - pred_softmax_segmask = (255*pred_softmax_segmask).astype(np.int32) - # set SITK image dtype to UInt8 - sitk_dtype = sitk.sitkUInt8 - - elif output_dtype == "uint16": - # rescale between 0 and 65536 - pred_softmax_segmask = (65536*pred_softmax_segmask).astype(np.int32) - # set SITK image dtype to UInt16 - sitk_dtype = sitk.sitkUInt16 - else: - raise ValueError("Invalid output data type. 
Please choose between uint8, uint16 or float32.") - - pred_softmax_segmask_sitk = sitk.GetImageFromArray(pred_softmax_segmask) - pred_softmax_segmask_sitk.CopyInformation(sitk_ct) - pred_softmax_segmask_sitk = sitk.Cast(pred_softmax_segmask_sitk, sitk_dtype) - - # generate data - prob_mask = InstanceData(f'{structure}.nrrd', DataType(FileType.NRRD, {'mod': 'prob_mask', 'structure': structure}), bundle=prob_masks_bundle) - - # export file - writer = sitk.ImageFileWriter() - writer.UseCompressionOn() - writer.SetFileName(prob_mask.abspath) - writer.Execute(pred_softmax_segmask_sitk) - - # check if the file was written - if os.path.isfile(prob_mask.abspath): - self.v(f" > prob mask for {structure} saved to {prob_mask.abspath}") - prob_mask.confirm() - @IO.Instance() @IO.Input("in_data", the="input data to run nnunet on") @IO.Output("out_data", 'VOLUME_001.nii.gz', 'nifti:mod=seg:model=nnunet', data='in_data', the="output data from nnunet") @@ -203,9 +113,5 @@ def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData # copy output data to instance shutil.copyfile(out_path, out_data.abspath) - # export probabiliy maps if requested as dynamic data - if self.export_prob_maps: - self.export_prob_mask(str(out_dir), out_data, 'float32', self.prob_map_segments) - # update meta dynamically out_data.type.meta += meta From c9a1cfe0ff3238f23ebe66c4df69c441084f3a91 Mon Sep 17 00:00:00 2001 From: jithenece Date: Wed, 31 Jul 2024 01:15:21 +0530 Subject: [PATCH 03/15] change folder name --- .../config/default.yml | 2 +- .../dockerfiles/Dockerfile | 4 ++-- .../meta.json | 2 +- .../utils/BamfProcessorRunner.py | 0 .../utils/NNUnetRunnerV2.py | 0 .../utils/__init__.py | 0 6 files changed, 4 insertions(+), 4 deletions(-) rename models/{bamf_nnunet_ct_lung => bamf_nnunet_ct_lungnodules}/config/default.yml (92%) rename models/{bamf_nnunet_ct_lung => bamf_nnunet_ct_lungnodules}/dockerfiles/Dockerfile (85%) rename models/{bamf_nnunet_ct_lung => bamf_nnunet_ct_lungnodules}/meta.json (99%) rename models/{bamf_nnunet_ct_lung => bamf_nnunet_ct_lungnodules}/utils/BamfProcessorRunner.py (100%) rename models/{bamf_nnunet_ct_lung => bamf_nnunet_ct_lungnodules}/utils/NNUnetRunnerV2.py (100%) rename models/{bamf_nnunet_ct_lung => bamf_nnunet_ct_lungnodules}/utils/__init__.py (100%) diff --git a/models/bamf_nnunet_ct_lung/config/default.yml b/models/bamf_nnunet_ct_lungnodules/config/default.yml similarity index 92% rename from models/bamf_nnunet_ct_lung/config/default.yml rename to models/bamf_nnunet_ct_lungnodules/config/default.yml index 19d149b3..1564bee2 100644 --- a/models/bamf_nnunet_ct_lung/config/default.yml +++ b/models/bamf_nnunet_ct_lungnodules/config/default.yml @@ -33,4 +33,4 @@ modules: DataOrganizer: targets: - - dicomseg-->[i:sid]/bamf_ct_lung_nodule.seg.dcm + - dicomseg-->[i:sid]/bamf_nnunet_ct_lungnodules.seg.dcm diff --git a/models/bamf_nnunet_ct_lung/dockerfiles/Dockerfile b/models/bamf_nnunet_ct_lungnodules/dockerfiles/Dockerfile similarity index 85% rename from models/bamf_nnunet_ct_lung/dockerfiles/Dockerfile rename to models/bamf_nnunet_ct_lungnodules/dockerfiles/Dockerfile index f8d234d0..f7ebf9eb 100644 --- a/models/bamf_nnunet_ct_lung/dockerfiles/Dockerfile +++ b/models/bamf_nnunet_ct_lungnodules/dockerfiles/Dockerfile @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir nnunetv2==2.0 # Clone the main branch of MHubAI/models ARG MHUB_MODELS_REPO -RUN buildutils/import_mhub_model.sh bamf_ct_lung_nodule ${MHUB_MODELS_REPO} +RUN buildutils/import_mhub_model.sh 
bamf_nnunet_ct_lungnodules ${MHUB_MODELS_REPO} # Pull weights into the container ENV WEIGHTS_DIR=/root/.nnunet/nnUNet_models/nnUNet/ @@ -28,4 +28,4 @@ ENV WEIGHTS_FOLDER=$WEIGHTS_DIR # Default run script ENTRYPOINT ["mhub.run"] -CMD ["--config", "/app/models/bamf_ct_lung_nodule/config/default.yml"] +CMD ["--config", "/app/models/bamf_nnunet_ct_lungnodules/config/default.yml"] diff --git a/models/bamf_nnunet_ct_lung/meta.json b/models/bamf_nnunet_ct_lungnodules/meta.json similarity index 99% rename from models/bamf_nnunet_ct_lung/meta.json rename to models/bamf_nnunet_ct_lungnodules/meta.json index 0a7e255c..56e8f0e3 100644 --- a/models/bamf_nnunet_ct_lung/meta.json +++ b/models/bamf_nnunet_ct_lungnodules/meta.json @@ -1,6 +1,6 @@ { "id": "", - "name": "bamf_nnunet_ct_lung", + "name": "bamf_nnunet_ct_lungnodules", "title": "AIMI CT Lung and Nodules", "summary": { "description": "An nnU-Net based model to segment Lung and Nodules (3mm-30mm) from CT scans", diff --git a/models/bamf_nnunet_ct_lung/utils/BamfProcessorRunner.py b/models/bamf_nnunet_ct_lungnodules/utils/BamfProcessorRunner.py similarity index 100% rename from models/bamf_nnunet_ct_lung/utils/BamfProcessorRunner.py rename to models/bamf_nnunet_ct_lungnodules/utils/BamfProcessorRunner.py diff --git a/models/bamf_nnunet_ct_lung/utils/NNUnetRunnerV2.py b/models/bamf_nnunet_ct_lungnodules/utils/NNUnetRunnerV2.py similarity index 100% rename from models/bamf_nnunet_ct_lung/utils/NNUnetRunnerV2.py rename to models/bamf_nnunet_ct_lungnodules/utils/NNUnetRunnerV2.py diff --git a/models/bamf_nnunet_ct_lung/utils/__init__.py b/models/bamf_nnunet_ct_lungnodules/utils/__init__.py similarity index 100% rename from models/bamf_nnunet_ct_lung/utils/__init__.py rename to models/bamf_nnunet_ct_lungnodules/utils/__init__.py From 6c1f42180b714dd3026e0287c75363d93ff0b686 Mon Sep 17 00:00:00 2001 From: jithenece Date: Wed, 31 Jul 2024 01:19:19 +0530 Subject: [PATCH 04/15] add analyses to meta --- models/bamf_nnunet_ct_lungnodules/meta.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/models/bamf_nnunet_ct_lungnodules/meta.json b/models/bamf_nnunet_ct_lungnodules/meta.json index 56e8f0e3..3608803f 100644 --- a/models/bamf_nnunet_ct_lungnodules/meta.json +++ b/models/bamf_nnunet_ct_lungnodules/meta.json @@ -77,6 +77,10 @@ "title": "Intended Use", "text": "This model is intended to perform lung and nodule segmentation in CT scans. The model has been trained and tested on scans aquired during clinical care of patients, so it might not be suited for a healthy population. The generalization capabilities of the model on a range of ages, genders, and ethnicities are unknown." }, + "analyses": { + "title": "Quantitative Analyses", + "text": "The model's performance was assessed using the Dice Coefficient, Hausdorff distance and NSD" + }, "evaluation": { "title": "Evaluation Data", "text": "Quantitative metrics between AI and Radiologists annotations. The model was used to segment cases 1157 from the QIN LUNG CT [1], SPIE-AAPM Lung CT Challenge [2] and NLST [3] collection. 
112 of those cases were randomly selected to be reviewed and corrected by a board-certified radiologist.", From 314e4b213f0ce349471f8a9df30301e135b15168 Mon Sep 17 00:00:00 2001 From: jithenece Date: Thu, 8 Aug 2024 16:57:57 +0530 Subject: [PATCH 05/15] update comments --- .../utils/BamfProcessorRunner.py | 63 +++++++++++++++---- 1 file changed, 50 insertions(+), 13 deletions(-) diff --git a/models/bamf_nnunet_ct_lungnodules/utils/BamfProcessorRunner.py b/models/bamf_nnunet_ct_lungnodules/utils/BamfProcessorRunner.py index 617b33ef..b23051e1 100644 --- a/models/bamf_nnunet_ct_lungnodules/utils/BamfProcessorRunner.py +++ b/models/bamf_nnunet_ct_lungnodules/utils/BamfProcessorRunner.py @@ -18,6 +18,16 @@ class BamfProcessorRunner(Module): def max_planar_dimension(self, label_img, label_cnt): + """ + Calculate the maximum planar dimension of a specific label in a 3D label image. + + Args: + label_img (sitk.Image): The 3D label image. + label_cnt (int): The label number to analyze. + + Returns: + float: The maximum size of the label in millimeters (mm) across the most planar dimension. + """ tumor = label_img == label_cnt assert tumor.GetDimension() == 3 @@ -42,6 +52,16 @@ def max_planar_dimension(self, label_img, label_cnt): return max_planar_size def filter_nodules(self, label_img, min_size=3): + """ + Filter lung nodules based on their size and re-label them accordingly. + + Args: + label_img (sitk.Image): The 3D label image containing lung and nodule labels. + min_size (float): Minimum planar size (in mm) to retain a nodule. + + Returns: + sitk.Image: The processed label image with nodules filtered by size. + """ label_val_lung = 1 label_val_nodule = 2 label_val_large_nodule = 3 @@ -71,22 +91,17 @@ def filter_nodules(self, label_img, min_size=3): return label_img - @IO.Instance() - @IO.Input('in_data', 'nifti:mod=seg:model=nnunet', the='input segmentations') - @IO.Output('out_data', 'bamf_processed.nii.gz', 'nifti:mod=seg:processor=bamf:roi=LUNG,LUNG+NODULE', data='in_data', the="lung and filtered nodules segmentation") - def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None: - # Log bamf runner info - self.log("Running BamfProcessor on....") - self.log(f" > input data: {in_data.abspath}") - self.log(f" > output data: {out_data.abspath}") - - label_img = sitk.ReadImage(in_data.abspath) - filtered_label_img = self.filter_nodules(label_img, min_size=3) - sitk.WriteImage(filtered_label_img, out_data.abspath) + def n_connected(self, img): + """ + Retain the largest connected components in a binary label image. + Args: + img (sitk.Image): The input binary label image. - def n_connected(self, img): + Returns: + sitk.Image: The processed image with only the largest connected components retained. + """ img_data = sitk.GetArrayFromImage(img) img_data_mask = np.zeros(img_data.shape) img_data_mask[img_data > 0] = 1 @@ -113,3 +128,25 @@ def n_connected(self, img): img_masked = sitk.GetImageFromArray(img_data) img_masked.CopyInformation(img) return img_masked + + @IO.Instance() + @IO.Input('in_data', 'nifti:mod=seg:model=nnunet', the='input segmentations') + @IO.Output('out_data', 'bamf_processed.nii.gz', 'nifti:mod=seg:processor=bamf:roi=LUNG,LUNG+NODULE', data='in_data', the="lung and filtered nodules segmentation") + def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None: + """ + Main task function that processes the input lung and nodule segmentations, + filters nodules based on their size, and writes the output image. 
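+        Components whose maximal planar size falls below ``min_size`` are discarded from
+        the nodule label, components of roughly 3-30 mm are kept as nodules, and larger
+        components are relabeled as large nodules (see ``filter_nodules`` above).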
+ + Args: + instance (Instance): The MHub instance for processing. + in_data (InstanceData): Input data containing the segmentation. + out_data (InstanceData): Output data path to save the processed image. + """ + # Log bamf runner info + self.log("Running BamfProcessor on....") + self.log(f" > input data: {in_data.abspath}") + self.log(f" > output data: {out_data.abspath}") + + label_img = sitk.ReadImage(in_data.abspath) + filtered_label_img = self.filter_nodules(label_img, min_size=3) + sitk.WriteImage(filtered_label_img, out_data.abspath) \ No newline at end of file From 3f6b77d858ea4f904597277bd518f805c700acb5 Mon Sep 17 00:00:00 2001 From: jithenece Date: Thu, 8 Aug 2024 17:15:33 +0530 Subject: [PATCH 06/15] remove export map --- models/bamf_nnunet_ct_lungnodules/utils/NNUnetRunnerV2.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/models/bamf_nnunet_ct_lungnodules/utils/NNUnetRunnerV2.py b/models/bamf_nnunet_ct_lungnodules/utils/NNUnetRunnerV2.py index b18e3cec..38b837c0 100644 --- a/models/bamf_nnunet_ct_lungnodules/utils/NNUnetRunnerV2.py +++ b/models/bamf_nnunet_ct_lungnodules/utils/NNUnetRunnerV2.py @@ -90,9 +90,6 @@ def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData if not self.use_tta: bash_command += ["--disable_tta"] - - if self.export_prob_maps: - bash_command += ["--save_probabilities"] self.v(f" > bash_command: {bash_command}") # run command From cbdbbd7d047bfeebe558b5bbbd4a77e337b85b9d Mon Sep 17 00:00:00 2001 From: jithenece Date: Thu, 22 Aug 2024 14:49:28 +0530 Subject: [PATCH 07/15] fix code review comments --- .../config/default.yml | 7 +-- .../utils/NNUnetRunnerV2.py | 59 +++---------------- 2 files changed, 11 insertions(+), 55 deletions(-) diff --git a/models/bamf_nnunet_ct_lungnodules/config/default.yml b/models/bamf_nnunet_ct_lungnodules/config/default.yml index 1564bee2..703e2d4d 100644 --- a/models/bamf_nnunet_ct_lungnodules/config/default.yml +++ b/models/bamf_nnunet_ct_lungnodules/config/default.yml @@ -1,7 +1,7 @@ general: data_base_dir: /app/data version: 1.0 - description: default configuration for 3D semantic image segmentation of the lung and lung nodules from ct scan + description: default configuration for 3D semantic image segmentation of the lung and lung nodules from ct scan (dicom to dicom) execute: - DicomImporter @@ -21,10 +21,7 @@ modules: NNUnetRunnerV2: in_data: nifti:mod=ct - nnunet_dataset: Dataset007_Nodules - nnunet_config: 3d_fullres - roi: LUNG,LUNG+NODULE - + DsegConverter: model_name: bamf_ct_lung_nodule target_dicom: dicom:mod=ct diff --git a/models/bamf_nnunet_ct_lungnodules/utils/NNUnetRunnerV2.py b/models/bamf_nnunet_ct_lungnodules/utils/NNUnetRunnerV2.py index 38b837c0..f93748f4 100644 --- a/models/bamf_nnunet_ct_lungnodules/utils/NNUnetRunnerV2.py +++ b/models/bamf_nnunet_ct_lungnodules/utils/NNUnetRunnerV2.py @@ -1,50 +1,36 @@ """ ------------------------------------------------- MHub - NNU-Net Runner v2 - Runner for pre-trained nnunet v2 models. + Custom Runner for pre-trained nnunet v2 models. 
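+    (preconfigured for the Dataset007_Nodules weights and the 3d_fullres configuration)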
 -------------------------------------------------
 
 -------------------------------------------------
-Author: Rahul Soni
-Email:  rahul.soni@bamfhealth.com
+Author: Jithendra Kumar
+Email:  jithendra.kumar@bamfhealth.com
 -------------------------------------------------
 """
 
 
-from typing import List, Optional
-import os, subprocess, shutil
-import SimpleITK as sitk, numpy as np
+import os, shutil
 from mhubio.core import Module, Instance, InstanceData, DataType, FileType, IO
 
 
-nnunet_dataset_name_regex = r"Dataset[0-9]{3}_[a-zA-Z0-9_]+"
 
-@IO.ConfigInput('in_data', 'nifti:mod=mr', the="input data to run nnunet on")
-@IO.Config('nnunet_dataset', str, None, the='nnunet dataset name')
-@IO.Config('nnunet_config', str, None, the='nnunet model name (2d, 3d_lowres, 3d_fullres, 3d_cascade_fullres)')
-@IO.Config('folds', int, None, the='number of folds to run nnunet on')
-@IO.Config('use_tta', bool, True, the='flag to enable test time augmentation')
-@IO.Config('roi', str, None, the='roi or comma separated list of roi the nnunet segments')
+@IO.ConfigInput('in_data', 'nifti:mod=ct', the="input data to run nnunet on")
 class NNUnetRunnerV2(Module):
 
-    nnunet_dataset: str
-    nnunet_config: str
+    nnunet_dataset: str = 'Dataset007_Nodules'
+    nnunet_config: str = '3d_fullres'
     input_data_type: DataType
-    folds: int                  # TODO: support optional config attributes
-    use_tta: bool
-    roi: str
 
     @IO.Instance()
     @IO.Input("in_data", the="input data to run nnunet on")
-    @IO.Output("out_data", 'VOLUME_001.nii.gz', 'nifti:mod=seg:model=nnunet', data='in_data', the="output data from nnunet")
+    @IO.Output("out_data", 'VOLUME_001.nii.gz', 'nifti:mod=seg:model=nnunet:roi=LUNG,LUNG+NODULE', data='in_data', the="output data from nnunet")
     def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None:
-
+        
         # get the nnunet model to run
-        self.v("Running nnUNet_predict.")
-        self.v(f" > dataset: {self.nnunet_dataset}")
-        self.v(f" > config: {self.nnunet_config}")
+        self.v("Running nnUNetv2_predict.")
         self.v(f" > input data: {in_data.abspath}")
-        self.v(f" > output data: {out_data.abspath}")
 
         # download weights if not found
         # NOTE: only for testing / debugging. For production always provide the weights in the Docker container.
@@ -53,10 +39,6 @@ def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData
         print("Downloading nnUNet model weights...")
         bash_command = ["nnUNet_download_pretrained_model", self.nnunet_dataset]
         self.subprocess(bash_command, text=True)
 
-        # bring input data in nnunet specific format
-        # NOTE: only for nifti data as we hardcode the nnunet-formatted-filename (and extension) for now.
-        assert in_data.type.ftype == FileType.NIFTI
-        assert in_data.abspath.endswith('.nii.gz')
         inp_dir = self.config.data.requestTempDir(label="nnunet-model-inp")
         inp_file = f'VOLUME_001_0000.nii.gz'
         shutil.copyfile(in_data.abspath, os.path.join(inp_dir, inp_file))
@@ -71,11 +53,6 @@ def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData
         # structure. This is not the case for the mhub data structure. So we create a symlink to the input data
         # in the nnunet input folder structure.
os.symlink(os.path.join(os.environ['WEIGHTS_FOLDER'], self.nnunet_dataset), os.path.join(out_dir, self.nnunet_dataset)) - - # NOTE: instead of running from commandline this could also be done in a pythonic way: - # `nnUNet/nnunet/inference/predict.py` - but it would require - # to set manually all the arguments that the user is not intended - # to fiddle with; so stick with the bash executable # construct nnunet inference command bash_command = ["nnUNetv2_predict"] @@ -83,32 +60,14 @@ def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData bash_command += ["-o", str(out_dir)] bash_command += ["-d", self.nnunet_dataset] bash_command += ["-c", self.nnunet_config] - - # add optional arguments - if self.folds is not None: - bash_command += ["-f", str(self.folds)] - - if not self.use_tta: - bash_command += ["--disable_tta"] self.v(f" > bash_command: {bash_command}") # run command self.subprocess(bash_command, text=True) - # output meta - meta = { - "model": "nnunet", - "nnunet_dataset": self.nnunet_dataset, - "nnunet_config": self.nnunet_config, - "roi": self.roi - } - # get output data out_file = f'VOLUME_001.nii.gz' out_path = os.path.join(out_dir, out_file) # copy output data to instance shutil.copyfile(out_path, out_data.abspath) - - # update meta dynamically - out_data.type.meta += meta From 78fb5800efe303ecdf0db6601fe705d4480548fd Mon Sep 17 00:00:00 2001 From: jithenece Date: Fri, 23 Aug 2024 01:01:27 +0530 Subject: [PATCH 08/15] fix meta json --- models/bamf_nnunet_ct_lungnodules/meta.json | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/models/bamf_nnunet_ct_lungnodules/meta.json b/models/bamf_nnunet_ct_lungnodules/meta.json index 3608803f..953b6ff4 100644 --- a/models/bamf_nnunet_ct_lungnodules/meta.json +++ b/models/bamf_nnunet_ct_lungnodules/meta.json @@ -37,7 +37,7 @@ "vol_samples": 1299 }, "evaluation": { - "vol_samples": 9 + "vol_samples": 114 }, "public": true, "external": true @@ -70,20 +70,20 @@ "uri": "https://arxiv.org/abs/2310.14897" } ], - "github": "https://github.com/MHubAI/models/tree/main/models/bamf_nnunet_ct_lung" + "github": "https://github.com/bamf-health/aimi-lung2-ct" }, "info": { "use": { "title": "Intended Use", - "text": "This model is intended to perform lung and nodule segmentation in CT scans. The model has been trained and tested on scans aquired during clinical care of patients, so it might not be suited for a healthy population. The generalization capabilities of the model on a range of ages, genders, and ethnicities are unknown." + "text": "The lung and nodule segmentation model processes thoracic CT scans to identify and delineate lung structures and nodules. It aids in lung cancer screening and diagnostic support by automating nodule detection, thus improving efficiency and consistency. " }, "analyses": { "title": "Quantitative Analyses", - "text": "The model's performance was assessed using the Dice Coefficient, Hausdorff distance and NSD" + "text": "The model's performance was assessed using the Dice Coefficient, Hausdorff distance and NSD. Source radiological images from publicly available NCI IDC collections were filtered to match the modality and region requirements. To ensure the quality of AI-generated annotations, 10% of these annotations were evaluated by radiologists. " }, "evaluation": { "title": "Evaluation Data", - "text": "Quantitative metrics between AI and Radiologists annotations. 
The model was used to segment cases 1157 from the QIN LUNG CT [1], SPIE-AAPM Lung CT Challenge [2] and NLST [3] collection. 112 of those cases were randomly selected to be reviewed and corrected by a board-certified radiologist.",
+      "text": "Quantitative metrics between AI and radiologist annotations. The model was used to segment 1157 cases from the QIN LUNG CT [1], SPIE-AAPM Lung CT Challenge [2] and NLST [3] collections. 114 of those cases were randomly selected to be reviewed and corrected by a board-certified radiologist.",
@@ -125,7 +125,7 @@
     },
     "training": {
       "title": "Training Data",
-      "text": "",
+      "text": "416 CT cases from NSCLC-Radiomics [2] and 883 CT cases from DICOM-LIDC-IDRI-Nodules [1] were used to train the model. Annotations for the lung regions in the training dataset were generated utilizing Totalsegmentator[3]"
       "references": [
         {
           "label": "DICOM-LIDC-IDRI-Nodules",
           "uri": "https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=44499647"
         },
         {
           "label": "NSCLC-Radiomics",
           "uri": "https://www.cancerimagingarchive.net/collection/nsclc-radiomics/"
+        },
+        {
+          "label": "Totalsegmentator",
+          "uri": "https://mhub.ai/models/totalsegmentator"
         }
       ]
+    },
+    "limitations": {
+      "title": "Limitations",
+      "text": "The model has been trained and tested on scans aquired during clinical care of patients, so it might not be suited for a healthy population. The generalization capabilities of the model on a range of ages, genders, and ethnicities are unknown.",
     }
   }
 }
\ No newline at end of file

From 527cd365d81d9dc03714acb0490456b336b4a88b Mon Sep 17 00:00:00 2001
From: jithenece
Date: Fri, 23 Aug 2024 17:24:02 +0530
Subject: [PATCH 09/15] fix meta file

---
 models/bamf_nnunet_ct_lungnodules/meta.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/models/bamf_nnunet_ct_lungnodules/meta.json b/models/bamf_nnunet_ct_lungnodules/meta.json
index 953b6ff4..e0476325 100644
--- a/models/bamf_nnunet_ct_lungnodules/meta.json
+++ b/models/bamf_nnunet_ct_lungnodules/meta.json
@@ -75,7 +75,7 @@
   "info": {
     "use": {
       "title": "Intended Use",
-      "text": "The lung and nodule segmentation model processes thoracic CT scans to identify and delineate lung structures and nodules. It aids in lung cancer screening and diagnostic support by automating nodule detection, thus improving efficiency and consistency. "
+      "text": "This model is designed to analyze thoracic CT scans and segment lung structures and nodules. Input CT images are processed with a U-Net-based deep learning pipeline that identifies and delineates lung regions and nodules, assisting in lung cancer screening and diagnostics."
     },
     "analyses": {
       "title": "Quantitative Analyses",
@@ -143,7 +143,7 @@
     },
     "limitations": {
       "title": "Limitations",
-      "text": "The model has been trained and tested on scans aquired during clinical care of patients, so it might not be suited for a healthy population. The generalization capabilities of the model on a range of ages, genders, and ethnicities are unknown.",
+      "text": "The model has been trained and tested on scans acquired during clinical care of patients, so it might not be suited for a healthy population. The generalization capabilities of the model on a range of ages, genders, and ethnicities are unknown."
     }
   }
 }
\ No newline at end of file

From d6b506c2edcb11c64fbe69a8576811e7dcd17302 Mon Sep 17 00:00:00 2001
From: jithenece
Date: Fri, 23 Aug 2024 22:36:32 +0530
Subject: [PATCH 10/15] fix comments

---
 models/bamf_nnunet_ct_lungnodules/meta.json |  2 +-
 .../utils/NNUnetRunnerV2.py                 | 11 +++++------
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/models/bamf_nnunet_ct_lungnodules/meta.json b/models/bamf_nnunet_ct_lungnodules/meta.json
index e0476325..ce6b5bce 100644
--- a/models/bamf_nnunet_ct_lungnodules/meta.json
+++ b/models/bamf_nnunet_ct_lungnodules/meta.json
@@ -125,7 +125,7 @@
     },
     "training": {
       "title": "Training Data",
-      "text": "416 CT cases from NSCLC-Radiomics [2] and 883 CT cases from DICOM-LIDC-IDRI-Nodules [1] were used to train the model. Annotations for the lung regions in the training dataset were generated utilizing Totalsegmentator[3]"
+      "text": "416 CT cases from NSCLC-Radiomics [2] and 883 CT cases from DICOM-LIDC-IDRI-Nodules [1] were used to train the model. Annotations for the lung regions in the training dataset were generated utilizing Totalsegmentator[3]",
       "references": [
         {
           "label": "DICOM-LIDC-IDRI-Nodules",
diff --git a/models/bamf_nnunet_ct_lungnodules/utils/NNUnetRunnerV2.py b/models/bamf_nnunet_ct_lungnodules/utils/NNUnetRunnerV2.py
index f93748f4..aab7296c 100644
--- a/models/bamf_nnunet_ct_lungnodules/utils/NNUnetRunnerV2.py
+++ b/models/bamf_nnunet_ct_lungnodules/utils/NNUnetRunnerV2.py
@@ -25,9 +25,10 @@ class NNUnetRunnerV2(Module):
 
     @IO.Instance()
     @IO.Input("in_data", the="input data to run nnunet on")
-    @IO.Output("out_data", 'VOLUME_001.nii.gz', 'nifti:mod=seg:model=nnunet:roi=LUNG,LUNG+NODULE', data='in_data', the="output data from nnunet")
+    @IO.Output("out_data", 'VOLUME_001.nii.gz', 'nifti:mod=seg:model=nnunet:nnunet_dataset=Dataset007_Nodules:'
+               'nnunet_config=3d_fullres:roi=LUNG,LUNG+NODULE', data='in_data', the="output data from nnunet")
     def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None:
-
+        
         # get the nnunet model to run
         self.v("Running nnUNetv2_predict.")
         self.v(f" > input data: {in_data.abspath}")
@@ -47,11 +48,9 @@ def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData
         out_dir = self.config.data.requestTempDir(label="nnunet-model-out")
         os.environ['nnUNet_results'] = out_dir
 
-        # symlink nnunet input folder to the input data with python
         # create symlink in python
-        # NOTE: this is a workaround for the nnunet bash script that expects the input data to be in a specific folder
-        # structure. This is not the case for the mhub data structure. So we create a symlink to the input data
-        # in the nnunet input folder structure.
+        # NOTE: this is a workaround for the nnunet bash script that expects the model data to be in an output folder
+        # structure. This is not the case for the mhub data structure.
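+        #       The symlink below exposes the packaged weights inside the temporary folder that
+        #       nnUNetv2_predict resolves trained models from via the nnUNet_results variable.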
os.symlink(os.path.join(os.environ['WEIGHTS_FOLDER'], self.nnunet_dataset), os.path.join(out_dir, self.nnunet_dataset)) # construct nnunet inference command From f7bac4a1e48e9dc8e95b890696344276024fec9f Mon Sep 17 00:00:00 2001 From: jithenece Date: Sat, 24 Aug 2024 00:34:08 +0530 Subject: [PATCH 11/15] fix slice thickness --- models/bamf_nnunet_ct_lungnodules/meta.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/bamf_nnunet_ct_lungnodules/meta.json b/models/bamf_nnunet_ct_lungnodules/meta.json index ce6b5bce..384ac28e 100644 --- a/models/bamf_nnunet_ct_lungnodules/meta.json +++ b/models/bamf_nnunet_ct_lungnodules/meta.json @@ -11,7 +11,7 @@ "format": "DICOM", "modality": "CT", "bodypartexamined": "LUNG", - "slicethickness": "10mm", + "slicethickness": "3mm", "non-contrast": true, "contrast": false } From 6d2f6543d6a7828c8c5f5b7024a7618832c746a8 Mon Sep 17 00:00:00 2001 From: jithenece Date: Mon, 26 Aug 2024 15:38:09 +0530 Subject: [PATCH 12/15] use dcm2niix engine --- models/bamf_nnunet_ct_lungnodules/config/default.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/models/bamf_nnunet_ct_lungnodules/config/default.yml b/models/bamf_nnunet_ct_lungnodules/config/default.yml index 703e2d4d..c91e72c0 100644 --- a/models/bamf_nnunet_ct_lungnodules/config/default.yml +++ b/models/bamf_nnunet_ct_lungnodules/config/default.yml @@ -19,11 +19,14 @@ modules: meta: mod: '%Modality' + NiftiConverter: + engine: dcm2niix + NNUnetRunnerV2: in_data: nifti:mod=ct DsegConverter: - model_name: bamf_ct_lung_nodule + model_name: bamf_nnunet_ct_lungnodules target_dicom: dicom:mod=ct source_segs: nifti:mod=seg:processor=bamf skip_empty_slices: True From 4a71c206a27b5bd754e90f8494dbc9e4cc5cbddf Mon Sep 17 00:00:00 2001 From: jithenece Date: Mon, 26 Aug 2024 19:48:23 +0000 Subject: [PATCH 13/15] update model name --- models/bamf_nnunet_ct_lungnodules/config/default.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/bamf_nnunet_ct_lungnodules/config/default.yml b/models/bamf_nnunet_ct_lungnodules/config/default.yml index c91e72c0..306adb2c 100644 --- a/models/bamf_nnunet_ct_lungnodules/config/default.yml +++ b/models/bamf_nnunet_ct_lungnodules/config/default.yml @@ -26,7 +26,7 @@ modules: in_data: nifti:mod=ct DsegConverter: - model_name: bamf_nnunet_ct_lungnodules + model_name: BAMF Lung and Lung Nodule AI Segmentation target_dicom: dicom:mod=ct source_segs: nifti:mod=seg:processor=bamf skip_empty_slices: True From f81380630061f8f181f3f05fe2742f7f9d3b2e46 Mon Sep 17 00:00:00 2001 From: jithenece Date: Mon, 30 Sep 2024 23:24:11 +0700 Subject: [PATCH 14/15] /test update test files --- models/bamf_nnunet_ct_lungnodules/mhub.toml | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 models/bamf_nnunet_ct_lungnodules/mhub.toml diff --git a/models/bamf_nnunet_ct_lungnodules/mhub.toml b/models/bamf_nnunet_ct_lungnodules/mhub.toml new file mode 100644 index 00000000..7c0ce649 --- /dev/null +++ b/models/bamf_nnunet_ct_lungnodules/mhub.toml @@ -0,0 +1,3 @@ + +[model.deployment] +test = "https://zenodo.org/records/13862988/files/test.zip?download=1" \ No newline at end of file From 3c6f6edfbd64bb2557d451167c9391524b21c221 Mon Sep 17 00:00:00 2001 From: jithenece Date: Wed, 2 Oct 2024 18:26:40 +0700 Subject: [PATCH 15/15] /test publication update --- models/bamf_nnunet_ct_lungnodules/meta.json | 12 ++++++------ models/bamf_nnunet_ct_lungnodules/mhub.toml | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git 
a/models/bamf_nnunet_ct_lungnodules/meta.json b/models/bamf_nnunet_ct_lungnodules/meta.json index 384ac28e..330185ef 100644 --- a/models/bamf_nnunet_ct_lungnodules/meta.json +++ b/models/bamf_nnunet_ct_lungnodules/meta.json @@ -55,19 +55,19 @@ ], "type": "nnU-Net (U-Net structure, optimized by data-driven heuristics)", "date": { - "code": "17.10.2023", - "weights": "28.08.2023", - "pub": "23.10.2023" + "code": "28.09.2024", + "weights": "11.06.2024", + "pub": "30.09.2024" }, - "cite": "Murugesan, Gowtham Krishnan, Diana McCrumb, Mariam Aboian, Tej Verma, Rahul Soni, Fatima Memon, and Jeff Van Oss. The AIMI Initiative: AI-Generated Annotations for Imaging Data Commons Collections. arXiv preprint arXiv:2310.14897 (2023).", + "cite": "Gowtham Krishnan Murugesan, Diana McCrumb, Rahul Soni, Jithendra Kumar, Leonard Nuernberg, Linmin Pei, Ulrike Wagner, Sutton Granger, Andrey Y. Fedorov, Stephen Moore, Jeff Van Oss. AI generated annotations for Breast, Brain, Liver, Lungs and Prostate cancer collections in National Cancer Institute Imaging Data Commons. arXiv:2409.20342 (2024).", "license": { "code": "MIT", "weights": "CC BY-NC 4.0" }, "publications": [ { - "title": "The AIMI Initiative: AI-Generated Annotations in IDC Collections", - "uri": "https://arxiv.org/abs/2310.14897" + "title": "AI generated annotations for Breast, Brain, Liver, Lungs and Prostate cancer collections in National Cancer Institute Imaging Data Commons", + "uri": "https://arxiv.org/abs/2409.20342" } ], "github": "https://github.com/bamf-health/aimi-lung2-ct" diff --git a/models/bamf_nnunet_ct_lungnodules/mhub.toml b/models/bamf_nnunet_ct_lungnodules/mhub.toml index 7c0ce649..7659547e 100644 --- a/models/bamf_nnunet_ct_lungnodules/mhub.toml +++ b/models/bamf_nnunet_ct_lungnodules/mhub.toml @@ -1,3 +1,3 @@ [model.deployment] -test = "https://zenodo.org/records/13862988/files/test.zip?download=1" \ No newline at end of file +test = "https://zenodo.org/records/13880663/files/bamf_nnunet_ct_lungnodules.test.zip?download=1" \ No newline at end of file
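
Usage sketch (assumptions: the image is built as mhubai/bamf_nnunet_ct_lungnodules and the standard MHub data mounts are used; adjust image name and paths to your build):

    docker run --rm -t --gpus all \
        -v /path/to/dicom/input:/app/data/input_data:ro \
        -v /path/to/output:/app/data/output_data \
        mhubai/bamf_nnunet_ct_lungnodules:latest

With the default config this imports the DICOM CT, converts it to NIfTI via dcm2niix, runs the nnU-Net v2 model and the BAMF post-processor, and writes bamf_nnunet_ct_lungnodules.seg.dcm into a per-series ([i:sid]) folder in the output mount.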