Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BAMF Liver and Tumor segmentation #84

Open
wants to merge 28 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions models/bamf_ct_liver_tumor/config/default.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
general:
  data_base_dir: /app/data
  # quoted so YAML parsers do not read the version as the float 1.0
  version: "1.0"
  description: default configuration for Bamf CT Liver and tumor segmentation (dicom to dicom)

# pipeline stages, executed in order
execute:
- DicomImporter
- NiftiConverter
- NNUnetRunnerV2
- PostProcessor
- DsegConverter
- DataOrganizer

modules:
  DicomImporter:
    source_dir: input_data
    import_dir: sorted_data
    sort_data: true
    meta:
      mod: '%Modality'

  NiftiConverter:
    engine: dcm2niix

  NNUnetRunnerV2:
    in_data: nifti:mod=ct
    nnunet_dataset: Dataset006_Liver
    nnunet_config: 3d_fullres
    roi: SPLEEN,KIDNEY,GALLBLADDER,DUODENUM,PANCREAS,SMALL_INTESTINE,LUNG,LIVER,LIVER+NEOPLASM_MALIGNANT
    # canonical lowercase boolean (was `False`)
    export_prob_maps: false

  DsegConverter:
    model_name: bamf_ct_liver_tumor
    target_dicom: dicom:mod=ct
    # NOTE(review): NNUnetRunnerV2 emits nifti:mod=seg:model=nnunet; this filter
    # presumably matches the PostProcessor output — confirm it sets processor=bamf
    source_segs: nifti:mod=seg:processor=bamf
    # canonical lowercase boolean (was `True`)
    skip_empty_slices: true

  DataOrganizer:
    targets:
    - dicomseg-->[i:sid]/bamf_ct_liver_tumor.seg.dcm

# custom segdb entries used by the DsegConverter
segdb:
  triplets:
    T_LIVER_LESION:
      code: C159516
      meaning: Liver lesion
      scheme_designator: NCIt
  segments:
    NEOPLASM_MALIGNANT:
      name: Neoplasm Malignant
      category: C_RADIOLOGIC_FINDING
      type: T_LIVER_LESION
      color: [255, 0, 0]
36 changes: 36 additions & 0 deletions models/bamf_ct_liver_tumor/dockerfiles/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
FROM mhubai/base:latest

# FIXME: set this environment variable as a shortcut to avoid nnunet crashing the build
# by pulling sklearn instead of scikit-learn
# N.B. this is a known issue:
# https://github.com/MIC-DKFZ/nnUNet/issues/1281
# https://github.com/MIC-DKFZ/nnUNet/pull/1209
ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True

# Install nnunet (v2) and the dcm2niix converter used by NiftiConverter
# (was documented as "nnunet and platipy"; platipy is not installed here)
RUN pip3 install --no-cache-dir nnunetv2==2.0 \
    dcm2niix==1.0.20220715

# Install updated segdb entries
# (uninstall + reinstall in a single layer to avoid an extra image layer)
RUN pip3 uninstall segdb -y && \
    pip3 install --no-cache-dir git+https://github.com/MHubAI/segdb.git

# Clone the main branch of MHubAI/models
ARG MHUB_MODELS_REPO
RUN buildutils/import_mhub_model.sh bamf_ct_liver_tumor ${MHUB_MODELS_REPO}

# Pull nnUNet model weights into the container for Dataset006_Liver
# (download, unpack and delete the archive in ONE layer; with separate RUN
#  steps the zip persists in an intermediate layer and bloats the image)
ENV WEIGHTS_DIR=/root/.nnunet/nnUNet_models/
ENV WEIGHTS_FN=Dataset006_Liver.zip
ENV WEIGHTS_URL=https://zenodo.org/records/11582728/files/$WEIGHTS_FN
RUN mkdir -p ${WEIGHTS_DIR} && \
    wget --directory-prefix ${WEIGHTS_DIR} ${WEIGHTS_URL} && \
    unzip ${WEIGHTS_DIR}${WEIGHTS_FN} -d ${WEIGHTS_DIR} && \
    rm ${WEIGHTS_DIR}${WEIGHTS_FN}

# specify nnunet specific environment variables
ENV WEIGHTS_FOLDER=$WEIGHTS_DIR

# Default run script
ENTRYPOINT ["mhub.run"]
CMD ["--config", "/app/models/bamf_ct_liver_tumor/config/default.yml"]
136 changes: 136 additions & 0 deletions models/bamf_ct_liver_tumor/meta.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
{
"id": "",
"name": "bamf_ct_liver_tumor",
"title": "BAMF CT Liver and Tumor Seg (nnU-Net)",
"summary": {
"description": "An nnU-Net based model to segment liver and tumor from CT scans",
"inputs": [
{
"label": "Input Image",
"description": "The CT scan of a patient.",
"format": "DICOM",
"modality": "CT",
"bodypartexamined": "LIVER",
"slicethickness": "2.5mm",
"non-contrast": false,
"contrast": true
}
],
"outputs": [
{
"label": "Segmentation",
"type": "Segmentation",
"description": "Segmentation liver",
"classes": [
"LIVER",
"LIVER+TUMOR"
]
}
],
"model": {
"architecture": "U-net",
"training": "supervised",
"cmpapproach": "3D"
},
"data": {
"training": {
"vol_samples": 262
},
"evaluation": {
"vol_samples": 52
},
"public": true,
"external": true
}
},
"details": {
"name": "AIMI CT Liver",
"version": "1.0.0",
"devteam": "BAMF Health",
"authors": [
"Soni, Rahul",
"McCrumb, Diana",
"Murugesan, Gowtham Krishnan",
"Van Oss, Jeff"
],
"type": "nnU-Net (U-Net structure, optimized by data-driven heuristics)",
"date": {
"code": "17.10.2023",
"weights": "28.08.2023",
"pub": "23.10.2023"
},
"cite": "Murugesan, Gowtham Krishnan, Diana McCrumb, Mariam Aboian, Tej Verma, Rahul Soni, Fatima Memon, and Jeff Van Oss. The AIMI Initiative: AI-Generated Annotations for Imaging Data Commons Collections. arXiv preprint arXiv:2310.14897 (2023).",
"license": {
"code": "MIT",
"weights": "CC BY-NC 4.0"
},
"publications": [
{
"title": "The AIMI Initiative: AI-Generated Annotations in IDC Collections",
"uri": "https://arxiv.org/abs/2310.14897"
}
],
"github": "https://github.com/MHubAI/models"
},
"info": {
"use": {
"title": "Intended Use",
"text": "This model is intended to perform liver and tumor segmentation in CT scans. The liver is a common site of primary (i.e. originating in the liver like hepatocellular carcinoma, HCC) or secondary (i.e. spreading to the liver like colorectal cancer) tumor development.The model has been trained and tested on scans aquired during clinical care of patients, so it might not be suited for a healthy population. The generalization capabilities of the model on a range of ages, genders, and ethnicities are unknown."
},
"analyses": {
"title": "Quantitative Analyses",
"text": "The model's performance was assessed using the Dice Coefficient, Hausdorff distance and NSD"
},
"evaluation": {
"title": "Evaluation Data",
"text": "The model was used to segment cases 509 from the Colorectal-Liver-Metastases [1] collection HCC-TACE-Seg [2]. 52 of those cases were randomly selected to be reviewed and corrected by a board-certified radiologist.",
"tables": [
{
"label": "Dice Score",
"entries": {
"Liver": "0.99±0.02",
"Tumor": "0.80±0.35"
}
},
{
"label": "95% Hausdorff Distance",
"entries": {
"Liver": "2.33±7.70",
"Tumor": "19.73±38.35"
}
},
{
"label": "Normalized surface distance ",
"entries": {
"Liver": "0.29±0.95",
"Tumor": "4.38±8.70"
}
}
],
"references": [
{
"label": "Colorectal-Liver-Metastases",
"uri": "https://doi.org/10.7937/QXK2-QG03"
},
{
"label": "HCC-TACE-Seg",
"uri": "https://doi.org/10.7937/TCIA.5FNA-0924"
}
]
},
"training": {
"title": "Training Data",
"text": "The training dataset consists of 210 CT liver and tumor annotations taken from LiTS dataset and additional organ annotation generated using TotalSegmentator",
"references": [
{
"label": "LiTS - Liver Tumor Segmentation Challenge",
"uri": "https://competitions.codalab.org/competitions/17094"
},
{
"label": "TotalSegmentator",
"uri": "https://zenodo.org/records/10047292"
}
]
}
}
}
115 changes: 115 additions & 0 deletions models/bamf_ct_liver_tumor/utils/NNUnetRunnerV2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
"""
-------------------------------------------------
MHub - NNU-Net Runner v2
Runner for pre-trained nnunet v2 models.
-------------------------------------------------

-------------------------------------------------
Author: Rahul Soni
Email: [email protected]
-------------------------------------------------
"""


from typing import List, Optional
import os, subprocess, shutil
import SimpleITK as sitk, numpy as np
from mhubio.core import Module, Instance, InstanceData, DataType, FileType, IO


nnunet_dataset_name_regex = r"Dataset[0-9]{3}_[a-zA-Z0-9_]+"

# NOTE: input default changed from 'nifti:mod=mr' (copy-paste leftover) to
# 'nifti:mod=ct' — this is a CT liver/tumor model and the shipped default.yml
# also sets in_data: nifti:mod=ct explicitly.
@IO.ConfigInput('in_data', 'nifti:mod=ct', the="input data to run nnunet on")
@IO.Config('nnunet_dataset', str, None, the='nnunet dataset name')
@IO.Config('nnunet_config', str, None, the='nnunet model name (2d, 3d_lowres, 3d_fullres, 3d_cascade_fullres)')
@IO.Config('folds', int, None, the='number of folds to run nnunet on')
@IO.Config('use_tta', bool, True, the='flag to enable test time augmentation')
@IO.Config('roi', str, None, the='roi or comma separated list of roi the nnunet segments')
class NNUnetRunnerV2(Module):
    """Run inference with a pre-trained nnU-Net v2 model on a single NIfTI volume.

    Expects the model weights under the WEIGHTS_FOLDER environment variable,
    organized as one sub-directory per nnunet dataset (e.g. Dataset006_Liver).
    The segmentation is copied to the instance as nifti:mod=seg:model=nnunet.
    """

    nnunet_dataset: str         # nnunet dataset name, e.g. 'Dataset006_Liver'
    nnunet_config: str          # nnunet configuration (2d, 3d_lowres, 3d_fullres, 3d_cascade_fullres)
    input_data_type: DataType
    folds: int                  # TODO: support optional config attributes
    use_tta: bool               # enable test time augmentation (--disable_tta when False)
    roi: str                    # roi or comma separated list of roi the model segments

    @IO.Instance()
    @IO.Input("in_data", the="input data to run nnunet on")
    @IO.Output("out_data", 'VOLUME_001.nii.gz', 'nifti:mod=seg:model=nnunet', data='in_data', the="output data from nnunet")
    def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None:
        """Run nnUNetv2_predict on in_data and copy the resulting segmentation to out_data.

        Stages the input under the nnunet naming convention in a temp directory,
        points nnUNet_results at a temp output directory (with the requested
        dataset's weights symlinked in), invokes the nnUNetv2_predict CLI, and
        copies the produced volume back into the mhub instance.
        """

        # get the nnunet model to run
        self.v("Running nnUNet_predict.")
        self.v(f" > dataset: {self.nnunet_dataset}")
        self.v(f" > config: {self.nnunet_config}")
        self.v(f" > input data: {in_data.abspath}")
        self.v(f" > output data: {out_data.abspath}")

        # download weights if not found
        # NOTE: only for testing / debugging. For production always provide the weights in the Docker container.
        # NOTE(review): nnUNet_download_pretrained_model is the nnunet *v1* CLI; with
        # nnunetv2 installed this fallback likely fails — confirm or remove.
        if not os.path.isdir(os.path.join(os.environ["WEIGHTS_FOLDER"], '')):
            print("Downloading nnUNet model weights...")
            bash_command = ["nnUNet_download_pretrained_model", self.nnunet_dataset]
            self.subprocess(bash_command, text=True)

        # bring input data in nnunet specific format
        # NOTE: only for nifti data as we hardcode the nnunet-formatted-filename (and extension) for now.
        assert in_data.type.ftype == FileType.NIFTI
        assert in_data.abspath.endswith('.nii.gz')
        inp_dir = self.config.data.requestTempDir(label="nnunet-model-inp")
        inp_file = 'VOLUME_001_0000.nii.gz'  # nnunet expects <CASE>_<CHANNEL>.nii.gz
        shutil.copyfile(in_data.abspath, os.path.join(inp_dir, inp_file))

        # define output folder (temp dir) and also override environment variable for nnunet
        out_dir = self.config.data.requestTempDir(label="nnunet-model-out")
        os.environ['nnUNet_results'] = out_dir

        # symlink the requested dataset's weights into the nnunet results folder.
        # NOTE: this is a workaround for the nnunet CLI that expects the weights to be
        # in a specific folder structure, which is not the case for the mhub data
        # structure — so we link the dataset folder into the temp results dir.
        os.symlink(os.path.join(os.environ['WEIGHTS_FOLDER'], self.nnunet_dataset), os.path.join(out_dir, self.nnunet_dataset))

        # NOTE: instead of running from commandline this could also be done in a pythonic way:
        # `nnUNet/nnunet/inference/predict.py` - but it would require
        # to set manually all the arguments that the user is not intended
        # to fiddle with; so stick with the bash executable

        # construct nnunet inference command
        bash_command = ["nnUNetv2_predict"]
        bash_command += ["-i", str(inp_dir)]
        bash_command += ["-o", str(out_dir)]
        bash_command += ["-d", self.nnunet_dataset]
        bash_command += ["-c", self.nnunet_config]

        # add optional arguments
        if self.folds is not None:
            bash_command += ["-f", str(self.folds)]

        if not self.use_tta:
            bash_command += ["--disable_tta"]

        self.v(f" > bash_command: {bash_command}")
        # run command
        self.subprocess(bash_command, text=True)

        # output meta attached to the generated segmentation
        meta = {
            "model": "nnunet",
            "nnunet_dataset": self.nnunet_dataset,
            "nnunet_config": self.nnunet_config,
            "roi": self.roi
        }

        # get output data (nnunet writes <CASE>.nii.gz next to the results dir)
        out_file = 'VOLUME_001.nii.gz'
        out_path = os.path.join(out_dir, out_file)

        # copy output data to instance
        shutil.copyfile(out_path, out_data.abspath)

        # update meta dynamically
        out_data.type.meta += meta
Loading
Loading