Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BAMF Liver and Tumor segmentation #84

Open
wants to merge 28 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions models/bamf_ct_liver_tumor/config/default.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
general:
  data_base_dir: /app/data
  # quoted so YAML parsers do not read the version as the float 1.0
  version: "1.0"
  description: default configuration for Bamf CT Liver and tumor segmentation (dicom to dicom)

# pipeline stages, executed in order
execute:
- DicomImporter
- NiftiConverter
- NNUnetRunnerV2
- PostProcessor
- DsegConverter
- DataOrganizer

modules:
  DicomImporter:
    source_dir: input_data
    import_dir: sorted_data
    sort_data: true
    meta:
      mod: '%Modality'

  NiftiConverter:
    engine: dcm2niix

  NNUnetRunnerV2:
    in_data: nifti:mod=ct
    nnunet_dataset: Dataset006_Liver
    nnunet_config: 3d_fullres
    roi: SPLEEN,KIDNEY,GALLBLADDER,DUODENUM,PANCREAS,SMALL_INTESTINE,LUNG,LIVER,LIVER+NEOPLASM_MALIGNANT
    # canonical lowercase boolean (was `False`)
    export_prob_maps: false

  DsegConverter:
    model_name: bamf_ct_liver_tumor
    target_dicom: dicom:mod=ct
    # NOTE(review): NNUnetRunnerV2 emits nifti:mod=seg:model=nnunet; this filter
    # presumably matches the PostProcessor output — confirm it sets processor=bamf
    source_segs: nifti:mod=seg:processor=bamf
    # canonical lowercase boolean (was `True`)
    skip_empty_slices: true

  DataOrganizer:
    targets:
    - dicomseg-->[i:sid]/bamf_ct_liver_tumor.seg.dcm

# custom segdb entries used by the DsegConverter
segdb:
  triplets:
    T_LIVER_LESION:
      code: C159516
      meaning: Liver lesion
      scheme_designator: NCIt
  segments:
    NEOPLASM_MALIGNANT:
      name: Neoplasm Malignant
      category: C_RADIOLOGIC_FINDING
      type: T_LIVER_LESION
      color: [255, 0, 0]
36 changes: 36 additions & 0 deletions models/bamf_ct_liver_tumor/dockerfiles/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
FROM mhubai/base:latest

# FIXME: set this environment variable as a shortcut to avoid nnunet crashing the build
# by pulling sklearn instead of scikit-learn
# N.B. this is a known issue:
# https://github.com/MIC-DKFZ/nnUNet/issues/1281
# https://github.com/MIC-DKFZ/nnUNet/pull/1209
ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True

# Install nnunet (v2) and the dcm2niix converter used by NiftiConverter
# (was documented as "nnunet and platipy"; platipy is not installed here)
RUN pip3 install --no-cache-dir nnunetv2==2.0 \
    dcm2niix==1.0.20220715

# Install updated segdb entries
# (uninstall + reinstall in a single layer to avoid an extra image layer)
RUN pip3 uninstall segdb -y && \
    pip3 install --no-cache-dir git+https://github.com/MHubAI/segdb.git

# Clone the main branch of MHubAI/models
ARG MHUB_MODELS_REPO
RUN buildutils/import_mhub_model.sh bamf_ct_liver_tumor ${MHUB_MODELS_REPO}

# Pull nnUNet model weights into the container for Dataset006_Liver
# (download, unpack and delete the archive in ONE layer; with separate RUN
#  steps the zip persists in an intermediate layer and bloats the image)
ENV WEIGHTS_DIR=/root/.nnunet/nnUNet_models/
ENV WEIGHTS_FN=Dataset006_Liver.zip
ENV WEIGHTS_URL=https://zenodo.org/records/11582728/files/$WEIGHTS_FN
RUN mkdir -p ${WEIGHTS_DIR} && \
    wget --directory-prefix ${WEIGHTS_DIR} ${WEIGHTS_URL} && \
    unzip ${WEIGHTS_DIR}${WEIGHTS_FN} -d ${WEIGHTS_DIR} && \
    rm ${WEIGHTS_DIR}${WEIGHTS_FN}

# specify nnunet specific environment variables
ENV WEIGHTS_FOLDER=$WEIGHTS_DIR

# Default run script
ENTRYPOINT ["mhub.run"]
CMD ["--config", "/app/models/bamf_ct_liver_tumor/config/default.yml"]
136 changes: 136 additions & 0 deletions models/bamf_ct_liver_tumor/meta.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
{
"id": "",
"name": "bamf_ct_liver_tumor",
"title": "BAMF CT Liver and Tumor Seg (nnU-Net)",
"summary": {
"description": "An nnU-Net based model to segment liver and tumor from CT scans",
"inputs": [
{
"label": "Input Image",
"description": "The CT scan of a patient.",
"format": "DICOM",
"modality": "CT",
"bodypartexamined": "LIVER",
"slicethickness": "2.5mm",
"non-contrast": false,
"contrast": true
}
],
"outputs": [
{
"label": "Segmentation",
"type": "Segmentation",
"description": "Segmentation liver",
"classes": [
"LIVER",
"LIVER+TUMOR"
]
}
],
"model": {
"architecture": "U-net",
"training": "supervised",
"cmpapproach": "3D"
},
"data": {
"training": {
"vol_samples": 262
},
"evaluation": {
"vol_samples": 52
},
"public": true,
"external": true
}
},
"details": {
"name": "AIMI CT Liver",
"version": "1.0.0",
"devteam": "BAMF Health",
"authors": [
"Soni, Rahul",
"McCrumb, Diana",
"Murugesan, Gowtham Krishnan",
"Van Oss, Jeff"
],
"type": "nnU-Net (U-Net structure, optimized by data-driven heuristics)",
"date": {
"code": "17.10.2023",
"weights": "28.08.2023",
"pub": "23.10.2023"
},
"cite": "Murugesan, Gowtham Krishnan, Diana McCrumb, Mariam Aboian, Tej Verma, Rahul Soni, Fatima Memon, and Jeff Van Oss. The AIMI Initiative: AI-Generated Annotations for Imaging Data Commons Collections. arXiv preprint arXiv:2310.14897 (2023).",
"license": {
"code": "MIT",
"weights": "CC BY-NC 4.0"
},
"publications": [
{
"title": "The AIMI Initiative: AI-Generated Annotations in IDC Collections",
"uri": "https://arxiv.org/abs/2310.14897"
}
],
"github": "https://github.com/MHubAI/models"
},
"info": {
"use": {
"title": "Intended Use",
"text": "This model is intended to perform liver and tumor segmentation in CT scans. The liver is a common site of primary (i.e. originating in the liver like hepatocellular carcinoma, HCC) or secondary (i.e. spreading to the liver like colorectal cancer) tumor development.The model has been trained and tested on scans aquired during clinical care of patients, so it might not be suited for a healthy population. The generalization capabilities of the model on a range of ages, genders, and ethnicities are unknown."
},
"analyses": {
"title": "Quantitative Analyses",
"text": "The model's performance was assessed using the Dice Coefficient, Hausdorff distance and NSD"
},
"evaluation": {
"title": "Evaluation Data",
"text": "The model was used to segment cases 509 from the Colorectal-Liver-Metastases [1] collection HCC-TACE-Seg [2]. 52 of those cases were randomly selected to be reviewed and corrected by a board-certified radiologist.",
"tables": [
{
"label": "Dice Score",
"entries": {
"Liver": "0.99±0.02",
"Tumor": "0.80±0.35"
}
},
{
"label": "95% Hausdorff Distance",
"entries": {
"Liver": "2.33±7.70",
"Tumor": "19.73±38.35"
}
},
{
"label": "Normalized surface distance ",
"entries": {
"Liver": "0.29±0.95",
"Tumor": "4.38±8.70"
}
}
],
"references": [
{
"label": "Colorectal-Liver-Metastases",
"uri": "https://doi.org/10.7937/QXK2-QG03"
},
{
"label": "HCC-TACE-Seg",
"uri": "https://doi.org/10.7937/TCIA.5FNA-0924"
}
]
},
"training": {
"title": "Training Data",
"text": "The training dataset consists of 210 CT liver and tumor annotations taken from LiTS dataset and additional organ annotation generated using TotalSegmentator",
"references": [
{
"label": "LiTS - Liver Tumor Segmentation Challenge",
"uri": "https://competitions.codalab.org/competitions/17094"
},
{
"label": "TotalSegmentator",
"uri": "https://zenodo.org/records/10047292"
}
]
}
}
}
115 changes: 115 additions & 0 deletions models/bamf_ct_liver_tumor/utils/NNUnetRunnerV2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
"""
-------------------------------------------------
MHub - NNU-Net Runner v2
Runner for pre-trained nnunet v2 models.
-------------------------------------------------

-------------------------------------------------
Author: Rahul Soni
Email: [email protected]
-------------------------------------------------
"""


from typing import List, Optional
import os, subprocess, shutil
import SimpleITK as sitk, numpy as np
from mhubio.core import Module, Instance, InstanceData, DataType, FileType, IO


nnunet_dataset_name_regex = r"Dataset[0-9]{3}_[a-zA-Z0-9_]+"

# NOTE: input default changed from 'nifti:mod=mr' (copy-paste leftover) to
# 'nifti:mod=ct' — this is a CT liver/tumor model and the shipped default.yml
# also sets in_data: nifti:mod=ct explicitly.
@IO.ConfigInput('in_data', 'nifti:mod=ct', the="input data to run nnunet on")
@IO.Config('nnunet_dataset', str, None, the='nnunet dataset name')
@IO.Config('nnunet_config', str, None, the='nnunet model name (2d, 3d_lowres, 3d_fullres, 3d_cascade_fullres)')
@IO.Config('folds', int, None, the='number of folds to run nnunet on')
@IO.Config('use_tta', bool, True, the='flag to enable test time augmentation')
@IO.Config('roi', str, None, the='roi or comma separated list of roi the nnunet segments')
class NNUnetRunnerV2(Module):
    """Run inference with a pre-trained nnU-Net v2 model on a single NIfTI volume.

    Expects the model weights under the WEIGHTS_FOLDER environment variable,
    organized as one sub-directory per nnunet dataset (e.g. Dataset006_Liver).
    The segmentation is copied to the instance as nifti:mod=seg:model=nnunet.
    """

    nnunet_dataset: str         # nnunet dataset name, e.g. 'Dataset006_Liver'
    nnunet_config: str          # nnunet configuration (2d, 3d_lowres, 3d_fullres, 3d_cascade_fullres)
    input_data_type: DataType
    folds: int                  # TODO: support optional config attributes
    use_tta: bool               # enable test time augmentation (--disable_tta when False)
    roi: str                    # roi or comma separated list of roi the model segments

    @IO.Instance()
    @IO.Input("in_data", the="input data to run nnunet on")
    @IO.Output("out_data", 'VOLUME_001.nii.gz', 'nifti:mod=seg:model=nnunet', data='in_data', the="output data from nnunet")
    def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None:
        """Run nnUNetv2_predict on in_data and copy the resulting segmentation to out_data.

        Stages the input under the nnunet naming convention in a temp directory,
        points nnUNet_results at a temp output directory (with the requested
        dataset's weights symlinked in), invokes the nnUNetv2_predict CLI, and
        copies the produced volume back into the mhub instance.
        """

        # get the nnunet model to run
        self.v("Running nnUNet_predict.")
        self.v(f" > dataset: {self.nnunet_dataset}")
        self.v(f" > config: {self.nnunet_config}")
        self.v(f" > input data: {in_data.abspath}")
        self.v(f" > output data: {out_data.abspath}")

        # download weights if not found
        # NOTE: only for testing / debugging. For production always provide the weights in the Docker container.
        # NOTE(review): nnUNet_download_pretrained_model is the nnunet *v1* CLI; with
        # nnunetv2 installed this fallback likely fails — confirm or remove.
        if not os.path.isdir(os.path.join(os.environ["WEIGHTS_FOLDER"], '')):
            print("Downloading nnUNet model weights...")
            bash_command = ["nnUNet_download_pretrained_model", self.nnunet_dataset]
            self.subprocess(bash_command, text=True)

        # bring input data in nnunet specific format
        # NOTE: only for nifti data as we hardcode the nnunet-formatted-filename (and extension) for now.
        assert in_data.type.ftype == FileType.NIFTI
        assert in_data.abspath.endswith('.nii.gz')
        inp_dir = self.config.data.requestTempDir(label="nnunet-model-inp")
        inp_file = 'VOLUME_001_0000.nii.gz'  # nnunet expects <CASE>_<CHANNEL>.nii.gz
        shutil.copyfile(in_data.abspath, os.path.join(inp_dir, inp_file))

        # define output folder (temp dir) and also override environment variable for nnunet
        out_dir = self.config.data.requestTempDir(label="nnunet-model-out")
        os.environ['nnUNet_results'] = out_dir

        # symlink the requested dataset's weights into the nnunet results folder.
        # NOTE: this is a workaround for the nnunet CLI that expects the weights to be
        # in a specific folder structure, which is not the case for the mhub data
        # structure — so we link the dataset folder into the temp results dir.
        os.symlink(os.path.join(os.environ['WEIGHTS_FOLDER'], self.nnunet_dataset), os.path.join(out_dir, self.nnunet_dataset))

        # NOTE: instead of running from commandline this could also be done in a pythonic way:
        # `nnUNet/nnunet/inference/predict.py` - but it would require
        # to set manually all the arguments that the user is not intended
        # to fiddle with; so stick with the bash executable

        # construct nnunet inference command
        bash_command = ["nnUNetv2_predict"]
        bash_command += ["-i", str(inp_dir)]
        bash_command += ["-o", str(out_dir)]
        bash_command += ["-d", self.nnunet_dataset]
        bash_command += ["-c", self.nnunet_config]

        # add optional arguments
        if self.folds is not None:
            bash_command += ["-f", str(self.folds)]

        if not self.use_tta:
            bash_command += ["--disable_tta"]

        self.v(f" > bash_command: {bash_command}")
        # run command
        self.subprocess(bash_command, text=True)

        # output meta attached to the generated segmentation
        meta = {
            "model": "nnunet",
            "nnunet_dataset": self.nnunet_dataset,
            "nnunet_config": self.nnunet_config,
            "roi": self.roi
        }

        # get output data (nnunet writes <CASE>.nii.gz next to the results dir)
        out_file = 'VOLUME_001.nii.gz'
        out_path = os.path.join(out_dir, out_file)

        # copy output data to instance
        shutil.copyfile(out_path, out_data.abspath)

        # update meta dynamically
        out_data.type.meta += meta
Loading
Loading