Commit 82e7d77

Merge pull request #81 from DIAGNijmegen/m-gc-wsi-bgseg
MHub / GC - WSI background/tissue segmentation model
2 parents f3c70d0 + 6ae707e commit 82e7d77

File tree

6 files changed, +343 -0 lines changed
models/gc_wsi_bgseg/config/default.yml

+24

general:
  data_base_dir: /app/data
  version: 1.0
  description: WSI background tissue segmentation algorithm (dicom:sm to tiff:seg)

execute:
- DicomImporter
- TiffConverter
- WSIBackgroundSegmentationRunner
- DataOrganizer

modules:
  DicomImporter:
    source_dir: input_data
    import_dir: sorted_data
    sort_data: True
    meta:
      mod: "%Modality"

  DataOrganizer:
    target_dir: output_data
    require_data_confirmation: true
    targets:
    - tiff:mod=seg-->[i:sid]/gc_wsi_bgseg.tiff
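
The config above defines the default DICOM workflow. As a usage sketch only (the mhubai/gc_wsi_bgseg image tag and the host paths are assumptions, not defined by this commit; the container paths follow data_base_dir and the importer/organizer directories above):

# Run the default workflow on a folder of DICOM SM slides (illustrative paths).
docker run --rm --gpus all \
  -v /path/to/dicom_sm_slides:/app/data/input_data \
  -v /path/to/output:/app/data/output_data \
  mhubai/gc_wsi_bgseg:latest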

models/gc_wsi_bgseg/config/tiff.yml

+22

general:
  data_base_dir: /app/data
  version: 1.0
  description: WSI background tissue segmentation algorithm (tiff:sm to tiff:seg)

execute:
- FileStructureImporter
- WSIBackgroundSegmentationRunner
- DataOrganizer

modules:
  FileStructureImporter:
    input_dir: input_data
    structures:
    - $sid@instance/test.tiff@tiff:mod=sm
    import_id: sid

  DataOrganizer:
    target_dir: output_data
    require_data_confirmation: true
    targets:
    - tiff:mod=seg-->[i:sid]/wsi_background_segmentation.tiff
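
For the tiff workflow, the FileStructureImporter pattern above expects each case as a subdirectory of input_data whose name is captured as the sid, containing a file named test.tiff that is imported as tiff:mod=sm. A minimal sketch, assuming the same image tag and illustrative host paths as before:

# Prepare one case and run the tiff workflow (the --config argument replaces the default CMD).
mkdir -p input/case_001
cp /path/to/my_slide.tiff input/case_001/test.tiff
docker run --rm --gpus all \
  -v "$(pwd)/input:/app/data/input_data" \
  -v "$(pwd)/output:/app/data/output_data" \
  mhubai/gc_wsi_bgseg:latest \
  --config /app/models/gc_wsi_bgseg/config/tiff.yml
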
models/gc_wsi_bgseg/dockerfiles/Dockerfile

+64

FROM mhubai/base:latest

# Update authors label
LABEL authors="[email protected]"

# Install pipenv (for a custom Python/Pip environment for ASAP-2.1 and the other algorithm requirements)
RUN pip3 install --no-cache-dir pipenv

# Set environment variables for pipenv (installs into /app/.venv)
ENV PIPENV_VENV_IN_PROJECT=1

# Install ASAP 2.1
RUN apt-get update && \
    apt-get -y install curl libpython3.8-dev && \
    curl --remote-name --location "https://github.com/computationalpathologygroup/ASAP/releases/download/ASAP-2.1/ASAP-2.1-py38-Ubuntu2004.deb" && \
    dpkg --install ASAP-2.1-py38-Ubuntu2004.deb || true && \
    apt-get -f install --fix-missing --fix-broken --assume-yes && \
    ldconfig -v && \
    apt-get clean && \
    rm ASAP-2.1-py38-Ubuntu2004.deb

# Set up and install the algorithm pipenv environment
# 1. Configure a new empty pipenv for Python 3.8
# 2. Link the ASAP libraries into the environment
# 3. Upgrade pip
# 4. Upgrade numpy and numba so they function correctly with ASAP
# 5. Install the remaining dependencies required by the algorithm
RUN pipenv install --python 3.8 && \
    echo "/opt/ASAP/bin" > /app/.venv/lib/python3.8/site-packages/asap.pth && \
    pipenv run pip install --no-cache-dir --upgrade pip && \
    pipenv run pip install --no-cache-dir --upgrade numpy==1.24.4 numba==0.58.1 && \
    pipenv run pip install --no-cache-dir scipy==1.10.1 scikit-image==0.21.0 h5py==3.11.0

# Install TensorFlow 2.11.0 with GPU support (without conda), using the CUDA 11 toolkit and cuDNN 8
# tensorflow-2.11.0 supports Python 3.7-3.10 and requires cuDNN >= 8.1 and CUDA >= 11.2
RUN pipenv run pip install --no-cache-dir \
    nvidia-cuda-runtime-cu11 \
    nvidia-cusolver-cu11 \
    nvidia-curand-cu11 \
    nvidia-cufft-cu11 \
    nvidia-cublas-cu11 \
    nvidia-cusparse-cu11 \
    nvidia-cudnn-cu11 \
    nvidia-tensorrt==7.2.3.4 \
    tensorflow==2.11.0 \
    --extra-index-url https://pypi.ngc.nvidia.com

# Configure required paths for TensorFlow with GPU support
ENV NVIDIA_DIR /app/.venv/lib/python3.8/site-packages/nvidia
ENV LD_LIBRARY_PATH /app/.venv/lib/python3.8/site-packages/tensorrt:$NVIDIA_DIR/cublas/lib:$NVIDIA_DIR/cuda_runtime/lib:$NVIDIA_DIR/cudnn/lib:$NVIDIA_DIR/cufft/lib:$NVIDIA_DIR/curand/lib:$NVIDIA_DIR/cusolver/lib:$NVIDIA_DIR/cusparse/lib

# Import the MHub model definition
ARG MHUB_MODELS_REPO
RUN buildutils/import_mhub_model.sh gc_wsi_bgseg ${MHUB_MODELS_REPO}

# Install the pathology tissue background segmentation processor code and weights (version 1.0.0)
RUN git clone --depth 1 --branch 1.0.0 https://github.com/DIAGNijmegen/pathology-tissue-background-segmentation-processor.git /app/src

# Add the model and algorithm code bases to the Python path
ENV PYTHONPATH="/app:/app/src"

# Default run script
ENTRYPOINT ["python3", "-m", "mhubio.run"]
CMD ["--config", "/app/models/gc_wsi_bgseg/config/default.yml"]

models/gc_wsi_bgseg/meta.json

+156

{
  "id": "bb083d5e-b5ed-481a-8a24-00e95d301067",
  "name": "gc_wsi_bgseg",
  "title": "Tissue-Background segmentation in histopathological whole-slide images",
  "summary": {
    "description": "This algorithm segments the background and tissue in histopathological whole-slide images.",
    "inputs": [
      {
        "label": "Whole-slide image",
        "description": "Whole-slide histopathology image",
        "format": "DICOM",
        "modality": "SM",
        "bodypartexamined": "WHOLEBODY",
        "slicethickness": "",
        "non-contrast": false,
        "contrast": false
      }
    ],
    "outputs": [
      {
        "type": "Prediction",
        "valueType": "Segmentation mask",
        "label": "WSI background and tissue segmentation mask",
        "description": "WSI background and tissue segmentation mask (TIFF). The labels are: 0-background, 1-tissue. The output has the configured output pixel spacing (2.0μm by default).",
        "classes": []
      }
    ],
    "model": {
      "architecture": "Fully convolutional network with 7 layers, ReLU activation functions in the first 6 convolutional layers and softmax in the last one. Max pooling was inserted after each of the first 3 convolutional layers.",
      "training": "supervised",
      "cmpapproach": "2D"
    },
    "data": {
      "training": {
        "vol_samples": 70
      },
      "evaluation": {
        "vol_samples": 38
      },
      "public": false,
      "external": false
    }
  },
  "details": {
    "name": "Tissue-Background Segmentation in Histopathological Whole-Slide Images",
    "version": "1.0.0",
    "devteam": "Peter Bándi",
    "type": "Segmentation",
    "date": {
      "weights": "2019-12-13",
      "code": "2024-05-16",
      "pub": "2019-12-17"
    },
    "cite": "Bándi P, Balkenhol M, van Ginneken B, van der Laak J, Litjens G. 2019. Resolution-agnostic tissue segmentation in whole-slide histopathology images with convolutional neural networks. PeerJ 7:e8242",
    "license": {
      "code": "AGPL v3.0",
      "weights": "AGPL v3.0"
    },
    "publications": [
      {
        "uri": "https://peerj.com/articles/8242/",
        "title": "Resolution-agnostic tissue segmentation in whole-slide histopathology images with convolutional neural networks"
      }
    ],
    "github": "https://github.com/DIAGNijmegen/pathology-tissue-background-segmentation-processor",
    "zenodo": "",
    "colab": "",
    "slicer": false
  },
  "info": {
    "use": {
      "title": "Intended use",
      "text": "This algorithm segments the background and tissue in whole-slide histopathology images. It handles WSIs with various stains and resolutions. It takes a pixel spacing as input parameter (default 2.0μm) and creates a segmentation at that resolution. The input WSI needs to contain at least one layer at that resolution, within a 25% tolerance (i.e. 1.5-2.5μm at the default spacing). The algorithm can also be found on Grand Challenge [1] (access with verified account).",
      "references": [
        {
          "label": "Algorithm on Grand Challenge (access with verified account)",
          "uri": "https://grand-challenge.org/algorithms/tissue-background-segmenation"
        }
      ],
      "tables": []
    },
    "analyses": {
      "title": "Evaluation",
      "text": "Evaluation was performed using the Dice score, sensitivity, and false-positive count, and compared against three non-deep-learning methods: thresholding, Otsu's method, and FESI. See the paper (Methods, pages 12-13, section on Measurements, and Results, pages 13-15) for additional details [1].",
      "references": [
        {
          "label": "Resolution-agnostic tissue segmentation in whole-slide histopathology images with convolutional neural networks",
          "uri": "https://peerj.com/articles/8242/"
        }
      ],
      "tables": [
        {
          "label": "Dice scores at 2.0μm pixel spacing.",
          "entries": {
            "Thresholding": "0.8627 ±0.1361",
            "Otsu's": "0.7373 ±0.1596",
            "FESI": "0.8284 ±0.3288",
            "This model": "0.9822 ±0.0195"
          }
        },
        {
          "label": "Sensitivity scores at 2.0μm pixel spacing.",
          "entries": {
            "Thresholding": "0.5763 ±0.3631",
            "Otsu's": "0.2890 ±0.3665",
            "FESI": "0.6495 ±0.3690",
            "This model": "0.8953 ±0.1302"
          }
        },
        {
          "label": "False-positive count at 2.0μm pixel spacing.",
          "entries": {
            "Thresholding": "34.50 ±76.49",
            "Otsu's": "12.63 ±12.09",
            "FESI": "1.37 ±2.63",
            "This model": "5.37 ±8.25"
          }
        }
      ]
    },
    "evaluation": {
      "title": "Evaluation data",
      "text": "The evaluation was performed on two private datasets. The first dataset consisted of 30 WSIs with breast, lymph node, kidney, lung, rectum, and tongue tissue, stained with hematoxylin and eosin (H&E), Sirius Red, Periodic Acid-Schiff (PAS), cytokeratin AE1/AE3 (AE1AE3), Ki-67, and a cocktail of cytokeratin 8 and cytokeratin 18 (CK8-18). The second dataset consisted of 8 WSIs with mostly different tissue types and all different staining methods, used to evaluate the generalization of the model. It contained lung, cornea, aorta, brain, skin, uterus, and kidney tissue, stained with Grocott, Alcian Blue, Von Kossa, Perls, and Chromotrope Aniline Blue (CAB) stains. See the paper (Materials, pages 4-6, sections on the development and the dissimilar datasets) for additional details [1].",
      "references": [
        {
          "label": "Resolution-agnostic tissue segmentation in whole-slide histopathology images with convolutional neural networks",
          "uri": "https://peerj.com/articles/8242/"
        }
      ],
      "tables": []
    },
    "training": {
      "title": "Training data",
      "text": "The training dataset consisted of 70 WSIs with breast, lymph node, kidney, lung, rectum, and tongue tissue, stained with hematoxylin and eosin (H&E), Sirius Red, Periodic Acid-Schiff (PAS), cytokeratin AE1/AE3 (AE1AE3), Ki-67, and a cocktail of cytokeratin 8 and cytokeratin 18 (CK8-18). See the paper (Materials, pages 4-5, development dataset section) for additional details [1].",
      "references": [
        {
          "label": "Resolution-agnostic tissue segmentation in whole-slide histopathology images with convolutional neural networks",
          "uri": "https://peerj.com/articles/8242/"
        }
      ],
      "tables": []
    },
    "ethics": {
      "title": "",
      "text": "",
      "references": [],
      "tables": []
    },
    "limitations": {
      "title": "Limitations",
      "text": "This algorithm was developed for research purposes only. The provided input should contain a layer with the selected pixel spacing (default 2.0μm ± 25%).",
      "references": [],
      "tables": []
    }
  }
}
models/gc_wsi_bgseg/utils/WSIBackgroundSegmentationRunner.py

+77

"""
------------------------------------------------------------------
MHub / DIAG - Run Module for WSI Background Segmentation Algorithm
------------------------------------------------------------------

------------------------------------------------------------------
Author: Sil van de Leemput

------------------------------------------------------------------
"""
from typing import Optional

import tempfile

from mhubio.core import Instance, InstanceData, IO, Module

from pathlib import Path


@IO.Config(
    'input_spacing', float, 2.0,
    the="Desired input spacing to run the segmentation algorithm for. "
        "The closest level matching the spacing in the input Tiff image will be selected. "
        "Default is 2.0 micrometer."
)
@IO.Config(
    'output_spacing', Optional[float], None,
    the="Desired output spacing for the output segmentation. "
        "By default this matches the input_spacing."
)
@IO.Config(
    'spacing_tolerance', float, 0.25,
    the="Relative spacing tolerance with respect to the desired input_spacing. "
        "By default this is set to 25%."
)
class WSIBackgroundSegmentationRunner(Module):

    input_spacing: float
    output_spacing: Optional[float]
    spacing_tolerance: float

    CLI_SCRIPT_PATH = Path("/app") / "src" / "process.py"

    @IO.Instance()
    @IO.Input('in_data', 'tif|tiff:mod=sm', the='input whole-slide image Tiff')
    @IO.Output('out_data', 'gc_wsi_bg_segmentation.tif', 'tiff:mod=seg:model=WSIBackgroundSegmentation', 'in_data', the='Background segmentation of the input WSI.')
    def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None:
        # Fall back to the input spacing when no explicit output spacing is configured
        output_spacing = self.output_spacing
        if self.output_spacing is None:
            output_spacing = self.input_spacing

        with tempfile.TemporaryDirectory() as tmp_dir:
            # Execute the WSI background segmentation algorithm through a Python
            # subprocess running in the dedicated pipenv environment
            self.subprocess(
                [
                    "pipenv",
                    "run",
                    "python",
                    str(self.CLI_SCRIPT_PATH),
                    in_data.abspath,
                    out_data.abspath,
                    "--work-dir",
                    tmp_dir,
                    "--input-spacing",
                    str(self.input_spacing),
                    "--output-spacing",
                    str(output_spacing),
                    "--spacing-tolerance",
                    str(self.spacing_tolerance)
                ]
            )

        # Validate that the required output was generated by the subprocess
        if not Path(out_data.abspath).is_file():
            raise FileNotFoundError(
                f"Couldn't find expected output file: `{out_data.abspath}`. "
                f"The subprocess `{self.CLI_SCRIPT_PATH}` did not generate the required output file."
            )
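
For reference, the subprocess above corresponds to the following pipenv invocation inside the container; the paths and values are illustrative placeholders, and the flags mirror the argument list built in task():

pipenv run python /app/src/process.py \
  /path/to/input_wsi.tiff \
  /path/to/gc_wsi_bg_segmentation.tif \
  --work-dir /tmp/work \
  --input-spacing 2.0 \
  --output-spacing 2.0 \
  --spacing-tolerance 0.25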

models/gc_wsi_bgseg/utils/__init__.py

Whitespace-only changes.
