Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,17 @@ RUN apt-get update && apt-get install -y --no-install-recommends \

WORKDIR /opt/app/

# you can add any Python dependencies to requirements.in
# core deps live in requirements.in; model runtime extras live in requirements-models.in
RUN python -m pip install --upgrade pip setuptools pip-tools \
&& rm -rf /home/user/.cache/pip

# install slide2vec
COPY --chown=user:user requirements.in /opt/app/requirements.in
COPY --chown=user:user requirements-models.in /opt/app/requirements-models.in
RUN python -m pip install \
--no-cache-dir \
--no-color \
--requirement /opt/app/requirements.in \
--requirement /opt/app/requirements-models.in \
&& rm -rf /home/user/.cache/pip

COPY --chown=user:user slide2vec /opt/app/slide2vec
Expand Down
3 changes: 2 additions & 1 deletion Dockerfile.ci
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,11 @@ RUN python -m pip install --upgrade pip setuptools pip-tools \
&& rm -rf /root/.cache/pip

COPY --chown=user:user requirements.in /opt/app/requirements.in
COPY --chown=user:user requirements-models.in /opt/app/requirements-models.in
RUN python -m pip install \
--no-cache-dir \
--no-color \
--requirement /opt/app/requirements.in \
--requirement /opt/app/requirements-models.in \
&& rm -rf /root/.cache/pip

COPY --chown=user:user slide2vec /opt/app/slide2vec
Expand Down
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@
pip install slide2vec
```

Install the full model runtime only when you need embedding/model execution:

```shell
pip install "slide2vec[models]"
```

`slide2vec` now keeps the base install focused on the core package surface and moves the heavier model stack into the optional `models` extra.

## Python API

```python
Expand Down
32 changes: 32 additions & 0 deletions requirements-models.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
-r requirements.in
torch>=2.3,<2.8
torchvision>=0.18.0
einops>=0.8.0
timm>=1.0.3
huggingface-hub>=0.30.0,<1.0
environs
einops-exts>=0.0.4
transformers>=4.53
sacremoses
xformers>=0.0.31

## Hibou
scipy~=1.8.1
scikit-image~=0.19.3

## MUSK & CONCH
git+https://github.com/lilab-stanford/MUSK.git
git+https://github.com/Mahmoodlab/CONCH.git

## Prov-GigaPath
torchmetrics>=0.10.3
fvcore
iopath
webdataset
scikit-survival
scikit-learn
fairscale
packaging==23.2
ninja==1.11.1.1
psutil<6
git+https://github.com/prov-gigapath/prov-gigapath.git
40 changes: 7 additions & 33 deletions requirements.in
Original file line number Diff line number Diff line change
@@ -1,42 +1,16 @@
omegaconf>=2.3.0
h5py
huggingface-hub>=0.30.0,<1.0
matplotlib
numpy<2
pandas
pillow
rich
tqdm
wandb
torch>=2.3,<2.8
torchvision>=0.18.0
hs2p>=2.0,<3
torch
torchvision
transformers
wandb
wholeslidedata<0.0.16
timm>=1.0.3
einops>=0.8.0
einops-exts>=0.0.4
transformers>=4.53
sacremoses
environs
xformers>=0.0.31
matplotlib

## Hibou
scipy~=1.8.1
scikit-image~=0.19.3

## MUSK & CONCH
git+https://github.com/lilab-stanford/MUSK.git
git+https://github.com/Mahmoodlab/CONCH.git

## Prov-GigaPath
torchmetrics>=0.10.3
fvcore
iopath
webdataset
scikit-survival
scikit-learn
fairscale
packaging==23.2
ninja==1.11.1.1
psutil<6
git+https://github.com/prov-gigapath/prov-gigapath.git
einops
timm
21 changes: 12 additions & 9 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
timm
wandb
numpy==1.26.1
hs2p>=2.0,<3
omegaconf>=2.3.0
h5py
matplotlib
numpy<2
pandas
pillow
rich
einops
torch
torchvision
transformers
tqdm
omegaconf
wholeslidedata
huggingface_hub
torch==2.1.0
torchvision==0.16.0
wandb
wholeslidedata<0.0.16
einops
timm
20 changes: 10 additions & 10 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -19,30 +19,30 @@ install_requires =
hs2p>=2.0,<3
omegaconf
h5py
huggingface-hub
matplotlib
numpy<2
pandas
pillow
rich
tqdm
torchvision
wholeslidedata<0.0.16
matplotlib
timm
torch
torchvision
transformers
environs
sacremoses
einops
einops-exts
xformers
wandb
wholeslidedata<0.0.16
einops
timm

python_requires = >=3.10
zip_safe = no
include_package_data = True

[options.extras_require]
models =
huggingface-hub
sacremoses
einops-exts
xformers
testing =
pytest>=6.0
pytest-cov>=2.0
Expand Down
6 changes: 2 additions & 4 deletions slide2vec/models/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
from timm.data import resolve_data_config
from timm.data.constants import IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD
from timm.data.transforms_factory import create_transform
from transformers import AutoImageProcessor, AutoModel
from torchvision import transforms
from torchvision.transforms import v2
from transformers import AutoImageProcessor, AutoModel

import slide2vec.distributed as distributed
import slide2vec.models.vision_transformer_dino as vits_dino
Expand Down Expand Up @@ -231,9 +231,7 @@ def build_encoder(self):
raise NotImplementedError

def get_transforms(self):
data_config = resolve_data_config(
self.encoder.pretrained_cfg, model=self.encoder
)
data_config = resolve_data_config(self.encoder.pretrained_cfg, model=self.encoder)
transform = create_transform(**data_config)
return transform

Expand Down
137 changes: 137 additions & 0 deletions tests/test_dependency_split.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import ast
import configparser
import re
from pathlib import Path

# Repository root: this test module sits one directory below it.
ROOT = Path(__file__).resolve().parents[1]

# Packaging and documentation files inspected by the tests in this module.
SETUP_CFG = ROOT.joinpath("setup.cfg")
README = ROOT.joinpath("README.md")
CORE_REQUIREMENTS = ROOT.joinpath("requirements.in")
CORE_REQUIREMENTS_TXT = ROOT.joinpath("requirements.txt")
MODELS_REQUIREMENTS = ROOT.joinpath("requirements-models.in")

# Distributions expected only in the optional model runtime, never in the base install.
FOUNDATION_REQUIREMENT_NAMES = {"xformers", "sacremoses", "huggingface-hub"}

# Distributions expected to stay in the base install.
CORE_RUNTIME_REQUIREMENT_NAMES = {
    # slide handling and configuration
    "hs2p",
    "omegaconf",
    "wholeslidedata",
    # numerics, tabular data and plotting
    "numpy",
    "pandas",
    "pillow",
    "matplotlib",
    # deep-learning stack
    "torch",
    "torchvision",
    "transformers",
    "timm",
    "einops",
    # logging and progress reporting
    "rich",
    "tqdm",
    "wandb",
}


def _load_setup_cfg() -> configparser.ConfigParser:
    """Parse the repository's ``setup.cfg`` and return the populated parser.

    Returns:
        A ``ConfigParser`` loaded from ``SETUP_CFG``.

    Raises:
        FileNotFoundError: If ``SETUP_CFG`` does not exist.
        configparser.Error: If the file is not valid INI syntax.
    """
    parser = configparser.ConfigParser()
    # ``ConfigParser.read`` silently skips files it cannot open, which would
    # surface later as a confusing ``KeyError`` on a section lookup. Opening
    # the file ourselves makes a missing setup.cfg fail at the real cause.
    with SETUP_CFG.open(encoding="utf-8") as handle:
        parser.read_file(handle)
    return parser


def _requirement_names(raw_block: str) -> set[str]:
    """Return the normalized distribution names declared in *raw_block*.

    Args:
        raw_block: Text of a requirements file or of a multi-line
            ``setup.cfg`` requirement list, one requirement per line.

    Returns:
        The set of names, lower-cased and with ``_`` replaced by ``-``.

    Raises:
        AssertionError: If a requirement line does not start with a name.
    """
    # Delegate so both helpers always agree on which lines count as requirements.
    return set(_requirement_lines(raw_block))


def _requirement_lines(raw_block: str) -> dict[str, str]:
    """Map each normalized distribution name in *raw_block* to its requirement line.

    Blank lines, ``#`` comments, pip option lines (anything starting with
    ``-``, such as ``-r``, ``--requirement`` or ``-e``) and bare URL/VCS
    requirements (``git+https://...``) are skipped because none of them starts
    with a distribution name. When a name occurs more than once, the last
    occurrence wins.

    Args:
        raw_block: Text of a requirements file or of a multi-line
            ``setup.cfg`` requirement list, one requirement per line.

    Returns:
        A dict from normalized name (lower-case, ``_`` replaced by ``-``) to
        the stripped requirement line it came from.

    Raises:
        AssertionError: If a requirement line does not start with a name.
    """
    lines: dict[str, str] = {}
    for raw_line in raw_block.splitlines():
        requirement = raw_line.strip()
        if not requirement or requirement.startswith(("#", "-")):
            continue
        # A line that *begins* with a URL scheme has no leading name; without
        # this guard "git+https://..." would be recorded as a package "git".
        # "name @ https://..." does not match here and is still parsed below.
        if re.match(r"^[A-Za-z][A-Za-z0-9+.-]*://", requirement):
            continue
        match = re.match(r"^[A-Za-z0-9_.-]+", requirement)
        if match is None:
            # Raised explicitly so the check survives ``python -O``.
            raise AssertionError(f"Could not parse requirement line: {requirement}")
        lines[match.group(0).replace("_", "-").lower()] = requirement
    return lines


def _top_level_imported_modules(path: Path) -> set[str]:
    """Return the root package names imported at module level in *path*.

    Only statements directly in the module body are inspected; imports nested
    inside functions, classes, ``if`` or ``try`` blocks are ignored. Relative
    imports (``from . import x``, ``from .pkg import y``) are skipped because
    they refer to siblings of the inspected module, not to top-level packages.

    Args:
        path: Python source file to inspect.

    Returns:
        The first dotted component of every absolutely imported module.

    Raises:
        OSError: If *path* cannot be read.
        SyntaxError: If *path* is not valid Python.
    """
    # Passing the filename makes any SyntaxError point at the offending file.
    tree = ast.parse(path.read_text(encoding="utf-8"), filename=str(path))
    modules: set[str] = set()
    for node in tree.body:
        if isinstance(node, ast.Import):
            modules.update(alias.name.split(".")[0] for alias in node.names)
        elif isinstance(node, ast.ImportFrom) and node.level == 0 and node.module:
            modules.add(node.module.split(".")[0])
    return modules


def test_setup_cfg_moves_model_runtime_deps_into_models_extra():
    """Check the ``setup.cfg`` split between base requirements and the ``models`` extra.

    The foundation packages must be absent from ``install_requires`` and
    present in the ``models`` extra, while every core runtime package must
    remain in ``install_requires``.
    """
    cfg = _load_setup_cfg()

    base_names = _requirement_names(cfg["options"]["install_requires"])
    extra_names = _requirement_names(cfg["options.extras_require"]["models"])

    # No foundation package may leak into the base install ...
    assert not (FOUNDATION_REQUIREMENT_NAMES & base_names)
    # ... each of them must be offered by the extra ...
    assert FOUNDATION_REQUIREMENT_NAMES.issubset(extra_names)
    # ... and the core runtime must still be installed unconditionally.
    assert base_names.issuperset(CORE_RUNTIME_REQUIREMENT_NAMES)


def test_requirements_files_split_core_from_foundation_runtime():
    """Check that the ``.in`` files separate core and model-runtime requirements.

    ``requirements.in`` must list the shared packages without version
    constraints, and ``requirements-models.in`` must include it via ``-r`` and
    carry the version-constrained specifiers.
    """
    core_text = CORE_REQUIREMENTS.read_text(encoding="utf-8")
    models_text = MODELS_REQUIREMENTS.read_text(encoding="utf-8")
    core_names = _requirement_names(core_text)
    models_names = _requirement_names(models_text)
    core_lines = _requirement_lines(core_text)
    models_lines = _requirement_lines(models_text)

    assert not (FOUNDATION_REQUIREMENT_NAMES & core_names)
    assert FOUNDATION_REQUIREMENT_NAMES.issubset(models_names)
    assert core_names.issuperset(CORE_RUNTIME_REQUIREMENT_NAMES)
    assert "-r requirements.in" in models_text

    # Package -> exact line expected in requirements-models.in.
    constrained_specs = {
        "torch": "torch>=2.3,<2.8",
        "torchvision": "torchvision>=0.18.0",
        "einops": "einops>=0.8.0",
        "timm": "timm>=1.0.3",
        "transformers": "transformers>=4.53",
    }
    # The core file lists the same packages as bare names.
    for package in constrained_specs:
        assert core_lines[package] == package
    for package, spec in constrained_specs.items():
        assert models_lines[package] == spec


def test_requirements_txt_matches_generic_core_runtime_requirements():
    """Check that ``requirements.txt`` lists the shared ML packages as bare names."""
    parsed = _requirement_lines(CORE_REQUIREMENTS_TXT.read_text(encoding="utf-8"))

    # Each of these must appear as a bare name, with no version specifier.
    for package in ("torch", "torchvision", "einops", "timm", "transformers"):
        assert parsed[package] == package


def test_readme_documents_core_and_models_installs():
    """Check that the README shows both the base and the ``models`` install commands."""
    readme_text = README.read_text(encoding="utf-8")

    expected_commands = (
        'pip install slide2vec',
        'pip install "slide2vec[models]"',
    )
    for command in expected_commands:
        assert command in readme_text


def test_tile_dataset_uses_direct_transformers_type_check():
    """Check that the package source still relies on ``transformers`` directly.

    ``slide2vec/data/dataset.py`` must import ``BaseImageProcessor`` and use it
    in an ``isinstance`` check on ``self.transforms``; ``slide2vec/models/models.py``
    must import ``transformers`` at module level.
    """
    package_dir = ROOT / "slide2vec"
    dataset_source = (package_dir / "data" / "dataset.py").read_text(encoding="utf-8")

    # Plain substring checks: the exact import and isinstance spelling are what is pinned.
    assert "from transformers.image_processing_utils import BaseImageProcessor" in dataset_source
    assert "isinstance(self.transforms, BaseImageProcessor)" in dataset_source

    models_imports = _top_level_imported_modules(package_dir / "models" / "models.py")
    assert "transformers" in models_imports
Loading