Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,18 @@ result = pipeline.run(manifest_path="/path/to/slides.csv")

### Input Manifest

Manifest-driven runs use the schema below. `mask_path` is optional.
Manifest-driven runs use the schema below. `mask_path` and `spacing_at_level_0` are optional.

```csv
sample_id,image_path,mask_path
slide-1,/path/to/slide-1.svs,/path/to/mask-1.png
slide-2,/path/to/slide-2.svs,
sample_id,image_path,mask_path,spacing_at_level_0
slide-1,/path/to/slide-1.svs,/path/to/mask-1.png,0.25
slide-2,/path/to/slide-2.svs,,
...
```

Use `spacing_at_level_0` when the slide file reports a missing or incorrect level-0 spacing and you want to override it.


### Outputs

The package writes explicit artifact directories:
Expand Down
22 changes: 18 additions & 4 deletions docs/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,16 @@ This command:

## Input Manifest

The manifest must use the HS2P schema. `mask_path` is optional.
The manifest must use the HS2P schema. `mask_path` and `spacing_at_level_0` are optional.

```csv
sample_id,image_path,mask_path
slide-1,/path/to/slide-1.svs,/path/to/mask-1.png
slide-2,/path/to/slide-2.svs,
sample_id,image_path,mask_path,spacing_at_level_0
slide-1,/path/to/slide-1.svs,/path/to/mask-1.png,0.25
slide-2,/path/to/slide-2.svs,,
```

Use `spacing_at_level_0` when you need to override the slide's native level-0 spacing metadata for tiling.

Set `csv:` in your config file to point to this manifest.

## What the Config Controls
Expand Down Expand Up @@ -115,6 +117,18 @@ The CLI writes explicit artifact directories under the run output directory:
- optional `slide_latents/<sample_id>.pt` or `.npz`
- `process_list.csv`
- the resolved saved config file for the run
- `logs/` with the main log plus distributed worker stdout/stderr captures when multi-GPU workers are used

## Progress UX

When stdout is an interactive terminal, the CLI shows live `rich` progress for:

- tiling discovery and completion
- overall slide embedding progress
- current-slide tile or region progress
- slide-level aggregation when the model pools tile features into slide embeddings

When stdout is not interactive, the CLI falls back to plain text stage updates and summaries.

## Typical Workflows

Expand Down
4 changes: 4 additions & 0 deletions docs/python-api.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ Shape conventions:

Use `embed_slides(...)` for ordered multi-slide in-memory extraction.

If a slide reports the wrong native spacing, pass a `SlideSpec`-like object or mapping with `spacing_at_level_0`, or use `Model.embed_slide(..., spacing_at_level_0=...)` for path-like inputs.

When `ExecutionOptions(num_gpus=2)` or another value greater than `1` is used:

- `embed_slide(...)` shards one slide's tiles across GPUs
Expand Down Expand Up @@ -145,3 +147,5 @@ result = pipeline.run(manifest_path="/path/to/slides.csv")
- `tile_artifacts`
- `slide_artifacts`
- `process_list_path`

The manifest schema matches HS2P and accepts optional `mask_path` and `spacing_at_level_0` columns.
1 change: 1 addition & 0 deletions requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ huggingface-hub>=0.30.0,<1.0
numpy<2
pandas
pillow
rich
tqdm
wandb
torch>=2.3,<2.8
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@ wandb
numpy==1.26.1
pandas
pillow
rich
einops
tqdm
omegaconf
wholeslidedata
huggingface_hub
torch==2.1.0
torchvision==0.16.0
torchvision==0.16.0
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ install_requires =
numpy<2
pandas
pillow
rich
tqdm
torchvision
wholeslidedata<0.0.16
Expand Down
20 changes: 14 additions & 6 deletions slide2vec/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
from slide2vec.artifacts import SlideEmbeddingArtifact, TileEmbeddingArtifact

if TYPE_CHECKING:
from slide2vec.inference import LoadedModel, SlideRecord
from hs2p import SlideSpec
from slide2vec.inference import LoadedModel
else:
LoadedModel = Any
SlideRecord = Any
SlideSpec = Any


DEFAULT_LEVEL_BY_NAME = {
Expand All @@ -29,9 +30,10 @@ class SlideLike(Protocol):
sample_id: str
image_path: PathLike
mask_path: PathLike | None
spacing_at_level_0: float | None


SlideInput = PathLike | Mapping[str, object] | SlideLike | SlideRecord
SlideInput = PathLike | Mapping[str, object] | SlideLike | SlideSpec
SlideSequence = Sequence[SlideInput]
TilingResultsInput = Sequence[Any] | Mapping[str, Any]

Expand Down Expand Up @@ -237,18 +239,20 @@ def embed_slide(
execution: ExecutionOptions | None = None,
sample_id: str | None = None,
mask_path: PathLike | None = None,
spacing_at_level_0: float | None = None,
) -> EmbeddedSlide:
...

@overload
def embed_slide(
self,
slide: Mapping[str, object] | SlideLike | SlideRecord,
slide: Mapping[str, object] | SlideLike | SlideSpec,
*,
preprocessing: PreprocessingConfig,
execution: ExecutionOptions | None = None,
sample_id: None = None,
mask_path: None = None,
spacing_at_level_0: None = None,
) -> EmbeddedSlide:
...

Expand All @@ -260,15 +264,19 @@ def embed_slide(
execution: ExecutionOptions | None = None,
sample_id: str | None = None,
mask_path: PathLike | None = None,
spacing_at_level_0: float | None = None,
) -> EmbeddedSlide:
if isinstance(slide, (str, Path)):
slide = {
"sample_id": sample_id or Path(slide).stem,
"image_path": Path(slide),
"mask_path": Path(mask_path) if mask_path is not None else None,
"spacing_at_level_0": spacing_at_level_0,
}
elif sample_id is not None or mask_path is not None:
raise ValueError("sample_id and mask_path overrides are only supported when slide is a path-like input")
elif sample_id is not None or mask_path is not None or spacing_at_level_0 is not None:
raise ValueError(
"sample_id, mask_path, and spacing_at_level_0 overrides are only supported when slide is a path-like input"
)
return self.embed_slides(
[slide],
preprocessing=preprocessing,
Expand Down
11 changes: 7 additions & 4 deletions slide2vec/cli.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import argparse

from slide2vec.api import ExecutionOptions, Model, Pipeline, PreprocessingConfig
import slide2vec.progress as progress


def get_args_parser(add_help: bool = True):
Expand Down Expand Up @@ -44,10 +45,12 @@ def main(argv=None):
parser = get_args_parser(add_help=True)
args = parser.parse_args(argv)
pipeline, cfg = build_model_and_pipeline(args)
return pipeline.run(
manifest_path=cfg.csv,
tiling_only=args.tiling_only,
)
reporter = progress.create_cli_progress_reporter(output_dir=getattr(cfg, "output_dir", None))
with progress.activate_progress_reporter(reporter):
return pipeline.run(
manifest_path=cfg.csv,
tiling_only=args.tiling_only,
)


def _setup_cli_config(args):
Expand Down
4 changes: 2 additions & 2 deletions slide2vec/configs/preprocessing/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ tiling:
drop_holes: false # whether or not to drop tiles whose center pixel falls within an identified hole
use_padding: true # whether to pad the border of the slide
seg_params:
downsample: 16 # find the closest downsample in the slide for tissue segmentation
downsample: 64 # find the closest downsample in the slide for tissue segmentation
sthresh: 8 # segmentation threshold (positive integer, using a higher threshold leads to less foreground and more background detection) (not used when use_otsu=True)
sthresh_up: 255 # upper threshold value for scaling the binary mask
mthresh: 7 # median filter size (positive, odd integer)
close: 4 # additional morphological closing to apply following initial thresholding (positive integer)
use_otsu: false # use otsu's method instead of simple binary thresholding
use_hsv: true # use HSV thresholding instead of simple binary thresholding
filter_params:
ref_tile_size: 16 # reference tile size at spacing tiling.params.target_spacing_um
ref_tile_size: ${target_tile_size_px} # reference tile size at spacing tiling.params.target_spacing_um
a_t: 4 # area filter threshold for tissue (positive integer, the minimum size of detected foreground contours to consider, relative to the reference tile size ref_tile_size, e.g. a value of 10 means only detected foreground contours of size greater than 10 [ref_tile_size, ref_tile_size] tiles at spacing tiling.params.target_spacing_um will be kept)
a_h: 2 # area filter threshold for holes (positive integer, the minimum size of detected holes/cavities in foreground contours to avoid, once again relative to the reference tile size ref_tile_size)
max_n_holes: 8 # maximum number of holes to consider per detected foreground contour (positive integer, higher values lead to more accurate patching but increase computational cost; keeps the biggest holes)
Expand Down
80 changes: 43 additions & 37 deletions slide2vec/distributed/direct_embed_worker.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import argparse
from contextlib import nullcontext
import json
from pathlib import Path

Expand All @@ -25,6 +26,7 @@ def main(argv=None) -> int:
deserialize_preprocessing,
load_successful_tiled_slides,
)
from slide2vec.progress import JsonlProgressReporter, activate_progress_reporter

parser = get_args_parser(add_help=True)
args = parser.parse_args(argv)
Expand Down Expand Up @@ -52,49 +54,53 @@ def main(argv=None) -> int:
slide.sample_id: (slide, tiling_result)
for slide, tiling_result in zip(slide_records, tiling_results)
}
progress_events_path = request.get("progress_events_path")
reporter = JsonlProgressReporter(progress_events_path, rank=global_rank) if progress_events_path else None
context = activate_progress_reporter(reporter) if reporter is not None else nullcontext()

if request["strategy"] == "tile_shard":
sample_id = request["sample_id"]
slide, tiling_result = paired_by_sample[sample_id]
num_tiles = len(tiling_result.x)
tile_indices = np.array_split(np.arange(num_tiles, dtype=np.int64), world_size)[global_rank]
loaded = model._load_backend()
tile_embeddings = _compute_tile_embeddings_for_slide(
loaded,
with context:
if request["strategy"] == "tile_shard":
sample_id = request["sample_id"]
slide, tiling_result = paired_by_sample[sample_id]
num_tiles = len(tiling_result.x)
tile_indices = np.array_split(np.arange(num_tiles, dtype=np.int64), world_size)[global_rank]
loaded = model._load_backend()
tile_embeddings = _compute_tile_embeddings_for_slide(
loaded,
model,
slide,
tiling_result,
preprocessing=preprocessing,
execution=execution,
tile_indices=tile_indices,
)
payload = {
"tile_index": torch.as_tensor(tile_indices, dtype=torch.long),
"tile_embeddings": tile_embeddings.detach().cpu() if torch.is_tensor(tile_embeddings) else torch.as_tensor(tile_embeddings),
}
torch.save(payload, coordination_dir / f"{sample_id}.tiles.rank{global_rank}.pt")
return 0

assigned_ids = list(request.get("assignments", {}).get(str(global_rank), []))
if not assigned_ids:
return 0
assigned_slides = [paired_by_sample[sample_id][0] for sample_id in assigned_ids]
assigned_tiling_results = [paired_by_sample[sample_id][1] for sample_id in assigned_ids]
embedded_slides = _compute_embedded_slides(
model,
slide,
tiling_result,
assigned_slides,
assigned_tiling_results,
preprocessing=preprocessing,
execution=execution,
tile_indices=tile_indices,
)
payload = {
"tile_index": torch.as_tensor(tile_indices, dtype=torch.long),
"tile_embeddings": tile_embeddings.detach().cpu() if torch.is_tensor(tile_embeddings) else torch.as_tensor(tile_embeddings),
}
torch.save(payload, coordination_dir / f"{sample_id}.tiles.rank{global_rank}.pt")
return 0

assigned_ids = list(request.get("assignments", {}).get(str(global_rank), []))
if not assigned_ids:
for embedded_slide in embedded_slides:
payload = {
"tile_embeddings": _to_cpu_payload(torch, embedded_slide.tile_embeddings),
"slide_embedding": _to_cpu_payload(torch, embedded_slide.slide_embedding),
"latents": _to_cpu_payload(torch, embedded_slide.latents),
}
torch.save(payload, coordination_dir / f"{embedded_slide.sample_id}.embedded.pt")
return 0
assigned_slides = [paired_by_sample[sample_id][0] for sample_id in assigned_ids]
assigned_tiling_results = [paired_by_sample[sample_id][1] for sample_id in assigned_ids]
embedded_slides = _compute_embedded_slides(
model,
assigned_slides,
assigned_tiling_results,
preprocessing=preprocessing,
execution=execution,
)
for embedded_slide in embedded_slides:
payload = {
"tile_embeddings": _to_cpu_payload(torch, embedded_slide.tile_embeddings),
"slide_embedding": _to_cpu_payload(torch, embedded_slide.slide_embedding),
"latents": _to_cpu_payload(torch, embedded_slide.latents),
}
torch.save(payload, coordination_dir / f"{embedded_slide.sample_id}.embedded.pt")
return 0
finally:
if dist.is_available() and dist.is_initialized():
dist.destroy_process_group()
Expand Down
28 changes: 17 additions & 11 deletions slide2vec/distributed/pipeline_worker.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import argparse
from contextlib import nullcontext
import json
from pathlib import Path

Expand All @@ -22,6 +23,7 @@ def main(argv=None) -> int:
deserialize_preprocessing,
load_successful_tiled_slides,
)
from slide2vec.progress import JsonlProgressReporter, activate_progress_reporter

parser = get_args_parser(add_help=True)
args = parser.parse_args(argv)
Expand Down Expand Up @@ -49,21 +51,25 @@ def main(argv=None) -> int:
return 0
assigned_slides = [slide for slide, _ in assigned_pairs]
assigned_tiling_results = [tiling_result for _, tiling_result in assigned_pairs]
embedded_slides = _compute_embedded_slides(
model,
assigned_slides,
assigned_tiling_results,
preprocessing=preprocessing,
execution=execution,
)
for embedded_slide, tiling_result in zip(embedded_slides, assigned_tiling_results):
_persist_embedded_slide(
progress_events_path = request.get("progress_events_path")
reporter = JsonlProgressReporter(progress_events_path, rank=global_rank) if progress_events_path else None
context = activate_progress_reporter(reporter) if reporter is not None else nullcontext()
with context:
embedded_slides = _compute_embedded_slides(
model,
embedded_slide,
tiling_result,
assigned_slides,
assigned_tiling_results,
preprocessing=preprocessing,
execution=execution,
)
for embedded_slide, tiling_result in zip(embedded_slides, assigned_tiling_results):
_persist_embedded_slide(
model,
embedded_slide,
tiling_result,
preprocessing=preprocessing,
execution=execution,
)
return 0
finally:
if dist.is_available() and dist.is_initialized():
Expand Down
Loading
Loading