2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -63,7 +63,7 @@ repos:
hooks:
- id: vulture
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.20.1
rev: v1.20.2
⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🌐 Web query:

mypy version 1.20.2 release notes or changelog

💡 Result:

No evidence was found for a mypy 1.20.2 release as of 2026-05-07. The latest versions are mypy 1.20.0 (released March 31, 2026 [1][2]) and 1.20.1 (April 2026 [3][4][5]). GitHub tags confirm releases up to v1.20.0 [1]; there is no v1.20.2 tag or PyPI page [2]. The v1.20.1 changelog includes fixes such as disabling SQLite cache sync and narrowing corrections [3][4]. The official 1.20.0 release notes cover improved type narrowing, the drop of Python 3.9 support, SQLite caching by default, local partial types improvements, and plans for Mypy 2.0 [6][7]. For full 1.20 details, see https://github.com/python/mypy/blob/v1.20.1/CHANGELOG.md [3] or https://mypy.readthedocs.io/en/stable/changelog.html [7].

Change mypy version to v1.20.1 or v1.20.0.

mypy v1.20.2 does not exist. As of May 2026, the latest available versions are v1.20.1 (April 2026) and v1.20.0 (March 2026). Update the revision to one of these releases.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In @.pre-commit-config.yaml at line 66, the mypy hook specifies an invalid rev v1.20.2; update the rev to a valid release (e.g., change the rev value from "v1.20.2" to "v1.20.1" or "v1.20.0") so the pre-commit hook points to an existing mypy release.

hooks:
- id: mypy
exclude: >
27 changes: 18 additions & 9 deletions cytotable/convert.py
@@ -412,6 +412,7 @@ def _source_pageset_to_parquet(
A string of the output filepath.
"""

import hashlib
import pathlib

import duckdb
@@ -426,10 +427,16 @@ def _source_pageset_to_parquet(

source_type = str(source["source_path"].suffix).lower()

# attempt to build dest_path
# hash of parent path discriminates sources whose parent dirs share a name
# (e.g. analyses/{1,2}/analysis) — see cytomining/CytoTable#442
source_parent_hash = hashlib.sha1(
str(source["source_path"].parent).encode("utf-8"),
usedforsecurity=False,
).hexdigest()[:12]
source_dest_path = (
f"{dest_path}/{str(AnyPath(source_group_name).stem).lower()}/"
f"{str(source['source_path'].parent.name).lower()}"
f"{str(source['source_path'].parent.name).lower()}/"
f"{source_parent_hash}"
)
pathlib.Path(source_dest_path).mkdir(parents=True, exist_ok=True)
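
A minimal sketch (not part of the diff; the `dest` path is hypothetical) of why hashing the full parent path discriminates sources whose parent dirs share a name. `usedforsecurity=False` requires Python 3.9+:

```python
import hashlib
import pathlib

# two sources whose parent dirs share the trailing name "analysis",
# as in cytomining/CytoTable#442
for parent in ("analyses/1/analysis", "analyses/2/analysis"):
    digest = hashlib.sha1(
        str(parent).encode("utf-8"), usedforsecurity=False
    ).hexdigest()[:12]
    # identical final path segment, but the hash of the *full* parent
    # path differs, so the intermediate dirs no longer collide
    print(pathlib.Path("dest") / pathlib.Path(parent).name.lower() / digest)
```

Equal dir names hash equally, but distinct parent paths never share a suffix short of a truncated-SHA-1 collision, which restores uniqueness without lengthening paths with the full parent hierarchy.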

@@ -825,13 +832,15 @@ def _concat_source_group(
# remove the file which was written in the concatted parquet file (we no longer need it)
pathlib.Path(table).unlink()

# attempt to clean up dir containing original table(s) only if it's empty
try:
pathlib.Path(pathlib.Path(source["table"][0]).parent).rmdir()
except OSError as os_err:
# raise only if we don't have a dir not empty errno
if os_err.errno != errno.ENOTEMPTY:
raise
# clean up the per-source hash dir and its parent if empty
chunk_parent = pathlib.Path(source["table"][0]).parent
for cleanup_dir in (chunk_parent, chunk_parent.parent):
try:
cleanup_dir.rmdir()
except OSError as os_err:
if os_err.errno != errno.ENOTEMPTY:
raise
break

# return the concatted parquet filename
concatted[0]["table"] = [destination_path]
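
A minimal standalone sketch (hypothetical `prune` helper and directory layout, not CytoTable API) of the two-level rmdir walk above, showing why the shared parent dir survives until its last hash dir is removed:

```python
import errno
import pathlib
import tempfile

root = pathlib.Path(tempfile.mkdtemp())
leaf_a = root / "analysis" / "hash-a"
leaf_b = root / "analysis" / "hash-b"
for leaf in (leaf_a, leaf_b):
    leaf.mkdir(parents=True)

def prune(leaf: pathlib.Path) -> None:
    # remove the hash dir, then its parent, stopping at the first
    # directory that still holds other sources' output
    for cleanup_dir in (leaf, leaf.parent):
        try:
            cleanup_dir.rmdir()
        except OSError as err:
            if err.errno != errno.ENOTEMPTY:
                raise
            break  # sibling hash dir still present; stop here

prune(leaf_a)  # removes hash-a only; "analysis" still holds hash-b
prune(leaf_b)  # removes hash-b, then the now-empty "analysis" parent
assert not (root / "analysis").exists()
```

Since rmdir never deletes a non-empty directory, breaking on ENOTEMPTY is enough to stop the walk at the first still-occupied level; any other OSError signals a real failure and is re-raised.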
61 changes: 61 additions & 0 deletions tests/test_convert_threaded.py
@@ -6,6 +6,7 @@


import pathlib
import shutil
from typing import List

import anndata as ad
@@ -470,3 +471,63 @@ def test_convert_nested_dirs(fx_tempdir: pathlib.Path):

table = parquet.read_table(source=result)
assert table.shape == (397, 6049)


def test_convert_multi_source_colliding_parent_dir_names(
load_parsl_threaded: None,
fx_tempdir: str,
data_dir_cellprofiler: str,
):
"""
Regression test for cytomining/CytoTable#442: multi-source convert with
identical parent dir names (analyses/{1,2,3}/analysis) used to collide on
intermediate parquet paths.
"""

src_root = pathlib.Path(fx_tempdir) / "analyses"
for site in ("1", "2", "3"):
site_dir = src_root / site / "analysis"
site_dir.mkdir(parents=True, exist_ok=True)
for table_name in ("Cells.csv", "Cytoplasm.csv", "Nuclei.csv", "Image.csv"):
shutil.copy(
f"{data_dir_cellprofiler}/ExampleHuman/{table_name}",
site_dir / table_name,
)

result = convert(
source_path=str(src_root),
dest_path=f"{fx_tempdir}/multi_site.parquet",
dest_datatype="parquet",
preset="cellprofiler_csv",
join=False,
)
assert isinstance(result, dict)

assert set(result.keys()) == {
"Cells.csv",
"Cytoplasm.csv",
"Nuclei.csv",
"Image.csv",
}

single_site_result = convert(
source_path=f"{data_dir_cellprofiler}/ExampleHuman",
dest_path=f"{fx_tempdir}/single_site.parquet",
dest_datatype="parquet",
source_datatype="csv",
preset="cellprofiler_csv",
join=False,
)
assert isinstance(single_site_result, dict)

for compartment in ("Cells.csv", "Cytoplasm.csv", "Nuclei.csv"):
multi_rows = parquet.read_table(
source=result[compartment][0]["table"][0]
).num_rows
single_rows = parquet.read_table(
source=single_site_result[compartment][0]["table"][0]
).num_rows
assert multi_rows == 3 * single_rows, (
f"{compartment}: expected 3x single-site rows ({3 * single_rows}),"
f" got {multi_rows}"
)