Skip to content

Commit c972f7f

Browse files
authored
Port more stateful test actions from icechunk (#3130)
* Port more stateful test actions from icechunk
* Parallelize with pytest-xdist
* Minor edit
* Tweak profiles (closes #3010)
* One more tweak
1 parent ba1f71a commit c972f7f

File tree

7 files changed

+179
-16
lines changed

7 files changed

+179
-16
lines changed

.github/workflows/hypothesis.yaml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,19 @@ jobs:
2525

2626
strategy:
2727
matrix:
28-
python-version: ['3.11']
28+
python-version: ['3.12']
2929
numpy-version: ['2.2']
3030
dependency-set: ["optional"]
3131

3232
steps:
3333
- uses: actions/checkout@v4
34+
- name: Set HYPOTHESIS_PROFILE based on trigger
  run: |
    # Scheduled and manually dispatched runs select the "nightly" profile;
    # every other trigger selects the "ci" profile.
    # The env-file path is quoted so that it is never word-split by the shell.
    if [[ "${{ github.event_name }}" == "schedule" || "${{ github.event_name }}" == "workflow_dispatch" ]]; then
      echo "HYPOTHESIS_PROFILE=nightly" >> "$GITHUB_ENV"
    else
      echo "HYPOTHESIS_PROFILE=ci" >> "$GITHUB_ENV"
    fi
3441
- name: Set up Python
3542
uses: actions/setup-python@v5
3643
with:
@@ -58,6 +65,7 @@ jobs:
5865
if: success()
5966
id: status
6067
run: |
68+
echo "Using Hypothesis profile: $HYPOTHESIS_PROFILE"
6169
hatch env run --env test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} run-hypothesis
6270
6371
# explicitly save the cache so it gets updated, also do this even if it fails.

.github/workflows/test.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ jobs:
6161
hatch env create test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }}
6262
hatch env run -e test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} list-env
6363
- name: Run Tests
64+
env:
65+
HYPOTHESIS_PROFILE: ci
6466
run: |
6567
hatch env run --env test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} run-coverage
6668
- name: Upload coverage

changes/3130.feature.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Port more stateful testing actions from `Icechunk <https://icechunk.io>`_.

pyproject.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ test = [
8080
"rich",
8181
"mypy",
8282
"hypothesis",
83+
"pytest-xdist",
8384
]
8485
remote_tests = [
8586
'zarr[remote]',
@@ -165,7 +166,7 @@ run = "run-coverage --no-cov"
165166
run-pytest = "run"
166167
run-verbose = "run-coverage --verbose"
167168
run-mypy = "mypy src"
168-
run-hypothesis = "run-coverage --hypothesis-profile ci --run-slow-hypothesis tests/test_properties.py tests/test_store/test_stateful*"
169+
run-hypothesis = "run-coverage -nauto --run-slow-hypothesis tests/test_properties.py tests/test_store/test_stateful*"
169170
list-env = "pip list"
170171

171172
[tool.hatch.envs.doctest]
@@ -194,7 +195,7 @@ run-coverage = "pytest -m gpu --cov-config=pyproject.toml --cov=pkg --cov-report
194195
run = "run-coverage --no-cov"
195196
run-verbose = "run-coverage --verbose"
196197
run-mypy = "mypy src"
197-
run-hypothesis = "pytest --hypothesis-profile ci tests/test_properties.py tests/test_store/test_stateful*"
198+
run-hypothesis = "run-coverage --hypothesis-profile ci --run-slow-hypothesis tests/test_properties.py tests/test_store/test_stateful*"
198199
list-env = "pip list"
199200

200201
[tool.hatch.envs.docs]

src/zarr/testing/stateful.py

Lines changed: 131 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,14 @@
2121
from zarr.core.buffer import Buffer, BufferPrototype, cpu, default_buffer_prototype
2222
from zarr.core.sync import SyncMixin
2323
from zarr.storage import LocalStore, MemoryStore
24-
from zarr.testing.strategies import key_ranges, node_names, np_array_and_chunks, numpy_arrays
24+
from zarr.testing.strategies import (
25+
basic_indices,
26+
chunk_paths,
27+
key_ranges,
28+
node_names,
29+
np_array_and_chunks,
30+
numpy_arrays,
31+
)
2532
from zarr.testing.strategies import keys as zarr_keys
2633

2734
MAX_BINARY_SIZE = 100
@@ -120,6 +127,120 @@ def add_array(
120127
)
121128
self.all_arrays.add(path)
122129

130+
@rule()
def clear(self) -> None:
    """Empty both the store under test and the model, then recreate a root group.

    The tracked sets of group and array paths are emptied as well, so later
    rules stop drawing nodes that no longer exist.
    """
    import zarr

    note("clearing")

    targets = (self.store, self.model)

    for target in targets:
        self._sync(target.clear())

    for target in targets:
        assert self._sync(target.is_empty("/"))

    # Nothing survives a clear, so forget every node we were tracking.
    self.all_groups.clear()
    self.all_arrays.clear()

    # Put a root group back so that subsequent rules have something to build on.
    for target in targets:
        zarr.group(store=target)

    # NOTE(review): the checks below fail, and the ones above may be vacuous,
    # if the prefix "/" never matches any key -- confirm against Store.is_empty.
    # TODO: MemoryStore is broken?
    # assert not self._sync(self.store.is_empty("/"))
    # assert not self._sync(self.model.is_empty("/"))
151+
def draw_directory(self, data: DataObject) -> str:
    """Draw a directory-like path that exists (or may exist) in the hierarchy.

    Parameters
    ----------
    data
        Hypothesis data object used for all draws.

    Returns
    -------
    str
        Either the path of a tracked group or array, or, for arrays, a
        directory below ``<array>/c/`` derived from a drawn chunk path.
    """
    group_st = st.sampled_from(sorted(self.all_groups)) if self.all_groups else st.nothing()
    array_st = st.sampled_from(sorted(self.all_arrays)) if self.all_arrays else st.nothing()
    array_or_group = data.draw(st.one_of(group_st, array_st))

    # The boolean is drawn first, exactly as before, so groups consume it too.
    if data.draw(st.booleans()) and array_or_group in self.all_arrays:
        arr = zarr.open_array(path=array_or_group, store=self.model)

        def chunk_directory(chunk_path: str) -> str:
            # Previously the drawn chunk path was discarded, so only
            # "<array>/c/" was ever exercised. Use it, but keep only the parent
            # of the drawn path so the result is never a chunk object key.
            # NOTE(review): handing a chunk *file* to directory operations is
            # presumably what causes the store inconsistencies mentioned in
            # check_list_dir -- confirm before widening this further.
            parent, _, _ = chunk_path.rpartition("/")
            return f"{array_or_group}/c/{parent}"

        path = data.draw(
            st.one_of(
                st.sampled_from([array_or_group]),
                chunk_paths(ndim=arr.ndim, numblocks=arr.cdata_shape).map(chunk_directory),
            )
        )
    else:
        path = array_or_group
    return path
168+
169+
@precondition(lambda self: bool(self.all_groups))
@rule(data=st.data())
def check_list_dir(self, data: DataObject) -> None:
    """Check that ``list_dir`` on a drawn path agrees between model and store."""
    path = self.draw_directory(data)
    note(f"list_dir for {path=!r}")

    # Why LocalStore is excluded:
    # take .list_dir("path/to/array") for an array with a single chunk.
    # The MemoryStore model returns `"c", "zarr.json"` only while the chunk exists;
    # once that chunk is deleted, `"c"` is no longer returned.
    # LocalStore does not behave this way :/
    # delete_dir("/path/to/array/c/0/0") has similar consistency issues.
    assume(not isinstance(self.store, LocalStore))

    expected, actual = (
        sorted(self._sync_iter(backend.list_dir(path))) for backend in (self.model, self.store)
    )
    assert expected == actual, (expected, actual)
183+
184+
@precondition(lambda self: bool(self.all_arrays))
@rule(data=st.data())
def delete_chunk(self, data: DataObject) -> None:
    """Delete one drawn chunk key of a drawn array from the model and the store."""
    array_path = data.draw(st.sampled_from(sorted(self.all_arrays)))
    # The model is the source of truth for the array's chunk grid.
    model_array = zarr.open_array(path=array_path, store=self.model)
    # subset=False asks for a full chunk index rather than a prefix of one.
    chunk_key = data.draw(
        chunk_paths(ndim=model_array.ndim, numblocks=model_array.cdata_shape, subset=False)
    )
    path = f"{array_path}/c/{chunk_key}"
    note(f"deleting chunk {path=!r}")
    for backend in (self.model, self.store):
        self._sync(backend.delete(path))
194+
195+
@precondition(lambda self: bool(self.all_arrays))
@rule(data=st.data())
def overwrite_array_basic_indexing(self, data: DataObject) -> None:
    """Write the same drawn data into a basic-indexed region of a drawn array.

    The identical selection and values are applied to the model array and the
    store array so that both stay in step.
    """
    array_path = data.draw(st.sampled_from(sorted(self.all_arrays)))
    model_array = zarr.open_array(path=array_path, store=self.model)
    store_array = zarr.open_array(path=array_path, store=self.store)

    slicer = data.draw(basic_indices(shape=model_array.shape))
    note(f"overwriting array with basic indexer: {slicer=}")

    # Read the selection from the model to learn the shape the new data must have.
    selection_shape = np.shape(model_array[slicer])
    new_data = data.draw(npst.arrays(shape=selection_shape, dtype=model_array.dtype))

    for target in (model_array, store_array):
        target[slicer] = new_data
208+
209+
@precondition(lambda self: bool(self.all_arrays))
@rule(data=st.data())
def resize_array(self, data: DataObject) -> None:
    """Resize a drawn array to a drawn shape of equal rank in model and store.

    Axes whose current length is zero are kept at zero; every other axis takes
    the drawn length.
    """
    array_path = data.draw(st.sampled_from(sorted(self.all_arrays)))
    model_array = zarr.open_array(path=array_path, store=self.model)
    store_array = zarr.open_array(path=array_path, store=self.store)

    ndim = model_array.ndim
    drawn_shape = data.draw(npst.array_shapes(max_dims=ndim, min_dims=ndim, min_side=0))
    current_shape = model_array.shape
    # strict=True guards against a rank mismatch between the two shapes.
    new_shape = tuple(
        drawn if current != 0 else 0
        for drawn, current in zip(drawn_shape, current_shape, strict=True)
    )

    note(f"resizing array from {model_array.shape} to {new_shape}")
    for target in (model_array, store_array):
        target.resize(new_shape)
228+
229+
@precondition(lambda self: bool(self.all_arrays) or bool(self.all_groups))
@rule(data=st.data())
def delete_dir(self, data: DataObject) -> None:
    """Delete a drawn directory from model and store, and forget the nodes below it.

    After the deletion, every tracked group or array that is the deleted path
    itself, or lives underneath it, is removed from the internal bookkeeping.
    """
    path = self.draw_directory(data)
    note(f"delete_dir with {path=!r}")
    self._sync(self.model.delete_dir(path))
    self._sync(self.store.delete_dir(path))

    # Match on whole path components. A bare ``node.startswith(path)`` would
    # also forget siblings that merely share a name prefix (deleting "foo"
    # would drop "foobar"), which delete_dir is not expected to remove --
    # confirm against the Store.delete_dir contract.
    # An empty path still matches everything; a path that already ends in "/"
    # is used as the prefix unchanged.
    prefix = path if path == "" or path.endswith("/") else f"{path}/"
    matches = {
        node
        for node in self.all_groups | self.all_arrays
        if node == path or node.startswith(prefix)
    }
    self.all_groups = self.all_groups - matches
    self.all_arrays = self.all_arrays - matches
243+
123244
# @precondition(lambda self: bool(self.all_groups))
124245
# @precondition(lambda self: bool(self.all_arrays))
125246
# @rule(data=st.data())
@@ -230,13 +351,19 @@ def delete_group_using_del(self, data: DataObject) -> None:
230351
# self.check_group_arrays(group)
231352
# t1 = time.time()
232353
# note(f"Checks took {t1 - t0} sec.")
233-
234354
@invariant()
def check_list_prefix_from_root(self) -> None:
    """Check that model and store hold the same keys, and that tracked nodes exist.

    First the complete key listings are compared. Then every tracked group and
    array must have its ``zarr.json`` present in both listings. Assertion
    failures report the offending keys.
    """
    model_list = self._sync_iter(self.model.list_prefix(""))
    store_list = self._sync_iter(self.store.list_prefix(""))
    note(f"Checking {len(model_list)} expected keys vs {len(store_list)} actual keys")

    # Sort once and reuse the result in both the comparison and the message.
    expected_keys = sorted(model_list)
    actual_keys = sorted(store_list)
    assert expected_keys == actual_keys, (expected_keys, actual_keys)

    # check that our internal state matches that of the store and model
    tracked_metadata = {f"{path}/zarr.json" for path in self.all_groups | self.all_arrays}
    missing_from_model = sorted(tracked_metadata.difference(model_list))
    assert not missing_from_model, missing_from_model
    missing_from_store = sorted(tracked_metadata.difference(store_list))
    assert not missing_from_store, missing_from_store
240367

241368

242369
class SyncStoreWrapper(zarr.core.sync.SyncMixin):

src/zarr/testing/strategies.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def safe_unicode_for_dtype(dtype: np.dtype[np.str_]) -> st.SearchStrategy[str]:
7777

7878
return st.text(
7979
alphabet=st.characters(
80-
blacklist_categories=["Cs"], # Avoid *technically allowed* surrogates
80+
exclude_categories=["Cs"], # Avoid *technically allowed* surrogates
8181
min_codepoint=32,
8282
),
8383
min_size=1,
@@ -324,7 +324,7 @@ def is_negative_slice(idx: Any) -> bool:
324324

325325

326326
@st.composite
327-
def end_slices(draw: st.DrawFn, *, shape: tuple[int]) -> Any:
327+
def end_slices(draw: st.DrawFn, *, shape: tuple[int, ...]) -> Any:
328328
"""
329329
A strategy that slices ranges that include the last chunk.
330330
This is intended to stress-test handling of a possibly smaller last chunk.
@@ -342,7 +342,7 @@ def end_slices(draw: st.DrawFn, *, shape: tuple[int]) -> Any:
342342
def basic_indices(
343343
draw: st.DrawFn,
344344
*,
345-
shape: tuple[int],
345+
shape: tuple[int, ...],
346346
min_dims: int = 0,
347347
max_dims: int | None = None,
348348
allow_newaxis: bool = False,
@@ -370,7 +370,7 @@ def basic_indices(
370370

371371
@st.composite
372372
def orthogonal_indices(
373-
draw: st.DrawFn, *, shape: tuple[int]
373+
draw: st.DrawFn, *, shape: tuple[int, ...]
374374
) -> tuple[tuple[np.ndarray[Any, Any], ...], tuple[np.ndarray[Any, Any], ...]]:
375375
"""
376376
Strategy that returns
@@ -426,3 +426,12 @@ def make_request(start: int, length: int) -> RangeByteRequest:
426426
)
427427
key_tuple = st.tuples(keys, byte_ranges)
428428
return st.lists(key_tuple, min_size=1, max_size=10)
429+
430+
431+
@st.composite
def chunk_paths(draw: st.DrawFn, ndim: int, numblocks: tuple[int, ...], subset: bool = True) -> str:
    """Draw a "/"-joined chunk index, or a leading prefix of one.

    Parameters
    ----------
    draw
        Hypothesis draw function supplied by ``st.composite``.
    ndim
        Upper bound on the number of leading index components kept when
        ``subset`` is true.
    numblocks
        Number of chunks along each axis; each component is drawn from
        ``0`` to ``max(0, b - 1)`` for the corresponding entry ``b``.
    subset
        If true, keep only a drawn number (``0`` to ``ndim``) of leading
        components; otherwise keep all of them.
    """
    per_axis = [st.integers(min_value=0, max_value=max(0, b - 1)) for b in numblocks]
    blockidx = draw(st.tuples(*per_axis))
    if subset:
        depth = draw(st.integers(min_value=0, max_value=ndim))
        blockidx = blockidx[:depth]
    return "/".join(str(component) for component in blockidx)

tests/conftest.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import os
34
import pathlib
45
from dataclasses import dataclass, field
56
from typing import TYPE_CHECKING
@@ -188,17 +189,31 @@ def pytest_collection_modifyitems(config: Any, items: Any) -> None:
188189

189190

190191
# Profile used when HYPOTHESIS_PROFILE is not set: verbose output, no deadline.
settings.register_profile(
    "default",
    parent=settings.get_profile("default"),
    max_examples=300,
    suppress_health_check=[HealthCheck.filter_too_much, HealthCheck.too_slow],
    deadline=None,
    verbosity=Verbosity.verbose,
)
# NOTE(review): get_profile("ci") assumes a "ci" profile is already registered
# by Hypothesis itself -- confirm the minimum supported Hypothesis version.
settings.register_profile(
    "ci",
    parent=settings.get_profile("ci"),
    max_examples=300,
    derandomize=True,  # more like regression testing
    deadline=None,
    suppress_health_check=[HealthCheck.filter_too_much, HealthCheck.too_slow],
)
# Inherits from the "ci" profile registered above, but randomized again, with
# more examples and an explicit stateful step count.
settings.register_profile(
    "nightly",
    max_examples=500,
    parent=settings.get_profile("ci"),
    derandomize=False,
    stateful_step_count=100,
)

# ``or`` rather than a getenv default: a variable that is set but empty would
# otherwise be passed on as the profile name "".
settings.load_profile(os.getenv("HYPOTHESIS_PROFILE") or "default")
216+
202217

203218
# TODO: uncomment these overrides when we can get mypy to accept them
204219
"""

0 commit comments

Comments
 (0)