Skip to content

Commit 36ca497

Browse files
authored
Use automatic chunking in array creation routines (#3103)
* Delegate logic for chunks to AsyncArray._create * Fix changelog number * Test public API * Add todo
1 parent af55fcf commit 36ca497

File tree

4 files changed

+48
-8
lines changed

4 files changed

+48
-8
lines changed

changes/3103.bugfix.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
When creating arrays without explicitly specifying a chunk size using `zarr.create` and other
2+
array creation routines, the chunk size will now be set automatically instead of defaulting to the data shape.
3+
For large arrays this will result in smaller default chunk sizes.
4+
To retain previous behaviour, explicitly set the chunk shape to the data shape.
5+
6+
This fix matches the existing chunking behaviour of
7+
`zarr.save_array` and `zarr.api.asynchronous.AsyncArray.create`.

src/zarr/api/asynchronous.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -992,19 +992,11 @@ async def create(
992992
)
993993

994994
if zarr_format == 2:
995-
if chunks is None:
996-
chunks = shape
997995
dtype = parse_dtype(dtype, zarr_format)
998996
if not filters:
999997
filters = _default_filters(dtype)
1000998
if compressor == "auto":
1001999
compressor = _default_compressor(dtype)
1002-
elif zarr_format == 3 and chunk_shape is None: # type: ignore[redundant-expr]
1003-
if chunks is not None:
1004-
chunk_shape = chunks
1005-
chunks = None
1006-
else:
1007-
chunk_shape = shape
10081000

10091001
if synchronizer is not None:
10101002
warnings.warn("synchronizer is not yet implemented", RuntimeWarning, stacklevel=2)

src/zarr/core/chunk_grids.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ def _guess_chunks(
6464
if isinstance(shape, int):
6565
shape = (shape,)
6666

67+
if typesize == 0:
68+
return shape
69+
6770
ndims = len(shape)
6871
# require chunks to have non-zero length for all dimensions
6972
chunks = np.maximum(np.array(shape, dtype="=f8"), 1)

tests/test_api.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1301,3 +1301,41 @@ def test_no_overwrite_load(tmp_path: Path) -> None:
13011301
with contextlib.suppress(NotImplementedError):
13021302
zarr.load(store)
13031303
assert existing_fpath.exists()
1304+
1305+
1306+
@pytest.mark.parametrize(
1307+
"f",
1308+
[
1309+
zarr.array,
1310+
zarr.create,
1311+
zarr.create_array,
1312+
zarr.ones,
1313+
zarr.ones_like,
1314+
zarr.empty,
1315+
zarr.empty_like,
1316+
zarr.full,
1317+
zarr.full_like,
1318+
zarr.zeros,
1319+
zarr.zeros_like,
1320+
],
1321+
)
1322+
def test_auto_chunks(f: Callable[..., Array]) -> None:
1323+
# Make sure chunks are set automatically across the public API
1324+
# TODO: test shards with this test too
1325+
shape = (1000, 1000)
1326+
dtype = np.uint8
1327+
kwargs = {"shape": shape, "dtype": dtype}
1328+
array = np.zeros(shape, dtype=dtype)
1329+
store = zarr.storage.MemoryStore()
1330+
1331+
if f in [zarr.full, zarr.full_like]:
1332+
kwargs["fill_value"] = 0
1333+
if f in [zarr.array]:
1334+
kwargs["data"] = array
1335+
if f in [zarr.empty_like, zarr.full_like, zarr.empty_like, zarr.ones_like, zarr.zeros_like]:
1336+
kwargs["a"] = array
1337+
if f in [zarr.create_array]:
1338+
kwargs["store"] = store
1339+
1340+
a = f(**kwargs)
1341+
assert a.chunks == (500, 500)

0 commit comments

Comments
 (0)