Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
d2b10d2
Proof of concept: Unified chunk grid in zarr
maxrjones Mar 9, 2026
3e27417
Improve preferred chunks
maxrjones Mar 10, 2026
cc78540
Fastforward
maxrjones Mar 14, 2026
2510d31
Fastfoward
maxrjones Mar 20, 2026
8a108a8
Merge branch 'main' into poc/unified-zarr-chunk-grid
maxrjones Mar 29, 2026
9eb62f2
Support writing variable chunks
maxrjones Mar 30, 2026
362b5f1
Only test when rectilinear chunks are available
maxrjones Mar 30, 2026
163abce
Merge branch 'pydata:main' into poc/unified-zarr-chunk-grid
maxrjones Apr 2, 2026
bf8d22f
Apply suggestions from code review
maxrjones Apr 8, 2026
785b887
Remove zarr upstream pin
maxrjones Apr 8, 2026
8517aaa
Merge branch 'main' into poc/unified-zarr-chunk-grid
keewis Apr 12, 2026
367b4e9
Merge branch 'main' into poc/unified-zarr-chunk-grid
maxrjones Apr 14, 2026
9e26952
Check chunk alignment
maxrjones Apr 14, 2026
b0f1c34
Changelog
maxrjones Apr 14, 2026
95abeb0
Fix validation
maxrjones Apr 15, 2026
bb0316e
chore: improve tests
maxrjones May 3, 2026
c84ca4d
fix: remove dead code
maxrjones May 3, 2026
723e558
chore: clarify release notes
maxrjones May 3, 2026
a95cade
Merge branch 'main' into poc/unified-zarr-chunk-grid
maxrjones May 3, 2026
ec795f2
fix: detect feature using public API
maxrjones May 3, 2026
3f65e8a
fix: retain var names
maxrjones May 3, 2026
59461d5
fix: implement using public API
maxrjones May 3, 2026
684a121
fix: missed change
maxrjones May 3, 2026
ba5b357
fix: revert another name change
maxrjones May 3, 2026
147a4f0
docs: add rectilinear chunks details
maxrjones May 3, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions doc/user-guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1241,6 +1241,33 @@ shape of each coordinate array in the ``encoding`` argument:
The number of chunks on Tair matches our dask chunks, while there is now only a single
chunk in the directory stores of each coordinate.

.. _io.zarr.rectilinear_chunks:

Variable-sized (rectilinear) chunks
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Zarr v3 supports *rectilinear* chunk grids, where chunk sizes vary along one
or more dimensions. This is useful when natural data boundaries (yearly chunks
of a daily time series, per-tile spatial extents) don't align to a regular
grid. This feature requires ``zarr-python >= 3.2``, and the experimental
``array.rectilinear_chunks`` option must be enabled for both reading and writing:

.. code-block:: python

    import zarr

    with zarr.config.set({"array.rectilinear_chunks": True}):
        ds = xr.Dataset({"var": ("x", np.arange(60))}).chunk({"x": (10, 20, 30)})
        ds.to_zarr("rectilinear.zarr", zarr_format=3, mode="w")

        roundtrip = xr.open_zarr("rectilinear.zarr", zarr_format=3)
        roundtrip.chunks["x"]  # (10, 20, 30)

Rectilinear chunks can also be specified through the ``encoding`` argument
(one sequence per dimension), e.g. ``encoding={"var": {"chunks": ((10, 20, 30),)}}``.
Writing non-uniform chunks to a Zarr v2 store raises ``ValueError`` because
rectilinear chunk grids are only available in Zarr format 3.

Groups
~~~~~~

Expand Down
5 changes: 5 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ v2026.05.0 (unreleased)
New Features
~~~~~~~~~~~~

- Support reading and writing Zarr V3 arrays with rectilinear (variable-sized)
chunk grids. Requires zarr-python >= 3.2 with
``zarr.config.set({"array.rectilinear_chunks": True})``, which must be set
for both reading and writing rectilinear-chunked stores. (:pull:`11279`).
By `Max Jones <https://github.com/maxrjones>`_.

Breaking Changes
~~~~~~~~~~~~~~~~
Expand Down
80 changes: 77 additions & 3 deletions xarray/backends/chunks.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import itertools

import numpy as np

from xarray.core.datatree import Variable
Expand Down Expand Up @@ -133,9 +135,12 @@ def align_nd_chunks(

def build_grid_chunks(
size: int,
chunk_size: int,
chunk_size: int | tuple[int, ...],
region: slice | None = None,
) -> tuple[int, ...]:
if isinstance(chunk_size, (list, tuple)):
return _build_rectilinear_grid_chunks(chunk_size, region)

if region is None:
region = slice(0, size)

Expand All @@ -153,9 +158,39 @@ def build_grid_chunks(
return tuple(chunks_on_region)


def _build_rectilinear_grid_chunks(
chunk_sizes: tuple[int, ...],
region: slice | None = None,
) -> tuple[int, ...]:
"""Build grid chunks for a rectilinear dimension within a region."""
if region is None or region == slice(None):
return tuple(chunk_sizes)

region_start = region.start or 0
region_stop = region.stop or sum(chunk_sizes)

boundaries = [0]
for cs in chunk_sizes:
boundaries.append(boundaries[-1] + cs)

result = []
for i in range(len(chunk_sizes)):
chunk_start = boundaries[i]
chunk_end = boundaries[i + 1]

if chunk_end <= region_start or chunk_start >= region_stop:
continue

effective_start = max(chunk_start, region_start)
effective_end = min(chunk_end, region_stop)
result.append(effective_end - effective_start)

return tuple(result)


def grid_rechunk(
v: Variable,
enc_chunks: tuple[int, ...],
enc_chunks: tuple[int, ...] | tuple[int | tuple[int, ...], ...],
region: tuple[slice, ...],
) -> Variable:
nd_v_chunks = v.chunks
Expand All @@ -181,9 +216,36 @@ def grid_rechunk(
return v


def _validate_rectilinear_chunk_alignment(
dask_chunks: tuple[int, ...],
enc_chunks: tuple[int, ...],
axis: int,
name: str,
region: slice = slice(None),
) -> None:
"""Validate dask chunks align with rectilinear encoding chunk boundaries."""
enc_stops = set(itertools.accumulate(enc_chunks))
region_start = region.start or 0
dask_stops = {region_start + s for s in itertools.accumulate(dask_chunks)}
# The final stop (total size) always matches — exclude it
total = sum(enc_chunks)
enc_stops.discard(total)
dask_stops.discard(total)
bad = dask_stops - enc_stops
if bad:
raise ValueError(
f"Specified rectilinear encoding chunks {enc_chunks!r} for variable "
f"named {name!r} would overlap multiple Dask chunks on axis {axis}. "
f"Dask chunk boundaries at positions {sorted(bad)} do not align with "
f"encoding chunk boundaries at {sorted(enc_stops)}. "
"Writing this array in parallel with Dask could lead to corrupted data. "
"Consider rechunking using `chunk()` or setting `safe_chunks=False`."
)


def validate_grid_chunks_alignment(
nd_v_chunks: tuple[tuple[int, ...], ...] | None,
enc_chunks: tuple[int, ...],
enc_chunks: tuple[int | tuple[int, ...], ...],
backend_shape: tuple[int, ...],
region: tuple[slice, ...],
allow_partial_chunks: bool,
Expand Down Expand Up @@ -213,6 +275,18 @@ def validate_grid_chunks_alignment(
backend_shape,
strict=True,
):
if isinstance(chunk_size, (list, tuple)):
# Rectilinear dimension — use boundary-based validation
_validate_rectilinear_chunk_alignment(
dask_chunks=v_chunks,
enc_chunks=chunk_size,
axis=axis,
name=name,
region=interval,
)
continue

# Regular dimension — existing validation logic
for i, chunk in enumerate(v_chunks[1:-1]):
if chunk % chunk_size:
raise ValueError(
Expand Down
55 changes: 46 additions & 9 deletions xarray/backends/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@
from xarray.core.types import ZarrArray, ZarrGroup


def _has_rectilinear_chunks() -> bool:
    """Report whether the installed zarr meets the 3.2 minimum version.

    Only the package version is checked here; this function does not look at
    the ``array.rectilinear_chunks`` zarr configuration option.
    """
    zarr_is_new_enough = module_available("zarr", minversion="3.2")
    return zarr_is_new_enough


def _get_mappers(*, storage_options, store, chunk_store):
# expand str and path-like arguments
store = _normalize_path(store)
Expand Down Expand Up @@ -333,7 +337,7 @@ async def async_getitem(self, key):
)


def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name):
def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, zarr_format):
"""
Given encoding chunks (possibly None or []) and variable chunks
(possibly None or []).
Expand All @@ -355,18 +359,34 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name):
# while dask chunks can be variable sized
# https://dask.pydata.org/en/latest/array-design.html#chunks
if var_chunks and not enc_chunks:
if zarr_format == 3 and _has_rectilinear_chunks():
# Check if dask chunks are regular (uniform except for last chunk)
has_varying_interior = any(
len(set(chunks[:-1])) > 1 for chunks in var_chunks
)
has_larger_final = any(chunks[0] < chunks[-1] for chunks in var_chunks)
if has_varying_interior or has_larger_final:
# Truly rectilinear — return dask-style tuples of per-chunk sizes.
# Requires zarr config: array.rectilinear_chunks = True
return tuple(var_chunks)
# Regular chunks — return the first chunk size per dimension
return tuple(chunk[0] for chunk in var_chunks)

if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks):
raise ValueError(
"Zarr requires uniform chunk sizes except for final chunk. "
"Zarr v2 requires uniform chunk sizes except for the final chunk. "
f"Variable named {name!r} has incompatible dask chunks: {var_chunks!r}. "
"Consider rechunking using `chunk()`."
"Consider rechunking using `chunk()`, or switching to the "
"zarr v3 format with zarr-python>=3.2."
)
if any((chunks[0] < chunks[-1]) for chunks in var_chunks):
raise ValueError(
"Final chunk of Zarr array must be the same size or smaller "
f"than the first. Variable named {name!r} has incompatible Dask chunks {var_chunks!r}."
"Consider either rechunking using `chunk()` or instead deleting "
"or modifying `encoding['chunks']`."
"The final chunk of a Zarr v2 array or a Zarr v3 array without the "
"rectilinear chunks extension must be the same size or smaller "
f"than the first. Variable named {name!r} has incompatible Dask "
f"chunks {var_chunks!r}. "
"Consider switching to Zarr v3 with the rectilinear chunks extension, "
"rechunking using `chunk()` or deleting or modifying `encoding['chunks']`."
)
# return the first chunk for each dimension
return tuple(chunk[0] for chunk in var_chunks)
Expand All @@ -389,8 +409,17 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name):
var_chunks,
ndim,
name,
zarr_format,
)

# Rectilinear chunks: each element is a sequence of per-chunk edge lengths
if (
zarr_format == 3
and _has_rectilinear_chunks()
and any(not isinstance(x, int) for x in enc_chunks_tuple)
):
return enc_chunks_tuple

for x in enc_chunks_tuple:
if not isinstance(x, int):
raise TypeError(
Expand Down Expand Up @@ -532,6 +561,7 @@ def extract_zarr_variable_encoding(
var_chunks=variable.chunks,
ndim=variable.ndim,
name=name,
zarr_format=zarr_format,
)
if _zarr_v3() and chunks is None:
chunks = "auto"
Expand Down Expand Up @@ -910,9 +940,16 @@ def open_store_variable(self, name):
)
attributes = dict(attributes)

try:
chunks = tuple(zarr_array.chunks)
except NotImplementedError:
# Rectilinear chunk grid (zarr >= 3.2) — chunks vary along the axis
chunks = zarr_array.write_chunk_sizes
preferred_chunks = dict(zip(dimensions, chunks, strict=True))

encoding = {
"chunks": zarr_array.chunks,
"preferred_chunks": dict(zip(dimensions, zarr_array.chunks, strict=True)),
"chunks": chunks,
"preferred_chunks": preferred_chunks,
}

if _zarr_v3():
Expand Down
3 changes: 3 additions & 0 deletions xarray/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,9 @@ def _importorskip(
has_zarr_v3, requires_zarr_v3 = _importorskip("zarr", "3.0.0")
has_zarr_v3_dtypes, requires_zarr_v3_dtypes = _importorskip("zarr", "3.1.0")
has_zarr_v3_async_oindex, requires_zarr_v3_async_oindex = _importorskip("zarr", "3.1.2")
has_zarr_rectilinear_chunks, requires_zarr_rectilinear_chunks = _importorskip(
"zarr", "3.2.0"
)
if has_zarr_v3:
import zarr

Expand Down
Loading
Loading