changes/3288.misc.rst (1 addition, 0 deletions)
@@ -0,0 +1 @@
Sort dictionary keys before returning consolidated metadata to ensure deterministic output.
docs/user-guide/consolidated_metadata.rst (9 additions, 1 deletion)
@@ -45,7 +45,7 @@ that can be used.:
>>> consolidated = zarr.open_group(store=store)
>>> consolidated_metadata = consolidated.metadata.consolidated_metadata.metadata
>>> from pprint import pprint
>>> pprint(dict(sorted(consolidated_metadata.items())))
>>> pprint(dict(consolidated_metadata.items()))
{'a': ArrayV3Metadata(shape=(1,),
data_type=Float64(endianness='little'),
chunk_grid=RegularChunkGrid(chunk_shape=(1,)),
@@ -100,6 +100,14 @@ With nested groups, the consolidated metadata is available on the children, recursively
>>> consolidated['child'].metadata.consolidated_metadata
ConsolidatedMetadata(metadata={'child': GroupMetadata(attributes={'kind': 'grandchild'}, zarr_format=3, consolidated_metadata=ConsolidatedMetadata(metadata={}, kind='inline', must_understand=False), node_type='group')}, kind='inline', must_understand=False)

.. versionadded:: 3.1.1

The keys in the consolidated metadata are sorted prior to writing. Keys are
sorted in ascending order of path depth, where a path is a sequence of node
names joined by ``"/"``. Keys at the same depth are ordered lexicographically,
comparing the NFKC-normalized, case-folded form of each path to break the tie.
This behaviour ensures deterministic metadata output for a given group.

Synchronization and Concurrency
-------------------------------

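To make the ordering rule in the versionadded note above concrete, here is a minimal standalone sketch (not part of this PR) that applies the same depth-then-lexicographic key to a few hypothetical flattened paths:

import unicodedata

# Hypothetical flattened metadata keys, deliberately out of order.
keys = ["c/e", "b", "c/d", "a", "c"]

def sort_key(path: str) -> tuple[int, str]:
    # Depth first (number of "/" separators), then a case-insensitive,
    # NFKC-normalized comparison to break ties at the same depth.
    return (path.count("/"), unicodedata.normalize("NFKC", path).casefold())

print(sorted(keys, key=sort_key))
# ['a', 'b', 'c', 'c/d', 'c/e']
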
src/zarr/core/group.py (11 additions, 1 deletion)
@@ -4,6 +4,7 @@
import itertools
import json
import logging
import unicodedata
import warnings
from collections import defaultdict
from dataclasses import asdict, dataclass, field, fields, replace
@@ -141,7 +142,16 @@ def to_dict(self) -> dict[str, JSON]:
return {
"kind": self.kind,
"must_understand": self.must_understand,
"metadata": {k: v.to_dict() for k, v in self.flattened_metadata.items()},
"metadata": {
k: v.to_dict()
for k, v in sorted(
self.flattened_metadata.items(),
key=lambda item: (
item[0].count("/"),
unicodedata.normalize("NFKC", item[0]).casefold(),
),
)
},
}

@classmethod
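The effect of the sorted to_dict is also visible through the public synchronous API. A rough sketch that mirrors the async test below, assuming an in-memory store (the names store, root and child are illustrative, not part of the PR):

import zarr

store = zarr.storage.MemoryStore()
root = zarr.group(store=store)

# Create nodes in non-lexicographic order on purpose.
root.create_array("b", shape=(1,), dtype="float32")
child = root.create_group("c")
root.create_array("a", shape=(1,), dtype="float32")
child.create_array("e", shape=(1,), dtype="float32")
child.create_array("d", shape=(1,), dtype="float32")

zarr.consolidate_metadata(store)
meta = zarr.open_group(store=store).metadata.consolidated_metadata

# The serialized keys come back sorted: depth first, then name.
print(list(meta.to_dict()["metadata"]))
# Expected: ['a', 'b', 'c', 'c/d', 'c/e']
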
tests/test_metadata/test_consolidated.py (29 additions, 0 deletions)
@@ -467,6 +467,35 @@ def test_to_dict_empty(self):
}
assert result == expected

@pytest.mark.parametrize("zarr_format", [2, 3])
async def test_to_dict_order(
self, memory_store: zarr.storage.MemoryStore, zarr_format: ZarrFormat
) -> None:
with zarr.config.set(default_zarr_format=zarr_format):
g = await group(store=memory_store)

# Create arrays and groups in non-lexicographic order
dtype = "float32"
await g.create_array(name="b", shape=(1,), dtype=dtype)
child = await g.create_group("c", attributes={"key": "child"})
await g.create_array(name="a", shape=(1,), dtype=dtype)

await child.create_array("e", shape=(1,), dtype=dtype)
await child.create_array("d", shape=(1,), dtype=dtype)

# Consolidate metadata and re-open store
await zarr.api.asynchronous.consolidate_metadata(memory_store)
g2 = await zarr.api.asynchronous.open_group(store=memory_store)

assert list(g2.metadata.consolidated_metadata.metadata) == ["a", "b", "c"]
assert list(g2.metadata.consolidated_metadata.flattened_metadata) == [
"a",
"b",
"c",
"c/d",
"c/e",
]

@pytest.mark.parametrize("zarr_format", [2, 3])
async def test_open_consolidated_raises_async(self, zarr_format: ZarrFormat):
store = zarr.storage.MemoryStore()