Skip to content

Commit f0443db

Browse files
authored
Add array storage helpers (zarr-developers#2065)
* implement store.list_prefix and store._set_dict * simplify string handling * add nchunks_initialized, and necessary additions for it * rename _iter_chunks to _iter_chunk_coords * fix test name * bring in correct store list_dir implementations * bump numcodecs to dodge zstd exception * remove store._set_dict, and add _set_many and get_many instead * update deprecation warning template * add a type annotation * refactor chunk iterators. they are not properties any more, just methods, and they can take an origin kwarg * _get_many returns tuple[str, buffer] * stricter store types * fix types * lint * remove deprecation warnings * fix zip list_prefix * tests for nchunks_initialized, chunks_initialized; add selection_shape kwarg to grid iteration; make chunk grid iterators consistent for array and async array * add nchunks test * fix docstrings * fix docstring * revert unnecessary changes to project config
1 parent 19ed733 commit f0443db

File tree

16 files changed

+578
-52
lines changed

16 files changed

+578
-52
lines changed

src/zarr/abc/store.py

+34-13
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,24 @@
1+
from __future__ import annotations
2+
13
from abc import ABC, abstractmethod
24
from asyncio import gather
35
from collections.abc import AsyncGenerator, Iterable
4-
from types import TracebackType
5-
from typing import Any, NamedTuple, Protocol, runtime_checkable
6+
from typing import TYPE_CHECKING, Any, NamedTuple, Protocol, runtime_checkable
7+
8+
if TYPE_CHECKING:
9+
from collections.abc import AsyncGenerator, Iterable
10+
from types import TracebackType
11+
from typing import Any, TypeAlias
612

7-
from typing_extensions import Self
13+
from typing_extensions import Self
814

9-
from zarr.core.buffer import Buffer, BufferPrototype
10-
from zarr.core.common import AccessModeLiteral, BytesLike
15+
from zarr.core.buffer import Buffer, BufferPrototype
16+
from zarr.core.common import AccessModeLiteral, BytesLike
1117

1218
__all__ = ["Store", "AccessMode", "ByteGetter", "ByteSetter", "set_or_delete"]
1319

20+
ByteRangeRequest: TypeAlias = tuple[int | None, int | None]
21+
1422

1523
class AccessMode(NamedTuple):
1624
str: AccessModeLiteral
@@ -100,14 +108,14 @@ async def get(
100108
self,
101109
key: str,
102110
prototype: BufferPrototype,
103-
byte_range: tuple[int | None, int | None] | None = None,
111+
byte_range: ByteRangeRequest | None = None,
104112
) -> Buffer | None:
105113
"""Retrieve the value associated with a given key.
106114
107115
Parameters
108116
----------
109117
key : str
110-
byte_range : tuple[int, Optional[int]], optional
118+
byte_range : tuple[int | None, int | None], optional
111119
112120
Returns
113121
-------
@@ -119,13 +127,13 @@ async def get(
119127
async def get_partial_values(
120128
self,
121129
prototype: BufferPrototype,
122-
key_ranges: list[tuple[str, tuple[int | None, int | None]]],
130+
key_ranges: Iterable[tuple[str, ByteRangeRequest]],
123131
) -> list[Buffer | None]:
124132
"""Retrieve possibly partial values from given key_ranges.
125133
126134
Parameters
127135
----------
128-
key_ranges : list[tuple[str, tuple[int, int]]]
136+
key_ranges : Iterable[tuple[str, tuple[int | None, int | None]]]
129137
Ordered set of key, range pairs, a key may occur multiple times with different ranges
130138
131139
Returns
@@ -195,7 +203,9 @@ def supports_partial_writes(self) -> bool:
195203
...
196204

197205
@abstractmethod
198-
async def set_partial_values(self, key_start_values: list[tuple[str, int, BytesLike]]) -> None:
206+
async def set_partial_values(
207+
self, key_start_values: Iterable[tuple[str, int, BytesLike]]
208+
) -> None:
199209
"""Store values at a given key, starting at byte range_start.
200210
201211
Parameters
@@ -259,21 +269,32 @@ def close(self) -> None:
259269
"""Close the store."""
260270
self._is_open = False
261271

272+
async def _get_many(
273+
self, requests: Iterable[tuple[str, BufferPrototype, ByteRangeRequest | None]]
274+
) -> AsyncGenerator[tuple[str, Buffer | None], None]:
275+
"""
276+
Retrieve a collection of objects from storage. In general this method does not guarantee
277+
that objects will be retrieved in the order in which they were requested, so this method
278+
yields tuple[str, Buffer | None] instead of just Buffer | None
279+
"""
280+
for req in requests:
281+
yield (req[0], await self.get(*req))
282+
262283

263284
@runtime_checkable
264285
class ByteGetter(Protocol):
265286
async def get(
266-
self, prototype: BufferPrototype, byte_range: tuple[int, int | None] | None = None
287+
self, prototype: BufferPrototype, byte_range: ByteRangeRequest | None = None
267288
) -> Buffer | None: ...
268289

269290

270291
@runtime_checkable
271292
class ByteSetter(Protocol):
272293
async def get(
273-
self, prototype: BufferPrototype, byte_range: tuple[int, int | None] | None = None
294+
self, prototype: BufferPrototype, byte_range: ByteRangeRequest | None = None
274295
) -> Buffer | None: ...
275296

276-
async def set(self, value: Buffer, byte_range: tuple[int, int] | None = None) -> None: ...
297+
async def set(self, value: Buffer, byte_range: ByteRangeRequest | None = None) -> None: ...
277298

278299
async def delete(self) -> None: ...
279300

src/zarr/codecs/sharding.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
Codec,
1818
CodecPipeline,
1919
)
20-
from zarr.abc.store import ByteGetter, ByteSetter
20+
from zarr.abc.store import ByteGetter, ByteRangeRequest, ByteSetter
2121
from zarr.codecs.bytes import BytesCodec
2222
from zarr.codecs.crc32c_ import Crc32cCodec
2323
from zarr.core.array_spec import ArraySpec
@@ -78,7 +78,7 @@ class _ShardingByteGetter(ByteGetter):
7878
chunk_coords: ChunkCoords
7979

8080
async def get(
81-
self, prototype: BufferPrototype, byte_range: tuple[int, int | None] | None = None
81+
self, prototype: BufferPrototype, byte_range: ByteRangeRequest | None = None
8282
) -> Buffer | None:
8383
assert byte_range is None, "byte_range is not supported within shards"
8484
assert (
@@ -91,7 +91,7 @@ async def get(
9191
class _ShardingByteSetter(_ShardingByteGetter, ByteSetter):
9292
shard_dict: ShardMutableMapping
9393

94-
async def set(self, value: Buffer, byte_range: tuple[int, int] | None = None) -> None:
94+
async def set(self, value: Buffer, byte_range: ByteRangeRequest | None = None) -> None:
9595
assert byte_range is None, "byte_range is not supported within shards"
9696
self.shard_dict[self.chunk_coords] = value
9797

0 commit comments

Comments
 (0)