Skip to content

Commit 4689b9c

Browse files
committed
Merge branch 'main' into store-move
2 parents e4e6ab0 + 8826415 commit 4689b9c

File tree

13 files changed

+302
-83
lines changed

13 files changed

+302
-83
lines changed

changes/2972.misc.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Avoid an unnecessary memory copy when writing Zarr data with obstore.

changes/3039.bugfix.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
It is now possible to specify no compressor when creating a zarr format 2 array.
2+
This can be done by passing ``compressor=None`` to the various array creation routines.
3+
4+
If the ``compressor`` argument is not given, a suitable default compressor is still chosen automatically.
5+
To reproduce the behaviour in previous zarr-python versions when ``compressor=None`` was passed, pass ``compressor='auto'`` instead.

changes/3049.misc.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added tests for ``AsyncArray`` and ``Array``, and removed duplicate argument parsing.

src/zarr/api/asynchronous.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,14 @@
99
import numpy.typing as npt
1010
from typing_extensions import deprecated
1111

12-
from zarr.core.array import Array, AsyncArray, create_array, from_array, get_array_metadata
12+
from zarr.core.array import (
13+
Array,
14+
AsyncArray,
15+
CompressorLike,
16+
create_array,
17+
from_array,
18+
get_array_metadata,
19+
)
1320
from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, ArrayConfigParams
1421
from zarr.core.buffer import NDArrayLike
1522
from zarr.core.common import (
@@ -838,7 +845,7 @@ async def create(
838845
*, # Note: this is a change from v2
839846
chunks: ChunkCoords | int | None = None, # TODO: v2 allowed chunks=True
840847
dtype: npt.DTypeLike | None = None,
841-
compressor: dict[str, JSON] | None = None, # TODO: default and type change
848+
compressor: CompressorLike = "auto",
842849
fill_value: Any | None = 0, # TODO: need type
843850
order: MemoryOrder | None = None,
844851
store: str | StoreLike | None = None,
@@ -991,7 +998,7 @@ async def create(
991998
dtype = parse_dtype(dtype, zarr_format)
992999
if not filters:
9931000
filters = _default_filters(dtype)
994-
if not compressor:
1001+
if compressor == "auto":
9951002
compressor = _default_compressor(dtype)
9961003
elif zarr_format == 3 and chunk_shape is None: # type: ignore[redundant-expr]
9971004
if chunks is not None:
@@ -1012,11 +1019,6 @@ async def create(
10121019
warnings.warn("object_codec is not yet implemented", RuntimeWarning, stacklevel=2)
10131020
if read_only is not None:
10141021
warnings.warn("read_only is not yet implemented", RuntimeWarning, stacklevel=2)
1015-
if dimension_separator is not None and zarr_format == 3:
1016-
raise ValueError(
1017-
"dimension_separator is not supported for zarr format 3, use chunk_key_encoding instead"
1018-
)
1019-
10201022
if order is not None:
10211023
_warn_order_kwarg()
10221024
if write_empty_chunks is not None:

src/zarr/api/synchronous.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import zarr.api.asynchronous as async_api
88
import zarr.core.array
99
from zarr._compat import _deprecate_positional_args
10-
from zarr.core.array import Array, AsyncArray
10+
from zarr.core.array import Array, AsyncArray, CompressorLike
1111
from zarr.core.group import Group
1212
from zarr.core.sync import sync
1313
from zarr.core.sync_group import create_hierarchy
@@ -599,7 +599,7 @@ def create(
599599
*, # Note: this is a change from v2
600600
chunks: ChunkCoords | int | bool | None = None,
601601
dtype: npt.DTypeLike | None = None,
602-
compressor: dict[str, JSON] | None = None, # TODO: default and type change
602+
compressor: CompressorLike = "auto",
603603
fill_value: Any | None = 0, # TODO: need type
604604
order: MemoryOrder | None = None,
605605
store: str | StoreLike | None = None,

src/zarr/core/array.py

Lines changed: 34 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@
102102
T_ArrayMetadata,
103103
)
104104
from zarr.core.metadata.v2 import (
105+
CompressorLikev2,
105106
_default_compressor,
106107
_default_filters,
107108
parse_compressor,
@@ -139,7 +140,8 @@ def parse_array_metadata(data: Any) -> ArrayMetadata:
139140
if isinstance(data, ArrayMetadata):
140141
return data
141142
elif isinstance(data, dict):
142-
if data["zarr_format"] == 3:
143+
zarr_format = data.get("zarr_format")
144+
if zarr_format == 3:
143145
meta_out = ArrayV3Metadata.from_dict(data)
144146
if len(meta_out.storage_transformers) > 0:
145147
msg = (
@@ -148,9 +150,11 @@ def parse_array_metadata(data: Any) -> ArrayMetadata:
148150
)
149151
raise ValueError(msg)
150152
return meta_out
151-
elif data["zarr_format"] == 2:
153+
elif zarr_format == 2:
152154
return ArrayV2Metadata.from_dict(data)
153-
raise TypeError
155+
else:
156+
raise ValueError(f"Invalid zarr_format: {zarr_format}. Expected 2 or 3")
157+
raise TypeError # pragma: no cover
154158

155159

156160
def create_codec_pipeline(metadata: ArrayMetadata) -> CodecPipeline:
@@ -159,8 +163,7 @@ def create_codec_pipeline(metadata: ArrayMetadata) -> CodecPipeline:
159163
elif isinstance(metadata, ArrayV2Metadata):
160164
v2_codec = V2Codec(filters=metadata.filters, compressor=metadata.compressor)
161165
return get_pipeline_class().from_codecs([v2_codec])
162-
else:
163-
raise TypeError
166+
raise TypeError # pragma: no cover
164167

165168

166169
async def get_array_metadata(
@@ -267,17 +270,6 @@ def __init__(
267270
store_path: StorePath,
268271
config: ArrayConfigLike | None = None,
269272
) -> None:
270-
if isinstance(metadata, dict):
271-
zarr_format = metadata["zarr_format"]
272-
# TODO: remove this when we extensively type the dict representation of metadata
273-
_metadata = cast(dict[str, JSON], metadata)
274-
if zarr_format == 2:
275-
metadata = ArrayV2Metadata.from_dict(_metadata)
276-
elif zarr_format == 3:
277-
metadata = ArrayV3Metadata.from_dict(_metadata)
278-
else:
279-
raise ValueError(f"Invalid zarr_format: {zarr_format}. Expected 2 or 3")
280-
281273
metadata_parsed = parse_array_metadata(metadata)
282274
config_parsed = parse_array_config(config)
283275

@@ -303,7 +295,7 @@ async def create(
303295
dimension_separator: Literal[".", "/"] | None = None,
304296
order: MemoryOrder | None = None,
305297
filters: list[dict[str, JSON]] | None = None,
306-
compressor: dict[str, JSON] | None = None,
298+
compressor: CompressorLikev2 | Literal["auto"] = "auto",
307299
# runtime
308300
overwrite: bool = False,
309301
data: npt.ArrayLike | None = None,
@@ -394,7 +386,7 @@ async def create(
394386
dimension_separator: Literal[".", "/"] | None = None,
395387
order: MemoryOrder | None = None,
396388
filters: list[dict[str, JSON]] | None = None,
397-
compressor: dict[str, JSON] | None = None,
389+
compressor: CompressorLike = "auto",
398390
# runtime
399391
overwrite: bool = False,
400392
data: npt.ArrayLike | None = None,
@@ -429,7 +421,7 @@ async def create(
429421
dimension_separator: Literal[".", "/"] | None = None,
430422
order: MemoryOrder | None = None,
431423
filters: list[dict[str, JSON]] | None = None,
432-
compressor: dict[str, JSON] | None = None,
424+
compressor: CompressorLike = "auto",
433425
# runtime
434426
overwrite: bool = False,
435427
data: npt.ArrayLike | None = None,
@@ -570,7 +562,7 @@ async def _create(
570562
dimension_separator: Literal[".", "/"] | None = None,
571563
order: MemoryOrder | None = None,
572564
filters: list[dict[str, JSON]] | None = None,
573-
compressor: dict[str, JSON] | None = None,
565+
compressor: CompressorLike = "auto",
574566
# runtime
575567
overwrite: bool = False,
576568
data: npt.ArrayLike | None = None,
@@ -604,7 +596,7 @@ async def _create(
604596
raise ValueError(
605597
"filters cannot be used for arrays with zarr_format 3. Use array-to-array codecs instead."
606598
)
607-
if compressor is not None:
599+
if compressor != "auto":
608600
raise ValueError(
609601
"compressor cannot be used for arrays with zarr_format 3. Use bytes-to-bytes codecs instead."
610602
)
@@ -768,7 +760,7 @@ def _create_metadata_v2(
768760
dimension_separator: Literal[".", "/"] | None = None,
769761
fill_value: float | None = None,
770762
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
771-
compressor: dict[str, JSON] | numcodecs.abc.Codec | None = None,
763+
compressor: CompressorLikev2 = None,
772764
attributes: dict[str, JSON] | None = None,
773765
) -> ArrayV2Metadata:
774766
if dimension_separator is None:
@@ -809,7 +801,7 @@ async def _create_v2(
809801
dimension_separator: Literal[".", "/"] | None = None,
810802
fill_value: float | None = None,
811803
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
812-
compressor: dict[str, JSON] | numcodecs.abc.Codec | None = None,
804+
compressor: CompressorLike = "auto",
813805
attributes: dict[str, JSON] | None = None,
814806
overwrite: bool = False,
815807
) -> AsyncArray[ArrayV2Metadata]:
@@ -821,6 +813,17 @@ async def _create_v2(
821813
else:
822814
await ensure_no_existing_node(store_path, zarr_format=2)
823815

816+
compressor_parsed: CompressorLikev2
817+
if compressor == "auto":
818+
compressor_parsed = _default_compressor(dtype)
819+
elif isinstance(compressor, BytesBytesCodec):
820+
raise ValueError(
821+
"Cannot use a BytesBytesCodec as a compressor for zarr v2 arrays. "
822+
"Use a numcodecs codec directly instead."
823+
)
824+
else:
825+
compressor_parsed = compressor
826+
824827
metadata = cls._create_metadata_v2(
825828
shape=shape,
826829
dtype=dtype,
@@ -829,7 +832,7 @@ async def _create_v2(
829832
dimension_separator=dimension_separator,
830833
fill_value=fill_value,
831834
filters=filters,
832-
compressor=compressor,
835+
compressor=compressor_parsed,
833836
attributes=attributes,
834837
)
835838

@@ -1751,7 +1754,7 @@ def create(
17511754
dimension_separator: Literal[".", "/"] | None = None,
17521755
order: MemoryOrder | None = None,
17531756
filters: list[dict[str, JSON]] | None = None,
1754-
compressor: dict[str, JSON] | None = None,
1757+
compressor: CompressorLike = "auto",
17551758
# runtime
17561759
overwrite: bool = False,
17571760
config: ArrayConfigLike | None = None,
@@ -1880,7 +1883,7 @@ def _create(
18801883
dimension_separator: Literal[".", "/"] | None = None,
18811884
order: MemoryOrder | None = None,
18821885
filters: list[dict[str, JSON]] | None = None,
1883-
compressor: dict[str, JSON] | None = None,
1886+
compressor: CompressorLike = "auto",
18841887
# runtime
18851888
overwrite: bool = False,
18861889
config: ArrayConfigLike | None = None,
@@ -3792,7 +3795,11 @@ def _get_default_codecs(
37923795
| Literal["auto"]
37933796
| None
37943797
)
3795-
CompressorLike: TypeAlias = dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec | None
3798+
# Union of acceptable types for users to pass in for both v2 and v3 compressors
3799+
CompressorLike: TypeAlias = (
3800+
dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec | Literal["auto"] | None
3801+
)
3802+
37963803
CompressorsLike: TypeAlias = (
37973804
Iterable[dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec]
37983805
| dict[str, JSON]

src/zarr/core/buffer/core.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,19 @@ def as_numpy_array(self) -> npt.NDArray[Any]:
255255
"""
256256
...
257257

258+
def as_buffer_like(self) -> BytesLike:
259+
"""Returns the buffer as an object that implements the Python buffer protocol.
260+
261+
Notes
262+
-----
263+
Might have to copy data, since the implementation uses `.as_numpy_array()`.
264+
265+
Returns
266+
-------
267+
An object that implements the Python buffer protocol
268+
"""
269+
return memoryview(self.as_numpy_array()) # type: ignore[arg-type]
270+
258271
def to_bytes(self) -> bytes:
259272
"""Returns the buffer as `bytes` (host memory).
260273

src/zarr/core/metadata/v2.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from collections.abc import Iterable, Sequence
66
from enum import Enum
77
from functools import cached_property
8-
from typing import TYPE_CHECKING, Any, TypedDict, cast
8+
from typing import TYPE_CHECKING, Any, TypeAlias, TypedDict, cast
99

1010
import numcodecs.abc
1111

@@ -43,6 +43,10 @@ class ArrayV2MetadataDict(TypedDict):
4343
attributes: dict[str, JSON]
4444

4545

46+
# Union of acceptable types for v2 compressors
47+
CompressorLikev2: TypeAlias = dict[str, JSON] | numcodecs.abc.Codec | None
48+
49+
4650
@dataclass(frozen=True, kw_only=True)
4751
class ArrayV2Metadata(Metadata):
4852
shape: ChunkCoords
@@ -52,7 +56,7 @@ class ArrayV2Metadata(Metadata):
5256
order: MemoryOrder = "C"
5357
filters: tuple[numcodecs.abc.Codec, ...] | None = None
5458
dimension_separator: Literal[".", "/"] = "."
55-
compressor: numcodecs.abc.Codec | None = None
59+
compressor: CompressorLikev2
5660
attributes: dict[str, JSON] = field(default_factory=dict)
5761
zarr_format: Literal[2] = field(init=False, default=2)
5862

@@ -65,7 +69,7 @@ def __init__(
6569
fill_value: Any,
6670
order: MemoryOrder,
6771
dimension_separator: Literal[".", "/"] = ".",
68-
compressor: numcodecs.abc.Codec | dict[str, JSON] | None = None,
72+
compressor: CompressorLikev2 = None,
6973
filters: Iterable[numcodecs.abc.Codec | dict[str, JSON]] | None = None,
7074
attributes: dict[str, JSON] | None = None,
7175
) -> None:

src/zarr/storage/_local.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,10 @@ def _put(
5252
with path.open("r+b") as f:
5353
f.seek(start)
5454
# write takes any object supporting the buffer protocol
55-
f.write(value.as_numpy_array()) # type: ignore[arg-type]
55+
f.write(value.as_buffer_like())
5656
return None
5757
else:
58-
view = memoryview(value.as_numpy_array()) # type: ignore[arg-type]
58+
view = value.as_buffer_like()
5959
if exclusive:
6060
mode = "xb"
6161
else:

src/zarr/storage/_obstore.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,15 +161,15 @@ async def set(self, key: str, value: Buffer) -> None:
161161

162162
self._check_writable()
163163

164-
buf = value.to_bytes()
164+
buf = value.as_buffer_like()
165165
await obs.put_async(self.store, key, buf)
166166

167167
async def set_if_not_exists(self, key: str, value: Buffer) -> None:
168168
# docstring inherited
169169
import obstore as obs
170170

171171
self._check_writable()
172-
buf = value.to_bytes()
172+
buf = value.as_buffer_like()
173173
with contextlib.suppress(obs.exceptions.AlreadyExistsError):
174174
await obs.put_async(self.store, key, buf, mode="create")
175175

0 commit comments

Comments
 (0)