Skip to content

Commit 074050d

Browse files
authored
Merge branch 'main' into test-coverage
2 parents a0b5f4f + aa33415 commit 074050d

11 files changed

Lines changed: 89 additions & 24 deletions

File tree

changes/2972.misc.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Avoid an unnecessary memory copy when writing Zarr with obstore

changes/3039.bugfix.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
It is now possible to specify no compressor when creating a Zarr format 2 array.
2+
This can be done by passing ``compressor=None`` to the various array creation routines.
3+
4+
If the ``compressor`` argument is not given, a suitable default compressor is still chosen automatically.
5+
To reproduce the behaviour in previous zarr-python versions when ``compressor=None`` was passed, pass ``compressor='auto'`` instead.

src/zarr/api/asynchronous.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,14 @@
99
import numpy.typing as npt
1010
from typing_extensions import deprecated
1111

12-
from zarr.core.array import Array, AsyncArray, create_array, from_array, get_array_metadata
12+
from zarr.core.array import (
13+
Array,
14+
AsyncArray,
15+
CompressorLike,
16+
create_array,
17+
from_array,
18+
get_array_metadata,
19+
)
1320
from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, ArrayConfigParams
1421
from zarr.core.buffer import NDArrayLike
1522
from zarr.core.common import (
@@ -838,7 +845,7 @@ async def create(
838845
*, # Note: this is a change from v2
839846
chunks: ChunkCoords | int | None = None, # TODO: v2 allowed chunks=True
840847
dtype: npt.DTypeLike | None = None,
841-
compressor: dict[str, JSON] | None = None, # TODO: default and type change
848+
compressor: CompressorLike = "auto",
842849
fill_value: Any | None = 0, # TODO: need type
843850
order: MemoryOrder | None = None,
844851
store: str | StoreLike | None = None,
@@ -991,7 +998,7 @@ async def create(
991998
dtype = parse_dtype(dtype, zarr_format)
992999
if not filters:
9931000
filters = _default_filters(dtype)
994-
if not compressor:
1001+
if compressor == "auto":
9951002
compressor = _default_compressor(dtype)
9961003
elif zarr_format == 3 and chunk_shape is None: # type: ignore[redundant-expr]
9971004
if chunks is not None:

src/zarr/api/synchronous.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import zarr.api.asynchronous as async_api
88
import zarr.core.array
99
from zarr._compat import _deprecate_positional_args
10-
from zarr.core.array import Array, AsyncArray
10+
from zarr.core.array import Array, AsyncArray, CompressorLike
1111
from zarr.core.group import Group
1212
from zarr.core.sync import sync
1313
from zarr.core.sync_group import create_hierarchy
@@ -599,7 +599,7 @@ def create(
599599
*, # Note: this is a change from v2
600600
chunks: ChunkCoords | int | bool | None = None,
601601
dtype: npt.DTypeLike | None = None,
602-
compressor: dict[str, JSON] | None = None, # TODO: default and type change
602+
compressor: CompressorLike = "auto",
603603
fill_value: Any | None = 0, # TODO: need type
604604
order: MemoryOrder | None = None,
605605
store: str | StoreLike | None = None,

src/zarr/core/array.py

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@
102102
T_ArrayMetadata,
103103
)
104104
from zarr.core.metadata.v2 import (
105+
CompressorLikev2,
105106
_default_compressor,
106107
_default_filters,
107108
parse_compressor,
@@ -294,7 +295,7 @@ async def create(
294295
dimension_separator: Literal[".", "/"] | None = None,
295296
order: MemoryOrder | None = None,
296297
filters: list[dict[str, JSON]] | None = None,
297-
compressor: dict[str, JSON] | None = None,
298+
compressor: CompressorLikev2 | Literal["auto"] = "auto",
298299
# runtime
299300
overwrite: bool = False,
300301
data: npt.ArrayLike | None = None,
@@ -385,7 +386,7 @@ async def create(
385386
dimension_separator: Literal[".", "/"] | None = None,
386387
order: MemoryOrder | None = None,
387388
filters: list[dict[str, JSON]] | None = None,
388-
compressor: dict[str, JSON] | None = None,
389+
compressor: CompressorLike = "auto",
389390
# runtime
390391
overwrite: bool = False,
391392
data: npt.ArrayLike | None = None,
@@ -420,7 +421,7 @@ async def create(
420421
dimension_separator: Literal[".", "/"] | None = None,
421422
order: MemoryOrder | None = None,
422423
filters: list[dict[str, JSON]] | None = None,
423-
compressor: dict[str, JSON] | None = None,
424+
compressor: CompressorLike = "auto",
424425
# runtime
425426
overwrite: bool = False,
426427
data: npt.ArrayLike | None = None,
@@ -561,7 +562,7 @@ async def _create(
561562
dimension_separator: Literal[".", "/"] | None = None,
562563
order: MemoryOrder | None = None,
563564
filters: list[dict[str, JSON]] | None = None,
564-
compressor: dict[str, JSON] | None = None,
565+
compressor: CompressorLike = "auto",
565566
# runtime
566567
overwrite: bool = False,
567568
data: npt.ArrayLike | None = None,
@@ -595,7 +596,7 @@ async def _create(
595596
raise ValueError(
596597
"filters cannot be used for arrays with zarr_format 3. Use array-to-array codecs instead."
597598
)
598-
if compressor is not None:
599+
if compressor != "auto":
599600
raise ValueError(
600601
"compressor cannot be used for arrays with zarr_format 3. Use bytes-to-bytes codecs instead."
601602
)
@@ -759,7 +760,7 @@ def _create_metadata_v2(
759760
dimension_separator: Literal[".", "/"] | None = None,
760761
fill_value: float | None = None,
761762
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
762-
compressor: dict[str, JSON] | numcodecs.abc.Codec | None = None,
763+
compressor: CompressorLikev2 = None,
763764
attributes: dict[str, JSON] | None = None,
764765
) -> ArrayV2Metadata:
765766
if dimension_separator is None:
@@ -800,7 +801,7 @@ async def _create_v2(
800801
dimension_separator: Literal[".", "/"] | None = None,
801802
fill_value: float | None = None,
802803
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
803-
compressor: dict[str, JSON] | numcodecs.abc.Codec | None = None,
804+
compressor: CompressorLike = "auto",
804805
attributes: dict[str, JSON] | None = None,
805806
overwrite: bool = False,
806807
) -> AsyncArray[ArrayV2Metadata]:
@@ -812,6 +813,17 @@ async def _create_v2(
812813
else:
813814
await ensure_no_existing_node(store_path, zarr_format=2)
814815

816+
compressor_parsed: CompressorLikev2
817+
if compressor == "auto":
818+
compressor_parsed = _default_compressor(dtype)
819+
elif isinstance(compressor, BytesBytesCodec):
820+
raise ValueError(
821+
"Cannot use a BytesBytesCodec as a compressor for zarr v2 arrays. "
822+
"Use a numcodecs codec directly instead."
823+
)
824+
else:
825+
compressor_parsed = compressor
826+
815827
metadata = cls._create_metadata_v2(
816828
shape=shape,
817829
dtype=dtype,
@@ -820,7 +832,7 @@ async def _create_v2(
820832
dimension_separator=dimension_separator,
821833
fill_value=fill_value,
822834
filters=filters,
823-
compressor=compressor,
835+
compressor=compressor_parsed,
824836
attributes=attributes,
825837
)
826838

@@ -1742,7 +1754,7 @@ def create(
17421754
dimension_separator: Literal[".", "/"] | None = None,
17431755
order: MemoryOrder | None = None,
17441756
filters: list[dict[str, JSON]] | None = None,
1745-
compressor: dict[str, JSON] | None = None,
1757+
compressor: CompressorLike = "auto",
17461758
# runtime
17471759
overwrite: bool = False,
17481760
config: ArrayConfigLike | None = None,
@@ -1871,7 +1883,7 @@ def _create(
18711883
dimension_separator: Literal[".", "/"] | None = None,
18721884
order: MemoryOrder | None = None,
18731885
filters: list[dict[str, JSON]] | None = None,
1874-
compressor: dict[str, JSON] | None = None,
1886+
compressor: CompressorLike = "auto",
18751887
# runtime
18761888
overwrite: bool = False,
18771889
config: ArrayConfigLike | None = None,
@@ -3783,7 +3795,11 @@ def _get_default_codecs(
37833795
| Literal["auto"]
37843796
| None
37853797
)
3786-
CompressorLike: TypeAlias = dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec | None
3798+
# Union of acceptable types for users to pass in for both v2 and v3 compressors
3799+
CompressorLike: TypeAlias = (
3800+
dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec | Literal["auto"] | None
3801+
)
3802+
37873803
CompressorsLike: TypeAlias = (
37883804
Iterable[dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec]
37893805
| dict[str, JSON]

src/zarr/core/buffer/core.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,19 @@ def as_numpy_array(self) -> npt.NDArray[Any]:
255255
"""
256256
...
257257

258+
def as_buffer_like(self) -> BytesLike:
259+
"""Returns the buffer as an object that implements the Python buffer protocol.
260+
261+
Notes
262+
-----
263+
Might have to copy data, since the implementation uses `.as_numpy_array()`.
264+
265+
Returns
266+
-------
267+
An object that implements the Python buffer protocol
268+
"""
269+
return memoryview(self.as_numpy_array()) # type: ignore[arg-type]
270+
258271
def to_bytes(self) -> bytes:
259272
"""Returns the buffer as `bytes` (host memory).
260273

src/zarr/core/metadata/v2.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from collections.abc import Iterable, Sequence
66
from enum import Enum
77
from functools import cached_property
8-
from typing import TYPE_CHECKING, Any, TypedDict, cast
8+
from typing import TYPE_CHECKING, Any, TypeAlias, TypedDict, cast
99

1010
import numcodecs.abc
1111

@@ -43,6 +43,10 @@ class ArrayV2MetadataDict(TypedDict):
4343
attributes: dict[str, JSON]
4444

4545

46+
# Union of acceptable types for v2 compressors
47+
CompressorLikev2: TypeAlias = dict[str, JSON] | numcodecs.abc.Codec | None
48+
49+
4650
@dataclass(frozen=True, kw_only=True)
4751
class ArrayV2Metadata(Metadata):
4852
shape: ChunkCoords
@@ -52,7 +56,7 @@ class ArrayV2Metadata(Metadata):
5256
order: MemoryOrder = "C"
5357
filters: tuple[numcodecs.abc.Codec, ...] | None = None
5458
dimension_separator: Literal[".", "/"] = "."
55-
compressor: numcodecs.abc.Codec | None = None
59+
compressor: CompressorLikev2
5660
attributes: dict[str, JSON] = field(default_factory=dict)
5761
zarr_format: Literal[2] = field(init=False, default=2)
5862

@@ -65,7 +69,7 @@ def __init__(
6569
fill_value: Any,
6670
order: MemoryOrder,
6771
dimension_separator: Literal[".", "/"] = ".",
68-
compressor: numcodecs.abc.Codec | dict[str, JSON] | None = None,
72+
compressor: CompressorLikev2 = None,
6973
filters: Iterable[numcodecs.abc.Codec | dict[str, JSON]] | None = None,
7074
attributes: dict[str, JSON] | None = None,
7175
) -> None:

src/zarr/storage/_local.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,10 @@ def _put(
5252
with path.open("r+b") as f:
5353
f.seek(start)
5454
# write takes any object supporting the buffer protocol
55-
f.write(value.as_numpy_array()) # type: ignore[arg-type]
55+
f.write(value.as_buffer_like())
5656
return None
5757
else:
58-
view = memoryview(value.as_numpy_array()) # type: ignore[arg-type]
58+
view = value.as_buffer_like()
5959
if exclusive:
6060
mode = "xb"
6161
else:

src/zarr/storage/_obstore.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,15 +161,15 @@ async def set(self, key: str, value: Buffer) -> None:
161161

162162
self._check_writable()
163163

164-
buf = value.to_bytes()
164+
buf = value.as_buffer_like()
165165
await obs.put_async(self.store, key, buf)
166166

167167
async def set_if_not_exists(self, key: str, value: Buffer) -> None:
168168
# docstring inherited
169169
import obstore as obs
170170

171171
self._check_writable()
172-
buf = value.to_bytes()
172+
buf = value.as_buffer_like()
173173
with contextlib.suppress(obs.exceptions.AlreadyExistsError):
174174
await obs.put_async(self.store, key, buf, mode="create")
175175

tests/test_api.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import re
44
from typing import TYPE_CHECKING
55

6+
import zarr.codecs
7+
68
if TYPE_CHECKING:
79
import pathlib
810

@@ -1217,3 +1219,20 @@ def test_gpu_basic(store: Store, zarr_format: ZarrFormat | None) -> None:
12171219
# assert_array_equal doesn't check the type
12181220
assert isinstance(result, type(src))
12191221
cp.testing.assert_array_equal(result, src[:10, :10])
1222+
1223+
1224+
def test_v2_without_compressor() -> None:
1225+
# Make sure it's possible to set no compressor for v2 arrays
1226+
arr = zarr.create(store={}, shape=(1), dtype="uint8", zarr_format=2, compressor=None)
1227+
assert arr.compressors == ()
1228+
1229+
1230+
def test_v2_with_v3_compressor() -> None:
1231+
# Check trying to create a v2 array with a v3 compressor fails
1232+
with pytest.raises(
1233+
ValueError,
1234+
match="Cannot use a BytesBytesCodec as a compressor for zarr v2 arrays. Use a numcodecs codec directly instead.",
1235+
):
1236+
zarr.create(
1237+
store={}, shape=(1), dtype="uint8", zarr_format=2, compressor=zarr.codecs.BloscCodec()
1238+
)

0 commit comments

Comments
 (0)