Skip to content

Commit 95e5253

Browse files
authored
Merge branch 'main' into rmg/ArrayNotFoundError
2 parents 5af016e + cc2fa37 commit 95e5253

41 files changed

Lines changed: 569 additions & 341 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

changes/3318.misc.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Define a ``Protocol`` to model the ``numcodecs.abc.Codec`` interface. This is groundwork toward
2+
making ``numcodecs`` an optional dependency for ``zarr-python``.

changes/3371.misc.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Ensure that tests for executable examples are run in a fresh python environment.

changes/3372.misc.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Make certain imports in ``zarr.abc.store`` local to method definitions. This minimizes the risk of
2+
circular imports when adding new classes to ``zarr.abc.store``.

changes/3374.misc.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Replace usage of the ``zarr.core.common.ChunkCoords`` type alias with ``tuple[int, ...]``.

changes/3378.bugfix.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Ensure that ``config`` is handled properly when it is passed to ``open`` for an existing
2+
array.
3+

docs/developers/roadmap.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ Goals
5656
- Provide a complete implementation of Zarr V3 through the Zarr-Python
5757
API
5858
- Clear the way for exciting extensions / ZEPs
59-
(i.e. `sharding <https://zarr-specs.readthedocs.io/en/latest/v3/codecs/sharding-indexed/v1.0.html>`__,
59+
(e.g. `sharding <https://zarr-specs.readthedocs.io/en/latest/v3/codecs/sharding-indexed/>`__,
6060
`variable chunking <https://zarr.dev/zeps/draft/ZEP0003.html>`__,
6161
etc.)
6262
- Provide a developer API that can be used to implement and register V3

src/zarr/abc/codec.py

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
from __future__ import annotations
22

33
from abc import abstractmethod
4-
from typing import TYPE_CHECKING, Generic, TypeVar
4+
from collections.abc import Mapping
5+
from typing import TYPE_CHECKING, Generic, TypeGuard, TypeVar
6+
7+
from typing_extensions import ReadOnly, TypedDict
58

69
from zarr.abc.metadata import Metadata
710
from zarr.core.buffer import Buffer, NDBuffer
8-
from zarr.core.common import ChunkCoords, concurrent_map
11+
from zarr.core.common import NamedConfig, concurrent_map
912
from zarr.core.config import config
1013

1114
if TYPE_CHECKING:
@@ -34,6 +37,27 @@
3437
CodecInput = TypeVar("CodecInput", bound=NDBuffer | Buffer)
3538
CodecOutput = TypeVar("CodecOutput", bound=NDBuffer | Buffer)
3639

40+
TName = TypeVar("TName", bound=str, covariant=True)
41+
42+
43+
class CodecJSON_V2(TypedDict, Generic[TName]):
    """The JSON representation of a codec for Zarr V2.

    The class is generic over ``TName``, the type of the codec identifier. Only the ``id`` key
    is declared here.
    """

    # The codec identifier. Declared read-only, so it is not meant to be reassigned through
    # this type.
    id: ReadOnly[TName]
47+
48+
49+
def _check_codecjson_v2(data: object) -> TypeGuard[CodecJSON_V2[str]]:
50+
return isinstance(data, Mapping) and "id" in data and isinstance(data["id"], str)
51+
52+
53+
CodecJSON_V3 = str | NamedConfig[str, Mapping[str, object]]
54+
"""The JSON representation of a codec for Zarr V3."""
55+
56+
# The widest type we will *accept* for a codec JSON
57+
# This covers v2 and v3
58+
CodecJSON = str | Mapping[str, object]
59+
"""The widest type of JSON-like input that could specify a codec."""
60+
3761

3862
class BaseCodec(Metadata, Generic[CodecInput, CodecOutput]):
3963
"""Generic base class for codecs.
@@ -96,7 +120,7 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
96120
def validate(
97121
self,
98122
*,
99-
shape: ChunkCoords,
123+
shape: tuple[int, ...],
100124
dtype: ZDType[TBaseDType, TBaseScalar],
101125
chunk_grid: ChunkGrid,
102126
) -> None:
@@ -105,7 +129,7 @@ def validate(
105129
106130
Parameters
107131
----------
108-
shape : ChunkCoords
132+
shape : tuple[int, ...]
109133
The array shape
110134
dtype : np.dtype[Any]
111135
The array data type
@@ -311,14 +335,18 @@ def supports_partial_encode(self) -> bool: ...
311335

312336
@abstractmethod
313337
def validate(
314-
self, *, shape: ChunkCoords, dtype: ZDType[TBaseDType, TBaseScalar], chunk_grid: ChunkGrid
338+
self,
339+
*,
340+
shape: tuple[int, ...],
341+
dtype: ZDType[TBaseDType, TBaseScalar],
342+
chunk_grid: ChunkGrid,
315343
) -> None:
316344
"""Validates that all codec configurations are compatible with the array metadata.
317345
Raises errors when a codec configuration is not compatible.
318346
319347
Parameters
320348
----------
321-
shape : ChunkCoords
349+
shape : tuple[int, ...]
322350
The array shape
323351
dtype : np.dtype[Any]
324352
The array data type

src/zarr/abc/numcodec.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
from typing import Any, Self, TypeGuard
2+
3+
from typing_extensions import Protocol
4+
5+
6+
class Numcodec(Protocol):
7+
"""
8+
A protocol that models the ``numcodecs.abc.Codec`` interface.
9+
10+
This protocol should be considered experimental. Expect the type annotations for ``buf`` and
11+
``out`` to narrow in the future.
12+
"""
13+
14+
codec_id: str
15+
16+
def encode(self, buf: Any) -> Any:
17+
"""Encode data from ``buf``.
18+
19+
Parameters
20+
----------
21+
buf : Any
22+
Data to be encoded.
23+
24+
Returns
25+
-------
26+
enc: Any
27+
Encoded data.
28+
"""
29+
...
30+
31+
def decode(self, buf: Any, out: Any | None = None) -> Any:
32+
"""
33+
Decode data in ``buf``.
34+
35+
Parameters
36+
----------
37+
buf : Any
38+
Encoded data.
39+
out : Any
40+
Writeable buffer to store decoded data. If provided, this buffer must
41+
be exactly the right size to store the decoded data.
42+
43+
Returns
44+
-------
45+
dec : Any
46+
Decoded data.
47+
"""
48+
...
49+
50+
def get_config(self) -> Any:
51+
"""
52+
Return a JSON-serializable configuration dictionary for this
53+
codec. Must include an ``'id'`` field with the codec identifier.
54+
"""
55+
...
56+
57+
@classmethod
58+
def from_config(cls, config: Any) -> Self:
59+
"""
60+
Instantiate a codec from a configuration dictionary.
61+
62+
Parameters
63+
----------
64+
config : Any
65+
A configuration dictionary for this codec.
66+
"""
67+
...
68+
69+
70+
def _is_numcodec_cls(obj: object) -> TypeGuard[type[Numcodec]]:
71+
"""
72+
Check if the given object is a class implements the Numcodec protocol.
73+
74+
The @runtime_checkable decorator does not allow issubclass checks for protocols with non-method
75+
members (i.e., attributes), so we use this function to manually check for the presence of the
76+
required attributes and methods on a given object.
77+
"""
78+
return (
79+
isinstance(obj, type)
80+
and hasattr(obj, "codec_id")
81+
and isinstance(obj.codec_id, str)
82+
and hasattr(obj, "encode")
83+
and callable(obj.encode)
84+
and hasattr(obj, "decode")
85+
and callable(obj.decode)
86+
and hasattr(obj, "get_config")
87+
and callable(obj.get_config)
88+
and hasattr(obj, "from_config")
89+
and callable(obj.from_config)
90+
)
91+
92+
93+
def _is_numcodec(obj: object) -> TypeGuard[Numcodec]:
    """
    Check if the given object implements the Numcodec protocol.

    The ``@runtime_checkable`` decorator does not allow ``issubclass`` checks for protocols with
    non-method members (i.e., attributes), so the required attributes and methods are checked
    manually instead.

    Parameters
    ----------
    obj : object
        The object to check.

    Returns
    -------
    bool
        The result of ``_is_numcodec_cls(type(obj))``.
    """
    # The check runs against the object's class, not the object itself, so attributes that
    # exist only on the instance are not taken into account.
    return _is_numcodec_cls(type(obj))

src/zarr/abc/store.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,6 @@
66
from itertools import starmap
77
from typing import TYPE_CHECKING, Protocol, runtime_checkable
88

9-
from zarr.core.buffer.core import default_buffer_prototype
10-
from zarr.core.common import concurrent_map
11-
from zarr.core.config import config
12-
139
if TYPE_CHECKING:
1410
from collections.abc import AsyncGenerator, AsyncIterator, Iterable
1511
from types import TracebackType
@@ -438,6 +434,9 @@ async def getsize(self, key: str) -> int:
438434
# Note to implementers: this default implementation is very inefficient since
439435
# it requires reading the entire object. Many systems will have ways to get the
440436
# size of an object without reading it.
437+
# avoid circular import
438+
from zarr.core.buffer.core import default_buffer_prototype
439+
441440
value = await self.get(key, prototype=default_buffer_prototype())
442441
if value is None:
443442
raise FileNotFoundError(key)
@@ -476,6 +475,11 @@ async def getsize_prefix(self, prefix: str) -> int:
476475
# on to getting sizes. Ideally we would overlap those two, which should
477476
# improve tail latency and might reduce memory pressure (since not all keys
478477
# would be in memory at once).
478+
479+
# avoid circular import
480+
from zarr.core.common import concurrent_map
481+
from zarr.core.config import config
482+
479483
keys = [(x,) async for x in self.list_prefix(prefix)]
480484
limit = config.get("async.concurrency")
481485
sizes = await concurrent_map(keys, self.getsize, limit=limit)

src/zarr/api/asynchronous.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
from zarr.core.common import (
2525
JSON,
2626
AccessModeLiteral,
27-
ChunkCoords,
2827
DimensionNames,
2928
MemoryOrder,
3029
ZarrFormat,
@@ -53,9 +52,8 @@
5352
if TYPE_CHECKING:
5453
from collections.abc import Iterable
5554

56-
import numcodecs.abc
57-
5855
from zarr.abc.codec import Codec
56+
from zarr.abc.numcodec import Numcodec
5957
from zarr.core.buffer import NDArrayLikeOrScalar
6058
from zarr.core.chunk_key_encodings import ChunkKeyEncoding
6159
from zarr.storage import StoreLike
@@ -108,7 +106,7 @@ def _infer_overwrite(mode: AccessModeLiteral) -> bool:
108106
return mode in _OVERWRITE_MODES
109107

110108

111-
def _get_shape_chunks(a: ArrayLike | Any) -> tuple[ChunkCoords | None, ChunkCoords | None]:
109+
def _get_shape_chunks(a: ArrayLike | Any) -> tuple[tuple[int, ...] | None, tuple[int, ...] | None]:
112110
"""Helper function to get the shape and chunks from an array-like object"""
113111
shape = None
114112
chunks = None
@@ -360,7 +358,9 @@ async def open(
360358
zarr_format = _metadata_dict["zarr_format"]
361359
is_v3_array = zarr_format == 3 and _metadata_dict.get("node_type") == "array"
362360
if is_v3_array or zarr_format == 2:
363-
return AsyncArray(store_path=store_path, metadata=_metadata_dict)
361+
return AsyncArray(
362+
store_path=store_path, metadata=_metadata_dict, config=kwargs.get("config")
363+
)
364364
except (AssertionError, FileNotFoundError, NodeTypeValidationError):
365365
pass
366366
return await open_group(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs)
@@ -866,9 +866,9 @@ async def open_group(
866866

867867

868868
async def create(
869-
shape: ChunkCoords | int,
869+
shape: tuple[int, ...] | int,
870870
*, # Note: this is a change from v2
871-
chunks: ChunkCoords | int | bool | None = None,
871+
chunks: tuple[int, ...] | int | bool | None = None,
872872
dtype: ZDTypeLike | None = None,
873873
compressor: CompressorLike = "auto",
874874
fill_value: Any | None = DEFAULT_FILL_VALUE,
@@ -878,7 +878,7 @@ async def create(
878878
overwrite: bool = False,
879879
path: PathLike | None = None,
880880
chunk_store: StoreLike | None = None,
881-
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
881+
filters: Iterable[dict[str, JSON] | Numcodec] | None = None,
882882
cache_metadata: bool | None = None,
883883
cache_attrs: bool | None = None,
884884
read_only: bool | None = None,
@@ -890,7 +890,7 @@ async def create(
890890
meta_array: Any | None = None, # TODO: need type
891891
attributes: dict[str, JSON] | None = None,
892892
# v3 only
893-
chunk_shape: ChunkCoords | int | None = None,
893+
chunk_shape: tuple[int, ...] | int | None = None,
894894
chunk_key_encoding: (
895895
ChunkKeyEncoding
896896
| tuple[Literal["default"], Literal[".", "/"]]
@@ -1075,7 +1075,7 @@ async def create(
10751075

10761076

10771077
async def empty(
1078-
shape: ChunkCoords, **kwargs: Any
1078+
shape: tuple[int, ...], **kwargs: Any
10791079
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
10801080
"""Create an empty array with the specified shape. The contents will be filled with the
10811081
array's fill value or zeros if no fill value is provided.
@@ -1127,7 +1127,7 @@ async def empty_like(
11271127

11281128
# TODO: add type annotations for fill_value and kwargs
11291129
async def full(
1130-
shape: ChunkCoords, fill_value: Any, **kwargs: Any
1130+
shape: tuple[int, ...], fill_value: Any, **kwargs: Any
11311131
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
11321132
"""Create an array, with `fill_value` being used as the default value for
11331133
uninitialized portions of the array.
@@ -1174,7 +1174,7 @@ async def full_like(
11741174

11751175

11761176
async def ones(
1177-
shape: ChunkCoords, **kwargs: Any
1177+
shape: tuple[int, ...], **kwargs: Any
11781178
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
11791179
"""Create an array, with one being used as the default value for
11801180
uninitialized portions of the array.
@@ -1297,7 +1297,7 @@ async def open_like(
12971297

12981298

12991299
async def zeros(
1300-
shape: ChunkCoords, **kwargs: Any
1300+
shape: tuple[int, ...], **kwargs: Any
13011301
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
13021302
"""Create an array, with zero being used as the default value for
13031303
uninitialized portions of the array.

0 commit comments

Comments
 (0)