Skip to content

Commit 9a2f35b

Browse files
committed
push numcodec protocol into abcs; remove all numcodecs.abc.Codec type annotations
1 parent 84c9780 commit 9a2f35b

12 files changed

Lines changed: 154 additions & 151 deletions

File tree

src/zarr/abc/numcodec.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
from typing import Self, TypeGuard
2+
3+
from typing_extensions import Protocol
4+
5+
from zarr.abc.codec import CodecJSON_V2
6+
from zarr.core.buffer import Buffer, NDBuffer
7+
8+
9+
class Numcodec(Protocol):
10+
"""
11+
A protocol that models the ``numcodecs.abc.Codec`` interface.
12+
"""
13+
14+
codec_id: str
15+
16+
def encode(self, buf: Buffer | NDBuffer) -> Buffer | NDBuffer: ...
17+
18+
def decode(
19+
self, buf: Buffer | NDBuffer, out: Buffer | NDBuffer | None = None
20+
) -> Buffer | NDBuffer: ...
21+
22+
def get_config(self) -> CodecJSON_V2[str]: ...
23+
24+
@classmethod
25+
def from_config(cls, config: CodecJSON_V2[str]) -> Self: ...
26+
27+
28+
def _is_numcodec_cls(obj: object) -> TypeGuard[type[Numcodec]]:
29+
"""
30+
Check if the given object is a class that implements the Numcodec protocol.
31+
32+
The @runtime_checkable decorator does not allow issubclass checks for protocols with non-method
33+
members (i.e., attributes), so we use this function to manually check for the presence of the
34+
required attributes and methods on a given object.
35+
"""
36+
return (
37+
isinstance(obj, type)
38+
and hasattr(obj, "codec_id")
39+
and isinstance(obj.codec_id, str)
40+
and hasattr(obj, "encode")
41+
and callable(obj.encode)
42+
and hasattr(obj, "decode")
43+
and callable(obj.decode)
44+
and hasattr(obj, "get_config")
45+
and callable(obj.get_config)
46+
and hasattr(obj, "from_config")
47+
and callable(obj.from_config)
48+
)
49+
50+
51+
def _is_numcodec(obj: object) -> TypeGuard[Numcodec]:
52+
"""
53+
Check if the given object implements the Numcodec protocol.
54+
55+
The @runtime_checkable decorator does not allow issubclass checks for protocols with non-method
56+
members (i.e., attributes), so we use this function to manually check for the presence of the
57+
required attributes and methods on a given object.
58+
"""
59+
return _is_numcodec_cls(type(obj))

src/zarr/api/asynchronous.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
from collections.abc import Iterable
4848

4949
from zarr.abc.codec import Codec
50-
from zarr.codecs._v2 import Numcodec
50+
from zarr.abc.numcodec import Numcodec
5151
from zarr.core.buffer import NDArrayLikeOrScalar
5252
from zarr.core.chunk_key_encodings import ChunkKeyEncoding
5353
from zarr.storage import StoreLike

src/zarr/api/synchronous.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
import numpy.typing as npt
1919

2020
from zarr.abc.codec import Codec
21+
from zarr.abc.numcodec import Numcodec
2122
from zarr.api.asynchronous import ArrayLike, PathLike
22-
from zarr.codecs._v2 import Numcodec
2323
from zarr.core.array import (
2424
CompressorsLike,
2525
FiltersLike,

src/zarr/codecs/_numcodecs.py

Lines changed: 0 additions & 30 deletions
This file was deleted.

src/zarr/codecs/_v2.py

Lines changed: 3 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -2,73 +2,20 @@
22

33
import asyncio
44
from dataclasses import dataclass
5-
from typing import TYPE_CHECKING, ClassVar, Self, TypeGuard
5+
from typing import TYPE_CHECKING
66

77
import numpy as np
88
from numcodecs.compat import ensure_bytes, ensure_ndarray_like
9-
from typing_extensions import Protocol
109

11-
from zarr.abc.codec import ArrayBytesCodec, CodecJSON_V2
10+
from zarr.abc.codec import ArrayBytesCodec
1211
from zarr.registry import get_ndbuffer_class
1312

1413
if TYPE_CHECKING:
14+
from zarr.abc.numcodec import Numcodec
1515
from zarr.core.array_spec import ArraySpec
1616
from zarr.core.buffer import Buffer, NDBuffer
1717

1818

19-
class Numcodec(Protocol):
20-
"""
21-
A protocol that models the ``numcodecs.abc.Codec`` interface.
22-
"""
23-
24-
codec_id: ClassVar[str]
25-
26-
def encode(self, buf: Buffer | NDBuffer) -> Buffer | NDBuffer: ...
27-
28-
def decode(
29-
self, buf: Buffer | NDBuffer, out: Buffer | NDBuffer | None = None
30-
) -> Buffer | NDBuffer: ...
31-
32-
def get_config(self) -> CodecJSON_V2[str]: ...
33-
34-
@classmethod
35-
def from_config(cls, config: CodecJSON_V2[str]) -> Self: ...
36-
37-
38-
def _is_numcodec(obj: object) -> TypeGuard[Numcodec]:
39-
"""
40-
Check if the given object implements the Numcodec protocol.
41-
42-
The @runtime_checkable decorator does not allow issubclass checks for protocols with non-method
43-
members (i.e., attributes), so we use this function to manually check for the presence of the
44-
required attributes and methods on a given object.
45-
"""
46-
return _is_numcodec_cls(type(obj))
47-
48-
49-
def _is_numcodec_cls(obj: object) -> TypeGuard[type[Numcodec]]:
50-
"""
51-
Check if the given object is a class that implements the Numcodec protocol.
52-
53-
The @runtime_checkable decorator does not allow issubclass checks for protocols with non-method
54-
members (i.e., attributes), so we use this function to manually check for the presence of the
55-
required attributes and methods on a given object.
56-
"""
57-
return (
58-
isinstance(obj, type)
59-
and hasattr(obj, "codec_id")
60-
and isinstance(obj.codec_id, str)
61-
and hasattr(obj, "encode")
62-
and callable(obj.encode)
63-
and hasattr(obj, "decode")
64-
and callable(obj.decode)
65-
and hasattr(obj, "get_config")
66-
and callable(obj.get_config)
67-
and hasattr(obj, "from_config")
68-
and callable(obj.from_config)
69-
)
70-
71-
7219
@dataclass(frozen=True)
7320
class V2Codec(ArrayBytesCodec):
7421
filters: tuple[Numcodec, ...] | None

src/zarr/core/_info.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,8 @@
55
from typing import TYPE_CHECKING, Literal
66

77
if TYPE_CHECKING:
8-
import numcodecs.abc
9-
108
from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec
9+
from zarr.abc.numcodec import Numcodec
1110
from zarr.core.common import ZarrFormat
1211
from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType
1312

@@ -88,9 +87,9 @@ class ArrayInfo:
8887
_order: Literal["C", "F"]
8988
_read_only: bool
9089
_store_type: str
91-
_filters: tuple[numcodecs.abc.Codec, ...] | tuple[ArrayArrayCodec, ...] = ()
90+
_filters: tuple[Numcodec, ...] | tuple[ArrayArrayCodec, ...] = ()
9291
_serializer: ArrayBytesCodec | None = None
93-
_compressors: tuple[numcodecs.abc.Codec, ...] | tuple[BytesBytesCodec, ...] = ()
92+
_compressors: tuple[Numcodec, ...] | tuple[BytesBytesCodec, ...] = ()
9493
_count_bytes: int | None = None
9594
_count_bytes_stored: int | None = None
9695
_count_chunks_initialized: int | None = None

src/zarr/core/array.py

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,14 @@
1919
)
2020
from warnings import warn
2121

22-
import numcodecs
23-
import numcodecs.abc
2422
import numpy as np
2523
from typing_extensions import deprecated
2624

2725
import zarr
2826
from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec
27+
from zarr.abc.numcodec import Numcodec, _is_numcodec
2928
from zarr.abc.store import Store, set_or_delete
30-
from zarr.codecs._v2 import Numcodec, V2Codec
29+
from zarr.codecs._v2 import V2Codec
3130
from zarr.codecs.bytes import BytesCodec
3231
from zarr.codecs.vlen_utf8 import VLenBytesCodec, VLenUTF8Codec
3332
from zarr.codecs.zstd import ZstdCodec
@@ -1033,7 +1032,7 @@ def size(self) -> int:
10331032
return np.prod(self.metadata.shape).item()
10341033

10351034
@property
1036-
def filters(self) -> tuple[numcodecs.abc.Codec, ...] | tuple[ArrayArrayCodec, ...]:
1035+
def filters(self) -> tuple[Numcodec, ...] | tuple[ArrayArrayCodec, ...]:
10371036
"""
10381037
Filters that are applied to each chunk of the array, in order, before serializing that
10391038
chunk to bytes.
@@ -1062,7 +1061,7 @@ def serializer(self) -> ArrayBytesCodec | None:
10621061

10631062
@property
10641063
@deprecated("Use AsyncArray.compressors instead.")
1065-
def compressor(self) -> numcodecs.abc.Codec | None:
1064+
def compressor(self) -> Numcodec | None:
10661065
"""
10671066
Compressor that is applied to each chunk of the array.
10681067
@@ -1075,7 +1074,7 @@ def compressor(self) -> numcodecs.abc.Codec | None:
10751074
raise TypeError("`compressor` is not available for Zarr format 3 arrays.")
10761075

10771076
@property
1078-
def compressors(self) -> tuple[numcodecs.abc.Codec, ...] | tuple[BytesBytesCodec, ...]:
1077+
def compressors(self) -> tuple[Numcodec, ...] | tuple[BytesBytesCodec, ...]:
10791078
"""
10801079
Compressors that are applied to each chunk of the array. Compressors are applied in order, and after any
10811080
filters are applied (if any are specified) and the data is serialized into bytes.
@@ -2227,7 +2226,7 @@ def fill_value(self) -> Any:
22272226
return self.metadata.fill_value
22282227

22292228
@property
2230-
def filters(self) -> tuple[numcodecs.abc.Codec, ...] | tuple[ArrayArrayCodec, ...]:
2229+
def filters(self) -> tuple[Numcodec, ...] | tuple[ArrayArrayCodec, ...]:
22312230
"""
22322231
Filters that are applied to each chunk of the array, in order, before serializing that
22332232
chunk to bytes.
@@ -2243,7 +2242,7 @@ def serializer(self) -> None | ArrayBytesCodec:
22432242

22442243
@property
22452244
@deprecated("Use Array.compressors instead.")
2246-
def compressor(self) -> numcodecs.abc.Codec | None:
2245+
def compressor(self) -> Numcodec | None:
22472246
"""
22482247
Compressor that is applied to each chunk of the array.
22492248
@@ -2254,7 +2253,7 @@ def compressor(self) -> numcodecs.abc.Codec | None:
22542253
return self._async_array.compressor
22552254

22562255
@property
2257-
def compressors(self) -> tuple[numcodecs.abc.Codec, ...] | tuple[BytesBytesCodec, ...]:
2256+
def compressors(self) -> tuple[Numcodec, ...] | tuple[BytesBytesCodec, ...]:
22582257
"""
22592258
Compressors that are applied to each chunk of the array. Compressors are applied in order, and after any
22602259
filters are applied (if any are specified) and the data is serialized into bytes.
@@ -3900,15 +3899,13 @@ def _build_parents(
39003899
FiltersLike: TypeAlias = (
39013900
Iterable[dict[str, JSON] | ArrayArrayCodec | Numcodec]
39023901
| ArrayArrayCodec
3903-
| Iterable[numcodecs.abc.Codec]
3904-
| numcodecs.abc.Codec
3902+
| Iterable[Numcodec]
3903+
| Numcodec
39053904
| Literal["auto"]
39063905
| None
39073906
)
39083907
# Union of acceptable types for users to pass in for both v2 and v3 compressors
3909-
CompressorLike: TypeAlias = (
3910-
dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec | Literal["auto"] | None
3911-
)
3908+
CompressorLike: TypeAlias = dict[str, JSON] | BytesBytesCodec | Numcodec | Literal["auto"] | None
39123909

39133910
CompressorsLike: TypeAlias = (
39143911
Iterable[dict[str, JSON] | BytesBytesCodec | Numcodec]
@@ -4775,7 +4772,7 @@ def default_serializer_v3(dtype: ZDType[Any, Any]) -> ArrayBytesCodec:
47754772
return serializer
47764773

47774774

4778-
def default_filters_v2(dtype: ZDType[Any, Any]) -> tuple[numcodecs.abc.Codec] | None:
4775+
def default_filters_v2(dtype: ZDType[Any, Any]) -> tuple[Numcodec] | None:
47794776
"""
47804777
Given a data type, return the default filters for that data type.
47814778
@@ -4797,28 +4794,28 @@ def default_filters_v2(dtype: ZDType[Any, Any]) -> tuple[numcodecs.abc.Codec] |
47974794
return None
47984795

47994796

4800-
def default_compressor_v2(dtype: ZDType[Any, Any]) -> numcodecs.abc.Codec:
4797+
def default_compressor_v2(dtype: ZDType[Any, Any]) -> Numcodec:
48014798
"""
48024799
Given a data type, return the default compressors for that data type.
48034800
48044801
This is just the numcodecs ``Zstd`` codec.
48054802
"""
48064803
from numcodecs import Zstd
48074804

4808-
return Zstd(level=0, checksum=False)
4805+
return Zstd(level=0, checksum=False) # type: ignore[no-any-return]
48094806

48104807

48114808
def _parse_chunk_encoding_v2(
48124809
*,
48134810
compressor: CompressorsLike,
48144811
filters: FiltersLike,
48154812
dtype: ZDType[TBaseDType, TBaseScalar],
4816-
) -> tuple[tuple[numcodecs.abc.Codec, ...] | None, numcodecs.abc.Codec | None]:
4813+
) -> tuple[tuple[Numcodec, ...] | None, Numcodec | None]:
48174814
"""
48184815
Generate chunk encoding classes for Zarr format 2 arrays with optional defaults.
48194816
"""
4820-
_filters: tuple[numcodecs.abc.Codec, ...] | None
4821-
_compressor: numcodecs.abc.Codec | None
4817+
_filters: tuple[Numcodec, ...] | None
4818+
_compressor: Numcodec | None
48224819

48234820
if compressor is None or compressor == ():
48244821
_compressor = None
@@ -4839,7 +4836,7 @@ def _parse_chunk_encoding_v2(
48394836
else:
48404837
if isinstance(filters, Iterable):
48414838
for idx, f in enumerate(filters):
4842-
if not isinstance(f, numcodecs.abc.Codec):
4839+
if not _is_numcodec(f):
48434840
msg = (
48444841
"For Zarr format 2 arrays, all elements of `filters` must be numcodecs codecs. "
48454842
f"Element at index {idx} has type {type(f)}, which is not a numcodecs codec."
@@ -4852,12 +4849,12 @@ def _parse_chunk_encoding_v2(
48524849
if _compressor is None:
48534850
object_codec_id = None
48544851
else:
4855-
object_codec_id = get_object_codec_id((_compressor.get_config(),))
4852+
object_codec_id = get_object_codec_id((_compressor.get_config(),)) # type: ignore[arg-type]
48564853
else:
48574854
object_codec_id = get_object_codec_id(
48584855
(
4859-
*[f.get_config() for f in _filters],
4860-
_compressor.get_config() if _compressor is not None else None,
4856+
*[f.get_config() for f in _filters], # type: ignore[arg-type]
4857+
_compressor.get_config() if _compressor is not None else None, # type: ignore[arg-type]
48614858
)
48624859
)
48634860
if object_codec_id is None:
@@ -4944,7 +4941,7 @@ def _parse_deprecated_compressor(
49444941
# "no compression"
49454942
compressors = ()
49464943
else:
4947-
compressors = (compressor,) # type: ignore[assignment]
4944+
compressors = (compressor,)
49484945
elif zarr_format == 2 and compressor == compressors == "auto":
49494946
compressors = ({"id": "blosc"},)
49504947
return compressors

0 commit comments

Comments
 (0)