11from __future__ import annotations
22
33import warnings
4- from collections .abc import Iterable , Sequence
4+ from collections .abc import Iterable , Mapping , Sequence
55from functools import cached_property
66from typing import TYPE_CHECKING , Any , TypeAlias , TypedDict , cast
77
8- import numcodecs .abc
9-
8+ from zarr .abc .codec import CodecJSON_V2 , _check_codecjson_v2
109from zarr .abc .metadata import Metadata
10+ from zarr .codecs ._numcodecs import get_numcodec
11+ from zarr .codecs ._v2 import Numcodec , _is_numcodec
1112from zarr .core .chunk_grids import RegularChunkGrid
1213from zarr .core .dtype import get_data_type_from_json
1314from zarr .core .dtype .common import OBJECT_CODEC_IDS , DTypeSpec_V2
3031import json
3132from dataclasses import dataclass , field , fields , replace
3233
33- import numcodecs
3434import numpy as np
3535
3636from zarr .core .array_spec import ArrayConfig , ArraySpec
@@ -56,7 +56,7 @@ class ArrayV2MetadataDict(TypedDict):
5656
5757
5858# Union of acceptable types for v2 compressors
59- CompressorLikev2 : TypeAlias = dict [str , JSON ] | numcodecs . abc . Codec | None
59+ CompressorLike_V2 : TypeAlias = CodecJSON_V2 [str ] | Numcodec
6060
6161
6262@dataclass (frozen = True , kw_only = True )
@@ -66,9 +66,9 @@ class ArrayV2Metadata(Metadata):
6666 dtype : ZDType [TBaseDType , TBaseScalar ]
6767 fill_value : int | float | str | bytes | None = None
6868 order : MemoryOrder = "C"
69- filters : tuple [numcodecs . abc . Codec , ...] | None = None
69+ filters : tuple [Numcodec , ...] | None = None
7070 dimension_separator : Literal ["." , "/" ] = "."
71- compressor : numcodecs . abc . Codec | None
71+ compressor : Numcodec | None
7272 attributes : dict [str , JSON ] = field (default_factory = dict )
7373 zarr_format : Literal [2 ] = field (init = False , default = 2 )
7474
@@ -81,8 +81,8 @@ def __init__(
8181 fill_value : Any ,
8282 order : MemoryOrder ,
8383 dimension_separator : Literal ["." , "/" ] = "." ,
84- compressor : CompressorLikev2 = None ,
85- filters : Iterable [numcodecs . abc . Codec | dict [ str , JSON ] ] | None = None ,
84+ compressor : CompressorLike_V2 | None = None ,
85+ filters : Iterable [CompressorLike_V2 ] | None = None ,
8686 attributes : dict [str , JSON ] | None = None ,
8787 ) -> None :
8888 """
@@ -197,12 +197,12 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata:
197197
198198 def to_dict (self ) -> dict [str , JSON ]:
199199 zarray_dict = super ().to_dict ()
200- if isinstance (zarray_dict ["compressor" ], numcodecs . abc . Codec ):
200+ if _is_numcodec (zarray_dict ["compressor" ]):
201201 codec_config = zarray_dict ["compressor" ].get_config ()
202202 # Hotfix for https://github.com/zarr-developers/zarr-python/issues/2647
203- if codec_config [ "id" ] == "zstd" and not codec_config .get ("checksum" , False ):
204- codec_config .pop ("checksum" )
205- zarray_dict ["compressor" ] = codec_config
203+ if codec_config . get ( "id" ) == "zstd" and not codec_config .get ("checksum" , False ):
204+ codec_config .pop ("checksum" ) # type: ignore[typeddict-item]
205+ zarray_dict ["compressor" ] = codec_config # type: ignore[assignment]
206206
207207 if zarray_dict ["filters" ] is not None :
208208 raw_filters = zarray_dict ["filters" ]
@@ -212,11 +212,12 @@ def to_dict(self) -> dict[str, JSON]:
212212 raise TypeError ("Invalid type for filters. Expected a list or tuple." )
213213 new_filters = []
214214 for f in raw_filters :
215- if isinstance ( f , numcodecs . abc . Codec ):
215+ if _is_numcodec ( f ):
216216 new_filters .append (f .get_config ())
217217 else :
218218 new_filters .append (f )
219- zarray_dict ["filters" ] = new_filters
219+ # TODO: remove the type ignore when we properly type the output
220+ zarray_dict ["filters" ] = new_filters # type: ignore[assignment]
220221
221222 # serialize the fill value after dtype-specific JSON encoding
222223 if self .fill_value is not None :
@@ -262,44 +263,44 @@ def parse_zarr_format(data: object) -> Literal[2]:
262263 raise ValueError (f"Invalid value. Expected 2. Got { data } ." )
263264
264265
def parse_filters(data: object) -> tuple[Numcodec, ...] | None:
    """
    Parse a potential tuple of filters.

    Parameters
    ----------
    data : object
        ``None``, an iterable whose elements are each either a Numcodec
        instance or a v2 codec JSON document (per the error message below, a
        dict with an ``"id"`` key), or a single Numcodec instance.

    Returns
    -------
    tuple[Numcodec, ...] | None
        ``None`` when ``data`` is ``None`` or an empty iterable; otherwise a
        tuple of Numcodec instances in input order.

    Raises
    ------
    TypeError
        If an element of the iterable is neither a Numcodec nor a valid codec
        JSON document, or if ``data`` is not ``None``, an iterable, or a
        Numcodec.
    """
    if data is None:
        return data

    if isinstance(data, Iterable):
        out: list[Numcodec] = []
        for val in data:
            if _is_numcodec(val):
                out.append(val)
            # This must be ``elif``: with a plain ``if`` an already-instantiated
            # Numcodec would be appended above and then rejected by the
            # ``else`` branch below.
            elif _check_codecjson_v2(val):
                out.append(get_numcodec(val))
            else:
                # Report the offending element, not the whole input iterable.
                msg = f'Invalid representation of Numcodec. Got {val!r}, expected a dict with an "id" key or a Numcodec instance.'
                raise TypeError(msg)
        if not out:
            # Per the v2 spec, an empty tuple is not allowed -- use None to express "no filters"
            return None
        return tuple(out)

    # take a single codec instance and wrap it in a tuple
    if _is_numcodec(data):
        return (data,)

    msg = f"Invalid filters. Expected None, an iterable of Numcodec or dict representations of Numcodec. Got {type(data)} instead."
    raise TypeError(msg)
292293
293294
def parse_compressor(data: object) -> Numcodec | None:
    """
    Parse a potential compressor.

    ``None`` and Numcodec instances are returned unchanged. A value accepted
    by ``_check_codecjson_v2`` is converted with ``get_numcodec``. Anything
    else raises ``ValueError``.
    """
    # "No compressor" is a valid setting and passes straight through.
    if data is None:
        return None
    # Already a codec instance: nothing to convert.
    if _is_numcodec(data):
        return data
    # JSON form of a codec: build the codec instance from it.
    if _check_codecjson_v2(data):
        return get_numcodec(data)
    raise ValueError(
        f"Invalid compressor. Expected None, a Numcodec, or a dict representation of a Numcodec. Got {type(data)} instead."
    )
304305
305306
@@ -313,7 +314,7 @@ def parse_metadata(data: ArrayV2Metadata) -> ArrayV2Metadata:
313314 return data
314315
315316
316- def get_object_codec_id (maybe_object_codecs : Sequence [ JSON ]) -> str | None :
317+ def get_object_codec_id (maybe_object_codecs : Iterable [ object ]) -> str | None :
317318 """
318319 Inspect a sequence of codecs / filters for an "object codec", i.e. a codec
319320 that can serialize object arrays to contiguous bytes. Zarr python
@@ -324,7 +325,7 @@ def get_object_codec_id(maybe_object_codecs: Sequence[JSON]) -> str | None:
324325 object_codec_id = None
325326 for maybe_object_codec in maybe_object_codecs :
326327 if (
327- isinstance (maybe_object_codec , dict )
328+ isinstance (maybe_object_codec , Mapping )
328329 and maybe_object_codec .get ("id" ) in OBJECT_CODEC_IDS
329330 ):
330331 return cast ("str" , maybe_object_codec ["id" ])
0 commit comments