Skip to content

Commit b702060

Browse files
committed
pass filters and compressors to processing functions, rather than full metadata
1 parent dba4073 commit b702060

1 file changed

Lines changed: 22 additions & 24 deletions

File tree

src/zarr/core/metadata/converter/migrate_to_v3.py

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
ZarrFormat,
2424
)
2525
from zarr.core.dtype.common import HasEndianness
26+
from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType
2627
from zarr.core.group import Group, GroupMetadata
2728
from zarr.core.metadata.v2 import ArrayV2Metadata
2829
from zarr.core.metadata.v3 import ArrayV3Metadata
@@ -35,6 +36,7 @@
3536

3637

3738
def migrate_v2_to_v3(
39+
*,
3840
input_store: StoreLike,
3941
output_store: StoreLike | None = None,
4042
storage_options: dict[str, Any] | None = None,
@@ -197,7 +199,9 @@ def _convert_array_metadata(metadata_v2: ArrayV2Metadata) -> ArrayV3Metadata:
197199
if metadata_v2.order == "F":
198200
# F is equivalent to order: n-1, ... 1, 0
199201
codecs.append(TransposeCodec(order=list(range(len(metadata_v2.shape) - 1, -1, -1))))
200-
codecs.extend(_convert_filters(metadata_v2))
202+
203+
if metadata_v2.filters is not None:
204+
codecs.extend(_convert_filters(metadata_v2.filters))
201205

202206
# array-bytes codecs
203207
if not isinstance(metadata_v2.dtype, HasEndianness):
@@ -206,8 +210,8 @@ def _convert_array_metadata(metadata_v2: ArrayV2Metadata) -> ArrayV3Metadata:
206210
codecs.append(BytesCodec(endian=metadata_v2.dtype.endianness))
207211

208212
# bytes-bytes codecs
209-
bytes_bytes_codec = _convert_compressor(metadata_v2)
210-
if bytes_bytes_codec is not None:
213+
if metadata_v2.compressor is not None:
214+
bytes_bytes_codec = _convert_compressor(metadata_v2.compressor, metadata_v2.dtype)
211215
codecs.append(bytes_bytes_codec)
212216

213217
return ArrayV3Metadata(
@@ -223,46 +227,40 @@ def _convert_array_metadata(metadata_v2: ArrayV2Metadata) -> ArrayV3Metadata:
223227
)
224228

225229

226-
def _convert_filters(metadata_v2: ArrayV2Metadata) -> list[ArrayArrayCodec]:
227-
if metadata_v2.filters is None:
228-
return []
229-
230-
filters_codecs = [_find_numcodecs_zarr3(filter) for filter in metadata_v2.filters]
230+
def _convert_filters(filters: tuple[numcodecs.abc.Codec, ...]) -> list[ArrayArrayCodec]:
231+
filters_codecs = [_find_numcodecs_zarr3(filter) for filter in filters]
231232
for codec in filters_codecs:
232233
if not isinstance(codec, ArrayArrayCodec):
233234
raise TypeError(f"Filter {type(codec)} is not an ArrayArrayCodec")
234235

235236
return cast(list[ArrayArrayCodec], filters_codecs)
236237

237238

238-
def _convert_compressor(metadata_v2: ArrayV2Metadata) -> BytesBytesCodec | None:
239-
if metadata_v2.compressor is None:
240-
return None
241-
242-
compressor_name = metadata_v2.compressor.codec_id
243-
244-
match compressor_name:
239+
def _convert_compressor(
240+
compressor: numcodecs.abc.Codec, dtype: ZDType[TBaseDType, TBaseScalar]
241+
) -> BytesBytesCodec:
242+
match compressor.codec_id:
245243
case "blosc":
246244
return BloscCodec(
247-
typesize=metadata_v2.dtype.to_native_dtype().itemsize,
248-
cname=metadata_v2.compressor.cname,
249-
clevel=metadata_v2.compressor.clevel,
250-
shuffle=BloscShuffle.from_int(metadata_v2.compressor.shuffle),
251-
blocksize=metadata_v2.compressor.blocksize,
245+
typesize=dtype.to_native_dtype().itemsize,
246+
cname=compressor.cname,
247+
clevel=compressor.clevel,
248+
shuffle=BloscShuffle.from_int(compressor.shuffle),
249+
blocksize=compressor.blocksize,
252250
)
253251

254252
case "zstd":
255253
return ZstdCodec(
256-
level=metadata_v2.compressor.level,
257-
checksum=metadata_v2.compressor.checksum,
254+
level=compressor.level,
255+
checksum=compressor.checksum,
258256
)
259257

260258
case "gzip":
261-
return GzipCodec(level=metadata_v2.compressor.level)
259+
return GzipCodec(level=compressor.level)
262260

263261
case _:
264262
# If possible, find matching numcodecs.zarr3 codec
265-
compressor_codec = _find_numcodecs_zarr3(metadata_v2.compressor)
263+
compressor_codec = _find_numcodecs_zarr3(compressor)
266264

267265
if not isinstance(compressor_codec, BytesBytesCodec):
268266
raise TypeError(f"Compressor {type(compressor_codec)} is not a BytesBytesCodec")

0 commit comments

Comments
 (0)