Skip to content

Commit f304838

Browse files
authored
chore: Refactor codecs (#74)
1 parent 2aabf8c commit f304838

6 files changed

Lines changed: 621 additions & 144 deletions

File tree

pixi.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -114,6 +114,12 @@ docs = ["dev", "docs"]
114114
test-min-versions = ["dev", "test", "minimum-versions", "py311"] # test minimum versions with python 3.11
115115
test = ["dev", "test", "latest-versions", "py314"] # test latest versions with python 3.14
116116

117+
[tool.pytest.ini_options]
118+
filterwarnings = [
119+
"ignore:Imagecodecs codecs are not in the Zarr version 3 specification and may not be supported by other zarr implementations.:UserWarning",
120+
"ignore:Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned.:rasterio.errors.NotGeoreferencedWarning",
121+
]
122+
117123
[tool.coverage.run]
118124
source_pkgs = [""]
119125
branch = true

src/virtual_tiff/codecs.py

Lines changed: 105 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -1,48 +1,103 @@
1-
# Copied and modified from https://github.com/zarr-developers/zarr-python/blob/bb55f0c58320a6d27be3a0ba918feee398a53db4/src/zarr/codecs/bytes.py
2-
1+
# Adapted from https://github.com/zarr-developers/zarr-python/blob/main/src/zarr/codecs/bytes.py and https://github.com/zarr-developers/zarr-python/pull/3332
32
from __future__ import annotations
43

4+
from collections.abc import Mapping
55
from dataclasses import dataclass, replace
6-
from typing import TYPE_CHECKING
6+
from typing import TYPE_CHECKING, Literal, Self, cast, overload
77

88
import numpy as np
9-
from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec
9+
from zarr.abc.codec import (
10+
ArrayArrayCodec,
11+
ArrayBytesCodec,
12+
CodecJSON,
13+
CodecJSON_V2,
14+
CodecJSON_V3,
15+
)
1016
from zarr.codecs.bytes import Endian
1117
from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer
12-
from zarr.core.common import JSON, parse_enum, parse_named_configuration
18+
from zarr.core.common import JSON
1319
from zarr.registry import register_codec
1420

1521
if TYPE_CHECKING:
16-
from typing import Self
17-
1822
from zarr.core.array_spec import ArraySpec
1923

2024

25+
def check_codecjson_v2(data: object) -> bool:
    """Return True when ``data`` looks like Zarr v2 codec metadata.

    The check is purely structural: ``data`` must be a mapping that has an
    ``"id"`` entry whose value is a string. Anything else (a bare string, a
    mapping keyed by ``"name"``, a non-string id) yields False.
    """
    # ``Mapping.get`` returns None for a missing key, which is not a str,
    # so a single isinstance covers both "absent" and "wrong type".
    return isinstance(data, Mapping) and isinstance(data.get("id"), str)
27+
28+
29+
ZarrFormat = Literal[2, 3]
30+
31+
32+
def _parse_endian(data: object) -> Endian | None:
    """Normalize an endianness specification to an ``Endian`` member or None.

    Accepts None (returned unchanged), an existing ``Endian`` member
    (returned unchanged), or one of the strings ``"little"`` / ``"big"``
    (converted via ``Endian(data)``).

    Raises:
        ValueError: for any other value.
    """
    if data is None:
        return None
    if isinstance(data, Endian):
        return data
    if isinstance(data, str) and data in ("little", "big"):
        return Endian(data)
    raise ValueError(
        f"Invalid endian value: {data!r}. Expected 'little', 'big', or None."
    )
42+
43+
2144
@dataclass(frozen=True)
2245
class ChunkyCodec(ArrayBytesCodec):
2346
is_fixed_size = True
2447

2548
endian: Endian | None
2649

2750
def __init__(self, *, endian: Endian | str | None = "little") -> None:
28-
endian_parsed = None if endian is None else parse_enum(endian, Endian)
29-
object.__setattr__(self, "endian", endian_parsed)
51+
object.__setattr__(self, "endian", _parse_endian(endian))
3052

3153
@classmethod
3254
def from_dict(cls, data: dict[str, JSON]) -> Self:
33-
_, configuration_parsed = parse_named_configuration(
34-
data, "ChunkyCodec", require_configuration=False
35-
)
36-
configuration_parsed = configuration_parsed or {}
37-
return cls(**configuration_parsed) # type: ignore[arg-type]
55+
return cls.from_json(data) # type: ignore[arg-type]
3856

3957
def to_dict(self) -> dict[str, JSON]:
40-
if self.endian is not None:
41-
return {
42-
"name": "ChunkyCodec",
43-
"configuration": {"endian": self.endian.value},
44-
}
45-
return {"name": "ChunkyCodec"}
58+
return cast(dict[str, JSON], self.to_json(zarr_format=3))
59+
60+
@classmethod
def _from_json_v2(cls, data: CodecJSON) -> Self:
    """Build the codec from Zarr v2 style metadata (a mapping keyed by ``"id"``).

    The ``"endian"`` entry is passed straight to the constructor; when the
    key is absent ``data.get`` yields None, so the codec is created with
    ``endian=None`` rather than the constructor default.

    Raises:
        ValueError: if ``data`` is not a mapping.
    """
    if isinstance(data, Mapping):
        return cls(endian=data.get("endian"))
    raise ValueError(f"Invalid JSON: {data}")
65+
66+
@classmethod
def _from_json_v3(cls, data: CodecJSON) -> Self:
    """Build the codec from Zarr v3 style metadata.

    A bare string is treated as a codec name with no configuration and
    produces a default-constructed codec. A mapping has its
    ``"configuration"`` entry unpacked as constructor keyword arguments.

    Raises:
        ValueError: if ``data`` is neither a string nor a mapping.
    """
    if isinstance(data, str):
        return cls()
    if isinstance(data, Mapping):
        # ``or {}`` covers both a missing key and an explicit
        # ``"configuration": None``; the latter would otherwise raise a
        # TypeError when unpacked with ``**``.
        config = data.get("configuration") or {}
        return cls(**config)
    raise ValueError(f"Invalid JSON: {data}")
74+
75+
@classmethod
def from_json(cls, data: CodecJSON) -> Self:
    """Build the codec from either Zarr v2 or Zarr v3 codec metadata.

    Metadata that passes ``check_codecjson_v2`` (a mapping with a string
    ``"id"``) is parsed as v2; everything else is handed to the v3 parser.
    """
    if check_codecjson_v2(data):
        return cls._from_json_v2(data)
    return cls._from_json_v3(data)
80+
81+
@overload
def to_json(self, zarr_format: Literal[2]) -> CodecJSON_V2: ...
@overload
def to_json(self, zarr_format: Literal[3]) -> CodecJSON_V3: ...

def to_json(self, zarr_format: ZarrFormat) -> CodecJSON_V2 | CodecJSON_V3:
    """Serialize the codec to Zarr v2 or v3 codec metadata.

    For format 2 the result is a flat mapping keyed by ``"id"``; for
    format 3 it is a mapping with ``"name"`` and, when an endianness is
    set, a ``"configuration"`` entry. In both formats the endianness is
    omitted entirely when ``self.endian`` is None.

    Raises:
        ValueError: if ``zarr_format`` is neither 2 nor 3.
    """
    if zarr_format == 2:
        if self.endian is not None:
            return {"id": "ChunkyCodec", "endian": self.endian.value}  # type: ignore[return-value, typeddict-item]
        return {"id": "ChunkyCodec"}  # type: ignore[return-value]
    elif zarr_format == 3:
        if self.endian is not None:
            return {
                "name": "ChunkyCodec",
                "configuration": {"endian": self.endian.value},
            }
        return {"name": "ChunkyCodec"}
    raise ValueError(
        f"Unsupported Zarr format {zarr_format}. Expected 2 or 3."
    )  # pragma: no cover
46101

47102
def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
48103
if array_spec.dtype.item_size == 0:
@@ -123,14 +178,38 @@ def __init__(self) -> None:
123178

124179
@classmethod
125180
def from_dict(cls, data: dict[str, JSON]) -> Self:
126-
_, configuration_parsed = parse_named_configuration(
127-
data, "HorizontalDeltaCodec", require_configuration=False
128-
)
129-
configuration_parsed = configuration_parsed or {}
130-
return cls(**configuration_parsed) # type: ignore[arg-type]
181+
return cls.from_json(data) # type: ignore[arg-type]
131182

132183
def to_dict(self) -> dict[str, JSON]:
133-
return {"name": "HorizontalDeltaCodec"}
184+
return cast(dict[str, JSON], self.to_json(zarr_format=3))
185+
186+
@classmethod
def _from_json_v2(cls, data: CodecJSON) -> Self:
    """Build the codec from Zarr v2 style metadata.

    ``data`` is not inspected: the codec is always constructed with no
    arguments.
    """
    return cls()
189+
190+
@classmethod
def _from_json_v3(cls, data: CodecJSON) -> Self:
    """Build the codec from Zarr v3 style metadata.

    ``data`` is not inspected: the codec is always constructed with no
    arguments.
    """
    return cls()
193+
194+
@classmethod
def from_json(cls, data: CodecJSON) -> Self:
    """Build the codec from either Zarr v2 or Zarr v3 codec metadata.

    Metadata that passes ``check_codecjson_v2`` (a mapping with a string
    ``"id"``) is parsed as v2; everything else is handed to the v3 parser.
    """
    if check_codecjson_v2(data):
        return cls._from_json_v2(data)
    return cls._from_json_v3(data)
199+
200+
@overload
def to_json(self, zarr_format: Literal[2]) -> CodecJSON_V2: ...
@overload
def to_json(self, zarr_format: Literal[3]) -> CodecJSON_V3: ...

def to_json(self, zarr_format: ZarrFormat) -> CodecJSON_V2 | CodecJSON_V3:
    """Serialize the codec to Zarr v2 or v3 codec metadata.

    Format 2 yields ``{"id": "HorizontalDeltaCodec"}`` and format 3 yields
    ``{"name": "HorizontalDeltaCodec"}``; no configuration is emitted in
    either case.

    Raises:
        ValueError: if ``zarr_format`` is neither 2 nor 3.
    """
    if zarr_format == 2:
        return {"id": "HorizontalDeltaCodec"}  # type: ignore[return-value]
    elif zarr_format == 3:
        return {"name": "HorizontalDeltaCodec"}
    raise ValueError(
        f"Unsupported Zarr format {zarr_format}. Expected 2 or 3."
    )  # pragma: no cover
134213

135214
def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
    """Return this codec unchanged; ``array_spec`` is not consulted."""
    return self

src/virtual_tiff/constants.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
from numcodecs.zarr3 import LZMA, Zlib
1+
from zarr.codecs.numcodecs import LZMA, Zlib
22

33
from virtual_tiff.imagecodecs import (
44
DeflateCodec,

0 commit comments

Comments
 (0)