Skip to content

Commit 2fbd30a

Browse files
d-v-b and maxrjones authored
chore: clean up metadata tests (#3897)
* chore: make types more accurate
* test: add test helpers and simplify metadata tests
* chore: lint
* changelog
* chore: bump typing-extensions
* Update src/zarr/core/metadata/v3.py

  Co-authored-by: Max Jones <14077947+maxrjones@users.noreply.github.com>
* test: add tests for ARRAY_METADATA_KEYS

---------

Co-authored-by: Max Jones <14077947+maxrjones@users.noreply.github.com>
1 parent 0ea15fd commit 2fbd30a

File tree

6 files changed

+350
-384
lines changed

6 files changed

+350
-384
lines changed

changes/3897.misc.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Bump the minimum version of `typing-extensions` to 4.13 to support the `extra_items`
2+
keyword argument on `TypedDict` (PEP 728).

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ dependencies = [
3636
'numpy>=2',
3737
'numcodecs>=0.14',
3838
'google-crc32c>=1.5',
39-
'typing_extensions>=4.12',
39+
'typing_extensions>=4.13',
4040
'donfig>=0.8',
4141
]
4242

@@ -243,7 +243,7 @@ extra-dependencies = [
243243
'fsspec==2023.10.0',
244244
's3fs==2023.10.0',
245245
'universal_pathlib==0.2.0',
246-
'typing_extensions==4.12.*',
246+
'typing_extensions==4.13.*',
247247
'donfig==0.8.*',
248248
'obstore==0.5.*',
249249
]

src/zarr/core/metadata/v3.py

Lines changed: 38 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
import json
44
from collections.abc import Iterable, Mapping, Sequence
55
from dataclasses import dataclass, field, replace
6-
from typing import TYPE_CHECKING, Any, Literal, NotRequired, TypedDict, TypeGuard, cast
6+
from typing import TYPE_CHECKING, Any, Final, Literal, NotRequired, TypeGuard, cast
7+
8+
from typing_extensions import TypedDict
79

810
from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec
911
from zarr.abc.metadata import Metadata
@@ -136,10 +138,11 @@ def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]:
136138
)
137139

138140

139-
class AllowedExtraField(TypedDict):
141+
class AllowedExtraField(TypedDict, extra_items=JSON): # type: ignore[call-arg]
140142
"""
141143
This class models allowed extra fields in array metadata.
142-
They are ignored by Zarr Python.
144+
They must have ``must_understand`` set to ``False``, and may contain
145+
arbitrary additional JSON data.
143146
"""
144147

145148
must_understand: Literal[False]
@@ -411,25 +414,43 @@ def parse_chunk_grid(
411414
raise ValueError(f"Unknown chunk grid name: {name!r}")
412415

413416

414-
class ArrayMetadataJSON_V3(TypedDict):
417+
class ArrayMetadataJSON_V3(TypedDict, extra_items=AllowedExtraField): # type: ignore[call-arg]
415418
"""
416-
A typed dictionary model for zarr v3 metadata.
419+
A typed dictionary model for zarr v3 array metadata.
420+
421+
Extra keys are permitted if they conform to ``AllowedExtraField``
422+
(i.e. they are mappings with ``must_understand: false``).
417423
"""
418424

419425
zarr_format: Literal[3]
420426
node_type: Literal["array"]
421-
data_type: str | NamedConfig[str, Mapping[str, object]]
427+
data_type: str | NamedConfig[str, Mapping[str, JSON]]
422428
shape: tuple[int, ...]
423-
chunk_grid: NamedConfig[str, Mapping[str, object]]
424-
chunk_key_encoding: NamedConfig[str, Mapping[str, object]]
425-
fill_value: object
426-
codecs: tuple[str | NamedConfig[str, Mapping[str, object]], ...]
429+
chunk_grid: str | NamedConfig[str, Mapping[str, JSON]]
430+
chunk_key_encoding: str | NamedConfig[str, Mapping[str, JSON]]
431+
fill_value: JSON
432+
codecs: tuple[str | NamedConfig[str, Mapping[str, JSON]], ...]
427433
attributes: NotRequired[Mapping[str, JSON]]
428-
storage_transformers: NotRequired[tuple[NamedConfig[str, Mapping[str, object]], ...]]
429-
dimension_names: NotRequired[tuple[str | None]]
430-
431-
432-
ARRAY_METADATA_KEYS = set(ArrayMetadataJSON_V3.__annotations__.keys())
434+
storage_transformers: NotRequired[tuple[str | NamedConfig[str, Mapping[str, JSON]], ...]]
435+
dimension_names: NotRequired[tuple[str | None, ...]]
436+
437+
438+
"""
439+
The names of the fields of the array metadata document defined in the zarr V3 spec.
440+
"""
441+
ARRAY_METADATA_KEYS: Final[set[str]] = {
442+
"zarr_format",
443+
"node_type",
444+
"data_type",
445+
"shape",
446+
"chunk_grid",
447+
"chunk_key_encoding",
448+
"fill_value",
449+
"codecs",
450+
"attributes",
451+
"storage_transformers",
452+
"dimension_names",
453+
}
433454

434455

435456
@dataclass(frozen=True, kw_only=True)
@@ -617,8 +638,8 @@ def from_dict(cls, data: dict[str, JSON]) -> Self:
617638

618639
return cls(
619640
shape=_data_typed["shape"],
620-
chunk_grid=_data_typed["chunk_grid"],
621-
chunk_key_encoding=_data_typed["chunk_key_encoding"],
641+
chunk_grid=_data_typed["chunk_grid"], # type: ignore[arg-type]
642+
chunk_key_encoding=_data_typed["chunk_key_encoding"], # type: ignore[arg-type]
622643
codecs=_data_typed["codecs"],
623644
attributes=_data_typed.get("attributes", {}), # type: ignore[arg-type]
624645
dimension_names=_data_typed.get("dimension_names", None),

tests/conftest.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,25 @@
5858
from zarr.core.dtype.wrapper import ZDType
5959

6060

61+
@dataclass
62+
class Expect[TIn, TOut]:
63+
"""A test case with explicit input, expected output, and a human-readable id."""
64+
65+
input: TIn
66+
output: TOut
67+
id: str
68+
69+
70+
@dataclass
71+
class ExpectFail[TIn]:
72+
"""A test case that should raise an exception."""
73+
74+
input: TIn
75+
exception: type[Exception]
76+
id: str
77+
msg: str | None = None
78+
79+
6180
async def parse_store(
6281
store: Literal["local", "memory", "fsspec", "zip", "memory_get_latency"], path: str
6382
) -> LocalStore | MemoryStore | FsspecStore | ZipStore | LatencyStore:

tests/test_metadata/conftest.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
from __future__ import annotations
2+
3+
from typing import TYPE_CHECKING, Any
4+
5+
from zarr.codecs.bytes import BytesCodec
6+
7+
if TYPE_CHECKING:
8+
from zarr.core.metadata.v3 import ArrayMetadataJSON_V3
9+
10+
11+
def minimal_metadata_dict_v3(
    extra_fields: dict[str, Any] | None = None, **overrides: Any
) -> ArrayMetadataJSON_V3:
    """Return a small, valid V3 array metadata JSON dict for use in tests.

    The result has the same shape as the output of
    ``ArrayV3Metadata.to_dict()``: every field that ``to_dict`` always
    emits is present.

    Parameters
    ----------
    extra_fields : dict, optional
        Additional keys to add to the dict (e.g. extension fields). Applied
        after ``overrides``, so on a key collision ``extra_fields`` wins.
    **overrides
        Replacement values for any of the standard metadata fields.

    Returns
    -------
    ArrayMetadataJSON_V3
        A freshly built dict containing the defaults with ``overrides`` and
        then ``extra_fields`` applied on top.
    """
    default_codec = BytesCodec().to_dict()
    metadata: ArrayMetadataJSON_V3 = {
        "zarr_format": 3,
        "node_type": "array",
        "shape": (4, 4),
        "data_type": "uint8",
        "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (4, 4)}},
        "chunk_key_encoding": {"name": "default", "configuration": {"separator": "/"}},
        "fill_value": 0,
        "codecs": (default_codec,),  # type: ignore[typeddict-item]
        "attributes": {},
        "storage_transformers": (),
    }
    # Apply the patches in order: standard-field overrides first, then extras.
    for patch in (overrides, extra_fields):
        if patch is not None:
            metadata.update(patch)  # type: ignore[typeddict-item]
    return metadata

0 commit comments

Comments
 (0)