Skip to content

Commit eac9c86

Browse files
d-v-b and claude authored
fix(zarr-metadata): model stored metadata more closely (zarr-developers#3962)
* refactor: bring in types from zarr-metadata

* fix: better modelling of Zarr V2 metadata

  Zarr V2 uses a separate JSON document named `.zattrs` for the attributes of an array or group. This package was inconsistent about how it modelled this fact. The array metadata document type modelled array fields (`shape`, `dtype`, etc.), which would be stored in `.zarray`, AND the `attributes` field, which would be stored in `.zattrs`. Thus the array metadata model matched the representation of an array that a program might use, rather than the stored layout.

  But the group metadata type didn't follow this pattern -- it has no `attributes` field. This PR addresses that inconsistency by adding an `attributes` field to `GroupMetadataV2`. That field is not required.

  To model the stored representation of V2 data, this PR adds 3 new types: `ZArrayMetadata`, `ZGroupMetadata`, and `ZAttrsMetadata`, that closely model the contents of the `.zarray`, `.zgroup`, and `.zattrs` documents, respectively. This change makes the V2 consolidated metadata type more accurate, as consolidated metadata for Zarr V2 is composed of inlined metadata documents.

* Revert "refactor: bring in types from zarr-metadata"

  This reverts commit 629e565.

* feat(zarr-metadata): re-export ZArrayMetadata, ZAttrsMetadata, ZGroupMetadata at top level

  The on-disk file types added in 8b7af90 were importable from the v2 submodule but not from the package root. Add them to the top-level __init__.py so consumers can import them as `zarr_metadata.ZArrayMetadata` etc.

  Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* chore: remove lockfile

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 1020ca5 commit eac9c86

14 files changed

Lines changed: 122 additions & 4067 deletions

packages/zarr-metadata/src/zarr_metadata/__init__.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,10 +4,12 @@
44
ArrayMetadataV2,
55
ArrayOrderV2,
66
DataTypeMetadataV2,
7+
ZArrayMetadata,
78
)
9+
from zarr_metadata.v2.attributes import ZAttrsMetadata
810
from zarr_metadata.v2.codec import CodecMetadataV2
911
from zarr_metadata.v2.consolidated import ConsolidatedMetadataV2
10-
from zarr_metadata.v2.group import GroupMetadataV2
12+
from zarr_metadata.v2.group import GroupMetadataV2, ZGroupMetadata
1113
from zarr_metadata.v3._common import MetadataFieldV3
1214
from zarr_metadata.v3.array import ArrayMetadataV3, ExtensionFieldV3
1315
from zarr_metadata.v3.consolidated import ConsolidatedMetadataV3
@@ -32,5 +34,8 @@
3234
"GroupMetadataV3",
3335
"MetadataFieldV3",
3436
"NamedConfig",
37+
"ZArrayMetadata",
38+
"ZAttrsMetadata",
39+
"ZGroupMetadata",
3540
"__version__",
3641
]

packages/zarr-metadata/src/zarr_metadata/v2/__init__.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,12 @@
55
ArrayMetadataV2,
66
ArrayOrderV2,
77
DataTypeMetadataV2,
8+
ZArrayMetadata,
89
)
10+
from zarr_metadata.v2.attributes import ZAttrsMetadata
911
from zarr_metadata.v2.codec import CodecMetadataV2
1012
from zarr_metadata.v2.consolidated import ConsolidatedMetadataV2
11-
from zarr_metadata.v2.group import GroupMetadataV2
13+
from zarr_metadata.v2.group import GroupMetadataV2, ZGroupMetadata
1214

1315
__all__ = [
1416
"ArrayDimensionSeparatorV2",
@@ -18,4 +20,7 @@
1820
"ConsolidatedMetadataV2",
1921
"DataTypeMetadataV2",
2022
"GroupMetadataV2",
23+
"ZArrayMetadata",
24+
"ZAttrsMetadata",
25+
"ZGroupMetadata",
2126
]

packages/zarr-metadata/src/zarr_metadata/v2/array.py

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,16 +39,39 @@
3939
"""
4040

4141

42+
class ZArrayMetadata(TypedDict):
43+
"""
44+
On-disk `.zarray` file content.
45+
46+
Strict shape of the JSON document persisted at `<path>/.zarray` for
47+
a v2 array. User attributes live in a sibling `.zattrs` file and are
48+
NOT part of this type; see `ZAttrsMetadata`.
49+
50+
See https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html
51+
"""
52+
53+
zarr_format: Literal[2]
54+
shape: tuple[int, ...]
55+
chunks: tuple[int, ...]
56+
dtype: DataTypeMetadataV2
57+
compressor: CodecMetadataV2 | None
58+
fill_value: object
59+
order: ArrayOrderV2
60+
filters: tuple[CodecMetadataV2, ...] | None
61+
dimension_separator: NotRequired[ArrayDimensionSeparatorV2]
62+
63+
4264
class ArrayMetadataV2(TypedDict):
4365
"""
44-
Zarr v2 array metadata document.
66+
Zarr v2 array metadata document, in-memory merged form.
4567
4668
Models the union of `.zarray` (the spec-defined fields) and `.zattrs`
4769
(user attributes). On disk, attributes live in a sibling `.zattrs` file
4870
and are not part of `.zarray`; this type folds them in as the
4971
`attributes` field so a single TypedDict represents the complete
5072
in-memory state of a v2 array node. Consumers that read or write a
51-
real `.zarray` file should split / merge `attributes` accordingly.
73+
real `.zarray` file should split / merge `attributes` accordingly,
74+
or use `ZArrayMetadata` (strict on-disk) plus `ZAttrsMetadata` directly.
5275
5376
See https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html
5477
"""
@@ -62,7 +85,7 @@ class ArrayMetadataV2(TypedDict):
6285
order: ArrayOrderV2
6386
filters: tuple[CodecMetadataV2, ...] | None
6487
dimension_separator: NotRequired[ArrayDimensionSeparatorV2]
65-
attributes: Mapping[str, object]
88+
attributes: NotRequired[Mapping[str, object]]
6689
"""User attributes from the sibling `.zattrs` file (not part of `.zarray`).
6790
6891
See the class docstring for the rationale behind the merged representation.
@@ -74,4 +97,5 @@ class ArrayMetadataV2(TypedDict):
7497
"ArrayMetadataV2",
7598
"ArrayOrderV2",
7699
"DataTypeMetadataV2",
100+
"ZArrayMetadata",
77101
]

packages/zarr-metadata/src/zarr_metadata/v2/attributes.py

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,20 @@
1+
"""Zarr v2 user-attributes file content.
2+
3+
See https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html
4+
"""
5+
6+
from collections.abc import Mapping
7+
8+
ZAttrsMetadata = Mapping[str, object]
9+
"""On-disk `.zattrs` file content.
10+
11+
A JSON object holding user-defined attributes for a v2 array or group.
12+
Spec-defined keys for arrays / groups live in sibling `.zarray` / `.zgroup`
13+
files (modeled by `ZArrayMetadata` / `ZGroupMetadata`). This type does not
14+
constrain the keys or values of the attributes mapping.
15+
"""
16+
17+
18+
__all__ = [
19+
"ZAttrsMetadata",
20+
]

packages/zarr-metadata/src/zarr_metadata/v2/consolidated.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,9 @@
1010

1111
from typing_extensions import TypedDict
1212

13-
from zarr_metadata.v2.array import ArrayMetadataV2
14-
from zarr_metadata.v2.group import GroupMetadataV2
13+
from zarr_metadata.v2.array import ZArrayMetadata
14+
from zarr_metadata.v2.attributes import ZAttrsMetadata
15+
from zarr_metadata.v2.group import ZGroupMetadata
1516

1617

1718
class ConsolidatedMetadataV2(TypedDict):
@@ -20,11 +21,20 @@ class ConsolidatedMetadataV2(TypedDict):
2021
2122
The `metadata` map uses flat path keys (`"foo/bar/.zarray"`,
2223
`"foo/.zattrs"`, etc.) pointing to the JSON contents of the file at
23-
that path. The keys include the filename suffix, not just the node path.
24+
that path. The keys include the filename suffix, not just the node
25+
path; the value's shape is determined by which file the key points at:
26+
27+
- `<path>/.zarray` -> `ZArrayMetadata`
28+
- `<path>/.zgroup` -> `ZGroupMetadata`
29+
- `<path>/.zattrs` -> `ZAttrsMetadata`
30+
31+
The TypedDict cannot discriminate the value shape on the key suffix
32+
at the type level; consumers should narrow at runtime by inspecting
33+
`key.endswith(".zarray")` etc.
2434
"""
2535

2636
zarr_consolidated_format: int
27-
metadata: Mapping[str, GroupMetadataV2 | ArrayMetadataV2]
37+
metadata: Mapping[str, ZArrayMetadata | ZGroupMetadata | ZAttrsMetadata]
2838

2939

3040
__all__ = [

packages/zarr-metadata/src/zarr_metadata/v2/group.py

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,24 +3,46 @@
33
See https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html
44
"""
55

6-
from typing import Literal
6+
from collections.abc import Mapping
7+
from typing import Literal, NotRequired
78

89
from typing_extensions import TypedDict
910

1011

12+
class ZGroupMetadata(TypedDict):
13+
"""
14+
On-disk `.zgroup` file content.
15+
16+
Strict shape of the JSON document persisted at `<path>/.zgroup` for
17+
a v2 group. The spec defines exactly one field. User attributes live
18+
in a sibling `.zattrs` file and are NOT part of this type; see
19+
`ZAttrsMetadata`.
20+
21+
See https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html
22+
"""
23+
24+
zarr_format: Literal[2]
25+
26+
1127
class GroupMetadataV2(TypedDict):
1228
"""
13-
Zarr v2 group metadata document (the `.zgroup` content).
29+
Zarr v2 group metadata document, in-memory merged form.
1430
15-
Attributes live in a sibling `.zattrs` file, so they are not part
16-
of this dict.
31+
Models the union of `.zgroup` (the spec-defined `zarr_format` field)
32+
and `.zattrs` (user attributes). On disk these are persisted as two
33+
separate files; this type folds them so a single TypedDict represents
34+
the complete in-memory state of a v2 group node. Consumers that read
35+
or write the real on-disk files should use `ZGroupMetadata` (strict
36+
`.zgroup`) plus `ZAttrsMetadata` directly.
1737
1838
See https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html
1939
"""
2040

2141
zarr_format: Literal[2]
42+
attributes: NotRequired[Mapping[str, object]]
2243

2344

2445
__all__ = [
2546
"GroupMetadataV2",
47+
"ZGroupMetadata",
2648
]

packages/zarr-metadata/tests/v2/array/blosc_compressor_with_filters.json

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,5 @@
1515
"filters": [
1616
{"id": "delta", "dtype": "<f8"}
1717
],
18-
"dimension_separator": ".",
19-
"attributes": {"name": "demo"}
18+
"dimension_separator": "."
2019
}

packages/zarr-metadata/tests/v2/array/empty_filters_list.json

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,5 @@
66
"compressor": {"id": "gzip", "level": 1},
77
"fill_value": 0,
88
"order": "C",
9-
"filters": [],
10-
"attributes": {}
9+
"filters": []
1110
}

packages/zarr-metadata/tests/v2/array/simple_dtype_no_compressor.json

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,5 @@
66
"compressor": null,
77
"fill_value": 0,
88
"order": "C",
9-
"filters": null,
10-
"attributes": {}
9+
"filters": null
1110
}

packages/zarr-metadata/tests/v2/array/structured_dtype.json

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,5 @@
1111
"fill_value": 0,
1212
"order": "F",
1313
"filters": null,
14-
"dimension_separator": "/",
15-
"attributes": {}
14+
"dimension_separator": "/"
1615
}

0 commit comments

Comments
 (0)