Skip to content

Commit be160fd

Browse files
authored
misc(zarr-metadata): typed constants and fixes (zarr-developers#3978)
* fix: correct bug in bytes + gzip codec metadata object types
* feat: add typed constants
1 parent 2e58a7a commit be160fd

10 files changed

Lines changed: 84 additions & 28 deletions

File tree

packages/zarr-metadata/src/zarr_metadata/v2/array.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""Zarr v2 array metadata types."""
22

33
from collections.abc import Mapping
4-
from typing import Literal, NotRequired
4+
from typing import Final, Literal, NotRequired
55

66
from typing_extensions import TypedDict
77

@@ -22,22 +22,28 @@
2222
"""
2323

2424
ArrayOrderV2 = Literal["C", "F"]
25-
"""Permitted values for the `order` field of v2 array metadata.
25+
"""Literal type of permitted values for the `order` field of v2 array metadata.
2626
2727
`"C"` (row-major) or `"F"` (column-major) — the in-chunk byte layout.
2828
2929
See https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html
3030
"""
3131

32+
ARRAY_ORDER_V2: Final = ("C", "F")
33+
"""Tuple of permitted values for the `order` field of v2 array metadata."""
34+
3235
ArrayDimensionSeparatorV2 = Literal[".", "/"]
33-
"""Permitted values for the `dimension_separator` field of v2 array metadata.
36+
"""Literal type of permitted values for the `dimension_separator` field of v2 array metadata.
3437
3538
`"."` (legacy default) joins chunk grid coordinates as `0.0`, `0.1`, ...
3639
`"/"` joins them as `0/0`, `0/1`, ... yielding nested directories.
3740
3841
See https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html
3942
"""
4043

44+
ARRAY_DIMENSION_SEPARATOR_V2: Final = (".", "/")
45+
"""Tuple of permitted values for the `dimension_separator` field of v2 array metadata."""
46+
4147

4248
class ZArrayMetadata(TypedDict):
4349
"""
@@ -93,6 +99,8 @@ class ArrayMetadataV2(TypedDict):
9399

94100

95101
__all__ = [
102+
"ARRAY_DIMENSION_SEPARATOR_V2",
103+
"ARRAY_ORDER_V2",
96104
"ArrayDimensionSeparatorV2",
97105
"ArrayMetadataV2",
98106
"ArrayOrderV2",

packages/zarr-metadata/src/zarr_metadata/v3/chunk_key_encoding/default.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,14 @@
1818
"""Literal type of the `name` field of the default chunk key encoding."""
1919

2020
DefaultChunkKeyEncodingSeparator = Literal["/", "."]
21-
"""Permitted `separator` values for the default chunk key encoding.
21+
"""Literal type of permitted `separator` values for the default chunk key encoding.
2222
2323
Defaults to `"/"` if absent.
2424
"""
2525

26+
DEFAULT_CHUNK_KEY_ENCODING_SEPARATOR: Final = ("/", ".")
27+
"""Tuple of permitted values for the `separator` field of the default chunk key encoding."""
28+
2629

2730
class DefaultChunkKeyEncodingConfiguration(TypedDict):
2831
"""Configuration for the default chunk key encoding.
@@ -49,6 +52,7 @@ class DefaultChunkKeyEncodingObject(TypedDict):
4952

5053
__all__ = [
5154
"DEFAULT_CHUNK_KEY_ENCODING_NAME",
55+
"DEFAULT_CHUNK_KEY_ENCODING_SEPARATOR",
5256
"DefaultChunkKeyEncodingConfiguration",
5357
"DefaultChunkKeyEncodingMetadata",
5458
"DefaultChunkKeyEncodingName",

packages/zarr-metadata/src/zarr_metadata/v3/chunk_key_encoding/v2.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,14 @@
1818
"""Literal type of the `name` field of the v2 chunk key encoding."""
1919

2020
V2ChunkKeyEncodingSeparator = Literal["/", "."]
21-
"""Permitted `separator` values for the v2 chunk key encoding.
21+
"""Literal type of permitted `separator` values for the v2 chunk key encoding.
2222
2323
Defaults to `"."` if absent.
2424
"""
2525

26+
V2_CHUNK_KEY_ENCODING_SEPARATOR: Final = ("/", ".")
27+
"""Tuple of permitted values for the `separator` field of the v2 chunk key encoding."""
28+
2629

2730
class V2ChunkKeyEncodingConfiguration(TypedDict):
2831
"""Configuration for the v2 chunk key encoding.
@@ -49,6 +52,7 @@ class V2ChunkKeyEncodingObject(TypedDict):
4952

5053
__all__ = [
5154
"V2_CHUNK_KEY_ENCODING_NAME",
55+
"V2_CHUNK_KEY_ENCODING_SEPARATOR",
5256
"V2ChunkKeyEncodingConfiguration",
5357
"V2ChunkKeyEncodingMetadata",
5458
"V2ChunkKeyEncodingName",

packages/zarr-metadata/src/zarr_metadata/v3/codec/blosc.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,16 @@
1515
"""Literal type of the `name` field of the `blosc` codec."""
1616

1717
BloscShuffle = Literal["noshuffle", "shuffle", "bitshuffle"]
18-
"""Blosc shuffle mode names."""
18+
"""Literal type of blosc shuffle mode names."""
19+
20+
BLOSC_SHUFFLE: Final = ("noshuffle", "shuffle", "bitshuffle")
21+
"""Tuple of permitted values for the `shuffle` field of the `blosc` codec."""
1922

2023
BloscCName = Literal["lz4", "lz4hc", "blosclz", "snappy", "zlib", "zstd"]
21-
"""Blosc compressor identifiers."""
24+
"""Literal type of blosc compressor identifiers."""
25+
26+
BLOSC_CNAME: Final = ("lz4", "lz4hc", "blosclz", "snappy", "zlib", "zstd")
27+
"""Tuple of permitted values for the `cname` field of the `blosc` codec."""
2228

2329

2430
class BloscCodecConfiguration(TypedDict):
@@ -47,7 +53,9 @@ class BloscCodecObject(TypedDict):
4753
"""
4854

4955
__all__ = [
56+
"BLOSC_CNAME",
5057
"BLOSC_CODEC_NAME",
58+
"BLOSC_SHUFFLE",
5159
"BloscCName",
5260
"BloscCodecConfiguration",
5361
"BloscCodecMetadata",

packages/zarr-metadata/src/zarr_metadata/v3/codec/bytes.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,10 @@
1515
"""Literal type of the `name` field of the `bytes` codec."""
1616

1717
Endian = Literal["little", "big"]
18-
"""Byte order of multi-byte numeric data."""
18+
"""Literal type of byte order of multi-byte numeric data."""
19+
20+
ENDIAN: Final = ("little", "big")
21+
"""Tuple of permitted values for the `endian` field of the `bytes` codec."""
1922

2023

2124
class BytesCodecConfiguration(TypedDict):
@@ -29,22 +32,30 @@ class BytesCodecConfiguration(TypedDict):
2932

3033

3134
class BytesCodecObject(TypedDict):
32-
"""`bytes` codec metadata in object form."""
35+
"""`bytes` codec metadata in object form.
36+
37+
`configuration` is itself optional — when no configuration fields are
38+
set, the entire `configuration` key may be omitted. This matches the
39+
bare-string short-hand form (`BytesCodecName`) at the canonical data
40+
level; both encodings describe a `bytes` codec with default settings.
41+
"""
3342

3443
name: BytesCodecName
35-
configuration: BytesCodecConfiguration
44+
configuration: NotRequired[BytesCodecConfiguration]
3645

3746

3847
BytesCodecMetadata = BytesCodecObject | BytesCodecName
3948
"""Permitted JSON shapes for `bytes` codec metadata.
4049
4150
The configuration has no required keys (`endian` is conditionally required
4251
at runtime based on data type), so the spec's short-hand-name form is
43-
permitted in addition to the object form.
52+
permitted in addition to the object form, and the object form may itself
53+
omit `configuration` entirely.
4454
"""
4555

4656
__all__ = [
4757
"BYTES_CODEC_NAME",
58+
"ENDIAN",
4859
"BytesCodecConfiguration",
4960
"BytesCodecMetadata",
5061
"BytesCodecName",

packages/zarr-metadata/src/zarr_metadata/v3/codec/cast_value.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,17 +23,29 @@
2323
"towards-negative",
2424
"nearest-away",
2525
]
26-
"""Permitted values for the `rounding` configuration field.
26+
"""Literal type of permitted values for the `rounding` configuration field.
2727
2828
Defaults to `"nearest-even"` if absent.
2929
"""
3030

31+
ROUNDING_MODE: Final = (
32+
"nearest-even",
33+
"towards-zero",
34+
"towards-positive",
35+
"towards-negative",
36+
"nearest-away",
37+
)
38+
"""Tuple of permitted values for the `rounding` field of the `cast_value` codec."""
39+
3140
OutOfRangeMode = Literal["clamp", "wrap"]
32-
"""Permitted values for the `out_of_range` configuration field.
41+
"""Literal type of permitted values for the `out_of_range` configuration field.
3342
3443
If absent, out-of-range values are an encoding/decoding error.
3544
"""
3645

46+
OUT_OF_RANGE_MODE: Final = ("clamp", "wrap")
47+
"""Tuple of permitted values for the `out_of_range` field of the `cast_value` codec."""
48+
3749
ScalarMapEntry = tuple[object, object]
3850
"""A single `[input, output]` mapping in a `scalar_map` direction.
3951
@@ -81,6 +93,8 @@ class CastValueCodecObject(TypedDict):
8193

8294
__all__ = [
8395
"CAST_VALUE_CODEC_NAME",
96+
"OUT_OF_RANGE_MODE",
97+
"ROUNDING_MODE",
8498
"CastValueCodecConfiguration",
8599
"CastValueCodecMetadata",
86100
"CastValueCodecName",

packages/zarr-metadata/src/zarr_metadata/v3/codec/gzip.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
See https://zarr-specs.readthedocs.io/en/latest/v3/codecs/gzip/index.html
55
"""
66

7-
from typing import Final, Literal, NotRequired
7+
from typing import Final, Literal
88

99
from typing_extensions import TypedDict
1010

@@ -20,11 +20,15 @@ class GzipCodecConfiguration(TypedDict):
2020
Configuration for the Zarr v3 `gzip` codec.
2121
2222
`level` is an integer in the range 0-9; 0 disables compression and 9
23-
is slowest with the best compression ratio. The spec does not mandate
24-
a default.
23+
is slowest with the best compression ratio. The codec's compressed
24+
output depends on `level`, so metadata that omits it cannot
25+
reproducibly identify the chunk bytes produced by a writer — `level`
26+
is required for the metadata to fulfill its reproducibility role,
27+
even though the spec text does not mark it required with RFC 2119
28+
keywords.
2529
"""
2630

27-
level: NotRequired[int]
31+
level: int
2832

2933

3034
class GzipCodecObject(TypedDict):
@@ -34,11 +38,12 @@ class GzipCodecObject(TypedDict):
3438
configuration: GzipCodecConfiguration
3539

3640

37-
GzipCodecMetadata = GzipCodecObject | GzipCodecName
38-
"""Permitted JSON shapes for `gzip` codec metadata.
41+
GzipCodecMetadata = GzipCodecObject
42+
"""Permitted JSON shape for `gzip` codec metadata.
3943
40-
The configuration has no required keys (`level` has no spec-mandated
41-
default but is `NotRequired`), so the short-hand-name form is permitted.
44+
`configuration.level` is required (it determines the codec's output bytes
45+
and is therefore part of the metadata's reproducibility contract), so
46+
only the object form is valid; the short-hand-name form is not permitted.
4247
"""
4348

4449
__all__ = [

packages/zarr-metadata/src/zarr_metadata/v3/codec/sharding_indexed.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@
1717
"""Literal type of the `name` field of the `sharding_indexed` codec."""
1818

1919
IndexLocation = Literal["start", "end"]
20-
"""Position of the shard index within the encoded shard."""
20+
"""Literal type of the position of the shard index within the encoded shard."""
21+
22+
INDEX_LOCATION: Final = ("start", "end")
23+
"""Tuple of permitted values for the `index_location` field of the `sharding_indexed` codec."""
2124

2225

2326
class ShardingIndexedCodecConfiguration(TypedDict):
@@ -58,6 +61,7 @@ class ShardingIndexedCodecObject(TypedDict):
5861
"""
5962

6063
__all__ = [
64+
"INDEX_LOCATION",
6165
"SHARDING_INDEXED_CODEC_NAME",
6266
"IndexLocation",
6367
"ShardingIndexedCodecConfiguration",

packages/zarr-metadata/tests/v3/codec/bytes/cases.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,8 @@
1111
"name": "bytes",
1212
"configuration": {}
1313
},
14+
"no_configuration": {
15+
"name": "bytes"
16+
},
1417
"short_hand_name": "bytes"
1518
}

packages/zarr-metadata/tests/v3/codec/gzip/cases.json

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,5 @@
22
"with_level": {
33
"name": "gzip",
44
"configuration": {"level": 5}
5-
},
6-
"no_level": {
7-
"name": "gzip",
8-
"configuration": {}
9-
},
10-
"short_hand_name": "gzip"
5+
}
116
}

0 commit comments

Comments
 (0)