diff --git a/pyproject.toml b/pyproject.toml index 5a1d60485d..dffca78808 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -352,6 +352,7 @@ module = [ "tests.test_store.test_fsspec", "tests.test_store.test_memory", "tests.test_codecs.test_codecs", + "tests.test_metadata.*", ] strict = false @@ -359,7 +360,6 @@ strict = false # and fix the errors [[tool.mypy.overrides]] module = [ - "tests.test_metadata.*", "tests.test_store.test_core", "tests.test_store.test_logging", "tests.test_store.test_object", diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 51e638edd8..4fcddaa8b5 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -3,7 +3,7 @@ import json import warnings from asyncio import gather -from collections.abc import Iterable +from collections.abc import Iterable, Mapping from dataclasses import dataclass, field, replace from itertools import starmap from logging import getLogger @@ -3907,7 +3907,7 @@ def _build_parents( CompressorsLike: TypeAlias = ( Iterable[dict[str, JSON] | BytesBytesCodec | Numcodec] - | dict[str, JSON] + | Mapping[str, JSON] | BytesBytesCodec | Numcodec | Literal["auto"] diff --git a/tests/test_metadata/test_consolidated.py b/tests/test_metadata/test_consolidated.py index e23444cf93..0995be3c6d 100644 --- a/tests/test_metadata/test_consolidated.py +++ b/tests/test_metadata/test_consolidated.py @@ -1,7 +1,7 @@ from __future__ import annotations import json -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any import numpy as np import pytest @@ -10,8 +10,8 @@ import zarr.api.asynchronous import zarr.api.synchronous import zarr.storage +from zarr import AsyncGroup from zarr.api.asynchronous import ( - AsyncGroup, consolidate_metadata, group, open, @@ -27,11 +27,11 @@ if TYPE_CHECKING: from zarr.abc.store import Store - from zarr.core.common import ZarrFormat + from zarr.core.common import JSON, ZarrFormat @pytest.fixture -async def memory_store_with_hierarchy(memory_store: Store) -> None: +async def memory_store_with_hierarchy(memory_store: Store) -> Store: g = await group(store=memory_store, attributes={"foo": "bar"}) dtype = "uint8" await g.create_array(name="air", shape=(1, 2, 3), dtype=dtype) @@ -51,15 +51,15 @@ async def memory_store_with_hierarchy(memory_store: Store) -> None: class TestConsolidated: - async def test_open_consolidated_false_raises(self): + async def test_open_consolidated_false_raises(self) -> None: store = zarr.storage.MemoryStore() with pytest.raises(TypeError, match="use_consolidated"): - await zarr.api.asynchronous.open_consolidated(store, use_consolidated=False) + await zarr.api.asynchronous.open_consolidated(store, use_consolidated=False) # type: ignore[arg-type] - def test_open_consolidated_false_raises_sync(self): + def test_open_consolidated_false_raises_sync(self) -> None: store = zarr.storage.MemoryStore() with pytest.raises(TypeError, match="use_consolidated"): - zarr.open_consolidated(store, use_consolidated=False) + zarr.open_consolidated(store, use_consolidated=False) # type: ignore[arg-type] async def test_consolidated(self, memory_store_with_hierarchy: Store) -> None: # TODO: Figure out desired keys in @@ -75,7 +75,7 @@ async def test_consolidated(self, memory_store_with_hierarchy: Store) -> None: await consolidate_metadata(memory_store_with_hierarchy) group2 = await AsyncGroup.open(memory_store_with_hierarchy) - array_metadata = { + array_metadata: dict[str, JSON] = { "attributes": {}, "chunk_key_encoding": { "configuration": {"separator": "/"}, @@ -192,13 +192,12 @@ async def test_consolidated(self, memory_store_with_hierarchy: Store) -> None: group4 = await open_consolidated(store=memory_store_with_hierarchy) assert group4.metadata == expected - result_raw = json.loads( - ( - await memory_store_with_hierarchy.get( - "zarr.json", prototype=default_buffer_prototype() - ) - ).to_bytes() - )["consolidated_metadata"] + buf = await memory_store_with_hierarchy.get( + "zarr.json", prototype=default_buffer_prototype() + ) + assert buf is not None + + result_raw = json.loads(buf.to_bytes())["consolidated_metadata"] assert result_raw["kind"] == "inline" assert sorted(result_raw["metadata"]) == [ "air", @@ -212,7 +211,7 @@ async def test_consolidated(self, memory_store_with_hierarchy: Store) -> None: "time", ] - def test_consolidated_sync(self, memory_store): + def test_consolidated_sync(self, memory_store: Store) -> None: g = zarr.api.synchronous.group(store=memory_store, attributes={"foo": "bar"}) dtype = "uint8" g.create_array(name="air", shape=(1, 2, 3), dtype=dtype) @@ -225,9 +224,9 @@ def test_consolidated_sync(self, memory_store): match="Consolidated metadata is currently not part in the Zarr format 3 specification.", ): zarr.api.synchronous.consolidate_metadata(memory_store) - group2 = zarr.api.synchronous.Group.open(memory_store) + group2 = zarr.Group.open(memory_store) - array_metadata = { + array_metadata: dict[str, JSON] = { "attributes": {}, "chunk_key_encoding": { "configuration": {"separator": "/"}, @@ -320,8 +319,8 @@ async def test_non_root_node(self, memory_store_with_hierarchy: Store) -> None: assert "air" not in child.metadata.consolidated_metadata.metadata assert "grandchild" in child.metadata.consolidated_metadata.metadata - def test_consolidated_metadata_from_dict(self): - data = {"must_understand": False} + def test_consolidated_metadata_from_dict(self) -> None: + data: dict[str, JSON] = {"must_understand": False} # missing kind with pytest.raises(ValueError, match="kind='None'"): @@ -343,8 +342,8 @@ def test_consolidated_metadata_from_dict(self): data["metadata"] = {} ConsolidatedMetadata.from_dict(data) - def test_flatten(self): - array_metadata = { + def test_flatten(self) -> None: + array_metadata: dict[str, Any] = { "attributes": {}, "chunk_key_encoding": { "configuration": {"separator": "/"}, @@ -421,27 +420,28 @@ def test_flatten(self): }, ) result = metadata.flattened_metadata + expected = { "air": metadata.metadata["air"], "lat": metadata.metadata["lat"], "child": GroupMetadata( attributes={"key": "child"}, consolidated_metadata=ConsolidatedMetadata(metadata={}) ), - "child/array": metadata.metadata["child"].consolidated_metadata.metadata["array"], + "child/array": metadata.metadata["child"].consolidated_metadata.metadata["array"], # type: ignore[union-attr] "child/grandchild": GroupMetadata( attributes={"key": "grandchild"}, consolidated_metadata=ConsolidatedMetadata(metadata={}), ), "child/grandchild/array": ( metadata.metadata["child"] - .consolidated_metadata.metadata["grandchild"] + .consolidated_metadata.metadata["grandchild"] # type: ignore[union-attr] .consolidated_metadata.metadata["array"] ), } assert result == expected - def test_invalid_metadata_raises(self): - payload = { + def test_invalid_metadata_raises(self) -> None: + payload: dict[str, JSON] = { "kind": "inline", "must_understand": False, "metadata": { @@ -452,7 +452,7 @@ def test_invalid_metadata_raises(self): with pytest.raises(TypeError, match="key='foo', type='list'"): ConsolidatedMetadata.from_dict(payload) - def test_to_dict_empty(self): + def test_to_dict_empty(self) -> None: meta = ConsolidatedMetadata( metadata={ "empty": GroupMetadata( @@ -507,6 +507,7 @@ async def test_to_dict_order( await zarr.api.asynchronous.consolidate_metadata(memory_store) g2 = await zarr.api.asynchronous.open_group(store=memory_store) + assert g2.metadata.consolidated_metadata is not None assert list(g2.metadata.consolidated_metadata.metadata) == ["a", "b", "c"] assert list(g2.metadata.consolidated_metadata.flattened_metadata) == [ "a", @@ -517,7 +518,7 @@ async def test_to_dict_order( ] @pytest.mark.parametrize("zarr_format", [2, 3]) - async def test_open_consolidated_raises_async(self, zarr_format: ZarrFormat): + async def test_open_consolidated_raises_async(self, zarr_format: ZarrFormat) -> None: store = zarr.storage.MemoryStore() await AsyncGroup.from_store(store, zarr_format=zarr_format) with pytest.raises(ValueError): @@ -535,12 +536,15 @@ async def v2_consolidated_metadata_empty_dataset( b'{"metadata":{".zgroup":{"zarr_format":2}},"zarr_consolidated_format":1}' ) return AsyncGroup._from_bytes_v2( - None, zgroup_bytes, zattrs_bytes=None, consolidated_metadata_bytes=zmetadata_bytes + StorePath(memory_store, path=""), + zgroup_bytes, + zattrs_bytes=None, + consolidated_metadata_bytes=zmetadata_bytes, ) async def test_consolidated_metadata_backwards_compatibility( - self, v2_consolidated_metadata_empty_dataset - ): + self, v2_consolidated_metadata_empty_dataset: AsyncGroup + ) -> None: """ Test that consolidated metadata handles a missing .zattrs key. This is necessary for backwards compatibility with zarr-python 2.x. See https://github.com/zarr-developers/zarr-python/issues/2694 """ @@ -550,7 +554,7 @@ async def test_consolidated_metadata_backwards_compatibility( result = await zarr.api.asynchronous.open_consolidated(store, zarr_format=2) assert result.metadata == v2_consolidated_metadata_empty_dataset.metadata - async def test_consolidated_metadata_v2(self): + async def test_consolidated_metadata_v2(self) -> None: store = zarr.storage.MemoryStore() g = await AsyncGroup.from_store(store, attributes={"key": "root"}, zarr_format=2) dtype = parse_dtype("uint8", zarr_format=2) @@ -638,7 +642,9 @@ async def test_use_consolidated_false( assert good.metadata.consolidated_metadata assert sorted(good.metadata.consolidated_metadata.metadata) == ["a", "b"] - async def test_stale_child_metadata_ignored(self, memory_store: zarr.storage.MemoryStore): + async def test_stale_child_metadata_ignored( + self, memory_store: zarr.storage.MemoryStore + ) -> None: # https://github.com/zarr-developers/zarr-python/issues/2921 # When consolidating metadata, we should ignore any (possibly stale) metadata # from previous consolidations, *including at child nodes*. @@ -660,7 +666,7 @@ async def test_stale_child_metadata_ignored(self, memory_store: zarr.storage.Mem async def test_use_consolidated_for_children_members( self, memory_store: zarr.storage.MemoryStore - ): + ) -> None: # A test that has *unconsolidated* metadata at the root group, but discovers # a child group with consolidated metadata. @@ -690,7 +696,7 @@ async def test_use_consolidated_for_children_members( @pytest.mark.parametrize("fill_value", [np.nan, np.inf, -np.inf]) async def test_consolidated_metadata_encodes_special_chars( memory_store: Store, zarr_format: ZarrFormat, fill_value: float -): +) -> None: root = await group(store=memory_store, zarr_format=zarr_format) _time = await root.create_array("time", shape=(12,), dtype=np.float64, fill_value=fill_value) if zarr_format == 3: @@ -728,7 +734,7 @@ def supports_consolidated_metadata(self) -> bool: return False -async def test_consolidate_metadata_raises_for_self_consolidating_stores(): +async def test_consolidate_metadata_raises_for_self_consolidating_stores() -> None: """Verify calling consolidate_metadata on a non supporting stores raises an error.""" memory_store = NonConsolidatedStore() @@ -739,7 +745,7 @@ async def test_consolidate_metadata_raises_for_self_consolidating_stores(): await zarr.api.asynchronous.consolidate_metadata(memory_store) -async def test_open_group_in_non_consolidating_stores(): +async def test_open_group_in_non_consolidating_stores() -> None: memory_store = NonConsolidatedStore() root = await zarr.api.asynchronous.create_group(store=memory_store) await root.create_group("a/b") diff --git a/tests/test_metadata/test_v2.py b/tests/test_metadata/test_v2.py index e18841f1f3..424b2881d6 100644 --- a/tests/test_metadata/test_v2.py +++ b/tests/test_metadata/test_v2.py @@ -18,9 +18,11 @@ from zarr.errors import ZarrUserWarning if TYPE_CHECKING: + from pathlib import Path from typing import Any from zarr.abc.codec import Codec + from zarr.core.common import JSON def test_parse_zarr_format_valid() -> None: @@ -105,7 +107,7 @@ class TestConsolidated: async def v2_consolidated_metadata( self, memory_store: zarr.storage.MemoryStore ) -> zarr.storage.MemoryStore: - zmetadata = { + zmetadata: dict[str, JSON] = { "metadata": { ".zattrs": { "Conventions": "COARDS", @@ -160,8 +162,7 @@ async def v2_consolidated_metadata( }, "zarr_consolidated_format": 1, } - store_dict = {} - store = zarr.storage.MemoryStore(store_dict=store_dict) + store = zarr.storage.MemoryStore() await store.set( ".zattrs", cpu.Buffer.from_bytes(json.dumps({"Conventions": "COARDS"}).encode()) ) @@ -169,19 +170,19 @@ async def v2_consolidated_metadata( await store.set(".zmetadata", cpu.Buffer.from_bytes(json.dumps(zmetadata).encode())) await store.set( "air/.zarray", - cpu.Buffer.from_bytes(json.dumps(zmetadata["metadata"]["air/.zarray"]).encode()), + cpu.Buffer.from_bytes(json.dumps(zmetadata["metadata"]["air/.zarray"]).encode()), # type: ignore[index, call-overload] ) await store.set( "air/.zattrs", - cpu.Buffer.from_bytes(json.dumps(zmetadata["metadata"]["air/.zattrs"]).encode()), + cpu.Buffer.from_bytes(json.dumps(zmetadata["metadata"]["air/.zattrs"]).encode()), # type: ignore[index, call-overload] ) await store.set( "time/.zarray", - cpu.Buffer.from_bytes(json.dumps(zmetadata["metadata"]["time/.zarray"]).encode()), + cpu.Buffer.from_bytes(json.dumps(zmetadata["metadata"]["time/.zarray"]).encode()), # type: ignore[index, call-overload] ) await store.set( "time/.zattrs", - cpu.Buffer.from_bytes(json.dumps(zmetadata["metadata"]["time/.zattrs"]).encode()), + cpu.Buffer.from_bytes(json.dumps(zmetadata["metadata"]["time/.zattrs"]).encode()), # type: ignore[index, call-overload] ) # and a nested group for fun @@ -194,13 +195,13 @@ async def v2_consolidated_metadata( await store.set( "nested/array/.zarray", cpu.Buffer.from_bytes( - json.dumps(zmetadata["metadata"]["nested/array/.zarray"]).encode() + json.dumps(zmetadata["metadata"]["nested/array/.zarray"]).encode() # type: ignore[index, call-overload] ), ) await store.set( "nested/array/.zattrs", cpu.Buffer.from_bytes( - json.dumps(zmetadata["metadata"]["nested/array/.zattrs"]).encode() + json.dumps(zmetadata["metadata"]["nested/array/.zattrs"]).encode() # type: ignore[index, call-overload] ), ) @@ -208,7 +209,7 @@ async def v2_consolidated_metadata( async def test_read_consolidated_metadata( self, v2_consolidated_metadata: zarr.storage.MemoryStore - ): + ) -> None: # .zgroup, .zattrs, .metadata store = v2_consolidated_metadata group = zarr.open_consolidated(store=store, zarr_format=2) @@ -271,10 +272,13 @@ async def test_read_consolidated_metadata( result = group.metadata.consolidated_metadata assert result == expected - async def test_getitem_consolidated(self, v2_consolidated_metadata): + async def test_getitem_consolidated( + self, v2_consolidated_metadata: zarr.storage.MemoryStore + ) -> None: store = v2_consolidated_metadata group = await zarr.api.asynchronous.open_consolidated(store=store, zarr_format=2) air = await group.getitem("air") + assert isinstance(air, zarr.AsyncArray) assert air.metadata.shape == (730,) @@ -320,8 +324,10 @@ def test_zstd_checksum() -> None: @pytest.mark.parametrize("fill_value", [np.void((0, 0), np.dtype([("foo", "i4"), ("bar", "i4")]))]) -def test_structured_dtype_fill_value_serialization(tmp_path, fill_value): - zarr_format = 2 +def test_structured_dtype_fill_value_serialization( + tmp_path: Path, fill_value: np.void | np.dtype[Any] +) -> None: + zarr_format: Literal[2] = 2 group_path = tmp_path / "test.zarr" root_group = zarr.open_group(group_path, mode="w", zarr_format=zarr_format) dtype = np.dtype([("foo", "i4"), ("bar", "i4")]) @@ -335,5 +341,5 @@ def test_structured_dtype_fill_value_serialization(tmp_path, fill_value): zarr.consolidate_metadata(root_group.store, zarr_format=zarr_format) root_group = zarr.open_group(group_path, mode="r") - observed = root_group.metadata.consolidated_metadata.metadata["structured_dtype"].fill_value + observed = root_group.metadata.consolidated_metadata.metadata["structured_dtype"].fill_value # type: ignore[union-attr] assert observed == fill_value diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py index 4f385afa6d..2093696454 100644 --- a/tests/test_metadata/test_v3.py +++ b/tests/test_metadata/test_v3.py @@ -128,7 +128,7 @@ def test_jsonify_fill_value_complex(fill_value: Any, dtype_str: str) -> None: Test that parse_fill_value(fill_value, dtype) correctly handles complex values represented as length-2 sequences """ - zarr_format = 3 + zarr_format: Literal[3] = 3 dtype = get_data_type_from_native_dtype(dtype_str) expected = dtype.to_native_dtype().type(complex(*fill_value)) observed = dtype.from_json_scalar(fill_value, zarr_format=zarr_format) @@ -249,7 +249,7 @@ def test_metadata_to_dict( @pytest.mark.parametrize("indent", [2, 4, None]) -def test_json_indent(indent: int): +def test_json_indent(indent: int) -> None: with config.set({"json_indent": indent}): m = GroupMetadata() d = m.to_buffer_dict(default_buffer_prototype())["zarr.json"].to_bytes() @@ -258,9 +258,9 @@ def test_json_indent(indent: int): @pytest.mark.parametrize("fill_value", [-1, 0, 1, 2932897]) @pytest.mark.parametrize("precision", ["ns", "D"]) -async def test_datetime_metadata(fill_value: int, precision: str) -> None: +async def test_datetime_metadata(fill_value: int, precision: Literal["ns", "D"]) -> None: dtype = DateTime64(unit=precision) - metadata_dict = { + metadata_dict: dict[str, Any] = { "zarr_format": 3, "node_type": "array", "shape": (1,), @@ -284,7 +284,7 @@ async def test_datetime_metadata(fill_value: int, precision: str) -> None: ("data_type", "fill_value"), [("uint8", {}), ("int32", [0, 1]), ("float32", "foo")] ) async def test_invalid_fill_value_raises(data_type: str, fill_value: float) -> None: - metadata_dict = { + metadata_dict: dict[str, Any] = { "zarr_format": 3, "node_type": "array", "shape": (1,), @@ -301,7 +301,7 @@ async def test_invalid_fill_value_raises(data_type: str, fill_value: float) -> N @pytest.mark.parametrize("fill_value", [("NaN"), "Infinity", "-Infinity"]) async def test_special_float_fill_values(fill_value: str) -> None: - metadata_dict = { + metadata_dict: dict[str, Any] = { "zarr_format": 3, "node_type": "array", "shape": (1,),