Merge branch 'main' into ig/shard_order

ilan-gold · ilan-gold · commit 417df788f824 · 2026-03-24T12:23:36.000+01:00
diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import random
-from collections.abc import Iterable, Mapping, MutableMapping
+from collections.abc import Iterable, Mapping, MutableMapping, Sequence
 from dataclasses import dataclass, replace
 from enum import Enum
 from functools import lru_cache
@@ -46,7 +46,9 @@
 from zarr.core.dtype.npy.int import UInt64
 from zarr.core.indexing import (
     BasicIndexer,
+    ChunkProjection,
     SelectorTuple,
+    _morton_order,
     _morton_order_keys,
     c_order_iter,
     get_indexer,
@@ -543,7 +545,7 @@ async def _decode_partial_single(
         else:
             return out
 
-    def _subchunk_iter(self, chunks_per_shard: tuple[int, ...]) -> Iterable[tuple[int, ...]]:
+    def _subchunk_order_iter(self, chunks_per_shard: tuple[int, ...]) -> Iterable[tuple[int, ...]]:
         match self.subchunk_write_order:
             case SubchunkWriteOrder.morton:
                 subchunk_iter = morton_order_iter(chunks_per_shard)
@@ -557,6 +559,17 @@ def _subchunk_iter(self, chunks_per_shard: tuple[int, ...]) -> Iterable[tuple[in
                 subchunk_iter = iter(subchunk_list)
         return subchunk_iter
 
+    def _subchunk_order_vectorized(self, chunks_per_shard: tuple[int, ...]) -> npt.NDArray[np.intp]:
+        """Return the subchunk coordinates, ordered per ``subchunk_write_order``, as an array."""
+        if self.subchunk_write_order == SubchunkWriteOrder.morton:
+            # Morton order is produced directly as an array by ``_morton_order``.
+            return _morton_order(chunks_per_shard)
+        # Use np.intp rather than builtin int so the dtype matches the annotated return type.
+        return np.fromiter(
+            self._subchunk_order_iter(chunks_per_shard),
+            dtype=np.dtype((np.intp, len(chunks_per_shard))),
+        )
+
     async def _encode_single(
         self,
         shard_array: NDBuffer,
@@ -574,7 +587,7 @@ async def _encode_single(
                 chunk_grid=RegularChunkGrid(chunk_shape=chunk_shape),
             )
         )
-        shard_builder = dict.fromkeys(self._subchunk_iter(chunks_per_shard))
+        shard_builder = dict.fromkeys(self._subchunk_order_iter(chunks_per_shard))
 
         await self.codec_pipeline.write(
             [
@@ -608,23 +621,26 @@ async def _encode_partial_single(
         chunks_per_shard = self._get_chunks_per_shard(shard_spec)
         chunk_spec = self._get_chunk_spec(shard_spec)
 
-        shard_reader = await self._load_full_shard_maybe(
-            byte_getter=byte_setter,
-            prototype=chunk_spec.prototype,
-            chunks_per_shard=chunks_per_shard,
-        )
-        shard_reader = shard_reader or _ShardReader.create_empty(chunks_per_shard)
-        # Use vectorized lookup for better performance
-        shard_dict = shard_reader.to_dict_vectorized(
-            np.asarray(list(self._subchunk_iter(chunks_per_shard)))
-        )
-
         indexer = list(
             get_indexer(
                 selection, shape=shard_shape, chunk_grid=RegularChunkGrid(chunk_shape=chunk_shape)
             )
         )
 
+        if self._is_complete_shard_write(indexer, chunks_per_shard):
+            shard_dict = dict.fromkeys(self._subchunk_order_iter(chunks_per_shard))
+        else:
+            shard_reader = await self._load_full_shard_maybe(
+                byte_getter=byte_setter,
+                prototype=chunk_spec.prototype,
+                chunks_per_shard=chunks_per_shard,
+            )
+            shard_reader = shard_reader or _ShardReader.create_empty(chunks_per_shard)
+            # Use vectorized lookup for better performance
+            shard_dict = shard_reader.to_dict_vectorized(
+                self._subchunk_order_vectorized(chunks_per_shard)
+            )
+
         await self.codec_pipeline.write(
             [
                 (
@@ -661,7 +677,7 @@ async def _encode_shard_dict(
 
         template = buffer_prototype.buffer.create_zero_length()
         chunk_start = 0
-        for chunk_coords in self._subchunk_iter(chunks_per_shard):
+        for chunk_coords in self._subchunk_order_iter(chunks_per_shard):
             value = map.get(chunk_coords)
             if value is None:
                 continue
@@ -697,6 +713,16 @@ def _is_total_shard(
             chunk_coords in all_chunk_coords for chunk_coords in c_order_iter(chunks_per_shard)
         )
 
+    def _is_complete_shard_write(
+        self, indexed_chunks: Sequence[ChunkProjection], chunks_per_shard: tuple[int, ...]
+    ) -> bool:
+        """Return True if every subchunk is indexed and every projection is a complete chunk."""
+        touched_coords = {coords for coords, *_ in indexed_chunks}
+        if not self._is_total_shard(touched_coords, chunks_per_shard):
+            return False
+        # The trailing field of each projection is its ``is_complete_chunk`` flag.
+        return all(complete for *_, complete in indexed_chunks)
+
     async def _decode_shard_index(
         self, index_bytes: Buffer, chunks_per_shard: tuple[int, ...]
     ) -> _ShardIndex:
diff --git a/tests/test_array.py b/tests/test_array.py
@@ -2259,9 +2259,34 @@ def test_create_array_with_data_num_gets(
         data = zarr.zeros(shape, dtype="int64")
 
     zarr.create_array(store, data=data, chunks=chunk_shape, shards=shard_shape, fill_value=-1)  # type: ignore[arg-type]
-    # one get for the metadata and one per shard.
-    # Note: we don't actually need one get per shard, but this is the current behavior
-    assert store.counter["get"] == 1 + num_shards
+    # One get for the metadata; full-shard writes should not read shard payloads.
+    assert store.counter["get"] == 1
+
+
+@pytest.mark.parametrize(
+    ("selection", "expected_gets"),
+    [(slice(None), 0), (slice(1, 9), 1)],
+)
+def test_shard_write_num_gets(selection: slice, expected_gets: int) -> None:
+    """Overwriting a whole shard issues no store gets; overwriting part of it issues one."""
+    logged_store = LoggingStore(store=MemoryStore())
+    # A single shard made of ten one-element subchunks.
+    array = zarr.create_array(
+        logged_store,
+        shape=(10,),
+        chunks=(1,),
+        shards=(10,),
+        dtype="int64",
+        fill_value=-1,
+    )
+    # Populate the shard first so the write under test targets existing data.
+    array[:] = 0
+
+    # Only count the requests made by the write under test.
+    logged_store.counter.clear()
+    array[selection] = 1
+
+    assert logged_store.counter["get"] == expected_gets
 
 
 @pytest.mark.parametrize("config", [{}, {"write_empty_chunks": True}, {"order": "C"}])
diff --git a/tests/test_codecs/test_sharding.py b/tests/test_codecs/test_sharding.py
@@ -564,9 +564,7 @@ def test_sharding_mixed_integer_list_indexing(store: Store) -> None:
     "subchunk_write_order",
     list(SubchunkWriteOrder),
 )
-async def test_encoded_subchunk_write_order(
-    subchunk_write_order: SubchunkWriteOrder,
-) -> None:
+async def test_encoded_subchunk_write_order(subchunk_write_order: SubchunkWriteOrder) -> None:
     """Subchunks must be physically laid out in the shard in the order specified by
     ``subchunk_write_order``.  We verify this by decoding the shard index and sorting
     the chunk coordinates by their byte offset."""
@@ -612,7 +610,7 @@ async def test_encoded_subchunk_write_order(
 
     # The physical write order is recovered by sorting coordinates by start offset.
     actual_order = [coord for _, coord in sorted(offset_to_coord.items())]
-    expected_order = list(codec._subchunk_iter(chunks_per_shard))
+    expected_order = list(codec._subchunk_order_iter(chunks_per_shard))
     assert (actual_order == expected_order) == (
         subchunk_write_order != SubchunkWriteOrder.unordered
     )
@@ -622,13 +620,15 @@ async def test_encoded_subchunk_write_order(
     "subchunk_write_order",
     list(SubchunkWriteOrder),
 )
-def test_subchunk_write_order_roundtrip(subchunk_write_order: SubchunkWriteOrder) -> None:
+@pytest.mark.parametrize("do_partial", [True, False], ids=["partial", "complete"])
+def test_subchunk_write_order_roundtrip(
+    subchunk_write_order: SubchunkWriteOrder, do_partial: bool
+) -> None:
     """Data written with any ``subchunk_write_order`` must round-trip correctly."""
     chunks_per_shard = (3, 2)
     chunk_shape = (4, 4)
     shard_shape = tuple(c * s for c, s in zip(chunks_per_shard, chunk_shape, strict=True))
     data = np.arange(np.prod(shard_shape), dtype="uint16").reshape(shard_shape)
-
     arr = zarr.create_array(
         StorePath(MemoryStore()),
         shape=shard_shape,
@@ -643,5 +643,10 @@ def test_subchunk_write_order_roundtrip(subchunk_write_order: SubchunkWriteOrder
         compressors=None,
         fill_value=0,
     )
-    arr[:] = data
+    if do_partial:
+        sub_data = data[: (shard_shape[0] // 2)]
+        arr[: (shard_shape[0] // 2)] = data[: (shard_shape[0] // 2)]
+        data = np.vstack([sub_data, np.zeros_like(sub_data)])
+    else:
+        arr[:] = data
     np.testing.assert_array_equal(arr[:], data)