Commit 8831672

d-v-b and claude committed
refactor: remove dead layout methods — ChunkLayout owns only resolve_index + pack_and_store
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent f7772cc commit 8831672

1 file changed

Lines changed: 15 additions & 275 deletions

src/zarr/core/codec_pipeline.py

@@ -1026,14 +1026,17 @@ def merge_and_encode_from_index(
 class ChunkLayout:
     """Describes how a stored blob maps to one or more inner chunks.
 
-    The pipeline interacts with the layout in four phases:
-
-    1. **Resolve index** (IO) — read shard indexes to determine where
-       chunk data lives. Returns a ``ShardIndex``.
-    2. **Fetch chunks** (IO) — read the byte ranges from the index.
-    3. **Decode / merge+encode** (compute) — decode fetched bytes, or
-       merge new data and re-encode.
-    4. **Store** (IO) — write results back.
+    The pipeline interacts with the layout through two IO responsibilities:
+
+    - ``resolve_index`` — read shard indexes (if any) to determine byte
+      ranges for inner chunks. Returns a ``ShardIndex``.
+    - ``pack_and_store`` — assemble encoded chunks into a blob and write
+      it to the store.
+
+    Fetching, decoding, merging, and encoding are handled by module-level
+    functions (``fetch_chunks_sync``, ``decode_chunks_from_index``,
+    ``merge_and_encode_from_index``) that operate on the ``ShardIndex``
+    returned by ``resolve_index``.
     """
 
     chunk_shape: tuple[int, ...]
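The new docstring above splits the work between the layout (IO) and module-level helpers (compute). As a rough sketch only, not taken from this commit, a read path might chain them like this; the argument order of fetch_chunks_sync and decode_chunks_from_index is an assumption, since their signatures are not shown in the diff:

# Hypothetical read-path sketch; helper signatures are assumed, not shown in this diff.
from zarr.core.codec_pipeline import decode_chunks_from_index, fetch_chunks_sync

def read_chunk(layout, byte_getter, key, chunk_spec, prototype):
    # IO: consult the shard index (if any) to find where the inner chunks live.
    index = layout.resolve_index(byte_getter, key)
    # IO: fetch the byte ranges recorded in the ShardIndex.
    raw_chunks = fetch_chunks_sync(byte_getter, index, prototype)
    # Compute: decode the fetched bytes into an NDBuffer for this chunk.
    return decode_chunks_from_index(index, raw_chunks, chunk_spec)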
@@ -1048,37 +1051,15 @@ def is_sharded(self) -> bool:
     def needed_coords(self, chunk_selection: SelectorTuple) -> set[tuple[int, ...]] | None:
         return None
 
-    # -- Phase 1: resolve index --
+    # -- resolve index (IO) --
 
     def resolve_index(self, byte_getter: Any, key: str, chunk_selection: SelectorTuple | None = None) -> ShardIndex:
         raise NotImplementedError
 
     async def resolve_index_async(self, byte_getter: Any, key: str, chunk_selection: SelectorTuple | None = None) -> ShardIndex:
         raise NotImplementedError
 
-    # -- Phase 2: fetch chunk data --
-
-    def fetch_chunks(self, byte_getter: Any, index: ShardIndex, prototype: BufferPrototype) -> dict[tuple[int, ...], Buffer | None]:
-        raise NotImplementedError
-
-    async def fetch_chunks_async(self, byte_getter: Any, index: ShardIndex, prototype: BufferPrototype) -> dict[tuple[int, ...], Buffer | None]:
-        raise NotImplementedError
-
-    # -- Phase 3: compute --
-
-    def decode_chunks(self, raw_chunks: dict[tuple[int, ...], Buffer | None], chunk_spec: ArraySpec) -> NDBuffer:
-        raise NotImplementedError
-
-    def merge_and_encode(self, existing_chunks: dict[tuple[int, ...], Buffer | None], value: NDBuffer, chunk_spec: ArraySpec, chunk_selection: SelectorTuple, out_selection: SelectorTuple, drop_axes: tuple[int, ...]) -> dict[tuple[int, ...], Buffer | None]:
-        raise NotImplementedError
-
-    # -- Phase 4: store --
-
-    def store_chunks_sync(self, byte_setter: Any, encoded_chunks: dict[tuple[int, ...], Buffer | None], chunk_spec: ArraySpec) -> None:
-        raise NotImplementedError
-
-    async def store_chunks_async(self, byte_setter: Any, encoded_chunks: dict[tuple[int, ...], Buffer | None], chunk_spec: ArraySpec) -> None:
-        raise NotImplementedError
+    # -- pack and store (IO) --
 
     def pack_and_store_sync(self, byte_setter: Any, encoded_chunks: dict[tuple[int, ...], Buffer | None]) -> None:
         raise NotImplementedError
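After this removal, a concrete layout's contract is just the two abstract methods kept above. A hypothetical minimal subclass might look like the sketch below; the field values passed to ShardIndex (leaf_transform, is_sharded) are illustrative assumptions, not the library's actual defaults.

# Hypothetical single-blob layout; ShardIndex field values are assumptions.
class SingleBlobLayout(ChunkLayout):
    def resolve_index(self, byte_getter, key, chunk_selection=None):
        # One inner chunk stored as the whole blob, so there is no byte range to resolve.
        coord = (0,) * len(self.chunk_shape)
        return ShardIndex(key=key, chunks={coord: None}, leaf_transform=None, is_sharded=False)

    def pack_and_store_sync(self, byte_setter, encoded_chunks):
        # A single encoded chunk is the blob itself; None means "delete the key".
        blob = next(iter(encoded_chunks.values()))
        if blob is None:
            byte_setter.delete_sync()
        else:
            byte_setter.set_sync(blob)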
@@ -1115,96 +1096,7 @@ def resolve_index(self, byte_getter: Any, key: str, chunk_selection: SelectorTup
     async def resolve_index_async(self, byte_getter: Any, key: str, chunk_selection: SelectorTuple | None = None) -> ShardIndex:
         return self.resolve_index(byte_getter, key, chunk_selection)
 
-    # -- Phase 2: fetch chunk data --
-
-    def fetch_chunks(self, byte_getter: Any, index: ShardIndex, prototype: BufferPrototype) -> dict[tuple[int, ...], Buffer | None]:
-        coord = next(iter(index.chunks))
-        raw = byte_getter.get_sync(prototype=prototype)
-        return {coord: raw}  # type: ignore[no-any-return]
-
-    async def fetch_chunks_async(self, byte_getter: Any, index: ShardIndex, prototype: BufferPrototype) -> dict[tuple[int, ...], Buffer | None]:
-        coord = next(iter(index.chunks))
-        raw = await byte_getter.get(prototype=prototype)
-        return {coord: raw}  # type: ignore[no-any-return]
-
-    # -- Phase 3: compute --
-
-    def decode_chunks(self, raw_chunks: dict[tuple[int, ...], Buffer | None], chunk_spec: ArraySpec) -> NDBuffer:
-        raw = next(iter(raw_chunks.values()))
-        if raw is None:
-            return chunk_spec.prototype.nd_buffer.create(
-                shape=chunk_spec.shape,
-                dtype=chunk_spec.dtype.to_native_dtype(),
-                order=chunk_spec.order,
-                fill_value=fill_value_or_default(chunk_spec),
-            )
-        chunk_shape = chunk_spec.shape if chunk_spec.shape != self.chunk_shape else None
-        return self.inner_transform.decode_chunk(raw, chunk_shape=chunk_shape)
-
-    def encode(
-        self,
-        chunk_array: NDBuffer,
-        chunk_spec: ArraySpec,
-    ) -> Buffer | None:
-        chunk_shape = chunk_spec.shape if chunk_spec.shape != self.chunk_shape else None
-        return self.inner_transform.encode_chunk(chunk_array, chunk_shape=chunk_shape)
-
-    def merge_and_encode(self, existing_chunks: dict[tuple[int, ...], Buffer | None], value: NDBuffer, chunk_spec: ArraySpec, chunk_selection: SelectorTuple, out_selection: SelectorTuple, drop_axes: tuple[int, ...]) -> dict[tuple[int, ...], Buffer | None]:
-        coord = next(iter(existing_chunks)) if existing_chunks else (0,) * len(self.chunks_per_shard)
-
-        # Decode existing
-        existing_raw = existing_chunks.get(coord)
-        if existing_raw is not None:
-            chunk_array = self.inner_transform.decode_chunk(existing_raw, chunk_shape=chunk_spec.shape)
-            if not chunk_array.as_ndarray_like().flags.writeable:  # type: ignore[attr-defined]
-                chunk_array = chunk_spec.prototype.nd_buffer.from_ndarray_like(
-                    chunk_array.as_ndarray_like().copy()
-                )
-        else:
-            chunk_array = chunk_spec.prototype.nd_buffer.create(
-                shape=chunk_spec.shape,
-                dtype=chunk_spec.dtype.to_native_dtype(),
-                fill_value=fill_value_or_default(chunk_spec),
-            )
-
-        # Merge value
-        if chunk_selection == () or is_scalar(
-            value.as_ndarray_like(), chunk_spec.dtype.to_native_dtype()
-        ):
-            chunk_value = value
-        else:
-            chunk_value = value[out_selection]
-            if drop_axes:
-                item = tuple(
-                    None if idx in drop_axes else slice(None) for idx in range(chunk_spec.ndim)
-                )
-                chunk_value = chunk_value[item]
-        chunk_array[chunk_selection] = chunk_value
-
-        # Check write_empty_chunks
-        if not chunk_spec.config.write_empty_chunks and chunk_array.all_equal(
-            chunk_spec.fill_value
-        ):
-            return {coord: None}
-
-        encoded = self.encode(chunk_array, chunk_spec)
-        return {coord: encoded}
-
-    # -- Phase 4: store --
-
-    def store_chunks_sync(self, byte_setter: Any, encoded_chunks: dict[tuple[int, ...], Buffer | None], chunk_spec: ArraySpec) -> None:
-        blob = next(iter(encoded_chunks.values()))
-        if blob is None:
-            byte_setter.delete_sync()  # type: ignore[attr-defined]
-        else:
-            byte_setter.set_sync(blob)  # type: ignore[attr-defined]
-
-    async def store_chunks_async(self, byte_setter: Any, encoded_chunks: dict[tuple[int, ...], Buffer | None], chunk_spec: ArraySpec) -> None:
-        blob = next(iter(encoded_chunks.values()))
-        if blob is None:
-            await byte_setter.delete()
-        else:
-            await byte_setter.set(blob)
+    # -- pack and store --
 
     def pack_and_store_sync(self, byte_setter: Any, encoded_chunks: dict[tuple[int, ...], Buffer | None]) -> None:
         coord = (0,) * len(self.chunks_per_shard)
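On the write side, the removed merge_and_encode and store_chunks_* methods above give way to the module-level merge_and_encode_from_index followed by pack_and_store. A rough sketch of the resulting flow; the signature of merge_and_encode_from_index is assumed to mirror the removed method, and fetch_chunks_sync is the same assumed helper as in the read-path sketch:

# Hypothetical write-path sketch; helper signatures are assumptions, not shown in this diff.
from zarr.core.codec_pipeline import fetch_chunks_sync, merge_and_encode_from_index

def write_chunk(layout, byte_getter, byte_setter, key, value, chunk_spec,
                chunk_selection, out_selection, drop_axes, prototype):
    # IO: resolve the index and fetch whatever is already stored for this chunk/shard.
    index = layout.resolve_index(byte_getter, key, chunk_selection)
    existing = fetch_chunks_sync(byte_getter, index, prototype)
    # Compute: merge the new value into the existing chunks and re-encode.
    encoded = merge_and_encode_from_index(
        index, existing, value, chunk_spec, chunk_selection, out_selection, drop_axes
    )
    # IO: pack the encoded chunks into a blob and write (or delete) it.
    layout.pack_and_store_sync(byte_setter, encoded)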
@@ -1550,130 +1442,7 @@ async def resolve_index_async(self, byte_getter: Any, key: str, chunk_selection:
 
         return ShardIndex(key=key, chunks=flat, leaf_transform=leaf_transform, is_sharded=True)
 
-    # -- Phase 2: fetch chunk data --
-
-    def fetch_chunks(self, byte_getter: Any, index: ShardIndex, prototype: BufferPrototype) -> dict[tuple[int, ...], Buffer | None]:
-        result: dict[tuple[int, ...], Buffer | None] = {}
-        for coord, byte_range in index.chunks.items():
-            if byte_range is None:
-                result[coord] = None
-            else:
-                result[coord] = byte_getter.get_sync(prototype=prototype, byte_range=byte_range)  # type: ignore[no-any-return]
-        return result
-
-    async def fetch_chunks_async(self, byte_getter: Any, index: ShardIndex, prototype: BufferPrototype) -> dict[tuple[int, ...], Buffer | None]:
-        result: dict[tuple[int, ...], Buffer | None] = {}
-        for coord, byte_range in index.chunks.items():
-            if byte_range is None:
-                result[coord] = None
-            else:
-                result[coord] = await byte_getter.get(prototype=prototype, byte_range=byte_range)
-        return result
-
-    # -- Phase 3: compute --
-
-    def decode_chunks(self, raw_chunks: dict[tuple[int, ...], Buffer | None], chunk_spec: ArraySpec) -> NDBuffer:
-        return self._decode_per_chunk(raw_chunks, chunk_spec)
-
-    def merge_and_encode(self, existing_chunks: dict[tuple[int, ...], Buffer | None], value: NDBuffer, chunk_spec: ArraySpec, chunk_selection: SelectorTuple, out_selection: SelectorTuple, drop_axes: tuple[int, ...]) -> dict[tuple[int, ...], Buffer | None]:
-        from zarr.core.chunk_grids import ChunkGrid as _ChunkGrid
-        from zarr.core.indexing import get_indexer
-
-        chunk_dict = dict(existing_chunks)
-
-        # Fill missing coords with None
-        for coord in np.ndindex(self.chunks_per_shard):
-            if coord not in chunk_dict:
-                chunk_dict[coord] = None
-
-        inner_spec = ArraySpec(
-            shape=self.inner_chunk_shape,
-            dtype=chunk_spec.dtype,
-            fill_value=chunk_spec.fill_value,
-            config=chunk_spec.config,
-            prototype=chunk_spec.prototype,
-        )
-
-        # Extract the shard's portion of the write value.
-        if is_scalar(value.as_ndarray_like(), chunk_spec.dtype.to_native_dtype()):
-            shard_value = value
-        else:
-            shard_value = value[out_selection]
-            if drop_axes:
-                item = tuple(
-                    None if idx in drop_axes else slice(None)
-                    for idx in range(len(chunk_spec.shape))
-                )
-                shard_value = shard_value[item]
-
-        # Determine which inner chunks are affected
-        indexer = get_indexer(
-            chunk_selection,
-            shape=chunk_spec.shape,
-            chunk_grid=_ChunkGrid.from_sizes(chunk_spec.shape, self.inner_chunk_shape),
-        )
-
-        for inner_coords, inner_sel, value_sel, _ in indexer:
-            existing_bytes = chunk_dict.get(inner_coords)
-
-            # Decode just this inner chunk
-            if existing_bytes is not None:
-                inner_array = self.inner_transform.decode_chunk(existing_bytes)
-                if not inner_array.as_ndarray_like().flags.writeable:  # type: ignore[attr-defined]
-                    inner_array = inner_spec.prototype.nd_buffer.from_ndarray_like(
-                        inner_array.as_ndarray_like().copy()
-                    )
-            else:
-                inner_array = inner_spec.prototype.nd_buffer.create(
-                    shape=inner_spec.shape,
-                    dtype=inner_spec.dtype.to_native_dtype(),
-                    fill_value=fill_value_or_default(inner_spec),
-                )
-
-            # Merge new data
-            if inner_sel == () or is_scalar(
-                shard_value.as_ndarray_like(), inner_spec.dtype.to_native_dtype()
-            ):
-                inner_value = shard_value
-            else:
-                inner_value = shard_value[value_sel]
-            inner_array[inner_sel] = inner_value
-
-            # Re-encode
-            if not chunk_spec.config.write_empty_chunks and inner_array.all_equal(
-                chunk_spec.fill_value
-            ):
-                chunk_dict[inner_coords] = None
-            else:
-                chunk_dict[inner_coords] = self.inner_transform.encode_chunk(inner_array)
-
-        return chunk_dict
-
-    # -- Phase 4: store --
-
-    def store_chunks_sync(self, byte_setter: Any, encoded_chunks: dict[tuple[int, ...], Buffer | None], chunk_spec: ArraySpec) -> None:
-        from zarr.core.buffer import default_buffer_prototype
-
-        if all(v is None for v in encoded_chunks.values()):
-            byte_setter.delete_sync()  # type: ignore[attr-defined]
-        else:
-            blob = self.pack_blob(encoded_chunks, default_buffer_prototype())
-            if blob is None:
-                byte_setter.delete_sync()  # type: ignore[attr-defined]
-            else:
-                byte_setter.set_sync(blob)  # type: ignore[attr-defined]
-
-    async def store_chunks_async(self, byte_setter: Any, encoded_chunks: dict[tuple[int, ...], Buffer | None], chunk_spec: ArraySpec) -> None:
-        from zarr.core.buffer import default_buffer_prototype
-
-        if all(v is None for v in encoded_chunks.values()):
-            await byte_setter.delete()
-        else:
-            blob = self.pack_blob(encoded_chunks, default_buffer_prototype())
-            if blob is None:
-                await byte_setter.delete()
-            else:
-                await byte_setter.set(blob)
+    # -- pack and store --
 
     def pack_and_store_sync(self, byte_setter: Any, encoded_chunks: dict[tuple[int, ...], Buffer | None]) -> None:
         from zarr.core.buffer import default_buffer_prototype
@@ -1699,35 +1468,6 @@ async def pack_and_store_async(self, byte_setter: Any, encoded_chunks: dict[tupl
         else:
             await byte_setter.set(blob)
 
-    def _decode_per_chunk(
-        self,
-        chunk_dict: dict[tuple[int, ...], Buffer | None],
-        shard_spec: ArraySpec,
-    ) -> NDBuffer:
-        """Assemble inner chunk buffers into a chunk-shaped array."""
-        out = shard_spec.prototype.nd_buffer.empty(
-            shape=shard_spec.shape,
-            dtype=shard_spec.dtype.to_native_dtype(),
-            order=shard_spec.order,
-        )
-
-        inner_shape = self.inner_chunk_shape
-        fill = fill_value_or_default(shard_spec)
-        decode = self.inner_transform.decode_chunk
-
-        for coords, chunk_bytes in chunk_dict.items():
-            out_selection = tuple(
-                slice(c * s, min((c + 1) * s, sh))
-                for c, s, sh in zip(coords, inner_shape, shard_spec.shape, strict=True)
-            )
-            if chunk_bytes is not None:
-                chunk_array = decode(chunk_bytes)
-                out[out_selection] = chunk_array
-            else:
-                out[out_selection] = fill
-
-        return out
-
     async def _fetch_index(self, byte_getter: Any) -> Any:
         from zarr.abc.store import RangeByteRequest, SuffixByteRequest
         from zarr.codecs.sharding import ShardingCodecIndexLocation
