Skip to content

Commit 3c27e49

Browse files
committed
feat: complete second codec pipeline
1 parent 47a407f commit 3c27e49

7 files changed

Lines changed: 682 additions & 161 deletions

File tree

src/zarr/abc/codec.py

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
"GetResult",
3737
"PreparedWrite",
3838
"SupportsChunkCodec",
39-
"SupportsChunkPacking",
39+
"SupportsChunkMapping",
4040
"SupportsSyncCodec",
4141
]
4242

@@ -100,21 +100,26 @@ def encode_chunk(self, chunk_array: NDBuffer) -> Buffer | None: ...
100100

101101

102102
@runtime_checkable
103-
class SupportsChunkPacking(Protocol):
104-
"""Protocol for codecs that can pack/unpack inner chunks into a storage blob
105-
and manage the prepare/finalize IO lifecycle.
106-
107-
`BytesCodec` and `ShardingCodec` implement this protocol. The pipeline
108-
uses it to separate IO (prepare/finalize) from compute (encode/decode),
109-
enabling the compute phase to run in a thread pool.
110-
111-
The lifecycle is:
112-
113-
1. **Prepare**: fetch existing bytes from the store (if partial write),
114-
unpack into per-inner-chunk buffers → `PreparedWrite`
115-
2. **Compute**: iterate `PreparedWrite.indexer`, decode each inner chunk,
116-
merge new data, re-encode, update `PreparedWrite.chunk_dict`
117-
3. **Finalize**: pack `chunk_dict` back into a blob and write to store
103+
class SupportsChunkMapping(Protocol):
104+
"""Protocol for codecs that expose their stored data as a mapping
105+
from chunk coordinates to encoded buffers.
106+
107+
A single store key holds a blob. This protocol defines how to
108+
interpret that blob as a ``dict[tuple[int, ...], Buffer | None]`` —
109+
a mapping from inner-chunk coordinates to their encoded bytes.
110+
111+
For a non-sharded codec (``BytesCodec``), the mapping is trivial:
112+
one entry at ``(0,)`` containing the entire blob. For a sharded
113+
codec, the mapping has one entry per inner chunk, derived from the
114+
shard index embedded in the blob. The pipeline doesn't need to know
115+
which case it's dealing with — it operates on the mapping uniformly.
116+
117+
This abstraction enables the three-phase IO/compute/IO pattern:
118+
119+
1. **IO**: fetch the blob from the store.
120+
2. **Compute**: unpack the blob into the chunk mapping, decode/merge/
121+
re-encode entries, pack back into a blob. All pure compute.
122+
3. **IO**: write the blob to the store.
118123
"""
119124

120125
@property

src/zarr/codecs/bytes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ async def _encode_single(
127127
def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int:
128128
return input_byte_length
129129

130-
# -- SupportsChunkPacking --
130+
# -- SupportsChunkMapping --
131131

132132
@property
133133
def inner_codec_chain(self) -> SupportsChunkCodec | None:

src/zarr/codecs/sharding.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
numpy_buffer_prototype,
3636
)
3737
from zarr.core.chunk_grids import ChunkGrid, RegularChunkGrid
38+
from zarr.core.codec_pipeline import ChunkTransform
3839
from zarr.core.common import (
3940
ShapeLike,
4041
parse_enum,
@@ -423,10 +424,8 @@ def _get_inner_chunk_transform(self, shard_spec: ArraySpec) -> Any:
423424
evolved = tuple(c.evolve_from_array_spec(array_spec=chunk_spec) for c in self.codecs)
424425
return ChunkTransform(codecs=evolved, array_spec=chunk_spec)
425426

426-
def _get_index_chunk_transform(self, chunks_per_shard: tuple[int, ...]) -> Any:
427+
def _get_index_chunk_transform(self, chunks_per_shard: tuple[int, ...]) -> ChunkTransform:
427428
"""Build a ChunkTransform for index codecs."""
428-
from zarr.core.codec_pipeline import ChunkTransform
429-
430429
index_spec = self._get_index_chunk_spec(chunks_per_shard)
431430
evolved = tuple(c.evolve_from_array_spec(array_spec=index_spec) for c in self.index_codecs)
432431
return ChunkTransform(codecs=evolved, array_spec=index_spec)
@@ -523,7 +522,7 @@ def _encode_sync(
523522
morton_order_iter(chunks_per_shard)
524523
)
525524

526-
for chunk_coords, chunk_selection, out_selection, _ in indexer:
525+
for chunk_coords, _chunk_selection, out_selection, _ in indexer:
527526
chunk_array = shard_array[out_selection]
528527
encoded = inner_transform.encode_chunk(chunk_array)
529528
shard_builder[chunk_coords] = encoded

src/zarr/core/array.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,17 @@ def create_codec_pipeline(metadata: ArrayMetadata, *, store: Store | None = None
205205
pass
206206

207207
if isinstance(metadata, ArrayV3Metadata):
208-
return get_pipeline_class().from_codecs(metadata.codecs)
208+
pipeline = get_pipeline_class().from_codecs(metadata.codecs)
209+
# PhasedCodecPipeline needs evolve_from_array_spec to build its
210+
# ChunkTransform and ShardLayout. BatchedCodecPipeline does not.
211+
if hasattr(pipeline, "chunk_transform") and pipeline.chunk_transform is None:
212+
chunk_spec = metadata.get_chunk_spec(
213+
(0,) * len(metadata.shape),
214+
ArrayConfig.from_dict({}),
215+
default_buffer_prototype(),
216+
)
217+
pipeline = pipeline.evolve_from_array_spec(chunk_spec)
218+
return pipeline
209219
elif isinstance(metadata, ArrayV2Metadata):
210220
v2_codec = V2Codec(filters=metadata.filters, compressor=metadata.compressor)
211221
return get_pipeline_class().from_codecs([v2_codec])

0 commit comments

Comments (0)