zarr-developers
diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py
Lines changed: 21 additions & 12 deletions
diff --git a/src/zarr/core/codec_pipeline.py b/src/zarr/core/codec_pipeline.py
Lines changed: 60 additions & 135 deletions
@@ -429,34 +429,40 @@ def validate(
                     )
 
     def _get_inner_chunk_transform(self, shard_spec: ArraySpec) -> Any:
-        """Build a ChunkTransform for inner codecs, bound to the inner chunk spec."""
+        """Build a ChunkTransform for the inner codec chain.
+
+        The codecs are evolved against the inner chunk spec derived from
+        ``shard_spec``. The runtime chunk_spec is supplied per call.
+        """
         from zarr.core.codec_pipeline import ChunkTransform
 
         chunk_spec = self._get_chunk_spec(shard_spec)
         evolved = tuple(c.evolve_from_array_spec(array_spec=chunk_spec) for c in self.codecs)
-        return ChunkTransform(codecs=evolved, array_spec=chunk_spec)
+        return ChunkTransform(codecs=evolved)  # NOTE(review): result is not cached here; confirm
 
     def _get_index_chunk_transform(self, chunks_per_shard: tuple[int, ...]) -> Any:
-        """Build a ChunkTransform for index codecs."""
+        """Build a ChunkTransform for the index codec chain (evolved against the index spec)."""
         from zarr.core.codec_pipeline import ChunkTransform
 
         index_spec = self._get_index_chunk_spec(chunks_per_shard)
         evolved = tuple(c.evolve_from_array_spec(array_spec=index_spec) for c in self.index_codecs)
-        return ChunkTransform(codecs=evolved, array_spec=index_spec)
+        return ChunkTransform(codecs=evolved)
 
     def _decode_shard_index_sync(
         self, index_bytes: Buffer, chunks_per_shard: tuple[int, ...]
     ) -> _ShardIndex:
         """Decode shard index synchronously using ChunkTransform."""
         index_transform = self._get_index_chunk_transform(chunks_per_shard)
-        index_array = index_transform.decode_chunk(index_bytes)
+        index_spec = self._get_index_chunk_spec(chunks_per_shard)  # supplied per call
+        index_array = index_transform.decode_chunk(index_bytes, index_spec)
         return _ShardIndex(index_array.as_numpy_array())
 
     def _encode_shard_index_sync(self, index: _ShardIndex) -> Buffer:
         """Encode shard index synchronously using ChunkTransform."""
         index_transform = self._get_index_chunk_transform(index.chunks_per_shard)
+        index_spec = self._get_index_chunk_spec(index.chunks_per_shard)  # supplied per call
         index_nd = get_ndbuffer_class().from_numpy_array(index.offsets_and_lengths)
-        result: Buffer | None = index_transform.encode_chunk(index_nd)
+        result: Buffer | None = index_transform.encode_chunk(index_nd, index_spec)
         assert result is not None
         return result
 
@@ -511,7 +517,7 @@ def _decode_sync(
             except KeyError:
                 out[out_selection] = shard_spec.fill_value
                 continue
-            chunk_array = inner_transform.decode_chunk(chunk_bytes)
+            chunk_array = inner_transform.decode_chunk(chunk_bytes, chunk_spec)
             out[out_selection] = chunk_array[chunk_selection]
 
         return out
@@ -524,6 +530,7 @@ def _encode_sync(
         """Encode a full shard synchronously."""
         shard_shape = shard_spec.shape
         chunks_per_shard = self._get_chunks_per_shard(shard_spec)
+        chunk_spec = self._get_chunk_spec(shard_spec)
         inner_transform = self._get_inner_chunk_transform(shard_spec)
 
         indexer = BasicIndexer(
@@ -546,7 +553,7 @@ def _encode_sync(
             if skip_empty and chunk_array.all_equal(fill_value):
                 shard_builder[chunk_coords] = None
             else:
-                encoded = inner_transform.encode_chunk(chunk_array)
+                encoded = inner_transform.encode_chunk(chunk_array, chunk_spec)
                 shard_builder[chunk_coords] = encoded
 
         return self._encode_shard_dict_sync(
@@ -636,10 +643,12 @@ def _byte_offset(coords: tuple[int, ...]) -> int:
                         existing_chunk_bytes = existing[
                             byte_offset : byte_offset + chunk_byte_length
                         ]
-                        chunk_array = inner_transform.decode_chunk(existing_chunk_bytes).copy()
+                        chunk_array = inner_transform.decode_chunk(
+                            existing_chunk_bytes, chunk_spec
+                        ).copy()
                         chunk_array[chunk_sel] = chunk_value
 
-                    encoded = inner_transform.encode_chunk(chunk_array)
+                    encoded = inner_transform.encode_chunk(chunk_array, chunk_spec)
                     if encoded is not None:
                         store.set_range_sync(key, encoded, byte_offset)
                         index.set_chunk_slice(
@@ -685,7 +694,7 @@ def _byte_offset(coords: tuple[int, ...]) -> int:
             else:
                 existing_raw = shard_dict.get(chunk_coords)
                 if existing_raw is not None:
-                    chunk_array = inner_transform.decode_chunk(existing_raw).copy()
+                    chunk_array = inner_transform.decode_chunk(existing_raw, chunk_spec).copy()
                 else:
                     chunk_array = chunk_spec.prototype.nd_buffer.create(
                         shape=self.chunk_shape,
@@ -698,7 +707,7 @@ def _byte_offset(coords: tuple[int, ...]) -> int:
             if skip_empty and chunk_array.all_equal(fill_value):
                 shard_dict[chunk_coords] = None
             else:
-                shard_dict[chunk_coords] = inner_transform.encode_chunk(chunk_array)
+                shard_dict[chunk_coords] = inner_transform.encode_chunk(chunk_array, chunk_spec)
 
         blob = self._encode_shard_dict_sync(
             shard_dict,
 
@@ -3,7 +3,7 @@
 import os
 import threading
 from concurrent.futures import ThreadPoolExecutor
-from dataclasses import dataclass, field, replace
+from dataclasses import dataclass, field
 from itertools import islice, pairwise
 from typing import TYPE_CHECKING, Any
 from warnings import warn
@@ -87,24 +87,23 @@ def fill_value_or_default(chunk_spec: ArraySpec) -> Any:
 
 @dataclass(slots=True, kw_only=True)
 class ChunkTransform:
-    """A synchronous codec chain bound to an ArraySpec.
+    """A synchronous codec chain.
 
-    Provides `encode` and `decode` for pure-compute codec operations
-    (no IO, no threading, no batching).
+    Provides `encode_chunk` and `decode_chunk` for pure-compute codec
+    operations (no IO, no threading, no batching). The `chunk_spec` is
+    supplied per call so the same transform can be reused across chunks
+    with different shapes, prototypes, etc.
 
     All codecs must implement `SupportsSyncCodec`. Construction will
     raise `TypeError` if any codec does not.
     """
 
     codecs: tuple[Codec, ...]
-    array_spec: ArraySpec
 
-    # (sync codec, input_spec) pairs in pipeline order.
-    _aa_codecs: tuple[tuple[SupportsSyncCodec[NDBuffer, NDBuffer], ArraySpec], ...] = field(
+    _aa_codecs: tuple[SupportsSyncCodec[NDBuffer, NDBuffer], ...] = field(
         init=False, repr=False, compare=False
     )
     _ab_codec: SupportsSyncCodec[NDBuffer, Buffer] = field(init=False, repr=False, compare=False)
-    _ab_spec: ArraySpec = field(init=False, repr=False, compare=False)
     _bb_codecs: tuple[SupportsSyncCodec[Buffer, Buffer], ...] = field(
         init=False, repr=False, compare=False
     )
@@ -118,131 +117,78 @@ def __post_init__(self) -> None:
             )
 
         aa, ab, bb = codecs_from_list(list(self.codecs))
+        for c in (*aa, ab, *bb):
+            assert isinstance(c, SupportsSyncCodec)
+        self._aa_codecs = tuple(aa)  # type: ignore[assignment]
+        self._ab_codec = ab  # type: ignore[assignment]
+        self._bb_codecs = tuple(bb)  # type: ignore[assignment]
+
+    # Single-slot memo for `_resolve_specs`: ``(chunk_spec, aa_specs, ab_spec)``.
+    # Kept as ONE tuple in ONE attribute so that a worker thread can never pair
+    # the key of one chunk_spec with the resolved specs of another.
+    _cached_specs: tuple[ArraySpec, tuple[ArraySpec, ...], ArraySpec] | None = field(
+        init=False, repr=False, compare=False, default=None
+    )
 
-        aa_codecs: list[tuple[SupportsSyncCodec[NDBuffer, NDBuffer], ArraySpec]] = []
-        spec = self.array_spec
-        for aa_codec in aa:
-            assert isinstance(aa_codec, SupportsSyncCodec)
-            aa_codecs.append((aa_codec, spec))
-            spec = aa_codec.resolve_metadata(spec)
-
-        self._aa_codecs = tuple(aa_codecs)
-        assert isinstance(ab, SupportsSyncCodec)
-        self._ab_codec = ab
-        self._ab_spec = spec
-        bb_sync: list[SupportsSyncCodec[Buffer, Buffer]] = []
-        for bb_codec in bb:
-            assert isinstance(bb_codec, SupportsSyncCodec)
-            bb_sync.append(bb_codec)
-        self._bb_codecs = tuple(bb_sync)
-
-    def _spec_for_shape(
-        self, shape: tuple[int, ...], prototype: BufferPrototype | None = None
-    ) -> ArraySpec:
-        """Build an ArraySpec with the given shape (and optional prototype)."""
-        if shape == self._ab_spec.shape and (
-            prototype is None or prototype is self._ab_spec.prototype
-        ):
-            return self._ab_spec
-        if prototype is None:
-            return replace(self._ab_spec, shape=shape)
-        return replace(self._ab_spec, shape=shape, prototype=prototype)
+    def _resolve_specs(self, chunk_spec: ArraySpec) -> tuple[tuple[ArraySpec, ...], ArraySpec]:
+        """Return per-AA-codec input specs and the AB spec for ``chunk_spec``.
 
-    def decode_chunk(
-        self,
-        chunk_bytes: Buffer,
-        chunk_shape: tuple[int, ...] | None = None,
-        prototype: BufferPrototype | None = None,
-    ) -> NDBuffer:
+        Each array-array codec's input spec is produced by threading
+        ``chunk_spec`` through ``resolve_metadata`` in pipeline order; the
+        spec left after the last one is returned as the AB spec, which
+        ``decode_chunk`` also hands to the bytes-bytes codecs.
+
+        Only the most recent result is memoised, and it is reused only when
+        the very same ``chunk_spec`` object is passed again (identity, not
+        equality). The memo holds a reference to that object, so its identity
+        cannot be recycled while the entry is live. The entry is read and
+        published as a single tuple because callers such as ``read_sync``
+        may run chunks on worker threads that share one transform, and
+        separate key / value attributes could be observed half-updated.
+        """
+        if not self._aa_codecs:
+            return (), chunk_spec
+        cached = self._cached_specs  # one read; do not touch the attribute again
+        if cached is not None and cached[0] is chunk_spec:
+            return cached[1], cached[2]
+
+        aa_specs: list[ArraySpec] = []
+        spec = chunk_spec
+        for aa_codec in self._aa_codecs:
+            aa_specs.append(spec)
+            spec = aa_codec.resolve_metadata(spec)  # type: ignore[attr-defined]
+        aa_specs_t = tuple(aa_specs)
+        # Publish key and values together in a single attribute store.
+        self._cached_specs = (chunk_spec, aa_specs_t, spec)
+        return aa_specs_t, spec
+
+    def decode_chunk(self, chunk_bytes: Buffer, chunk_spec: ArraySpec) -> NDBuffer:
         """Decode a single chunk through the full codec chain, synchronously.
 
-        Pure compute -- no IO.
+        Pure compute -- no IO. ``chunk_spec`` describes this chunk (shape, prototype, ...).
-
-        Parameters
-        ----------
-        chunk_bytes : Buffer
-            The encoded chunk bytes.
-        chunk_shape : tuple[int, ...] or None
-            The shape of this chunk. If None, uses the shape from the
-            ArraySpec provided at construction. Required for rectilinear
-            grids where chunks have different shapes.
-        prototype : BufferPrototype or None
-            The buffer prototype for the output. If None, uses the
-            prototype from the ArraySpec provided at construction.
-            Required when decoding into a non-default buffer (e.g. GPU).
         """
-        if chunk_shape is None and (prototype is None or prototype is self._ab_spec.prototype):
-            # Use pre-computed specs
-            ab_spec = self._ab_spec
-            aa_specs: list[ArraySpec] = [s for _, s in self._aa_codecs]
-        else:
-            # Resolve chunk_shape through the aa_codecs to get the correct
-            # spec for the ab_codec (e.g., TransposeCodec changes the shape).
-            base_spec = self._spec_for_shape(
-                chunk_shape if chunk_shape is not None else self._ab_spec.shape,
-                prototype=prototype,
-            )
-            aa_specs = []
-            spec = base_spec
-            for aa_codec, _ in self._aa_codecs:
-                aa_specs.append(spec)
-                spec = aa_codec.resolve_metadata(spec)  # type: ignore[attr-defined]
-            ab_spec = spec
+        aa_specs, ab_spec = self._resolve_specs(chunk_spec)  # specs derived per call
 
         data: Buffer = chunk_bytes
         for bb_codec in reversed(self._bb_codecs):
             data = bb_codec._decode_sync(data, ab_spec)
 
         chunk_array: NDBuffer = self._ab_codec._decode_sync(data, ab_spec)
 
-        for (aa_codec, _), aa_spec in zip(
-            reversed(self._aa_codecs), reversed(aa_specs), strict=True
-        ):
+        for aa_codec, aa_spec in zip(reversed(self._aa_codecs), reversed(aa_specs), strict=True):
             chunk_array = aa_codec._decode_sync(chunk_array, aa_spec)
 
         return chunk_array
 
-    def encode_chunk(
-        self,
-        chunk_array: NDBuffer,
-        chunk_shape: tuple[int, ...] | None = None,
-        prototype: BufferPrototype | None = None,
-    ) -> Buffer | None:
+    def encode_chunk(self, chunk_array: NDBuffer, chunk_spec: ArraySpec) -> Buffer | None:
         """Encode a single chunk through the full codec chain, synchronously.
 
         Pure compute -- no IO.
-
-        Parameters
-        ----------
-        chunk_array : NDBuffer
-            The chunk data to encode.
-        chunk_shape : tuple[int, ...] or None
-            The shape of this chunk. If None, uses the shape from the
-            ArraySpec provided at construction.
-        prototype : BufferPrototype or None
-            The buffer prototype to use for intermediate buffers. If
-            None, uses the prototype from the ArraySpec provided at
-            construction. Required when encoding non-default buffers
-            (e.g. GPU) so the codec chain produces matching buffer
-            types.
         """
-        if chunk_shape is None and (prototype is None or prototype is self._ab_spec.prototype):
-            ab_spec = self._ab_spec
-            aa_specs: list[ArraySpec] = [s for _, s in self._aa_codecs]
-        else:
-            base_spec = self._spec_for_shape(
-                chunk_shape if chunk_shape is not None else self._ab_spec.shape,
-                prototype=prototype,
-            )
-            aa_specs = []
-            spec = base_spec
-            for aa_codec, _ in self._aa_codecs:
-                aa_specs.append(spec)
-                spec = aa_codec.resolve_metadata(spec)  # type: ignore[attr-defined]
-            ab_spec = spec
+        aa_specs, ab_spec = self._resolve_specs(chunk_spec)
 
         aa_data: NDBuffer = chunk_array
-        for (aa_codec, _), aa_spec in zip(self._aa_codecs, aa_specs, strict=True):
+        for aa_codec, aa_spec in zip(self._aa_codecs, aa_specs, strict=True):
             aa_result = aa_codec._encode_sync(aa_data, aa_spec)
             if aa_result is None:
                 return None
@@ -824,9 +770,7 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
         aa, ab, bb = codecs_from_list(evolved_codecs)
 
         try:
-            sync_transform: ChunkTransform | None = ChunkTransform(
-                codecs=evolved_codecs, array_spec=array_spec
-            )
+            sync_transform: ChunkTransform | None = ChunkTransform(codecs=evolved_codecs)
         except TypeError:
             sync_transform = None
 
@@ -984,15 +928,7 @@ def read_sync(
         def _decode_one(raw: Buffer | None, chunk_spec: ArraySpec) -> NDBuffer | None:
             if raw is None:
                 return None
-            chunk_shape = (
-                chunk_spec.shape if chunk_spec.shape != transform.array_spec.shape else None
-            )
-            prototype = (
-                chunk_spec.prototype
-                if chunk_spec.prototype is not transform.array_spec.prototype
-                else None
-            )
-            return transform.decode_chunk(raw, chunk_shape=chunk_shape, prototype=prototype)
+            return transform.decode_chunk(raw, chunk_spec)
 
         specs = [cs for _, cs, *_ in batch]
         if n_workers > 0 and len(batch) > 1:
@@ -1071,21 +1007,10 @@ def _process_one(
         ) -> Buffer | None:
             _, chunk_spec, chunk_selection, out_selection, is_complete = batch[idx]
             existing_bytes = existing_buffers[idx]
-            chunk_shape = (
-                chunk_spec.shape if chunk_spec.shape != transform.array_spec.shape else None
-            )
-
-            prototype = (
-                chunk_spec.prototype
-                if chunk_spec.prototype is not transform.array_spec.prototype
-                else None
-            )
 
             existing_chunk_array: NDBuffer | None = None
             if existing_bytes is not None:
-                existing_chunk_array = transform.decode_chunk(
-                    existing_bytes, chunk_shape=chunk_shape, prototype=prototype
-                )
+                existing_chunk_array = transform.decode_chunk(existing_bytes, chunk_spec)
 
             chunk_array = self._merge_chunk_array(
                 existing_chunk_array,
@@ -1103,7 +1028,7 @@ def _process_one(
             ):
                 return None
 
-            return transform.encode_chunk(chunk_array, chunk_shape=chunk_shape, prototype=prototype)
+            return transform.encode_chunk(chunk_array, chunk_spec)
 
         indices = list(range(len(batch)))
         if n_workers > 0 and len(batch) > 1: