Skip to content

Commit 9f8bce7

Browse files
d-v-b and claude committed
perf: cache default ArraySpec for regular chunk grids
For regular grids, all chunks have the same codec_shape, so we can build the ArraySpec once and reuse it for every chunk — avoiding the per-chunk ChunkGrid.__getitem__ + ArraySpec construction overhead. Adds _get_default_chunk_spec() and uses it in _get_selection and _set_selection. Saves ~5ms per 1000 chunks. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent e6207b7 commit 9f8bce7

File tree

1 file changed

+41
-2
lines changed

1 file changed

+41
-2
lines changed

src/zarr/core/array.py

Lines changed: 41 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5778,6 +5778,37 @@ def _get_chunk_spec(
57785778
)
57795779

57805780

5781+
def _get_default_chunk_spec(
5782+
metadata: ArrayMetadata,
5783+
chunk_grid: ChunkGrid,
5784+
array_config: ArrayConfig,
5785+
prototype: BufferPrototype,
5786+
) -> ArraySpec | None:
5787+
"""Build an ArraySpec for the regular (non-edge) chunk shape, or None if not regular.
5788+
5789+
For regular grids, all chunks have the same codec_shape, so we can
5790+
build the ArraySpec once and reuse it for every chunk — avoiding the
5791+
per-chunk ChunkGrid.__getitem__ + ArraySpec construction overhead.
5792+
5793+
.. note::
5794+
Ideally the per-chunk ArraySpec would not exist at all: dtype,
5795+
fill_value, config, and prototype are constant across chunks —
5796+
only the shape varies (and only for edge chunks). A cleaner
5797+
design would pass a single ArraySpec plus a per-chunk shape
5798+
override, which ChunkTransform.decode_chunk already supports
5799+
via its ``chunk_shape`` parameter.
5800+
"""
5801+
if chunk_grid.is_regular:
5802+
return ArraySpec(
5803+
shape=chunk_grid.chunk_shape,
5804+
dtype=metadata.dtype,
5805+
fill_value=metadata.fill_value,
5806+
config=array_config,
5807+
prototype=prototype,
5808+
)
5809+
return None
5810+
5811+
57815812
async def _get_selection(
57825813
store_path: StorePath,
57835814
metadata: ArrayMetadata,
@@ -5857,11 +5888,16 @@ async def _get_selection(
58575888

58585889
# reading chunks and decoding them
58595890
indexed_chunks = list(indexer)
5891+
# Pre-compute the default chunk spec for regular grids to avoid
5892+
# per-chunk ChunkGrid lookups and ArraySpec construction.
5893+
default_spec = _get_default_chunk_spec(metadata, chunk_grid, _config, prototype)
58605894
results = await codec_pipeline.read(
58615895
[
58625896
(
58635897
store_path / metadata.encode_chunk_key(chunk_coords),
5864-
_get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype),
5898+
default_spec
5899+
if default_spec is not None
5900+
else _get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype),
58655901
chunk_selection,
58665902
out_selection,
58675903
is_complete_chunk,
@@ -6200,11 +6236,14 @@ async def _set_selection(
62006236
_config = replace(_config, order=order)
62016237

62026238
# merging with existing data and encoding chunks
6239+
default_spec = _get_default_chunk_spec(metadata, chunk_grid, _config, prototype)
62036240
await codec_pipeline.write(
62046241
[
62056242
(
62066243
store_path / metadata.encode_chunk_key(chunk_coords),
6207-
_get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype),
6244+
default_spec
6245+
if default_spec is not None
6246+
else _get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype),
62086247
chunk_selection,
62096248
out_selection,
62106249
is_complete_chunk,

0 commit comments

Comments
 (0)