Skip to content

Commit cdb5846

Browse files
d-v-b and claude authored
perf: cache default ArraySpec for regular chunk grids (#3908)
* perf: cache default ArraySpec for regular chunk grids

  For regular grids, all chunks have the same codec_shape, so we can build the ArraySpec once and reuse it for every chunk — avoiding the per-chunk ChunkGrid.__getitem__ + ArraySpec construction overhead.

  Adds _get_default_chunk_spec() and uses it in _get_selection and _set_selection. Saves ~5ms per 1000 chunks.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* docs: changelog

* refactor: inline creation of default arrayspec

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent f2321ec commit cdb5846

2 files changed

Lines changed: 29 additions & 2 deletions

File tree

changes/3908.misc.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Reuse a constant `ArraySpec` during indexing when possible.

src/zarr/core/array.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5427,11 +5427,24 @@ async def _get_selection(
54275427

54285428
# reading chunks and decoding them
54295429
indexed_chunks = list(indexer)
5430+
# For regular grids, all chunks share the same ArraySpec, so build it once
5431+
# and reuse it to avoid per-chunk ChunkGrid lookups and ArraySpec construction.
5432+
regular_grid = chunk_grid.is_regular
5433+
if regular_grid:
5434+
regular_chunk_spec = ArraySpec(
5435+
shape=chunk_grid.chunk_shape,
5436+
dtype=metadata.dtype,
5437+
fill_value=metadata.fill_value,
5438+
config=_config,
5439+
prototype=prototype,
5440+
)
54305441
results = await codec_pipeline.read(
54315442
[
54325443
(
54335444
store_path / metadata.encode_chunk_key(chunk_coords),
5434-
_get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype),
5445+
regular_chunk_spec
5446+
if regular_grid
5447+
else _get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype),
54355448
chunk_selection,
54365449
out_selection,
54375450
is_complete_chunk,
@@ -5770,11 +5783,24 @@ async def _set_selection(
57705783
_config = replace(_config, order=order)
57715784

57725785
# merging with existing data and encoding chunks
5786+
# For regular grids, all chunks share the same ArraySpec, so build it once
5787+
# and reuse it to avoid per-chunk ChunkGrid lookups and ArraySpec construction.
5788+
regular_grid = chunk_grid.is_regular
5789+
if regular_grid:
5790+
regular_chunk_spec = ArraySpec(
5791+
shape=chunk_grid.chunk_shape,
5792+
dtype=metadata.dtype,
5793+
fill_value=metadata.fill_value,
5794+
config=_config,
5795+
prototype=prototype,
5796+
)
57735797
await codec_pipeline.write(
57745798
[
57755799
(
57765800
store_path / metadata.encode_chunk_key(chunk_coords),
5777-
_get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype),
5801+
regular_chunk_spec
5802+
if regular_grid
5803+
else _get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype),
57785804
chunk_selection,
57795805
out_selection,
57805806
is_complete_chunk,

0 commit comments

Comments
 (0)