Skip to content

Commit 8965d09

Browse files
d-v-b and maxrjones authored
refactor: allow regular-style chunk grid declaration for rectilinear chunk grid (#8)
* refactor: allow regular-style chunk grid declaration for rectilinear chunk grid

  The rectilinear chunk grid spec allows bare integers per dimension (meaning "regular step size"), distinct from explicit single-element edge lists. This commit widens `RectilinearChunkGrid.chunk_shapes` to `tuple[int | tuple[int, ...], ...]` so bare ints are preserved for faithful JSON round-tripping.

  Additionally:

  - unifies `_validate_chunk_shapes` to handle both regular and rectilinear validation; `_parse_chunk_shape` now delegates to it
  - adds `from_sizes` method to `ChunkGrid`, accepting `int | Sequence[int]` per dimension
  - removes `from_regular` and `from_rectilinear` methods from `ChunkGrid`
  - removes `parse_chunk_grid` from `chunk_grids.py` (JSON → ChunkGrid shortcut that bypassed the metadata layer)
  - removes `serialize_chunk_grid`, `_infer_chunk_grid_name`, and serialization helpers from `chunk_grids.py` (ChunkGrid never needs to be serialized; metadata DTOs handle it)
  - renames `parse_chunk_grid` in `v3.py` to `parse_chunk_grid_metadata` to disambiguate
  - moves the rectilinear feature flag to `RectilinearChunkGrid.__post_init__`
  - simplifies sharding codec validation into a single divisibility check for both regular and rectilinear grids
  - updates `validate_rectilinear_edges` to skip bare-int dimensions
  - refactors chunk grid tests to functional style with parametrization
  - adds docstrings to all test functions

* chore: remove .claude

* refactor: rename chunk_grid parsing function

---------

Co-authored-by: Max Jones <14077947+maxrjones@users.noreply.github.com>
1 parent 2c06fb2 commit 8965d09

6 files changed

Lines changed: 2694 additions & 2178 deletions

File tree

src/zarr/codecs/sharding.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -423,7 +423,7 @@ async def _decode_single(
423423
indexer = BasicIndexer(
424424
tuple(slice(0, s) for s in shard_shape),
425425
shape=shard_shape,
426-
chunk_grid=ChunkGrid.from_regular(shard_shape, chunk_shape),
426+
chunk_grid=ChunkGrid.from_sizes(shard_shape, chunk_shape),
427427
)
428428

429429
# setup output array
@@ -469,7 +469,7 @@ async def _decode_partial_single(
469469
indexer = get_indexer(
470470
selection,
471471
shape=shard_shape,
472-
chunk_grid=ChunkGrid.from_regular(shard_shape, chunk_shape),
472+
chunk_grid=ChunkGrid.from_sizes(shard_shape, chunk_shape),
473473
)
474474

475475
# setup output array
@@ -544,7 +544,7 @@ async def _encode_single(
544544
BasicIndexer(
545545
tuple(slice(0, s) for s in shard_shape),
546546
shape=shard_shape,
547-
chunk_grid=ChunkGrid.from_regular(shard_shape, chunk_shape),
547+
chunk_grid=ChunkGrid.from_sizes(shard_shape, chunk_shape),
548548
)
549549
)
550550

@@ -586,7 +586,7 @@ async def _encode_partial_single(
586586
get_indexer(
587587
selection,
588588
shape=shard_shape,
589-
chunk_grid=ChunkGrid.from_regular(shard_shape, chunk_shape),
589+
chunk_grid=ChunkGrid.from_sizes(shard_shape, chunk_shape),
590590
)
591591
)
592592

src/zarr/core/chunk_grids.py

Lines changed: 39 additions & 69 deletions
Original file line number | Diff line number | Diff line change
@@ -267,6 +267,10 @@ def is_boundary(self) -> bool:
267267
return self.shape != self.codec_shape
268268

269269

270+
# A single dimension's rectilinear chunk spec: bare int (uniform shorthand),
271+
# list of ints (explicit edges), or mixed RLE (e.g. [[10, 3], 5]).
272+
273+
270274
def _is_rectilinear_chunks(chunks: Any) -> TypeGuard[Sequence[Sequence[int]]]:
271275
"""Check if chunks is a nested sequence (e.g. [[10, 20], [5, 5]]).
272276
@@ -319,92 +323,58 @@ def from_metadata(cls, metadata: ArrayMetadata) -> ChunkGrid:
319323
from zarr.core.metadata.v3 import RectilinearChunkGrid, RegularChunkGrid
320324

321325
if isinstance(metadata, ArrayV2Metadata):
322-
return cls.from_regular(metadata.shape, metadata.chunks)
326+
return cls.from_sizes(metadata.shape, tuple(metadata.chunks))
323327
chunk_grid_meta = metadata.chunk_grid
324328
if isinstance(chunk_grid_meta, RegularChunkGrid):
325-
return cls.from_regular(metadata.shape, chunk_grid_meta.chunk_shape)
329+
return cls.from_sizes(metadata.shape, tuple(chunk_grid_meta.chunk_shape))
326330
elif isinstance(chunk_grid_meta, RectilinearChunkGrid):
327-
return cls.from_rectilinear(chunk_grid_meta.chunk_shapes, metadata.shape)
331+
return cls.from_sizes(metadata.shape, chunk_grid_meta.chunk_shapes)
328332
else:
329333
raise TypeError(f"Unknown chunk grid metadata type: {type(chunk_grid_meta)}")
330334

331335
@classmethod
332-
def from_regular(cls, array_shape: ShapeLike, chunk_shape: ShapeLike) -> ChunkGrid:
333-
"""Create a ChunkGrid where all dimensions are fixed (regular)."""
334-
shape_parsed = parse_shapelike(array_shape)
335-
chunks_parsed = parse_shapelike(chunk_shape)
336-
if len(shape_parsed) != len(chunks_parsed):
337-
raise ValueError(
338-
f"array_shape and chunk_shape must have same ndim, "
339-
f"got {len(shape_parsed)} vs {len(chunks_parsed)}"
340-
)
341-
dims = tuple(
342-
FixedDimension(size=c, extent=s)
343-
for s, c in zip(shape_parsed, chunks_parsed, strict=True)
344-
)
345-
return cls(dimensions=dims)
346-
347-
@classmethod
348-
def from_rectilinear(
336+
def from_sizes(
349337
cls,
350-
chunk_shapes: Sequence[Sequence[int]],
351338
array_shape: ShapeLike,
339+
chunk_sizes: Sequence[int | Sequence[int]],
352340
) -> ChunkGrid:
353-
"""Create a ChunkGrid with per-dimension edge lists.
354-
355-
Each element of chunk_shapes is a sequence of chunk sizes for that dimension.
356-
If all sizes in a dimension are identical *and* the extent equals
357-
``sum(edges)``, the dimension is stored as ``FixedDimension``.
358-
Otherwise it is stored as ``VaryingDimension``, preserving the
359-
explicit edge count (important when the last chunk extends past
360-
the array boundary).
341+
"""Create a ChunkGrid from per-dimension chunk size specifications.
361342
362343
Parameters
363344
----------
364-
chunk_shapes
365-
Per-dimension sequences of chunk edge lengths.
366345
array_shape
367-
The array shape to bind as the extent per dimension. The last
368-
chunk along each dimension may extend past the array boundary
369-
(the edge is the codec buffer size; ``data_size`` clips to the
370-
extent).
371-
372-
Raises
373-
------
374-
ValueError
375-
If the ``array.rectilinear_chunks`` config option is not enabled.
346+
The array shape (one extent per dimension).
347+
chunk_sizes
348+
Per-dimension chunk sizes. Each element is either:
349+
350+
- An ``int`` — regular (fixed) chunk size for that dimension.
351+
- A ``Sequence[int]`` — explicit per-chunk edge lengths. If all
352+
edges are identical and cover the extent, the dimension is
353+
stored as ``FixedDimension``; otherwise as ``VaryingDimension``.
376354
"""
377-
from zarr.core.config import config
378-
379-
if not config.get("array.rectilinear_chunks"):
380-
raise ValueError(
381-
"Rectilinear chunk grids are experimental and disabled by default. "
382-
"Enable them with: zarr.config.set({'array.rectilinear_chunks': True}) "
383-
"or set the environment variable ZARR_ARRAY__RECTILINEAR_CHUNKS=True"
384-
)
385355
extents = parse_shapelike(array_shape)
386-
if len(extents) != len(chunk_shapes):
356+
if len(extents) != len(chunk_sizes):
387357
raise ValueError(
388-
f"array_shape has {len(extents)} dimensions but chunk_shapes "
389-
f"has {len(chunk_shapes)} dimensions"
358+
f"array_shape has {len(extents)} dimensions but chunk_sizes "
359+
f"has {len(chunk_sizes)} dimensions"
390360
)
391361
dims: list[DimensionGrid] = []
392-
for edges, extent in zip(chunk_shapes, extents, strict=True):
393-
edges_list = list(edges)
394-
if not edges_list:
395-
raise ValueError("Each dimension must have at least one chunk")
396-
edge_sum = sum(edges_list)
397-
# Collapse to FixedDimension when edges are uniform AND either
398-
# extent == edge_sum (exact fit) or the number of edges matches
399-
# ceildiv(extent, edge) (regular grid with boundary overflow).
400-
if (
401-
edges_list[0] > 0
402-
and all(e == edges_list[0] for e in edges_list)
403-
and (extent == edge_sum or len(edges_list) == ceildiv(extent, edges_list[0]))
404-
):
405-
dims.append(FixedDimension(size=edges_list[0], extent=extent))
362+
for dim_spec, extent in zip(chunk_sizes, extents, strict=True):
363+
if isinstance(dim_spec, int):
364+
dims.append(FixedDimension(size=dim_spec, extent=extent))
406365
else:
407-
dims.append(VaryingDimension(edges_list, extent=extent))
366+
edges_list = list(dim_spec)
367+
if not edges_list:
368+
raise ValueError("Each dimension must have at least one chunk")
369+
edge_sum = sum(edges_list)
370+
if (
371+
edges_list[0] > 0
372+
and all(e == edges_list[0] for e in edges_list)
373+
and (extent == edge_sum or len(edges_list) == ceildiv(extent, edges_list[0]))
374+
):
375+
dims.append(FixedDimension(size=edges_list[0], extent=extent))
376+
else:
377+
dims.append(VaryingDimension(edges_list, extent=extent))
408378
return cls(dimensions=tuple(dims))
409379

410380
# -- Properties --
@@ -798,18 +768,18 @@ class RegularChunkGrid(metaclass=_RegularChunkGridMeta):
798768
"""Deprecated compatibility shim.
799769
800770
.. deprecated:: 3.1
801-
Use ``ChunkGrid.from_regular(array_shape, chunk_shape)`` instead.
771+
Use ``ChunkGrid.from_sizes(array_shape, chunk_sizes)`` instead.
802772
Use ``grid.is_regular`` instead of ``isinstance(grid, RegularChunkGrid)``.
803773
"""
804774

805775
def __new__(cls, *, chunk_shape: ShapeLike) -> ChunkGrid: # type: ignore[misc]
806776
warnings.warn(
807777
"RegularChunkGrid is deprecated. "
808-
"Use ChunkGrid.from_regular(array_shape, chunk_shape) instead.",
778+
"Use ChunkGrid.from_sizes(array_shape, chunk_sizes) instead.",
809779
DeprecationWarning,
810780
stacklevel=2,
811781
)
812782
# Without array_shape we cannot bind extents, so use chunk_shape as extent.
813783
# This matches the old behavior where RegularChunkGrid was shape-unaware.
814784
parsed = parse_shapelike(chunk_shape)
815-
return ChunkGrid.from_regular(array_shape=parsed, chunk_shape=parsed)
785+
return ChunkGrid.from_sizes(array_shape=parsed, chunk_sizes=tuple(parsed))

src/zarr/core/common.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -312,15 +312,17 @@ def validate_rectilinear_kind(kind: str | None) -> None:
312312

313313

314314
def validate_rectilinear_edges(
315-
chunk_shapes: Sequence[Sequence[int]], array_shape: Sequence[int]
315+
chunk_shapes: Sequence[int | Sequence[int]], array_shape: Sequence[int]
316316
) -> None:
317317
"""Validate that rectilinear chunk edges cover the array extent per dimension.
318318
319-
Raises ValueError if any dimension's edge sum is less than the corresponding
320-
array extent.
319+
Bare-int dimensions (regular step) always cover any extent, so they are
320+
skipped. Explicit edge lists must sum to at least the array extent.
321321
"""
322-
for i, (edges, extent) in enumerate(zip(chunk_shapes, array_shape, strict=True)):
323-
edge_sum = sum(edges)
322+
for i, (dim_spec, extent) in enumerate(zip(chunk_shapes, array_shape, strict=True)):
323+
if isinstance(dim_spec, int):
324+
continue
325+
edge_sum = sum(dim_spec)
324326
if edge_sum < extent:
325327
raise ValueError(
326328
f"Rectilinear chunk edges for dimension {i} sum to {edge_sum} "

src/zarr/core/metadata/v2.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -131,7 +131,7 @@ def chunk_grid(self) -> ChunkGrid:
131131
DeprecationWarning,
132132
stacklevel=2,
133133
)
134-
return ChunkGrid.from_regular(self.shape, self.chunks)
134+
return ChunkGrid.from_sizes(self.shape, tuple(self.chunks))
135135

136136
@property
137137
def shards(self) -> tuple[int, ...] | None:

0 commit comments

Comments
 (0)