|
6 | 6 | import numbers |
7 | 7 | import operator |
8 | 8 | import warnings |
9 | | -from collections.abc import Iterable, Sequence |
10 | 9 | from dataclasses import dataclass, field |
11 | 10 | from functools import reduce |
12 | 11 | from typing import TYPE_CHECKING, Any, Literal, Protocol, TypeGuard, cast, runtime_checkable |
|
16 | 15 |
|
17 | 16 | import zarr |
18 | 17 | from zarr.core.common import ( |
19 | | - JSON, |
20 | | - NamedConfig, |
21 | 18 | ShapeLike, |
22 | 19 | ceildiv, |
23 | | - compress_rle, |
24 | | - expand_rle, |
25 | | - parse_named_configuration, |
26 | 20 | parse_shapelike, |
27 | | - validate_rectilinear_edges, |
28 | | - validate_rectilinear_kind, |
29 | 21 | ) |
30 | 22 | from zarr.errors import ZarrUserWarning |
31 | 23 |
|
32 | 24 | if TYPE_CHECKING: |
33 | | - from collections.abc import Iterator |
| 25 | + from collections.abc import Iterable, Iterator, Sequence |
34 | 26 |
|
35 | 27 | from zarr.core.array import ShardsLike |
36 | 28 | from zarr.core.metadata import ArrayMetadata |
@@ -107,7 +99,7 @@ def with_extent(self, new_extent: int) -> FixedDimension: |
107 | 99 | """Re-bind to *new_extent* without modifying edges. |
108 | 100 |
|
109 | 101 | Used when constructing a grid from existing metadata where edges |
110 | | - are already correct (e.g. ``parse_chunk_grid``). Raises on |
| 102 | + are already correct. Raises on |
111 | 103 | ``VaryingDimension`` if edges don't cover the new extent. |
112 | 104 | """ |
113 | 105 | return FixedDimension(size=self.size, extent=new_extent) |
@@ -203,7 +195,7 @@ def with_extent(self, new_extent: int) -> VaryingDimension: |
203 | 195 | """Re-bind to *new_extent* without modifying edges. |
204 | 196 |
|
205 | 197 | Used when constructing a grid from existing metadata where edges |
206 | | - are already correct (e.g. ``parse_chunk_grid``). Raises if the |
| 198 | + are already correct. Raises if the |
207 | 199 | existing edges don't cover *new_extent*. |
208 | 200 | """ |
209 | 201 | edge_sum = self.cumulative[-1] |
@@ -275,66 +267,6 @@ def is_boundary(self) -> bool: |
275 | 267 | return self.shape != self.codec_shape |
276 | 268 |
|
277 | 269 |
|
278 | | -# A single dimension's rectilinear chunk spec: bare int (uniform shorthand), |
279 | | -# list of ints (explicit edges), or mixed RLE (e.g. [[10, 3], 5]). |
280 | | -RectilinearDimSpec = int | list[int | list[int]] |
281 | | - |
282 | | -# The serialization format name for a chunk grid. |
283 | | -ChunkGridName = Literal["regular", "rectilinear"] |
284 | | - |
285 | | - |
286 | | -def _serialize_fixed_dim(dim: FixedDimension) -> RectilinearDimSpec: |
287 | | - """Compact rectilinear representation for a fixed-size dimension. |
288 | | -
|
289 | | - Per the rectilinear spec, a bare integer is repeated until the sum |
290 | | - >= extent. This preserves the full codec buffer size for boundary |
291 | | - chunks, matching the regular grid spec ("chunks at the border always |
292 | | - have the full chunk size"). |
293 | | - """ |
294 | | - return dim.size |
295 | | - |
296 | | - |
297 | | -def _serialize_varying_dim(dim: VaryingDimension) -> RectilinearDimSpec: |
298 | | - """RLE-compressed rectilinear representation for a varying dimension.""" |
299 | | - edges = list(dim.edges) |
300 | | - rle = compress_rle(edges) |
301 | | - if len(rle) < len(edges): |
302 | | - return rle |
303 | | - # mypy: list[int] is invariant, so it won't widen to list[int | list[int]] |
304 | | - return cast("RectilinearDimSpec", edges) |
305 | | - |
306 | | - |
307 | | -def _decode_dim_spec(dim_spec: JSON, array_extent: int | None = None) -> list[int]: |
308 | | - """Decode a single dimension's chunk edge specification per the rectilinear spec. |
309 | | -
|
310 | | - Per the spec, each element of ``chunk_shapes`` can be: |
311 | | - - a bare integer ``m``: repeat ``m`` until the sum >= array extent |
312 | | - - an array of bare integers and/or ``[value, count]`` RLE pairs |
313 | | -
|
314 | | - Parameters |
315 | | - ---------- |
316 | | - dim_spec |
317 | | - The raw JSON value for one dimension's chunk edges. |
318 | | - array_extent |
319 | | - Array length along this dimension. Required when *dim_spec* is a bare |
320 | | - integer (to know how many repetitions). |
321 | | - """ |
322 | | - if isinstance(dim_spec, int): |
323 | | - if array_extent is None: |
324 | | - raise ValueError("Integer chunk_shapes shorthand requires array shape to expand.") |
325 | | - if dim_spec <= 0: |
326 | | - raise ValueError(f"Integer chunk edge length must be > 0, got {dim_spec}") |
327 | | - n = ceildiv(array_extent, dim_spec) |
328 | | - return [dim_spec] * n |
329 | | - if isinstance(dim_spec, list): |
330 | | - has_sublists = any(isinstance(e, list) for e in dim_spec) |
331 | | - if has_sublists: |
332 | | - return expand_rle(dim_spec) |
333 | | - else: |
334 | | - return [int(e) for e in dim_spec] |
335 | | - raise ValueError(f"Invalid chunk_shapes entry: {dim_spec}") |
336 | | - |
337 | | - |
338 | 270 | def _is_rectilinear_chunks(chunks: Any) -> TypeGuard[Sequence[Sequence[int]]]: |
339 | 271 | """Check if chunks is a nested sequence (e.g. [[10, 20], [5, 5]]). |
340 | 272 |
|
@@ -628,118 +560,6 @@ def update_shape(self, new_shape: tuple[int, ...]) -> ChunkGrid: |
628 | 560 | ) |
629 | 561 | return ChunkGrid(dimensions=dims) |
630 | 562 |
|
631 | | - # ChunkGrid does not serialize itself. The format choice ("regular" vs |
632 | | - # "rectilinear") belongs to the metadata layer. Use serialize_chunk_grid() |
633 | | - # for output and parse_chunk_grid() for input. |
634 | | - |
635 | | - |
636 | | -def parse_chunk_grid( |
637 | | - data: dict[str, JSON] | ChunkGrid | NamedConfig[str, Any], |
638 | | - array_shape: tuple[int, ...], |
639 | | -) -> ChunkGrid: |
640 | | - """Create a ChunkGrid from a metadata dict or existing grid, binding array shape. |
641 | | -
|
642 | | - This is the primary entry point for constructing a ChunkGrid from serialized |
643 | | - metadata. It always produces a grid with correct extent values. |
644 | | -
|
645 | | - Both ``"regular"`` and ``"rectilinear"`` grid names are supported. Rectilinear |
646 | | - grids are experimental and require the ``array.rectilinear_chunks`` config |
647 | | - option to be enabled; a ``ValueError`` is raised otherwise. |
648 | | - """ |
649 | | - if isinstance(data, ChunkGrid): |
650 | | - # Re-bind extent if array_shape differs from what's stored |
651 | | - dims = tuple( |
652 | | - dim.with_extent(extent) |
653 | | - for dim, extent in zip(data.dimensions, array_shape, strict=True) |
654 | | - ) |
655 | | - return ChunkGrid(dimensions=dims) |
656 | | - |
657 | | - name_parsed, configuration_parsed = parse_named_configuration(data) |
658 | | - |
659 | | - if name_parsed == "regular": |
660 | | - chunk_shape_raw = configuration_parsed.get("chunk_shape") |
661 | | - if chunk_shape_raw is None: |
662 | | - raise ValueError("Regular chunk grid requires 'chunk_shape' configuration") |
663 | | - if not isinstance(chunk_shape_raw, Sequence): |
664 | | - raise TypeError(f"chunk_shape must be a sequence, got {type(chunk_shape_raw)}") |
665 | | - return ChunkGrid.from_regular(array_shape, cast("Sequence[int]", chunk_shape_raw)) |
666 | | - |
667 | | - if name_parsed == "rectilinear": |
668 | | - validate_rectilinear_kind(cast("str | None", configuration_parsed.get("kind"))) |
669 | | - chunk_shapes_raw = configuration_parsed.get("chunk_shapes") |
670 | | - if chunk_shapes_raw is None: |
671 | | - raise ValueError("Rectilinear chunk grid requires 'chunk_shapes' configuration") |
672 | | - if not isinstance(chunk_shapes_raw, Sequence): |
673 | | - raise TypeError(f"chunk_shapes must be a sequence, got {type(chunk_shapes_raw)}") |
674 | | - if len(chunk_shapes_raw) != len(array_shape): |
675 | | - raise ValueError( |
676 | | - f"chunk_shapes has {len(chunk_shapes_raw)} dimensions but array shape " |
677 | | - f"has {len(array_shape)} dimensions" |
678 | | - ) |
679 | | - decoded: list[list[int]] = [] |
680 | | - for dim_spec, extent in zip(chunk_shapes_raw, array_shape, strict=True): |
681 | | - decoded.append(_decode_dim_spec(dim_spec, array_extent=extent)) |
682 | | - validate_rectilinear_edges(decoded, array_shape) |
683 | | - return ChunkGrid.from_rectilinear(decoded, array_shape=array_shape) |
684 | | - |
685 | | - raise ValueError(f"Unknown chunk grid name: {name_parsed!r}") |
686 | | - |
687 | | - |
688 | | -def serialize_chunk_grid(grid: ChunkGrid, name: ChunkGridName) -> dict[str, JSON]: |
689 | | - """Serialize a ChunkGrid to a metadata dict using the given format name. |
690 | | -
|
691 | | - The format choice ("regular" vs "rectilinear") belongs to the metadata layer, |
692 | | - not the grid itself. This function is called by ArrayV3Metadata.to_dict(). |
693 | | - """ |
694 | | - if name == "regular": |
695 | | - if not grid.is_regular: |
696 | | - raise ValueError( |
697 | | - "Cannot serialize a non-regular chunk grid as 'regular'. Use 'rectilinear' instead." |
698 | | - ) |
699 | | - # The regular grid spec encodes only chunk_shape, not per-axis edges, |
700 | | - # so zero-extent dimensions are valid (they simply produce zero chunks). |
701 | | - return { |
702 | | - "name": "regular", |
703 | | - "configuration": {"chunk_shape": tuple(grid.chunk_shape)}, |
704 | | - } |
705 | | - |
706 | | - if name == "rectilinear": |
707 | | - # Zero-extent dimensions cannot be represented as rectilinear because |
708 | | - # the spec requires at least one positive-integer edge length per axis. |
709 | | - # This is intentionally asymmetric with the regular grid, which encodes |
710 | | - # only chunk_shape (no per-axis edges) and thus handles zero-extent |
711 | | - # arrays without issue. |
712 | | - if any(d.extent == 0 for d in grid.dimensions): |
713 | | - raise ValueError( |
714 | | - "Cannot serialize a zero-extent grid as 'rectilinear': " |
715 | | - "the spec requires all edge lengths to be positive integers." |
716 | | - ) |
717 | | - chunk_shapes: list[RectilinearDimSpec] = [] |
718 | | - for dim in grid.dimensions: |
719 | | - if isinstance(dim, FixedDimension): |
720 | | - chunk_shapes.append(_serialize_fixed_dim(dim)) |
721 | | - elif isinstance(dim, VaryingDimension): |
722 | | - chunk_shapes.append(_serialize_varying_dim(dim)) |
723 | | - else: |
724 | | - raise TypeError(f"Unexpected dimension type: {type(dim)}") |
725 | | - return { |
726 | | - "name": "rectilinear", |
727 | | - "configuration": {"kind": "inline", "chunk_shapes": chunk_shapes}, |
728 | | - } |
729 | | - |
730 | | - raise ValueError(f"Unknown chunk grid name for serialization: {name!r}") |
731 | | - |
732 | | - |
733 | | -def _infer_chunk_grid_name( |
734 | | - data: dict[str, JSON] | ChunkGrid | NamedConfig[str, Any], |
735 | | - grid: ChunkGrid, |
736 | | -) -> ChunkGridName: |
737 | | - """Extract or infer the chunk grid serialization name from the input.""" |
738 | | - if isinstance(data, dict): |
739 | | - name, _ = parse_named_configuration(data) |
740 | | - return cast("ChunkGridName", name) |
741 | | - return "regular" if grid.is_regular else "rectilinear" |
742 | | - |
743 | 563 |
|
744 | 564 | def _guess_chunks( |
745 | 565 | shape: tuple[int, ...] | int, |
|
0 commit comments