|
1 | 1 | from __future__ import annotations |
2 | 2 |
|
3 | | -import itertools |
4 | 3 | import math |
5 | 4 | import numbers |
6 | | -import operator |
7 | 5 | import warnings |
8 | 6 | from abc import abstractmethod |
9 | 7 | from dataclasses import dataclass |
10 | | -from functools import reduce |
11 | 8 | from typing import TYPE_CHECKING, Any, Literal |
12 | 9 |
|
13 | 10 | import numpy as np |
| 11 | +import numpy.typing as npt |
14 | 12 |
|
15 | 13 | import zarr |
16 | 14 | from zarr.abc.metadata import Metadata |
17 | | -from zarr.core.common import ( |
18 | | - JSON, |
19 | | - NamedConfig, |
20 | | - ShapeLike, |
21 | | - ceildiv, |
22 | | - parse_named_configuration, |
23 | | - parse_shapelike, |
24 | | -) |
25 | 15 | from zarr.errors import ZarrUserWarning |
26 | 16 |
|
27 | 17 | if TYPE_CHECKING: |
28 | 18 | from collections.abc import Iterator |
29 | 19 | from typing import Self |
30 | 20 |
|
31 | 21 | from zarr.core.array import ShardsLike |
| 22 | + from zarr.core.common import JSON |
| 23 | + |
| 24 | + |
| 25 | +@dataclass(frozen=True) |
| 26 | +class ChunkGrid(Metadata): |
| 27 | + @abstractmethod |
| 28 | + def to_dict(self) -> dict[str, JSON]: ... |
| 29 | + |
| 30 | + @abstractmethod |
| 31 | + def update_shape(self, new_shape: tuple[int, ...]) -> Self: |
| 32 | + pass |
| 33 | + |
| 34 | + @abstractmethod |
| 35 | + def all_chunk_coords(self, array_shape: tuple[int, ...]) -> Iterator[tuple[int, ...]]: |
| 36 | + pass |
| 37 | + |
| 38 | + @abstractmethod |
| 39 | + def get_nchunks(self, array_shape: tuple[int, ...]) -> int: |
| 40 | + pass |
| 41 | + |
| 42 | + @abstractmethod |
| 43 | + def get_chunk_shape( |
| 44 | + self, array_shape: tuple[int, ...], chunk_coord: tuple[int, ...] |
| 45 | + ) -> tuple[int, ...]: |
| 46 | + """ |
| 47 | + Get the shape of a specific chunk. |
| 48 | +
|
| 49 | + Parameters |
| 50 | + ---------- |
| 51 | + array_shape : tuple[int, ...] |
| 52 | + Shape of the full array. |
| 53 | + chunk_coord : tuple[int, ...] |
| 54 | + Coordinates of the chunk in the chunk grid. |
| 55 | +
|
| 56 | + Returns |
| 57 | + ------- |
| 58 | + tuple[int, ...] |
| 59 | + Shape of the chunk at the given coordinates. |
| 60 | + """ |
| 61 | + |
| 62 | + @abstractmethod |
| 63 | + def get_chunk_start( |
| 64 | + self, array_shape: tuple[int, ...], chunk_coord: tuple[int, ...] |
| 65 | + ) -> tuple[int, ...]: |
| 66 | + """ |
| 67 | + Get the starting position of a chunk in the array. |
| 68 | +
|
| 69 | + Parameters |
| 70 | + ---------- |
| 71 | + array_shape : tuple[int, ...] |
| 72 | + Shape of the full array. |
| 73 | + chunk_coord : tuple[int, ...] |
| 74 | + Coordinates of the chunk in the chunk grid. |
| 75 | +
|
| 76 | + Returns |
| 77 | + ------- |
| 78 | + tuple[int, ...] |
| 79 | + Starting position (offset) of the chunk in the array. |
| 80 | + """ |
| 81 | + |
| 82 | + @abstractmethod |
| 83 | + def array_index_to_chunk_coord( |
| 84 | + self, array_shape: tuple[int, ...], array_index: tuple[int, ...] |
| 85 | + ) -> tuple[int, ...]: |
| 86 | + """ |
| 87 | + Map an array index to the chunk coordinates that contain it. |
| 88 | +
|
| 89 | + Parameters |
| 90 | + ---------- |
| 91 | + array_shape : tuple[int, ...] |
| 92 | + Shape of the full array. |
| 93 | + array_index : tuple[int, ...] |
| 94 | + Index in the array. |
| 95 | +
|
| 96 | + Returns |
| 97 | + ------- |
| 98 | + tuple[int, ...] |
| 99 | + Coordinates of the chunk containing the array index. |
| 100 | + """ |
| 101 | + |
| 102 | + @abstractmethod |
| 103 | + def array_indices_to_chunk_dim( |
| 104 | + self, array_shape: tuple[int, ...], dim: int, indices: npt.NDArray[np.intp] |
| 105 | + ) -> npt.NDArray[np.intp]: |
| 106 | + """ |
| 107 | + Map an array of indices along one dimension to chunk coordinates (vectorized). |
| 108 | +
|
| 109 | + Parameters |
| 110 | + ---------- |
| 111 | + array_shape : tuple[int, ...] |
| 112 | + Shape of the full array. |
| 113 | + dim : int |
| 114 | + Dimension index. |
| 115 | + indices : np.ndarray |
| 116 | + Array of indices along the given dimension. |
| 117 | +
|
| 118 | + Returns |
| 119 | + ------- |
| 120 | + np.ndarray |
| 121 | + Array of chunk coordinates, same shape as indices. |
| 122 | + """ |
| 123 | + |
| 124 | + @abstractmethod |
| 125 | + def chunks_per_dim(self, array_shape: tuple[int, ...], dim: int) -> int: |
| 126 | + """ |
| 127 | + Get the number of chunks along a specific dimension. |
| 128 | +
|
| 129 | + Parameters |
| 130 | + ---------- |
| 131 | + array_shape : tuple[int, ...] |
| 132 | + Shape of the full array. |
| 133 | + dim : int |
| 134 | + Dimension index. |
| 135 | +
|
| 136 | + Returns |
| 137 | + ------- |
| 138 | + int |
| 139 | + Number of chunks along the dimension. |
| 140 | + """ |
| 141 | + |
| 142 | + @abstractmethod |
| 143 | + def get_chunk_grid_shape(self, array_shape: tuple[int, ...]) -> tuple[int, ...]: |
| 144 | + """ |
| 145 | + Get the shape of the chunk grid (number of chunks along each dimension). |
| 146 | +
|
| 147 | + Parameters |
| 148 | + ---------- |
| 149 | + array_shape : tuple[int, ...] |
| 150 | + Shape of the full array. |
| 151 | +
|
| 152 | + Returns |
| 153 | + ------- |
| 154 | + tuple[int, ...] |
| 155 | + Number of chunks along each dimension. |
| 156 | + """ |
32 | 157 |
|
33 | 158 |
|
34 | 159 | def _guess_chunks( |
@@ -153,58 +278,6 @@ def normalize_chunks(chunks: Any, shape: tuple[int, ...], typesize: int) -> tupl |
153 | 278 | return tuple(int(c) for c in chunks) |
154 | 279 |
|
155 | 280 |
|
156 | | -@dataclass(frozen=True) |
157 | | -class ChunkGrid(Metadata): |
158 | | - @classmethod |
159 | | - def from_dict(cls, data: dict[str, JSON] | ChunkGrid | NamedConfig[str, Any]) -> ChunkGrid: |
160 | | - if isinstance(data, ChunkGrid): |
161 | | - return data |
162 | | - |
163 | | - name_parsed, _ = parse_named_configuration(data) |
164 | | - if name_parsed == "regular": |
165 | | - return RegularChunkGrid._from_dict(data) |
166 | | - raise ValueError(f"Unknown chunk grid. Got {name_parsed}.") |
167 | | - |
168 | | - @abstractmethod |
169 | | - def all_chunk_coords(self, array_shape: tuple[int, ...]) -> Iterator[tuple[int, ...]]: |
170 | | - pass |
171 | | - |
172 | | - @abstractmethod |
173 | | - def get_nchunks(self, array_shape: tuple[int, ...]) -> int: |
174 | | - pass |
175 | | - |
176 | | - |
177 | | -@dataclass(frozen=True) |
178 | | -class RegularChunkGrid(ChunkGrid): |
179 | | - chunk_shape: tuple[int, ...] |
180 | | - |
181 | | - def __init__(self, *, chunk_shape: ShapeLike) -> None: |
182 | | - chunk_shape_parsed = parse_shapelike(chunk_shape) |
183 | | - |
184 | | - object.__setattr__(self, "chunk_shape", chunk_shape_parsed) |
185 | | - |
186 | | - @classmethod |
187 | | - def _from_dict(cls, data: dict[str, JSON] | NamedConfig[str, Any]) -> Self: |
188 | | - _, configuration_parsed = parse_named_configuration(data, "regular") |
189 | | - |
190 | | - return cls(**configuration_parsed) # type: ignore[arg-type] |
191 | | - |
192 | | - def to_dict(self) -> dict[str, JSON]: |
193 | | - return {"name": "regular", "configuration": {"chunk_shape": tuple(self.chunk_shape)}} |
194 | | - |
195 | | - def all_chunk_coords(self, array_shape: tuple[int, ...]) -> Iterator[tuple[int, ...]]: |
196 | | - return itertools.product( |
197 | | - *(range(ceildiv(s, c)) for s, c in zip(array_shape, self.chunk_shape, strict=False)) |
198 | | - ) |
199 | | - |
200 | | - def get_nchunks(self, array_shape: tuple[int, ...]) -> int: |
201 | | - return reduce( |
202 | | - operator.mul, |
203 | | - itertools.starmap(ceildiv, zip(array_shape, self.chunk_shape, strict=True)), |
204 | | - 1, |
205 | | - ) |
206 | | - |
207 | | - |
208 | 281 | def _guess_num_chunks_per_axis_shard( |
209 | 282 | chunk_shape: tuple[int, ...], item_size: int, max_bytes: int, array_shape: tuple[int, ...] |
210 | 283 | ) -> int: |
|
0 commit comments