Skip to content

Commit d460521

Browse files
committed
Define ChunkNotFoundError; expose chunk key and chunk index in ChunkNotFoundError
1 parent 233ddce commit d460521

6 files changed

Lines changed: 43 additions & 16 deletions

File tree

src/zarr/abc/codec.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,9 @@ async def encode(
430430
@abstractmethod
431431
async def read(
432432
self,
433-
batch_info: Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool]],
433+
batch_info: Iterable[
434+
tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool, str, tuple[int, ...]]
435+
],
434436
out: NDBuffer,
435437
drop_axes: tuple[int, ...] = (),
436438
) -> None:
@@ -439,12 +441,14 @@ async def read(
439441
440442
Parameters
441443
----------
442-
batch_info : Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple]]
444+
batch_info : Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool, str, tuple[int, ...]]]
443445
Ordered set of information about the chunks.
444446
The first slice selection determines which parts of the chunk will be fetched.
445447
The second slice selection determines where in the output array the chunk data will be written.
446448
The ByteGetter is used to fetch the necessary bytes.
447449
The chunk spec contains information about the construction of an array from the bytes.
450+
The string is the chunk key.
451+
The tuple of ints is the chunk's grid coordinates.
448452
449453
If the Store returns ``None`` for a chunk, then the chunk was not
450454
written and the implementation must set the values of that chunk (or

src/zarr/codecs/sharding.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,8 @@ async def _decode_single(
439439
chunk_selection,
440440
out_selection,
441441
is_complete_shard,
442+
"/".join(str(c) for c in chunk_coords),
443+
chunk_coords,
442444
)
443445
for chunk_coords, chunk_selection, out_selection, is_complete_shard in indexer
444446
],
@@ -511,6 +513,8 @@ async def _decode_partial_single(
511513
chunk_selection,
512514
out_selection,
513515
is_complete_shard,
516+
"/".join(str(c) for c in chunk_coords),
517+
chunk_coords,
514518
)
515519
for chunk_coords, chunk_selection, out_selection, is_complete_shard in indexer
516520
],

src/zarr/core/array.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5608,6 +5608,8 @@ async def _get_selection(
56085608
chunk_selection,
56095609
out_selection,
56105610
is_complete_chunk,
5611+
metadata.encode_chunk_key(chunk_coords),
5612+
chunk_coords,
56115613
)
56125614
for chunk_coords, chunk_selection, out_selection, is_complete_chunk in indexer
56135615
],

src/zarr/core/codec_pipeline.py

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from zarr.core.common import concurrent_map
1818
from zarr.core.config import config
1919
from zarr.core.indexing import SelectorTuple, is_scalar
20-
from zarr.errors import MissingChunkError, ZarrUserWarning
20+
from zarr.errors import ChunkNotFoundError, ZarrUserWarning
2121
from zarr.registry import register_pipeline
2222

2323
if TYPE_CHECKING:
@@ -248,7 +248,9 @@ async def encode_partial_batch(
248248

249249
async def read_batch(
250250
self,
251-
batch_info: Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool]],
251+
batch_info: Iterable[
252+
tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool, str, tuple[int, ...]]
253+
],
252254
out: NDBuffer,
253255
drop_axes: tuple[int, ...] = (),
254256
) -> None:
@@ -259,15 +261,17 @@ async def read_batch(
259261
for byte_getter, chunk_spec, chunk_selection, *_ in batch_info
260262
]
261263
)
262-
for chunk_array, (_, chunk_spec, _, out_selection, _) in zip(
264+
for chunk_array, (_, chunk_spec, _, out_selection, _, chunk_key, chunk_coords) in zip(
263265
chunk_array_batch, batch_info, strict=False
264266
):
265267
if chunk_array is not None:
266268
out[out_selection] = chunk_array
267269
elif chunk_spec.config.fill_missing_chunks:
268270
out[out_selection] = fill_value_or_default(chunk_spec)
269271
else:
270-
raise MissingChunkError
272+
raise ChunkNotFoundError(
273+
f"chunk '{chunk_key}' at grid position {chunk_coords} not found in store."
274+
)
271275
else:
272276
chunk_bytes_batch = await concurrent_map(
273277
[(byte_getter, array_spec.prototype) for byte_getter, array_spec, *_ in batch_info],
@@ -282,9 +286,15 @@ async def read_batch(
282286
)
283287
],
284288
)
285-
for chunk_array, (_, chunk_spec, chunk_selection, out_selection, _) in zip(
286-
chunk_array_batch, batch_info, strict=False
287-
):
289+
for chunk_array, (
290+
_,
291+
chunk_spec,
292+
chunk_selection,
293+
out_selection,
294+
_,
295+
chunk_key,
296+
chunk_coords,
297+
) in zip(chunk_array_batch, batch_info, strict=False):
288298
if chunk_array is not None:
289299
tmp = chunk_array[chunk_selection]
290300
if drop_axes != ():
@@ -293,7 +303,9 @@ async def read_batch(
293303
elif chunk_spec.config.fill_missing_chunks:
294304
out[out_selection] = fill_value_or_default(chunk_spec)
295305
else:
296-
raise MissingChunkError
306+
raise ChunkNotFoundError(
307+
f"chunk '{chunk_key}' at grid position {chunk_coords} not found in store"
308+
)
297309

298310
def _merge_chunk_array(
299311
self,
@@ -470,7 +482,9 @@ async def encode(
470482

471483
async def read(
472484
self,
473-
batch_info: Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool]],
485+
batch_info: Iterable[
486+
tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool, str, tuple[int, ...]]
487+
],
474488
out: NDBuffer,
475489
drop_axes: tuple[int, ...] = (),
476490
) -> None:

src/zarr/errors.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,4 +146,7 @@ class BoundsCheckError(IndexError): ...
146146
class ArrayIndexError(IndexError): ...
147147

148148

149-
class MissingChunkError(IndexError): ...
149+
class ChunkNotFoundError(BaseZarrError):
150+
"""
151+
Raised when a chunk that was expected to exist in storage was not retrieved successfully.
152+
"""

tests/test_config.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
from zarr.core.codec_pipeline import BatchedCodecPipeline
2424
from zarr.core.config import BadConfigError, config
2525
from zarr.core.indexing import SelectorTuple
26-
from zarr.errors import MissingChunkError, ZarrUserWarning
26+
from zarr.errors import ChunkNotFoundError, ZarrUserWarning
2727
from zarr.registry import (
2828
fully_qualified_name,
2929
get_buffer_class,
@@ -345,7 +345,7 @@ def test_config_fill_missing_chunks(store: Store, kwargs: dict[str, Any]) -> Non
345345

346346
# with fill_missing_chunks=False, reading missing chunks raises an error
347347
with config.set({"array.fill_missing_chunks": False}):
348-
with pytest.raises(MissingChunkError):
348+
with pytest.raises(ChunkNotFoundError):
349349
zarr.open_array(store)[:]
350350

351351
# after writing data, all chunks exist and no error is raised
@@ -381,7 +381,7 @@ def test_config_fill_missing_chunks_sharded_inner(store: Store) -> None:
381381
assert np.array_equal(result, expected)
382382

383383
# second shard is entirely missing: raises an error
384-
with pytest.raises(MissingChunkError):
384+
with pytest.raises(ChunkNotFoundError):
385385
a[4:]
386386

387387

@@ -404,7 +404,7 @@ def test_config_fill_missing_chunks_write_empty_chunks(store: Store) -> None:
404404

405405
# overwrite with fill_value: chunks are dropped by write_empty_chunks=False
406406
arr[:] = 0
407-
with pytest.raises(MissingChunkError):
407+
with pytest.raises(ChunkNotFoundError):
408408
arr[:]
409409

410410
# with write_empty_chunks=True, chunks are kept and no error is raised

0 commit comments

Comments
 (0)