|
23 | 23 | from zarr.core.codec_pipeline import BatchedCodecPipeline |
24 | 24 | from zarr.core.config import BadConfigError, config |
25 | 25 | from zarr.core.indexing import SelectorTuple |
26 | | -from zarr.errors import ZarrUserWarning |
| 26 | +from zarr.errors import ChunkNotFoundError, ZarrUserWarning |
27 | 27 | from zarr.registry import ( |
28 | 28 | fully_qualified_name, |
29 | 29 | get_buffer_class, |
@@ -53,6 +53,7 @@ def test_config_defaults_set() -> None: |
53 | 53 | "array": { |
54 | 54 | "order": "C", |
55 | 55 | "write_empty_chunks": False, |
| 56 | + "read_missing_chunks": True, |
56 | 57 | "target_shard_size_bytes": None, |
57 | 58 | }, |
58 | 59 | "async": {"concurrency": 10, "timeout": None}, |
@@ -319,6 +320,108 @@ class NewCodec2(BytesCodec): |
319 | 320 | get_codec_class("new_codec") |
320 | 321 |
|
321 | 322 |
|
@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"])
@pytest.mark.parametrize(
    "kwargs",
    [
        {"shards": (4, 4)},
        {"compressors": None},
    ],
    ids=["partial_decode", "full_decode"],
)
def test_config_read_missing_chunks(store: Store, kwargs: dict[str, Any]) -> None:
    """Check how the ``array.read_missing_chunks`` setting affects reads.

    An array is created without writing any data. Reading it with the default
    configuration must return the fill value everywhere; reading it with
    ``array.read_missing_chunks`` set to ``False`` must raise
    ``ChunkNotFoundError``. Once the whole array has been written, the same
    strict read must succeed and return the written data.
    """
    shape = (4, 4)
    fill = 42
    strict = {"array.read_missing_chunks": False}

    writer = zarr.create_array(
        store=store,
        shape=shape,
        chunks=(2, 2),
        dtype="int32",
        fill_value=fill,
        **kwargs,
    )

    # Nothing has been written yet: the default configuration substitutes
    # the fill value for the absent chunks.
    all_fill = np.full(shape, fill, dtype="int32")
    assert np.array_equal(zarr.open_array(store)[:], all_fill)

    # The strict setting turns the same read into an error.
    with config.set(strict), pytest.raises(ChunkNotFoundError):
        zarr.open_array(store)[:]

    # After a full write, the strict read no longer raises.
    payload = np.arange(16, dtype="int32").reshape(shape)
    writer[:] = payload
    with config.set(strict):
        roundtrip = zarr.open_array(store)[:]
    assert np.array_equal(roundtrip, payload)
| 356 | + |
| 357 | + |
@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"])
def test_config_read_missing_chunks_sharded_inner(store: Store) -> None:
    """Missing inner chunks of an existing shard never raise.

    The shard index and its inner chunks should be stored together in a
    single storage object (a file or blob), so the shard index is given the
    responsibility of determining which inner chunks should be present.

    Consequently, `read_missing_chunks=False` raises an error only when an
    entire *shard* is missing. Inner chunks missing from a shard that exists
    are filled with the array's fill value and do not raise.
    """
    fill = 42
    sharded = zarr.create_array(
        store=store,
        shape=(8, 4),
        chunks=(2, 2),
        shards=(4, 4),
        dtype="int32",
        fill_value=fill,
    )

    # Populate a single inner chunk of the first shard; the second shard is
    # never written.
    sharded[0:2, 0:2] = np.ones((2, 2), dtype="int32")

    first_shard = np.full((4, 4), fill, dtype="int32")
    first_shard[0:2, 0:2] = 1

    with config.set({"array.read_missing_chunks": False}):
        reopened = zarr.open_array(store)

        # The first shard exists: its absent inner chunks come back as fill.
        assert np.array_equal(reopened[:4], first_shard)

        # The second shard is absent altogether: the read must fail.
        with pytest.raises(ChunkNotFoundError):
            reopened[4:]
| 394 | + |
| 395 | + |
@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"])
def test_config_read_missing_chunks_write_empty_chunks(store: Store) -> None:
    """Check the interaction of write_empty_chunks and read_missing_chunks.

    write_empty_chunks=False drops chunks equal to fill_value, which then
    appear missing to read_missing_chunks=False. With write_empty_chunks=True
    the same chunks are kept, so a read with read_missing_chunks=False
    succeeds.
    """
    arr = zarr.create_array(
        store=store,
        shape=(4,),
        chunks=(2,),
        dtype="int32",
        fill_value=0,
        config={"write_empty_chunks": False, "read_missing_chunks": False},
    )

    # write non-fill-value data: chunks are stored
    arr[:] = [1, 2, 3, 4]
    assert np.array_equal(arr[:], [1, 2, 3, 4])

    # overwrite with fill_value: chunks are dropped by write_empty_chunks=False
    arr[:] = 0
    with pytest.raises(ChunkNotFoundError):
        arr[:]

    # with write_empty_chunks=True, chunks are kept and no error is raised.
    # read_missing_chunks=False must be set here as well: the config passed to
    # create_array above does not apply to the reopened array, and with the
    # default (True) this read would succeed even if the chunks were dropped.
    with config.set({"array.write_empty_chunks": True, "array.read_missing_chunks": False}):
        arr = zarr.open_array(store)
        arr[:] = 0
        assert np.array_equal(arr[:], [0, 0, 0, 0])
| 423 | + |
| 424 | + |
322 | 425 | @pytest.mark.parametrize( |
323 | 426 | "key", |
324 | 427 | [ |
|
0 commit comments