Skip to content

Commit de7afd8

Browse files
committed
Delegate missing-shard detection away from _get_chunk_spec. Codify
expected behaviour of fill_missing_chunks for both sharding and write_empty_chunks via tests. Use elif to make control flow slightly clearer.
1 parent 2846ed9 commit de7afd8

File tree

3 files changed

+71
-9
lines changed

3 files changed

+71
-9
lines changed

src/zarr/codecs/sharding.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -717,11 +717,16 @@ def _get_index_chunk_spec(self, chunks_per_shard: tuple[int, ...]) -> ArraySpec:
717717
)
718718

719719
def _get_chunk_spec(self, shard_spec: ArraySpec) -> ArraySpec:
720+
# Because the shard index and inner chunks should be stored
721+
# together, we detect missing data via the shard index.
722+
# The inner chunks defined here are thus allowed to return
723+
# None, even if fill_missing_chunks=False at the array level.
724+
config = replace(shard_spec.config, fill_missing_chunks=True)
720725
return ArraySpec(
721726
shape=self.chunk_shape,
722727
dtype=shard_spec.dtype,
723728
fill_value=shard_spec.fill_value,
724-
config=shard_spec.config,
729+
config=config,
725730
prototype=shard_spec.prototype,
726731
)
727732

src/zarr/core/codec_pipeline.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -264,11 +264,10 @@ async def read_batch(
264264
):
265265
if chunk_array is not None:
266266
out[out_selection] = chunk_array
267+
elif chunk_spec.config.fill_missing_chunks:
268+
out[out_selection] = fill_value_or_default(chunk_spec)
267269
else:
268-
if chunk_spec.config.fill_missing_chunks:
269-
out[out_selection] = fill_value_or_default(chunk_spec)
270-
else:
271-
raise MissingChunkError
270+
raise MissingChunkError
272271
else:
273272
chunk_bytes_batch = await concurrent_map(
274273
[(byte_getter, array_spec.prototype) for byte_getter, array_spec, *_ in batch_info],
@@ -291,11 +290,10 @@ async def read_batch(
291290
if drop_axes != ():
292291
tmp = tmp.squeeze(axis=drop_axes)
293292
out[out_selection] = tmp
293+
elif chunk_spec.config.fill_missing_chunks:
294+
out[out_selection] = fill_value_or_default(chunk_spec)
294295
else:
295-
if chunk_spec.config.fill_missing_chunks:
296-
out[out_selection] = fill_value_or_default(chunk_spec)
297-
else:
298-
raise MissingChunkError
296+
raise MissingChunkError
299297

300298
def _merge_chunk_array(
301299
self,

tests/test_config.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,65 @@ def test_config_fill_missing_chunks(store: Store, kwargs: dict[str, Any]) -> Non
355355
assert np.array_equal(result, np.arange(16, dtype="int32").reshape(4, 4))
356356

357357

358+
@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"])
359+
def test_config_fill_missing_chunks_sharded_inner(store: Store) -> None:
360+
"""Missing inner chunks within a shard are always filled with the array's
361+
fill value, even when fill_missing_chunks=False."""
362+
arr = zarr.create_array(
363+
store=store,
364+
shape=(8, 4),
365+
chunks=(2, 2),
366+
shards=(4, 4),
367+
dtype="int32",
368+
fill_value=42,
369+
)
370+
371+
# write only one inner chunk in the first shard, leaving the second shard empty
372+
arr[0:2, 0:2] = np.ones((2, 2), dtype="int32")
373+
374+
with config.set({"array.fill_missing_chunks": False}):
375+
a = zarr.open_array(store)
376+
377+
# first shard exists: missing inner chunks are filled, no error
378+
result = a[:4]
379+
expected = np.full((4, 4), 42, dtype="int32")
380+
expected[0:2, 0:2] = 1
381+
assert np.array_equal(result, expected)
382+
383+
# second shard is entirely missing: raises an error
384+
with pytest.raises(MissingChunkError):
385+
a[4:]
386+
387+
388+
@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"])
389+
def test_config_fill_missing_chunks_write_empty_chunks(store: Store) -> None:
390+
"""write_empty_chunks=False drops chunks equal to fill_value, which then
391+
appear missing to fill_missing_chunks=False."""
392+
arr = zarr.create_array(
393+
store=store,
394+
shape=(4,),
395+
chunks=(2,),
396+
dtype="int32",
397+
fill_value=0,
398+
config={"write_empty_chunks": False, "fill_missing_chunks": False},
399+
)
400+
401+
# write non-fill-value data: chunks are stored
402+
arr[:] = [1, 2, 3, 4]
403+
assert np.array_equal(arr[:], [1, 2, 3, 4])
404+
405+
# overwrite with fill_value: chunks are dropped by write_empty_chunks=False
406+
arr[:] = 0
407+
with pytest.raises(MissingChunkError):
408+
arr[:]
409+
410+
# with write_empty_chunks=True, chunks are kept and no error is raised
411+
with config.set({"array.write_empty_chunks": True}):
412+
arr = zarr.open_array(store)
413+
arr[:] = 0
414+
assert np.array_equal(arr[:], [0, 0, 0, 0])
415+
416+
358417
@pytest.mark.parametrize(
359418
"key",
360419
[

0 commit comments

Comments
 (0)