Skip to content

Commit 6db55a1

Browse files
committed
Move config from codec_pipeline -> array. Update docs, tests.
1 parent 68afd5c commit 6db55a1

File tree

8 files changed

+33
-13
lines changed

8 files changed

+33
-13
lines changed

changes/3748.feature.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
Added `codec_pipeline.fill_missing_chunks` configuration option. When set to `False`, reading missing chunks raises a `MissingChunkError` instead of filling them with the fill value.
1+
Added `array.fill_missing_chunks` configuration option. When set to `False`, reading missing chunks raises a `MissingChunkError` instead of filling them with the array's fill value.

docs/user-guide/arrays.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,13 +158,18 @@ print(f"Shape after second append: {z.shape}")
158158

159159
Zarr arrays are parametrized with a configuration that determines certain aspects of array behavior.
160160

161-
We currently support two configuration options for arrays: `write_empty_chunks` and `order`.
161+
We currently support three configuration options for arrays: `write_empty_chunks`, `fill_missing_chunks`, and `order`.
162162

163163
| field | type | default | description |
164164
| - | - | - | - |
165165
| `write_empty_chunks` | `bool` | `False` | Controls whether empty chunks are written to storage. See [Empty chunks](performance.md#empty-chunks).
166+
| `fill_missing_chunks` | `bool` | `True` | Controls whether missing chunks are filled with the array's fill value on read. If `False`, reading missing chunks raises a `MissingChunkError`.
166167
| `order` | `Literal["C", "F"]` | `"C"` | The memory layout of arrays returned when reading data from the store.
167168

169+
!!! note
170+
`write_empty_chunks=False` skips writing chunks that consist entirely of the array's fill value.
171+
If `fill_missing_chunks=False`, attempting to read these missing chunks will raise a `MissingChunkError`.
172+
168173
You can specify the configuration when you create an array with the `config` keyword argument.
169174
`config` can be passed as either a `dict` or an `ArrayConfig` object.
170175

docs/user-guide/config.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ Configuration options include the following:
3030
- Default Zarr format `default_zarr_version`
3131
- Default array order in memory `array.order`
3232
- Whether empty chunks are written to storage `array.write_empty_chunks`
33-
- Whether missing chunks are filled with the fill value on read `codec_pipeline.fill_missing_chunks` (default `True`). Set to `False` to raise a `MissingChunkError` instead.
33+
- Whether missing chunks are filled with the array's fill value on read `array.fill_missing_chunks` (default `True`). Set to `False` to raise a `MissingChunkError` instead.
3434
- Async and threading options, e.g. `async.concurrency` and `threading.max_workers`
3535
- Selections of implementations of codecs, codec pipelines and buffers
3636
- Enabling GPU support with `zarr.config.enable_gpu()`. See GPU support for more.

src/zarr/codecs/sharding.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -711,7 +711,7 @@ def _get_index_chunk_spec(self, chunks_per_shard: tuple[int, ...]) -> ArraySpec:
711711
dtype=UInt64(endianness="little"),
712712
fill_value=MAX_UINT_64,
713713
config=ArrayConfig(
714-
order="C", write_empty_chunks=False
714+
order="C", write_empty_chunks=False, fill_missing_chunks=True
715715
), # Note: this is hard-coded for simplicity -- it is not surfaced into user code,
716716
prototype=default_buffer_prototype(),
717717
)

src/zarr/core/array_spec.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ class ArrayConfigParams(TypedDict):
2828

2929
order: NotRequired[MemoryOrder]
3030
write_empty_chunks: NotRequired[bool]
31+
fill_missing_chunks: NotRequired[bool]
3132

3233

3334
@dataclass(frozen=True)
@@ -41,17 +42,25 @@ class ArrayConfig:
4142
The memory layout of the arrays returned when reading data from the store.
4243
write_empty_chunks : bool
4344
If True, empty chunks will be written to the store.
45+
fill_missing_chunks : bool
46+
If True, missing chunks will be filled with the array's fill value on read.
47+
If False, reading missing chunks will raise a ``MissingChunkError``.
4448
"""
4549

4650
order: MemoryOrder
4751
write_empty_chunks: bool
52+
fill_missing_chunks: bool
4853

49-
def __init__(self, order: MemoryOrder, write_empty_chunks: bool) -> None:
54+
def __init__(
55+
self, order: MemoryOrder, write_empty_chunks: bool, fill_missing_chunks: bool
56+
) -> None:
5057
order_parsed = parse_order(order)
5158
write_empty_chunks_parsed = parse_bool(write_empty_chunks)
59+
fill_missing_chunks_parsed = parse_bool(fill_missing_chunks)
5260

5361
object.__setattr__(self, "order", order_parsed)
5462
object.__setattr__(self, "write_empty_chunks", write_empty_chunks_parsed)
63+
object.__setattr__(self, "fill_missing_chunks", fill_missing_chunks_parsed)
5564

5665
@classmethod
5766
def from_dict(cls, data: ArrayConfigParams) -> Self:
@@ -62,7 +71,9 @@ def from_dict(cls, data: ArrayConfigParams) -> Self:
6271
"""
6372
kwargs_out: ArrayConfigParams = {}
6473
for f in fields(ArrayConfig):
65-
field_name = cast("Literal['order', 'write_empty_chunks']", f.name)
74+
field_name = cast(
75+
"Literal['order', 'write_empty_chunks', 'fill_missing_chunks']", f.name
76+
)
6677
if field_name not in data:
6778
kwargs_out[field_name] = zarr_config.get(f"array.{field_name}")
6879
else:
@@ -73,7 +84,11 @@ def to_dict(self) -> ArrayConfigParams:
7384
"""
7485
Serialize an instance of this class to a dict.
7586
"""
76-
return {"order": self.order, "write_empty_chunks": self.write_empty_chunks}
87+
return {
88+
"order": self.order,
89+
"write_empty_chunks": self.write_empty_chunks,
90+
"fill_missing_chunks": self.fill_missing_chunks,
91+
}
7792

7893

7994
ArrayConfigLike = ArrayConfig | ArrayConfigParams

src/zarr/core/codec_pipeline.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ async def read_batch(
265265
if chunk_array is not None:
266266
out[out_selection] = chunk_array
267267
else:
268-
if config.get("codec_pipeline.fill_missing_chunks", True):
268+
if chunk_spec.config.fill_missing_chunks:
269269
out[out_selection] = fill_value_or_default(chunk_spec)
270270
else:
271271
raise MissingChunkError
@@ -292,7 +292,7 @@ async def read_batch(
292292
tmp = tmp.squeeze(axis=drop_axes)
293293
out[out_selection] = tmp
294294
else:
295-
if config.get("codec_pipeline.fill_missing_chunks", True):
295+
if chunk_spec.config.fill_missing_chunks:
296296
out[out_selection] = fill_value_or_default(chunk_spec)
297297
else:
298298
raise MissingChunkError

src/zarr/core/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ def enable_gpu(self) -> ConfigSet:
9696
"array": {
9797
"order": "C",
9898
"write_empty_chunks": False,
99+
"fill_missing_chunks": True,
99100
"target_shard_size_bytes": None,
100101
},
101102
"async": {"concurrency": 10, "timeout": None},
@@ -104,7 +105,6 @@ def enable_gpu(self) -> ConfigSet:
104105
"codec_pipeline": {
105106
"path": "zarr.core.codec_pipeline.BatchedCodecPipeline",
106107
"batch_size": 1,
107-
"fill_missing_chunks": True,
108108
},
109109
"codecs": {
110110
"blosc": "zarr.codecs.blosc.BloscCodec",

tests/test_config.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ def test_config_defaults_set() -> None:
5353
"array": {
5454
"order": "C",
5555
"write_empty_chunks": False,
56+
"fill_missing_chunks": True,
5657
"target_shard_size_bytes": None,
5758
},
5859
"async": {"concurrency": 10, "timeout": None},
@@ -61,7 +62,6 @@ def test_config_defaults_set() -> None:
6162
"codec_pipeline": {
6263
"path": "zarr.core.codec_pipeline.BatchedCodecPipeline",
6364
"batch_size": 1,
64-
"fill_missing_chunks": True,
6565
},
6666
"codecs": {
6767
"blosc": "zarr.codecs.blosc.BloscCodec",
@@ -344,13 +344,13 @@ def test_config_fill_missing_chunks(store: Store, kwargs: dict[str, Any]) -> Non
344344
assert np.array_equal(result, np.full((4, 4), 42, dtype="int32"))
345345

346346
# with fill_missing_chunks=False, reading missing chunks raises an error
347-
with config.set({"codec_pipeline.fill_missing_chunks": False}):
347+
with config.set({"array.fill_missing_chunks": False}):
348348
with pytest.raises(MissingChunkError):
349349
zarr.open_array(store)[:]
350350

351351
# after writing data, all chunks exist and no error is raised
352352
arr[:] = np.arange(16, dtype="int32").reshape(4, 4)
353-
with config.set({"codec_pipeline.fill_missing_chunks": False}):
353+
with config.set({"array.fill_missing_chunks": False}):
354354
result = zarr.open_array(store)[:]
355355
assert np.array_equal(result, np.arange(16, dtype="int32").reshape(4, 4))
356356

0 commit comments

Comments
 (0)