Skip to content

Commit 41db2dc

Browse files
committed
Merge branch 'main' of github.com:zarr-developers/zarr-python into feature/rectilinear-chunk-grid
2 parents 84e8112 + b712f96 commit 41db2dc

8 files changed

Lines changed: 163 additions & 12 deletions

File tree

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ default_language_version:
1111

1212
repos:
1313
- repo: https://github.com/astral-sh/ruff-pre-commit
14-
rev: v0.14.10
14+
rev: v0.14.14
1515
hooks:
1616
- id: ruff-check
1717
args: ["--fix", "--show-fixes"]

changes/3668.feature.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Exposes the array runtime configuration as an attribute called `config` on the `Array` and
2+
`AsyncArray` classes. The previous `AsyncArray._config` attribute is now a deprecated alias for `AsyncArray.config`.
3+
4+
Adds a method for creating a new `Array` / `AsyncArray` instance with a new runtime configuration, and fixes inaccurate documentation about the `write_empty_chunks` configuration parameter.

docs/user-guide/arrays.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,32 @@ z.append(np.vstack([a, a]), axis=1)
154154
print(f"Shape after second append: {z.shape}")
155155
```
156156

157+
## Runtime configuration
158+
159+
Zarr arrays are parametrized with a configuration that determines certain aspects of array behavior.
160+
161+
We currently support two configuration options for arrays: `write_empty_chunks` and `order`.
162+
163+
| field | type | default | description |
164+
| - | - | - | - |
165+
| `write_empty_chunks` | `bool` | `False` | Controls whether empty chunks are written to storage. See [Empty chunks](performance.md#empty-chunks). |
166+
| `order` | `Literal["C", "F"]` | `"C"` | The memory layout of arrays returned when reading data from the store. |
167+
168+
You can specify the configuration when you create an array with the `config` keyword argument.
169+
`config` can be passed as either a `dict` or an `ArrayConfig` object.
170+
171+
```python exec="true" session="arrays" source="above" result="ansi"
172+
arr = zarr.create_array({}, shape=(10,), dtype='int8', config={"write_empty_chunks": True})
173+
print(arr.config)
174+
```
175+
176+
To get an array view with a different config, use the `with_config` method.
177+
178+
```python exec="true" session="arrays" source="above" result="ansi"
179+
arr_f = arr.with_config({"order": "F"})
180+
print(arr_f.config)
181+
```
182+
157183
## Compressors
158184

159185
A number of different compressors can be used with Zarr. Zarr includes Blosc,

docs/user-guide/performance.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,14 @@ This optimization prevents storing redundant objects and can speed up reads, but
125125
added computation during array writes, since the contents of
126126
each chunk must be compared to the fill value, and these advantages are contingent on the content of the array.
127127
If you know that your data will form chunks that are almost always non-empty, then there is no advantage to the optimization described above.
128-
In this case, creating an array with `write_empty_chunks=True` (the default) will instruct Zarr to write every chunk without checking for emptiness.
128+
In this case, creating an array with `write_empty_chunks=True` will instruct Zarr to write every chunk without checking for emptiness.
129+
130+
The default value of `write_empty_chunks` is `False`:
131+
132+
```python exec="true" session="performance" source="above" result="ansi"
133+
arr = zarr.create_array(store={}, shape=(1,), dtype='uint8')
134+
assert arr.config.write_empty_chunks == False
135+
```
129136

130137
The following example illustrates the effect of the `write_empty_chunks` flag on
131138
the time required to write an array with different values:

src/zarr/core/array.py

Lines changed: 77 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@
152152
from zarr.codecs.sharding import ShardingCodecIndexLocation
153153
from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar
154154
from zarr.storage import StoreLike
155-
from zarr.types import AnyArray, AnyAsyncArray, AsyncArrayV2, AsyncArrayV3
155+
from zarr.types import AnyArray, AnyAsyncArray, ArrayV2, ArrayV3, AsyncArrayV2, AsyncArrayV3
156156

157157

158158
# Array and AsyncArray are defined in the base ``zarr`` namespace
@@ -308,14 +308,14 @@ class AsyncArray(Generic[T_ArrayMetadata]):
308308
The path to the Zarr store.
309309
codec_pipeline : CodecPipeline
310310
The codec pipeline used for encoding and decoding chunks.
311-
_config : ArrayConfig
311+
config : ArrayConfig
312312
The runtime configuration of the array.
313313
"""
314314

315315
metadata: T_ArrayMetadata
316316
store_path: StorePath
317317
codec_pipeline: CodecPipeline = field(init=False)
318-
_config: ArrayConfig
318+
config: ArrayConfig
319319

320320
@overload
321321
def __init__(
@@ -344,7 +344,7 @@ def __init__(
344344

345345
object.__setattr__(self, "metadata", metadata_parsed)
346346
object.__setattr__(self, "store_path", store_path)
347-
object.__setattr__(self, "_config", config_parsed)
347+
object.__setattr__(self, "config", config_parsed)
348348
object.__setattr__(
349349
self,
350350
"codec_pipeline",
@@ -1028,6 +1028,11 @@ async def example():
10281028
def store(self) -> Store:
10291029
return self.store_path.store
10301030

1031+
@property
1032+
@deprecated("Use AsyncArray.config instead.", category=ZarrDeprecationWarning)
1033+
def _config(self) -> ArrayConfig:
1034+
return self.config
1035+
10311036
@property
10321037
def ndim(self) -> int:
10331038
"""Returns the number of dimensions in the Array.
@@ -1215,7 +1220,7 @@ def order(self) -> MemoryOrder:
12151220
if self.metadata.zarr_format == 2:
12161221
return self.metadata.order
12171222
else:
1218-
return self._config.order
1223+
return self.config.order
12191224

12201225
@property
12211226
def attrs(self) -> dict[str, JSON]:
@@ -1366,6 +1371,35 @@ def _nshards(self) -> int:
13661371
"""
13671372
return product(self._shard_grid_shape)
13681373

1374+
@overload
1375+
def with_config(self: AsyncArrayV2, config: ArrayConfigLike) -> AsyncArrayV2: ...
1376+
1377+
@overload
1378+
def with_config(self: AsyncArrayV3, config: ArrayConfigLike) -> AsyncArrayV3: ...
1379+
1380+
def with_config(self, config: ArrayConfigLike) -> Self:
1381+
"""
1382+
Return a copy of this Array with a new runtime configuration.
1383+
1384+
Parameters
1385+
----------
1386+
1387+
config : ArrayConfigLike
1388+
The runtime config for the new Array. Any keys not specified will be inherited
1389+
from the current array's config.
1390+
1391+
Returns
1392+
-------
1393+
A new Array
1394+
"""
1395+
if isinstance(config, ArrayConfig):
1396+
new_config = config
1397+
else:
1398+
# Merge new config with existing config, so missing keys are inherited
1399+
# from the current array rather than from global defaults
1400+
new_config = ArrayConfig(**{**self.config.to_dict(), **config}) # type: ignore[arg-type]
1401+
return type(self)(metadata=self.metadata, store_path=self.store_path, config=new_config)
1402+
13691403
async def nchunks_initialized(self) -> int:
13701404
"""
13711405
Calculate the number of chunks that have been initialized in storage.
@@ -1641,7 +1675,7 @@ async def _get_selection(
16411675
)
16421676
if product(indexer.shape) > 0:
16431677
# need to use the order from the metadata for v2
1644-
_config = self._config
1678+
_config = self.config
16451679
if self.metadata.zarr_format == 2:
16461680
_config = replace(_config, order=self.order)
16471681

@@ -1812,7 +1846,7 @@ async def _set_selection(
18121846
value_buffer = prototype.nd_buffer.from_ndarray_like(value)
18131847

18141848
# need to use the order from the metadata for v2
1815-
_config = self._config
1849+
_config = self.config
18161850
if self.metadata.zarr_format == 2:
18171851
_config = replace(_config, order=self.metadata.order)
18181852

@@ -2134,6 +2168,19 @@ def async_array(self) -> AsyncArray[T_ArrayMetadata]:
21342168
"""
21352169
return self._async_array
21362170

2171+
@property
2172+
def config(self) -> ArrayConfig:
2173+
"""
2174+
The runtime configuration for this array. This is a read-only property. To modify the
2175+
runtime configuration, use `Array.with_config` to create a new `Array` with the modified
2176+
configuration.
2177+
2178+
Returns
2179+
-------
2180+
An `ArrayConfig` object that defines the runtime configuration for the array.
2181+
"""
2182+
return self.async_array.config
2183+
21372184
@classmethod
21382185
@deprecated("Use zarr.create_array instead.", category=ZarrDeprecationWarning)
21392186
def create(
@@ -2617,6 +2664,29 @@ def _nshards(self) -> int:
26172664
"""
26182665
return self.async_array._nshards
26192666

2667+
@overload
2668+
def with_config(self: ArrayV2, config: ArrayConfigLike) -> ArrayV2: ...
2669+
2670+
@overload
2671+
def with_config(self: ArrayV3, config: ArrayConfigLike) -> ArrayV3: ...
2672+
2673+
def with_config(self, config: ArrayConfigLike) -> Self:
2674+
"""
2675+
Return a copy of this Array with a new runtime configuration.
2676+
2677+
Parameters
2678+
----------
2679+
2680+
config : ArrayConfigLike
2681+
The runtime config for the new Array. Any keys not specified will be inherited
2682+
from the current array's config.
2683+
2684+
Returns
2685+
-------
2686+
A new Array
2687+
"""
2688+
return type(self)(self._async_array.with_config(config))
2689+
26202690
@property
26212691
def nbytes(self) -> int:
26222692
"""

src/zarr/core/array_spec.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,12 @@ def from_dict(cls, data: ArrayConfigParams) -> Self:
6969
kwargs_out[field_name] = data[field_name]
7070
return cls(**kwargs_out)
7171

72+
def to_dict(self) -> ArrayConfigParams:
73+
"""
74+
Serialize an instance of this class to a dict.
75+
"""
76+
return {"order": self.order, "write_empty_chunks": self.write_empty_chunks}
77+
7278

7379
ArrayConfigLike = ArrayConfig | ArrayConfigParams
7480

tests/test_api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ def test_open_array_respects_write_empty_chunks_config(zarr_format: ZarrFormat)
232232
arr2 = zarr.open(store=store, path="test_array", config={"write_empty_chunks": True})
233233
assert isinstance(arr2, zarr.Array)
234234

235-
assert arr2.async_array._config.write_empty_chunks is True
235+
assert arr2.async_array.config.write_empty_chunks is True
236236

237237
arr2[0:5] = np.zeros(5)
238238
assert arr2.nchunks_initialized == 1

tests/test_array.py

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
default_filters_v2,
4545
default_serializer_v3,
4646
)
47+
from zarr.core.array_spec import ArrayConfig, ArrayConfigParams
4748
from zarr.core.buffer import NDArrayLike, NDArrayLikeOrScalar, default_buffer_prototype
4849
from zarr.core.chunk_grids import RegularChunkGrid, _auto_partition
4950
from zarr.core.chunk_key_encodings import ChunkKeyEncodingParams
@@ -890,7 +891,7 @@ def test_write_empty_chunks_behavior(
890891
config={"write_empty_chunks": write_empty_chunks},
891892
)
892893

893-
assert arr.async_array._config.write_empty_chunks == write_empty_chunks
894+
assert arr.async_array.config.write_empty_chunks == write_empty_chunks
894895

895896
# initialize the store with some non-fill value chunks
896897
arr[:] = fill_value + 1
@@ -1630,7 +1631,7 @@ async def test_write_empty_chunks_config(write_empty_chunks: bool, store: Store)
16301631
"""
16311632
with zarr.config.set({"array.write_empty_chunks": write_empty_chunks}):
16321633
arr = await create_array(store, shape=(2, 2), dtype="i4")
1633-
assert arr._config.write_empty_chunks == write_empty_chunks
1634+
assert arr.config.write_empty_chunks == write_empty_chunks
16341635

16351636
@staticmethod
16361637
@pytest.mark.parametrize("path", [None, "", "/", "/foo", "foo", "foo/bar"])
@@ -2262,3 +2263,40 @@ def test_create_array_with_data_num_gets(
22622263
# one get for the metadata and one per shard.
22632264
# Note: we don't actually need one get per shard, but this is the current behavior
22642265
assert store.counter["get"] == 1 + num_shards
2266+
2267+
2268+
@pytest.mark.parametrize("config", [{}, {"write_empty_chunks": True}, {"order": "C"}])
2269+
def test_with_config(config: ArrayConfigParams) -> None:
2270+
"""
2271+
Test that `AsyncArray.with_config` and `Array.with_config` create a copy of the source
2272+
array with a new runtime configuration.
2273+
"""
2274+
# the config we start with
2275+
source_config: ArrayConfigParams = {"write_empty_chunks": False, "order": "F"}
2276+
source_array = zarr.create_array({}, shape=(1,), dtype="uint8", config=source_config)
2277+
2278+
new_async_array_config_dict = source_array._async_array.with_config(config).config.to_dict()
2279+
new_array_config_dict = source_array.with_config(config).config.to_dict()
2280+
2281+
for key in source_config:
2282+
if key in config:
2283+
assert new_async_array_config_dict[key] == config[key] # type: ignore[literal-required]
2284+
assert new_array_config_dict[key] == config[key] # type: ignore[literal-required]
2285+
else:
2286+
assert new_async_array_config_dict[key] == source_config[key] # type: ignore[literal-required]
2287+
assert new_array_config_dict[key] == source_config[key] # type: ignore[literal-required]
2288+
2289+
2290+
def test_with_config_polymorphism() -> None:
2291+
"""
2292+
Test that `AsyncArray.with_config` and `Array.with_config` accept dicts and full array config
2293+
objects.
2294+
"""
2295+
source_config: ArrayConfig = ArrayConfig.from_dict({"write_empty_chunks": False, "order": "F"})
2296+
source_config_dict = source_config.to_dict()
2297+
2298+
arr = zarr.create_array({}, shape=(1,), dtype="uint8")
2299+
arr_source_config = arr.with_config(source_config)
2300+
arr_source_config_dict = arr.with_config(source_config_dict)
2301+
2302+
assert arr_source_config.config == arr_source_config_dict.config

0 commit comments

Comments
 (0)