Skip to content

Commit c9e65c7

Browse files
authored
Merge branch 'main' into ig/spec0_py314
2 parents 73b72b1 + 9c47b6d commit c9e65c7

File tree

7 files changed

+162
-11
lines changed

7 files changed

+162
-11
lines changed

changes/3668.feature.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Exposes the array runtime configuration as an attribute called `config` on the `Array` and
2+
`AsyncArray` classes. The previous `AsyncArray._config` attribute is now a deprecated alias for `AsyncArray.config`.
3+
4+
Adds a method for creating a new `Array` / `AsyncArray` instance with a new runtime configuration, and fixes inaccurate documentation about the `write_empty_chunks` configuration parameter.

docs/user-guide/arrays.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,32 @@ z.append(np.vstack([a, a]), axis=1)
154154
print(f"Shape after second append: {z.shape}")
155155
```
156156

157+
## Runtime configuration
158+
159+
Zarr arrays are parametrized with a configuration that determines certain aspects of array behavior.
160+
161+
We currently support two configuration options for arrays: `write_empty_chunks` and `order`.
162+
163+
| field | type | default | description |
164+
| - | - | - | - |
165+
| `write_empty_chunks` | `bool` | `False` | Controls whether empty chunks are written to storage. See [Empty chunks](performance.md#empty-chunks). |
166+
| `order` | `Literal["C", "F"]` | `"C"` | The memory layout of arrays returned when reading data from the store. |
167+
168+
You can specify the configuration when you create an array with the `config` keyword argument.
169+
`config` can be passed as either a `dict` or an `ArrayConfig` object.
170+
171+
```python exec="true" session="arrays" source="above" result="ansi"
172+
arr = zarr.create_array({}, shape=(10,), dtype='int8', config={"write_empty_chunks": True})
173+
print(arr.config)
174+
```
175+
176+
To get an array view with a different config, use the `with_config` method.
177+
178+
```python exec="true" session="arrays" source="above" result="ansi"
179+
arr_f = arr.with_config({"order": "F"})
180+
print(arr_f.config)
181+
```
182+
157183
## Compressors
158184

159185
A number of different compressors can be used with Zarr. Zarr includes Blosc,

docs/user-guide/performance.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,14 @@ This optimization prevents storing redundant objects and can speed up reads, but
125125
added computation during array writes, since the contents of
126126
each chunk must be compared to the fill value, and these advantages are contingent on the content of the array.
127127
If you know that your data will form chunks that are almost always non-empty, then there is no advantage to the optimization described above.
128-
In this case, creating an array with `write_empty_chunks=True` (the default) will instruct Zarr to write every chunk without checking for emptiness.
128+
In this case, creating an array with `write_empty_chunks=True` will instruct Zarr to write every chunk without checking for emptiness.
129+
130+
The default value of `write_empty_chunks` is `False`:
131+
132+
```python exec="true" session="performance" source="above" result="ansi"
133+
# No `config` argument is passed, so the array is created with the default runtime configuration.
arr = zarr.create_array(store={}, shape=(1,), dtype='uint8')
assert arr.config.write_empty_chunks is False
135+
```
129136

130137
The following example illustrates the effect of the `write_empty_chunks` flag on
131138
the time required to write an array with different values:

src/zarr/core/array.py

Lines changed: 77 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@
141141
from zarr.codecs.sharding import ShardingCodecIndexLocation
142142
from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar
143143
from zarr.storage import StoreLike
144-
from zarr.types import AnyArray, AnyAsyncArray, AsyncArrayV2, AsyncArrayV3
144+
from zarr.types import AnyArray, AnyAsyncArray, ArrayV2, ArrayV3, AsyncArrayV2, AsyncArrayV3
145145

146146

147147
# Array and AsyncArray are defined in the base ``zarr`` namespace
@@ -297,14 +297,14 @@ class AsyncArray[T_ArrayMetadata: (ArrayV2Metadata, ArrayV3Metadata)]:
297297
The path to the Zarr store.
298298
codec_pipeline : CodecPipeline
299299
The codec pipeline used for encoding and decoding chunks.
300-
_config : ArrayConfig
300+
config : ArrayConfig
301301
The runtime configuration of the array.
302302
"""
303303

304304
metadata: T_ArrayMetadata
305305
store_path: StorePath
306306
codec_pipeline: CodecPipeline = field(init=False)
307-
_config: ArrayConfig
307+
config: ArrayConfig
308308

309309
@overload
310310
def __init__(
@@ -333,7 +333,7 @@ def __init__(
333333

334334
object.__setattr__(self, "metadata", metadata_parsed)
335335
object.__setattr__(self, "store_path", store_path)
336-
object.__setattr__(self, "_config", config_parsed)
336+
object.__setattr__(self, "config", config_parsed)
337337
object.__setattr__(
338338
self,
339339
"codec_pipeline",
@@ -1009,6 +1009,11 @@ async def example():
10091009
def store(self) -> Store:
10101010
return self.store_path.store
10111011

1012+
@property
@deprecated("Use AsyncArray.config instead.", category=ZarrDeprecationWarning)
def _config(self) -> ArrayConfig:
    """
    Deprecated alias for the ``config`` attribute.

    Returns
    -------
    ArrayConfig
        The same object that ``self.config`` returns.
    """
    return self.config
1016+
10121017
@property
10131018
def ndim(self) -> int:
10141019
"""Returns the number of dimensions in the Array.
@@ -1162,7 +1167,7 @@ def order(self) -> MemoryOrder:
11621167
if self.metadata.zarr_format == 2:
11631168
return self.metadata.order
11641169
else:
1165-
return self._config.order
1170+
return self.config.order
11661171

11671172
@property
11681173
def attrs(self) -> dict[str, JSON]:
@@ -1295,6 +1300,35 @@ def _nshards(self) -> int:
12951300
"""
12961301
return product(self._shard_grid_shape)
12971302

1303+
@overload
def with_config(self: AsyncArrayV2, config: ArrayConfigLike) -> AsyncArrayV2: ...

@overload
def with_config(self: AsyncArrayV3, config: ArrayConfigLike) -> AsyncArrayV3: ...

def with_config(self, config: ArrayConfigLike) -> Self:
    """
    Create a new AsyncArray with the same metadata and store path as this one,
    but with a different runtime configuration.

    Parameters
    ----------
    config : ArrayConfigLike
        The runtime configuration for the new array. An ``ArrayConfig`` instance is
        used as-is. A dict is layered on top of this array's current configuration,
        so any key it omits keeps the value from this array.

    Returns
    -------
    Self
        A new instance of this array's class that uses the resulting configuration.
    """
    # For a dict, start from this array's own settings (not the global defaults)
    # and let the caller-supplied keys override them.
    resolved_config = (
        config
        if isinstance(config, ArrayConfig)
        else ArrayConfig(**{**self.config.to_dict(), **config})  # type: ignore[arg-type]
    )
    return type(self)(
        metadata=self.metadata,
        store_path=self.store_path,
        config=resolved_config,
    )
1331+
12981332
async def nchunks_initialized(self) -> int:
12991333
"""
13001334
Calculate the number of chunks that have been initialized in storage.
@@ -1567,7 +1601,7 @@ async def _get_selection(
15671601
)
15681602
if product(indexer.shape) > 0:
15691603
# need to use the order from the metadata for v2
1570-
_config = self._config
1604+
_config = self.config
15711605
if self.metadata.zarr_format == 2:
15721606
_config = replace(_config, order=self.order)
15731607

@@ -1738,7 +1772,7 @@ async def _set_selection(
17381772
value_buffer = prototype.nd_buffer.from_ndarray_like(value)
17391773

17401774
# need to use the order from the metadata for v2
1741-
_config = self._config
1775+
_config = self.config
17421776
if self.metadata.zarr_format == 2:
17431777
_config = replace(_config, order=self.metadata.order)
17441778

@@ -2060,6 +2094,19 @@ def async_array(self) -> AsyncArray[T_ArrayMetadata]:
20602094
"""
20612095
return self._async_array
20622096

2097+
@property
def config(self) -> ArrayConfig:
    """
    The runtime configuration of this array (read-only).

    The configuration cannot be changed in place. Call `Array.with_config` to get a
    new `Array` that uses a different runtime configuration.

    Returns
    -------
    ArrayConfig
        The runtime configuration held by the underlying async array.
    """
    underlying = self.async_array
    return underlying.config
2109+
20632110
@classmethod
20642111
@deprecated("Use zarr.create_array instead.", category=ZarrDeprecationWarning)
20652112
def create(
@@ -2521,6 +2568,29 @@ def _nshards(self) -> int:
25212568
"""
25222569
return self.async_array._nshards
25232570

2571+
@overload
2572+
def with_config(self: ArrayV2, config: ArrayConfigLike) -> ArrayV2: ...
2573+
2574+
@overload
2575+
def with_config(self: ArrayV3, config: ArrayConfigLike) -> ArrayV3: ...
2576+
2577+
def with_config(self, config: ArrayConfigLike) -> Self:
2578+
"""
2579+
Return a copy of this Array with a new runtime configuration.
2580+
2581+
Parameters
2582+
----------
2583+
2584+
config : ArrayConfigLike
2585+
The runtime config for the new Array. Any keys not specified will be inherited
2586+
from the current array's config.
2587+
2588+
Returns
2589+
-------
2590+
A new Array
2591+
"""
2592+
return type(self)(self._async_array.with_config(config))
2593+
25242594
@property
25252595
def nbytes(self) -> int:
25262596
"""

src/zarr/core/array_spec.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,12 @@ def from_dict(cls, data: ArrayConfigParams) -> Self:
6969
kwargs_out[field_name] = data[field_name]
7070
return cls(**kwargs_out)
7171

72+
def to_dict(self) -> ArrayConfigParams:
    """
    Return this configuration as a plain dict.

    Returns
    -------
    ArrayConfigParams
        A dict with the keys ``"order"`` and ``"write_empty_chunks"``, holding the
        values of the attributes of the same name.
    """
    serialized: ArrayConfigParams = {
        "order": self.order,
        "write_empty_chunks": self.write_empty_chunks,
    }
    return serialized
77+
7278

7379
ArrayConfigLike = ArrayConfig | ArrayConfigParams
7480

tests/test_api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ def test_open_array_respects_write_empty_chunks_config(zarr_format: ZarrFormat)
232232
arr2 = zarr.open(store=store, path="test_array", config={"write_empty_chunks": True})
233233
assert isinstance(arr2, zarr.Array)
234234

235-
assert arr2.async_array._config.write_empty_chunks is True
235+
assert arr2.async_array.config.write_empty_chunks is True
236236

237237
arr2[0:5] = np.zeros(5)
238238
assert arr2.nchunks_initialized == 1

tests/test_array.py

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
default_filters_v2,
4545
default_serializer_v3,
4646
)
47+
from zarr.core.array_spec import ArrayConfig, ArrayConfigParams
4748
from zarr.core.buffer import NDArrayLike, NDArrayLikeOrScalar, default_buffer_prototype
4849
from zarr.core.chunk_grids import _auto_partition
4950
from zarr.core.chunk_key_encodings import ChunkKeyEncodingParams
@@ -889,7 +890,7 @@ def test_write_empty_chunks_behavior(
889890
config={"write_empty_chunks": write_empty_chunks},
890891
)
891892

892-
assert arr.async_array._config.write_empty_chunks == write_empty_chunks
893+
assert arr.async_array.config.write_empty_chunks == write_empty_chunks
893894

894895
# initialize the store with some non-fill value chunks
895896
arr[:] = fill_value + 1
@@ -1562,7 +1563,7 @@ async def test_write_empty_chunks_config(write_empty_chunks: bool, store: Store)
15621563
"""
15631564
with zarr.config.set({"array.write_empty_chunks": write_empty_chunks}):
15641565
arr = await create_array(store, shape=(2, 2), dtype="i4")
1565-
assert arr._config.write_empty_chunks == write_empty_chunks
1566+
assert arr.config.write_empty_chunks == write_empty_chunks
15661567

15671568
@staticmethod
15681569
@pytest.mark.parametrize("path", [None, "", "/", "/foo", "foo", "foo/bar"])
@@ -2194,3 +2195,40 @@ def test_create_array_with_data_num_gets(
21942195
# one get for the metadata and one per shard.
21952196
# Note: we don't actually need one get per shard, but this is the current behavior
21962197
assert store.counter["get"] == 1 + num_shards
2198+
2199+
2200+
@pytest.mark.parametrize("config", [{}, {"write_empty_chunks": True}, {"order": "C"}])
def test_with_config(config: ArrayConfigParams) -> None:
    """
    Check that `AsyncArray.with_config` and `Array.with_config` return an array whose
    runtime configuration takes the requested keys from `config` and keeps the source
    array's values for every other key.
    """
    # the config we start with
    source_config: ArrayConfigParams = {"write_empty_chunks": False, "order": "F"}
    source_array = zarr.create_array({}, shape=(1,), dtype="uint8", config=source_config)

    from_async_array = source_array._async_array.with_config(config).config.to_dict()
    from_array = source_array.with_config(config).config.to_dict()

    for key in source_config:
        # an overridden key takes the new value; any other key keeps the source value
        expected = config[key] if key in config else source_config[key]  # type: ignore[literal-required]
        assert from_async_array[key] == expected  # type: ignore[literal-required]
        assert from_array[key] == expected  # type: ignore[literal-required]
2220+
2221+
2222+
def test_with_config_polymorphism() -> None:
    """
    Test that `AsyncArray.with_config` and `Array.with_config` accept dicts and full array config
    objects, and that both forms produce the requested configuration.
    """
    source_config: ArrayConfig = ArrayConfig.from_dict({"write_empty_chunks": False, "order": "F"})
    source_config_dict = source_config.to_dict()

    arr = zarr.create_array({}, shape=(1,), dtype="uint8")

    # Compare against the requested config itself, not only the two results with each
    # other: otherwise an implementation that ignored its argument would still pass.
    arr_from_object = arr.with_config(source_config)
    arr_from_dict = arr.with_config(source_config_dict)
    assert arr_from_object.config == source_config
    assert arr_from_dict.config == source_config

    # The same must hold for the async array.
    async_from_object = arr._async_array.with_config(source_config)
    async_from_dict = arr._async_array.with_config(source_config_dict)
    assert async_from_object.config == source_config
    assert async_from_dict.config == source_config

0 commit comments

Comments
 (0)