Skip to content

Commit 6996284

Browse files
committed
remove batch size parameter; add changelog entry
1 parent b1b876a commit 6996284

5 files changed

Lines changed: 45 additions & 48 deletions

File tree

changes/3715.misc.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
Added several performance optimizations to chunk encoding and decoding. Low-latency stores that do not benefit from
2+
`async` operations can now implement synchronous I/O methods, which will be used when available during chunk processing.
3+
Similarly, codecs can implement a synchronous API which will be used if available during chunk processing.
4+
These changes remove unnecessary interactions with the event loop.
5+
6+
The synchronous chunk processing path optionally uses a thread pool to parallelize execution. The number of threads is chosen
7+
based on the estimated compute load of each chunk, which takes into account known encoding and decoding profiles for
8+
different codecs. This algorithm is aware of the latency required for setting up the thread pool, and for
9+
single-chunk workloads we skip the thread pool entirely (unless a minimum worker count is configured).
10+
11+
Use of the thread pool can be disabled in the global configuration. The minimum number of threads
12+
and the maximum number of threads can likewise be set via the configuration.

src/zarr/core/codec_pipeline.py

Lines changed: 24 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import os
44
from concurrent.futures import ThreadPoolExecutor
55
from dataclasses import dataclass
6-
from itertools import islice, pairwise
6+
from itertools import pairwise
77
from typing import TYPE_CHECKING, Any, TypeVar, cast
88
from warnings import warn
99

@@ -46,14 +46,6 @@ def _unzip2(iterable: Iterable[tuple[T, U]]) -> tuple[list[T], list[U]]:
4646
return (out0, out1)
4747

4848

49-
def batched(iterable: Iterable[T], n: int) -> Iterable[tuple[T, ...]]:
50-
if n < 1:
51-
raise ValueError("n must be at least one")
52-
it = iter(iterable)
53-
while batch := tuple(islice(it, n)):
54-
yield batch
55-
56-
5749
def resolve_batched(codec: Codec, chunk_specs: Iterable[ArraySpec]) -> Iterable[ArraySpec]:
5850
return [codec.resolve_metadata(chunk_spec) for chunk_spec in chunk_specs]
5951

@@ -153,25 +145,37 @@ def _choose_workers(
153145
*,
154146
is_encode: bool = False,
155147
) -> int:
156-
"""Decide how many thread pool workers to use (0 = don't use pool)."""
157-
if n_chunks < 2:
148+
"""Decide how many thread pool workers to use (0 = don't use pool).
149+
150+
Respects ``threading.codec_workers`` config:
151+
- ``enabled``: if False, always returns 0.
152+
- ``min``: floor for the number of workers.
153+
- ``max``: ceiling for the number of workers (default: ``os.cpu_count()``).
154+
"""
155+
codec_workers = config.get("threading.codec_workers")
156+
if not codec_workers.get("enabled", True):
158157
return 0
159158

159+
min_workers: int = codec_workers.get("min", 0)
160+
max_workers: int = codec_workers.get("max") or os.cpu_count() or 4
161+
162+
if n_chunks < 2:
163+
return min_workers
164+
160165
per_chunk_ns = _estimate_chunk_work_ns(chunk_nbytes, codecs, is_encode=is_encode)
161166

162-
if per_chunk_ns < _POOL_OVERHEAD_NS:
167+
if per_chunk_ns < _POOL_OVERHEAD_NS and min_workers == 0:
163168
return 0
164169

165170
total_work_ns = per_chunk_ns * n_chunks
166171
total_dispatch_ns = n_chunks * 50_000 # ~50us per task
167-
if total_work_ns < total_dispatch_ns * 3:
172+
if total_work_ns < total_dispatch_ns * 3 and min_workers == 0:
168173
return 0
169174

170175
target_per_worker_ns = 1_000_000 # 1ms
171176
workers = max(1, int(total_work_ns / target_per_worker_ns))
172177

173-
cpu_count = os.cpu_count() or 4
174-
return min(workers, n_chunks, cpu_count)
178+
return max(min_workers, min(workers, n_chunks, max_workers))
175179

176180

177181
def _get_pool(max_workers: int) -> ThreadPoolExecutor:
@@ -208,7 +212,6 @@ class BatchedCodecPipeline(CodecPipeline):
208212
array_array_codecs: tuple[ArrayArrayCodec, ...]
209213
array_bytes_codec: ArrayBytesCodec
210214
bytes_bytes_codecs: tuple[BytesBytesCodec, ...]
211-
batch_size: int
212215

213216
@property
214217
def _all_sync(self) -> bool:
@@ -219,14 +222,13 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
219222
return type(self).from_codecs(c.evolve_from_array_spec(array_spec=array_spec) for c in self)
220223

221224
@classmethod
222-
def from_codecs(cls, codecs: Iterable[Codec], *, batch_size: int | None = None) -> Self:
225+
def from_codecs(cls, codecs: Iterable[Codec]) -> Self:
223226
array_array_codecs, array_bytes_codec, bytes_bytes_codecs = codecs_from_list(list(codecs))
224227

225228
return cls(
226229
array_array_codecs=array_array_codecs,
227230
array_bytes_codec=array_bytes_codec,
228231
bytes_bytes_codecs=bytes_bytes_codecs,
229-
batch_size=batch_size or config.get("codec_pipeline.batch_size"),
230232
)
231233

232234
@property
@@ -478,10 +480,7 @@ async def decode(
478480
]
479481

480482
# Async fallback: layer-by-layer across all chunks.
481-
output: list[NDBuffer | None] = []
482-
for batch_info in batched(items, self.batch_size):
483-
output.extend(await self.decode_batch(batch_info))
484-
return output
483+
return list(await self.decode_batch(items))
485484

486485
async def encode(
487486
self,
@@ -496,10 +495,7 @@ async def encode(
496495
return [self._encode_one(chunk_array, chunk_spec) for chunk_array, chunk_spec in items]
497496

498497
# Async fallback: layer-by-layer across all chunks.
499-
output: list[Buffer | None] = []
500-
for single_batch_info in batched(items, self.batch_size):
501-
output.extend(await self.encode_batch(single_batch_info))
502-
return output
498+
return list(await self.encode_batch(items))
503499

504500
# -------------------------------------------------------------------
505501
# Async read / write (IO overlap via concurrent_map)
@@ -610,14 +606,7 @@ async def read(
610606
out: NDBuffer,
611607
drop_axes: tuple[int, ...] = (),
612608
) -> None:
613-
await concurrent_map(
614-
[
615-
(single_batch_info, out, drop_axes)
616-
for single_batch_info in batched(batch_info, self.batch_size)
617-
],
618-
self.read_batch,
619-
config.get("async.concurrency"),
620-
)
609+
await self.read_batch(batch_info, out, drop_axes)
621610

622611
def _merge_chunk_array(
623612
self,
@@ -840,14 +829,7 @@ async def write(
840829
value: NDBuffer,
841830
drop_axes: tuple[int, ...] = (),
842831
) -> None:
843-
await concurrent_map(
844-
[
845-
(single_batch_info, value, drop_axes)
846-
for single_batch_info in batched(batch_info, self.batch_size)
847-
],
848-
self.write_batch,
849-
config.get("async.concurrency"),
850-
)
832+
await self.write_batch(batch_info, value, drop_axes)
851833

852834
# -------------------------------------------------------------------
853835
# Fully synchronous read / write (no event loop)

src/zarr/core/config.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,13 @@ def enable_gpu(self) -> ConfigSet:
9999
"target_shard_size_bytes": None,
100100
},
101101
"async": {"concurrency": 10, "timeout": None},
102-
"threading": {"max_workers": None},
102+
"threading": {
103+
"max_workers": None,
104+
"codec_workers": {"enabled": True, "min": 0, "max": None},
105+
},
103106
"json_indent": 2,
104107
"codec_pipeline": {
105108
"path": "zarr.core.codec_pipeline.BatchedCodecPipeline",
106-
"batch_size": 1,
107109
},
108110
"codecs": {
109111
"blosc": "zarr.codecs.blosc.BloscCodec",

tests/package_with_entrypoint/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) ->
4040

4141

4242
class TestEntrypointCodecPipeline(CodecPipeline):
43-
def __init__(self, batch_size: int = 1) -> None:
43+
def __init__(self) -> None:
4444
pass
4545

4646
async def encode(

tests/test_config.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,13 @@ def test_config_defaults_set() -> None:
5656
"target_shard_size_bytes": None,
5757
},
5858
"async": {"concurrency": 10, "timeout": None},
59-
"threading": {"max_workers": None},
59+
"threading": {
60+
"max_workers": None,
61+
"codec_workers": {"enabled": True, "min": 0, "max": None},
62+
},
6063
"json_indent": 2,
6164
"codec_pipeline": {
6265
"path": "zarr.core.codec_pipeline.BatchedCodecPipeline",
63-
"batch_size": 1,
6466
},
6567
"codecs": {
6668
"blosc": "zarr.codecs.blosc.BloscCodec",
@@ -103,7 +105,6 @@ def test_config_defaults_set() -> None:
103105
assert config.get("array.order") == "C"
104106
assert config.get("async.concurrency") == 10
105107
assert config.get("async.timeout") is None
106-
assert config.get("codec_pipeline.batch_size") == 1
107108
assert config.get("json_indent") == 2
108109

109110

0 commit comments

Comments
 (0)