@@ -197,6 +197,71 @@ def test_sharding_partial_read(
197197 assert np .all (read_data == 1 )
198198
199199
@pytest.mark.slow_hypothesis
@pytest.mark.parametrize("store", ["local"], indirect=["store"])
def test_partial_shard_read_performance(store: Store) -> None:
    """Benchmark partial reads from a single large shard.

    Sweeps the product of async concurrency levels, simulated per-``get``
    store latencies, and three orthogonal slice statements, timing each
    combination and recording the number of ``store.get`` calls issued.
    Results are dumped to a JSON file in the working directory.

    Parameters
    ----------
    store : Store
        Local store fixture (injected via the ``store`` parametrization).
    """
    import asyncio
    import json
    from functools import partial
    from itertools import product
    from timeit import timeit
    from unittest.mock import AsyncMock

    # The whole test array is a single shard to keep runtime manageable while
    # using a realistic shard size (256 MiB uncompressed, ~115 MiB compressed).
    # In practice, the array is likely to be much larger with many shards of this
    # rough order of magnitude. There are 512 chunks per shard in this example.
    array_shape = (512, 512, 512)
    shard_shape = (512, 512, 512)  # 256 MiB uncompressed uint16s
    chunk_shape = (64, 64, 64)  # 512 KiB uncompressed uint16s
    dtype = np.uint16

    a = zarr.create_array(
        StorePath(store),
        shape=array_shape,
        chunks=chunk_shape,
        shards=shard_shape,
        compressors=BloscCodec(cname="zstd"),
        dtype=dtype,
        fill_value=np.iinfo(dtype).max,
    )
    # Narrow range of values lets zstd compress to about 1/2 of uncompressed size
    a[:] = np.random.default_rng(123).integers(low=0, high=50, size=array_shape, dtype=dtype)

    num_calls = 20
    experiments = []
    for concurrency, get_latency, statement in product(
        [1, 10, 100], [0.0, 0.01], ["a[0, :, :]", "a[:, 0, :]", "a[:, :, 0]"]
    ):
        # Wrap store.get with an artificial delay to emulate remote-storage latency.
        async def get_with_latency(*args: Any, get_latency: float, **kwargs: Any) -> Any:
            await asyncio.sleep(get_latency)
            return await store.get(*args, **kwargs)

        store_mock = AsyncMock(wraps=store, spec=store.__class__)
        store_mock.get.side_effect = partial(get_with_latency, get_latency=get_latency)

        a = zarr.open_array(StorePath(store_mock))

        store_mock.reset_mock()

        # Scope the concurrency override with a context manager so the global
        # zarr config is restored after each measurement; the original bare
        # zarr.config.set(...) leaked the last value into subsequent tests.
        with zarr.config.set({"async.concurrency": concurrency}):
            # Each timeit call accesses a 512x512 slice covering 64 chunks
            time = timeit(statement, number=num_calls, globals={"a": a}) / num_calls
        experiments.append(
            {
                "concurrency": concurrency,
                "statement": statement,
                "get_latency": get_latency,
                "time": time,
                "store_get_calls": store_mock.get.call_count,
            }
        )

    with open("zarr-python-partial-shard-read-performance-no-coalesce.json", "w") as f:
        json.dump(experiments, f)
263+
264+
200265@pytest .mark .parametrize (
201266 "array_fixture" ,
202267 [
0 commit comments