@@ -219,7 +219,8 @@ def test_sharding_multiple_chunks_partial_shard_read(
219219 # 1MiB, enough to coalesce all chunks within a shard in this example
220220 zarr .config .set ({"sharding.read.coalesce_max_gap_bytes" : 2 ** 20 })
221221 else :
222- zarr .config .set ({"sharding.read.coalesce_max_gap_bytes" : - 1 }) # disable coalescing
222+ # disable coalescing
223+ zarr .config .set ({"sharding.read.coalesce_max_gap_bytes" : - 1 })
223224
224225 store_mock = AsyncMock (wraps = store , spec = store .__class__ )
225226 a = zarr .create_array (
@@ -269,6 +270,54 @@ def test_sharding_multiple_chunks_partial_shard_read(
269270 assert isinstance (kwargs ["byte_range" ], (SuffixByteRequest , RangeByteRequest ))
270271
271272
@pytest.mark.parametrize("index_location", ["start", "end"])
@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
@pytest.mark.parametrize("coalesce_reads", [True, False])
def test_sharding_duplicate_read_indexes(
    store: Store, index_location: ShardingCodecIndexLocation, coalesce_reads: bool
) -> None:
    """
    Check that coalesce optimization parses the grouped reads back out correctly
    when there are multiple reads for the same index.

    The test writes a small 1-D sharded array, then reads it back with an
    indexer that repeats each requested position. It verifies both that the
    values returned match the source data and that the number of ``get``
    calls issued to the store matches the expectation for the coalescing
    mode under test.
    """
    array_shape = (15,)
    shard_shape = (8,)
    chunk_shape = (2,)
    data = np.arange(np.prod(array_shape), dtype="float32").reshape(array_shape)

    if coalesce_reads:
        # 1MiB, enough to coalesce all chunks within a shard in this example
        zarr.config.set({"sharding.read.coalesce_max_gap_bytes": 2**20})
    else:
        # disable coalescing
        zarr.config.set({"sharding.read.coalesce_max_gap_bytes": -1})

    # Wrap the real store so calls pass through while being counted.
    store_mock = AsyncMock(wraps=store, spec=store.__class__)
    a = zarr.create_array(
        StorePath(store_mock),
        shape=data.shape,
        chunks=chunk_shape,
        shards={"shape": shard_shape, "index_location": index_location},
        compressors=BloscCodec(cname="lz4"),
        dtype=data.dtype,
        fill_value=-1,
    )
    a[:] = data

    store_mock.reset_mock()  # ignore store calls during array creation

    # Read the same index multiple times, do that from two chunks which can be coalesced
    indexer = [8, 8, 12, 12]
    # The comparison result must be asserted; a bare np.array_equal(...) call
    # discards the boolean and would let incorrect data pass silently.
    assert np.array_equal(a[indexer], data[indexer])

    if coalesce_reads:
        # 1 shard index request + 1 coalesced read
        assert store_mock.get.call_count == 2
    else:
        # 1 shard index request + 2 chunks
        assert store_mock.get.call_count == 3
319+
320+
272321@pytest .mark .parametrize ("index_location" , ["start" , "end" ])
273322@pytest .mark .parametrize ("store" , ["local" , "memory" , "zip" ], indirect = ["store" ])
274323def test_sharding_read_empty_chunks_within_non_empty_shard_write_empty_false (
0 commit comments