Skip to content

Commit d9a7842

Browse files
committed
Add test for duplicate integer indexing into a coalesced group
1 parent 6322ca6 commit d9a7842

File tree

1 file changed

+50
-1
lines changed

1 file changed

+50
-1
lines changed

tests/test_codecs/test_sharding.py

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,8 @@ def test_sharding_multiple_chunks_partial_shard_read(
219219
# 1MiB, enough to coalesce all chunks within a shard in this example
220220
zarr.config.set({"sharding.read.coalesce_max_gap_bytes": 2**20})
221221
else:
222-
zarr.config.set({"sharding.read.coalesce_max_gap_bytes": -1}) # disable coalescing
222+
# disable coalescing
223+
zarr.config.set({"sharding.read.coalesce_max_gap_bytes": -1})
223224

224225
store_mock = AsyncMock(wraps=store, spec=store.__class__)
225226
a = zarr.create_array(
@@ -269,6 +270,54 @@ def test_sharding_multiple_chunks_partial_shard_read(
269270
assert isinstance(kwargs["byte_range"], (SuffixByteRequest, RangeByteRequest))
270271

271272

273+
@pytest.mark.parametrize("index_location", ["start", "end"])
@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
@pytest.mark.parametrize("coalesce_reads", [True, False])
def test_sharding_duplicate_read_indexes(
    store: Store, index_location: ShardingCodecIndexLocation, coalesce_reads: bool
) -> None:
    """
    Check that duplicate integer indexes are read back correctly from a shard.

    The same element is requested more than once from two different chunks of
    the same shard. With coalescing enabled, the chunk reads are expected to be
    merged into a single store request; the values returned must still match
    the source data, and the number of store ``get`` calls must match the
    configured coalescing mode.
    """
    array_shape = (15,)
    shard_shape = (8,)
    chunk_shape = (2,)
    data = np.arange(np.prod(array_shape), dtype="float32").reshape(array_shape)

    # 1MiB is enough to coalesce all chunks within a shard in this example;
    # -1 disables coalescing.
    coalesce_max_gap_bytes = 2**20 if coalesce_reads else -1
    zarr.config.set({"sharding.read.coalesce_max_gap_bytes": coalesce_max_gap_bytes})

    store_mock = AsyncMock(wraps=store, spec=store.__class__)
    a = zarr.create_array(
        StorePath(store_mock),
        shape=data.shape,
        chunks=chunk_shape,
        shards={"shape": shard_shape, "index_location": index_location},
        compressors=BloscCodec(cname="lz4"),
        dtype=data.dtype,
        fill_value=-1,
    )
    a[:] = data

    store_mock.reset_mock()  # ignore store calls during array creation

    # Read the same index multiple times, do that from two chunks which can be coalesced
    indexer = [8, 8, 12, 12]
    # The comparison result must be asserted; a bare np.array_equal(...) call
    # would silently discard a mismatch and the test would pass regardless.
    assert np.array_equal(a[indexer], data[indexer])

    if coalesce_reads:
        # 1 shard index request + 1 coalesced read
        assert store_mock.get.call_count == 2
    else:
        # 1 shard index request + 2 chunks
        assert store_mock.get.call_count == 3
319+
320+
272321
@pytest.mark.parametrize("index_location", ["start", "end"])
273322
@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
274323
def test_sharding_read_empty_chunks_within_non_empty_shard_write_empty_false(

0 commit comments

Comments
 (0)