Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/4054.misc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add Hypothesis property tests for block and mask indexing (`test_block_indexing`, `test_mask_indexing`), along with a `block_indices` strategy in `zarr.testing.strategies`. These extend the existing randomized indexing coverage (basic, orthogonal, and vectorized) to the block and mask selection methods.
45 changes: 45 additions & 0 deletions src/zarr/testing/strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,51 @@ def orthogonal_indices(
return tuple(zindexer), tuple(np.broadcast_arrays(*npindexer))


@st.composite
def block_indices(
    draw: st.DrawFn, *, shape: tuple[int, ...], chunks: tuple[int, ...]
) -> tuple[tuple[int | slice, ...], tuple[slice, ...]]:
    """
    Draw a block selection together with its array-space equivalent.

    A block selection picks whole chunks on the chunk grid instead of
    individual elements. Only integers and step-1 slices over the grid are
    supported (strided block slices are rejected), so this strategy never
    produces newaxis, ellipsis, or a slice step. The translation to array
    coordinates assumes a *regular* (uniform) chunk grid: ``shape`` is tiled by
    ``chunks``, with at most a shorter trailing chunk per dimension. Each
    dimension needs at least one chunk (``size >= 1``).

    Parameters
    ----------
    shape
        Shape of the array being indexed.
    chunks
        Regular chunk shape; must have the same length as ``shape``.

    Returns
    -------
    block_indexer
        One int or step-1 slice per dimension, in chunk-grid coordinates,
        usable with ``Array.blocks`` / ``Array.get_block_selection`` /
        ``set_block_selection``.
    array_indexer
        One slice per dimension, in element coordinates, selecting the same
        region of the matching numpy array. It serves as the comparison oracle.
    """
    # Chunks per dimension, i.e. ceil(extent / chunk_len) using integer arithmetic only.
    chunk_counts = tuple(
        -(-extent // chunk_len) for extent, chunk_len in zip(shape, chunks, strict=True)
    )

    block_sel: list[int | slice] = []
    array_sel: list[slice] = []
    for extent, chunk_len, n_blocks in zip(shape, chunks, chunk_counts, strict=True):
        pick_single_block = draw(st.booleans())
        if not pick_single_block:
            # A contiguous, possibly empty, run of whole blocks. The first block
            # has to exist: a block slice starting at n_blocks is rejected, even
            # though numpy would treat the analogous arr[len:len] as empty.
            first = draw(st.integers(min_value=0, max_value=n_blocks - 1))
            last = draw(st.integers(min_value=first, max_value=n_blocks))
            block_sel.append(slice(first, last))
            lo = first * chunk_len
            hi = min(last * chunk_len, extent)
        else:
            # Exactly one block; negative values address it from the end of the grid.
            index = draw(st.integers(min_value=-n_blocks, max_value=n_blocks - 1))
            block_sel.append(index)
            # The modulo folds a negative block index onto its non-negative position.
            lo = (index % n_blocks) * chunk_len
            hi = min(lo + chunk_len, extent)
        # Clamping to ``extent`` above accounts for a shorter final chunk.
        array_sel.append(slice(lo, hi))
    return tuple(block_sel), tuple(array_sel)


def key_ranges(
keys: SearchStrategy[str] = node_names, max_size: int = sys.maxsize
) -> SearchStrategy[list[tuple[str, RangeByteRequest]]]:
Expand Down
59 changes: 59 additions & 0 deletions tests/test_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@
array_metadata,
arrays,
basic_indices,
block_indices,
complex_rectilinear_arrays,
np_array_and_chunks,
numpy_arrays,
orthogonal_indices,
rectilinear_arrays,
Expand Down Expand Up @@ -230,6 +232,63 @@ async def test_vindex(data: st.DataObject) -> None:
# note: async vindex setitem not yet implemented


@settings(deadline=None)
@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning")
@given(data=st.data())
def test_mask_indexing(data: st.DataObject) -> None:
    """
    Check that boolean-mask selection on a zarr array agrees with numpy.

    A boolean mask with the full shape of the array is drawn. Reads and writes
    are compared against the same operation applied to a numpy array read from
    the zarr array, through both ``get_mask_selection`` / ``set_mask_selection``
    and the ``vindex`` interface.
    """
    zarray = data.draw(st.one_of(simple_arrays(), rectilinear_arrays()))
    nparray = zarray[:]
    mask = data.draw(npst.arrays(dtype=np.bool_, shape=st.just(nparray.shape)))

    expected = nparray[mask]

    # sync get, via both the dedicated method and the vindex interface
    assert_array_equal(expected, zarray.get_mask_selection(mask))
    assert_array_equal(expected, zarray.vindex[mask])

    # sync set, via both interfaces; examples with sharded arrays are discarded
    # before any write is attempted
    assume(zarray.shards is None)  # GH2834
    new_data = data.draw(numpy_arrays(shapes=st.just(expected.shape), dtype=nparray.dtype))
    nparray[mask] = new_data
    zarray.set_mask_selection(mask, new_data)
    assert_array_equal(nparray, zarray[:])

    # Draw fresh values for the second interface. Re-writing the data that
    # set_mask_selection just stored would leave the array unchanged, so the
    # assertion below would pass even if the vindex setter did nothing.
    newer_data = data.draw(numpy_arrays(shapes=st.just(expected.shape), dtype=nparray.dtype))
    nparray[mask] = newer_data
    zarray.vindex[mask] = newer_data
    assert_array_equal(nparray, zarray[:])


@settings(deadline=None)
@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning")
@given(data=st.data())
def test_block_indexing(data: st.DataObject) -> None:
    """
    Check that block selection on a zarr array agrees with numpy slicing.

    A numpy array and a regular chunking are drawn, written to a zarr array,
    and then a block selection is compared against the equivalent array-space
    slices for both reads and writes, through the ``.blocks`` interface and
    through ``get_block_selection`` / ``set_block_selection``.
    """
    # Block indexing addresses whole chunks on a regular grid; the array-space
    # oracle in block_indices() assumes regular, unsharded chunks, so build the
    # array directly from a regular chunking rather than drawing one that might
    # be rectilinear or sharded.
    nparray, chunks = data.draw(
        np_array_and_chunks(arrays=numpy_arrays(shapes=npst.array_shapes(max_dims=4, min_side=1)))
    )
    store = data.draw(stores)
    zarray = zarr.create_array(store=store, shape=nparray.shape, chunks=chunks, dtype=nparray.dtype)
    zarray[...] = nparray

    block_indexer, array_indexer = data.draw(block_indices(shape=nparray.shape, chunks=chunks))
    expected = nparray[array_indexer]

    # sync get, via both the .blocks interface and the dedicated method
    assert_array_equal(expected, zarray.blocks[block_indexer])
    assert_array_equal(expected, zarray.get_block_selection(block_indexer))

    # sync set, via both interfaces
    new_data = data.draw(numpy_arrays(shapes=st.just(expected.shape), dtype=nparray.dtype))
    nparray[array_indexer] = new_data
    zarray.blocks[block_indexer] = new_data
    assert_array_equal(nparray, zarray[:])

    # Draw fresh values for the second interface. Re-writing the data that the
    # .blocks setter just stored would leave the array unchanged, so the
    # assertion below would pass even if set_block_selection did nothing.
    newer_data = data.draw(numpy_arrays(shapes=st.just(expected.shape), dtype=nparray.dtype))
    nparray[array_indexer] = newer_data
    zarray.set_block_selection(block_indexer, newer_data)
    assert_array_equal(nparray, zarray[:])


@given(store=stores, meta=array_metadata()) # type: ignore[misc]
@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning")
async def test_roundtrip_array_metadata_from_store(
Expand Down
Loading