Skip to content

Commit 4ee48b5

Browse files
committed
Fix for OPSI when chunks are not multiples of blocks
1 parent f26764c commit 4ee48b5

2 files changed

Lines changed: 32 additions & 4 deletions

File tree

src/blosc2/indexing.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3196,6 +3196,17 @@ def _opsi_write_block_boundaries(
31963196
_write_ndarray_linear_span(stage.maxs, first_block, maxs)
31973197

31983198

3199+
def _opsi_storage_chunk_len(chunk_len: int, block_len: int) -> int:
3200+
"""Return an OPSI sidecar chunk length aligned to whole OPSI blocks."""
3201+
chunk_len = max(1, int(chunk_len))
3202+
block_len = max(1, int(block_len))
3203+
if chunk_len % block_len == 0:
3204+
return chunk_len
3205+
if chunk_len >= block_len:
3206+
return (chunk_len // block_len) * block_len
3207+
return block_len
3208+
3209+
31993210
def _opsi_build_stage1(
32003211
array: blosc2.NDArray,
32013212
target: dict,
@@ -3209,10 +3220,9 @@ def _opsi_build_stage1(
32093220
cparams: dict | blosc2.CParams | None = None,
32103221
) -> OpsiStageSidecars:
32113222
size = int(array.shape[0])
3212-
chunk_len = int(array.chunks[0])
3223+
source_chunk_len = int(array.chunks[0])
32133224
block_len = int(array.blocks[0])
3214-
if chunk_len % block_len != 0:
3215-
raise ValueError("OPSI requires chunk length to be a multiple of block length")
3225+
chunk_len = _opsi_storage_chunk_len(source_chunk_len, block_len)
32163226
nblocks = math.ceil(size / block_len)
32173227
stage = _opsi_stage_create(
32183228
array, token, kind, cycle, size, nblocks, dtype, persistent, chunk_len, block_len, cparams
@@ -3374,7 +3384,7 @@ def _build_opsi_descriptor(
33743384
"positions_path": positions_sidecar["path"],
33753385
"mins_path": mins_sidecar["path"],
33763386
"maxs_path": maxs_sidecar["path"],
3377-
"chunk_len": int(array.chunks[0]),
3387+
"chunk_len": _opsi_storage_chunk_len(int(array.chunks[0]), int(array.blocks[0])),
33783388
"block_len": int(array.blocks[0]),
33793389
"nblocks": 0,
33803390
"cycles": 0,

tests/ndarray/test_indexing.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,24 @@ def test_scalar_index_matches_scan(kind):
4747
np.testing.assert_array_equal(indexed, data[(data >= 120_000) & (data < 125_000)])
4848

4949

50+
def test_opsi_index_accepts_non_multiple_chunk_and_block_lengths():
    """Build an OPSI index on an array whose chunk length (781) is not a
    multiple of its block length (160), then check that the descriptor is
    block-aligned and that an indexed query matches the same query run with
    the index disabled.
    """
    generator = np.random.default_rng(42)
    values = generator.random(5_000, dtype=np.float64)
    array = blosc2.asarray(values, chunks=(781,), blocks=(160,))
    index_descriptor = array.create_index(kind=blosc2.IndexKind.OPSI)

    opsi_info = index_descriptor["opsi"]
    remainder = opsi_info["chunk_len"] % opsi_info["block_len"]
    assert remainder == 0

    # Bracket a single stored value with its nearest float neighbours.
    probe = values[1234]
    lower = np.nextafter(probe, -np.inf)
    upper = np.nextafter(probe, np.inf)
    in_range = (array >= lower) & (array <= upper)
    query = in_range.where(array)

    with_index = query.compute()[:]
    without_index = query.compute(_use_index=False)[:]
    np.testing.assert_array_equal(with_index, without_index)
66+
67+
5068
@pytest.mark.parametrize("kind", ["summary", "bucket", "partial", "full", "opsi"])
5169
def test_structured_field_index_matches_scan(kind):
5270
dtype = np.dtype([("id", np.int64), ("payload", np.float64)])

0 commit comments

Comments
 (0)