Skip to content

Commit c6d6002

Browse files
Merge pull request #628 from Blosc/opsi2
OPSI2: a new indexing engine
2 parents 59e41cb + c309059 commit c6d6002

11 files changed

Lines changed: 2189 additions & 76 deletions

File tree

bench/indexing/index_query_bench.py

Lines changed: 82 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,15 @@
2424

2525
SIZES = (1_000_000, 2_000_000, 5_000_000, 10_000_000)
2626
DEFAULT_REPEATS = 3
27-
KINDS = ("summary", "bucket", "partial", "full")
27+
KINDS = ("summary", "bucket", "partial", "full", "opsi")
2828
DEFAULT_KIND = "bucket"
2929
DISTS = ("sorted", "block-shuffled", "permuted", "random")
3030
RNG_SEED = 0
3131
DEFAULT_OPLEVEL = 5
3232
FULL_QUERY_MODES = ("auto", "selective-ooc", "whole-load")
3333
DATASET_LAYOUT_VERSION = "payload-ramp-v1"
3434
BUILD_MODES = ("auto", "memory", "ooc")
35+
FULL_INDEX_METHODS = ("global-sort",)
3536

3637
COLD_COLUMNS = [
3738
("rows", lambda result: f"{result['size']:,}"),
@@ -377,39 +378,40 @@ def _open_index_sidecar(path: str | os.PathLike[str], no_mmap: bool):
377378
def index_sizes(descriptor: dict, *, no_mmap: bool) -> tuple[int, int]:
378379
logical = 0
379380
disk = 0
381+
382+
def add_sidecar(path: str | None) -> None:
383+
nonlocal logical, disk
384+
if not path:
385+
return
386+
array = _open_index_sidecar(path, no_mmap)
387+
logical += int(np.prod(array.shape)) * array.dtype.itemsize
388+
disk += os.path.getsize(path)
389+
380390
for level_info in descriptor["levels"].values():
381-
dtype = np.dtype(level_info["dtype"])
382-
logical += dtype.itemsize * level_info["nsegments"]
383-
if level_info["path"]:
384-
disk += os.path.getsize(level_info["path"])
385-
386-
light = descriptor.get("light")
387-
if light is not None:
388-
for key in ("values_path", "bucket_positions_path", "offsets_path"):
389-
array = _open_index_sidecar(light[key], no_mmap)
390-
logical += int(np.prod(array.shape)) * array.dtype.itemsize
391-
disk += os.path.getsize(light[key])
392-
393-
reduced = descriptor.get("reduced")
394-
if reduced is not None:
395-
values = _open_index_sidecar(reduced["values_path"], no_mmap)
396-
positions = _open_index_sidecar(reduced["positions_path"], no_mmap)
397-
offsets = _open_index_sidecar(reduced["offsets_path"], no_mmap)
398-
logical += values.shape[0] * values.dtype.itemsize
399-
logical += positions.shape[0] * positions.dtype.itemsize
400-
logical += offsets.shape[0] * offsets.dtype.itemsize
401-
disk += os.path.getsize(reduced["values_path"])
402-
disk += os.path.getsize(reduced["positions_path"])
403-
disk += os.path.getsize(reduced["offsets_path"])
391+
add_sidecar(level_info.get("path"))
392+
393+
bucket = descriptor.get("bucket")
394+
if bucket is not None:
395+
for key in ("values_path", "bucket_positions_path", "offsets_path", "l1_path", "l2_path"):
396+
add_sidecar(bucket.get(key))
397+
398+
partial = descriptor.get("partial")
399+
if partial is not None:
400+
for key in ("values_path", "positions_path", "offsets_path", "l1_path", "l2_path"):
401+
add_sidecar(partial.get(key))
404402

405403
full = descriptor.get("full")
406404
if full is not None:
407-
values = _open_index_sidecar(full["values_path"], no_mmap)
408-
positions = _open_index_sidecar(full["positions_path"], no_mmap)
409-
logical += values.shape[0] * values.dtype.itemsize
410-
logical += positions.shape[0] * positions.dtype.itemsize
411-
disk += os.path.getsize(full["values_path"])
412-
disk += os.path.getsize(full["positions_path"])
405+
for key in ("values_path", "positions_path", "l1_path", "l2_path"):
406+
add_sidecar(full.get(key))
407+
for run in full.get("runs", ()):
408+
add_sidecar(run.get("values_path"))
409+
add_sidecar(run.get("positions_path"))
410+
411+
opsi = descriptor.get("opsi")
412+
if opsi is not None:
413+
for key in ("values_path", "positions_path", "mins_path", "maxs_path"):
414+
add_sidecar(opsi.get(key))
413415
return logical, disk
414416

415417

@@ -443,19 +445,31 @@ def _condition_expr(lo: object, hi: object, dtype: np.dtype, *, query_single_val
443445
return f"(id >= {lo_literal}) & (id <= {hi_literal})"
444446

445447

448+
def _index_kind_method(kind: str) -> tuple[str, str | None]:
449+
if kind == "full":
450+
return "full", "global-sort"
451+
return kind, None
452+
453+
446454
def _valid_index_descriptor(arr: blosc2.NDArray, kind: str, optlevel: int, build: str) -> dict | None:
455+
actual_kind, method = _index_kind_method(kind)
447456
for descriptor in arr.indexes:
448457
if descriptor.get("version") != blosc2_indexing.INDEX_FORMAT_VERSION:
449458
continue
450459
expected_ooc = build != "memory"
451-
if (
460+
if not (
452461
descriptor.get("field") == "id"
453-
and descriptor.get("kind") == kind
462+
and descriptor.get("kind") == actual_kind
454463
and int(descriptor.get("optlevel", -1)) == int(optlevel)
455464
and bool(descriptor.get("ooc", False)) is bool(expected_ooc)
456465
and not descriptor.get("stale", False)
457466
):
458-
return descriptor
467+
continue
468+
if method is not None:
469+
build_method = descriptor.get("full", {}).get("build_method", "global-sort")
470+
if build_method != method:
471+
continue
472+
return descriptor
459473
return None
460474

461475

@@ -482,6 +496,7 @@ def _open_or_build_indexed_array(
482496
clevel: int | None,
483497
nthreads: int | None,
484498
no_mmap: bool,
499+
opsi_max_cycles: int | None,
485500
) -> tuple[blosc2.NDArray, float]:
486501
if path.exists():
487502
arr = blosc2.open(path, mode="a")
@@ -493,12 +508,17 @@ def _open_or_build_indexed_array(
493508

494509
arr = build_persistent_array(size, dist, id_dtype, path, chunks, blocks)
495510
build_start = time.perf_counter()
511+
actual_kind, method = _index_kind_method(kind)
496512
kwargs = {
497513
"field": "id",
498-
"kind": blosc2.IndexKind[kind.upper()],
514+
"kind": blosc2.IndexKind[actual_kind.upper()],
499515
"optlevel": optlevel,
500516
"build": build,
501517
}
518+
if method is not None:
519+
kwargs["method"] = method
520+
if actual_kind == "opsi" and opsi_max_cycles is not None:
521+
kwargs["opsi_max_cycles"] = opsi_max_cycles
502522
cparams = {}
503523
if codec is not None:
504524
cparams["codec"] = codec
@@ -531,6 +551,7 @@ def benchmark_size(
531551
kinds: tuple[str, ...],
532552
repeats: int,
533553
no_mmap: bool,
554+
opsi_max_cycles: int | None,
534555
cold_row_callback=None,
535556
) -> list[dict]:
536557
arr = _open_or_build_persistent_array(
@@ -574,6 +595,7 @@ def benchmark_size(
574595
clevel,
575596
nthreads,
576597
no_mmap,
598+
opsi_max_cycles,
577599
)
578600
idx_cond = blosc2.lazyexpr(condition_str, idx_arr.fields)
579601
idx_expr = idx_cond.where(idx_arr)
@@ -725,12 +747,30 @@ def parse_args() -> argparse.Namespace:
725747
default="auto",
726748
help="Index builder policy: auto, memory, or ooc. Default: auto.",
727749
)
750+
parser.add_argument(
751+
"--method",
752+
choices=FULL_INDEX_METHODS,
753+
default="global-sort",
754+
help=(
755+
"Full-index build method. OPSI is a separate index kind; use --kind opsi to benchmark it. "
756+
"Default: global-sort."
757+
),
758+
)
728759
parser.add_argument(
729760
"--full-query-mode",
730761
choices=FULL_QUERY_MODES,
731762
default="auto",
732763
help="How full exact queries should run during the benchmark: auto, selective-ooc, or whole-load.",
733764
)
765+
parser.add_argument(
766+
"--opsi-max-cycles",
767+
type=int,
768+
default=None,
769+
help=(
770+
"Maximum OPSI cycles for --kind opsi. Default: derive from optlevel "
771+
"(optlevel for optlevel < 8, optlevel * 2 otherwise)."
772+
),
773+
)
734774
parser.add_argument(
735775
"--codec",
736776
type=str,
@@ -776,6 +816,8 @@ def main() -> None:
776816
raise SystemExit("--clevel must be >= 0")
777817
if args.nthreads is not None and args.nthreads <= 0:
778818
raise SystemExit("--nthreads must be a positive integer")
819+
if args.opsi_max_cycles is not None and args.opsi_max_cycles < 0:
820+
raise SystemExit("--opsi-max-cycles must be >= 0")
779821
sizes = (args.size,) if args.size is not None else SIZES
780822
dists = DISTS if args.dist == "all" else (args.dist,)
781823
kinds = KINDS if args.kind == "all" else (args.kind,)
@@ -801,6 +843,7 @@ def main() -> None:
801843
args.clevel,
802844
args.nthreads,
803845
args.no_mmap,
846+
args.opsi_max_cycles,
804847
)
805848
else:
806849
args.outdir.mkdir(parents=True, exist_ok=True)
@@ -823,6 +866,7 @@ def main() -> None:
823866
args.clevel,
824867
args.nthreads,
825868
args.no_mmap,
869+
args.opsi_max_cycles,
826870
)
827871

828872

@@ -845,6 +889,7 @@ def run_benchmarks(
845889
clevel: int | None,
846890
nthreads: int | None,
847891
no_mmap: bool,
892+
opsi_max_cycles: int | None,
848893
) -> None:
849894
all_results = []
850895

@@ -863,7 +908,8 @@ def run_benchmarks(
863908
f"full_query_mode={full_query_mode}, index_codec={'auto' if codec is None else codec.name}, "
864909
f"index_clevel={'auto' if clevel is None else clevel}, "
865910
f"index_nthreads={'auto' if nthreads is None else nthreads}, "
866-
f"index_mmap={'off' if no_mmap else 'on'}"
911+
f"index_mmap={'off' if no_mmap else 'on'}, "
912+
f"opsi_max_cycles={'optlevel' if opsi_max_cycles is None else opsi_max_cycles}"
867913
)
868914
cold_widths = progress_widths(COLD_COLUMNS, sizes, dists, kinds, id_dtype)
869915
print()
@@ -894,6 +940,7 @@ def cold_progress_callback(row: dict) -> None:
894940
kinds,
895941
repeats,
896942
no_mmap,
943+
opsi_max_cycles,
897944
cold_row_callback=cold_progress_callback,
898945
)
899946
all_results.extend(size_results)

doc/getting_started/tutorials/14.indexing-arrays.ipynb

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"source": [
88
"# Indexing Arrays\n",
99
"\n",
10-
"Blosc2 can attach indexes to 1-D `NDArray` objects and to fields inside 1-D structured arrays. These indexes accelerate selective masks, and `full` indexes can also drive ordered access directly through `sort(order=...)`, `NDArray.argsort(order=...)`, `LazyExpr.argsort(order=...)`, and `iter_sorted(...)`.\n",
10+
"Blosc2 can attach indexes to 1-D `NDArray` objects and to fields inside 1-D structured arrays. These indexes accelerate selective masks, and `full` indexes can also drive ordered access directly through `sort(order=...)`, `NDArray.argsort(order=...)`, `LazyExpr.argsort(order=...)`, and `iter_sorted(...)`. OPSI indexes are a separate tunable iterative-ordering kind: they improve the physical order used for exact filtering, but they are not intended to converge to a completely sorted `full`/CSI index.\n",
1111
"\n",
1212
"This tutorial covers:\n",
1313
"\n",
@@ -108,16 +108,19 @@
108108
"source": [
109109
"## Index kinds and how to create them\n",
110110
"\n",
111-
"Blosc2 currently supports four index kinds:\n",
111+
"Blosc2 currently supports five index kinds:\n",
112112
"\n",
113113
"- `summary`: compact summaries only,\n",
114114
"- `bucket`: summary levels plus lightweight per-block payloads,\n",
115115
"- `partial`: richer payloads for positional filtering,\n",
116+
"- `opsi`: tunable iterative ordering for exact filtering,\n",
116117
"- `full`: globally sorted payloads for positional filtering and ordered reuse.\n",
117118
"\n",
119+
"`OPSI` is intentionally a separate kind, not a `full` index construction method. It performs a configurable number of ordering cycles and then keeps that iterative ordering as-is. Achieving a completely sorted index (CSI) is not a goal for OPSI; use `FULL` when you require global sorted order or direct ordered reuse. By default, `OPSI` uses `optlevel` cycles for `optlevel < 8`, and `2 * optlevel` cycles for `optlevel >= 8`. You can override this with `opsi_max_cycles=...`.\n",
120+
"\n",
118121
"There is one active index per target field or expression. If you create another index on the same target, it replaces the previous one. The easiest way to compare kinds is to build them on separate arrays.\n",
119122
"\n",
120-
"The next cell times index creation and reports the compressed storage footprint of each index relative to the compressed base array."
123+
"The next cell times index creation and reports the compressed storage footprint of each index relative to the compressed base array.\n"
121124
]
122125
},
123126
{
@@ -152,6 +155,7 @@
152155
" blosc2.IndexKind.SUMMARY,\n",
153156
" blosc2.IndexKind.BUCKET,\n",
154157
" blosc2.IndexKind.PARTIAL,\n",
158+
" blosc2.IndexKind.OPSI,\n",
155159
" blosc2.IndexKind.FULL,\n",
156160
"):\n",
157161
" arr = data.copy()\n",
@@ -238,7 +242,7 @@
238242
"source": [
239243
"### Timing the mask with and without indexes\n",
240244
"\n",
241-
"The next cell measures the same selective mask on all four index kinds and compares it with a forced full scan. On this workload, `partial` and `full` usually show the clearest benefit because they carry richer payloads for positional filtering."
245+
"The next cell measures the same selective mask on all five index kinds and compares it with a forced full scan. On this workload, `partial`, `opsi`, and `full` usually show the clearest benefit because they carry richer payloads for positional filtering.\n"
242246
]
243247
},
244248
{
@@ -299,7 +303,9 @@
299303
"source": [
300304
"## `full` indexes and ordered access\n",
301305
"\n",
302-
"A `full` index stores a global sorted payload. This is the required index tier for direct ordered reuse. Build it directly with `create_index(kind=blosc2.IndexKind.FULL)`."
306+
"A `full` index stores a global sorted payload. This is the required index tier for direct ordered reuse. Build it directly with `create_index(kind=blosc2.IndexKind.FULL)`.\n",
307+
"\n",
308+
"If you only want a tunable iterative ordering index for exact filtering, use `create_index(kind=blosc2.IndexKind.OPSI)` instead. OPSI can improve cold-query locality as `optlevel` or `opsi_max_cycles` increases, but it does not replace `FULL` for globally sorted access.\n"
303309
]
304310
},
305311
{
@@ -585,11 +591,12 @@
585591
"## Practical guidance\n",
586592
"\n",
587593
"- Use `partial` when your main goal is faster selective masks.\n",
594+
"- Use `opsi` when you want exact filtering with tunable iterative ordering. Increase `optlevel` or pass `opsi_max_cycles` to spend more build time on ordering; do not expect OPSI to become a `full`/CSI index.\n",
588595
"- Use `full` when you also want ordered reuse through `sort(order=...)`, `NDArray.argsort(order=...)`, `LazyExpr.argsort(order=...)`, or `iter_sorted(...)`.\n",
589596
"- Persist the base array if you want indexes to survive reopen automatically.\n",
590597
"- After unsupported mutations, use `rebuild_index()`.\n",
591598
"- For append-heavy `full` indexes, compact explicitly at convenient maintenance boundaries instead of on every append.\n",
592-
"- Measure your own workload: compact indexes, predicate selectivity, and ordered access needs all affect which kind is best.\n"
599+
"- Measure your own workload: compact indexes, predicate selectivity, iterative-ordering level, and ordered access needs all affect which kind is best.\n"
593600
]
594601
},
595602
{

doc/getting_started/tutorials/15.indexing-ctables.ipynb

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,8 @@
130130
"source": [
131131
"## Creating an index\n",
132132
"\n",
133-
"Call `create_index(col_name)` to build a bucket index on a column.\n",
133+
"Call `create_index(col_name)` to build a bucket index on a column. Pass `kind=...` to choose another index kind, including `blosc2.IndexKind.OPSI` for tunable iterative ordering or `blosc2.IndexKind.FULL` for globally sorted indexes that can also support ordered reuse. OPSI is a separate exact-filtering index kind, not a slower way to build a `FULL`/CSI index; its build effort is controlled by `optlevel` or the explicit `opsi_max_cycles` keyword.\n",
134+
"\n",
134135
"The returned `CTableIndex` handle shows the column name, kind, and whether the index is stale.\n"
135136
]
136137
},
@@ -494,6 +495,7 @@
494495
"- **Mutations** (`append`, `extend`, `setitem`, `assign`, `sort_by`, `compact`) mark indexes stale.\n",
495496
"- **Stale indexes** trigger automatic scan fallback — no user intervention needed.\n",
496497
"- **Persistent indexes** survive table close and reopen.\n",
498+
"- **OPSI indexes** are tunable iterative-ordering indexes for exact filtering; use `FULL` for completely sorted ordered reuse.\n",
497499
"- **Views** cannot own indexes; only root tables can.\n"
498500
]
499501
},

doc/reference/ctable.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,10 @@ CTable indexes can also target **direct expressions** over stored columns via
135135
predicates without adding either a computed column or a materialized stored one.
136136
A matching ``FULL`` direct-expression index can also be reused by ordering paths
137137
such as :meth:`CTable.sort_by` when sorting by a computed column backed by the
138-
same expression.
138+
same expression. ``OPSI`` indexes are a separate exact-filtering tier with a
139+
tunable number of iterative ordering cycles; they are not intended to converge
140+
to a completely sorted ``FULL``/CSI index, so use ``FULL`` when globally sorted
141+
ordered reuse is required.
139142

140143
.. autosummary::
141144

0 commit comments

Comments
 (0)