Skip to content

Commit f2c9fb8

Browse files
asg017 and claude committed
Add text PK, WAL concurrency tests, and fix bench-smoke config
Infrastructure improvements:
- Fix benchmarks-ann Makefile: type=baseline -> type=vec0-flat (baseline was never a valid INDEX_REGISTRY key)
- Add DiskANN + text primary key test: insert, KNN, delete, KNN
- Add rescore + text primary key test: insert, KNN, delete, KNN
- Add WAL concurrency test: reader sees snapshot isolation while writer has an open transaction, KNN works on reader's snapshot

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent d684178 commit f2c9fb8

4 files changed

Lines changed: 138 additions & 4 deletions

File tree

benchmarks-ann/Makefile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ EXT = ../dist/vec0
44

55
# --- Baseline (brute-force) configs ---
66
BASELINES = \
7-
"brute-float:type=baseline,variant=float" \
8-
"brute-int8:type=baseline,variant=int8" \
9-
"brute-bit:type=baseline,variant=bit"
7+
"brute-float:type=vec0-flat,variant=float" \
8+
"brute-int8:type=vec0-flat,variant=int8" \
9+
"brute-bit:type=vec0-flat,variant=bit"
1010

1111
# --- IVF configs ---
1212
IVF_CONFIGS = \
@@ -43,7 +43,7 @@ ground-truth: seed
4343
# --- Quick smoke test ---
4444
bench-smoke: seed
4545
$(BENCH) --subset-size 5000 -k 10 -n 20 --dataset cohere1m -o runs \
46-
"brute-float:type=baseline,variant=float" \
46+
"brute-float:type=vec0-flat,variant=float" \
4747
"ivf-quick:type=ivf,nlist=16,nprobe=4" \
4848
"diskann-quick:type=diskann,R=48,L=64,quantizer=binary"
4949

tests/test-diskann.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1246,3 +1246,46 @@ def test_diskann_delete_interleaved_with_knn(db):
12461246
returned = {r["rowid"] for r in rows}
12471247
assert returned.issubset(alive), \
12481248
f"Deleted rowid {to_del} found in KNN results"
1249+
1250+
1251+
# ======================================================================
1252+
# Text primary key + DiskANN
1253+
# ======================================================================
1254+
1255+
1256+
def test_diskann_text_pk_insert_knn_delete(db):
    """Exercise a DiskANN-indexed vec0 table that uses a text primary key.

    Five one-hot 8-dimensional vectors are inserted under text ids. A KNN
    query along the first axis must return "alpha"; after "alpha" is deleted,
    the same query must no longer return it.
    """
    create_sql = """
        CREATE VIRTUAL TABLE t USING vec0(
            id text primary key,
            emb float[8] INDEXED BY diskann(neighbor_quantizer=binary, n_neighbors=8)
        )
    """
    db.execute(create_sql)

    # Each name gets the unit vector along its own axis (alpha -> axis 0, ...).
    names = ["alpha", "beta", "gamma", "delta", "epsilon"]
    for axis, name in enumerate(names):
        one_hot = [1 if pos == axis else 0 for pos in range(8)]
        db.execute("INSERT INTO t(id, emb) VALUES (?, ?)", [name, _f32(one_hot)])

    # The probe is identical to alpha's vector.
    probe = [1, 0, 0, 0, 0, 0, 0, 0]

    # KNN should hand back the text ids, including the exact match.
    found = db.execute(
        "SELECT id, distance FROM t WHERE emb MATCH ? AND k=3",
        [_f32(probe)],
    ).fetchall()
    assert len(found) >= 1
    found_ids = [row["id"] for row in found]
    assert "alpha" in found_ids  # closest to query

    # Once deleted, the id must not show up in KNN results any more.
    db.execute("DELETE FROM t WHERE id = 'alpha'")
    remaining = db.execute(
        "SELECT id FROM t WHERE emb MATCH ? AND k=3",
        [_f32(probe)],
    ).fetchall()
    remaining_ids = [row["id"] for row in remaining]
    assert "alpha" not in remaining_ids

tests/test-insert-delete.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -483,3 +483,57 @@ def test_delete_one_chunk_of_two_shrinks_pages(tmp_path):
483483
row = db.execute("select emb from v where rowid = ?", [i]).fetchone()
484484
assert row[0] == _f32([float(i)] * dims)
485485
db.close()
486+
487+
488+
def test_wal_concurrent_reader_during_write(tmp_path):
    """In WAL mode, a reader should see a consistent snapshot while a writer inserts.

    A writer connection creates the table, commits rows 1-10, then opens a
    transaction and inserts rows 11-20 without committing. A second (reader)
    connection must keep seeing exactly the 10 committed rows, both for
    ``count(*)`` and for KNN, until the writer commits; afterwards it must
    see all 20.

    Both connections are closed in ``finally`` blocks so that a failing
    assertion does not leak open handles (or an open write transaction) on
    the temporary database file.
    """
    dims = 4
    db_path = str(tmp_path / "test.db")

    def _connect():
        """Open a connection to the test database with vec0 loaded."""
        conn = sqlite3.connect(db_path)
        try:
            conn.enable_load_extension(True)
            conn.load_extension("dist/vec0")
        except Exception:
            # Do not leak the handle if the extension cannot be loaded.
            conn.close()
            raise
        return conn

    def _insert_rows(conn, rowids):
        """Insert one constant-valued vector per rowid (value == rowid)."""
        for i in rowids:
            conn.execute(
                "INSERT INTO v(rowid, emb) VALUES (?, ?)",
                [i, _f32([float(i)] * dims)],
            )

    # Writer: create table, insert initial rows, enable WAL
    writer = _connect()
    try:
        writer.execute("PRAGMA journal_mode=WAL")
        writer.execute(
            f"CREATE VIRTUAL TABLE v USING vec0(emb float[{dims}])"
        )
        _insert_rows(writer, range(1, 11))
        writer.commit()

        # Reader: open separate connection, start read
        reader = _connect()
        try:
            # Reader sees 10 rows
            count_before = reader.execute("SELECT count(*) FROM v").fetchone()[0]
            assert count_before == 10

            # Writer inserts more rows (not yet committed)
            writer.execute("BEGIN")
            _insert_rows(writer, range(11, 21))

            # Reader still sees 10 (WAL snapshot isolation)
            count_during = reader.execute("SELECT count(*) FROM v").fetchone()[0]
            assert count_during == 10

            # KNN during writer's transaction should work on reader's snapshot
            rows = reader.execute(
                "SELECT rowid FROM v WHERE emb MATCH ? AND k = 5",
                [_f32([1.0] * dims)],
            ).fetchall()
            assert len(rows) == 5
            assert all(r[0] <= 10 for r in rows)  # only original rows

            # Writer commits
            writer.commit()

            # Reader sees new rows after re-query (new snapshot)
            count_after = reader.execute("SELECT count(*) FROM v").fetchone()[0]
            assert count_after == 20
        finally:
            reader.close()
    finally:
        writer.close()

tests/test-rescore.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -595,3 +595,40 @@ def test_corrupt_zeroblob_validity(db):
595595
).fetchall()
596596
except sqlite3.OperationalError:
597597
pass # Error is acceptable — crash is not
598+
599+
600+
def test_rescore_text_pk_insert_knn_delete(db):
    """Rescore with text primary key: insert, KNN, delete, KNN again.

    Five 128-dimensional Gaussian vectors are inserted under text ids. A KNN
    query using alpha's own vector must return "alpha"; after "alpha" is
    deleted, the same query must omit it while still returning other rows.
    """
    db.execute(
        "CREATE VIRTUAL TABLE t USING vec0("
        "  id text primary key,"
        "  embedding float[128] indexed by rescore(quantizer=bit)"
        ")"
    )

    import random

    # Use a private generator rather than random.seed(): it produces the same
    # deterministic vectors without reseeding the process-wide RNG that other
    # tests may depend on.
    rng = random.Random(99)
    vecs = {}
    for name in ["alpha", "beta", "gamma", "delta", "epsilon"]:
        v = [rng.gauss(0, 1) for _ in range(128)]
        vecs[name] = v
        db.execute("INSERT INTO t(id, embedding) VALUES (?, ?)", [name, float_vec(v)])

    # KNN should return text IDs
    rows = db.execute(
        "SELECT id, distance FROM t WHERE embedding MATCH ? ORDER BY distance LIMIT 3",
        [float_vec(vecs["alpha"])],
    ).fetchall()
    assert len(rows) >= 1
    ids = [r["id"] for r in rows]
    assert "alpha" in ids

    # Delete and verify
    db.execute("DELETE FROM t WHERE id = 'alpha'")
    rows = db.execute(
        "SELECT id FROM t WHERE embedding MATCH ? ORDER BY distance LIMIT 3",
        [float_vec(vecs["alpha"])],
    ).fetchall()
    ids = [r["id"] for r in rows]
    assert "alpha" not in ids
    assert len(rows) >= 1  # other results still returned

0 commit comments

Comments
 (0)