Skip to content

Commit 1e3bb3e

Browse files
authored
Implement DiskANN ANN index for vec0 virtual tables
Add DiskANN index for vec0 virtual table
2 parents e2c38f3 + fb81c01 commit 1e3bb3e

22 files changed

+6431
-134
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -204,7 +204,7 @@ test-loadable-watch:
204204
watchexec --exts c,py,Makefile --clear -- make test-loadable
205205

206206
test-unit:
207-
$(CC) -DSQLITE_CORE -DSQLITE_VEC_TEST -DSQLITE_VEC_ENABLE_RESCORE tests/test-unit.c sqlite-vec.c vendor/sqlite3.c -I./ -Ivendor -o $(prefix)/test-unit && $(prefix)/test-unit
207+
$(CC) -DSQLITE_CORE -DSQLITE_VEC_TEST -DSQLITE_VEC_ENABLE_RESCORE -DSQLITE_VEC_ENABLE_DISKANN=1 tests/test-unit.c sqlite-vec.c vendor/sqlite3.c -I./ -Ivendor $(CFLAGS) -o $(prefix)/test-unit && $(prefix)/test-unit
208208

209209
# Standalone sqlite3 CLI with vec0 compiled in. Useful for benchmarking,
210210
# profiling (has debug symbols), and scripting without .load_extension.

benchmarks-ann/Makefile

Lines changed: 17 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,16 @@ RESCORE_CONFIGS = \
1919
"rescore-bit-os16:type=rescore,quantizer=bit,oversample=16" \
2020
"rescore-int8-os8:type=rescore,quantizer=int8,oversample=8"
2121

22-
ALL_CONFIGS = $(BASELINES) $(RESCORE_CONFIGS) $(IVF_CONFIGS)
22+
# --- DiskANN configs ---
23+
DISKANN_CONFIGS = \
24+
"diskann-R48-binary:type=diskann,R=48,L=128,quantizer=binary" \
25+
"diskann-R72-binary:type=diskann,R=72,L=128,quantizer=binary" \
26+
"diskann-R72-int8:type=diskann,R=72,L=128,quantizer=int8" \
27+
"diskann-R72-L256:type=diskann,R=72,L=256,quantizer=binary"
2328

24-
.PHONY: seed ground-truth bench-smoke bench-rescore bench-ivf bench-10k bench-50k bench-100k bench-all \
29+
ALL_CONFIGS = $(BASELINES) $(RESCORE_CONFIGS) $(IVF_CONFIGS) $(DISKANN_CONFIGS)
30+
31+
.PHONY: seed ground-truth bench-smoke bench-rescore bench-ivf bench-diskann bench-10k bench-50k bench-100k bench-all \
2532
report clean
2633

2734
# --- Data preparation ---
@@ -37,7 +44,8 @@ ground-truth: seed
3744
bench-smoke: seed
3845
$(BENCH) --subset-size 5000 -k 10 -n 20 -o runs/smoke \
3946
"brute-float:type=baseline,variant=float" \
40-
"ivf-quick:type=ivf,nlist=16,nprobe=4"
47+
"ivf-quick:type=ivf,nlist=16,nprobe=4" \
48+
"diskann-quick:type=diskann,R=48,L=64,quantizer=binary"
4149

4250
bench-rescore: seed
4351
$(BENCH) --subset-size 10000 -k 10 -o runs/rescore \
@@ -62,6 +70,12 @@ bench-ivf: seed
6270
$(BENCH) --subset-size 50000 -k 10 -o runs/ivf $(BASELINES) $(IVF_CONFIGS)
6371
$(BENCH) --subset-size 100000 -k 10 -o runs/ivf $(BASELINES) $(IVF_CONFIGS)
6472

73+
# --- DiskANN across sizes ---
74+
bench-diskann: seed
75+
$(BENCH) --subset-size 10000 -k 10 -o runs/diskann $(BASELINES) $(DISKANN_CONFIGS)
76+
$(BENCH) --subset-size 50000 -k 10 -o runs/diskann $(BASELINES) $(DISKANN_CONFIGS)
77+
$(BENCH) --subset-size 100000 -k 10 -o runs/diskann $(BASELINES) $(DISKANN_CONFIGS)
78+
6579
# --- Report ---
6680
report:
6781
@echo "Use: sqlite3 runs/<dir>/results.db 'SELECT * FROM bench_results ORDER BY recall DESC'"

0 commit comments

Comments
 (0)