Skip to content

Commit 575371d

Browse files
committed
Add DiskANN index for vec0 virtual table
Add a DiskANN graph-based index. It builds a Vamana graph with configurable R (max degree) and L (search list size, set separately for insert and query), and supports int8 quantization with rescore, lazy reverse-edge replacement, pre-quantized query optimization, and insert buffer reuse. The commit includes shadow table management, delete support, KNN integration, a compile flag (SQLITE_VEC_ENABLE_DISKANN), a release-demo workflow, fuzz targets, and tests. It also fixes a rescore int8 quantization bug.
1 parent e2c38f3 commit 575371d

23 files changed

+6549
-134
lines changed

.github/workflows/release-demo.yml

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
name: "Release Demo (DiskANN)"
2+
on:
3+
push:
4+
branches: [diskann-yolo2]
5+
permissions:
6+
contents: write
7+
jobs:
8+
build-linux-x86_64-extension:
9+
runs-on: ubuntu-22.04
10+
steps:
11+
- uses: actions/checkout@v4
12+
- run: ./scripts/vendor.sh
13+
- run: make loadable static
14+
- uses: actions/upload-artifact@v4
15+
with:
16+
name: sqlite-vec-linux-x86_64-extension
17+
path: dist/*
18+
build-linux-aarch64-extension:
19+
runs-on: ubuntu-22.04-arm
20+
steps:
21+
- uses: actions/checkout@v4
22+
- run: ./scripts/vendor.sh
23+
- run: make loadable static
24+
- uses: actions/upload-artifact@v4
25+
with:
26+
name: sqlite-vec-linux-aarch64-extension
27+
path: dist/*
28+
build-macos-x86_64-extension:
29+
runs-on: macos-15-intel
30+
steps:
31+
- uses: actions/checkout@v4
32+
- run: ./scripts/vendor.sh
33+
- run: make loadable static
34+
- uses: actions/upload-artifact@v4
35+
with:
36+
name: sqlite-vec-macos-x86_64-extension
37+
path: dist/*
38+
build-macos-aarch64-extension:
39+
runs-on: macos-14
40+
steps:
41+
- uses: actions/checkout@v4
42+
- run: ./scripts/vendor.sh
43+
- run: make loadable static
44+
- uses: actions/upload-artifact@v4
45+
with:
46+
name: sqlite-vec-macos-aarch64-extension
47+
path: dist/*
48+
build-windows-x86_64-extension:
49+
runs-on: windows-2022
50+
steps:
51+
- uses: actions/checkout@v4
52+
- uses: ilammy/msvc-dev-cmd@v1
53+
- uses: actions/setup-python@v5
54+
with:
55+
python-version: "3.12"
56+
- run: ./scripts/vendor.sh
57+
shell: bash
58+
- run: make sqlite-vec.h
59+
- run: mkdir dist
60+
- run: cl.exe /fPIC -shared /W4 /Ivendor/ /O2 /LD sqlite-vec.c -o dist/vec0.dll
61+
- uses: actions/upload-artifact@v4
62+
with:
63+
name: sqlite-vec-windows-x86_64-extension
64+
path: dist/*
65+
dist:
66+
runs-on: ubuntu-latest
67+
needs:
68+
[
69+
build-linux-x86_64-extension,
70+
build-linux-aarch64-extension,
71+
build-macos-x86_64-extension,
72+
build-macos-aarch64-extension,
73+
build-windows-x86_64-extension,
74+
]
75+
steps:
76+
- uses: actions/checkout@v4
77+
- uses: actions/download-artifact@v4
78+
with:
79+
name: sqlite-vec-linux-x86_64-extension
80+
path: dist/linux-x86_64
81+
- uses: actions/download-artifact@v4
82+
with:
83+
name: sqlite-vec-linux-aarch64-extension
84+
path: dist/linux-aarch64
85+
- uses: actions/download-artifact@v4
86+
with:
87+
name: sqlite-vec-macos-x86_64-extension
88+
path: dist/macos-x86_64
89+
- uses: actions/download-artifact@v4
90+
with:
91+
name: sqlite-vec-macos-aarch64-extension
92+
path: dist/macos-aarch64
93+
- uses: actions/download-artifact@v4
94+
with:
95+
name: sqlite-vec-windows-x86_64-extension
96+
path: dist/windows-x86_64
97+
- run: make sqlite-vec.h
98+
- run: |
99+
./scripts/vendor.sh
100+
make amalgamation
101+
mkdir -p amalgamation
102+
cp dist/sqlite-vec.c sqlite-vec.h amalgamation/
103+
rm dist/sqlite-vec.c
104+
- uses: asg017/setup-sqlite-dist@73e37b2ffb0b51e64a64eb035da38c958b9ff6c6
105+
- run: sqlite-dist build --set-version $(cat VERSION)
106+
- name: Create release and upload assets
107+
env:
108+
GH_TOKEN: ${{ github.token }}
109+
run: |
110+
SHORT_SHA=$(echo "${{ github.sha }}" | head -c 10)
111+
TAG="diskann-${SHORT_SHA}"
112+
zip -j "amalgamation/sqlite-vec-amalgamation.zip" amalgamation/sqlite-vec.c amalgamation/sqlite-vec.h
113+
gh release create "$TAG" \
114+
--title "$TAG" \
115+
--target "${{ github.sha }}" \
116+
--prerelease \
117+
amalgamation/sqlite-vec-amalgamation.zip \
118+
.sqlite-dist/pip/*

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ test-loadable-watch:
204204
watchexec --exts c,py,Makefile --clear -- make test-loadable
205205

206206
test-unit:
207-
$(CC) -DSQLITE_CORE -DSQLITE_VEC_TEST -DSQLITE_VEC_ENABLE_RESCORE tests/test-unit.c sqlite-vec.c vendor/sqlite3.c -I./ -Ivendor -o $(prefix)/test-unit && $(prefix)/test-unit
207+
$(CC) -DSQLITE_CORE -DSQLITE_VEC_TEST -DSQLITE_VEC_ENABLE_RESCORE -DSQLITE_VEC_ENABLE_DISKANN=1 tests/test-unit.c sqlite-vec.c vendor/sqlite3.c -I./ -Ivendor $(CFLAGS) -o $(prefix)/test-unit && $(prefix)/test-unit
208208

209209
# Standalone sqlite3 CLI with vec0 compiled in. Useful for benchmarking,
210210
# profiling (has debug symbols), and scripting without .load_extension.

benchmarks-ann/Makefile

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,16 @@ RESCORE_CONFIGS = \
1919
"rescore-bit-os16:type=rescore,quantizer=bit,oversample=16" \
2020
"rescore-int8-os8:type=rescore,quantizer=int8,oversample=8"
2121

22-
ALL_CONFIGS = $(BASELINES) $(RESCORE_CONFIGS) $(IVF_CONFIGS)
22+
# --- DiskANN configs ---
23+
DISKANN_CONFIGS = \
24+
"diskann-R48-binary:type=diskann,R=48,L=128,quantizer=binary" \
25+
"diskann-R72-binary:type=diskann,R=72,L=128,quantizer=binary" \
26+
"diskann-R72-int8:type=diskann,R=72,L=128,quantizer=int8" \
27+
"diskann-R72-L256:type=diskann,R=72,L=256,quantizer=binary"
2328

24-
.PHONY: seed ground-truth bench-smoke bench-rescore bench-ivf bench-10k bench-50k bench-100k bench-all \
29+
ALL_CONFIGS = $(BASELINES) $(RESCORE_CONFIGS) $(IVF_CONFIGS) $(DISKANN_CONFIGS)
30+
31+
.PHONY: seed ground-truth bench-smoke bench-rescore bench-ivf bench-diskann bench-10k bench-50k bench-100k bench-all \
2532
report clean
2633

2734
# --- Data preparation ---
@@ -37,7 +44,8 @@ ground-truth: seed
3744
bench-smoke: seed
3845
$(BENCH) --subset-size 5000 -k 10 -n 20 -o runs/smoke \
3946
"brute-float:type=baseline,variant=float" \
40-
"ivf-quick:type=ivf,nlist=16,nprobe=4"
47+
"ivf-quick:type=ivf,nlist=16,nprobe=4" \
48+
"diskann-quick:type=diskann,R=48,L=64,quantizer=binary"
4149

4250
bench-rescore: seed
4351
$(BENCH) --subset-size 10000 -k 10 -o runs/rescore \
@@ -62,6 +70,12 @@ bench-ivf: seed
6270
$(BENCH) --subset-size 50000 -k 10 -o runs/ivf $(BASELINES) $(IVF_CONFIGS)
6371
$(BENCH) --subset-size 100000 -k 10 -o runs/ivf $(BASELINES) $(IVF_CONFIGS)
6472

73+
# --- DiskANN across sizes ---
74+
bench-diskann: seed
75+
$(BENCH) --subset-size 10000 -k 10 -o runs/diskann $(BASELINES) $(DISKANN_CONFIGS)
76+
$(BENCH) --subset-size 50000 -k 10 -o runs/diskann $(BASELINES) $(DISKANN_CONFIGS)
77+
$(BENCH) --subset-size 100000 -k 10 -o runs/diskann $(BASELINES) $(DISKANN_CONFIGS)
78+
6579
# --- Report ---
6680
report:
6781
@echo "Use: sqlite3 runs/<dir>/results.db 'SELECT * FROM bench_results ORDER BY recall DESC'"

0 commit comments

Comments
 (0)