Skip to content

Commit 6e2c4c6

Browse files
asg017 and claude committed
Add FTS5-style command column and runtime oversample for rescore
Replace the old INSERT INTO t(rowid) VALUES('command') hack with a proper hidden command column named after the table (FTS5 pattern):

    INSERT INTO t(t) VALUES ('oversample=16')

The command column is the first hidden column (before distance and k), which reserves the ability to use it for table-valued function arguments in the future.

Schema:

    CREATE TABLE x(rowid, <cols>, "<table>" hidden, distance hidden, k hidden)

For backwards compatibility, pre-v0.1.10 tables (detected via the _info shadow table version) skip the command column to avoid name conflicts with user columns that may share the table's name. Verified with a legacy fixture DB generated by sqlite-vec v0.1.6.

Changes:
- Add hidden command column to sqlite3_declare_vtab for new tables
- Version-gate via _info shadow table for existing tables
- Validate at CREATE time that no column name matches table name
- Add rescore_handle_command() with oversample=N support
- rescore_knn() prefers runtime oversample_search over CREATE default
- Remove old rowid-based command dispatch
- Migrate all DiskANN/IVF/fuzz tests and benchmarks to new syntax
- Add legacy DB fixture (v0.1.6) and 9 backwards-compat tests

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent b7fc459 commit 6e2c4c6

21 files changed

+511
-104
lines changed

benchmarks-ann/bench-delete/bench_delete.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ def _ivf_create(p):
159159
def _ivf_train(conn):
160160
"""Trigger built-in k-means training for IVF."""
161161
t0 = now_ns()
162-
conn.execute("INSERT INTO vec_items(id) VALUES ('compute-centroids')")
162+
conn.execute("INSERT INTO vec_items(vec_items) VALUES ('compute-centroids')")
163163
conn.commit()
164164
return ns_to_s(now_ns() - t0)
165165

benchmarks-ann/bench.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -456,7 +456,7 @@ def _ivf_create_table_sql(params):
456456
def _ivf_post_insert_hook(conn, params):
457457
print(" Training k-means centroids (built-in)...", flush=True)
458458
t0 = time.perf_counter()
459-
conn.execute("INSERT INTO vec_items(id) VALUES ('compute-centroids')")
459+
conn.execute("INSERT INTO vec_items(vec_items) VALUES ('compute-centroids')")
460460
conn.commit()
461461
elapsed = time.perf_counter() - t0
462462
print(f" Training done in {elapsed:.1f}s", flush=True)
@@ -514,7 +514,7 @@ def _ivf_faiss_kmeans_hook(conn, params):
514514

515515
for cid, blob in centroids:
516516
conn.execute(
517-
"INSERT INTO vec_items(id, embedding) VALUES (?, ?)",
517+
"INSERT INTO vec_items(vec_items, embedding) VALUES (?, ?)",
518518
(f"set-centroid:{cid}", blob),
519519
)
520520
conn.commit()
@@ -540,7 +540,7 @@ def _ivf_pre_query_hook(conn, params):
540540
nprobe = params.get("nprobe")
541541
if nprobe:
542542
conn.execute(
543-
"INSERT INTO vec_items(id) VALUES (?)",
543+
"INSERT INTO vec_items(vec_items) VALUES (?)",
544544
(f"nprobe={nprobe}",),
545545
)
546546
conn.commit()
@@ -572,7 +572,7 @@ def _ivf_describe(params):
572572
"insert_sql": None,
573573
"post_insert_hook": _ivf_post_insert_hook,
574574
"pre_query_hook": _ivf_pre_query_hook,
575-
"train_sql": lambda _: "INSERT INTO vec_items(id) VALUES ('compute-centroids')",
575+
"train_sql": lambda _: "INSERT INTO vec_items(vec_items) VALUES ('compute-centroids')",
576576
"run_query": None,
577577
"query_sql": None,
578578
"describe": _ivf_describe,
@@ -616,7 +616,7 @@ def _diskann_pre_query_hook(conn, params):
616616
L_search = params.get("L_search", 0)
617617
if L_search:
618618
conn.execute(
619-
"INSERT INTO vec_items(id) VALUES (?)",
619+
"INSERT INTO vec_items(vec_items) VALUES (?)",
620620
(f"search_list_size_search={L_search}",),
621621
)
622622
conn.commit()

sqlite-vec-rescore.c

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,9 @@ static int rescore_knn(vec0_vtab *p, vec0_cursor *pCur,
351351
(void)pCur;
352352
(void)aMetadataIn;
353353
int rc = SQLITE_OK;
354-
int oversample = vector_column->rescore.oversample;
354+
int oversample = vector_column->rescore.oversample_search > 0
355+
? vector_column->rescore.oversample_search
356+
: vector_column->rescore.oversample;
355357
i64 k_oversample = k * oversample;
356358
if (k_oversample > 4096)
357359
k_oversample = 4096;
@@ -640,6 +642,27 @@ static int rescore_knn(vec0_vtab *p, vec0_cursor *pCur,
640642
return rc;
641643
}
642644

645+
/**
646+
* Handle FTS5-style command dispatch for rescore parameters.
647+
* Returns SQLITE_OK if handled, SQLITE_ERROR (with the vtab error message set) if the oversample value is less than 1, or SQLITE_EMPTY if not a rescore command.
648+
*/
649+
static int rescore_handle_command(vec0_vtab *p, const char *command) {
650+
if (strncmp(command, "oversample=", 11) == 0) {
651+
int val = atoi(command + 11);
652+
if (val < 1) {
653+
vtab_set_error(&p->base, "oversample must be >= 1");
654+
return SQLITE_ERROR;
655+
}
656+
for (int i = 0; i < p->numVectorColumns; i++) {
657+
if (p->vector_columns[i].index_type == VEC0_INDEX_TYPE_RESCORE) {
658+
p->vector_columns[i].rescore.oversample_search = val;
659+
}
660+
}
661+
return SQLITE_OK;
662+
}
663+
return SQLITE_EMPTY;
664+
}
665+
643666
#ifdef SQLITE_VEC_TEST
644667
void _test_rescore_quantize_float_to_bit(const float *src, uint8_t *dst, size_t dim) {
645668
rescore_quantize_float_to_bit(src, dst, dim);

sqlite-vec.c

Lines changed: 111 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2588,7 +2588,8 @@ enum Vec0RescoreQuantizerType {
25882588

25892589
struct Vec0RescoreConfig {
25902590
enum Vec0RescoreQuantizerType quantizer_type;
2591-
int oversample;
2591+
int oversample; // CREATE-time default
2592+
int oversample_search; // runtime override (0 = use default)
25922593
};
25932594
#endif
25942595

@@ -3399,8 +3400,9 @@ static sqlite3_module vec_eachModule = {
33993400

34003401
#define VEC0_COLUMN_ID 0
34013402
#define VEC0_COLUMN_USERN_START 1
3402-
#define VEC0_COLUMN_OFFSET_DISTANCE 1
3403-
#define VEC0_COLUMN_OFFSET_K 2
3403+
#define VEC0_COLUMN_OFFSET_COMMAND 1
3404+
#define VEC0_COLUMN_OFFSET_DISTANCE 2
3405+
#define VEC0_COLUMN_OFFSET_K 3
34043406

34053407
#define VEC0_SHADOW_INFO_NAME "\"%w\".\"%w_info\""
34063408

@@ -3498,6 +3500,10 @@ struct vec0_vtab {
34983500
// Will change the schema of the _rowids table, and insert/query logic.
34993501
int pkIsText;
35003502

3503+
// True if the hidden command column (named after the table) exists.
3504+
// Tables created before v0.1.10 or without _info table don't have it.
3505+
int hasCommandColumn;
3506+
35013507
// number of defined vector columns.
35023508
int numVectorColumns;
35033509

@@ -3777,20 +3783,19 @@ int vec0_num_defined_user_columns(vec0_vtab *p) {
37773783
* @param p vec0 table
37783784
* @return int
37793785
*/
3786+
int vec0_column_command_idx(vec0_vtab *p) {
3787+
// Command column is the first hidden column (right after user columns)
3788+
return VEC0_COLUMN_USERN_START + vec0_num_defined_user_columns(p);
3789+
}
3790+
37803791
int vec0_column_distance_idx(vec0_vtab *p) {
3781-
return VEC0_COLUMN_USERN_START + (vec0_num_defined_user_columns(p) - 1) +
3782-
VEC0_COLUMN_OFFSET_DISTANCE;
3792+
int base = VEC0_COLUMN_USERN_START + vec0_num_defined_user_columns(p);
3793+
return base + (p->hasCommandColumn ? 1 : 0);
37833794
}
37843795

3785-
/**
3786-
* @brief Returns the index of the k hidden column for the given vec0 table.
3787-
*
3788-
* @param p vec0 table
3789-
* @return int k column index
3790-
*/
37913796
int vec0_column_k_idx(vec0_vtab *p) {
3792-
return VEC0_COLUMN_USERN_START + (vec0_num_defined_user_columns(p) - 1) +
3793-
VEC0_COLUMN_OFFSET_K;
3797+
int base = VEC0_COLUMN_USERN_START + vec0_num_defined_user_columns(p);
3798+
return base + (p->hasCommandColumn ? 2 : 1);
37943799
}
37953800

37963801
/**
@@ -5205,6 +5210,74 @@ static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv,
52055210
}
52065211
}
52075212

5213+
// Determine whether to add the FTS5-style hidden command column.
5214+
// New tables (isCreate) always get it; existing tables only if created
5215+
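// with v0.1.10+ (which validated no column name == table name). NOTE(review): the check below reads only CREATE_VERSION_PATCH (patch >= 10), so a later minor/major release with a lower patch number would be treated as legacy — confirm this is intended.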
5216+
int hasCommandColumn = 0;
5217+
if (isCreate) {
5218+
// Validate no user column name conflicts with the table name
5219+
const char *tblName = argv[2];
5220+
int tblNameLen = (int)strlen(tblName);
5221+
for (int i = 0; i < numVectorColumns; i++) {
5222+
if (pNew->vector_columns[i].name_length == tblNameLen &&
5223+
sqlite3_strnicmp(pNew->vector_columns[i].name, tblName, tblNameLen) == 0) {
5224+
*pzErr = sqlite3_mprintf(
5225+
VEC_CONSTRUCTOR_ERROR
5226+
"column name '%s' conflicts with table name (reserved for command column)",
5227+
tblName);
5228+
goto error;
5229+
}
5230+
}
5231+
for (int i = 0; i < numPartitionColumns; i++) {
5232+
if (pNew->paritition_columns[i].name_length == tblNameLen &&
5233+
sqlite3_strnicmp(pNew->paritition_columns[i].name, tblName, tblNameLen) == 0) {
5234+
*pzErr = sqlite3_mprintf(
5235+
VEC_CONSTRUCTOR_ERROR
5236+
"column name '%s' conflicts with table name (reserved for command column)",
5237+
tblName);
5238+
goto error;
5239+
}
5240+
}
5241+
for (int i = 0; i < numAuxiliaryColumns; i++) {
5242+
if (pNew->auxiliary_columns[i].name_length == tblNameLen &&
5243+
sqlite3_strnicmp(pNew->auxiliary_columns[i].name, tblName, tblNameLen) == 0) {
5244+
*pzErr = sqlite3_mprintf(
5245+
VEC_CONSTRUCTOR_ERROR
5246+
"column name '%s' conflicts with table name (reserved for command column)",
5247+
tblName);
5248+
goto error;
5249+
}
5250+
}
5251+
for (int i = 0; i < numMetadataColumns; i++) {
5252+
if (pNew->metadata_columns[i].name_length == tblNameLen &&
5253+
sqlite3_strnicmp(pNew->metadata_columns[i].name, tblName, tblNameLen) == 0) {
5254+
*pzErr = sqlite3_mprintf(
5255+
VEC_CONSTRUCTOR_ERROR
5256+
"column name '%s' conflicts with table name (reserved for command column)",
5257+
tblName);
5258+
goto error;
5259+
}
5260+
}
5261+
hasCommandColumn = 1;
5262+
} else {
5263+
// xConnect: check _info shadow table for version
5264+
sqlite3_stmt *stmtInfo = NULL;
5265+
char *zInfoSql = sqlite3_mprintf(
5266+
"SELECT value FROM " VEC0_SHADOW_INFO_NAME " WHERE key = 'CREATE_VERSION_PATCH'",
5267+
argv[1], argv[2]);
5268+
if (zInfoSql) {
5269+
int infoRc = sqlite3_prepare_v2(db, zInfoSql, -1, &stmtInfo, NULL);
5270+
sqlite3_free(zInfoSql);
5271+
if (infoRc == SQLITE_OK && sqlite3_step(stmtInfo) == SQLITE_ROW) {
5272+
int patch = sqlite3_column_int(stmtInfo, 0);
5273+
hasCommandColumn = (patch >= 10); // v0.1.10+
5274+
}
5275+
// If _info doesn't exist or has no version, assume old table
5276+
sqlite3_finalize(stmtInfo);
5277+
}
5278+
}
5279+
pNew->hasCommandColumn = hasCommandColumn;
5280+
52085281
sqlite3_str *createStr = sqlite3_str_new(NULL);
52095282
sqlite3_str_appendall(createStr, "CREATE TABLE x(");
52105283
if (pkColumnName) {
@@ -5246,7 +5319,11 @@ static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv,
52465319
}
52475320

52485321
}
5249-
sqlite3_str_appendall(createStr, " distance hidden, k hidden) ");
5322+
if (hasCommandColumn) {
5323+
sqlite3_str_appendf(createStr, " \"%w\" hidden, distance hidden, k hidden) ", argv[2]);
5324+
} else {
5325+
sqlite3_str_appendall(createStr, " distance hidden, k hidden) ");
5326+
}
52505327
if (pkColumnName) {
52515328
sqlite3_str_appendall(createStr, "without rowid ");
52525329
}
@@ -10161,25 +10238,31 @@ static int vec0Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv,
1016110238
}
1016210239
// INSERT operation
1016310240
else if (argc > 1 && sqlite3_value_type(argv[0]) == SQLITE_NULL) {
10164-
#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE || SQLITE_VEC_ENABLE_DISKANN
10165-
// Check for command inserts: INSERT INTO t(rowid) VALUES ('command-string')
10166-
// The id column holds the command string.
10167-
sqlite3_value *idVal = argv[2 + VEC0_COLUMN_ID];
10168-
if (sqlite3_value_type(idVal) == SQLITE_TEXT) {
10169-
const char *cmd = (const char *)sqlite3_value_text(idVal);
10170-
vec0_vtab *p = (vec0_vtab *)pVTab;
10171-
int cmdRc = SQLITE_EMPTY;
10241+
vec0_vtab *p = (vec0_vtab *)pVTab;
10242+
// FTS5-style command dispatch via hidden column named after table
10243+
if (p->hasCommandColumn) {
10244+
sqlite3_value *cmdVal = argv[2 + vec0_column_command_idx(p)];
10245+
if (sqlite3_value_type(cmdVal) == SQLITE_TEXT) {
10246+
const char *cmd = (const char *)sqlite3_value_text(cmdVal);
10247+
int cmdRc = SQLITE_EMPTY;
10248+
#if SQLITE_VEC_ENABLE_RESCORE
10249+
cmdRc = rescore_handle_command(p, cmd);
10250+
#endif
1017210251
#if SQLITE_VEC_EXPERIMENTAL_IVF_ENABLE
10173-
cmdRc = ivf_handle_command(p, cmd, argc, argv);
10252+
if (cmdRc == SQLITE_EMPTY)
10253+
cmdRc = ivf_handle_command(p, cmd, argc, argv);
1017410254
#endif
1017510255
#if SQLITE_VEC_ENABLE_DISKANN
10176-
if (cmdRc == SQLITE_EMPTY)
10177-
cmdRc = diskann_handle_command(p, cmd);
10256+
if (cmdRc == SQLITE_EMPTY)
10257+
cmdRc = diskann_handle_command(p, cmd);
1017810258
#endif
10179-
if (cmdRc != SQLITE_EMPTY) return cmdRc; // handled (or error)
10180-
// SQLITE_EMPTY means not a recognized command — fall through to normal insert
10259+
if (cmdRc == SQLITE_EMPTY) {
10260+
vtab_set_error(pVTab, "unknown vec0 command: '%s'", cmd);
10261+
return SQLITE_ERROR;
10262+
}
10263+
return cmdRc;
10264+
}
1018110265
}
10182-
#endif
1018310266
return vec0Update_Insert(pVTab, argc, argv, pRowid);
1018410267
}
1018510268
// UPDATE operation

tests/fixtures/legacy-v0.1.6.db

104 KB
Binary file not shown.

tests/fuzz/diskann-command-inject.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
5050
{
5151
sqlite3_stmt *stmt;
5252
sqlite3_prepare_v2(db,
53-
"INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &stmt, NULL);
53+
"INSERT INTO v(v, emb) VALUES (?, ?)", -1, &stmt, NULL);
5454
for (int i = 1; i <= 8; i++) {
5555
float vec[8];
5656
for (int j = 0; j < 8; j++) vec[j] = (float)i * 0.1f + (float)j * 0.01f;
@@ -66,11 +66,11 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
6666
sqlite3_stmt *stmtInsert = NULL;
6767
sqlite3_stmt *stmtKnn = NULL;
6868

69-
/* Commands are dispatched via INSERT INTO t(rowid) VALUES ('cmd_string') */
69+
/* Commands are dispatched via INSERT INTO t(t) VALUES ('cmd_string') */
7070
sqlite3_prepare_v2(db,
71-
"INSERT INTO v(rowid) VALUES (?)", -1, &stmtCmd, NULL);
71+
"INSERT INTO v(v) VALUES (?)", -1, &stmtCmd, NULL);
7272
sqlite3_prepare_v2(db,
73-
"INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &stmtInsert, NULL);
73+
"INSERT INTO v(v, emb) VALUES (?, ?)", -1, &stmtInsert, NULL);
7474
sqlite3_prepare_v2(db,
7575
"SELECT rowid, distance FROM v WHERE emb MATCH ? AND k = ?",
7676
-1, &stmtKnn, NULL);

tests/fuzz/ivf-cell-overflow.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
5555
// Insert enough vectors to overflow at least one cell
5656
sqlite3_stmt *stmtInsert = NULL;
5757
sqlite3_prepare_v2(db,
58-
"INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &stmtInsert, NULL);
58+
"INSERT INTO v(v, emb) VALUES (?, ?)", -1, &stmtInsert, NULL);
5959
if (!stmtInsert) { sqlite3_close(db); return 0; }
6060

6161
size_t offset = 0;
@@ -81,7 +81,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
8181

8282
// Train to assign vectors to centroids (triggers cell building)
8383
sqlite3_exec(db,
84-
"INSERT INTO v(rowid) VALUES ('compute-centroids')",
84+
"INSERT INTO v(v) VALUES ('compute-centroids')",
8585
NULL, NULL, NULL);
8686

8787
// Delete vectors at boundary positions based on fuzz data
@@ -102,7 +102,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
102102
{
103103
sqlite3_stmt *si = NULL;
104104
sqlite3_prepare_v2(db,
105-
"INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &si, NULL);
105+
"INSERT INTO v(v, emb) VALUES (?, ?)", -1, &si, NULL);
106106
if (si) {
107107
for (int i = 0; i < 10; i++) {
108108
float *vec = sqlite3_malloc(dim * sizeof(float));
@@ -140,7 +140,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
140140
// Test assign-vectors with multi-cell state
141141
// First clear centroids
142142
sqlite3_exec(db,
143-
"INSERT INTO v(rowid) VALUES ('clear-centroids')",
143+
"INSERT INTO v(v) VALUES ('clear-centroids')",
144144
NULL, NULL, NULL);
145145

146146
// Set centroids manually, then assign
@@ -151,7 +151,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
151151

152152
char cmd[128];
153153
snprintf(cmd, sizeof(cmd),
154-
"INSERT INTO v(rowid, emb) VALUES ('set-centroid:%d', ?)", c);
154+
"INSERT INTO v(v, emb) VALUES ('set-centroid:%d', ?)", c);
155155
sqlite3_stmt *sc = NULL;
156156
sqlite3_prepare_v2(db, cmd, -1, &sc, NULL);
157157
if (sc) {
@@ -163,7 +163,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
163163
}
164164

165165
sqlite3_exec(db,
166-
"INSERT INTO v(rowid) VALUES ('assign-vectors')",
166+
"INSERT INTO v(v) VALUES ('assign-vectors')",
167167
NULL, NULL, NULL);
168168

169169
// Final query after assign-vectors

0 commit comments

Comments
 (0)