Skip to content

Commit 611ca63

Browse files
authored
Support constraints on distance column in KNN queries, for pagination and range queries (#166)
* Initial pass, needs tests+docs * old: test-knn-constraints * cleanup
1 parent a2dd24f commit 611ca63

3 files changed

Lines changed: 492 additions & 2 deletions

File tree

sqlite-vec.c

Lines changed: 137 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5305,11 +5305,21 @@ static int vec0Close(sqlite3_vtab_cursor *cur) {
53055305
// One-character tags describing what each xFilter argv[i] value represents.
// vec0BestIndex appends one tag per consumed constraint to idxStr, and the KNN
// filter reads the tag for argv[i] back from idxStr[1 + i * 4].
typedef enum {
53065306
// If any values are updated, please update the ARCHITECTURE.md docs accordingly!
53075307

5308+
// ~~~ KNN QUERIES ~~~ //
53085309
VEC0_IDXSTR_KIND_KNN_MATCH = '{',
53095310
VEC0_IDXSTR_KIND_KNN_K = '}',
53105311
VEC0_IDXSTR_KIND_KNN_ROWID_IN = '[',
5312+
// argv[i] is a constraint on a PARTITION KEY column in a KNN query
5313+
// (the "PARTITON" spelling in the identifier below is part of the existing name)
53115314
VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT = ']',
5315+
5316+
// argv[i] is a constraint on the distance column in a KNN query; the next
// idxStr character is a vec0_distance_constraint_operator value
5317+
VEC0_IDXSTR_KIND_KNN_DISTANCE_CONSTRAINT = '*',
5318+
5319+
// ~~~ POINT QUERIES ~~~ //
53125320
VEC0_IDXSTR_KIND_POINT_ID = '!',
5321+
5322+
// ~~~ ??? ~~~ //
// argv[i] is a constraint on a metadata column; the KNN chunk filter checks
// for this kind to decide whether metadata filters are present
53135323
VEC0_IDXSTR_KIND_METADATA_CONSTRAINT = '&',
53145324
} vec0_idxstr_kind;
53155325

@@ -5318,11 +5328,22 @@ typedef enum {
53185328
// Comparison operators for constraints on PARTITION KEY columns, each encoded
// as a single character.
// NOTE(review): presumably stored in idxStr alongside
// VEC0_IDXSTR_KIND_KNN_PARTITON_CONSTRAINT -- the code that writes it is not
// visible here; confirm against vec0BestIndex.
typedef enum {
53195329
// If any values are updated, please update the ARCHITECTURE.md docs accordingly!
53205330

5331+
// Equality constraint on a PARTITION KEY column, e.g. `user_id = 123`
53215332
VEC0_PARTITION_OPERATOR_EQ = 'a',
5333+
5334+
// "Greater than" constraint on a PARTITION KEY column, e.g. `year > 2024`
53225335
VEC0_PARTITION_OPERATOR_GT = 'b',
5336+
5337+
// "Less than or equal to" constraint on a PARTITION KEY column, e.g. `year <= 2024`
53235338
VEC0_PARTITION_OPERATOR_LE = 'c',
5339+
5340+
// "Less than" constraint on a PARTITION KEY column, e.g. `year < 2024`
53245341
VEC0_PARTITION_OPERATOR_LT = 'd',
5342+
5343+
// "Greater than or equal to" constraint on a PARTITION KEY column, e.g. `year >= 2024`
53255344
VEC0_PARTITION_OPERATOR_GE = 'e',
5345+
5346+
// "Not equal to" constraint on a PARTITION KEY column, e.g. `year != 2024`
53265347
VEC0_PARTITION_OPERATOR_NE = 'f',
53275348
} vec0_partition_operator;
53285349
typedef enum {
@@ -5335,6 +5356,15 @@ typedef enum {
53355356
VEC0_METADATA_OPERATOR_IN = 'g',
53365357
} vec0_metadata_operator;
53375358

5359+
5360+
// Comparison operators for a WHERE constraint on the distance column in a KNN
// query. vec0BestIndex maps SQLITE_INDEX_CONSTRAINT_GT/GE/LT/LE to these values
// and appends the value to idxStr right after the
// VEC0_IDXSTR_KIND_KNN_DISTANCE_CONSTRAINT tag; the KNN chunk filter reads it
// back from idxStr[idx + 1] and clears candidates whose distance fails the
// comparison against argv[i].
//
// Note: the characters 'a'..'d' are also used by vec0_partition_operator with
// different meanings, so a value is only meaningful together with its kind tag.
typedef enum {
5361+
5362+
// `distance > ?`
VEC0_DISTANCE_CONSTRAINT_GT = 'a',
5363+
// `distance >= ?`
VEC0_DISTANCE_CONSTRAINT_GE = 'b',
5364+
// `distance < ?`
VEC0_DISTANCE_CONSTRAINT_LT = 'c',
5365+
// `distance <= ?`
VEC0_DISTANCE_CONSTRAINT_LE = 'd',
5366+
} vec0_distance_constraint_operator;
5367+
53385368
static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) {
53395369
vec0_vtab *p = (vec0_vtab *)pVTab;
53405370
/**
@@ -5494,6 +5524,7 @@ static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) {
54945524
}
54955525
#endif
54965526

5527+
// find any PARTITION KEY column constraints
54975528
for (int i = 0; i < pIdxInfo->nConstraint; i++) {
54985529
if (!pIdxInfo->aConstraint[i].usable)
54995530
continue;
@@ -5548,6 +5579,7 @@ static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) {
55485579

55495580
}
55505581

5582+
// find any metadata column constraints
55515583
for (int i = 0; i < pIdxInfo->nConstraint; i++) {
55525584
if (!pIdxInfo->aConstraint[i].usable)
55535585
continue;
@@ -5644,6 +5676,58 @@ static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) {
56445676

56455677
}
56465678

5679+
// find any distance column constraints
5680+
for (int i = 0; i < pIdxInfo->nConstraint; i++) {
5681+
if (!pIdxInfo->aConstraint[i].usable)
5682+
continue;
5683+
5684+
int iColumn = pIdxInfo->aConstraint[i].iColumn;
5685+
int op = pIdxInfo->aConstraint[i].op;
5686+
if(op == SQLITE_INDEX_CONSTRAINT_LIMIT || op == SQLITE_INDEX_CONSTRAINT_OFFSET) {
5687+
continue;
5688+
}
5689+
if(vec0_column_distance_idx(p) != iColumn) {
5690+
continue;
5691+
}
5692+
5693+
char value = 0;
5694+
switch(op) {
5695+
case SQLITE_INDEX_CONSTRAINT_GT: {
5696+
value = VEC0_DISTANCE_CONSTRAINT_GT;
5697+
break;
5698+
}
5699+
case SQLITE_INDEX_CONSTRAINT_GE: {
5700+
value = VEC0_DISTANCE_CONSTRAINT_GE;
5701+
break;
5702+
}
5703+
case SQLITE_INDEX_CONSTRAINT_LT: {
5704+
value = VEC0_DISTANCE_CONSTRAINT_LT;
5705+
break;
5706+
}
5707+
case SQLITE_INDEX_CONSTRAINT_LE: {
5708+
value = VEC0_DISTANCE_CONSTRAINT_LE;
5709+
break;
5710+
}
5711+
default: {
5712+
// IMP TODO
5713+
rc = SQLITE_ERROR;
5714+
vtab_set_error(
5715+
pVTab,
5716+
"Illegal WHERE constraint on distance column in a KNN query. "
5717+
"Only one of GT, GE, LT, LE constraints are allowed."
5718+
);
5719+
goto done;
5720+
}
5721+
}
5722+
5723+
pIdxInfo->aConstraintUsage[i].argvIndex = argvIndex++;
5724+
pIdxInfo->aConstraintUsage[i].omit = 1;
5725+
sqlite3_str_appendchar(idxStr, 1, VEC0_IDXSTR_KIND_KNN_DISTANCE_CONSTRAINT);
5726+
sqlite3_str_appendchar(idxStr, 1, value);
5727+
sqlite3_str_appendchar(idxStr, 1, '_');
5728+
sqlite3_str_appendchar(idxStr, 1, '_');
5729+
}
5730+
56475731

56485732

56495733
pIdxInfo->idxNum = iMatchVectorTerm;
@@ -5672,7 +5756,6 @@ static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) {
56725756
}
56735757
pIdxInfo->needToFreeIdxStr = 1;
56745758

5675-
56765759
rc = SQLITE_OK;
56775760

56785761
done:
@@ -6560,12 +6643,15 @@ int vec0Filter_knn_chunks_iter(vec0_vtab *p, sqlite3_stmt *stmtChunks,
65606643
int numValueEntries = (idxStrLength-1) / 4;
65616644
assert(numValueEntries == argc);
65626645
int hasMetadataFilters = 0;
6646+
int hasDistanceConstraints = 0;
65636647
for(int i = 0; i < argc; i++) {
65646648
int idx = 1 + (i * 4);
65656649
char kind = idxStr[idx + 0];
65666650
if(kind == VEC0_IDXSTR_KIND_METADATA_CONSTRAINT) {
65676651
hasMetadataFilters = 1;
6568-
break;
6652+
}
6653+
else if(kind == VEC0_IDXSTR_KIND_KNN_DISTANCE_CONSTRAINT) {
6654+
hasDistanceConstraints = 1;
65696655
}
65706656
}
65716657

@@ -6752,6 +6838,55 @@ int vec0Filter_knn_chunks_iter(vec0_vtab *p, sqlite3_stmt *stmtChunks,
67526838
chunk_distances[i] = result;
67536839
}
67546840

6841+
if(hasDistanceConstraints) {
6842+
for(int i = 0; i < argc; i++) {
6843+
int idx = 1 + (i * 4);
6844+
char kind = idxStr[idx + 0];
6845+
// TODO casts f64 to f32, is that a problem?
6846+
f32 target = (f32) sqlite3_value_double(argv[i]);
6847+
6848+
if(kind != VEC0_IDXSTR_KIND_KNN_DISTANCE_CONSTRAINT) {
6849+
continue;
6850+
}
6851+
vec0_distance_constraint_operator op = idxStr[idx + 1];
6852+
6853+
switch(op) {
6854+
case VEC0_DISTANCE_CONSTRAINT_GE: {
6855+
for(int i = 0; i < p->chunk_size;i++) {
6856+
if(bitmap_get(b, i) && !(chunk_distances[i] >= target)) {
6857+
bitmap_set(b, i, 0);
6858+
}
6859+
}
6860+
break;
6861+
}
6862+
case VEC0_DISTANCE_CONSTRAINT_GT: {
6863+
for(int i = 0; i < p->chunk_size;i++) {
6864+
if(bitmap_get(b, i) && !(chunk_distances[i] > target)) {
6865+
bitmap_set(b, i, 0);
6866+
}
6867+
}
6868+
break;
6869+
}
6870+
case VEC0_DISTANCE_CONSTRAINT_LE: {
6871+
for(int i = 0; i < p->chunk_size;i++) {
6872+
if(bitmap_get(b, i) && !(chunk_distances[i] <= target)) {
6873+
bitmap_set(b, i, 0);
6874+
}
6875+
}
6876+
break;
6877+
}
6878+
case VEC0_DISTANCE_CONSTRAINT_LT: {
6879+
for(int i = 0; i < p->chunk_size;i++) {
6880+
if(bitmap_get(b, i) && !(chunk_distances[i] < target)) {
6881+
bitmap_set(b, i, 0);
6882+
}
6883+
}
6884+
break;
6885+
}
6886+
}
6887+
}
6888+
}
6889+
67556890
int used1;
67566891
min_idx(chunk_distances, p->chunk_size, b, chunk_topk_idxs,
67576892
min(k, p->chunk_size), bTaken, &used1);

0 commit comments

Comments
 (0)