Skip to content

Commit c749764

Browse files
committed
fix: pgvector should not trigger supplementary adapters like trgm
pgvector operates on embedding vectors, not text search. Add isIntentionalSearch flag to SearchAdapter interface so that only adapters representing real search infrastructure (tsvector, BM25) trigger supplementary adapters. pgvector sets isIntentionalSearch: false.
1 parent 4393bf8 commit c749764

3 files changed

Lines changed: 39 additions & 7 deletions

File tree

graphile/graphile-search/src/adapters/pgvector.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@ export function createPgvectorAdapter(
5252

5353
filterPrefix,
5454

55+
// pgvector operates on embedding vectors, not text search — its presence
56+
// alone should NOT trigger supplementary adapters like trgm.
57+
isIntentionalSearch: false,
58+
5559
supportsTextSearch: false,
5660
// pgvector requires a vector array, not plain text — no buildTextSearchInput
5761

graphile/graphile-search/src/plugin.ts

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,12 @@ export function createUnifiedSearchPlugin(
9191
*
9292
* Runs non-supplementary adapters first (e.g. tsvector, BM25, pgvector).
9393
* Supplementary adapters (e.g. trgm with requireIntentionalSearch) are only
94-
* run if at least one non-supplementary adapter found columns — this prevents
95-
* trgm from adding similarity fields to every table with text columns.
94+
* run if at least one primary adapter whose `isIntentionalSearch` is not `false` (unset counts as true) found columns.
95+
*
96+
* This distinction matters because pgvector (embeddings) is NOT intentional
97+
* text search — its presence alone should not trigger trgm similarity fields.
98+
* Only tsvector and BM25, which represent explicit search infrastructure,
99+
* count as intentional search.
96100
*/
97101
function getAdapterColumns(codec: PgCodecWithAttributes, build: any): AdapterColumnCache[] {
98102
const cacheKey = codec.name;
@@ -103,18 +107,25 @@ export function createUnifiedSearchPlugin(
103107
const primaryAdapters = adapters.filter((a) => !a.isSupplementary);
104108
const supplementaryAdapters = adapters.filter((a) => a.isSupplementary);
105109

106-
// Phase 1: Run non-supplementary (intentional search) adapters
110+
// Phase 1: Run non-supplementary adapters (tsvector, BM25, pgvector, etc.)
107111
const results: AdapterColumnCache[] = [];
112+
let hasIntentionalSearch = false;
108113
for (const adapter of primaryAdapters) {
109114
const columns = adapter.detectColumns(codec, build);
110115
if (columns.length > 0) {
111116
results.push({ adapter, columns });
117+
// Track whether any "intentional search" adapter found columns.
118+
// isIntentionalSearch defaults to true when not explicitly set.
119+
if (adapter.isIntentionalSearch !== false) {
120+
hasIntentionalSearch = true;
121+
}
112122
}
113123
}
114124

115125
// Phase 2: Only run supplementary adapters if at least one primary
116-
// adapter found columns on this codec (i.e. intentional search exists)
117-
if (results.length > 0) {
126+
// adapter that counts as intentional search (isIntentionalSearch !== false) found columns on this codec.
127+
// pgvector (isIntentionalSearch: false) alone won't trigger trgm.
128+
if (hasIntentionalSearch) {
118129
for (const adapter of supplementaryAdapters) {
119130
const columns = adapter.detectColumns(codec, build);
120131
if (columns.length > 0) {

graphile/graphile-search/src/types.ts

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,16 +77,33 @@ export interface SearchAdapter {
7777

7878
/**
7979
* When true, this adapter is "supplementary" — it only activates on
80-
* tables that already have at least one column detected by a
81-
* non-supplementary adapter (e.g. tsvector or BM25).
80+
* tables that already have at least one column detected by an adapter
81+
* whose `isIntentionalSearch` is not `false` — unset counts as true (e.g. tsvector or BM25).
8282
*
8383
* This prevents adapters like pg_trgm from adding similarity fields
8484
* to every table with text columns when there is no intentional search setup.
8585
*
86+
* pgvector (embeddings) does NOT count as intentional search because it
87+
* operates on vector columns, not text search — so its presence alone
88+
* won't trigger supplementary adapters.
89+
*
8690
* @default false
8791
*/
8892
isSupplementary?: boolean;
8993

94+
/**
95+
* When true, this adapter represents "intentional search" — its presence
96+
* on a table signals that the table was explicitly set up for search and
97+
* should trigger supplementary adapters (e.g. trgm).
98+
*
99+
* Adapters that check for real infrastructure (tsvector columns, BM25
100+
* indexes) should set this to true. Adapters that operate on a different
101+
* domain (pgvector embeddings) should set this to false.
102+
*
103+
* @default true
104+
*/
105+
isIntentionalSearch?: boolean;
106+
90107
/**
91108
* The filter prefix used for filter field names on the connection filter input.
92109
* The field name is: `{filterPrefix}{ColumnName}` (camelCase).

0 commit comments

Comments
 (0)