Skip to content

Commit 0ff5664

Browse files
ENG-1769 Optimize semantic search by removing LIMIT from function body (#1141)
* Optimize semantic search by removing LIMIT from function body [ENG-1769]

  - Remove match_count parameter from match_content_embeddings function
  - Remove LIMIT clause from function body to improve query planner performance
  - Add space_id parameter for better filtering
  - Move LIMIT to caller side using Supabase .limit() method
  - Update TypeScript types to reflect new function signature

  Co-authored-by: Michael Gartner <mdroidian@users.noreply.github.com>

* Remove space_id parameter - moved to separate ticket

  Focus this PR only on the LIMIT optimization that's killing the planner.
  The space_id filtering will be addressed in a separate ticket.

  Co-authored-by: Michael Gartner <mdroidian@users.noreply.github.com>

* drop if exists

---------

Co-authored-by: Cursor Agent <cursoragent@cursor.com>
Co-authored-by: Michael Gartner <mdroidian@users.noreply.github.com>
1 parent ee5c5d2 commit 0ff5664

5 files changed

Lines changed: 45 additions & 15 deletions

File tree

apps/roam/src/utils/discourseNodeSearchProviders.ts

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -512,11 +512,12 @@ const runSupabaseSemanticSearch = async ({
512512
}
513513

514514
const queryEmbedding = await createEmbedding(trimmedQuery);
515-
const { data, error } = await supabase.rpc("match_content_embeddings", {
516-
query_embedding: JSON.stringify(queryEmbedding),
517-
match_threshold: SUPABASE_MATCH_THRESHOLD,
518-
match_count: SEARCH_TEST_RESULT_LIMIT,
519-
});
515+
const { data, error } = await supabase
516+
.rpc("match_content_embeddings", {
517+
query_embedding: JSON.stringify(queryEmbedding),
518+
match_threshold: SUPABASE_MATCH_THRESHOLD,
519+
})
520+
.limit(SEARCH_TEST_RESULT_LIMIT);
520521

521522
if (error) {
522523
throw new Error(error.message);

apps/roam/src/utils/hyde.ts

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -538,11 +538,12 @@ export const findSimilarNodesVectorOnly = async ({
538538

539539
const queryEmbedding = await createEmbedding(text);
540540

541-
const { data, error } = await supabase.rpc("match_content_embeddings", {
542-
query_embedding: JSON.stringify(queryEmbedding),
543-
match_threshold: threshold,
544-
match_count: limit,
545-
});
541+
const { data, error } = await supabase
542+
.rpc("match_content_embeddings", {
543+
query_embedding: JSON.stringify(queryEmbedding),
544+
match_threshold: threshold,
545+
})
546+
.limit(limit);
546547

547548
if (error) {
548549
console.error("Vector search failed:", error);

packages/database/src/dbTypes.ts

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1722,7 +1722,6 @@ export type Database = {
17221722
match_content_embeddings: {
17231723
Args: {
17241724
current_document_id?: number
1725-
match_count: number
17261725
match_threshold: number
17271726
query_embedding: string
17281727
}
Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,32 @@
1+
-- Optimize match_content_embeddings by removing LIMIT from function body
2+
-- This improves query planner performance as the LIMIT parameter was killing the planner
3+
4+
set search_path to public, extensions ;
5+
6+
DROP FUNCTION IF EXISTS public.match_content_embeddings(extensions.vector, double precision, integer, integer) ;
7+
8+
CREATE OR REPLACE FUNCTION public.match_content_embeddings (
9+
query_embedding extensions.vector,
10+
match_threshold double precision,
11+
current_document_id integer DEFAULT NULL::integer)
12+
RETURNS TABLE (
13+
content_id bigint,
14+
roam_uid Text,
15+
text_content Text,
16+
similarity double precision)
17+
SET search_path = 'extensions'
18+
LANGUAGE sql STABLE
19+
AS $$
20+
SELECT
21+
c.id AS content_id,
22+
c.source_local_id AS roam_uid,
23+
c.text AS text_content,
24+
1 - (c.vector <=> query_embedding) AS similarity
25+
FROM public.my_contents_with_embedding_openai_text_embedding_3_small_1536 AS c
26+
WHERE 1 - (c.vector <=> query_embedding) > match_threshold
27+
AND (current_document_id IS NULL OR c.document_id = current_document_id)
28+
ORDER BY
29+
c.vector <=> query_embedding ASC;
30+
$$ ;
31+
32+
RESET ALL ;

packages/database/supabase/schemas/embedding.sql

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,6 @@ set search_path to public, extensions ;
5757
CREATE OR REPLACE FUNCTION public.match_content_embeddings (
5858
query_embedding extensions.vector,
5959
match_threshold double precision,
60-
match_count integer,
6160
current_document_id integer DEFAULT NULL::integer)
6261
RETURNS TABLE (
6362
content_id bigint,
@@ -76,14 +75,12 @@ FROM public.my_contents_with_embedding_openai_text_embedding_3_small_1536 AS c
7675
WHERE 1 - (c.vector <=> query_embedding) > match_threshold
7776
AND (current_document_id IS NULL OR c.document_id = current_document_id)
7877
ORDER BY
79-
c.vector <=> query_embedding ASC
80-
LIMIT match_count;
78+
c.vector <=> query_embedding ASC;
8179
$$ ;
8280

8381
ALTER FUNCTION public.match_content_embeddings (
8482
query_embedding extensions.vector,
8583
match_threshold double precision,
86-
match_count integer,
8784
current_document_id integer) OWNER TO "postgres" ;
8885

8986
CREATE OR REPLACE FUNCTION public.match_embeddings_for_subset_nodes (

0 commit comments

Comments
 (0)