-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathplpgsql_bm25rrf.sql
More file actions
39 lines (32 loc) · 1.97 KB
/
plpgsql_bm25rrf.sql
File metadata and controls
39 lines (32 loc) · 1.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
/*
plpgsql_bm25rrf.sql
Hybrid search with Reciprocal Rank Fusion
version 1.1.0 by András Jankovics https://github.com/jankovicsandras andras@jankovics.net
Requirements:
- https://github.com/jankovicsandras/plpgsql_bm25
- https://github.com/pgvector/pgvector
- The documents and their vector embeddings are stored in the same table
- The BM25 index is already created with bm25createindex()
Example (resultlimit, algo and stopwordslanguage are optional and default to 20, '' and ''):
SELECT * FROM bm25rrf( querytext, queryembedding, tablename, idcolumnname, doccolumnname, embeddingcolumnname, resultlimit, algo, stopwordslanguage );
*/
DROP FUNCTION IF EXISTS bm25rrf;
/*
  bm25rrf: hybrid search that fuses a pgvector distance ranking and a bm25topk()
  ranking with Reciprocal Rank Fusion.

  Parameters:
    querytext            text query, passed to bm25topk()
    queryembedding       query vector, compared to the embedding column with <=>
    tablename            table holding the documents and embeddings (may be schema-qualified)
    idcolumnname         name of the id column
    doccolumnname        name of the document text column
    embeddingcolumnname  name of the embedding column
    slimit               row limit for each sub-search and for the fused result (default 20)
    algo                 passed through to bm25topk() (default '')
    stopwordslanguage    passed through to bm25topk() (default '')

  Returns: TABLE(id INTEGER, score NUMERIC, doc TEXT), highest fused score first.
    score = 1/(60 + vector rank) + 1/(60 + BM25 rank); a document missing from one
    of the two result lists contributes 0 for that list.

  Raises: an exception when a required argument is NULL, when tablename does not
    resolve to an existing relation, or when a column name is not a valid identifier.

  Identifier handling: table and column names are parsed like ordinary SQL
    identifiers (unquoted names are case-folded, "Quoted Names" are kept as-is)
    and re-quoted before being placed into the dynamic query, so they cannot
    inject SQL.

  NOTE(review): the two result lists are matched on document text, not on id, and
    the BM25 branch selects idcolumnname from the bm25topk() result, so bm25topk()
    must expose a column with that name - confirm against plpgsql_bm25.
*/
CREATE OR REPLACE FUNCTION bm25rrf( querytext TEXT, queryembedding vector, tablename TEXT, idcolumnname TEXT, doccolumnname TEXT, embeddingcolumnname TEXT, slimit INT DEFAULT 20, algo TEXT DEFAULT '', stopwordslanguage TEXT DEFAULT '' ) RETURNS TABLE(id INTEGER, score NUMERIC, doc TEXT)
LANGUAGE plpgsql
AS $$
DECLARE
  v_table   regclass;
  v_id_col  TEXT;
  v_emb_col TEXT;
  v_doc_col TEXT;
BEGIN
  -- A NULL here would otherwise be formatted as an empty string / NULL literal and
  -- surface as an obscure error (or a meaningless ranking) inside the dynamic query.
  IF querytext IS NULL OR queryembedding IS NULL OR tablename IS NULL
     OR idcolumnname IS NULL OR doccolumnname IS NULL OR embeddingcolumnname IS NULL THEN
    RAISE EXCEPTION 'bm25rrf: querytext, queryembedding, tablename, idcolumnname, doccolumnname and embeddingcolumnname must not be NULL'
      USING ERRCODE = 'null_value_not_allowed';
  END IF;

  -- regclass input parses the name like SQL would and fails if the relation does not
  -- exist; its text output is already safely quoted (and schema-qualified if needed).
  v_table := tablename::regclass;

  -- parse_ident() splits and case-folds the name like the SQL parser and rejects
  -- anything that is not a plain (optionally qualified) identifier; quote_ident()
  -- then re-quotes each part where necessary.
  v_id_col := ( SELECT string_agg( quote_ident(part), '.' ORDER BY pos )
                FROM unnest( parse_ident(idcolumnname) ) WITH ORDINALITY AS u(part, pos) );
  v_emb_col := ( SELECT string_agg( quote_ident(part), '.' ORDER BY pos )
                 FROM unnest( parse_ident(embeddingcolumnname) ) WITH ORDINALITY AS u(part, pos) );
  v_doc_col := ( SELECT string_agg( quote_ident(part), '.' ORDER BY pos )
                 FROM unnest( parse_ident(doccolumnname) ) WITH ORDINALITY AS u(part, pos) );

  -- Format arguments by position:
  --   1 id column, 2 embedding column, 3 doc column, 4 table (all pre-quoted identifiers)
  --   5 query embedding, 6 table name, 7 doc column name, 8 query text (literals, %L)
  --   9 limit (integer), 10 algo, 11 stopwords language (literals, %L)
  -- bm25topk() receives the table and doc column names exactly as the caller passed them.
  RETURN QUERY EXECUTE format( '
    WITH vector_search AS (
      SELECT %1$s AS id, RANK () OVER (ORDER BY %2$s <=> %5$L) AS rank, %3$s AS doc
      FROM %4$s
      ORDER BY %2$s <=> %5$L
      LIMIT %9$s
    ),
    bm25_search AS (
      SELECT %1$s AS id, RANK () OVER (ORDER BY score DESC) AS rank, doc
      FROM bm25topk( %6$L, %7$L, %8$L, %9$s, %10$L, %11$L )
    )
    SELECT
      COALESCE(vector_search.id, bm25_search.id) AS id,
      COALESCE(1.0 / (60 + vector_search.rank), 0.0) + COALESCE(1.0 / (60 + bm25_search.rank), 0.0) AS score,
      COALESCE(vector_search.doc, bm25_search.doc) AS doc
    FROM vector_search
    FULL OUTER JOIN bm25_search ON vector_search.doc = bm25_search.doc
    ORDER BY score DESC
    LIMIT %9$s',
    v_id_col, v_emb_col, v_doc_col, v_table,
    queryembedding, tablename, doccolumnname, querytext,
    COALESCE(slimit, 20), COALESCE(algo, ''), COALESCE(stopwordslanguage, '') );
END;
$$;