Skip to content

Commit e2eff14

Browse files
committed
fix: recursive CTE bugs in edge traversal and improve HNSW params
- GetBidirectional: split the query into a seed CTE plus a single recursive graph CTE to fix PostgreSQL error 42P19 (recursive reference in non-recursive term).
- traverse (GetDownstream/GetUpstream): fix the JOIN condition that caused a combinatorial explosion (19K edges returned instead of 1) by correcting the column references passed to the format string.
- Bump HNSW index params from m=16/ef_construction=64 to m=32/ef_construction=256 for better semantic search recall.
1 parent 8c69e2c commit e2eff14

3 files changed

Lines changed: 18 additions & 16 deletions

File tree

store/edge_repository.go

Lines changed: 16 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -83,24 +83,26 @@ func (r *EdgeRepository) GetBidirectional(ctx context.Context, ns *namespace.Nam
8383
}
8484

8585
query := `
86-
WITH RECURSIVE graph(source_urn, target_urn, type, properties, depth, path, frontier) AS (
87-
SELECT source_urn, target_urn, type, properties, 1, ARRAY[source_urn], target_urn
86+
WITH RECURSIVE seed AS (
87+
SELECT source_urn, target_urn, type, properties, target_urn AS frontier
8888
FROM edges
8989
WHERE namespace_id = $1 AND source_urn = $2 AND valid_to IS NULL
90-
UNION ALL
91-
SELECT source_urn, target_urn, type, properties, 1, ARRAY[target_urn], source_urn
90+
UNION ALL
91+
SELECT source_urn, target_urn, type, properties, source_urn AS frontier
9292
FROM edges
9393
WHERE namespace_id = $1 AND target_urn = $2 AND valid_to IS NULL
94+
),
95+
graph(source_urn, target_urn, type, properties, depth, path, frontier) AS (
96+
SELECT source_urn, target_urn, type, properties, 1, ARRAY[source_urn, target_urn], frontier
97+
FROM seed
9498
UNION ALL
95-
SELECT e.source_urn, e.target_urn, e.type, e.properties, g.depth + 1, g.path || g.frontier, e.target_urn
96-
FROM edges e
97-
JOIN graph g ON e.source_urn = g.frontier
98-
WHERE e.target_urn <> ALL(g.path) AND e.valid_to IS NULL AND g.depth < $3
99-
UNION ALL
100-
SELECT e.source_urn, e.target_urn, e.type, e.properties, g.depth + 1, g.path || g.frontier, e.source_urn
99+
SELECT e.source_urn, e.target_urn, e.type, e.properties, g.depth + 1,
100+
g.path || e.target_urn || e.source_urn,
101+
CASE WHEN e.source_urn = g.frontier THEN e.target_urn ELSE e.source_urn END
101102
FROM edges e
102-
JOIN graph g ON e.target_urn = g.frontier
103-
WHERE e.source_urn <> ALL(g.path) AND e.valid_to IS NULL AND g.depth < $3
103+
JOIN graph g ON e.source_urn = g.frontier OR e.target_urn = g.frontier
104+
WHERE CASE WHEN e.source_urn = g.frontier THEN e.target_urn ELSE e.source_urn END <> ALL(g.path)
105+
AND e.valid_to IS NULL AND g.depth < $3
104106
)
105107
SELECT DISTINCT source_urn, target_urn, type, properties FROM graph
106108
LIMIT 1000`
@@ -149,8 +151,8 @@ func (r *EdgeRepository) traverse(ctx context.Context, ns *namespace.Namespace,
149151
JOIN graph g ON e.%s = g.%s
150152
WHERE e.%s <> ALL(g.path) AND e.valid_to IS NULL AND g.depth < $3
151153
)
152-
SELECT source_urn, target_urn, type, properties FROM graph`,
153-
seedCol, seedCol, seedCol, joinCol, joinCol, seedCol)
154+
SELECT DISTINCT source_urn, target_urn, type, properties FROM graph`,
155+
seedCol, seedCol, seedCol, seedCol, joinCol, seedCol)
154156

155157
var models []edgeModel
156158
if err := r.client.SelectContext(ctx, &models, query, ns.ID, urn, depth); err != nil {

store/migrations/000001_init_schema.up.sql

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,7 @@ CREATE TABLE chunks (
116116

117117
CREATE INDEX chunks_embedding_idx ON chunks
118118
USING hnsw (embedding vector_cosine_ops)
119-
WITH (m = 16, ef_construction = 64);
119+
WITH (m = 32, ef_construction = 256);
120120
CREATE INDEX idx_chunks_entity_urn ON chunks(entity_urn);
121121
CREATE INDEX idx_chunks_namespace ON chunks(namespace_id);
122122

store/migrations/000002_documents_and_embeddings.up.sql

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ DROP INDEX IF EXISTS idx_embeddings_vector;
1313
ALTER TABLE embeddings ALTER COLUMN embedding TYPE vector(768);
1414
CREATE INDEX idx_embeddings_vector ON embeddings
1515
USING hnsw (embedding vector_cosine_ops)
16-
WITH (m = 16, ef_construction = 64);
16+
WITH (m = 32, ef_construction = 256);
1717

1818
-- Add content tracking columns
1919
ALTER TABLE embeddings ADD COLUMN content_id uuid;

0 commit comments

Comments
 (0)