11import type { ModelProfile } from "../core/ModelProfile" ;
2- import type { MetadataStore , Page , VectorStore } from "../core/types" ;
2+ import type { Hash , MetadataStore , Page , VectorStore } from "../core/types" ;
33import type { VectorBackend } from "../VectorBackend" ;
44import type { EmbeddingRunner } from "../embeddings/EmbeddingRunner" ;
55import { runPromotionSweep } from "../core/SalienceEngine" ;
66import type { QueryResult } from "./QueryResult" ;
7+ import { rankPages , spillToWarm } from "./Ranking" ;
8+ import { buildMetroid } from "./MetroidBuilder" ;
9+ import { detectKnowledgeGap } from "./KnowledgeGapDetector" ;
10+ import { solveOpenTSP } from "./OpenTSPSolver" ;
711
812export interface QueryOptions {
913 modelProfile : ModelProfile ;
@@ -12,66 +16,10 @@ export interface QueryOptions {
1216 metadataStore : MetadataStore ;
1317 vectorBackend : VectorBackend ;
1418 topK ?: number ;
15- }
16-
17- function dot ( a : Float32Array , b : Float32Array ) : number {
18- const len = Math . min ( a . length , b . length ) ;
19- let sum = 0 ;
20- for ( let i = 0 ; i < len ; i ++ ) {
21- sum += a [ i ] * b [ i ] ;
22- }
23- return sum ;
24- }
25-
26- /**
27- * Concatenates an array of equal-length vectors into a single flat buffer.
28- * @param vectors - Must be non-empty; every element must have the same length.
29- */
30- function concatVectors ( vectors : Float32Array [ ] ) : Float32Array {
31- const dim = vectors [ 0 ] . length ;
32- const out = new Float32Array ( vectors . length * dim ) ;
33- for ( let i = 0 ; i < vectors . length ; i ++ ) {
34- out . set ( vectors [ i ] , i * dim ) ;
35- }
36- return out ;
37- }
38-
39- async function scorePages (
40- queryEmbedding : Float32Array ,
41- pages : Page [ ] ,
42- vectorStore : VectorStore ,
43- vectorBackend : VectorBackend ,
44- maxResults : number ,
45- ) : Promise < Array < { page : Page ; score : number } > > {
46- if ( pages . length === 0 ) return [ ] ;
47-
48- const [ firstPage ] = pages ;
49- const dim = firstPage . embeddingDim ;
50- const offsets = pages . map ( ( p ) => p . embeddingOffset ) ;
51-
52- // If all pages share the same embedding dimension and it matches the query,
53- // use the vector backend for fast scoring.
54- const uniformDim = pages . every ( ( p ) => p . embeddingDim === dim ) ;
55- const canUseBackend = uniformDim && queryEmbedding . length === dim ;
56-
57- if ( canUseBackend ) {
58- const embeddings = await vectorStore . readVectors ( offsets , dim ) ;
59- const matrix = concatVectors ( embeddings ) ;
60- const scores = await vectorBackend . dotMany ( queryEmbedding , matrix , dim , pages . length ) ;
61- const topk = await vectorBackend . topKFromScores ( scores , Math . min ( maxResults , pages . length ) ) ;
62- return topk . map ( ( r ) => ( { page : pages [ r . index ] , score : r . score } ) ) ;
63- }
64-
65- // Fallback: compute dot product per page.
66- const scored = await Promise . all (
67- pages . map ( async ( page ) => {
68- const vec = await vectorStore . readVector ( page . embeddingOffset , page . embeddingDim ) ;
69- return { page, score : dot ( queryEmbedding , vec ) } ;
70- } ) ,
71- ) ;
72-
73- scored . sort ( ( a , b ) => b . score - a . score || a . page . pageId . localeCompare ( b . page . pageId ) ) ;
74- return scored . slice ( 0 , Math . min ( maxResults , scored . length ) ) ;
19+ /** BFS depth for semantic neighbor subgraph expansion. 2 hops covers direct
20+ * neighbors and their neighbors, which is the minimum needed to surface
21+ * bridge nodes without exploding the graph size. */
22+ maxHops ?: number ;
7523}
7624
7725export async function query (
@@ -83,8 +31,8 @@ export async function query(
8331 embeddingRunner,
8432 vectorStore,
8533 metadataStore,
86- vectorBackend,
8734 topK = 10 ,
35+ maxHops = 2 ,
8836 } = options ;
8937
9038 const nowIso = new Date ( ) . toISOString ( ) ;
@@ -95,74 +43,114 @@ export async function query(
9543 }
9644 const queryEmbedding = embeddings [ 0 ] ;
9745
98- // Score resident (hotpath) pages first.
46+ const rankingOptions = { vectorStore, metadataStore } ;
47+
48+ // --- HOT path: score resident pages ---
9949 const hotpathEntries = await metadataStore . getHotpathEntries ( "page" ) ;
10050 const hotpathIds = hotpathEntries . map ( ( e ) => e . entityId ) ;
10151
102- const hotpathPages = ( await Promise . all (
103- hotpathIds . map ( ( id ) => metadataStore . getPage ( id ) ) ,
104- ) ) . filter ( ( p ) : p is Page => p !== undefined ) ;
105-
106- const hotpathResults = await scorePages (
107- queryEmbedding ,
108- hotpathPages ,
109- vectorStore ,
110- vectorBackend ,
111- topK ,
112- ) ;
52+ const hotResults = await rankPages ( queryEmbedding , hotpathIds , topK , rankingOptions ) ;
53+ const seenIds = new Set ( hotResults . map ( ( r ) => r . id ) ) ;
11354
114- const seen = new Set ( hotpathResults . map ( ( r ) => r . page . pageId ) ) ;
55+ // --- Warm spill: fill up to topK if hot path is insufficient ---
56+ let warmResults : Array < { id : Hash ; score : number } > = [ ] ;
57+ if ( hotResults . length < topK ) {
58+ const allWarm = await spillToWarm ( "page" , queryEmbedding , topK , rankingOptions ) ;
59+ warmResults = allWarm . filter ( ( r ) => ! seenIds . has ( r . id ) ) ;
60+ }
11561
116- // If we still need more results, score remaining pages (warm/cold).
117- const remaining = Math . max ( 0 , topK - hotpathResults . length ) ;
118- const coldResults : Array < { page : Page ; score : number } > = [ ] ;
62+ // Merge, deduplicate, sort, and slice to topK
63+ const merged = [ ...hotResults , ...warmResults ] ;
64+ merged . sort ( ( a , b ) => b . score - a . score || a . id . localeCompare ( b . id ) ) ;
65+ const topResults = merged . slice ( 0 , topK ) ;
66+
67+ // Load Page objects for the top results
68+ const topPages = (
69+ await Promise . all ( topResults . map ( ( r ) => metadataStore . getPage ( r . id ) ) )
70+ ) . filter ( ( p ) : p is Page => p !== undefined ) ;
71+
72+ const topScores = topResults
73+ . filter ( ( r ) => topPages . some ( ( p ) => p . pageId === r . id ) )
74+ . map ( ( r ) => r . score ) ;
75+
76+ // --- MetroidBuilder: build dialectical probe ---
77+ // Candidates: hotpath book medoid pages + hotpath pages themselves
78+ const hotpathBookEntries = await metadataStore . getHotpathEntries ( "book" ) ;
79+ const bookCandidates = (
80+ await Promise . all (
81+ hotpathBookEntries . map ( async ( e ) => {
82+ const book = await metadataStore . getBook ( e . entityId ) ;
83+ if ( ! book ) return null ;
84+ const medoidPage = await metadataStore . getPage ( book . medoidPageId ) ;
85+ if ( ! medoidPage ) return null ;
86+ return {
87+ pageId : medoidPage . pageId ,
88+ embeddingOffset : medoidPage . embeddingOffset ,
89+ embeddingDim : medoidPage . embeddingDim ,
90+ } ;
91+ } ) ,
92+ )
93+ ) . filter ( ( c ) : c is NonNullable < typeof c > => c !== null ) ;
94+
95+ const pageCandidates = topPages . map ( ( p ) => ( {
96+ pageId : p . pageId ,
97+ embeddingOffset : p . embeddingOffset ,
98+ embeddingDim : p . embeddingDim ,
99+ } ) ) ;
119100
120- if ( remaining > 0 ) {
121- const allPages = await metadataStore . getAllPages ( ) ;
122- const candidates = allPages . filter ( ( p ) => ! seen . has ( p . pageId ) ) ;
101+ // Deduplicate candidates by pageId
102+ const candidateMap = new Map < Hash , { pageId : Hash ; embeddingOffset : number ; embeddingDim : number } > ( ) ;
103+ for ( const c of [ ...bookCandidates , ...pageCandidates ] ) {
104+ candidateMap . set ( c . pageId , c ) ;
105+ }
106+ const metroidCandidates = [ ...candidateMap . values ( ) ] ;
123107
124- const scored = await scorePages (
125- queryEmbedding ,
126- candidates ,
127- vectorStore ,
128- vectorBackend ,
129- remaining ,
130- ) ;
108+ const metroid = await buildMetroid ( queryEmbedding , metroidCandidates , {
109+ modelProfile,
110+ vectorStore,
111+ } ) ;
131112
132- coldResults . push ( ...scored ) ;
133- }
113+ // --- KnowledgeGapDetector ---
114+ const knowledgeGap = await detectKnowledgeGap (
115+ queryText ,
116+ queryEmbedding ,
117+ metroid ,
118+ modelProfile ,
119+ ) ;
134120
135- const combined = [ ...hotpathResults , ...coldResults ] ;
136- combined . sort ( ( a , b ) => b . score - a . score ) ;
137-
138- // Ensure combined results are sorted by descending score for top-K semantics.
139- combined . sort ( ( a , b ) => b . score - a . score ) ;
140-
141- // Update activity for returned pages
142- await Promise . all ( combined . map ( async ( { page } ) => {
143- const activity = await metadataStore . getPageActivity ( page . pageId ) ;
144- const updated = {
145- pageId : page . pageId ,
146- queryHitCount : ( activity ?. queryHitCount ?? 0 ) + 1 ,
147- lastQueryAt : nowIso ,
148- communityId : activity ?. communityId ,
149- } ;
150- await metadataStore . putPageActivity ( updated ) ;
151- } ) ) ;
121+ // --- Subgraph expansion ---
122+ const topPageIds = topPages . map ( ( p ) => p . pageId ) ;
123+ const subgraph = await metadataStore . getInducedNeighborSubgraph ( topPageIds , maxHops ) ;
124+
125+ // --- TSP coherence path ---
126+ const coherencePath = solveOpenTSP ( subgraph ) ;
127+
128+ // --- Update activity for returned pages ---
129+ await Promise . all (
130+ topPages . map ( async ( page ) => {
131+ const activity = await metadataStore . getPageActivity ( page . pageId ) ;
132+ await metadataStore . putPageActivity ( {
133+ pageId : page . pageId ,
134+ queryHitCount : ( activity ?. queryHitCount ?? 0 ) + 1 ,
135+ lastQueryAt : nowIso ,
136+ communityId : activity ?. communityId ,
137+ } ) ;
138+ } ) ,
139+ ) ;
152140
153- // Recompute salience and run promotion sweep for pages returned in this query.
154- await runPromotionSweep ( combined . map ( ( r ) => r . page . pageId ) , metadataStore ) ;
141+ // --- Promotion sweep ---
142+ await runPromotionSweep ( topPageIds , metadataStore ) ;
155143
156144 return {
157- pages : combined . map ( ( r ) => r . page ) ,
158- scores : combined . map ( ( r ) => r . score ) ,
159- coherencePath : [ ] ,
160- metroid : null ,
161- knowledgeGap : null ,
145+ pages : topPages ,
146+ scores : topScores ,
147+ coherencePath,
148+ metroid,
149+ knowledgeGap,
162150 metadata : {
163151 queryText,
164152 topK,
165- returned : combined . length ,
153+ returned : topPages . length ,
166154 timestamp : nowIso ,
167155 modelId : modelProfile . modelId ,
168156 } ,
0 commit comments