Skip to content

Commit 404c925

Browse files
CopilotCopilot
andcommitted
feat(cortex): implement P1 dialectical query orchestrator
- Add cortex/Ranking.ts: rankPages/rankBooks/rankVolumes/rankShelves use cosine similarity against entity representative vectors; spillToWarm('page') scores the full corpus as fallback
- Rewrite cortex/Query.ts (P1-E1): full dialectical orchestrator
  - HOT path: rankPages() over resident hotpath entries
  - Warm spill: spillToWarm('page') when hot path is insufficient
  - buildMetroid() with book medoid + page candidates
  - detectKnowledgeGap() from metroid result
  - getInducedNeighborSubgraph() + solveOpenTSP() for coherence path
  - queryHitCount increment + runPromotionSweep()
  - QueryResult now fully populated (coherencePath, metroid, knowledgeGap)
- Add tests/cortex/Ranking.test.ts: 12 tests covering empty inputs, cosine scoring, descending order, topK, spillToWarm all tiers
- Update tests/cortex/Query.test.ts: add assertions for new fields (coherencePath, metroid, knowledgeGap); preserve all existing behavioral checks; update describe label to reflect new orchestrator

All 274 tests pass.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent fa4949f commit 404c925

4 files changed

Lines changed: 593 additions & 116 deletions

File tree

cortex/Query.ts

Lines changed: 103 additions & 115 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,13 @@
11
import type { ModelProfile } from "../core/ModelProfile";
2-
import type { MetadataStore, Page, VectorStore } from "../core/types";
2+
import type { Hash, MetadataStore, Page, VectorStore } from "../core/types";
33
import type { VectorBackend } from "../VectorBackend";
44
import type { EmbeddingRunner } from "../embeddings/EmbeddingRunner";
55
import { runPromotionSweep } from "../core/SalienceEngine";
66
import type { QueryResult } from "./QueryResult";
7+
import { rankPages, spillToWarm } from "./Ranking";
8+
import { buildMetroid } from "./MetroidBuilder";
9+
import { detectKnowledgeGap } from "./KnowledgeGapDetector";
10+
import { solveOpenTSP } from "./OpenTSPSolver";
711

812
export interface QueryOptions {
913
modelProfile: ModelProfile;
@@ -12,66 +16,10 @@ export interface QueryOptions {
1216
metadataStore: MetadataStore;
1317
vectorBackend: VectorBackend;
1418
topK?: number;
15-
}
16-
17-
function dot(a: Float32Array, b: Float32Array): number {
18-
const len = Math.min(a.length, b.length);
19-
let sum = 0;
20-
for (let i = 0; i < len; i++) {
21-
sum += a[i] * b[i];
22-
}
23-
return sum;
24-
}
25-
26-
/**
27-
* Concatenates an array of equal-length vectors into a single flat buffer.
28-
* @param vectors - Must be non-empty; every element must have the same length.
29-
*/
30-
function concatVectors(vectors: Float32Array[]): Float32Array {
31-
const dim = vectors[0].length;
32-
const out = new Float32Array(vectors.length * dim);
33-
for (let i = 0; i < vectors.length; i++) {
34-
out.set(vectors[i], i * dim);
35-
}
36-
return out;
37-
}
38-
39-
async function scorePages(
40-
queryEmbedding: Float32Array,
41-
pages: Page[],
42-
vectorStore: VectorStore,
43-
vectorBackend: VectorBackend,
44-
maxResults: number,
45-
): Promise<Array<{ page: Page; score: number }>> {
46-
if (pages.length === 0) return [];
47-
48-
const [firstPage] = pages;
49-
const dim = firstPage.embeddingDim;
50-
const offsets = pages.map((p) => p.embeddingOffset);
51-
52-
// If all pages share the same embedding dimension and it matches the query,
53-
// use the vector backend for fast scoring.
54-
const uniformDim = pages.every((p) => p.embeddingDim === dim);
55-
const canUseBackend = uniformDim && queryEmbedding.length === dim;
56-
57-
if (canUseBackend) {
58-
const embeddings = await vectorStore.readVectors(offsets, dim);
59-
const matrix = concatVectors(embeddings);
60-
const scores = await vectorBackend.dotMany(queryEmbedding, matrix, dim, pages.length);
61-
const topk = await vectorBackend.topKFromScores(scores, Math.min(maxResults, pages.length));
62-
return topk.map((r) => ({ page: pages[r.index], score: r.score }));
63-
}
64-
65-
// Fallback: compute dot product per page.
66-
const scored = await Promise.all(
67-
pages.map(async (page) => {
68-
const vec = await vectorStore.readVector(page.embeddingOffset, page.embeddingDim);
69-
return { page, score: dot(queryEmbedding, vec) };
70-
}),
71-
);
72-
73-
scored.sort((a, b) => b.score - a.score || a.page.pageId.localeCompare(b.page.pageId));
74-
return scored.slice(0, Math.min(maxResults, scored.length));
19+
/** BFS depth for semantic neighbor subgraph expansion. 2 hops covers direct
20+
* neighbors and their neighbors, which is the minimum needed to surface
21+
* bridge nodes without exploding the graph size. */
22+
maxHops?: number;
7523
}
7624

7725
export async function query(
@@ -83,8 +31,8 @@ export async function query(
8331
embeddingRunner,
8432
vectorStore,
8533
metadataStore,
86-
vectorBackend,
8734
topK = 10,
35+
maxHops = 2,
8836
} = options;
8937

9038
const nowIso = new Date().toISOString();
@@ -95,74 +43,114 @@ export async function query(
9543
}
9644
const queryEmbedding = embeddings[0];
9745

98-
// Score resident (hotpath) pages first.
46+
const rankingOptions = { vectorStore, metadataStore };
47+
48+
// --- HOT path: score resident pages ---
9949
const hotpathEntries = await metadataStore.getHotpathEntries("page");
10050
const hotpathIds = hotpathEntries.map((e) => e.entityId);
10151

102-
const hotpathPages = (await Promise.all(
103-
hotpathIds.map((id) => metadataStore.getPage(id)),
104-
)).filter((p): p is Page => p !== undefined);
105-
106-
const hotpathResults = await scorePages(
107-
queryEmbedding,
108-
hotpathPages,
109-
vectorStore,
110-
vectorBackend,
111-
topK,
112-
);
52+
const hotResults = await rankPages(queryEmbedding, hotpathIds, topK, rankingOptions);
53+
const seenIds = new Set(hotResults.map((r) => r.id));
11354

114-
const seen = new Set(hotpathResults.map((r) => r.page.pageId));
55+
// --- Warm spill: fill up to topK if hot path is insufficient ---
56+
let warmResults: Array<{ id: Hash; score: number }> = [];
57+
if (hotResults.length < topK) {
58+
const allWarm = await spillToWarm("page", queryEmbedding, topK, rankingOptions);
59+
warmResults = allWarm.filter((r) => !seenIds.has(r.id));
60+
}
11561

116-
// If we still need more results, score remaining pages (warm/cold).
117-
const remaining = Math.max(0, topK - hotpathResults.length);
118-
const coldResults: Array<{ page: Page; score: number }> = [];
62+
// Merge, deduplicate, sort, and slice to topK
63+
const merged = [...hotResults, ...warmResults];
64+
merged.sort((a, b) => b.score - a.score || a.id.localeCompare(b.id));
65+
const topResults = merged.slice(0, topK);
66+
67+
// Load Page objects for the top results
68+
const topPages = (
69+
await Promise.all(topResults.map((r) => metadataStore.getPage(r.id)))
70+
).filter((p): p is Page => p !== undefined);
71+
72+
const topScores = topResults
73+
.filter((r) => topPages.some((p) => p.pageId === r.id))
74+
.map((r) => r.score);
75+
76+
// --- MetroidBuilder: build dialectical probe ---
77+
// Candidates: hotpath book medoid pages + hotpath pages themselves
78+
const hotpathBookEntries = await metadataStore.getHotpathEntries("book");
79+
const bookCandidates = (
80+
await Promise.all(
81+
hotpathBookEntries.map(async (e) => {
82+
const book = await metadataStore.getBook(e.entityId);
83+
if (!book) return null;
84+
const medoidPage = await metadataStore.getPage(book.medoidPageId);
85+
if (!medoidPage) return null;
86+
return {
87+
pageId: medoidPage.pageId,
88+
embeddingOffset: medoidPage.embeddingOffset,
89+
embeddingDim: medoidPage.embeddingDim,
90+
};
91+
}),
92+
)
93+
).filter((c): c is NonNullable<typeof c> => c !== null);
94+
95+
const pageCandidates = topPages.map((p) => ({
96+
pageId: p.pageId,
97+
embeddingOffset: p.embeddingOffset,
98+
embeddingDim: p.embeddingDim,
99+
}));
119100

120-
if (remaining > 0) {
121-
const allPages = await metadataStore.getAllPages();
122-
const candidates = allPages.filter((p) => !seen.has(p.pageId));
101+
// Deduplicate candidates by pageId
102+
const candidateMap = new Map<Hash, { pageId: Hash; embeddingOffset: number; embeddingDim: number }>();
103+
for (const c of [...bookCandidates, ...pageCandidates]) {
104+
candidateMap.set(c.pageId, c);
105+
}
106+
const metroidCandidates = [...candidateMap.values()];
123107

124-
const scored = await scorePages(
125-
queryEmbedding,
126-
candidates,
127-
vectorStore,
128-
vectorBackend,
129-
remaining,
130-
);
108+
const metroid = await buildMetroid(queryEmbedding, metroidCandidates, {
109+
modelProfile,
110+
vectorStore,
111+
});
131112

132-
coldResults.push(...scored);
133-
}
113+
// --- KnowledgeGapDetector ---
114+
const knowledgeGap = await detectKnowledgeGap(
115+
queryText,
116+
queryEmbedding,
117+
metroid,
118+
modelProfile,
119+
);
134120

135-
const combined = [...hotpathResults, ...coldResults];
136-
combined.sort((a, b) => b.score - a.score);
137-
138-
// Ensure combined results are sorted by descending score for top-K semantics.
139-
combined.sort((a, b) => b.score - a.score);
140-
141-
// Update activity for returned pages
142-
await Promise.all(combined.map(async ({ page }) => {
143-
const activity = await metadataStore.getPageActivity(page.pageId);
144-
const updated = {
145-
pageId: page.pageId,
146-
queryHitCount: (activity?.queryHitCount ?? 0) + 1,
147-
lastQueryAt: nowIso,
148-
communityId: activity?.communityId,
149-
};
150-
await metadataStore.putPageActivity(updated);
151-
}));
121+
// --- Subgraph expansion ---
122+
const topPageIds = topPages.map((p) => p.pageId);
123+
const subgraph = await metadataStore.getInducedNeighborSubgraph(topPageIds, maxHops);
124+
125+
// --- TSP coherence path ---
126+
const coherencePath = solveOpenTSP(subgraph);
127+
128+
// --- Update activity for returned pages ---
129+
await Promise.all(
130+
topPages.map(async (page) => {
131+
const activity = await metadataStore.getPageActivity(page.pageId);
132+
await metadataStore.putPageActivity({
133+
pageId: page.pageId,
134+
queryHitCount: (activity?.queryHitCount ?? 0) + 1,
135+
lastQueryAt: nowIso,
136+
communityId: activity?.communityId,
137+
});
138+
}),
139+
);
152140

153-
// Recompute salience and run promotion sweep for pages returned in this query.
154-
await runPromotionSweep(combined.map((r) => r.page.pageId), metadataStore);
141+
// --- Promotion sweep ---
142+
await runPromotionSweep(topPageIds, metadataStore);
155143

156144
return {
157-
pages: combined.map((r) => r.page),
158-
scores: combined.map((r) => r.score),
159-
coherencePath: [],
160-
metroid: null,
161-
knowledgeGap: null,
145+
pages: topPages,
146+
scores: topScores,
147+
coherencePath,
148+
metroid,
149+
knowledgeGap,
162150
metadata: {
163151
queryText,
164152
topK,
165-
returned: combined.length,
153+
returned: topPages.length,
166154
timestamp: nowIso,
167155
modelId: modelProfile.modelId,
168156
},

0 commit comments

Comments
 (0)