Skip to content

Commit 55d6249

Browse files
authored
Integrate HierarchyBuilder into ingestion, add hierarchical query routing, enforce Williams-bound consistency (#96)
1 parent 6b0a5af commit 55d6249

21 files changed

Lines changed: 689 additions & 311 deletions

PLAN.md

Lines changed: 71 additions & 80 deletions
Large diffs are not rendered by default.

TODO.md

Lines changed: 77 additions & 75 deletions
Large diffs are not rendered by default.

lib/cortex/Query.ts

Lines changed: 72 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import type { EmbeddingRunner } from "../embeddings/EmbeddingRunner";
44
import { runPromotionSweep } from "../core/SalienceEngine";
55
import { computeSubgraphBounds } from "../core/HotpathPolicy";
66
import type { QueryResult } from "./QueryResult";
7-
import { rankPages, spillToWarm } from "./Ranking";
7+
import { rankPages, rankBooks, rankVolumes, rankShelves, spillToWarm, type RankedResult } from "./Ranking";
88
import { buildMetroid } from "./MetroidBuilder";
99
import { detectKnowledgeGap } from "./KnowledgeGapDetector";
1010
import { solveOpenTSP } from "./OpenTSPSolver";
@@ -46,15 +46,75 @@ export async function query(
4646

4747
const rankingOptions = { vectorStore, metadataStore };
4848

49-
// --- HOT path: score resident pages ---
50-
const hotpathEntries = await metadataStore.getHotpathEntries("page");
51-
const hotpathIds = hotpathEntries.map((e) => e.entityId);
49+
// --- Hierarchical routing: Shelf → Volume → Book → Page ---
50+
// When higher-tier hotpath entries exist, we route through the hierarchy
51+
// to narrow the candidate set before flat page scoring.
52+
const hotpathShelfEntries = await metadataStore.getHotpathEntries("shelf");
53+
const hotpathVolumeEntries = await metadataStore.getHotpathEntries("volume");
54+
const hotpathBookEntries = await metadataStore.getHotpathEntries("book");
55+
const hotpathPageEntries = await metadataStore.getHotpathEntries("page");
56+
57+
// Shelf drill-down → discover volume candidates
58+
const volumeIdsFromShelves = new Set<Hash>();
59+
if (hotpathShelfEntries.length > 0) {
60+
const topShelves = await rankShelves(
61+
queryEmbedding,
62+
hotpathShelfEntries.map((e) => e.entityId),
63+
Math.max(2, Math.ceil(hotpathShelfEntries.length / 2)),
64+
rankingOptions,
65+
);
66+
for (const s of topShelves) {
67+
for (const vid of s.childIds) volumeIdsFromShelves.add(vid);
68+
}
69+
}
70+
71+
// Volume ranking → discover book candidates
72+
const volumeCandidateIds = new Set<Hash>([
73+
...hotpathVolumeEntries.map((e) => e.entityId),
74+
...volumeIdsFromShelves,
75+
]);
76+
77+
const bookIdsFromVolumes = new Set<Hash>();
78+
if (volumeCandidateIds.size > 0) {
79+
const topVolumes = await rankVolumes(
80+
queryEmbedding,
81+
[...volumeCandidateIds],
82+
Math.max(2, Math.ceil(volumeCandidateIds.size / 2)),
83+
rankingOptions,
84+
);
85+
for (const v of topVolumes) {
86+
for (const bid of v.childIds) bookIdsFromVolumes.add(bid);
87+
}
88+
}
5289

53-
const hotResults = await rankPages(queryEmbedding, hotpathIds, topK, rankingOptions);
90+
// Book ranking → discover page candidates
91+
const bookCandidateIds = new Set<Hash>([
92+
...hotpathBookEntries.map((e) => e.entityId),
93+
...bookIdsFromVolumes,
94+
]);
95+
96+
const pageIdsFromBooks = new Set<Hash>();
97+
if (bookCandidateIds.size > 0) {
98+
const topBooks = await rankBooks(
99+
queryEmbedding,
100+
[...bookCandidateIds],
101+
Math.max(2, Math.ceil(bookCandidateIds.size / 2)),
102+
rankingOptions,
103+
);
104+
for (const b of topBooks) {
105+
for (const pid of b.childIds) pageIdsFromBooks.add(pid);
106+
}
107+
}
108+
109+
// --- HOT path: score resident pages merged with hierarchy-discovered pages ---
110+
const hotpathIds = hotpathPageEntries.map((e) => e.entityId);
111+
const combinedPageIds = new Set<Hash>([...hotpathIds, ...pageIdsFromBooks]);
112+
113+
const hotResults = await rankPages(queryEmbedding, [...combinedPageIds], topK, rankingOptions);
54114
const seenIds = new Set(hotResults.map((r) => r.id));
55115

56116
// --- Warm spill: fill up to topK if hot path is insufficient ---
57-
let warmResults: Array<{ id: Hash; score: number }> = [];
117+
let warmResults: RankedResult[] = [];
58118
if (hotResults.length < topK) {
59119
const allWarm = await spillToWarm("page", queryEmbedding, topK, rankingOptions);
60120
warmResults = allWarm.filter((r) => !seenIds.has(r.id));
@@ -75,8 +135,7 @@ export async function query(
75135
.map((r) => r.score);
76136

77137
// --- MetroidBuilder: build dialectical probe ---
78-
// Candidates: hotpath book medoid pages + hotpath pages themselves
79-
const hotpathBookEntries = await metadataStore.getHotpathEntries("book");
138+
// Candidates: hotpath book medoid pages + top-ranked pages
80139
const bookCandidates = (
81140
await Promise.all(
82141
hotpathBookEntries.map(async (e) => {
@@ -121,16 +180,16 @@ export async function query(
121180

122181
// --- Subgraph expansion ---
123182
// Use dynamic Williams-derived bounds unless the caller has pinned an
124-
// explicit maxHops value. Only load all pages when we actually need to
125-
// compute bounds — skip the full-page scan on the hot path when maxHops is
126-
// already known.
183+
// explicit maxHops value. Prefer the hotpath resident count as an efficient
184+
// proxy for corpus size to avoid scanning all pages on the hot path.
127185
const topPageIds = topPages.map((p) => p.pageId);
128186
let effectiveMaxHops: number;
129187
if (options.maxHops !== undefined) {
130188
effectiveMaxHops = options.maxHops;
131189
} else {
132-
const allPages = await metadataStore.getAllPages();
133-
effectiveMaxHops = computeSubgraphBounds(allPages.length).maxHops;
190+
const residentCount = await metadataStore.getResidentCount();
191+
const graphMass = residentCount > 0 ? residentCount : combinedPageIds.size;
192+
effectiveMaxHops = computeSubgraphBounds(Math.max(1, graphMass)).maxHops;
134193
}
135194
const subgraph = await metadataStore.getInducedNeighborSubgraph(topPageIds, effectiveMaxHops);
136195

lib/cortex/Ranking.ts

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -21,34 +21,42 @@ function cosineSimilarity(a: Float32Array, b: Float32Array): number {
2121
return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
2222
}
2323

24+
export interface RankedResult {
25+
id: Hash;
26+
score: number;
27+
/** Child IDs from the ranked entity (volumeIds / bookIds / pageIds). */
28+
childIds: Hash[];
29+
}
30+
2431
function pickTopK(
25-
scored: Array<{ id: Hash; score: number }>,
32+
scored: RankedResult[],
2633
k: number,
27-
): Array<{ id: Hash; score: number }> {
34+
): RankedResult[] {
2835
scored.sort((a, b) => b.score - a.score || a.id.localeCompare(b.id));
2936
return scored.slice(0, k);
3037
}
3138

3239
/**
3340
* Ranks shelves by cosine similarity of their routing prototype to the query.
3441
* Uses routingPrototypeOffsets[0] as the representative vector.
42+
* Returns child volumeIds alongside each scored shelf.
3543
*/
3644
export async function rankShelves(
3745
queryEmbedding: Float32Array,
3846
residentShelfIds: Hash[],
3947
topK: number,
4048
options: RankingOptions,
41-
): Promise<Array<{ id: Hash; score: number }>> {
49+
): Promise<RankedResult[]> {
4250
if (residentShelfIds.length === 0) return [];
4351

4452
const { vectorStore, metadataStore } = options;
45-
const scored: Array<{ id: Hash; score: number }> = [];
53+
const scored: RankedResult[] = [];
4654

4755
for (const shelfId of residentShelfIds) {
4856
const shelf = await metadataStore.getShelf(shelfId);
4957
if (!shelf || shelf.routingPrototypeOffsets.length === 0) continue;
5058
const vec = await vectorStore.readVector(shelf.routingPrototypeOffsets[0], shelf.routingDim);
51-
scored.push({ id: shelfId, score: cosineSimilarity(queryEmbedding, vec) });
59+
scored.push({ id: shelfId, score: cosineSimilarity(queryEmbedding, vec), childIds: shelf.volumeIds });
5260
}
5361

5462
return pickTopK(scored, topK);
@@ -57,49 +65,51 @@ export async function rankShelves(
5765
/**
5866
* Ranks volumes by cosine similarity of their first prototype to the query.
5967
* Uses prototypeOffsets[0] as the representative vector.
68+
* Returns child bookIds alongside each scored volume.
6069
*/
6170
export async function rankVolumes(
6271
queryEmbedding: Float32Array,
6372
residentVolumeIds: Hash[],
6473
topK: number,
6574
options: RankingOptions,
66-
): Promise<Array<{ id: Hash; score: number }>> {
75+
): Promise<RankedResult[]> {
6776
if (residentVolumeIds.length === 0) return [];
6877

6978
const { vectorStore, metadataStore } = options;
70-
const scored: Array<{ id: Hash; score: number }> = [];
79+
const scored: RankedResult[] = [];
7180

7281
for (const volumeId of residentVolumeIds) {
7382
const volume = await metadataStore.getVolume(volumeId);
7483
if (!volume || volume.prototypeOffsets.length === 0) continue;
7584
const vec = await vectorStore.readVector(volume.prototypeOffsets[0], volume.prototypeDim);
76-
scored.push({ id: volumeId, score: cosineSimilarity(queryEmbedding, vec) });
85+
scored.push({ id: volumeId, score: cosineSimilarity(queryEmbedding, vec), childIds: volume.bookIds });
7786
}
7887

7988
return pickTopK(scored, topK);
8089
}
8190

8291
/**
8392
* Ranks books by cosine similarity of their medoid page embedding to the query.
93+
* Returns child pageIds alongside each scored book.
8494
*/
8595
export async function rankBooks(
8696
queryEmbedding: Float32Array,
8797
residentBookIds: Hash[],
8898
topK: number,
8999
options: RankingOptions,
90-
): Promise<Array<{ id: Hash; score: number }>> {
100+
): Promise<RankedResult[]> {
91101
if (residentBookIds.length === 0) return [];
92102

93103
const { vectorStore, metadataStore } = options;
94-
const scored: Array<{ id: Hash; score: number }> = [];
104+
const scored: RankedResult[] = [];
95105

96106
for (const bookId of residentBookIds) {
97107
const book = await metadataStore.getBook(bookId);
98108
if (!book) continue;
99109
const medoidPage = await metadataStore.getPage(book.medoidPageId);
100110
if (!medoidPage) continue;
101111
const vec = await vectorStore.readVector(medoidPage.embeddingOffset, medoidPage.embeddingDim);
102-
scored.push({ id: bookId, score: cosineSimilarity(queryEmbedding, vec) });
112+
scored.push({ id: bookId, score: cosineSimilarity(queryEmbedding, vec), childIds: book.pageIds });
103113
}
104114

105115
return pickTopK(scored, topK);
@@ -113,17 +123,17 @@ export async function rankPages(
113123
residentPageIds: Hash[],
114124
topK: number,
115125
options: RankingOptions,
116-
): Promise<Array<{ id: Hash; score: number }>> {
126+
): Promise<RankedResult[]> {
117127
if (residentPageIds.length === 0) return [];
118128

119129
const { vectorStore, metadataStore } = options;
120-
const scored: Array<{ id: Hash; score: number }> = [];
130+
const scored: RankedResult[] = [];
121131

122132
for (const pageId of residentPageIds) {
123133
const page = await metadataStore.getPage(pageId);
124134
if (!page) continue;
125135
const vec = await vectorStore.readVector(page.embeddingOffset, page.embeddingDim);
126-
scored.push({ id: pageId, score: cosineSimilarity(queryEmbedding, vec) });
136+
scored.push({ id: pageId, score: cosineSimilarity(queryEmbedding, vec), childIds: [] });
127137
}
128138

129139
return pickTopK(scored, topK);
@@ -139,17 +149,17 @@ export async function spillToWarm(
139149
queryEmbedding: Float32Array,
140150
topK: number,
141151
options: RankingOptions,
142-
): Promise<Array<{ id: Hash; score: number }>> {
152+
): Promise<RankedResult[]> {
143153
if (tier !== "page") return [];
144154

145155
const { vectorStore, metadataStore } = options;
146156
const allPages = await metadataStore.getAllPages();
147157
if (allPages.length === 0) return [];
148158

149-
const scored: Array<{ id: Hash; score: number }> = [];
159+
const scored: RankedResult[] = [];
150160
for (const page of allPages) {
151161
const vec = await vectorStore.readVector(page.embeddingOffset, page.embeddingDim);
152-
scored.push({ id: page.pageId, score: cosineSimilarity(queryEmbedding, vec) });
162+
scored.push({ id: page.pageId, score: cosineSimilarity(queryEmbedding, vec), childIds: [] });
153163
}
154164

155165
return pickTopK(scored, topK);

lib/daydreamer/ClusterStability.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ export interface LabelPropagationResult {
5252
/**
5353
* Run one pass of label propagation over all pages.
5454
*
55-
* Each node adopts the most frequent label among its Metroid neighbors.
55+
* Each node adopts the most frequent label among its semantic neighbors.
5656
* Ties are broken deterministically by choosing the lexicographically
5757
* smallest label (consistent across runs and nodes).
5858
*
@@ -107,7 +107,7 @@ async function propagationPass(
107107

108108
/**
109109
* Assign community labels to all pages via label propagation on the
110-
* Metroid (semantic) neighbor graph.
110+
* semantic neighbor graph.
111111
*
112112
* Initial labels: each page is its own community (pageId as initial label).
113113
* Each iteration: every node adopts the most frequent label among neighbors.

lib/daydreamer/FullNeighborRecalc.ts

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
// ---------------------------------------------------------------------------
1515

1616
import type { Hash, MetadataStore, SemanticNeighbor, Page, VectorStore } from "../core/types";
17-
import { computeCapacity, DEFAULT_HOTPATH_POLICY, type HotpathPolicy } from "../core/HotpathPolicy";
17+
import { computeCapacity, computeNeighborMaxDegree, DEFAULT_HOTPATH_POLICY, type HotpathPolicy } from "../core/HotpathPolicy";
1818
import { batchComputeSalience, runPromotionSweep } from "../core/SalienceEngine";
1919

2020
// Minimum pair budget per idle recalc cycle.
@@ -31,7 +31,8 @@ export interface FullNeighborRecalcOptions {
3131
metadataStore: MetadataStore;
3232
vectorStore: VectorStore;
3333
policy?: HotpathPolicy;
34-
/** Maximum Metroid neighbors stored per page. Default: 16. */
34+
/** Maximum semantic neighbors stored per page.
35+
* When omitted, uses Williams-derived `computeNeighborMaxDegree(graphMass)`. */
3536
maxNeighbors?: number;
3637
/** Current timestamp (ms since epoch). Defaults to Date.now(). */
3738
now?: number;
@@ -71,7 +72,7 @@ function cosineSimilarity(a: Float32Array, b: Float32Array): number {
7172
*
7273
* Finds all volumes flagged as dirty (via `needsNeighborRecalc`), loads
7374
* their pages, computes pairwise cosine similarities, and updates the
74-
* Metroid neighbor index. Processing is bounded by the Williams-Bound-derived
75+
* semantic neighbor index. Processing is bounded by the Williams-Bound-derived
7576
* maintenance budget to avoid blocking the idle loop.
7677
*
7778
* After recalculation, salience is recomputed for affected pages and a
@@ -84,7 +85,6 @@ export async function runFullNeighborRecalc(
8485
metadataStore,
8586
vectorStore,
8687
policy = DEFAULT_HOTPATH_POLICY,
87-
maxNeighbors = 16,
8888
now = Date.now(),
8989
} = options;
9090

@@ -110,6 +110,9 @@ export async function runFullNeighborRecalc(
110110
const totalGraphMass = (await metadataStore.getAllPages()).length;
111111
const pairBudget = Math.max(MIN_RECALC_PAIR_BUDGET, computeCapacity(totalGraphMass, policy.c));
112112

113+
// Derive max neighbor degree from Williams bounds if not explicitly provided.
114+
const maxNeighbors = options.maxNeighbors ?? computeNeighborMaxDegree(totalGraphMass, policy.c);
115+
113116
let totalVolumesProcessed = 0;
114117
let totalPagesProcessed = 0;
115118
let totalPairsComputed = 0;

lib/daydreamer/HebbianUpdater.ts

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
// ---------------------------------------------------------------------------
1212

1313
import type { Edge, Hash, MetadataStore } from "../core/types";
14-
import { DEFAULT_HOTPATH_POLICY, type HotpathPolicy } from "../core/HotpathPolicy";
14+
import { computeNeighborMaxDegree, DEFAULT_HOTPATH_POLICY, type HotpathPolicy } from "../core/HotpathPolicy";
1515
import { batchComputeSalience, runPromotionSweep } from "../core/SalienceEngine";
1616

1717
// ---------------------------------------------------------------------------
@@ -43,7 +43,8 @@ export interface HebbianUpdaterOptions {
4343
ltdDecay?: number;
4444
/** Prune edges whose weight drops below this value. Default: DEFAULT_PRUNE_THRESHOLD. */
4545
pruneThreshold?: number;
46-
/** Maximum outgoing degree per node. Default: DEFAULT_MAX_DEGREE. */
46+
/** Maximum outgoing Hebbian edges per node.
47+
* When omitted, uses Williams-derived `computeNeighborMaxDegree(graphMass)`. */
4748
maxDegree?: number;
4849
/** Current timestamp (ms since epoch). Defaults to Date.now(). */
4950
now?: number;
@@ -135,13 +136,15 @@ export async function decayAndPrune(
135136
policy = DEFAULT_HOTPATH_POLICY,
136137
ltdDecay = DEFAULT_LTD_DECAY,
137138
pruneThreshold = DEFAULT_PRUNE_THRESHOLD,
138-
maxDegree = DEFAULT_MAX_DEGREE,
139139
now = Date.now(),
140140
} = options;
141141

142142
const allPages = await metadataStore.getAllPages();
143143
if (allPages.length === 0) return { decayed: 0, pruned: 0 };
144144

145+
// Derive max degree from Williams bounds if not explicitly provided.
146+
const maxDegree = options.maxDegree ?? computeNeighborMaxDegree(allPages.length, policy.c);
147+
145148
const changedNodeIds = new Set<Hash>();
146149
let totalDecayed = 0;
147150
let totalPruned = 0;

0 commit comments

Comments
 (0)