Skip to content

Commit 4335f67

Browse files
Copilot and devlux76 committed
fix: run buildHierarchy before insertSemanticNeighbors in ingest pipeline
Swap the ordering so that Books/Volumes/Shelves (and their reverse indexes) exist before semantic neighbor insertion attempts to flag dirty volumes via page→book→volume traversal. Also: (1) rename the misleading `hierarchyPageIds` in Query.ts to per-tier variables (`volumeIdsFromShelves`, `bookIdsFromVolumes`, `pageIdsFromBooks`), and (2) strengthen the integration test assertion to enforce exactly-once book membership.

Co-authored-by: devlux76 <86517969+devlux76@users.noreply.github.com>
1 parent 926b005 commit 4335f67

3 files changed

Lines changed: 27 additions & 22 deletions

File tree

lib/cortex/Query.ts

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,8 @@ export async function query(
5454
const hotpathBookEntries = await metadataStore.getHotpathEntries("book");
5555
const hotpathPageEntries = await metadataStore.getHotpathEntries("page");
5656

57-
// Collect candidate page IDs from hierarchical routing.
58-
const hierarchyPageIds = new Set<Hash>();
59-
60-
// Shelf → Volume → Book → Page drill-down
57+
// Shelf drill-down → discover volume candidates
58+
const volumeIdsFromShelves = new Set<Hash>();
6159
if (hotpathShelfEntries.length > 0) {
6260
const topShelves = await rankShelves(
6361
queryEmbedding,
@@ -68,18 +66,18 @@ export async function query(
6866
for (const s of topShelves) {
6967
const shelf = await metadataStore.getShelf(s.id);
7068
if (shelf) {
71-
for (const vid of shelf.volumeIds) hierarchyPageIds.add(vid);
69+
for (const vid of shelf.volumeIds) volumeIdsFromShelves.add(vid);
7270
}
7371
}
7472
}
7573

76-
// Rank volumes — include both hotpath volumes and those found via shelf drill-down
74+
// Volume ranking → discover book candidates
7775
const volumeCandidateIds = new Set<Hash>([
7876
...hotpathVolumeEntries.map((e) => e.entityId),
79-
...hierarchyPageIds,
77+
...volumeIdsFromShelves,
8078
]);
81-
hierarchyPageIds.clear();
8279

80+
const bookIdsFromVolumes = new Set<Hash>();
8381
if (volumeCandidateIds.size > 0) {
8482
const topVolumes = await rankVolumes(
8583
queryEmbedding,
@@ -90,18 +88,18 @@ export async function query(
9088
for (const v of topVolumes) {
9189
const volume = await metadataStore.getVolume(v.id);
9290
if (volume) {
93-
for (const bid of volume.bookIds) hierarchyPageIds.add(bid);
91+
for (const bid of volume.bookIds) bookIdsFromVolumes.add(bid);
9492
}
9593
}
9694
}
9795

98-
// Rank books — include both hotpath books and those found via volume drill-down
96+
// Book ranking → discover page candidates
9997
const bookCandidateIds = new Set<Hash>([
10098
...hotpathBookEntries.map((e) => e.entityId),
101-
...hierarchyPageIds,
99+
...bookIdsFromVolumes,
102100
]);
103-
hierarchyPageIds.clear();
104101

102+
const pageIdsFromBooks = new Set<Hash>();
105103
if (bookCandidateIds.size > 0) {
106104
const topBooks = await rankBooks(
107105
queryEmbedding,
@@ -112,14 +110,14 @@ export async function query(
112110
for (const b of topBooks) {
113111
const book = await metadataStore.getBook(b.id);
114112
if (book) {
115-
for (const pid of book.pageIds) hierarchyPageIds.add(pid);
113+
for (const pid of book.pageIds) pageIdsFromBooks.add(pid);
116114
}
117115
}
118116
}
119117

120118
// --- HOT path: score resident pages merged with hierarchy-discovered pages ---
121119
const hotpathIds = hotpathPageEntries.map((e) => e.entityId);
122-
const combinedPageIds = new Set<Hash>([...hotpathIds, ...hierarchyPageIds]);
120+
const combinedPageIds = new Set<Hash>([...hotpathIds, ...pageIdsFromBooks]);
123121

124122
const hotResults = await rankPages(queryEmbedding, [...combinedPageIds], topK, rankingOptions);
125123
const seenIds = new Set(hotResults.map((r) => r.id));

lib/hippocampus/Ingest.ts

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -93,19 +93,23 @@ export async function ingestText(
9393
});
9494
}
9595

96-
// Insert semantic neighbor edges for the new pages against all stored pages.
97-
const allPages = await metadataStore.getAllPages();
98-
const allPageIds = allPages.map((p) => p.pageId);
99-
await insertSemanticNeighbors(pageIds, allPageIds, {
96+
// Build the full hierarchy: Pages → Books → Volumes → Shelves.
97+
// buildHierarchy handles medoid selection, adjacency edges, prototype
98+
// computation, Williams fanout enforcement, and promotion sweeps.
99+
// This must run BEFORE insertSemanticNeighbors so that reverse indexes
100+
// (page→book→volume) exist when the neighbor inserter flags dirty volumes.
101+
const hierarchy = await buildHierarchy(pageIds, {
100102
modelProfile,
101103
vectorStore,
102104
metadataStore,
103105
});
104106

105-
// Build the full hierarchy: Pages → Books → Volumes → Shelves.
106-
// buildHierarchy handles medoid selection, adjacency edges, prototype
107-
// computation, Williams fanout enforcement, and promotion sweeps.
108-
const hierarchy = await buildHierarchy(pageIds, {
107+
// Insert semantic neighbor edges for the new pages against all stored pages.
108+
// Runs after hierarchy building so that flagVolumeForNeighborRecalc() can
109+
// traverse the page→book→volume reverse indexes created above.
110+
const allPages = await metadataStore.getAllPages();
111+
const allPageIds = allPages.map((p) => p.pageId);
112+
await insertSemanticNeighbors(pageIds, allPageIds, {
109113
modelProfile,
110114
vectorStore,
111115
metadataStore,

tests/integration/IngestQuery.test.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,9 @@ describe("integration (v0.5): hierarchical and dialectical ingest/query", () =>
429429
for (const page of result.pages) {
430430
expect(allBookPageIds).toContain(page.pageId);
431431
}
432+
// Enforce exactly-once membership (no page duplicated across books)
433+
const uniqueBookPageIds = new Set(allBookPageIds);
434+
expect(uniqueBookPageIds.size).toBe(allBookPageIds.length);
432435
// Every book's medoid must be one of its own pages
433436
for (const book of result.books) {
434437
const storedBook = await metadataStore.getBook(book.bookId);

0 commit comments

Comments
 (0)