1- import type { Book , Hash , MetadataStore , Shelf , Volume , VectorStore } from "../core/types" ;
1+ import type { Book , Hash , MetadataStore , SemanticNeighbor , Shelf , Volume , VectorStore } from "../core/types" ;
22import type { ModelProfile } from "../core/ModelProfile" ;
33import type { HotpathPolicy } from "../core/HotpathPolicy" ;
44import { hashText } from "../core/crypto/hash" ;
@@ -12,6 +12,11 @@ const PAGES_PER_BOOK = 8;
1212const BOOKS_PER_VOLUME = 4 ;
1313const VOLUMES_PER_SHELF = 4 ;
1414
15+ // Max neighbors per page for the adjacency edges added by the hierarchy builder.
16+ // Adjacency edges represent document-order contiguity and bypass the cosine
17+ // cutoff used by FastNeighborInsert, so they must still be bounded by policy.
18+ const ADJACENCY_MAX_DEGREE = 16 ;
19+
1520export interface BuildHierarchyOptions {
1621 modelProfile : ModelProfile ;
1722 vectorStore : VectorStore ;
@@ -80,6 +85,36 @@ function chunkArray<T>(arr: T[], size: number): T[][] {
8085 return chunks ;
8186}
8287
/**
 * Merge a candidate edge into a page's neighbor list while enforcing a degree cap.
 *
 * Any existing entry for the same `neighborPageId` is dropped first, so the
 * candidate refreshes that neighbor instead of duplicating it. The candidate
 * then competes with the remaining entries on `cosineSimilarity`: only the
 * `maxDegree` strongest edges are kept. On a tie at the cut-off the neighbor
 * that was already present wins, i.e. a candidate must be strictly stronger
 * than the weakest kept edge to displace it.
 *
 * The cap is enforced even when `existing` already holds more than `maxDegree`
 * entries (the surplus weakest edges are dropped). The input array is not mutated.
 *
 * @param existing - Current neighbor list for the page, in any order.
 * @param candidate - Edge to insert or refresh.
 * @param maxDegree - Maximum number of neighbors to keep; a non-positive or
 *   NaN value yields an empty list.
 * @returns A new array sorted by `cosineSimilarity` descending, holding at most
 *   `maxDegree` entries.
 */
function mergeAdjacentNeighbor(
  existing: SemanticNeighbor[],
  candidate: SemanticNeighbor,
  maxDegree: number,
): SemanticNeighbor[] {
  // Written as a negated comparison so NaN is rejected together with <= 0.
  if (!(maxDegree > 0)) {
    return [];
  }

  // `filter` returns a fresh array, so the caller's list is left untouched.
  const merged = existing.filter(
    (neighbor) => neighbor.neighborPageId !== candidate.neighborPageId,
  );

  // Appending the candidate last and relying on the stable sort keeps it
  // behind equally-similar existing neighbors, so it loses ties at the cut-off.
  merged.push(candidate);
  merged.sort((a, b) => b.cosineSimilarity - a.cosineSimilarity);

  return merged.slice(0, maxDegree);
}
117+
83118export async function buildHierarchy (
84119 pageIds : Hash [ ] ,
85120 options : BuildHierarchyOptions ,
@@ -99,6 +134,12 @@ export async function buildHierarchy(
99134 } ) ;
100135 const pageVectors = await vectorStore . readVectors ( pageOffsets , dim ) ;
101136
137+ // Build a Map<pageId, vector> for O(1) lookups throughout the hierarchy build.
138+ const pageVectorMap = new Map < Hash , Float32Array > ( ) ;
139+ for ( let i = 0 ; i < pageIds . length ; i ++ ) {
140+ pageVectorMap . set ( pageIds [ i ] , pageVectors [ i ] ) ;
141+ }
142+
102143 // -------------------------------------------------------------------------
103144 // Level 1: Pages → Books
104145 // -------------------------------------------------------------------------
@@ -110,8 +151,9 @@ export async function buildHierarchy(
110151 const bookId = await hashText ( sortedChunk . join ( "|" ) ) ;
111152
112153 const chunkVectors = chunk . map ( ( id ) => {
113- const idx = pageIds . indexOf ( id ) ;
114- return pageVectors [ idx ] ;
154+ const vec = pageVectorMap . get ( id ) ;
155+ if ( ! vec ) throw new Error ( `Vector not found for page ${ id } ` ) ;
156+ return vec ;
115157 } ) ;
116158
117159 const medoidIdx = selectMedoidIndex ( chunkVectors ) ;
@@ -122,6 +164,32 @@ export async function buildHierarchy(
122164 books . push ( book ) ;
123165 }
124166
167+ // Add SemanticNeighbor edges between consecutive pages within each book slice.
168+ // These document-order adjacency edges are always inserted regardless of cosine
169+ // cutoff, because adjacent text chunks of the same source are always related.
170+ for ( const book of books ) {
171+ for ( let i = 0 ; i < book . pageIds . length - 1 ; i ++ ) {
172+ const aId = book . pageIds [ i ] ;
173+ const bId = book . pageIds [ i + 1 ] ;
174+ const aVec = pageVectorMap . get ( aId ) ;
175+ const bVec = pageVectorMap . get ( bId ) ;
176+ if ( ! aVec || ! bVec ) continue ;
177+
178+ const sim = cosineSimilarity ( aVec , bVec ) ;
179+ const dist = 1 - sim ;
180+ const forwardEdge : SemanticNeighbor = { neighborPageId : bId , cosineSimilarity : sim , distance : dist } ;
181+ const reverseEdge : SemanticNeighbor = { neighborPageId : aId , cosineSimilarity : sim , distance : dist } ;
182+
183+ // Forward: a → b
184+ const existingA = await metadataStore . getSemanticNeighbors ( aId ) ;
185+ await metadataStore . putSemanticNeighbors ( aId , mergeAdjacentNeighbor ( existingA , forwardEdge , ADJACENCY_MAX_DEGREE ) ) ;
186+
187+ // Reverse: b → a
188+ const existingB = await metadataStore . getSemanticNeighbors ( bId ) ;
189+ await metadataStore . putSemanticNeighbors ( bId , mergeAdjacentNeighbor ( existingB , reverseEdge , ADJACENCY_MAX_DEGREE ) ) ;
190+ }
191+ }
192+
125193 await runPromotionSweep ( books . map ( ( b ) => b . bookId ) , metadataStore , policy ) ;
126194
127195 // -------------------------------------------------------------------------
@@ -135,8 +203,9 @@ export async function buildHierarchy(
135203 const volumeId = await hashText ( sortedBookIds . join ( "|" ) ) ;
136204
137205 const medoidVectors = bookChunk . map ( ( b ) => {
138- const idx = pageIds . indexOf ( b . medoidPageId ) ;
139- return pageVectors [ idx ] ;
206+ const vec = pageVectorMap . get ( b . medoidPageId ) ;
207+ if ( ! vec ) throw new Error ( `Vector not found for medoid page ${ b . medoidPageId } ` ) ;
208+ return vec ;
140209 } ) ;
141210
142211 const centroid = computeCentroid ( medoidVectors ) ;
0 commit comments