Skip to content

Commit 0341e62

Browse files
Copilot and devlux76 committed
fix: align with main's P1 API renames, add deleteVolume, fix guard false positives, fix CI typecheck
Co-authored-by: devlux76 <86517969+devlux76@users.noreply.github.com>
1 parent dd56622 commit 0341e62

28 files changed

Lines changed: 2901 additions & 388 deletions

core/types.ts

Lines changed: 21 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -64,16 +64,18 @@ export interface Edge {
6464
}
6565

6666
// ---------------------------------------------------------------------------
67-
// Metroid nearest-neighbour graph (project term; medoid-inspired)
67+
// Semantic nearest-neighbor graph
6868
// ---------------------------------------------------------------------------
6969

70-
export interface MetroidNeighbor {
70+
/** A single directed proximity edge in the sparse semantic neighbor graph. */
71+
export interface SemanticNeighbor {
7172
neighborPageId: Hash;
7273
cosineSimilarity: number; // threshold is defined by runtime policy
7374
distance: number; // 1 - cosineSimilarity (ready for TSP)
7475
}
7576

76-
export interface MetroidSubgraph {
77+
/** Induced subgraph returned by BFS expansion of the semantic neighbor graph. */
78+
export interface SemanticNeighborSubgraph {
7779
nodes: Hash[];
7880
edges: { from: Hash; to: Hash; distance: number }[];
7981
}
@@ -162,6 +164,13 @@ export interface MetadataStore {
162164

163165
putVolume(volume: Volume): Promise<void>;
164166
getVolume(volumeId: Hash): Promise<Volume | undefined>;
167+
/**
168+
* Delete a volume record and clean up all reverse-index entries
169+
* (`bookToVolume` for each book in the volume, and the `volumeToShelf` entry).
170+
* Callers are responsible for removing the volume from any shelf's `volumeIds`
171+
* list before calling this method.
172+
*/
173+
deleteVolume(volumeId: Hash): Promise<void>;
165174

166175
putShelf(shelf: Shelf): Promise<void>;
167176
getShelf(shelfId: Hash): Promise<Shelf | undefined>;
@@ -175,20 +184,20 @@ export interface MetadataStore {
175184
getVolumesByBook(bookId: Hash): Promise<Volume[]>;
176185
getShelvesByVolume(volumeId: Hash): Promise<Shelf[]>;
177186

178-
// --- Metroid NN radius index ---
179-
putMetroidNeighbors(pageId: Hash, neighbors: MetroidNeighbor[]): Promise<void>;
180-
getMetroidNeighbors(pageId: Hash, maxDegree?: number): Promise<MetroidNeighbor[]>;
187+
// --- Semantic neighbor radius index ---
188+
putSemanticNeighbors(pageId: Hash, neighbors: SemanticNeighbor[]): Promise<void>;
189+
getSemanticNeighbors(pageId: Hash, maxDegree?: number): Promise<SemanticNeighbor[]>;
181190

182-
/** BFS expansion of the Metroid subgraph up to `maxHops` levels deep. */
183-
getInducedMetroidSubgraph(
191+
/** BFS expansion of the semantic neighbor subgraph up to `maxHops` levels deep. */
192+
getInducedNeighborSubgraph(
184193
seedPageIds: Hash[],
185194
maxHops: number,
186-
): Promise<MetroidSubgraph>;
195+
): Promise<SemanticNeighborSubgraph>;
187196

188197
// --- Dirty-volume recalc flags ---
189-
needsMetroidRecalc(volumeId: Hash): Promise<boolean>;
190-
flagVolumeForMetroidRecalc(volumeId: Hash): Promise<void>;
191-
clearMetroidRecalcFlag(volumeId: Hash): Promise<void>;
198+
needsNeighborRecalc(volumeId: Hash): Promise<boolean>;
199+
flagVolumeForNeighborRecalc(volumeId: Hash): Promise<void>;
200+
clearNeighborRecalcFlag(volumeId: Hash): Promise<void>;
192201

193202
// --- Hotpath index ---
194203
putHotpathEntry(entry: HotpathEntry): Promise<void>;

cortex/KnowledgeGapDetector.ts

Lines changed: 66 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,66 @@
1+
import type { Hash } from "../core/types";
2+
import type { ModelProfile } from "../core/ModelProfile";
3+
import { hashText } from "../core/crypto/hash";
4+
import type { Metroid } from "./MetroidBuilder";
5+
6+
/**
 * Record returned by `detectKnowledgeGap` when a metroid reports that no
 * antithesis (m2) could be found for a query.
 */
export interface KnowledgeGap {
7+
queryText: string; // query text, passed through unchanged
8+
queryEmbedding: Float32Array; // query embedding, passed through by reference
9+
knowledgeBoundary: Hash | null; // the metroid's m1, or null when m1 is the empty string
10+
detectedAt: string; // `new Date().toISOString()` taken when the gap was detected
11+
}
12+
13+
/**
 * Serialisable probe assembled by `buildCuriosityProbe` from a `KnowledgeGap`.
 */
export interface CuriosityProbe {
14+
probeId: Hash; // result of hashText(gap.queryText + gap.detectedAt)
15+
queryText: string; // copied from the gap
16+
queryEmbedding: Float32Array; // copied (by reference) from the gap
17+
knowledgeBoundary: Hash | null; // copied from the gap
18+
mimeType: string; // caller-supplied; buildCuriosityProbe defaults it to "text/plain"
19+
modelUrn: string; // formatted as `urn:model:<modelProfile.modelId>`
20+
createdAt: string; // `new Date().toISOString()` taken when the probe was built
21+
}
22+
23+
/**
 * Converts a metroid's gap signal into a KnowledgeGap record.
 *
 * A gap is reported only when `metroid.knowledgeGap` is set, i.e. the builder
 * could not find an antithesis (m2) for the query. A complete metroid yields
 * `null`.
 *
 * @param queryText - Query text, copied into the result unchanged.
 * @param queryEmbedding - Query embedding, stored in the result by reference.
 * @param metroid - Probe whose `knowledgeGap` flag and `m1` are inspected.
 * @param _modelProfile - Currently unused.
 * @returns The gap record, or `null` when the metroid reports no gap.
 */
export async function detectKnowledgeGap(
  queryText: string,
  queryEmbedding: Float32Array,
  metroid: Metroid,
  // eslint-disable-next-line @typescript-eslint/no-unused-vars -- reserved for future model-aware gap categorisation
  _modelProfile: ModelProfile,
): Promise<KnowledgeGap | null> {
  if (metroid.knowledgeGap) {
    // An empty-string m1 means there is no thesis node to act as the boundary.
    const knowledgeBoundary = metroid.m1 === "" ? null : metroid.m1;
    const detectedAt = new Date().toISOString();
    return { queryText, queryEmbedding, knowledgeBoundary, detectedAt };
  }

  return null;
}
44+
45+
/**
 * Turns a detected KnowledgeGap into a serialisable CuriosityProbe.
 *
 * The probe id is `hashText(gap.queryText + gap.detectedAt)` (SHA-256 per the
 * original author's note), so the same gap always maps to the same id.
 * `createdAt` is stamped at call time and is therefore not part of the id.
 *
 * @param gap - Gap whose query text, embedding and boundary are copied over.
 * @param modelProfile - Supplies `modelId` for the `urn:model:` URN.
 * @param mimeType - MIME type recorded on the probe; defaults to "text/plain".
 * @returns The assembled probe.
 */
export async function buildCuriosityProbe(
  gap: KnowledgeGap,
  modelProfile: ModelProfile,
  mimeType = "text/plain",
): Promise<CuriosityProbe> {
  const idSource = gap.queryText + gap.detectedAt;
  const probeId = await hashText(idSource);

  const { queryText, queryEmbedding, knowledgeBoundary } = gap;
  const probe: CuriosityProbe = {
    probeId,
    queryText,
    queryEmbedding,
    knowledgeBoundary,
    mimeType,
    modelUrn: `urn:model:${modelProfile.modelId}`,
    createdAt: new Date().toISOString(),
  };
  return probe;
}

cortex/MetroidBuilder.ts

Lines changed: 217 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,217 @@
1+
import type { Hash, VectorStore } from "../core/types";
2+
import type { ModelProfile } from "../core/ModelProfile";
3+
4+
/**
 * Result of `buildMetroid`: thesis (m1), antithesis (m2) and synthesis
 * centroid (c) for one query.
 */
export interface Metroid {
5+
m1: Hash; // pageId of the candidate most cosine-similar to the query; "" when no candidates were supplied
6+
m2: Hash | null; // pageId of the selected antithesis; null when none could be selected
7+
c: Float32Array | null; // protected dims copied from m1, remaining dims averaged between m1 and m2; null when m2 is null
8+
knowledgeGap: boolean; // true whenever m2 is null, false once both m1 and m2 were found
9+
}
10+
11+
/** Dependencies handed to `buildMetroid`. */
export interface MetroidBuilderOptions {
12+
modelProfile: ModelProfile; // read for `matryoshkaProtectedDim`
13+
vectorStore: VectorStore; // used to load candidate embeddings via `readVector(offset, dim)`
14+
}
15+
16+
/**
 * Standard Matryoshka tier sizes in ascending order.
 *
 * `buildMetroid` walks these downward from the model's protected dimension
 * when it retries the m2 search with a smaller protected boundary.
 */
17+
const MATRYOSHKA_TIERS = [32, 64, 128, 256, 512, 768, 1024, 2048] as const;
18+
19+
/**
 * Cosine similarity of two vectors, guaranteed to lie in [-1, 1].
 *
 * Only the first `min(a.length, b.length)` components are compared, so vectors
 * of different lengths are implicitly truncated to the shorter one.
 *
 * @param a - First vector.
 * @param b - Second vector.
 * @returns The similarity, or 0 when either (truncated) vector has zero norm,
 *          which includes empty vectors.
 */
function cosineSimilarity(a: Float32Array, b: Float32Array): number {
  const len = Math.min(a.length, b.length);

  let dotProduct = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < len; i++) {
    dotProduct += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }

  if (normA === 0 || normB === 0) return 0;

  const similarity = dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));

  // Floating-point rounding can push the ratio fractionally past +/-1 for
  // (anti-)parallel vectors; clamp so `1 - similarity` is never negative.
  return Math.max(-1, Math.min(1, similarity));
}
32+
33+
/** Cosine distance between two vectors: `1 - cosineSimilarity(a, b)`. */
function cosineDistance(a: Float32Array, b: Float32Array): number {
  const similarity = cosineSimilarity(a, b);
  return 1 - similarity;
}
36+
37+
/**
 * Locates the medoid of a set of embeddings: the member whose summed cosine
 * distance to all other members is smallest.
 *
 * Ties go to the lowest index. An empty input yields 0.
 *
 * @param embeddings - Vectors to compare pairwise.
 * @returns Index of the medoid within `embeddings`.
 */
function findMedoidIndex(embeddings: Float32Array[]): number {
  // Total distance from each member to every other member (self excluded).
  const totals = embeddings.map((anchor, anchorIdx) =>
    embeddings.reduce(
      (sum, other, otherIdx) =>
        otherIdx === anchorIdx ? sum : sum + cosineDistance(anchor, other),
      0,
    ),
  );

  // First strict minimum wins, so earlier members are preferred on ties.
  let winner = 0;
  let lowest = Infinity;
  totals.forEach((total, idx) => {
    if (total < lowest) {
      lowest = total;
      winner = idx;
    }
  });

  return winner;
}
62+
63+
/** Candidate node as supplied to `buildMetroid`, before its embedding is loaded. */
interface CandidateEntry {
64+
pageId: Hash; // identifier reported back as m1 / m2
65+
embeddingOffset: number; // first argument passed to `vectorStore.readVector`
66+
embeddingDim: number; // second argument passed to `vectorStore.readVector`
67+
}
68+
69+
/** A `CandidateEntry` together with the vector loaded for it. */
interface CandidateWithEmbedding extends CandidateEntry {
70+
embedding: Float32Array; // value returned by `vectorStore.readVector(embeddingOffset, embeddingDim)`
71+
}
72+
73+
/**
 * Searches for m2 among `others` (candidates excluding m1) using only the free
 * dimensions, i.e. the components from index `protectedDim` onwards.
 *
 * Each candidate is scored by the negated cosine similarity of its free
 * dimensions against m1's free dimensions. Candidates with score >= 0 form the
 * "opposite set"; if fewer than two qualify, the best-scoring half of all
 * candidates is used instead. The medoid of that set is returned.
 *
 * @param others - Candidates other than m1, with embeddings loaded.
 * @param m1Embedding - Embedding of the thesis node.
 * @param protectedDim - Number of leading dimensions excluded from the search.
 * @returns The selected candidate, or `null` when there are no candidates or
 *          no free dimensions to compare at this boundary.
 */
function searchM2(
  others: CandidateWithEmbedding[],
  m1Embedding: Float32Array,
  protectedDim: number,
): CandidateWithEmbedding | null {
  if (others.length === 0) return null;

  // With no free dimensions every score would be -0, which still passes the
  // `>= 0` test below and would hand back an arbitrary candidate. Report
  // failure instead so the caller can retry with a smaller boundary.
  if (protectedDim >= m1Embedding.length) return null;

  // `subarray` returns a view, so the free region is not copied.
  const m1Free = m1Embedding.subarray(protectedDim);

  const scored = others
    // A candidate with no free dimensions cannot be scored meaningfully.
    .filter((candidate) => candidate.embedding.length > protectedDim)
    .map((candidate) => {
      const free = candidate.embedding.subarray(protectedDim);
      return { candidate, free, score: -cosineSimilarity(free, m1Free) };
    });

  if (scored.length === 0) return null;

  // Prefer candidates that are genuinely opposite (score >= 0).
  let oppositeSet = scored.filter((s) => s.score >= 0);

  // Fall back to the top 50% when the genuine-opposite set is too small.
  if (oppositeSet.length < 2) {
    const byScore = [...scored].sort((a, b) => b.score - a.score);
    const topHalf = Math.max(1, Math.ceil(byScore.length / 2));
    oppositeSet = byScore.slice(0, topHalf);
  }

  // Reuse the free-dimension views computed during scoring.
  const medoidIdx = findMedoidIndex(oppositeSet.map((s) => s.free));
  return oppositeSet[medoidIdx].candidate;
}
109+
110+
/**
 * Assembles the dialectical probe (Metroid) for a query embedding from a list
 * of candidate memory nodes.
 *
 * Stages
 *   1. Thesis (m1): the candidate whose embedding is most cosine-similar to
 *      the query; the first such candidate wins on ties.
 *   2. Antithesis (m2): chosen by `searchM2` over the remaining candidates.
 *      The search starts at `modelProfile.matryoshkaProtectedDim` and, while
 *      it returns `null`, is retried at each smaller standard Matryoshka tier.
 *   3. Synthesis (c): the protected dimensions are copied from m1 and the
 *      remaining dimensions are the mean of m1 and m2.
 *
 * A result with `knowledgeGap: true` (and `m2`/`c` set to `null`) is returned
 * when there are no candidates, when the model profile has no protected
 * dimension, or when no m2 is found at any tier.
 *
 * @param queryEmbedding - Embedding of the query.
 * @param candidateMedoids - Candidate nodes with the location of their vectors.
 * @param options - Model profile and vector store.
 * @returns The assembled Metroid.
 */
export async function buildMetroid(
  queryEmbedding: Float32Array,
  candidateMedoids: Array<{ pageId: Hash; embeddingOffset: number; embeddingDim: number }>,
  options: MetroidBuilderOptions,
): Promise<Metroid> {
  const { modelProfile, vectorStore } = options;

  // Shared shape of every "no antithesis" outcome.
  const gapAt = (m1: Hash): Metroid => ({ m1, m2: null, c: null, knowledgeGap: true });

  if (candidateMedoids.length === 0) {
    return gapAt("");
  }

  // Fetch every candidate vector concurrently.
  const loaded: CandidateWithEmbedding[] = await Promise.all(
    candidateMedoids.map(async (entry) => ({
      ...entry,
      embedding: await vectorStore.readVector(entry.embeddingOffset, entry.embeddingDim),
    })),
  );

  // Thesis: strictly-greater comparison keeps the earliest candidate on ties.
  let thesis = loaded[0];
  let thesisScore = cosineSimilarity(queryEmbedding, thesis.embedding);
  for (const contender of loaded.slice(1)) {
    const contenderScore = cosineSimilarity(queryEmbedding, contender.embedding);
    if (contenderScore > thesisScore) {
      thesisScore = contenderScore;
      thesis = contender;
    }
  }

  const protectedDim = modelProfile.matryoshkaProtectedDim;

  if (protectedDim === undefined) {
    // Non-Matryoshka model: antithesis search is impossible.
    return gapAt(thesis.pageId);
  }

  const rivals = loaded.filter((cand) => cand.pageId !== thesis.pageId);

  // --- Matryoshka dimensional unwinding ---
  // Try the configured boundary first, then every standard tier below it in
  // descending order, so the free-dimension region grows at each step.
  const smallerTiers = [...MATRYOSHKA_TIERS].reverse().filter((tier) => tier < protectedDim);
  const boundaries: number[] = [protectedDim, ...smallerTiers];

  let antithesis: CandidateWithEmbedding | null = null;
  let boundaryUsed = protectedDim;

  for (const boundary of boundaries) {
    const hit = searchM2(rivals, thesis.embedding, boundary);
    if (hit === null) continue;

    antithesis = hit;
    boundaryUsed = boundary;
    break;
  }

  if (antithesis === null) {
    return gapAt(thesis.pageId);
  }

  // Frozen synthesis centroid: protected prefix from m1, averaged tail.
  const fullDim = thesis.embedding.length;
  const centroid = new Float32Array(fullDim);

  for (let d = 0; d < boundaryUsed; d++) {
    centroid[d] = thesis.embedding[d];
  }
  for (let d = boundaryUsed; d < fullDim; d++) {
    centroid[d] = (thesis.embedding[d] + antithesis.embedding[d]) / 2;
  }

  return {
    m1: thesis.pageId,
    m2: antithesis.pageId,
    c: centroid,
    knowledgeGap: false,
  };
}

0 commit comments

Comments
 (0)