Skip to content

Commit 3b8412a

Browse files
Skiipy11claude
andcommitted
v1.2.0: Entity extraction, linking, and graph queries
Add automatic entity extraction from memory content at storage time. Two-speed approach: fast regex path on every write (non-blocking), LLM refinement during consolidation with alias normalization that compounds over time. Entities indexed in Qdrant payload for native filtered vector search with no result-count ceiling. New: entities service, entity graph API endpoints, brain_entities MCP tool, backfill script, Qdrant entity index, shared linking function. Fix scrollPoints dropping false filter values, Postgres createEntity race condition, MCP tool input validation. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent a68923d commit 3b8412a

22 files changed

Lines changed: 1109 additions & 70 deletions

README.md

Lines changed: 96 additions & 30 deletions
Large diffs are not rendered by default.

api/Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ WORKDIR /app
33
COPY package*.json ./
44
RUN npm install --production
55
COPY src/ ./src/
6+
COPY scripts/ ./scripts/
67
RUN mkdir -p /app/data && \
78
addgroup --system --gid 1001 nodejs && \
89
adduser --system --uid 1001 nodejs && \

api/scripts/backfill-entities.js

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
#!/usr/bin/env node
2+
/**
3+
* Backfill entities for existing memories.
4+
* Runs fast-path entity extraction (regex + known-tech dictionary) on all active Qdrant points.
5+
* No LLM calls — this is purely regex-based extraction.
6+
*
7+
* Usage: node api/scripts/backfill-entities.js
8+
* Requires: .env configured with QDRANT_URL, BRAIN_API_KEY, and STRUCTURED_STORE
9+
*/
10+
11+
try { await import('dotenv/config'); } catch (e) { /* dotenv not needed in Docker — env injected */ }
12+
import { initQdrant, scrollPoints, updatePointPayload } from '../src/services/qdrant.js';
13+
import { initStore, isEntityStoreAvailable, createEntity, findEntity, linkEntityToMemory, loadAllAliases } from '../src/services/stores/interface.js';
14+
import { extractEntities, loadAliasCache, linkExtractedEntities } from '../src/services/entities.js';
15+
import { initEmbeddings } from '../src/services/embedders/interface.js';
16+
17+
async function backfill() {
18+
console.log('[backfill] Starting entity backfill for existing memories...');
19+
20+
// Initialize
21+
await initEmbeddings();
22+
await initQdrant();
23+
await initStore();
24+
25+
if (!isEntityStoreAvailable()) {
26+
console.error('[backfill] Entity store not available. Need sqlite or postgres.');
27+
process.exit(1);
28+
}
29+
30+
// Load alias cache
31+
try {
32+
const aliases = await loadAllAliases();
33+
loadAliasCache(aliases);
34+
} catch (e) {
35+
console.log('[backfill] No existing aliases — starting fresh');
36+
}
37+
38+
let processed = 0;
39+
let entitiesCreated = 0;
40+
let linksCreated = 0;
41+
let offset = null;
42+
43+
while (true) {
44+
const result = await scrollPoints({ active: true }, 100, offset);
45+
const points = result.points || [];
46+
47+
if (points.length === 0) break;
48+
49+
for (const point of points) {
50+
const pay = point.payload;
51+
const text = pay.text || '';
52+
const clientId = pay.client_id || 'global';
53+
const sourceAgent = pay.source_agent || 'unknown';
54+
55+
const entities = extractEntities(text, clientId, sourceAgent);
56+
57+
if (entities.length > 0) {
58+
// Update Qdrant payload with entities
59+
const entityPayload = entities.map(e => ({ name: e.name, type: e.type }));
60+
await updatePointPayload(point.id, { entities: entityPayload });
61+
62+
// Create entities and links in structured store
63+
await linkExtractedEntities(entities, point.id, { createEntity, findEntity, linkEntityToMemory });
64+
entitiesCreated += entities.filter(e => !e.entityId).length;
65+
linksCreated += entities.length;
66+
}
67+
68+
processed++;
69+
if (processed % 50 === 0) {
70+
console.log(`[backfill] Processed ${processed} memories, ${entitiesCreated} entities created, ${linksCreated} links`);
71+
}
72+
}
73+
74+
offset = result.next_page_offset;
75+
if (!offset) break;
76+
}
77+
78+
console.log(`[backfill] Complete: ${processed} memories processed, ${entitiesCreated} new entities, ${linksCreated} links created`);
79+
process.exit(0);
80+
}
81+
82+
backfill().catch(err => {
83+
console.error('[backfill] Fatal:', err);
84+
process.exit(1);
85+
});

api/scripts/cleanup-duplicates.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env node
22
/**
33
* One-time cleanup script for Shared Brain duplicate memories.
4-
* Run on Beelink where Qdrant is at localhost:6333.
4+
* Run on the server where Qdrant is at localhost:6333.
55
*
66
* Usage: node cleanup-duplicates.js [--dry-run]
77
*

api/src/index.js

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,13 @@ import { briefingRouter } from './routes/briefing.js';
55
import { webhookRouter } from './routes/webhook.js';
66
import { statsRouter } from './routes/stats.js';
77
import { consolidationRouter } from './routes/consolidation.js';
8-
import { initQdrant } from './services/qdrant.js';
8+
import { entitiesRouter } from './routes/entities.js';
9+
import { initQdrant, ensureEntityIndex } from './services/qdrant.js';
910
import { initEmbeddings } from './services/embedders/interface.js';
10-
import { initStore } from './services/stores/interface.js';
11+
import { initStore, isEntityStoreAvailable, loadAllAliases } from './services/stores/interface.js';
1112
import { initLLM } from './services/llm/interface.js';
1213
import { runConsolidation } from './services/consolidation.js';
14+
import { loadAliasCache } from './services/entities.js';
1315

1416
// Validate required environment variables
1517
if (!process.env.BRAIN_API_KEY) {
@@ -36,18 +38,30 @@ app.use('/memory', memoryRouter);
3638
app.use('/briefing', briefingRouter);
3739
app.use('/webhook', webhookRouter);
3840
app.use('/consolidate', consolidationRouter);
41+
app.use('/entities', entitiesRouter);
3942

4043
async function start() {
4144
try {
4245
// Initialize embedding provider first (Qdrant needs dimensions)
4346
await initEmbeddings();
4447

4548
await initQdrant();
49+
await ensureEntityIndex();
4650
console.log('[shared-brain] Qdrant collection ready');
4751

4852
// Initialize structured storage backend
4953
await initStore();
5054

55+
// Load entity alias cache for fast-path extraction
56+
if (isEntityStoreAvailable()) {
57+
try {
58+
const aliases = await loadAllAliases();
59+
loadAliasCache(aliases);
60+
} catch (e) {
61+
console.log('[shared-brain] Entity alias cache: starting empty (first run)');
62+
}
63+
}
64+
5165
// Initialize consolidation LLM (optional — only if enabled)
5266
if (process.env.CONSOLIDATION_ENABLED !== 'false') {
5367
try {

api/src/routes/briefing.js

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,18 @@ briefingRouter.get('/', async (req, res) => {
6565
briefing.status_changes.sort(sortDesc);
6666
briefing.decisions.sort(sortDesc);
6767

68+
// Collect entities mentioned across all briefing entries
69+
const entityCounts = {};
70+
for (const point of points) {
71+
const entities = point.payload.entities || [];
72+
for (const ent of entities) {
73+
const key = ent.name;
74+
if (!entityCounts[key]) entityCounts[key] = { name: ent.name, type: ent.type, count: 0 };
75+
entityCounts[key].count++;
76+
}
77+
}
78+
const entitiesMentioned = Object.values(entityCounts).sort((a, b) => b.count - a.count);
79+
6880
// Summary stats
6981
briefing.summary = {
7082
total_entries: points.length,
@@ -74,6 +86,7 @@ briefingRouter.get('/', async (req, res) => {
7486
decisions: briefing.decisions.length,
7587
agents_active: [...new Set(points.map(p => p.payload.source_agent))],
7688
clients_mentioned: [...new Set(points.map(p => p.payload.client_id).filter(c => c !== 'global'))],
89+
entities_mentioned: entitiesMentioned.slice(0, 20),
7790
};
7891

7992
// Get collection stats

api/src/routes/entities.js

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
import { Router } from 'express';
2+
import {
3+
isEntityStoreAvailable, listEntities, findEntity, getEntityMemories, getEntityStats,
4+
} from '../services/stores/interface.js';
5+
6+
export const entitiesRouter = Router();
7+
8+
// GET /entities — List all entities
9+
entitiesRouter.get('/', async (req, res) => {
10+
try {
11+
if (!isEntityStoreAvailable()) {
12+
return res.status(400).json({
13+
error: 'Entity queries require sqlite or postgres backend. Set STRUCTURED_STORE in .env.',
14+
});
15+
}
16+
17+
const { type: entityType, limit, offset } = req.query;
18+
const result = await listEntities({ entity_type: entityType, limit, offset });
19+
20+
res.json({
21+
count: result.results.length,
22+
entities: result.results,
23+
});
24+
} catch (err) {
25+
console.error('[entities] Error:', err.message);
26+
res.status(500).json({ error: err.message });
27+
}
28+
});
29+
30+
// GET /entities/stats — Entity stats
31+
entitiesRouter.get('/stats', async (req, res) => {
32+
try {
33+
if (!isEntityStoreAvailable()) {
34+
return res.json({ total: 0, by_type: {}, top_mentioned: [] });
35+
}
36+
const stats = await getEntityStats();
37+
res.json(stats);
38+
} catch (err) {
39+
console.error('[entities:stats] Error:', err.message);
40+
res.status(500).json({ error: err.message });
41+
}
42+
});
43+
44+
// GET /entities/:name — Single entity by name or alias
45+
entitiesRouter.get('/:name', async (req, res) => {
46+
try {
47+
if (!isEntityStoreAvailable()) {
48+
return res.status(400).json({ error: 'Entity queries require sqlite or postgres backend.' });
49+
}
50+
51+
const entity = await findEntity(req.params.name);
52+
if (!entity) {
53+
return res.status(404).json({ error: 'Entity not found' });
54+
}
55+
56+
res.json({
57+
id: entity.id,
58+
canonical_name: entity.canonical_name,
59+
entity_type: entity.entity_type,
60+
first_seen: entity.first_seen,
61+
last_seen: entity.last_seen,
62+
mention_count: entity.mention_count,
63+
aliases: entity.aliases || [],
64+
});
65+
} catch (err) {
66+
console.error('[entities:get] Error:', err.message);
67+
res.status(500).json({ error: err.message });
68+
}
69+
});
70+
71+
// GET /entities/:name/memories — All memories linked to an entity
72+
entitiesRouter.get('/:name/memories', async (req, res) => {
73+
try {
74+
if (!isEntityStoreAvailable()) {
75+
return res.status(400).json({ error: 'Entity queries require sqlite or postgres backend.' });
76+
}
77+
78+
const entity = await findEntity(req.params.name);
79+
if (!entity) {
80+
return res.status(404).json({ error: 'Entity not found' });
81+
}
82+
83+
const limit = parseInt(req.query.limit) || 20;
84+
const links = await getEntityMemories(entity.id, limit);
85+
86+
res.json({
87+
entity: entity.canonical_name,
88+
entity_type: entity.entity_type,
89+
count: links.results.length,
90+
memory_links: links.results,
91+
});
92+
} catch (err) {
93+
console.error('[entities:memories] Error:', err.message);
94+
res.status(500).json({ error: err.message });
95+
}
96+
});

api/src/routes/memory.js

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@ import {
77
} from '../services/qdrant.js';
88
import {
99
createEvent, upsertFact, upsertStatus, listEvents, listFacts, listStatuses, isStoreAvailable,
10+
isEntityStoreAvailable, createEntity, findEntity, linkEntityToMemory,
1011
} from '../services/stores/interface.js';
1112
import { scrubCredentials } from '../services/scrub.js';
13+
import { extractEntities, linkExtractedEntities } from '../services/entities.js';
1214

1315
export const memoryRouter = Router();
1416

@@ -104,10 +106,32 @@ memoryRouter.post('/', async (req, res) => {
104106
...(metadata ? { metadata } : {}),
105107
};
106108

109+
// Extract entities (fast path — regex + alias cache, no LLM)
110+
let extractedEntities = [];
111+
try {
112+
extractedEntities = extractEntities(cleanContent, client_id || 'global', source_agent);
113+
if (extractedEntities.length > 0) {
114+
payload.entities = extractedEntities.map(e => ({ name: e.name, type: e.type }));
115+
}
116+
} catch (e) {
117+
console.error('[memory:entities] Extraction failed (non-blocking):', e.message);
118+
}
119+
107120
// Embed and store in Qdrant
108121
const vector = await embed(cleanContent);
109122
await upsertPoint(pointId, vector, payload);
110123

124+
// Link entities in structured store (fire-and-forget — don't block response)
125+
if (isEntityStoreAvailable() && extractedEntities.length > 0) {
126+
Promise.resolve().then(async () => {
127+
try {
128+
await linkExtractedEntities(extractedEntities, pointId, { createEntity, findEntity, linkEntityToMemory });
129+
} catch (e) {
130+
console.error('[memory:entities] Linking failed:', e.message);
131+
}
132+
});
133+
}
134+
111135
// Store in structured database (if configured)
112136
const storeData = {
113137
content: cleanContent,
@@ -160,7 +184,7 @@ memoryRouter.post('/', async (req, res) => {
160184
// GET /memory/search — Semantic search via Qdrant
161185
memoryRouter.get('/search', async (req, res) => {
162186
try {
163-
const { q, type, source_agent, client_id, category, limit, include_superseded } = req.query;
187+
const { q, type, source_agent, client_id, category, limit, include_superseded, entity } = req.query;
164188

165189
if (!q) {
166190
return res.status(400).json({ error: 'Missing required query parameter: q' });
@@ -176,7 +200,20 @@ memoryRouter.get('/search', async (req, res) => {
176200
// By default, only return active memories (not superseded)
177201
if (include_superseded !== 'true') filter.active = true;
178202

179-
const rawResults = await searchPoints(vector, filter, parseInt(limit) || 10);
203+
// Entity filter — resolve alias to canonical name, then filter via Qdrant payload
204+
const nestedFilters = [];
205+
if (entity) {
206+
let entityName = entity;
207+
if (isEntityStoreAvailable()) {
208+
try {
209+
const found = await findEntity(entity);
210+
if (found) entityName = found.canonical_name;
211+
} catch (e) { /* use original name */ }
212+
}
213+
nestedFilters.push({ arrayField: 'entities', key: 'name', value: entityName });
214+
}
215+
216+
const rawResults = await searchPoints(vector, filter, parseInt(limit) || 10, nestedFilters);
180217

181218
// Apply confidence decay and re-rank
182219
const results = rawResults.map(r => {

api/src/routes/stats.js

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { Router } from 'express';
22
import { getMemoryStats, scrollPoints, computeEffectiveConfidence, DECAY_TYPES } from '../services/qdrant.js';
3+
import { isEntityStoreAvailable, getEntityStats } from '../services/stores/interface.js';
34

45
export const statsRouter = Router();
56

@@ -21,13 +22,22 @@ statsRouter.get('/', async (req, res) => {
2122
// Non-critical
2223
}
2324

25+
// Entity stats
26+
let entityStats = null;
27+
if (isEntityStoreAvailable()) {
28+
try {
29+
entityStats = await getEntityStats();
30+
} catch (e) { /* non-critical */ }
31+
}
32+
2433
res.json({
2534
...stats,
2635
decayed_below_50pct: decayedCount,
2736
decay_config: {
2837
factor: parseFloat(process.env.DECAY_FACTOR) || 0.98,
2938
affected_types: DECAY_TYPES,
3039
},
40+
...(entityStats ? { entities: entityStats } : {}),
3141
});
3242
} catch (err) {
3343
console.error('[stats] Error:', err.message);

0 commit comments

Comments
 (0)