Skip to content

Commit a68923d

Browse files
Skiipy11claude
andcommitted
v1.1.0: Consolidation dedup, Gemini Flash, health checks, event TTL
Fix consolidation engine creating duplicate memories on every 6h run: - Add exact (content_hash) + semantic (92% vector similarity) dedup checks before creating merged facts and insights - Add content_hash dedup to webhook endpoint (n8n events) - Remove consolidation self-logging (was accumulating as noise) - Tighten consolidation prompt to reject vague platitudes - Add Gemini provider (gemini-2.5-flash recommended for consolidation) - Add EVENT_TTL_DAYS auto-cleanup for old unaccessed events - Fix Docker health checks (curl not available in containers) - Include cleanup-duplicates.js script for one-time dedup Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent fec7a4e commit a68923d

9 files changed

Lines changed: 310 additions & 37 deletions

File tree

README.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -465,9 +465,11 @@ All configuration is via environment variables. Copy `.env.example` to `.env` an
465465
|----------|---------|-------------|
466466
| `CONSOLIDATION_ENABLED` | `true` | Enable/disable the consolidation engine |
467467
| `CONSOLIDATION_INTERVAL` | `0 */6 * * *` | Cron schedule (default: every 6 hours) |
468-
| `CONSOLIDATION_LLM` | `openai` | `openai`, `anthropic`, or `ollama` |
469-
| `CONSOLIDATION_MODEL` | `gpt-4o-mini` | Model for consolidation |
468+
| `CONSOLIDATION_LLM` | `openai` | `openai`, `anthropic`, `gemini`, or `ollama` |
469+
| `CONSOLIDATION_MODEL` | `gpt-4o-mini` | Model for consolidation (e.g. `gemini-2.5-flash`) |
470470
| `ANTHROPIC_API_KEY` || Required when using Anthropic for consolidation |
471+
| `GEMINI_API_KEY` || Required when using Gemini for consolidation |
472+
| `EVENT_TTL_DAYS` | `30` | Auto-expire old unaccessed events after this many days |
471473

472474
### Memory Decay
473475

api/scripts/cleanup-duplicates.js

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
#!/usr/bin/env node
2+
/**
3+
* One-time cleanup script for Shared Brain duplicate memories.
4+
* Run on Beelink where Qdrant is at localhost:6333.
5+
*
6+
* Usage: node cleanup-duplicates.js [--dry-run]
7+
*
8+
* What it removes:
9+
* 1. Duplicate content_hash entries (keeps newest, deletes older copies)
10+
* 2. Consolidation run log events ("Consolidation run: processed N memories...")
11+
* 3. Vague consolidation "insight" platitudes
12+
*/
13+
14+
const QDRANT_URL = process.env.QDRANT_URL || 'http://localhost:6333';
15+
const COLLECTION = 'shared_memories';
16+
const DRY_RUN = process.argv.includes('--dry-run');
17+
18+
const QDRANT_API_KEY = process.env.QDRANT_API_KEY || '';
19+
20+
async function qdrantRequest(path, options = {}) {
21+
const headers = { 'Content-Type': 'application/json', ...options.headers };
22+
if (QDRANT_API_KEY) headers['api-key'] = QDRANT_API_KEY;
23+
const res = await fetch(`${QDRANT_URL}${path}`, {
24+
...options,
25+
headers,
26+
});
27+
if (!res.ok) {
28+
const text = await res.text();
29+
throw new Error(`Qdrant ${path}: ${res.status} ${text}`);
30+
}
31+
return res.json();
32+
}
33+
34+
async function scrollAll() {
35+
const points = [];
36+
let offset = null;
37+
while (true) {
38+
const body = { limit: 100, with_payload: true, with_vector: false };
39+
if (offset) body.offset = offset;
40+
const result = await qdrantRequest(`/collections/${COLLECTION}/points/scroll`, {
41+
method: 'POST',
42+
body: JSON.stringify(body),
43+
});
44+
points.push(...result.result.points);
45+
offset = result.result.next_page_offset;
46+
if (!offset) break;
47+
}
48+
return points;
49+
}
50+
51+
async function deletePoints(ids) {
52+
if (ids.length === 0) return;
53+
if (DRY_RUN) {
54+
console.log(` [DRY RUN] Would delete ${ids.length} points`);
55+
return;
56+
}
57+
await qdrantRequest(`/collections/${COLLECTION}/points/delete`, {
58+
method: 'POST',
59+
body: JSON.stringify({ points: ids }),
60+
});
61+
console.log(` Deleted ${ids.length} points`);
62+
}
63+
64+
// Platitude patterns — vague consolidation insights with no actionable content
65+
const PLATITUDE_PATTERNS = [
66+
/indicates? (?:effective|strong|an ongoing|a robust) (?:project management|integration|collaboration|commitment)/i,
67+
/ongoing commitment to improving the efficiency/i,
68+
/operational (?:status|capabilities?) (?:of|and) the (?:Shared Brain|system)/i,
69+
/closely (?:intertwined|linked|related)/i,
70+
/successful completion of workflows indicates/i,
71+
/cross-referencing of (?:memory )?repositories indicate/i,
72+
/a critical factor in the successful launch/i,
73+
];
74+
75+
function isPlatitude(text) {
76+
return PLATITUDE_PATTERNS.some(p => p.test(text));
77+
}
78+
79+
async function main() {
80+
console.log(`Shared Brain Cleanup ${DRY_RUN ? '(DRY RUN)' : '(LIVE)'}`);
81+
console.log('---');
82+
83+
// Step 1: Scroll all points
84+
console.log('Scrolling all points...');
85+
const allPoints = await scrollAll();
86+
console.log(`Total points: ${allPoints.length}`);
87+
88+
const toDelete = new Set();
89+
90+
// Step 2: Find duplicate content_hash entries (keep newest)
91+
console.log('\n--- Duplicate content_hash entries ---');
92+
const byHash = {};
93+
for (const p of allPoints) {
94+
const hash = p.payload?.content_hash;
95+
if (!hash) continue;
96+
if (!byHash[hash]) byHash[hash] = [];
97+
byHash[hash].push(p);
98+
}
99+
100+
let dupGroups = 0;
101+
let dupCount = 0;
102+
for (const [hash, points] of Object.entries(byHash)) {
103+
if (points.length <= 1) continue;
104+
// Sort by created_at descending — keep newest
105+
points.sort((a, b) => new Date(b.payload.created_at) - new Date(a.payload.created_at));
106+
const keep = points[0];
107+
const dupes = points.slice(1);
108+
dupGroups++;
109+
dupCount += dupes.length;
110+
console.log(` hash=${hash} (${points.length} copies) — keeping ${keep.id} (${keep.payload.created_at}), deleting ${dupes.length}`);
111+
for (const d of dupes) {
112+
toDelete.add(d.id);
113+
}
114+
}
115+
console.log(`Found ${dupGroups} duplicate groups, ${dupCount} entries to remove`);
116+
117+
// Step 3: Find consolidation run log events
118+
console.log('\n--- Consolidation run log events ---');
119+
let runLogCount = 0;
120+
for (const p of allPoints) {
121+
if (p.payload?.source_agent === 'consolidation-engine' &&
122+
p.payload?.type === 'event' &&
123+
p.payload?.text?.startsWith('Consolidation run: processed')) {
124+
if (!toDelete.has(p.id)) {
125+
toDelete.add(p.id);
126+
runLogCount++;
127+
}
128+
}
129+
}
130+
console.log(`Found ${runLogCount} consolidation run logs to remove`);
131+
132+
// Step 4: Find platitude insights
133+
console.log('\n--- Platitude insights ---');
134+
let platitudeCount = 0;
135+
for (const p of allPoints) {
136+
if (p.payload?.source_agent === 'consolidation-engine' &&
137+
p.payload?.text &&
138+
isPlatitude(p.payload.text)) {
139+
if (!toDelete.has(p.id)) {
140+
toDelete.add(p.id);
141+
platitudeCount++;
142+
console.log(` "${p.payload.text.slice(0, 80)}..."`);
143+
}
144+
}
145+
}
146+
console.log(`Found ${platitudeCount} platitude insights to remove`);
147+
148+
// Step 5: Execute deletions
149+
const deleteIds = Array.from(toDelete);
150+
console.log(`\n--- Total: ${deleteIds.length} points to delete ---`);
151+
152+
if (deleteIds.length > 0) {
153+
// Delete in batches of 100
154+
for (let i = 0; i < deleteIds.length; i += 100) {
155+
const batch = deleteIds.slice(i, i + 100);
156+
await deletePoints(batch);
157+
}
158+
}
159+
160+
console.log('\nDone.');
161+
}
162+
163+
main().catch(err => {
164+
console.error('Error:', err.message);
165+
process.exit(1);
166+
});

api/src/routes/webhook.js

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { Router } from 'express';
22
import crypto from 'crypto';
33
import { embed } from '../services/embedders/interface.js';
4-
import { upsertPoint } from '../services/qdrant.js';
4+
import { upsertPoint, findByPayload } from '../services/qdrant.js';
55
import { createEvent, upsertStatus, isStoreAvailable } from '../services/stores/interface.js';
66
import { scrubCredentials } from '../services/scrub.js';
77

@@ -48,6 +48,17 @@ webhookRouter.post('/n8n', async (req, res) => {
4848
content = scrubCredentials(content);
4949

5050
const contentHash = crypto.createHash('sha256').update(content).digest('hex').slice(0, 16);
51+
52+
// Dedup: if identical content already exists, return existing memory
53+
const duplicates = await findByPayload('content_hash', contentHash, { active: true });
54+
if (duplicates.length > 0) {
55+
return res.status(200).json({
56+
id: duplicates[0].id,
57+
deduplicated: true,
58+
message: 'Identical webhook event already exists — skipped',
59+
});
60+
}
61+
5162
const pointId = crypto.randomUUID();
5263

5364
// Store as event in Qdrant

0 commit comments

Comments
 (0)