Skip to content

Commit fa78ec7

Browse files
Skiipy11claude
andcommitted
security: input validation, rate limiting, and consolidation hardening
Fixes identified during security audit triggered by #1: Input validation (middleware/validate.js): - source_agent must match /^[a-zA-Z0-9_-]{1,64}$/ - importance restricted to allowlist (critical/high/medium/low) - content capped at 10,000 characters - metadata validated for depth (max 3) and size (max 10KB) - client_id, key, subject, status_value length-capped Rate limiting (middleware/ratelimit.js): - 60 writes/min, 120 reads/min, 1 consolidation/hour per API key - Configurable via RATE_LIMIT_WRITES, RATE_LIMIT_READS env vars - Automatic stale bucket cleanup Corroboration hardening: - observed_by array capped at 20 entries to prevent bloat attacks - Search limit capped at 100 results max Credential scrubbing improvements (scrub.js): - Added AWS access key pattern (AKIA...) - Added connection string pattern (postgres://, mongodb://, redis://) - Added OpenAI/Anthropic API key pattern (sk-proj-..., sk-ant-...) - New scrubObject() recursively scrubs metadata before storage Consolidation LLM injection defense (consolidation.js): - Memory content wrapped in XML tags with escaped angle brackets - All memory IDs in LLM output validated against current batch - Contradictions with out-of-batch IDs filtered out - Connection/entity mentioned_in IDs validated - LLM-provided importance values sanitized against allowlist Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent b53f08a commit fa78ec7

7 files changed

Lines changed: 255 additions & 20 deletions

File tree

api/src/index.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import express from 'express';
22
import { authMiddleware } from './middleware/auth.js';
3+
import { rateLimitMiddleware } from './middleware/ratelimit.js';
34
import { memoryRouter } from './routes/memory.js';
45
import { briefingRouter } from './routes/briefing.js';
56
import { webhookRouter } from './routes/webhook.js';
@@ -30,8 +31,9 @@ app.get('/health', (req, res) => {
3031
res.json({ status: 'ok', service: 'shared-brain', timestamp: new Date().toISOString() });
3132
});
3233

33-
// All other routes require API key
34+
// All other routes require API key + rate limiting
3435
app.use(authMiddleware);
36+
app.use(rateLimitMiddleware);
3537

3638
app.use('/stats', statsRouter);
3739
app.use('/memory', memoryRouter);

api/src/middleware/ratelimit.js

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
// Per-key request rate limiting for authenticated endpoints
2+
3+
// Per-API-key counters, keyed by the raw API key string.
// Shape: apiKey -> { write?: { count, windowStart }, read?: { ... }, consolidation?: { ... } }
// (the inner keys are the same names used as keys of LIMITS below).
const buckets = new Map();

/**
 * Read a positive integer limit from an environment variable.
 *
 * Falls back when the variable is unset, not numeric, zero, or negative.
 * A non-positive limit would make `count >= max` true on the very first
 * request and lock every caller out, so it is treated as a misconfiguration.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is missing or invalid.
 * @returns {number} A positive integer limit.
 */
function readLimitFromEnv(name, fallback) {
  const parsed = Number.parseInt(process.env[name], 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

// Fixed-window limits per request type. `max` is the number of requests allowed
// per window; `windowMs` is the window length in milliseconds.
const LIMITS = {
  write: { max: readLimitFromEnv('RATE_LIMIT_WRITES', 60), windowMs: 60_000 },
  read: { max: readLimitFromEnv('RATE_LIMIT_READS', 120), windowMs: 60_000 },
  consolidation: { max: 1, windowMs: 3_600_000 }, // 1 per hour
};
10+
11+
/**
 * Return the live counter for one API key and request type, creating it or
 * starting a fresh window when needed.
 *
 * A new `{ count: 0, windowStart: now }` counter replaces the stored one when
 * none exists yet or when strictly more than `LIMITS[type].windowMs`
 * milliseconds have passed since the stored window began.
 *
 * @param {string} apiKey - Key the counters are grouped under.
 * @param {string} type - Request type; a key of LIMITS.
 * @returns {{count: number, windowStart: number}} The (possibly new) counter,
 *   which callers mutate in place.
 */
function getBucket(apiKey, type) {
  const timestamp = Date.now();

  let perKey = buckets.get(apiKey);
  if (perKey === undefined) {
    perKey = {};
    buckets.set(apiKey, perKey);
  }

  const existing = perKey[type];
  const needsFreshWindow = existing
    ? timestamp - existing.windowStart > LIMITS[type].windowMs
    : true;

  if (needsFreshWindow) {
    perKey[type] = { count: 0, windowStart: timestamp };
  }

  return perKey[type];
}
22+
23+
/**
 * Count one request against the caller's current window and report whether it
 * is over the limit.
 *
 * @param {string} apiKey - Key the counters are grouped under.
 * @param {string} type - Request type; a key of LIMITS.
 * @returns {{limited: false} | {limited: true, retryAfter: number}}
 *   `retryAfter` is the whole number of seconds (always >= 1) until the
 *   current window ends. Rejected requests are not counted.
 */
function checkLimit(apiKey, type) {
  const bucket = getBucket(apiKey, type);
  const { max, windowMs } = LIMITS[type];

  if (bucket.count >= max) {
    const remainingMs = bucket.windowStart + windowMs - Date.now();
    // The remaining time can be 0 (request lands exactly on the window edge)
    // or slightly negative (the clock advanced since the bucket was fetched).
    // Advertising "retry in 0s" while still rejecting the request invites an
    // immediate retry loop, so always tell the client to wait at least 1s.
    const retryAfter = Math.max(1, Math.ceil(remainingMs / 1000));
    return { limited: true, retryAfter };
  }

  bucket.count += 1;
  return { limited: false };
}
32+
33+
/**
 * Map an HTTP method and request path to a rate-limit type.
 *
 * - POST to a path beginning with `/consolidate` -> 'consolidation'
 * - any other POST, PUT, PATCH or DELETE         -> 'write'
 * - everything else                              -> 'read'
 *
 * @param {string} method - HTTP method, compared case-sensitively.
 * @param {string} path - Request path.
 * @returns {'consolidation' | 'write' | 'read'} The rate-limit type.
 */
function classifyRequest(method, path) {
  const isConsolidationRun = path.startsWith('/consolidate') && method === 'POST';
  if (isConsolidationRun) {
    return 'consolidation';
  }

  switch (method) {
    case 'POST':
    case 'PUT':
    case 'PATCH':
    case 'DELETE':
      return 'write';
    default:
      return 'read';
  }
}
39+
40+
/**
 * Express middleware that applies the per-key rate limits.
 *
 * The caller is identified by the `x-api-key` request header; requests without
 * one share the single key 'unknown'. When the caller is over the limit the
 * response is a 429 with a `Retry-After` header and a JSON body describing the
 * limit; otherwise `next()` is called.
 *
 * @param {object} req - Request; reads `headers`, `method` and `path`.
 * @param {object} res - Response; uses `set`, `status` and `json`.
 * @param {Function} next - Continuation invoked when the request is allowed.
 * @returns {*} The result of `res.status(429).json(...)` when limited,
 *   otherwise undefined.
 */
export function rateLimitMiddleware(req, res, next) {
  const callerKey = req.headers['x-api-key'] || 'unknown';
  const type = classifyRequest(req.method, req.path);
  const outcome = checkLimit(callerKey, type);

  if (!outcome.limited) {
    next();
    return;
  }

  const { retryAfter } = outcome;
  res.set('Retry-After', String(retryAfter));

  const rejected = res.status(429);
  const limit = LIMITS[type];
  return rejected.json({
    error: `Rate limit exceeded for ${type} requests. Try again in ${retryAfter}s.`,
    limit: limit.max,
    window_seconds: limit.windowMs / 1000,
  });
}
56+
57+
// Every 10 minutes, drop API keys whose counters have all run past their
// windows so the map does not keep entries for idle keys. The timer is
// unref'd so it never keeps the process alive on its own.
setInterval(() => {
  const now = Date.now();
  for (const [apiKey, perKey] of buckets) {
    // A counter is still live while no more than its window length has elapsed.
    // A type with no LIMITS entry compares against undefined, which is never
    // true, so such a counter counts as expired.
    const hasLiveWindow = Object.entries(perKey).some(
      ([type, bucket]) => now - bucket.windowStart <= LIMITS[type]?.windowMs,
    );
    if (!hasLiveWindow) {
      buckets.delete(apiKey);
    }
  }
}, 600_000).unref();

api/src/middleware/validate.js

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
// Input validation middleware and helpers for memory API
2+
3+
// Agent identifiers: 1-64 characters drawn from letters, digits, '_' and '-'.
const AGENT_NAME_REGEX = /^[a-zA-Z0-9_-]{1,64}$/;

// Allowlists for the `type` and `importance` fields.
const VALID_TYPES = [
  'event',
  'fact',
  'decision',
  'status',
];
const VALID_IMPORTANCE = [
  'critical',
  'high',
  'medium',
  'low',
];

// Size caps.
const MAX_CONTENT_LENGTH = 10000;
const MAX_METADATA_SIZE = 10 * 1024; // 10 KB serialized
const MAX_METADATA_DEPTH = 3;
const MAX_OBSERVED_BY = 20;
const MAX_STRING_FIELD_LENGTH = 256;
11+
12+
/**
 * Check that a value nests objects/arrays no more than `max` levels deep.
 *
 * The value itself counts as level 1 when it is an object or array; primitive
 * leaves (including null) never add a level. With `max = 3`,
 * `{ a: { b: { c: 1 } } }` passes and `{ a: { b: { c: { d: 1 } } } }` fails.
 *
 * @param {*} obj - Value to inspect.
 * @param {number} max - Maximum number of nested container levels allowed.
 * @param {number} [current=0] - Containers already entered (internal to the recursion).
 * @returns {boolean} True when the value is within the limit.
 */
function checkDepth(obj, max, current = 0) {
  // Leaves are checked first: a primitive sitting inside the deepest allowed
  // container must not be counted as one more level of nesting.
  if (obj === null || typeof obj !== 'object') return true;
  if (current >= max) return false;

  const children = Array.isArray(obj) ? obj : Object.values(obj);
  return children.every((child) => checkDepth(child, max, current + 1));
}
20+
21+
/**
 * Validate the `source_agent` field.
 *
 * @param {*} agent - Candidate agent name.
 * @returns {string|null} An error message, or null when the name is a
 *   non-empty string matching AGENT_NAME_REGEX.
 */
export function validateSourceAgent(agent) {
  const isNonEmptyString = typeof agent === 'string' && agent !== '';
  if (!isNonEmptyString) {
    return 'source_agent is required and must be a string';
  }

  const wellFormed = AGENT_NAME_REGEX.test(agent);
  return wellFormed
    ? null
    : `source_agent must match ${AGENT_NAME_REGEX} (1-64 alphanumeric, hyphens, underscores)`;
}
26+
27+
/**
 * Validate the `type` field against the VALID_TYPES allowlist.
 *
 * @param {*} type - Candidate memory type.
 * @returns {string|null} An error message, or null when the type is allowed.
 */
export function validateType(type) {
  if (!type) {
    return 'type is required';
  }

  const isKnownType = VALID_TYPES.includes(type);
  if (isKnownType) {
    return null;
  }

  return `Invalid type: ${type}. Must be one of: ${VALID_TYPES.join(', ')}`;
}
32+
33+
/**
 * Validate the optional `importance` field against the VALID_IMPORTANCE
 * allowlist. Any falsy value is treated as "not provided" and accepted.
 *
 * @param {*} importance - Candidate importance level.
 * @returns {string|null} An error message, or null when absent or allowed.
 */
export function validateImportance(importance) {
  // Optional field: nothing to check when it was not supplied.
  const wasProvided = Boolean(importance);
  if (!wasProvided || VALID_IMPORTANCE.includes(importance)) {
    return null;
  }

  return `Invalid importance: ${importance}. Must be one of: ${VALID_IMPORTANCE.join(', ')}`;
}
38+
39+
/**
 * Validate the `content` field: a non-empty string of at most
 * MAX_CONTENT_LENGTH characters (as measured by `String.prototype.length`).
 *
 * @param {*} content - Candidate memory content.
 * @returns {string|null} An error message, or null when the content is valid.
 */
export function validateContent(content) {
  const isNonEmptyString = typeof content === 'string' && content !== '';
  if (!isNonEmptyString) {
    return 'content is required and must be a string';
  }

  const { length } = content;
  return length > MAX_CONTENT_LENGTH
    ? `content exceeds maximum length of ${MAX_CONTENT_LENGTH} characters (got ${length})`
    : null;
}
44+
45+
/**
 * Validate the optional `metadata` field.
 *
 * Metadata must be a plain (non-array) object whose JSON serialization is at
 * most MAX_METADATA_SIZE bytes when encoded as UTF-8 and whose nesting passes
 * `checkDepth(metadata, MAX_METADATA_DEPTH)`.
 *
 * @param {*} metadata - Candidate metadata.
 * @returns {string|null} An error message, or null when absent or valid.
 */
export function validateMetadata(metadata) {
  if (metadata === undefined || metadata === null) return null; // optional
  if (typeof metadata !== 'object' || Array.isArray(metadata)) return 'metadata must be a plain object';

  const serialized = JSON.stringify(metadata);
  // The cap and the error message are expressed in bytes, so measure the
  // encoded size. `serialized.length` counts UTF-16 code units and would let
  // multi-byte text through at up to ~3x the intended limit.
  const serializedBytes = Buffer.byteLength(serialized, 'utf8');
  if (serializedBytes > MAX_METADATA_SIZE) {
    return `metadata exceeds maximum size of ${MAX_METADATA_SIZE} bytes (got ${serializedBytes})`;
  }

  if (!checkDepth(metadata, MAX_METADATA_DEPTH)) {
    return `metadata exceeds maximum nesting depth of ${MAX_METADATA_DEPTH}`;
  }
  return null;
}
53+
54+
/**
 * Validate an optional string field with a length cap.
 *
 * @param {*} value - Candidate value; undefined and null are accepted as "absent".
 * @param {string} name - Field name used in error messages.
 * @param {number} [maxLen=MAX_STRING_FIELD_LENGTH] - Maximum allowed `length`.
 * @returns {string|null} An error message, or null when absent or valid.
 */
export function validateStringField(value, name, maxLen = MAX_STRING_FIELD_LENGTH) {
  // `== null` matches exactly undefined and null: the field is optional.
  if (value == null) {
    return null;
  }

  if (typeof value !== 'string') {
    return `${name} must be a string`;
  }

  const tooLong = value.length > maxLen;
  return tooLong ? `${name} exceeds maximum length of ${maxLen} characters` : null;
}
60+
61+
/**
 * Validate the optional `client_id` field: a string of at most 64 characters.
 *
 * @param {*} clientId - Candidate client identifier.
 * @returns {string|null} An error message, or null when absent or valid.
 */
export function validateClientId(clientId) {
  const maxClientIdLength = 64;
  const problem = validateStringField(clientId, 'client_id', maxClientIdLength);
  return problem;
}
64+
65+
/**
 * Validate all inputs for POST /memory.
 *
 * Runs the field validators in a fixed order and stops at the first failure,
 * so later validators are not invoked once an error has been found.
 *
 * @param {object} input - Request body; only the destructured fields are read.
 * @returns {string|null} The first error message, or null when every field passes.
 */
export function validateMemoryInput({ type, content, source_agent, importance, metadata, client_id, key, subject, status_value }) {
  // Thunks keep evaluation lazy: each check runs only if all earlier ones passed.
  const checks = [
    () => validateType(type),
    () => validateContent(content),
    () => validateSourceAgent(source_agent),
    () => validateImportance(importance),
    () => validateMetadata(metadata),
    () => validateClientId(client_id),
    () => validateStringField(key, 'key', 128),
    () => validateStringField(subject, 'subject', 256),
    () => validateStringField(status_value, 'status_value', 256),
  ];

  for (const runCheck of checks) {
    const problem = runCheck();
    if (problem) {
      return problem;
    }
  }
  return null;
}
78+
79+
export { MAX_OBSERVED_BY, VALID_TYPES, VALID_IMPORTANCE };

api/src/routes/memory.js

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@ import {
99
createEvent, upsertFact, upsertStatus, listEvents, listFacts, listStatuses, isStoreAvailable,
1010
isEntityStoreAvailable, createEntity, findEntity, linkEntityToMemory,
1111
} from '../services/stores/interface.js';
12-
import { scrubCredentials } from '../services/scrub.js';
12+
import { scrubCredentials, scrubObject } from '../services/scrub.js';
1313
import { extractEntities, linkExtractedEntities } from '../services/entities.js';
14+
import { validateMemoryInput, MAX_OBSERVED_BY } from '../middleware/validate.js';
1415

1516
export const memoryRouter = Router();
1617

@@ -19,16 +20,10 @@ memoryRouter.post('/', async (req, res) => {
1920
try {
2021
const { type, content, source_agent, client_id, category, importance, metadata } = req.body;
2122

22-
// Validate required fields
23-
if (!type || !content || !source_agent) {
24-
return res.status(400).json({
25-
error: 'Missing required fields: type, content, source_agent',
26-
valid_types: ['event', 'fact', 'decision', 'status'],
27-
});
28-
}
29-
30-
if (!['event', 'fact', 'decision', 'status'].includes(type)) {
31-
return res.status(400).json({ error: `Invalid type: ${type}. Must be event, fact, decision, or status` });
23+
// Validate all input fields
24+
const validationError = validateMemoryInput(req.body);
25+
if (validationError) {
26+
return res.status(400).json({ error: validationError });
3227
}
3328

3429
// Scrub credentials
@@ -58,6 +53,18 @@ memoryRouter.post('/', async (req, res) => {
5853
}
5954

6055
// Different agent → corroborate: record that another agent observed the same thing
56+
if (existingObservedBy.length >= MAX_OBSERVED_BY) {
57+
return res.status(200).json({
58+
id: existing.id,
59+
type: existing.payload.type,
60+
content_hash: contentHash,
61+
deduplicated: true,
62+
observed_by: existingObservedBy,
63+
observation_count: existingObservedBy.length,
64+
message: `Observer cap reached (${MAX_OBSERVED_BY}) — corroboration noted but not recorded`,
65+
stored_in: { qdrant: true, structured_db: true },
66+
});
67+
}
6168
const updatedObservedBy = [...existingObservedBy, source_agent];
6269
const now = new Date().toISOString();
6370
await updatePointPayload(existing.id, {
@@ -132,7 +139,7 @@ memoryRouter.post('/', async (req, res) => {
132139
superseded_by: null,
133140
...(type === 'fact' && req.body.key ? { key: req.body.key } : {}),
134141
...(type === 'status' && req.body.subject ? { subject: req.body.subject, status_value: req.body.status_value } : {}),
135-
...(metadata ? { metadata } : {}),
142+
...(metadata ? { metadata: scrubObject(metadata) } : {}),
136143
};
137144

138145
// Extract entities (fast path — regex + alias cache, no LLM)
@@ -242,7 +249,7 @@ memoryRouter.get('/search', async (req, res) => {
242249
nestedFilters.push({ arrayField: 'entities', key: 'name', value: entityName });
243250
}
244251

245-
const rawResults = await searchPoints(vector, filter, parseInt(limit) || 10, nestedFilters);
252+
const rawResults = await searchPoints(vector, filter, Math.min(parseInt(limit) || 10, 100), nestedFilters);
246253

247254
// Apply confidence decay and re-rank
248255
const results = rawResults.map(r => {

api/src/routes/webhook.js

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import { upsertPoint, findByPayload, updatePointPayload } from '../services/qdra
55
import { createEvent, upsertStatus, isStoreAvailable, isEntityStoreAvailable, createEntity, findEntity, linkEntityToMemory } from '../services/stores/interface.js';
66
import { scrubCredentials } from '../services/scrub.js';
77
import { extractEntities, linkExtractedEntities } from '../services/entities.js';
8+
import { validateClientId, MAX_OBSERVED_BY } from '../middleware/validate.js';
89

910
export const webhookRouter = Router();
1011

@@ -37,6 +38,9 @@ webhookRouter.post('/n8n', async (req, res) => {
3738
});
3839
}
3940

41+
const clientIdError = validateClientId(client_id);
42+
if (clientIdError) return res.status(400).json({ error: clientIdError });
43+
4044
const now = new Date().toISOString();
4145

4246
// Build content string
@@ -67,7 +71,16 @@ webhookRouter.post('/n8n', async (req, res) => {
6771
});
6872
}
6973

70-
// Different source → corroborate
74+
// Different source → corroborate (with cap)
75+
if (existingObservedBy.length >= MAX_OBSERVED_BY) {
76+
return res.status(200).json({
77+
id: existing.id,
78+
deduplicated: true,
79+
observed_by: existingObservedBy,
80+
observation_count: existingObservedBy.length,
81+
message: `Observer cap reached (${MAX_OBSERVED_BY}) — corroboration noted but not recorded`,
82+
});
83+
}
7184
const updatedObservedBy = [...existingObservedBy, sourceAgent];
7285
await updatePointPayload(existing.id, {
7386
observed_by: updatedObservedBy,

api/src/services/consolidation.js

Lines changed: 49 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -183,11 +183,16 @@ export async function runConsolidation() {
183183
}
184184

185185
async function consolidateBatch(points, clientId) {
186-
// Format memories for the LLM
186+
// Collect valid IDs for output validation
187+
const batchIds = new Set(points.map(p => p.id));
188+
189+
// Format memories for the LLM — wrapped in XML tags to resist prompt injection
187190
const memoriesText = points.map(p => {
188191
const pay = p.payload;
189-
return `[ID: ${p.id}] [Type: ${pay.type}] [Agent: ${pay.source_agent}] [Client: ${pay.client_id}] [Created: ${pay.created_at}]\n${pay.text}`;
190-
}).join('\n\n---\n\n');
192+
// Escape any XML-like tags in the memory content to prevent tag injection
193+
const safeText = pay.text.replace(/</g, '&lt;').replace(/>/g, '&gt;');
194+
return `<memory id="${p.id}" type="${pay.type}" agent="${pay.source_agent}" client="${pay.client_id}" created="${pay.created_at}">\n${safeText}\n</memory>`;
195+
}).join('\n\n');
191196

192197
const prompt = CONSOLIDATION_PROMPT + memoriesText;
193198
const responseText = await complete(prompt);
@@ -200,6 +205,45 @@ async function consolidateBatch(points, clientId) {
200205
return { merged: 0, contradictions: 0, connections: 0, insights: 0 };
201206
}
202207

208+
// Validate: strip any memory IDs not in the current batch
209+
if (result.merged_facts) {
210+
for (const fact of result.merged_facts) {
211+
if (fact.source_memories) {
212+
fact.source_memories = fact.source_memories.filter(id => batchIds.has(id));
213+
}
214+
}
215+
}
216+
if (result.contradictions) {
217+
result.contradictions = result.contradictions.filter(c =>
218+
batchIds.has(c.memory_a) && batchIds.has(c.memory_b)
219+
);
220+
}
221+
if (result.connections) {
222+
for (const conn of result.connections) {
223+
if (conn.memories) {
224+
conn.memories = conn.memories.filter(id => batchIds.has(id));
225+
}
226+
}
227+
result.connections = result.connections.filter(c => c.memories && c.memories.length >= 2);
228+
}
229+
if (result.insights) {
230+
for (const insight of result.insights) {
231+
if (insight.source_memories) {
232+
insight.source_memories = insight.source_memories.filter(id => batchIds.has(id));
233+
}
234+
}
235+
}
236+
if (result.entities) {
237+
for (const ent of result.entities) {
238+
if (ent.mentioned_in) {
239+
ent.mentioned_in = ent.mentioned_in.filter(id => batchIds.has(id));
240+
}
241+
}
242+
}
243+
244+
const VALID_IMPORTANCE = ['critical', 'high', 'medium', 'low'];
245+
const sanitizeImportance = (val) => VALID_IMPORTANCE.includes(val) ? val : 'medium';
246+
203247
const now = new Date().toISOString();
204248
let merged = 0, contradictions = 0, connections = 0, insights = 0;
205249

@@ -232,7 +276,7 @@ async function consolidateBatch(points, clientId) {
232276
source_agent: 'consolidation-engine',
233277
client_id: fact.client_id || clientId,
234278
category: 'semantic',
235-
importance: fact.importance || 'medium',
279+
importance: sanitizeImportance(fact.importance),
236280
key: fact.key || contentHash,
237281
content_hash: contentHash,
238282
created_at: now,
@@ -321,7 +365,7 @@ async function consolidateBatch(points, clientId) {
321365
source_agent: 'consolidation-engine',
322366
client_id: clientId,
323367
category: 'semantic',
324-
importance: insight.importance || 'medium',
368+
importance: sanitizeImportance(insight.importance),
325369
content_hash: contentHash,
326370
created_at: now,
327371
last_accessed_at: now,

api/src/services/scrub.js

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,12 @@ const PATTERNS = [
1111
{ regex: /(?:smtp|email|mail).*?(?:pass|password)\s*[:=]\s*['"]?[^\s'"]{8,}['"]?/gi, replace: '[EMAIL_CRED_REDACTED]' },
1212
// SSH private keys
1313
{ regex: /-----BEGIN [\w\s]+ PRIVATE KEY-----[\s\S]*?-----END [\w\s]+ PRIVATE KEY-----/g, replace: '[PRIVATE_KEY_REDACTED]' },
14+
// AWS access key IDs
15+
{ regex: /AKIA[0-9A-Z]{16}/g, replace: '[AWS_KEY_REDACTED]' },
16+
// Connection strings (postgres, mongodb, redis, mysql, amqp)
17+
{ regex: /(?:postgres(?:ql)?|mongodb(?:\+srv)?|redis|mysql|amqp):\/\/[^\s'"]+/gi, replace: '[CONNECTION_STRING_REDACTED]' },
18+
// OpenAI / Anthropic API keys
19+
{ regex: /sk-(?:proj-|ant-)?[A-Za-z0-9_-]{20,}/g, replace: '[API_KEY_REDACTED]' },
1420
];
1521

1622
export function scrubCredentials(text) {
@@ -21,3 +27,18 @@ export function scrubCredentials(text) {
2127
}
2228
return scrubbed;
2329
}
30+
31+
/**
 * Recursively scrub all string values in a JSON-like value.
 *
 * Strings are passed through `scrubCredentials`; arrays and objects are
 * rebuilt with each element/own enumerable property scrubbed; every other
 * value (numbers, booleans, null, undefined) is returned unchanged. Property
 * names are copied as-is and the input is not mutated.
 *
 * @param {*} obj - Value to scrub.
 * @returns {*} A scrubbed copy for strings, arrays and objects; the same value otherwise.
 */
export function scrubObject(obj) {
  if (obj === null || obj === undefined) return obj;
  if (typeof obj === 'string') return scrubCredentials(obj);
  if (Array.isArray(obj)) return obj.map((item) => scrubObject(item));
  if (typeof obj === 'object') {
    // Object.fromEntries defines own data properties. Assigning with
    // `result[key] = value` would invoke the `__proto__` setter for a key named
    // "__proto__" (which JSON.parse produces as an ordinary own property),
    // silently dropping that entry and giving the copy a caller-chosen prototype.
    return Object.fromEntries(
      Object.entries(obj).map(([key, value]) => [key, scrubObject(value)]),
    );
  }
  return obj; // numbers, booleans, etc.
}

0 commit comments

Comments
 (0)