Skip to content

Commit 388d5f9

Browse files
fix(proxy): per-scenario namespace isolation to prevent data pollution (DAK-6929) (#222)
Different playground scenarios (Graph Explorer, LLM Compare, Multi-Agent, etc.) now get separate engine namespaces so financial data from one scenario cannot leak into another. Multi-agent _agent_a/_agent_b intentionally share a namespace for the cross-agent demo feature. Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent df5ffb9 commit 388d5f9

3 files changed

Lines changed: 355 additions & 38 deletions

File tree

docker/playground/proxy/namespace.js

Lines changed: 105 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
const crypto = require('crypto');
44

55
// =============================================================================
6-
// Per-session agent_id namespacing (DAK-6757) — cross-session PII isolation.
6+
// Per-session + per-scenario agent_id namespacing
7+
// (DAK-6757 cross-session PII isolation + DAK-6929 scenario isolation).
78
// =============================================================================
89
// Every playground session shares the public "playground-demo" agent_id when
910
// talking to the engine. Without this layer ANY session can recall what ANY
@@ -13,23 +14,74 @@ const crypto = require('crypto');
1314
// Session C recalls {"agent_id":"playground-demo","query":"SECRET"} -> leak
1415
//
1516
// We give each session its OWN engine namespace by rewriting `agent_id` in the
16-
// forwarded request to `playground-demo-<sha256(sessionId)[:12]>`, and we
17-
// transparently restore the client's original `agent_id` in the engine's
18-
// response so the isolation is invisible to the frontend (no client-visible
19-
// change to the agent_id format).
17+
// forwarded request to `playground-demo-<sha256(material)[:12]>` (material is sessionId alone for the default scenario, otherwise `sessionId:scenarioKey`),
18+
// and we transparently restore the client's original `agent_id` in the engine's
19+
// response so the isolation is invisible to the frontend.
20+
//
21+
// DAK-6929: Different playground scenarios (Guided Tour, Graph Explorer,
22+
// Multi-Agent, etc.) now get SEPARATE namespaces so financial data from the
23+
// Store scenario cannot leak into Graph Explorer, etc.
2024
// =============================================================================
2125

2226
const NS_PREFIX = 'playground-demo';
2327

28+
// ---------------------------------------------------------------------------
29+
// Scenario key extraction (DAK-6929)
30+
// ---------------------------------------------------------------------------
31+
// Frontend agent_ids follow the pattern `pg_XXXXXX_<suffix>` where the suffix
32+
// identifies the scenario. We map suffixes to scenario keys so that:
33+
// - each scenario gets its own isolated namespace
34+
// - _agent_a and _agent_b share a namespace (multi-agent demo feature)
35+
// - all _llm_* variants share a namespace (they seed different agent_ids)
36+
// - `playground-demo` (API Explorer auto-seed) maps to `default`
37+
38+
/**
 * Map a client-supplied agent_id to the scenario key that salts the
 * per-session namespace hash.
 *
 * Resolution order (first match wins):
 *   1. missing, empty or non-string id, or the bare NS_PREFIX id -> 'default'
 *   2. id ends with `_agent_a` or `_agent_b`                      -> 'multiagent'
 *   3. id contains `_llm_`                                        -> 'llm'
 *   4. id ends with `_graphex`                                    -> 'graphex'
 *   5. id shaped like `pg_<prefix>_<suffix>`                      -> `<suffix>`
 *   6. anything else                                              -> the id itself, cut to 64 chars
 *
 * @param {string} agentId — the raw agent_id from the client request
 * @returns {string} scenario key used to salt the namespace hash
 */
function scenarioKey(agentId) {
  if (!agentId || typeof agentId !== 'string') return 'default';

  // The bare playground-demo id (API Explorer / auto-seed) keeps the default key.
  if (agentId === NS_PREFIX) return 'default';

  // Multi-agent: _agent_a and _agent_b deliberately share one key.
  if (agentId.endsWith('_agent_a') || agentId.endsWith('_agent_b')) return 'multiagent';

  // All LLM compare variants share one key.
  if (agentId.includes('_llm_')) return 'llm';

  // Graph explorer
  if (agentId.endsWith('_graphex')) return 'graphex';

  // Generic `pg_<prefix>_<suffix>` ids, where <prefix> is at least 6 characters.
  // The quantifier is lazy on purpose: the prefix class contains `_`, so a
  // greedy match would swallow every segment but the last and map both
  // `pg_x_guided_tour` and `pg_x_graph_tour` to the same key `tour`.
  const m = agentId.match(/^pg_[A-Za-z0-9_-]{6,}?_(.+)$/);
  if (m) return m[1];

  // Not a pg_ id: use it verbatim, truncated to 64 characters.
  return agentId.length > 64 ? agentId.slice(0, 64) : agentId;
}
66+
2467
/**
25-
* Deterministic per-session engine namespace. The same session id always maps
26-
* to the same namespace, so a session can always recall its own stores, while
27-
* two different sessions can never collide.
68+
* Deterministic per-session, per-scenario engine namespace.
69+
*
70+
* The same (sessionId, agentId) pair always maps to the same namespace.
71+
* Different scenarios within the same session get different namespaces so
72+
* data cannot leak across playground modes (DAK-6929).
73+
*
74+
* For backward compatibility, when called with only sessionId (no agentId),
75+
* it falls back to the original DAK-6757 behavior (scenarioKey = 'default').
76+
*
2877
* @param {string} sessionId
78+
* @param {string} [agentId] — client agent_id, used to derive the scenario key
2979
* @returns {string}
3080
*/
31-
function sessionNamespace(sessionId) {
32-
const digest = crypto.createHash('sha256').update(String(sessionId)).digest('hex');
81+
function sessionNamespace(sessionId, agentId) {
  const scenario = scenarioKey(agentId);
  const sid = String(sessionId);

  // The default scenario hashes the bare session id, so calling with only a
  // sessionId yields the same namespace as the single-argument form did.
  let hashInput = sid;
  if (scenario !== 'default') {
    hashInput = [sid, scenario].join(':');
  }

  const hex = crypto.createHash('sha256').update(hashInput).digest('hex');

  // Only the first 12 hex characters of the digest are kept.
  return [NS_PREFIX, hex.slice(0, 12)].join('-');
}
3587

@@ -45,39 +97,72 @@ const ITEM_ARRAY_KEYS = ['memories', 'items', 'queries'];
4597
* shared default namespace. Nested batch items are only rewritten when they
4698
* already carry their own agent_id (otherwise they inherit the top-level one).
4799
*
100+
* DAK-6929: the namespace now incorporates the scenario key derived from the
101+
* client's agent_id, so each playground mode gets its own isolated data.
102+
*
48103
* @param {Buffer} bodyBuf raw request body
49-
* @param {string} namespace session namespace from {@link sessionNamespace}
50-
* @returns {{body: Buffer, clientAgentId: (string|null)}}
104+
* @param {string} sessionId session identifier (used with agent_id to derive namespace)
105+
* @param {string} [namespaceOverride] pre-computed namespace (backward compat for callers
106+
* that already called sessionNamespace themselves). It is used directly only when it
107+
* starts with `playground-demo-`; otherwise the namespace is derived from sessionId and the first agent_id found in the body.
108+
* @returns {{body: Buffer, clientAgentId: (string|null), namespace: (string|null)}}
51109
* body — rewritten buffer (or the original when not JSON)
52110
* clientAgentId — the original agent_id to restore in the response, or null
53111
* when the body was not rewritten (not JSON). Defaults to
54112
* "playground-demo" when the client sent no agent_id.
113+
* namespace — the engine namespace that was injected (for response restore)
55114
*/
56-
function rewriteRequestAgentId(bodyBuf, namespace) {
57-
if (!bodyBuf || bodyBuf.length === 0) return { body: bodyBuf, clientAgentId: null };
115+
function rewriteRequestAgentId(bodyBuf, sessionId, namespaceOverride) {
116+
if (!bodyBuf || bodyBuf.length === 0) return { body: bodyBuf, clientAgentId: null, namespace: null };
58117

59118
let parsed;
60119
try {
61120
parsed = JSON.parse(bodyBuf.toString('utf8'));
62121
} catch {
63-
return { body: bodyBuf, clientAgentId: null }; // not JSON — forward untouched
122+
return { body: bodyBuf, clientAgentId: null, namespace: null }; // not JSON — forward untouched
64123
}
65124
if (!parsed || typeof parsed !== 'object') {
66-
return { body: bodyBuf, clientAgentId: null };
125+
return { body: bodyBuf, clientAgentId: null, namespace: null };
67126
}
68127

128+
// Extract the first agent_id we find to derive the scenario-aware namespace.
69129
let clientAgentId = null;
130+
const findFirst = (obj) => {
131+
if (!obj || typeof obj !== 'object') return;
132+
if (typeof obj.agent_id === 'string' && clientAgentId === null) {
133+
clientAgentId = obj.agent_id;
134+
}
135+
};
136+
const roots = Array.isArray(parsed) ? parsed : [parsed];
137+
for (const root of roots) {
138+
findFirst(root);
139+
if (clientAgentId) break;
140+
if (root && typeof root === 'object' && !Array.isArray(root)) {
141+
for (const key of ITEM_ARRAY_KEYS) {
142+
if (Array.isArray(root[key])) {
143+
for (const item of root[key]) { findFirst(item); if (clientAgentId) break; }
144+
}
145+
if (clientAgentId) break;
146+
}
147+
}
148+
}
149+
150+
// Compute the namespace: if a pre-computed override is provided AND it looks
151+
// like a session namespace, use it directly (backward compat). Otherwise
152+
// derive from sessionId + agent_id scenario key.
153+
const namespace = namespaceOverride && namespaceOverride.startsWith(NS_PREFIX + '-')
154+
? namespaceOverride
155+
: sessionNamespace(sessionId, clientAgentId);
156+
70157
const applyTo = (obj, force) => {
71158
if (!obj || typeof obj !== 'object') return;
72159
if (typeof obj.agent_id === 'string') {
73-
if (clientAgentId === null) clientAgentId = obj.agent_id;
74160
obj.agent_id = namespace;
75161
} else if (force) {
76162
obj.agent_id = namespace;
77163
}
78164
};
79165

80-
const roots = Array.isArray(parsed) ? parsed : [parsed];
81166
for (const root of roots) {
82167
applyTo(root, true); // force isolation on the request root
83168
if (root && typeof root === 'object' && !Array.isArray(root)) {
@@ -92,6 +177,7 @@ function rewriteRequestAgentId(bodyBuf, namespace) {
92177
return {
93178
body: Buffer.from(JSON.stringify(parsed), 'utf8'),
94179
clientAgentId: clientAgentId === null ? NS_PREFIX : clientAgentId,
180+
namespace,
95181
};
96182
}
97183

@@ -110,4 +196,4 @@ function restoreResponseAgentId(buf, namespace, restoreTo) {
110196
return Buffer.from(text, 'utf8');
111197
}
112198

113-
module.exports = { sessionNamespace, rewriteRequestAgentId, restoreResponseAgentId, NS_PREFIX };
199+
module.exports = { sessionNamespace, scenarioKey, rewriteRequestAgentId, restoreResponseAgentId, NS_PREFIX };

0 commit comments

Comments
 (0)