Skip to content

Commit dce788d

Browse files
author
bgagent
committed
fix(mem): simplify
1 parent 1afc853 commit dce788d

4 files changed

Lines changed: 56 additions & 6 deletions

File tree

agent/src/sanitization.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,19 @@
2828
_MISPLACED_BOM = re.compile(r"(?!^)\ufeff")
2929

3030

31+
def _strip_until_stable(s: str, pattern: re.Pattern[str]) -> str:
32+
"""Remove every match of *pattern* from *s*, re-running until a pass changes nothing.
33+
34+
A single pass can be bypassed by nesting fragments: deleting an inner match can join
35+
its neighbours into a new one (e.g. "<scrip<script></script>t>" becomes "<script>").
36+
"""
37+
# Each pass only deletes text, so any pass that changes the string shortens it and the loop ends.
while True:
38+
prev = s
39+
s = pattern.sub("", s)
40+
if s == prev:
41+
return s
42+
43+
3144
def sanitize_external_content(text: str | None) -> str:
3245
"""Sanitize external content before it enters the agent's context.
3346
@@ -37,8 +50,8 @@ def sanitize_external_content(text: str | None) -> str:
3750
"""
3851
if not text:
3952
return text or ""
40-
s = _DANGEROUS_TAGS.sub("", text)
41-
s = _HTML_TAGS.sub("", s)
53+
s = _strip_until_stable(text, _DANGEROUS_TAGS)
54+
s = _strip_until_stable(s, _HTML_TAGS)
4255
s = _INSTRUCTION_PREFIXES.sub(r"[SANITIZED_PREFIX] \1:", s)
4356
s = _INJECTION_PHRASES.sub("[SANITIZED_INSTRUCTION]", s)
4457
s = _CONTROL_CHARS.sub("", s)

agent/tests/test_prompts.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,17 @@ def test_self_closing_dangerous_tags(self):
144144
assert sanitize_memory_content("a<script/>b") == "ab"
145145
assert sanitize_memory_content("a<iframe/>b") == "ab"
146146

147+
def test_nested_fragment_bypass(self):
148+
# A dangerous tag split around a complete inner tag reassembles once the inner tag is removed
# NOTE(review): the commit changes sanitize_external_content; presumably sanitize_memory_content shares that path - confirm
149+
assert sanitize_memory_content("<scrip<script></script>t>alert(1)</script>") == ""
150+
assert sanitize_memory_content("<ifra<iframe></iframe>me src=x>") == ""
151+
# Double-nested: only the outermost "<sc" prefix is left over, and it is not a complete tag
152+
assert sanitize_memory_content("<sc<scr<script></script>ipt>ript>xss</script>") == "<sc"
153+
154+
def test_nested_fragment_bypass_html_tags(self):
155+
# The tag regex matches "<di<b>" as one tag, so "<div>" never reassembles; the stray "v>" stays in the output
156+
assert sanitize_memory_content("<di<b></b>v>text</div>") == "v>text"
157+
147158
def test_preserves_tabs_and_newlines(self):
148159
result = sanitize_memory_content("hello\tworld\nfoo")
149160
assert result == "hello\tworld\nfoo"

cdk/src/handlers/shared/sanitization.ts

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,21 @@ const CONTROL_CHARS = /[\x00-\x08\x0B\x0C\x0E-\x1F]/g;
4040
const BIDI_CHARS = /[\u200E\u200F\u202A-\u202E\u2066-\u2069]/g;
4141
const MISPLACED_BOM = /(?!^)\uFEFF/g;
4242

43+
/**
44+
* Remove every match of `pattern` from `s`, re-running the replacement until a pass changes nothing.
45+
*
46+
* A single pass can be bypassed by nesting fragments: deleting an inner match can join
47+
* its neighbours into a new one (e.g. "<scrip<script></script>t>" becomes "<script>").
*
* NOTE(review): with a non-global `pattern`, `replace` removes only the first match per pass;
* the loop still reaches the same fixed point but needs one pass per match. Confirm callers pass /g patterns.
*
* @param s - Text to strip.
* @param pattern - Regex whose matches are replaced with the empty string.
* @returns The first string that a further pass leaves unchanged.
48+
*/
49+
function stripUntilStable(s: string, pattern: RegExp): string {
50+
let prev;
51+
do {
52+
prev = s;
53+
s = s.replace(pattern, '');
54+
} while (s !== prev);
55+
return s;
56+
}
57+
4358
/**
4459
* Sanitize external content before it enters the agent's context.
4560
*
@@ -53,13 +68,11 @@ const MISPLACED_BOM = /(?!^)\uFEFF/g;
5368
export function sanitizeExternalContent(text: string): string {
5469
if (!text) return text || '';
5570

56-
let sanitized = text;
57-
5871
// 1. Strip dangerous HTML tags with their content
59-
sanitized = sanitized.replace(DANGEROUS_TAGS, '');
72+
let sanitized = stripUntilStable(text, DANGEROUS_TAGS);
6073

6174
// 2. Strip remaining HTML tags (preserve inner text)
62-
sanitized = sanitized.replace(HTML_TAGS, '');
75+
sanitized = stripUntilStable(sanitized, HTML_TAGS);
6376

6477
// 3. Neutralize embedded instruction patterns
6578
sanitized = sanitized.replace(INSTRUCTION_PREFIXES, '[SANITIZED_PREFIX] $1:');

cdk/test/handlers/shared/sanitization.test.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,19 @@ describe('sanitizeExternalContent', () => {
5252
expect(result).toContain('safe');
5353
});
5454

55+
test('strips nested fragment bypass (CodeQL incomplete multi-char sanitization)', () => {
56+
// A dangerous tag split around a complete inner tag reassembles once the inner tag is removed
57+
expect(sanitizeExternalContent('<scrip<script></script>t>alert(1)</script>')).toBe('');
58+
expect(sanitizeExternalContent('<ifra<iframe></iframe>me src=x>')).toBe('');
59+
// Double-nested: only the outermost "<sc" prefix is left over, and it is not a complete tag
60+
expect(sanitizeExternalContent('<sc<scr<script></script>ipt>ript>xss</script>')).toBe('<sc');
61+
});
62+
63+
test('strips nested fragment bypass for HTML tags', () => {
64+
// The tag regex matches "<di<b>" as one tag, so "<div>" never reassembles; the stray "v>" stays in the output
65+
expect(sanitizeExternalContent('<di<b></b>v>text</div>')).toBe('v>text');
66+
});
67+
5568
test('strips unclosed dangerous tags', () => {
5669
const input = 'before<script>alert("xss")after';
5770
const result = sanitizeExternalContent(input);

0 commit comments

Comments
 (0)