-
Notifications
You must be signed in to change notification settings - Fork 15
Expand file tree
/
Copy pathsanitization.py
More file actions
60 lines (50 loc) · 2.25 KB
/
sanitization.py
File metadata and controls
60 lines (50 loc) · 2.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
"""Content sanitization for external/untrusted inputs.
Mirrors the TypeScript sanitizeExternalContent() in
cdk/src/handlers/shared/sanitization.ts. Both implementations
must produce identical output for the same input — cross-language
parity is verified by shared test fixtures.
Applied to: memory records (before hashing on write, before injection
on read), GitHub issue/PR content (TS side only — Python agent receives
already-sanitized content from the orchestrator's hydrated context).
"""
import re
# Elements whose *content* is dangerous: match a full open/body/close element
# (the \2 backreference ties the closing tag to the opening tag name in
# group 2) or a lone self-closing/unclosed open tag. Stripped together with
# their contents by _strip_until_stable().
_DANGEROUS_TAGS = re.compile(
    r"(<(script|style|iframe|object|embed|form|input)[^>]*>[\s\S]*?</\2>"
    r"|<(script|style|iframe|object|embed|form|input)[^>]*\/?>)",
    re.IGNORECASE,
)
# Any remaining HTML-like opening or closing tag; the tag itself is removed
# but its inner text is kept.
_HTML_TAGS = re.compile(r"</?[a-z][^>]*>", re.IGNORECASE)
# Chat-role prefixes at line start ("SYSTEM:", "ASSISTANT:", "Human:") that
# mimic transcript structure; rewritten (not removed) by the sanitizer.
_INSTRUCTION_PREFIXES = re.compile(r"^(SYSTEM|ASSISTANT|Human)\s*:", re.MULTILINE | re.IGNORECASE)
# Well-known prompt-injection phrases; replaced with a bracketed marker so the
# text stays visible but loses its imperative form.
_INJECTION_PHRASES = re.compile(
    r"(?:ignore previous instructions|disregard (?:above|previous|all)|new instructions\s*:)",
    re.IGNORECASE,
)
# ASCII control characters, excluding \t (\x09), \n (\x0a) and \r (\x0d).
_CONTROL_CHARS = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f]")
# Unicode bidi marks and override/isolate controls (can visually reorder text).
_BIDI_CHARS = re.compile(r"[\u200e\u200f\u202a-\u202e\u2066-\u2069]")
# A byte-order mark anywhere other than the very start of the string
# ((?!^) — no MULTILINE flag, so ^ anchors only the string start).
_MISPLACED_BOM = re.compile(r"(?!^)\ufeff")
def _strip_until_stable(s: str, pattern: re.Pattern[str]) -> str:
"""Apply *pattern* repeatedly until the string stops changing.
A single pass can be bypassed by nesting fragments
(e.g. "<scrip<script></script>t>" reassembles after inner tag removal).
"""
while True:
prev = s
s = pattern.sub("", s)
if s == prev:
return s
def sanitize_external_content(text: str | None) -> str:
    """Sanitize external content before it enters the agent's context.

    Suspicious patterns are neutralized rather than blocked: they are
    replaced with bracketed markers, so the content remains visible to the
    LLM (legitimate discussion of prompts/instructions stays readable)
    while being structurally defanged.

    NOTE: the pass order below mirrors the TypeScript twin exactly —
    cross-language parity is verified by shared fixtures, so do not reorder.
    """
    # None and "" both normalize to the empty string.
    if not text:
        return ""
    # Iteratively strip dangerous elements (with contents), then any
    # leftover HTML-like tags (keeping their inner text).
    cleaned = _strip_until_stable(text, _DANGEROUS_TAGS)
    cleaned = _strip_until_stable(cleaned, _HTML_TAGS)
    # Defang transcript-style role prefixes and known injection phrases.
    cleaned = _INSTRUCTION_PREFIXES.sub(r"[SANITIZED_PREFIX] \1:", cleaned)
    cleaned = _INJECTION_PHRASES.sub("[SANITIZED_INSTRUCTION]", cleaned)
    # Drop invisible troublemakers: control chars, bidi marks, stray BOMs.
    for invisible in (_CONTROL_CHARS, _BIDI_CHARS, _MISPLACED_BOM):
        cleaned = invisible.sub("", cleaned)
    return cleaned