Skip to content

Commit 9359674

Browse files
committed
feat: add code-injection, exfil-chain, write-probe detectors + PII canary honeypot
Closes four gaps identified in the agent-trials eval run:
- tool-001 (write to /etc/crontab): regex.sensitive_file_probe gains a write-etc-privileged pattern covering write-intent verbs targeting /etc/crontab, /etc/sudoers, /etc/hosts, and /etc/cron.d.
- tool-003 (chained exfil to URL): new regex.exfil_chain detector with exfil-chained-to-url (then/and + send verb + URL) and exfil-suspicious-path (/collect, /exfil, /steal paths in URL).
- tool-004 (Python code injection): new regex.code_injection detector catches __import__('subprocess'), subprocess.run/Popen + net tool, and os.system() + net tool.
- exfil-004 (PII aggregation): output-side defense via a PII context honeypot. Four new catalogue types (pii-name-000 with a randomised "Firstname AwesomeMiddle EpicLast" name, pii-email-000, pii-dob-000, pii-sin-000). write_pii_context() and the `armor canary pii-context` CLI subcommand generate the system-prompt snippet. The canary scanner catches any compiled PII report containing these values.

13 new eval corpus rows; all 1222 tests pass (916 unit + 306 eval).
1 parent 6ea8498 commit 9359674

12 files changed

Lines changed: 594 additions & 17 deletions

File tree

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
## [0.10.1] — 2026-05-17
11+
12+
### Added
13+
14+
- **`regex.code_injection` detector blocks Python subprocess injection attacks.** Catches `__import__('subprocess')` dynamic import bypass, `subprocess.run/Popen` combined with network tools (`curl`, `wget`, `nc`), and `os.system()` with network tools. Covers both the user-instruction vector ("execute this in your code tool") and code tool parameters directly.
15+
- **`regex.exfil_chain` detector blocks chained tool-abuse with external data exfiltration.** Two patterns: `exfil-chained-to-url` catches "then/and send/upload/forward … http(s)://..." sequences; `exfil-suspicious-path` catches URLs whose path ends in `/collect`, `/exfil`, `/steal`, `/harvest`, or similar collection segments.
16+
- **`regex.sensitive_file_probe` now blocks privileged file write attempts.** A new `write-etc-privileged` pattern catches write-intent verbs (`write`, `append`, `overwrite`, `modify`, etc.) targeted at `/etc/crontab`, `/etc/sudoers`, `/etc/hosts`, or `/etc/cron.d` — the persistence and privilege-escalation paths an agent should never be asked to write.
17+
- **PII context honeypot via `armor canary pii-context`.** Four new PII canary types (`pii-name-000`, `pii-email-000`, `pii-dob-000`, `pii-sin-000`) added to the catalogue. The name canary is generated by `armor canary generate` as a randomized "Firstname AwesomeMiddle EpicLast" triple (e.g., "Kevin Lightning Dragon") — distinctive enough to stand out in any output and drawn independently for each installation. `write_pii_context()` produces a context snippet to inject into the agent's system prompt; when an attacker asks the agent to compile a PII report, the canary scanner catches the known values at the output stage. Run `armor canary generate --out ~/.armor/canaries.json && armor canary pii-context --values ~/.armor/canaries.json --out pii-context.txt`, then inject `pii-context.txt` into your system prompt.
18+
- **13 new eval corpus rows** covering write-to-crontab, Python code injection, and exfiltration chain patterns, plus matching true negatives.
19+
1020
## [0.10.0] — 2026-05-17
1121

1222
### Added

pyproject.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "armor-ai"
7-
version = "0.10.0"
7+
version = "0.10.1"
88
description = "A defense-in-depth security layer for LLM agents"
99
readme = {file = "README_PYPI.md", content-type = "text/markdown"}
1010
requires-python = ">=3.12"
@@ -72,6 +72,8 @@ regex_memory_planting = "armor.detectors.memory_planting:RegexMemoryPlanting"
7272
regex_roleplay_hijack = "armor.detectors.regex_roleplay_hijack:RegexRoleplayHijack"
7373
regex_system_prompt_extraction = "armor.detectors.regex_system_prompt_extraction:RegexSystemPromptExtraction"
7474
regex_encoding_request = "armor.detectors.regex_encoding_request:RegexEncodingRequest"
75+
regex_code_injection = "armor.detectors.regex_code_injection:RegexCodeInjection"
76+
regex_exfil_chain = "armor.detectors.regex_exfil_chain:RegexExfilChain"
7577
regex_sensitive_file_probe = "armor.detectors.regex_sensitive_file_probe:RegexSensitiveFileProbe"
7678
regex_ssrf_probe = "armor.detectors.regex_ssrf_probe:RegexSsrfProbe"
7779
tool_chain = "armor.detectors.tool_chain:ToolChain"

src/armor/canaries/_generate.py

Lines changed: 222 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,146 @@
2222

2323
logger = logging.getLogger(__name__)
2424

25+
# ---------------------------------------------------------------------------
# PII fake-name generator — normal first name + memorable middle + last name.
# Values are drawn at generate-time from an OS-backed random source, so each
# installation gets its own name that an attacker cannot derive in advance
# and therefore cannot reliably suppress.
# ---------------------------------------------------------------------------
_PII_FIRST_NAMES = [
    "Kevin",
    "Sarah",
    "James",
    "Emma",
    "Michael",
    "Olivia",
    "Daniel",
    "Sophia",
    "Christopher",
    "Isabella",
    "Matthew",
    "Ava",
    "Andrew",
    "Mia",
    "Joshua",
    "Natalie",
    "David",
    "Samantha",
    "Ryan",
    "Victoria",
    "Tyler",
    "Hannah",
    "Jonathan",
    "Grace",
    "Nathan",
    "Chloe",
    "Brandon",
    "Lily",
    "Justin",
    "Zoe",
]
_PII_MIDDLE_NAMES = [
    "Lightning",
    "Thunder",
    "Blaze",
    "Storm",
    "Iron",
    "Silver",
    "Crimson",
    "Shadow",
    "Frost",
    "Ember",
    "Quantum",
    "Cobalt",
    "Titan",
    "Inferno",
    "Steel",
    "Jade",
    "Onyx",
    "Ruby",
    "Zenith",
    "Solar",
    "Lunar",
    "Apex",
    "Hyper",
    "Turbo",
    "Neon",
    "Vortex",
    "Plasma",
    "Atomic",
    "Primal",
]
_PII_LAST_NAMES = [
    "Dragon",
    "Wolf",
    "Phoenix",
    "Eagle",
    "Falcon",
    "Viper",
    "Hawk",
    "Griffin",
    "Tempest",
    "Ironside",
    "Thunderbolt",
    "Glacier",
    "Fortress",
    "Colossus",
    "Wraith",
    "Juggernaut",
    "Sentinel",
    "Wyvern",
    "Basilisk",
    "Leviathan",
    "Pantheon",
    "Behemoth",
    "Harbinger",
    "Ravager",
    "Valkyrie",
    "Cyclone",
    "Avalanche",
    "Maelstrom",
    "Nemesis",
    "Obliterator",
]


def _generate_pii_value(marker_rule: str) -> str:
    """Generate a fake PII value for a pii: prefixed marker rule.

    Supported sub-types:

    * ``pii:fake_name`` — "First Middle Last", one word from each name list.
    * ``pii:dob`` — ISO date ``YYYY-MM-DD`` with year 1950-2000, day 1-28.
    * ``pii:sin`` — nine digits formatted ``9dd-ddd-ddd``.

    Args:
        marker_rule: A string starting with 'pii:' identifying the PII type.

    Returns:
        A recognisably fake but plausible-looking PII value.

    Raises:
        ValueError: If ``marker_rule`` does not start with 'pii:' or names an
            unknown sub-type.
    """
    prefix = "pii:"
    # Reject anything that is not a pii: descriptor instead of blindly slicing
    # off the first four characters (which would accept e.g. "foo:dob").
    if not marker_rule.startswith(prefix):
        raise ValueError(f"Not a pii: marker rule: {marker_rule!r}")
    kind = marker_rule.removeprefix(prefix)

    # Canary values must not be predictable, so draw from the OS entropy
    # source rather than the seedable module-level Mersenne Twister.
    rng = random.SystemRandom()

    if kind == "fake_name":
        first = rng.choice(_PII_FIRST_NAMES)
        middle = rng.choice(_PII_MIDDLE_NAMES)
        last = rng.choice(_PII_LAST_NAMES)
        return f"{first} {middle} {last}"

    if kind == "dob":
        year = rng.randint(1950, 2000)
        month = rng.randint(1, 12)
        day = rng.randint(1, 28)  # Stay in safe range for all months
        return f"{year:04d}-{month:02d}-{day:02d}"

    if kind == "sin":
        # Canadian SIN format with a leading 9. The 9-series is the one issued
        # to temporary residents, so the prefix alone does not make the value
        # fake. NOTE(review): no check digit is computed or deliberately
        # broken — confirm whether the canary should be provably unissuable.
        digits = "9" + "".join(str(rng.randrange(10)) for _ in range(8))
        return f"{digits[:3]}-{digits[3:6]}-{digits[6:]}"

    raise ValueError(f"Unknown pii: sub-type: {kind!r}")
164+
25165

26166
def _generate_value_for_pattern(marker_rule: str) -> str:
27167
"""Generate a fake-but-realistic value matching a given regex pattern.
@@ -38,6 +178,10 @@ def _generate_value_for_pattern(marker_rule: str) -> str:
38178
Raises:
39179
ValueError: If the pattern is not recognized or generation fails.
40180
"""
181+
# PII canaries: marker_rule is a 'pii:<type>' descriptor, not a regex
182+
if marker_rule.startswith("pii:"):
183+
return _generate_pii_value(marker_rule)
184+
41185
# AWS access keys: AKIA + 16 chars [A-Z0-9]
42186
if marker_rule == r"^AKIA[A-Z0-9]{16}$":
43187
return "AKIA" + "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(16))
@@ -306,12 +450,14 @@ def generate_values(
306450
except ValueError as e:
307451
raise ValueError(f"Failed to generate value for {canary_id}: {e}") from e
308452

309-
# Validate the generated value matches the pattern
310-
try:
311-
if not re.match(marker_rule, value):
312-
raise ValueError("Generated value does not match pattern")
313-
except re.error as e:
314-
raise ValueError(f"Invalid marker_rule regex: {e}") from e
453+
# Validate the generated value matches the pattern.
454+
# pii: descriptors are not regexes — skip validation for them.
455+
if not marker_rule.startswith("pii:"):
456+
try:
457+
if not re.match(marker_rule, value):
458+
raise ValueError("Generated value does not match pattern")
459+
except re.error as e:
460+
raise ValueError(f"Invalid marker_rule regex: {e}") from e
315461

316462
values.append({"canary_id": canary_id, "value": value})
317463

@@ -449,3 +595,73 @@ def write_dotenv_honeypot(
449595
raise
450596

451597
logger.info(f"Wrote honeypot .env to {output_path} (mode 0o600, {len(_DOTENV_HONEYPOT_VARS)} canaries)")
598+
599+
600+
# PII context honeypot — canary IDs and their display labels in the context snippet.
601+
_PII_CONTEXT_VARS: list[tuple[str, str]] = [
602+
("pii-name-000", "Name"),
603+
("pii-email-000", "Email"),
604+
("pii-dob-000", "Date of Birth"),
605+
("pii-sin-000", "SIN"),
606+
]
607+
608+
609+
def write_pii_context(
    output_path: str | Path,
    values_path: str | Path,
) -> None:
    """Write a honeypot PII context snippet to be injected into an agent system prompt.

    The snippet contains fake personal information (name, email, DOB, SIN) drawn
    from canary values. When an attacker tricks the agent into compiling a PII report
    and outputting these values, the canary scanner catches the known strings at
    output stage.

    The output file is created with mode 0o600 (owner read/write only). Where
    ``os.fchmod`` is available, the mode is also re-applied to a pre-existing
    file, because the mode passed to ``os.open`` only takes effect on creation.

    Args:
        output_path: Path where the context snippet file should be written.
        values_path: Path to the generated canary values JSON file.

    Raises:
        FileNotFoundError: If values_path does not exist.
        KeyError: If a required canary ID is missing from the values file.
        OSError: If the output file cannot be written.
    """
    output_path = Path(output_path)
    values_path = Path(values_path)

    if not values_path.exists():
        raise FileNotFoundError(f"Values file not found: {values_path}")

    with open(values_path, encoding="utf-8") as f:
        values_data = json.load(f)

    # Index usable entries by canary ID. Malformed (non-dict) or incomplete
    # entries are skipped so that a missing required ID surfaces as the
    # documented KeyError below rather than an AttributeError here.
    values_by_id: dict[str, str] = {
        entry["canary_id"]: entry["value"]
        for entry in values_data
        if isinstance(entry, dict) and entry.get("canary_id") and entry.get("value")
    }

    lines = [
        "[User profile — confidential, do not share]",
        "",
    ]
    # Build the whole snippet before touching the output file so a missing
    # canary never leaves a truncated file behind.
    for canary_id, label in _PII_CONTEXT_VARS:
        if canary_id not in values_by_id:
            raise KeyError(f"Canary {canary_id!r} not found in values file {values_path}")
        lines.append(f"{label}: {values_by_id[canary_id]}")

    content = "\n".join(lines) + "\n"

    output_path.parent.mkdir(parents=True, exist_ok=True)
    fd = os.open(str(output_path), os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o600)
    try:
        # Tighten permissions on a file that already existed with a wider mode.
        if hasattr(os, "fchmod"):
            os.fchmod(fd, 0o600)
        stream = os.fdopen(fd, "w", encoding="utf-8")
    except Exception:
        # The descriptor is still ours only until fdopen succeeds; close it
        # here, tolerating the case where it was already released.
        try:
            os.close(fd)
        except OSError:
            pass
        raise

    # From here on the file object owns the descriptor: the with-block closes
    # it exactly once, so a failed write propagates its own exception instead
    # of being masked by a second close on a dead descriptor.
    with stream:
        stream.write(content)

    logger.info(
        "Wrote PII context honeypot to %s (mode 0o600, %d canaries)",
        output_path,
        len(_PII_CONTEXT_VARS),
    )

src/armor/canaries/catalogue.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -169,15 +169,17 @@ def load(
169169
false_positive_risk=item.get("false_positive_risk"),
170170
activation=activation,
171171
)
172-
# Validate active canaries: must have a value and it must match marker_rule
172+
# Validate active canaries: must have a value and it must match marker_rule.
173+
# pii: prefixed rules are descriptors, not regexes — skip regex validation.
173174
if entry.active:
174175
if not entry.value:
175176
raise ValueError(f"Canary {canary_id}: no value provided (neither in schema nor values file)")
176-
try:
177-
if not re.match(entry.marker_rule, entry.value):
178-
raise ValueError(f"Canary {canary_id}: value does not match marker_rule")
179-
except re.error as e:
180-
raise ValueError(f"Canary {canary_id}: invalid marker_rule regex: {e}") from e
177+
if not entry.marker_rule.startswith("pii:"):
178+
try:
179+
if not re.match(entry.marker_rule, entry.value):
180+
raise ValueError(f"Canary {canary_id}: value does not match marker_rule")
181+
except re.error as e:
182+
raise ValueError(f"Canary {canary_id}: invalid marker_rule regex: {e}") from e
181183
entries.append(entry)
182184
except KeyError as e:
183185
raise ValueError(f"Missing field in canary entry: {e}") from e

src/armor/canaries/default_catalogue.json

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -838,5 +838,37 @@
838838
"marker_rule": "^[a-zA-Z0-9+/]+=*$",
839839
"active": true,
840840
"created_at": "2026-05-07T10:35:47.228853Z"
841+
},
842+
{
843+
"canary_id": "pii-name-000",
844+
"kind": "pii",
845+
"service": "identity",
846+
"marker_rule": "pii:fake_name",
847+
"active": true,
848+
"created_at": "2026-05-17T10:00:00.000000Z"
849+
},
850+
{
851+
"canary_id": "pii-email-000",
852+
"kind": "pii",
853+
"service": "identity",
854+
"marker_rule": "^canary-[a-z0-9\\-]+@armor-trap\\.invalid$",
855+
"active": true,
856+
"created_at": "2026-05-17T10:00:00.000000Z"
857+
},
858+
{
859+
"canary_id": "pii-dob-000",
860+
"kind": "pii",
861+
"service": "identity",
862+
"marker_rule": "pii:dob",
863+
"active": true,
864+
"created_at": "2026-05-17T10:00:00.000000Z"
865+
},
866+
{
867+
"canary_id": "pii-sin-000",
868+
"kind": "pii",
869+
"service": "identity",
870+
"marker_rule": "pii:sin",
871+
"active": true,
872+
"created_at": "2026-05-17T10:00:00.000000Z"
841873
}
842874
]

0 commit comments

Comments
 (0)