aws-samples
diff --git a/‎.gitleaks.toml‎
Lines changed: 12 additions & 0 deletions b/‎.gitleaks.toml‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 1 addition & 1 deletion b/‎.pre-commit-config.yaml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎agent/src/hooks.py‎
Lines changed: 86 additions & 4 deletions b/‎agent/src/hooks.py‎
Lines changed: 86 additions & 4 deletions
diff --git a/‎agent/src/output_scanner.py‎
Lines changed: 105 additions & 0 deletions b/‎agent/src/output_scanner.py‎
Lines changed: 105 additions & 0 deletions
diff --git a/‎agent/src/telemetry.py‎
Lines changed: 15 additions & 0 deletions b/‎agent/src/telemetry.py‎
Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,12 @@
+title = "sample-autonomous-cloud-coding-agents"
+
+[extend]
+useDefault = true
+
+[[allowlists]]
+description = "PEM-shaped fixtures for output_scanner / hook tests (not real keys)."
+targetRules = ["private-key"]
+paths = [
+    "^agent/tests/test_hooks\\.py$",
+    "^agent/tests/test_output_scanner\\.py$",
+]
@@ -22,7 +22,7 @@ repos:
 
   - repo: local
     hooks:
-      - id: gitleaks-staged
+      - id: gitleaks
         name: gitleaks (staged)
         entry: bash -lc 'cd "$(git rev-parse --show-toplevel)" && mise run security:secrets:staged'
         language: system
 
@@ -1,14 +1,16 @@
-"""PreToolUse hook callback for Cedar policy enforcement.
+"""PreToolUse and PostToolUse hook callbacks for policy enforcement.
 
-Integrates the PolicyEngine with the Claude Agent SDK's hook system
-to enforce tool-use policies at runtime.
+Integrates the PolicyEngine (Cedar, pre-execution) and the output scanner
+(regex, post-execution) with the Claude Agent SDK's hook system to enforce
+tool-use policies at runtime.
 """
 
 from __future__ import annotations
 
 import json
 from typing import TYPE_CHECKING, Any
 
+from output_scanner import scan_tool_output
 from shell import log
 
 if TYPE_CHECKING:
@@ -82,6 +84,70 @@ async def pre_tool_use_hook(
     }
 
 
+async def post_tool_use_hook(
+    hook_input: Any,
+    tool_use_id: str | None,
+    hook_context: Any,
+    *,
+    trajectory: _TrajectoryWriter | None = None,
+) -> dict:
+    """Screen a tool's output for secrets/PII after the tool has run.
+
+    The returned dict always carries ``hookSpecificOutput`` with
+    ``hookEventName`` set to ``"PostToolUse"``.  Three outcomes are possible:
+
+    * pass-through -- no ``updatedMCPToolOutput`` key.  Used when the input is
+      not a dict, when it has no ``tool_response`` key, or when the scanner
+      reports nothing sensitive.
+    * redacted -- ``updatedMCPToolOutput`` holds the scanner's redacted text
+      (steered enforcement: the output is sanitized rather than blocked).
+    * fail-closed -- the scanner raised, so ``updatedMCPToolOutput`` holds a
+      fixed placeholder instead of the tool output.
+
+    ``tool_use_id`` and ``hook_context`` are accepted but not used here.
+    When ``trajectory`` is provided, redactions and scanner errors are also
+    recorded through ``write_output_screening_decision``.
+    """
+
+    def _pass_through() -> dict:
+        # Response that leaves the tool output untouched.
+        return {"hookSpecificOutput": {"hookEventName": "PostToolUse"}}
+
+    if not isinstance(hook_input, dict):
+        log("WARN", "PostToolUse hook received non-dict input — passing through")
+        return _pass_through()
+
+    tool_name = hook_input.get("tool_name", "unknown")
+    if "tool_response" not in hook_input:
+        log("WARN", f"PostToolUse hook: missing 'tool_response' key for {tool_name}")
+        return _pass_through()
+
+    raw_output = hook_input["tool_response"]
+    # The scanner works on text, so anything that is not a str is stringified.
+    scan_text = raw_output if isinstance(raw_output, str) else str(raw_output)
+
+    try:
+        result = scan_tool_output(scan_text)
+    except Exception as exc:
+        # A scanner failure must not let unscreened output through.
+        log("ERROR", f"Output scanner failed for {tool_name}: {type(exc).__name__}: {exc}")
+        if trajectory:
+            trajectory.write_output_screening_decision(
+                tool_name, [f"SCANNER_ERROR: {type(exc).__name__}"], redacted=True, duration_ms=0.0
+            )
+        return {
+            "hookSpecificOutput": {
+                "hookEventName": "PostToolUse",
+                "updatedMCPToolOutput": "[Output redacted: screening error — fail-closed]",
+            }
+        }
+
+    if not result.has_sensitive_content:
+        return _pass_through()
+
+    # Record first, then log, then hand back the sanitized replacement.
+    if trajectory:
+        trajectory.write_output_screening_decision(
+            tool_name, result.findings, redacted=True, duration_ms=result.duration_ms
+        )
+    log("POLICY", f"OUTPUT REDACTED: {tool_name} — {', '.join(result.findings)}")
+    return {
+        "hookSpecificOutput": {
+            "hookEventName": "PostToolUse",
+            "updatedMCPToolOutput": result.redacted_content,
+        }
+    }
+
+
 def build_hook_matchers(
     engine: PolicyEngine,
     trajectory: _TrajectoryWriter | None = None,
@@ -99,6 +165,7 @@ def build_hook_matchers(
         HookInput,
         HookJSONOutput,
         HookMatcher,
+        PostToolUseHookSpecificOutput,
         SyncHookJSONOutput,
     )
 
@@ -110,8 +177,23 @@ async def _pre(
         result = await pre_tool_use_hook(
             hook_input, tool_use_id, ctx, engine=engine, trajectory=trajectory
         )
-        return SyncHookJSONOutput(**result)  # type: ignore[typeddict-item]
+        return SyncHookJSONOutput(**result)
+
+    async def _post(
+        hook_input: HookInput, tool_use_id: str | None, ctx: HookContext
+    ) -> HookJSONOutput:
+        try:
+            result = await post_tool_use_hook(hook_input, tool_use_id, ctx, trajectory=trajectory)
+            return SyncHookJSONOutput(**result)
+        except Exception as exc:
+            log("ERROR", f"PostToolUse wrapper crashed: {type(exc).__name__}: {exc}")
+            fail_closed: PostToolUseHookSpecificOutput = {
+                "hookEventName": "PostToolUse",
+                "updatedMCPToolOutput": "[Output redacted: hook error — fail-closed]",
+            }
+            return SyncHookJSONOutput(hookSpecificOutput=fail_closed)
 
     return {
         "PreToolUse": [HookMatcher(matcher=None, hooks=[_pre])],
+        "PostToolUse": [HookMatcher(matcher=None, hooks=[_post])],
     }
@@ -0,0 +1,105 @@
+"""Regex-based secret and PII scanner for tool output screening.
+
+Scans tool outputs for sensitive content (secrets, tokens, private keys,
+connection strings) and produces redacted versions suitable for re-injection
+into agent context.  Patterns are compiled once at module level.
+"""
+
+from __future__ import annotations
+
+import re
+import time
+from dataclasses import dataclass, field
+
+# ---------------------------------------------------------------------------
+# Scan result
+# ---------------------------------------------------------------------------
+
+
+@dataclass(frozen=True)
+class ScanResult:
+    """Result of scanning tool output for sensitive content.
+
+    The dataclass is frozen, so attributes cannot be reassigned after
+    construction.
+    """
+
+    # True when the scan produced at least one finding.
+    has_sensitive_content: bool
+    # The scanned text with every matched span replaced by a
+    # ``[REDACTED-<label>]`` placeholder; the text itself when nothing matched.
+    redacted_content: str
+    # One human-readable entry per finding, e.g. "AWS_KEY detected".
+    # NOTE: the list object itself stays mutable even though the class is frozen.
+    findings: list[str] = field(default_factory=list)
+    # Time spent scanning, in milliseconds; stays 0.0 when the scan was skipped.
+    duration_ms: float = 0.0
+
+
+# ---------------------------------------------------------------------------
+# Pattern registry
+# ---------------------------------------------------------------------------
+
+# Ordered (label, compiled pattern) pairs.  Each label is used both in the
+# finding text and in the ``[REDACTED-<label>]`` placeholder.
+_PATTERNS: list[tuple[str, re.Pattern[str]]] = [
+    # AWS access key IDs: long-term (AKIA) and temporary STS (ASIA) credentials
+    ("AWS_KEY", re.compile(r"(?:AKIA|ASIA)[0-9A-Z]{16}")),
+    # AWS secret access keys (40-char base64 near common keywords)
+    (
+        "AWS_SECRET",
+        re.compile(
+            r"(?:aws_secret_access_key|SecretAccessKey|AWS_SECRET_ACCESS_KEY)"
+            r"[\s=:\"']+([A-Za-z0-9/+=]{40})",
+            re.IGNORECASE,
+        ),
+    ),
+    # GitHub tokens (PAT, OAuth, server-to-server, user-to-server, refresh).
+    # The length is open-ended so a token longer than 36 characters is redacted
+    # in full instead of leaving its tail visible.
+    ("GITHUB_TOKEN", re.compile(r"(?:ghp|gho|ghs|ghu|ghr)_[a-zA-Z0-9]{36,}")),
+    # GitHub fine-grained personal access tokens
+    ("GITHUB_PAT", re.compile(r"github_pat_[a-zA-Z0-9_]{22,}")),
+    # Private keys (PEM blocks)
+    (
+        "PRIVATE_KEY",
+        re.compile(
+            r"-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----"
+            r"[\s\S]*?"
+            r"-----END (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----"
+        ),
+    ),
+    # Generic Bearer / token patterns (min 20-char token to avoid false positives
+    # on natural English like "bearer of good news")
+    ("BEARER_TOKEN", re.compile(r"Bearer\s+[a-zA-Z0-9\-._~+/]{20,}=*", re.IGNORECASE)),
+    # Connection strings with embedded passwords (protocol name capped at 20
+    # chars to avoid quadratic backtracking on long alphabetic strings).
+    # The user and password parts may not contain whitespace, so a plain URL
+    # followed later in the text by an unrelated ":" and "@" is not swallowed
+    # as one giant match.  The user part may be empty ("redis://:pw@host").
+    (
+        "CONNECTION_STRING",
+        re.compile(r"[a-zA-Z][a-zA-Z0-9+.-]{0,20}://[^\s:/@]*:[^\s@]+@[^\s\"']+"),
+    ),
+]
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+# Scan at most the first 5,000,000 characters of tool output to bound regex
+# execution time.
+_MAX_SCAN_LENGTH = 5_000_000
+
+
+def scan_tool_output(content: str | None) -> ScanResult:
+    """Scan *content* for secrets/PII and return a ``ScanResult``.
+
+    Non-string values should be converted to ``str`` before calling.
+    ``None`` and empty strings short-circuit to a clean result.
+
+    Content longer than ``_MAX_SCAN_LENGTH`` characters is cut to that length
+    before scanning.  Because the remainder is never inspected, truncation is
+    reported as a finding and a marker is appended to the redacted text.  A
+    caller that substitutes ``redacted_content`` whenever
+    ``has_sensitive_content`` is true therefore never forwards the unscanned
+    remainder.
+
+    Returns:
+        A ``ScanResult`` whose ``findings`` has one entry per pattern that
+        matched (plus one for truncation, if any) and whose ``duration_ms``
+        is the elapsed scan time.
+    """
+    if not content:
+        return ScanResult(has_sensitive_content=False, redacted_content=content or "")
+
+    start = time.monotonic()
+    findings: list[str] = []
+
+    truncated = len(content) > _MAX_SCAN_LENGTH
+    redacted = content[:_MAX_SCAN_LENGTH] if truncated else content
+
+    for label, pattern in _PATTERNS:
+        # subn replaces and counts in one pass, so no separate search() is needed.
+        redacted, hits = pattern.subn(f"[REDACTED-{label}]", redacted)
+        if hits:
+            findings.append(f"{label} detected")
+
+    if truncated:
+        # Fail closed: flag the result so the unscanned tail is not passed through.
+        findings.append("OVERSIZE_OUTPUT truncated")
+        redacted += "\n[TRUNCATED: output exceeded scan limit]"
+
+    elapsed_ms = (time.monotonic() - start) * 1000
+    return ScanResult(
+        has_sensitive_content=bool(findings),
+        redacted_content=redacted,
+        findings=findings,
+        duration_ms=elapsed_ms,
+    )
@@ -271,6 +271,21 @@ def write_policy_decision(
             }
         )
 
+    def write_output_screening_decision(
+        self, tool_name: str, findings: list[str], redacted: bool, duration_ms: float
+    ) -> None:
+        """Emit an OUTPUT_SCREENING event describing one post-tool-use output scan.
+
+        Args:
+            tool_name: Name of the tool whose output was scanned.
+            findings: Finding labels reported for that output.
+            redacted: Whether the output was replaced with a redacted version.
+            duration_ms: Time the scan took, in milliseconds.
+        """
+        screening_event = {
+            "event": "OUTPUT_SCREENING",
+            "task_id": self._task_id,
+            "tool_name": tool_name,
+            "findings": findings,
+            "redacted": redacted,
+            "duration_ms": duration_ms,
+        }
+        self._put_event(screening_event)
+
 
 # Values under these keys may contain tool stderr, paths, or incidental secrets.
 _METRICS_REDACT_KEYS = frozenset({"error"})