chore(policy): allow writes to .github/workflows by default; block nested .git paths

bgagent · bgagent · commit 546311e0c232 · 2026-04-12T14:30:07.000-05:00
diff --git a/agent/src/policy.py b/agent/src/policy.py
@@ -47,11 +47,11 @@
     resource == Agent::Tool::"Edit"
 );
 
-// All agents: forbid writes to protected paths
-forbid (principal, action == Agent::Action::"write_file", resource)
-when { context.file_path like ".github/workflows/*" };
+// All agents: forbid writes to .git internals (repo-relative ".git/*" and nested/absolute "*/.git/*")
 forbid (principal, action == Agent::Action::"write_file", resource)
 when { context.file_path like ".git/*" };
+forbid (principal, action == Agent::Action::"write_file", resource)
+when { context.file_path like "*/.git/*" };
 
 // All agents: forbid destructive bash commands
 forbid (principal, action == Agent::Action::"execute_bash", resource)
diff --git a/agent/tests/test_hooks.py b/agent/tests/test_hooks.py
@@ -45,12 +45,12 @@ def test_denies_restricted_tool(self):
         assert result["hookSpecificOutput"]["permissionDecision"] == "deny"
         assert "pr_review" in result["hookSpecificOutput"]["permissionDecisionReason"]
 
-    def test_denies_protected_path(self):
+    def test_denies_git_internals_path(self):
         engine = PolicyEngine(task_type="new_task", repo="owner/repo")
         hook_input = {
             "hook_event_name": "PreToolUse",
             "tool_name": "Write",
-            "tool_input": {"file_path": ".github/workflows/ci.yml"},
+            "tool_input": {"file_path": ".git/config"},
             "tool_use_id": "test-789",
             "session_id": "sess-1",
             "transcript_path": "/tmp/t",
diff --git a/agent/tests/test_policy.py b/agent/tests/test_policy.py
@@ -85,26 +85,30 @@ def test_allows_bash(self):
 
 
 class TestProtectedPaths:
-    def test_denies_write_to_github_workflows(self):
+    def test_denies_write_to_git_dir(self):
         engine = PolicyEngine(task_type="new_task", repo="owner/repo")
-        result = engine.evaluate_tool_use("Write", {"file_path": ".github/workflows/ci.yml"})
+        result = engine.evaluate_tool_use("Write", {"file_path": ".git/config"})
         assert result.allowed is False
-        assert ".github/workflows/ci.yml" in result.reason
 
-    def test_denies_write_to_git_dir(self):
+    def test_denies_write_to_git_dir_absolute_path(self):
         engine = PolicyEngine(task_type="new_task", repo="owner/repo")
-        result = engine.evaluate_tool_use("Write", {"file_path": ".git/config"})
+        result = engine.evaluate_tool_use("Write", {"file_path": "/workspace/abc123/.git/config"})
         assert result.allowed is False
 
     def test_allows_write_to_normal_path(self):
         engine = PolicyEngine(task_type="new_task", repo="owner/repo")
         result = engine.evaluate_tool_use("Write", {"file_path": "src/app.ts"})
         assert result.allowed is True
 
-    def test_denies_edit_to_github_workflows(self):
+    def test_allows_write_to_github_workflows(self):
+        engine = PolicyEngine(task_type="new_task", repo="owner/repo")
+        result = engine.evaluate_tool_use("Write", {"file_path": ".github/workflows/ci.yml"})
+        assert result.allowed is True
+
+    def test_allows_edit_to_github_workflows(self):
         engine = PolicyEngine(task_type="new_task", repo="owner/repo")
         result = engine.evaluate_tool_use("Edit", {"file_path": ".github/workflows/deploy.yml"})
-        assert result.allowed is False
+        assert result.allowed is True
 
 
 class TestDestructiveBashCommands:
@@ -172,9 +176,9 @@ def test_allows_path_with_quotes(self):
         result = engine.evaluate_tool_use("Write", {"file_path": '/workspace/it"s-a-file.ts'})
         assert result.allowed is True
 
-    def test_denies_protected_path_with_quotes(self):
+    def test_denies_git_dir_path_with_quotes(self):
         engine = PolicyEngine(task_type="new_task", repo="owner/repo")
-        result = engine.evaluate_tool_use("Write", {"file_path": '.github/workflows/ci"test.yml'})
+        result = engine.evaluate_tool_use("Write", {"file_path": '.git/hooks/pre"commit'})
         assert result.allowed is False
 
 
diff --git a/docs/design/SECURITY.md b/docs/design/SECURITY.md
@@ -134,15 +134,15 @@ Submission-time policy decisions (validation, onboarding gate, guardrail screeni
 
 ### Policy resolution and authorization (planned)
 
-**Partially implemented / Planned (Iteration 5, Phase 2):** Cedar as the single policy engine for both **operational policy** (budget/quota/tool-access resolution, tool-call interception rules) and **authorization** (multi-tenant access control, extended when multi-user/team lands). **Current state:** An in-process Cedar policy engine (`agent/src/policy.py`, using `cedarpy`) enforces a deny-list model for tool-call governance: `pr_review` agents are forbidden from using `Write` and `Edit` tools, writes to protected paths (`.github/workflows/*`, `.git/*`) are blocked for all agents, and destructive bash commands (`rm -rf /`, `git push --force`) are denied. The engine is fail-closed — if `cedarpy` is unavailable or evaluation errors occur, all tool calls are denied. Per-repo custom Cedar policies can be injected via Blueprint `security.cedarPolicies`. The PreToolUse hook (`agent/src/hooks.py`) integrates the policy engine with the Claude Agent SDK's hook system, and denied decisions emit `POLICY_DECISION` telemetry events via `agent/src/telemetry.py`. **Planned:** Cedar replaces the scattered merge logic across TypeScript handlers with a unified policy evaluation. A thin `policy.ts` adapter translates Cedar decisions into `PolicyDecision` objects consumed by existing handlers. Cedar is preferred over OPA: it is AWS-native, has formal verification guarantees, integrates with AgentCore Gateway, and policies can be evaluated in-process via the Cedar SDK without a separate service dependency. Cedar's binary permit/forbid model supports the three enforcement modes (`enforced`, `observed`, `steered`) via a **virtual-action classification pattern**: the interceptor evaluates against multiple virtual actions (`invoke_tool`, `invoke_tool_steered`, `invoke_tool_denied`) and uses the first permitted action to determine the mode. 
For example, `forbid(principal, action == Action::"invoke_tool", resource) when { resource.path like ".github/workflows/*" && principal.capability_tier != "elevated" }` blocks the call, while `permit(principal, action == Action::"invoke_tool_steered", resource) when { context.output_contains_pii }` triggers PII redaction instead of blocking. Cedar policies will be stored in Amazon Verified Permissions and loaded at hydration/session-start time — policy changes take effect without CDK redeployment. When multi-user/team support lands, the same Cedar policy store expands to cover tenant-specific authorization (user/team/repo scoping, team budgets, risk-based approval requirements).
+**Partially implemented / Planned (Iteration 5, Phase 2):** Cedar as the single policy engine for both **operational policy** (budget/quota/tool-access resolution, tool-call interception rules) and **authorization** (multi-tenant access control, extended when multi-user/team lands). **Current state:** An in-process Cedar policy engine (`agent/src/policy.py`, using `cedarpy`) enforces a deny-list model for tool-call governance: `pr_review` agents are forbidden from using `Write` and `Edit` tools, writes to `.git/*` internals are blocked for all agents, and destructive bash commands (`rm -rf /`, `git push --force`) are denied. The engine is fail-closed — if `cedarpy` is unavailable or evaluation errors occur, all tool calls are denied. Per-repo custom Cedar policies can be injected via Blueprint `security.cedarPolicies`. The PreToolUse hook (`agent/src/hooks.py`) integrates the policy engine with the Claude Agent SDK's hook system, and denied decisions emit `POLICY_DECISION` telemetry events via `agent/src/telemetry.py`. **Planned:** Cedar replaces the scattered merge logic across TypeScript handlers with a unified policy evaluation. A thin `policy.ts` adapter translates Cedar decisions into `PolicyDecision` objects consumed by existing handlers. Cedar is preferred over OPA: it is AWS-native, has formal verification guarantees, integrates with AgentCore Gateway, and policies can be evaluated in-process via the Cedar SDK without a separate service dependency. Cedar's binary permit/forbid model supports the three enforcement modes (`enforced`, `observed`, `steered`) via a **virtual-action classification pattern**: the interceptor evaluates against multiple virtual actions (`invoke_tool`, `invoke_tool_steered`, `invoke_tool_denied`) and uses the first permitted action to determine the mode. 
For example, `forbid(principal, action == Action::"invoke_tool", resource) when { resource.path like ".github/workflows/*" && principal.capability_tier != "elevated" }` blocks the call, while `permit(principal, action == Action::"invoke_tool_steered", resource) when { context.output_contains_pii }` triggers PII redaction instead of blocking. Cedar policies will be stored in Amazon Verified Permissions and loaded at hydration/session-start time — policy changes take effect without CDK redeployment. When multi-user/team support lands, the same Cedar policy store expands to cover tenant-specific authorization (user/team/repo scoping, team budgets, risk-based approval requirements).
 
 ### Mid-execution enforcement (planned)
 
 Today, once an agent session starts, the orchestrator can only observe it via polling (session running or terminated). There is no mechanism to detect or intervene when an agent goes off the rails mid-session — infinite tool-call loops, excessive file writes, or cost runaway. The orchestrator's hard timeout is the only backstop.
 
 **Planned (Iteration 5):** Two complementary mechanisms address this gap:
 
-1. **Tool-call interceptor (Guardian pattern)** — A policy-evaluation layer in the agent harness (`agent/src/hooks.py` + `agent/src/policy.py`) that sits between the agent SDK's tool-call decision and actual tool execution. **Current state:** The pre-execution stage is implemented: a Cedar-based `PolicyEngine` evaluates tool calls via a PreToolUse hook before execution. The deny-list model blocks `Write`/`Edit` for `pr_review` tasks, protects `.github/workflows/*` and `.git/*` paths, and denies destructive bash commands. The engine is fail-closed (denies on error or missing `cedarpy`). Per-repo custom Cedar policies are supported via Blueprint `security.cedarPolicies`. **Planned extensions:** Evaluation is split into two stages: a **pre-execution stage** (implemented) that validates tool inputs before the tool runs (tool-level deny-list via Cedar policies, file path deny patterns for protected paths, bash command deny patterns for destructive commands, and per-repo custom Cedar policies from Blueprint `security.cedarPolicies`) and blocks disallowed operations before they execute, and a **post-execution stage** (planned) that screens tool outputs after the tool runs (PII patterns in file content, secrets in command output, sensitive data leakage) and can redact or flag content before it re-enters the agent context. The interceptor can allow, modify (e.g. redact secrets from output), or deny tool calls. Denied calls return a structured error to the agent, which can retry with a different approach. This follows the Guardian interceptor pattern (Hu et al. 2025) — enforcement happens at tool-call time, not before the session starts (input guardrails) or after it ends (validation pipeline). Denied decisions emit `POLICY_DECISION` telemetry events via `agent/src/telemetry.py`. Combined with per-tool-call structured telemetry (Iteration 3d), every interceptor decision will be logged as a `PolicyDecisionEvent`.
+1. **Tool-call interceptor (Guardian pattern)** — A policy-evaluation layer in the agent harness (`agent/src/hooks.py` + `agent/src/policy.py`) that sits between the agent SDK's tool-call decision and actual tool execution. **Current state:** The pre-execution stage is implemented: a Cedar-based `PolicyEngine` evaluates tool calls via a PreToolUse hook before execution. The deny-list model blocks `Write`/`Edit` for `pr_review` tasks, protects `.git/*` internals, and denies destructive bash commands. The engine is fail-closed (denies on error or missing `cedarpy`). Per-repo custom Cedar policies are supported via Blueprint `security.cedarPolicies`. **Planned extensions:** Evaluation is split into two stages: a **pre-execution stage** (implemented) that validates tool inputs before the tool runs (tool-level deny-list via Cedar policies, file path deny patterns for protected paths, bash command deny patterns for destructive commands, and per-repo custom Cedar policies from Blueprint `security.cedarPolicies`) and blocks disallowed operations before they execute, and a **post-execution stage** (planned) that screens tool outputs after the tool runs (PII patterns in file content, secrets in command output, sensitive data leakage) and can redact or flag content before it re-enters the agent context. The interceptor can allow, modify (e.g. redact secrets from output), or deny tool calls. Denied calls return a structured error to the agent, which can retry with a different approach. This follows the Guardian interceptor pattern (Hu et al. 2025) — enforcement happens at tool-call time, not before the session starts (input guardrails) or after it ends (validation pipeline). Denied decisions emit `POLICY_DECISION` telemetry events via `agent/src/telemetry.py`. Combined with per-tool-call structured telemetry (Iteration 3d), every interceptor decision will be logged as a `PolicyDecisionEvent`.
 
 2. **Behavioral circuit breaker** — Lightweight monitoring of tool-call patterns within a session: call frequency (calls per minute), cumulative cost, repeated failures on the same tool, and file mutation rate. When metrics exceed configurable thresholds (e.g. >50 tool calls/minute, >$10 cumulative cost, >5 consecutive failures), the circuit breaker pauses or terminates the session and emits a `circuit_breaker_triggered` event. This catches runaway loops and cost explosions before the hard session timeout. Thresholds are configurable per-repo via Blueprint `security` props.
 
@@ -257,7 +257,7 @@ AgentCore Memory has **no native backup mechanism**. This is a significant gap f
 - **No customer-managed KMS** — all encryption at rest uses AWS-managed keys. Customer-managed KMS can be added if required by compliance policy.
 - **CORS is fully open** — `ALL_ORIGINS` is configured for CLI consumption. Restrict origins when exposing browser clients.
 - **DNS Firewall IP bypass** — DNS Firewall does not block direct IP connections (see [NETWORK_ARCHITECTURE.md](./NETWORK_ARCHITECTURE.md#dns-firewall)).
-- **Partial tool access control** — Cedar-based policy enforcement (`agent/src/policy.py`) provides per-task-type tool restrictions (e.g. `pr_review` agents cannot use `Write`/`Edit`), path-based write protection, and destructive command blocking. Per-repo custom Cedar policies are supported via Blueprint `security.cedarPolicies`. **Important:** custom policies for `write_file` and `execute_bash` actions must use `context.file_path` / `context.command` in `when` clauses — not `resource ==` matching — because the engine uses fixed sentinel resource IDs to avoid Cedar entity UID parsing failures on special characters. `invoke_tool` actions use the real tool name as resource ID, so `resource ==` matching works for tool-level policies. Full tiered tool access (capability tiers, MCP server allowlisting) is planned for Iteration 5.
+- **Partial tool access control** — Cedar-based policy enforcement (`agent/src/policy.py`) provides per-task-type tool restrictions (e.g. `pr_review` agents cannot use `Write`/`Edit`), `.git/*` write protection (including nested or absolute `*/.git/*` paths), and destructive command blocking. `.github/workflows/*` is not blocked by default because agents may legitimately need to modify CI workflows; operators who need workflow protection can add it as a per-repo custom Cedar policy via Blueprint `security.cedarPolicies`. **Important:** custom policies for `write_file` and `execute_bash` actions must use `context.file_path` / `context.command` in `when` clauses — not `resource ==` matching — because the engine uses fixed sentinel resource IDs to avoid Cedar entity UID parsing failures on special characters. `invoke_tool` actions use the real tool name as resource ID, so `resource ==` matching works for tool-level policies. Full tiered tool access (capability tiers, MCP server allowlisting) is planned for Iteration 5.
 
 ## Reference
 
diff --git a/docs/src/content/docs/design/Security.md b/docs/src/content/docs/design/Security.md