diff --git a/README.md b/README.md index a7301b7..ecfaa43 100644 --- a/README.md +++ b/README.md @@ -56,11 +56,11 @@ ABCA is under active development. The platform ships iteratively — each iterat |---|---|---| | **1** | Done | Agent runs on AWS, CLI submit, branch + PR | | **2** | Done | Production orchestrator, API contract, task management, observability, security, webhooks | -| **3a** | Done | Repo onboarding, per-repo GitHub App credentials, turn caps, prompt guide | +| **3a** | Done | Repo onboarding, per-repo credentials, turn caps, prompt guide | | **3b** | Done | Memory Tier 1, insights, agent self-feedback, prompt versioning, commit attribution | | **3bis** | Done | Hardening — reconciler error tracking, error serialization, test coverage gaps | | **3c** | WIP | Pre-flight checks, persistent session storage, deterministic validation, PR review task type, multi-modal input, input guardrail screening | -| **3d** | Planned | Review feedback loop, PR outcome tracking, evaluation pipeline | +| **3d** | Planned | Review feedback loop, PR outcome tracking, evaluation pipeline, memory input hardening | | **4** | Planned | GitLab, visual proof, Slack, control panel, WebSocket streaming | | **5** | Planned | Pre-warming, multi-user/team, cost management, output guardrails, alternate runtime | | **6** | Planned | Skills learning, multi-repo, iterative feedback, multiplayer, CDK constructs | diff --git a/cdk/src/handlers/shared/context-hydration.ts b/cdk/src/handlers/shared/context-hydration.ts index 49ba1c5..cfc2348 100644 --- a/cdk/src/handlers/shared/context-hydration.ts +++ b/cdk/src/handlers/shared/context-hydration.ts @@ -793,12 +793,12 @@ export interface HydrateContextOptions { /** * Hydrate context for a task: resolve GitHub token, fetch issue/PR, enforce - * token budget, assemble the user prompt, and (for PR tasks) screen through - * Bedrock Guardrail for prompt injection. 
+ * token budget, assemble the user prompt, and screen through Bedrock Guardrail + * for prompt injection (PR tasks; new_task when issue content is present). * @param task - the task record from DynamoDB. * @param options - optional per-repo overrides. - * @returns the hydrated context. For PR tasks, `guardrail_blocked` is set when - * the guardrail intervened. + * @returns the hydrated context. `guardrail_blocked` is set when the guardrail + * intervened (PR tasks: always screened; new_task: screened when issue content is present). * @throws GuardrailScreeningError when the Bedrock Guardrail API call fails * (fail-closed — propagated to prevent unscreened content from reaching the agent). */ @@ -990,13 +990,19 @@ export async function hydrateContext(task: TaskRecord, options?: HydrateContextO return prContext; } - // Standard task: existing behavior + // Standard task const budgetResult = enforceTokenBudget(issue, task.task_description, USER_PROMPT_TOKEN_BUDGET); issue = budgetResult.issue; userPrompt = assembleUserPrompt(task.task_id, task.repo, issue, budgetResult.taskDescription); const tokenEstimate = estimateTokens(userPrompt); + // Screen assembled prompt when it includes GitHub issue content (attacker-controlled input). + // Skipped when no issue is present — task_description is already screened at submission time. + const guardrailAction = issue + ? 
await screenWithGuardrail(userPrompt, task.task_id) + : undefined; + return { version: 1, user_prompt: userPrompt, @@ -1005,6 +1011,9 @@ export async function hydrateContext(task: TaskRecord, options?: HydrateContextO sources, token_estimate: tokenEstimate, truncated: budgetResult.truncated, + ...(guardrailAction === 'GUARDRAIL_INTERVENED' && { + guardrail_blocked: 'Task context blocked by content policy', + }), }; } catch (err) { // Guardrail failures must propagate (fail-closed) — unscreened content must not reach the agent diff --git a/cdk/test/handlers/shared/context-hydration.test.ts b/cdk/test/handlers/shared/context-hydration.test.ts index a4639bb..5586b58 100644 --- a/cdk/test/handlers/shared/context-hydration.test.ts +++ b/cdk/test/handlers/shared/context-hydration.test.ts @@ -546,6 +546,8 @@ describe('hydrateContext', () => { ok: true, json: async () => ({ number: 42, title: 'Bug', body: 'Details', comments: 0 }), }); + // Guardrail screens assembled prompt when issue content is present + mockBedrockSend.mockResolvedValueOnce({ action: 'NONE' }); const task = { ...baseTask, issue_number: 42, task_description: 'Fix it' }; const result = await hydrateContext(task as any); @@ -571,6 +573,9 @@ describe('hydrateContext', () => { expect(result.sources).toContain('task_description'); expect(result.issue).toBeUndefined(); expect(result.user_prompt).toContain('Fix it'); + // No issue content fetched — guardrail should not be called (task_description already screened) + expect(result.guardrail_blocked).toBeUndefined(); + expect(mockBedrockSend).not.toHaveBeenCalled(); }); test('no issue number — assembles from task description only', async () => { @@ -628,6 +633,8 @@ describe('hydrateContext', () => { ok: true, json: async () => ({ number: 10, title: 'Test', body: 'body', comments: 0 }), }); + // Guardrail screens assembled prompt when issue content is present + mockBedrockSend.mockResolvedValueOnce({ action: 'NONE' }); const task = { ...baseTask, issue_number: 10, 
task_description: 'Fix' }; const result = await hydrateContext(task as any, { githubTokenSecretArn: perRepoArn }); @@ -1027,7 +1034,7 @@ describe('screenWithGuardrail', () => { }); // --------------------------------------------------------------------------- -// hydrateContext — guardrail screening for PR tasks +// hydrateContext — guardrail screening // --------------------------------------------------------------------------- describe('hydrateContext — guardrail screening', () => { @@ -1113,29 +1120,70 @@ describe('hydrateContext — guardrail screening', () => { expect(mockBedrockSend).toHaveBeenCalledTimes(1); }); - test('does not invoke guardrail for new_task type', async () => { + // --- new_task guardrail screening --- + + const baseNewTask = { + task_id: 'TASK-NEW-001', + user_id: 'user-123', + status: 'SUBMITTED', + repo: 'org/repo', + branch_name: 'bgagent/TASK-NEW-001/fix', + channel_source: 'api', + status_created_at: 'SUBMITTED#2024-01-01T00:00:00Z', + created_at: '2024-01-01T00:00:00Z', + updated_at: '2024-01-01T00:00:00Z', + task_type: 'new_task', + task_description: 'Fix it', + }; + + function mockIssueFetch(): void { mockSmSend.mockResolvedValueOnce({ SecretString: 'ghp_test' }); mockFetch.mockResolvedValueOnce({ ok: true, json: async () => ({ number: 42, title: 'Bug', body: 'Details', comments: 0 }), }); + } - const newTask = { - task_id: 'TASK-NEW-001', - user_id: 'user-123', - status: 'SUBMITTED', - repo: 'org/repo', - branch_name: 'bgagent/TASK-NEW-001/fix', - channel_source: 'api', - status_created_at: 'SUBMITTED#2024-01-01T00:00:00Z', - created_at: '2024-01-01T00:00:00Z', - updated_at: '2024-01-01T00:00:00Z', - task_type: 'new_task', - issue_number: 42, - task_description: 'Fix it', - }; - const result = await hydrateContext(newTask as any); + test('invokes guardrail for new_task with issue content', async () => { + mockIssueFetch(); + mockBedrockSend.mockResolvedValueOnce({ action: 'NONE' }); + + const result = await hydrateContext({ 
...baseNewTask, issue_number: 42 } as any); + expect(result.guardrail_blocked).toBeUndefined(); + expect(mockBedrockSend).toHaveBeenCalledTimes(1); + }); + + test('does not invoke guardrail for new_task without issue_number', async () => { + const result = await hydrateContext(baseNewTask as any); expect(result.guardrail_blocked).toBeUndefined(); expect(mockBedrockSend).not.toHaveBeenCalled(); }); + + test('returns guardrail_blocked when new_task issue context is blocked', async () => { + mockIssueFetch(); + mockBedrockSend.mockResolvedValueOnce({ action: 'GUARDRAIL_INTERVENED' }); + + const result = await hydrateContext({ ...baseNewTask, issue_number: 42 } as any); + expect(result.guardrail_blocked).toBe('Task context blocked by content policy'); + expect(mockBedrockSend).toHaveBeenCalledTimes(1); + }); + + test('proceeds normally when new_task issue context passes guardrail', async () => { + mockIssueFetch(); + mockBedrockSend.mockResolvedValueOnce({ action: 'NONE' }); + + const result = await hydrateContext({ ...baseNewTask, issue_number: 42 } as any); + expect(result.guardrail_blocked).toBeUndefined(); + expect(result.issue).toBeDefined(); + expect(result.sources).toContain('issue'); + }); + + test('throws when guardrail screening fails for new_task (fail-closed)', async () => { + mockIssueFetch(); + mockBedrockSend.mockRejectedValueOnce(new Error('Bedrock timeout')); + + await expect( + hydrateContext({ ...baseNewTask, issue_number: 42 } as any), + ).rejects.toThrow('Guardrail screening unavailable: Bedrock timeout'); + }); }); diff --git a/docs/design/API_CONTRACT.md b/docs/design/API_CONTRACT.md index 3bde6b6..65a12de 100644 --- a/docs/design/API_CONTRACT.md +++ b/docs/design/API_CONTRACT.md @@ -617,6 +617,8 @@ Rate limit status is communicated via response headers (see Standard response he | `WEBHOOK_NOT_FOUND` | 404 | Webhook does not exist or belongs to a different user. | | `WEBHOOK_ALREADY_REVOKED` | 409 | Webhook is already revoked. 
| | `REPO_NOT_ONBOARDED` | 422 | Repository is not registered with the platform. Repos are onboarded via CDK deployment, not via a runtime API. There are no `/v1/repos` endpoints. | +| `GITHUB_UNREACHABLE` | 502 | The GitHub API was unreachable during the orchestrator's pre-flight check. The task fails fast without consuming compute. Transient — retry with backoff. | +| `REPO_NOT_FOUND_OR_NO_ACCESS` | 422 | The target repository does not exist or the configured credentials lack access. Checked during the orchestrator's pre-flight step (`GET /repos/{owner}/{repo}`). Distinct from `REPO_NOT_ONBOARDED` — the repo is onboarded but the credential cannot reach it. | | `PR_NOT_FOUND_OR_CLOSED` | 422 | For `pr_iteration` and `pr_review` tasks: the specified PR does not exist, is not open, or is not accessible with the configured GitHub token. Checked during the orchestrator's pre-flight step. | | `INVALID_STEP_SEQUENCE` | 500 | The blueprint's step sequence is invalid (missing required steps or incorrect ordering). This indicates a CDK configuration error that slipped past synth-time validation. Visible via `GET /v1/tasks/{id}` as `error_code`. See [REPO_ONBOARDING.md](./REPO_ONBOARDING.md#step-sequence-validation). | | `GUARDRAIL_BLOCKED` | 400 | Task description was blocked by Bedrock Guardrail content screening (prompt injection detected). Revise the task description and retry. | diff --git a/docs/design/ARCHITECTURE.md b/docs/design/ARCHITECTURE.md index 19bb916..20443c5 100644 --- a/docs/design/ARCHITECTURE.md +++ b/docs/design/ARCHITECTURE.md @@ -129,7 +129,8 @@ Cost efficiency is a design principle. 
The following estimates are based on **50 | **API Gateway** (REST API, ~2K requests/day) | ~$5–15 | Per-request pricing | | **AgentCore Memory** (events, records, retrieval) | TBD | Pricing not fully public; proportional to usage | | **CloudWatch** (logs, metrics, traces, Transaction Search) | ~$20–50 | Log ingestion + storage | -| **Secrets Manager** (GitHub App keys, webhook secrets) | ~$5–10 | Per-secret/month + API calls | +| **Secrets Manager** (GitHub token or App private key, webhook secrets) | ~$5–10 | Per-secret/month + API calls | +| **AgentCore Identity** (planned — WorkloadIdentity, Token Vault credential provider) | TBD | Token vending API calls; replaces per-task Secrets Manager reads for GitHub tokens | | **S3** (artifacts, memory backups) | ~$1–5 | Storage + requests | | **Total** | **~$700–1,600/month** | | @@ -201,10 +202,17 @@ Each concept has a **source-of-truth document** and one or more documents that r | Agent swarm orchestration | ROADMAP.md (Iter 6) | — | | Adaptive model router | ROADMAP.md (Iter 5) | COST_MODEL.md | | Capability-based security | ROADMAP.md (Iter 5) | SECURITY.md | +| Centralized policy framework | ROADMAP.md (Iter 5), SECURITY.md (Policy enforcement and audit) | ORCHESTRATOR.md, OBSERVABILITY.md | +| GitHub App + AgentCore Token Vault | ROADMAP.md (Iter 3c), SECURITY.md (Authentication) | ORCHESTRATOR.md (context hydration), COMPUTE.md | | Live session replay | ROADMAP.md (Iter 4) | API_CONTRACT.md | | PR iteration task type | API_CONTRACT.md, ORCHESTRATOR.md | USER_GUIDE.md, PROMPT_GUIDE.md, SECURITY.md, AGENT_HARNESS.md | | PR review task type | API_CONTRACT.md, ORCHESTRATOR.md | USER_GUIDE.md, PROMPT_GUIDE.md, SECURITY.md, AGENT_HARNESS.md | +| Orchestrator pre-flight checks | ORCHESTRATOR.md (Context hydration, pre-flight sub-step) | API_CONTRACT.md (Error codes: GITHUB_UNREACHABLE, REPO_NOT_FOUND_OR_NO_ACCESS), ROADMAP.md (3c), SECURITY.md | | Bedrock Guardrail input screening | SECURITY.md (Input validation and 
guardrails) | ORCHESTRATOR.md (Context hydration), API_CONTRACT.md (Error codes), OBSERVABILITY.md (Alarms), ROADMAP.md (3c) | +| Memory input hardening (3e Phase 1) | ROADMAP.md (Iter 3e Phase 1, co-ships with 3d) | MEMORY.md, SECURITY.md (Memory-specific threats) | +| Per-tool-call structured telemetry | ROADMAP.md (Iter 3d) | SECURITY.md (Mid-execution enforcement), EVALUATION.md, OBSERVABILITY.md | +| Mid-execution behavioral monitoring | ROADMAP.md (Iter 5), SECURITY.md (Mid-execution enforcement) | OBSERVABILITY.md | +| Tool-call interceptor (Guardian pattern) | SECURITY.md (Mid-execution enforcement), ROADMAP.md (Iter 5) | REPO_ONBOARDING.md (Blueprint security props) | ### Per-repo model selection diff --git a/docs/design/OBSERVABILITY.md b/docs/design/OBSERVABILITY.md index 1ab1ab0..df77fa1 100644 --- a/docs/design/OBSERVABILITY.md +++ b/docs/design/OBSERVABILITY.md @@ -129,6 +129,7 @@ Both are one-time, account-level setup steps — not managed by CDK. - **Guardrail screening events** — `guardrail_blocked` (content blocked by Bedrock Guardrail during hydration, with metadata: `reason`, `task_type`, `pr_number`, `sources`, `token_estimate`). Screening failures are logged with structured `metric_type` fields (not emitted as task events). - Time in each state (e.g. time in HYDRATING, time RUNNING, cold start to first agent activity). - Correlation with a task id and user id so users and operators can filter by task or user. +- **Planned (Iteration 5, Phase 1): `PolicyDecisionEvent`** — A unified event schema for all policy decisions across the task lifecycle: admission control, budget/quota resolution, guardrail screening, tool-call interception, and finalization. Each event carries: decision ID, policy name, version, phase, input hash, result (`allow` | `deny` | `modify`), reason codes, and enforcement mode (`enforced` | `observed` | `steered`). This normalizes the current mix of structured events (e.g. 
`admission_rejected`, `guardrail_blocked`) and silent HTTP errors into a single auditable event type. See [ROADMAP.md Iteration 5](../guides/ROADMAP.md) (Centralized policy framework) and [SECURITY.md](./SECURITY.md) (Policy enforcement and audit). ### Agent execution diff --git a/docs/design/ORCHESTRATOR.md b/docs/design/ORCHESTRATOR.md index cdfa7d3..d8d3727 100644 --- a/docs/design/ORCHESTRATOR.md +++ b/docs/design/ORCHESTRATOR.md @@ -29,11 +29,11 @@ The orchestrator document describes **behavior** (state machine, admission, canc **Relationship to blueprints.** The orchestrator is a **framework** that enforces platform invariants — the task state machine, event emission, concurrency management, and cancellation handling — and delegates variable work to **blueprint-defined step implementations**. A blueprint defines which steps run, in what order, and how each step is implemented (built-in strategy, Lambda-backed custom step, or custom sequence). The default blueprint is defined in this document (Section 4). Per-repo customization (see [REPO_ONBOARDING.md](./REPO_ONBOARDING.md)) changes the steps the orchestrator executes, not the framework guarantees it enforces. The orchestrator wraps every step with state transitions, event emission, and cancellation checks — regardless of whether the step is a built-in or a custom Lambda. -### Iteration 1 vs. target state +### Iteration 1 vs. current state -In **Iteration 1** (current), the orchestrator does not exist as a distinct component. The client calls `invoke_agent_runtime` synchronously, the agent runs to completion inside the AgentCore Runtime MicroVM, and the caller infers the result from the response. There is no durable state, no task management, no concurrency control, and no recovery. If the caller disconnects, the session is orphaned. +In **Iteration 1**, the orchestrator did not exist as a distinct component. 
The client called `invoke_agent_runtime` synchronously, the agent ran to completion inside the AgentCore Runtime MicroVM, and the caller inferred the result from the response. There was no durable state, no task management, no concurrency control, and no recovery. -The **target state** (Iteration 2 and beyond) introduces a durable orchestrator that manages the full task lifecycle. This document designs for the target state; where Iteration 1 constraints apply, they are called out explicitly. +**Current state (Iteration 3+):** The durable orchestrator manages the full task lifecycle with checkpoint/resume (Lambda Durable Functions), the full state machine (8 states), concurrency control, cancellation, context hydration, memory integration, pre-flight checks, and multi-task-type support. This document describes the current architecture; where historical Iteration 1 constraints are referenced (e.g. synchronous invocation model), they are called out explicitly. --- @@ -179,7 +179,7 @@ See the Admission control section for details. Validates that the task is allowe #### Step 2: Context hydration (deterministic) -See the Context hydration section for details. Assembles the agent's prompt from multiple sources depending on task type. For `new_task`: user message, GitHub issue (title, body, comments), memory, repo configuration, and platform defaults. For `pr_iteration`: PR metadata, review comments, diff summary, and optional user instructions. An additional **pre-flight** sub-step verifies PR accessibility when `pr_number` is set (see [preflight.ts](../../cdk/src/handlers/shared/preflight.ts)). For PR tasks, the assembled prompt is screened through Amazon Bedrock Guardrails for prompt injection before the agent receives it. The output is a fully assembled prompt, ready to pass to the compute session. +See the Context hydration section for details. Assembles the agent's prompt from multiple sources depending on task type. 
For `new_task`: user message, GitHub issue (title, body, comments), memory, repo configuration, and platform defaults. For `pr_iteration`: PR metadata, review comments, diff summary, and optional user instructions. GitHub API reachability, repo access, and — when `pr_number` is set — PR accessibility are verified by the dedicated **`pre-flight`** step that follows hydration in the default step sequence (see [preflight.ts](../../cdk/src/handlers/shared/preflight.ts)). The assembled prompt is screened through Amazon Bedrock Guardrails for prompt injection before the agent receives it (PR tasks: always screened; `new_task`: screened when issue content is present). The output is a fully assembled prompt, ready to pass to the compute session.
 
 #### Step 3: Session start and agent execution (deterministic start + agentic execution)
 
@@ -224,7 +224,7 @@ When the orchestrator loads a task's `blueprint_config`, it resolves the step pi
 
 1. **Load `RepoConfig`** from the `RepoTable` by `repo` (PK). Merge with platform defaults (see [REPO_ONBOARDING.md](./REPO_ONBOARDING.md#platform-defaults) for default values and override precedence).
 2. **Resolve compute strategy** from `compute_type` (default: `agentcore`). The strategy implements the `ComputeStrategy` interface (see [REPO_ONBOARDING.md](./REPO_ONBOARDING.md#compute-strategy-interface)).
-3. **Build step list.** If `step_sequence` is provided, use it; otherwise use the default sequence (`admission-control` → `hydrate-context` → `start-session` → `await-agent-completion` → `finalize`). For each entry, resolve to a built-in step function or a Lambda invocation wrapper.
+3. **Build step list.** If `step_sequence` is provided, use it; otherwise use the default sequence (`admission-control` → `hydrate-context` → `pre-flight` → `start-session` → `await-agent-completion` → `finalize`). The `pre-flight` step runs fail-closed readiness checks (GitHub API reachability, repo access, PR accessibility for PR tasks) before consuming compute — see [ROADMAP.md Iteration 3c](../guides/ROADMAP.md). 
For each entry, resolve to a built-in step function or a Lambda invocation wrapper. 4. **Inject custom steps.** If `custom_steps` are defined and no explicit `step_sequence` is provided, insert them at their declared `phase` position (pre-agent steps before `start-session`, post-agent steps after `await-agent-completion`). 5. **Validate.** Check that required steps are present and correctly ordered (see [step sequence validation](./REPO_ONBOARDING.md#step-sequence-validation)). If invalid, fail the task with `INVALID_STEP_SEQUENCE`. 6. **Execute.** Iterate the resolved list. For each step: check cancellation, filter `blueprintConfig` to only the fields that step needs (stripping credential ARNs for custom Lambda steps), execute with retry policy, enforce `StepOutput.metadata` size budget (10KB), prune `previousStepResults` to last 5 steps, emit events. Built-in steps that need durable waits (e.g. `await-agent-completion`) receive the `DurableContext` and `ComputeStrategy` so they can call `waitForCondition` and `computeStrategy.pollSession()` internally — no name-based special-casing in the framework loop. @@ -253,6 +253,8 @@ Admission control runs immediately after the input gateway dispatches a "create - **Rejected.** Task transitions to `FAILED` with a reason (repo not onboarded, rate limit exceeded, concurrency limit, validation error). No counter change. - **Deduplicated.** Existing task ID returned. No new task created. +**Planned (Iteration 5):** Admission control checks will be governed by Cedar policies as part of the centralized policy framework. Cedar replaces the current inline admission logic with formally verifiable policy evaluation — the same Cedar policy store handles admission, budget/quota resolution, tool-call interception, and (when multi-user/team lands) tenant-scoped authorization. All admission decisions will emit a structured `PolicyDecisionEvent` for audit. 
See [ROADMAP.md Iteration 5](../guides/ROADMAP.md) (Centralized policy framework) and [SECURITY.md](./SECURITY.md) (Policy enforcement and audit). + --- ## Context hydration @@ -271,7 +273,7 @@ The orchestrator's `hydrateAndTransition()` function calls `hydrateContext()` (` 4. **Assembles the user prompt** based on task type: - **`new_task`**: A structured markdown document with Task ID, Repository, GitHub Issue section, and Task section. The format mirrors the Python `assemble_prompt()` in `agent/entrypoint.py`. - **`pr_iteration`**: Assembled by `assemblePrIterationPrompt()` — includes PR metadata (number, title, body), the diff summary (changed files and patches), review comments (inline and conversation), and optional user instructions from `task_description`. -5. **Screens through Bedrock Guardrail** (PR tasks only): For `pr_iteration` and `pr_review` tasks, the assembled user prompt is screened through Amazon Bedrock Guardrails (`screenWithGuardrail()`) using the `PROMPT_ATTACK` content filter. If the guardrail detects prompt injection, `guardrail_blocked` is set on the result and the orchestrator fails the task. If the Bedrock API is unavailable, a `GuardrailScreeningError` is thrown (fail-closed — unscreened content never reaches the agent). Task descriptions for all task types are screened at submission time in `create-task-core.ts`. +5. **Screens through Bedrock Guardrail** (PR tasks; `new_task` when issue content is present): The assembled user prompt is screened through Amazon Bedrock Guardrails (`screenWithGuardrail()`) using the `PROMPT_ATTACK` content filter. For `new_task` tasks without issue content, screening is skipped because the task description was already screened at submission time. If the guardrail detects prompt injection, `guardrail_blocked` is set on the result and the orchestrator fails the task. If the Bedrock API is unavailable, a `GuardrailScreeningError` is thrown (fail-closed — unscreened content never reaches the agent). 
Task descriptions for all task types are screened at submission time in `create-task-core.ts`. 6. **Returns a `HydratedContext` object** containing `version`, `user_prompt`, `issue`, `sources`, `token_estimate`, `truncated`, and for `pr_iteration`/`pr_review` tasks: `resolved_branch_name` and `resolved_base_branch`. The hydrated context is passed to the agent as a new `hydrated_context` field in the invocation payload, alongside the existing legacy fields (`repo_url`, `task_id`, `branch_name`, `issue_number`, `prompt`). The agent checks for `hydrated_context` with `version == 1`; if present, it uses the pre-assembled `user_prompt` directly and skips in-container GitHub fetching and prompt assembly. If absent (e.g. during a deployment rollout or when the secret ARN isn't configured), the agent falls back to its existing behavior. @@ -304,7 +306,7 @@ We evaluated routing GitHub API calls through AgentCore Gateway (with the GitHub 4. **User message.** The free-text task description provided by the user (via CLI `--task` flag or equivalent). May supplement or replace the issue context. -5. **Memory context (Iteration 3+).** Query long-term memory (e.g. AgentCore Memory) for relevant past context: insights from previous tasks on this repo, failure summaries, learned patterns. See [MEMORY.md](./MEMORY.md) for how insights and code attribution feed into hydration. Not yet implemented. +5. **Memory context (Iteration 3b+).** Query long-term memory (AgentCore Memory) for relevant past context: repository knowledge (semantic search) and past task episodes (episodic search). Memory is loaded during context hydration via two parallel `RetrieveMemoryRecordsCommand` calls with a 5-second timeout and 2,000-token budget. See [MEMORY.md](./MEMORY.md) for how insights and code attribution feed into hydration. Tier 1 (repo knowledge + task episodes) is operational since Iteration 3b. Tier 2 (review feedback rules) is planned for Iteration 3d. 6. 
**Attachments.** Images or files provided by the user (multi-modal input). Passed through to the agent prompt as base64 or URLs. @@ -395,7 +397,7 @@ The orchestrator records the `(task_id, session_id)` mapping in the task record ### Invocation model: synchronous vs. asynchronous -**Iteration 1 (current).** `invoke_agent_runtime` is called synchronously with a long read timeout. The call blocks until the agent finishes. This is simple but limits concurrency: one orchestrator process per task. +**Iteration 1 (historical).** `invoke_agent_runtime` was called synchronously with a long read timeout. The call blocked until the agent finished. This was simple but limited concurrency: one orchestrator process per task. **Target state.** The orchestrator uses AgentCore's **asynchronous processing model** ([Runtime async docs](https://docs.aws.amazon.com/bedrock-agentcore/latest/devguide/runtime-long-run.html)). The key capabilities: @@ -420,7 +422,7 @@ The orchestrator needs to know whether the session is still running. Two complem 2. **Re-invocation on the same session (target state).** The orchestrator calls `invoke_agent_runtime` with the same `runtimeSessionId`. Sticky routing ensures the request reaches the same instance. The agent's entrypoint can detect this is a poll (e.g., via a `poll: true` field in the payload or by tracking the initial task) and return the current status without starting a new task. This is a fast, lightweight call that returns immediately. -**Iteration 1.** The `invoke_agent_runtime` call blocks; when it returns, the session is over. No explicit liveness check needed. +**Iteration 1 (historical).** The `invoke_agent_runtime` call blocked; when it returned, the session was over. No explicit liveness check was needed. **Fallback: DynamoDB heartbeat (optional enhancement).** As defense in depth, the agent can write a heartbeat timestamp to DynamoDB every N minutes. The orchestrator reads it during its poll cycle. A missing heartbeat (e.g. 
none in the last 10 minutes while `/ping` reports `HealthyBusy`) could indicate the agent is stuck but not idle — triggering investigation or forced termination. @@ -430,7 +432,7 @@ AgentCore Runtime terminates sessions after 15 minutes of inactivity (no `/ping` **Mitigation (async model).** In the target state, the agent uses the AgentCore SDK's async task management: `add_async_task` registers a background task, and the SDK automatically reports `HealthyBusy` via `/ping` while any async task is active. AgentCore polls `/ping` and sees the agent is busy, preventing idle termination. When the agent calls `complete_async_task`, the status reverts to `Healthy`. The `/ping` endpoint runs on the main thread (or async event loop) while the coding task runs in a separate thread, so `/ping` remains responsive. -**Mitigation (Iteration 1 / current).** The agent container's FastAPI server defines `/ping` as a separate async endpoint. Because the agent task runs in a threadpool worker (not in the asyncio event loop), the `/ping` endpoint remains responsive while the agent works. AgentCore calls `/ping` periodically and the server responds, preventing idle timeout. +**Mitigation (current).** The agent container's FastAPI server defines `/ping` as a separate async endpoint. Because the agent task runs in a threadpool worker (not in the asyncio event loop), the `/ping` endpoint remains responsive while the agent works. AgentCore calls `/ping` periodically and the server responds, preventing idle timeout. **Risk.** If the agent's computation blocks the entire process (not just a thread) — e.g. due to a subprocess that consumes all resources, or the server becomes unresponsive — the `/ping` response may be delayed, triggering idle termination. This risk applies to both models. The defense is to ensure the coding task runs in a separate thread or process and does not starve the main thread. 
@@ -438,7 +440,7 @@ AgentCore Runtime terminates sessions after 15 minutes of inactivity (no `/ping` When the session ends (agent finishes, crashes, or is terminated), the orchestrator detects this: -- **Iteration 1:** The `invoke_agent_runtime` call returns (it blocks). The response body contains the agent's output (status, PR URL, cost, etc.). +- **Iteration 1 (historical):** The `invoke_agent_runtime` call returned (it blocked). The response body contained the agent's output (status, PR URL, cost, etc.). - **Target state:** The orchestrator polls the agent via re-invocation on the same session (see Invocation model above). Completion is detected when: (a) the agent responds with a "completed" or "failed" status in the poll response, or (b) the re-invocation fails because the session was terminated (idle timeout, crash, or 8-hour limit reached). In the durable orchestrator, a `waitForCondition` evaluates the poll result at each interval and resumes the pipeline when the condition is met. See the session monitoring pattern in the Implementation options section. ### External termination (cancellation) @@ -871,6 +873,7 @@ The primary table for task state. DynamoDB. | `cost_usd` | Number (optional) | Agent cost from the SDK result. | | `duration_s` | Number (optional) | Total task duration in seconds. | | `build_passed` | Boolean (optional) | Post-agent build verification result. | +| `lint_passed` | Boolean (optional) | Post-agent lint verification result. Recorded alongside `build_passed` during finalization; surfaced as a span attribute (`lint.passed`) and included in the PR body's verification section. | | `max_turns` | Number (optional) | Maximum agent turns for this task. Set during task creation — either the user-specified value (1–500) or the platform default (100). Included in the orchestrator payload and consumed by the agent SDK's `ClaudeAgentOptions(max_turns=...)`. | | `max_budget_usd` | Number (optional) | Maximum cost budget in USD for this task. 
Set during task creation — either the user-specified value ($0.01–$100) or the per-repo Blueprint default. When reached, the agent stops regardless of remaining turns. If neither the task nor the Blueprint specifies a value, no budget limit is applied (turn limit and session timeout still apply). Included in the orchestrator payload and consumed by the agent SDK's `ClaudeAgentOptions(max_budget_usd=...)`. | | `blueprint_config` | Map (optional) | Snapshot of the `RepoConfig` record at task creation time (or a reference to it). This ensures tasks are not affected by mid-flight config changes. The schema follows the `RepoConfig` interface defined in [REPO_ONBOARDING.md](./REPO_ONBOARDING.md#repoconfig-schema). Includes `compute_type`, `runtime_arn`, `model_id`, `max_turns`, `system_prompt_overrides`, `github_token_secret_arn`, `poll_interval_ms`, `custom_steps`, `step_sequence`, and `egress_allowlist`. The `max_turns` value from `blueprint_config` serves as the per-repo default; per-task `max_turns` (from the API request) takes higher priority. `max_budget_usd` follows the same 2-tier override pattern: per-task value takes priority over `blueprint_config.max_budget_usd`; if neither is specified, no budget limit is applied. 
| diff --git a/docs/design/REPO_ONBOARDING.md b/docs/design/REPO_ONBOARDING.md index 6d1d2b1..0d00661 100644 --- a/docs/design/REPO_ONBOARDING.md +++ b/docs/design/REPO_ONBOARDING.md @@ -43,8 +43,21 @@ interface BlueprintProps { agent?: { modelId?: string; // foundation model override maxTurns?: number; // default turn limit for this repo + maxBudgetUsd?: number; // default cost budget for this repo ($0.01–$100) + memoryTokenBudget?: number; // memory context token budget override (default: 2000) systemPromptOverrides?: string; // additional system prompt instructions }; + // Security (planned — Iteration 5) + security?: { + capabilityTier?: 'standard' | 'elevated' | 'read-only'; // tool access tier + filePathDenyList?: string[]; // deny writes to these paths (e.g. '.github/workflows/') + bashAllowlist?: string[]; // allowed bash commands (overrides default tier allowlist) + circuitBreaker?: { // behavioral circuit breaker thresholds + maxCallsPerMinute?: number; // default: 50 + maxCostUsd?: number; // default: 10 + maxConsecutiveFailures?: number; // default: 5 + }; + }; // Credentials credentials?: { githubTokenSecretArn?: string; // per-repo GitHub token @@ -100,6 +113,8 @@ interface RepoConfig { // Agent model_id?: string; max_turns?: number; + max_budget_usd?: number; + memory_token_budget?: number; system_prompt_overrides?: string; // Credentials github_token_secret_arn?: string; @@ -178,6 +193,8 @@ Used when a `RepoConfig` field is absent: | `runtime_arn` | Stack-level `RUNTIME_ARN` env var | CDK stack props | | `model_id` | Claude Sonnet 4 | CDK stack props | | `max_turns` | 100 | Platform constant (`DEFAULT_MAX_TURNS`) | +| `max_budget_usd` | None (no budget limit) | — | +| `memory_token_budget` | 2000 | Platform constant | | `github_token_secret_arn` | Stack-level `GITHUB_TOKEN_SECRET_ARN` | CDK stack props | | `poll_interval_ms` | 30000 | Orchestrator constant | | `system_prompt_overrides` | None | — | @@ -203,7 +220,8 @@ The orchestrator loads the 
`RepoConfig` in the first step (after `load-task`) an | `load-blueprint` | `compute_type`, `custom_steps`, `step_sequence` (resolves the full step pipeline) | | `admission-control` | `status` (defense-in-depth; already checked at API level) | | `hydrate-context` | `github_token_secret_arn`, `system_prompt_overrides` | -| `start-session` | `compute_type`, `runtime_arn`, `model_id`, `max_turns` | +| `pre-flight` | `github_token_secret_arn` (verifies GitHub API reachability and repo access) | +| `start-session` | `compute_type`, `runtime_arn`, `model_id`, `max_turns`, `max_budget_usd` | | `await-agent-completion` | `poll_interval_ms` | | `finalize` | (custom post-agent steps run before finalize if configured) | | Custom steps (layer 2/3) | `custom_steps[].config` (step-specific configuration) | @@ -288,6 +306,7 @@ When a `stepSequence` is provided (Layer 3), the framework validates it at deplo | Step | Why it's required | |---|---| | `admission-control` | Enforces concurrency limits. Omitting it leaks concurrency slots. | +| `pre-flight` | Fail-closed readiness checks (GitHub API reachability, repo access). Omitting it allows doomed tasks to consume compute. | | `start-session` | Starts the compute session. Without it, nothing runs. | | `await-agent-completion` | Polls for session completion. Without it, the orchestrator cannot detect when the agent finishes. | | `finalize` | Releases concurrency slots, emits terminal events, persists outcome. Omitting it leaks concurrency counters and leaves tasks in non-terminal states. | @@ -296,6 +315,7 @@ When a `stepSequence` is provided (Layer 3), the framework validates it at deplo **Ordering constraints:** - `admission-control` must be first. +- `pre-flight` must precede `start-session`. - `start-session` must precede `await-agent-completion`. - `finalize` must be last. - Custom steps can be inserted between any adjacent pair of built-in steps, but cannot precede `admission-control` or follow `finalize`. 
diff --git a/docs/design/SECURITY.md b/docs/design/SECURITY.md index 260f596..2a9410f 100644 --- a/docs/design/SECURITY.md +++ b/docs/design/SECURITY.md @@ -29,8 +29,8 @@ This aligns with AWS guidance: *Isolate sessions* (1.4) and use session-scoped s ## Authentication and authorization - **Authentication** — CLI users authenticate via Amazon Cognito (JWT). Webhook integrations authenticate via HMAC-SHA256 signatures (per-integration shared secrets stored in Secrets Manager). Each channel uses its own verification mechanism. The input gateway verifies every request before processing. -- **Credentials for the agent** — the agent does not hold long-lived secrets. GitHub access uses AgentCore Identity OAuth (token vault, auto-refresh); the agent receives tokens from the vault, not from application code. This reduces credential exposure and supports least-privilege at the identity layer. -- **Dynamic secret substitution** — the principle that **the LLM and agent context never see raw credentials**. Secrets (e.g. API keys, OAuth tokens) are held by the runtime or gateway and injected only at tool-execution time when a request is made. They do not appear in prompts, conversation history, or logs, which limits exposure from prompt leakage, log ingestion, or context exfiltration. AgentCore Identity’s token vault provides this for GitHub: the agent calls tools that use the vault; the model never receives the token. +- **Credentials for the agent** — currently, GitHub access uses a shared PAT (or per-repo PAT) stored in Secrets Manager. The orchestrator reads the secret at hydration time and passes it to the agent runtime via environment variable. The runtime execution role has `secretsmanager:GetSecretValue` for the token secret. **Planned (Iteration 3c):** Replace the shared PAT with a **GitHub App** integrated via **AgentCore Identity Token Vault**. 
A `CfnWorkloadIdentity` resource will represent the agent's identity; the GitHub App's credentials will be registered as a Token Vault credential provider. At task hydration, the orchestrator will generate a short-lived installation token (1-hour TTL, scoped to the target repo) via the GitHub API. For long-running sessions, the agent will call `GetWorkloadAccessToken` to obtain a fresh token — the Token Vault handles refresh automatically. The runtime execution role already has the necessary permissions (`bedrock-agentcore:GetWorkloadAccessToken`, `GetWorkloadAccessTokenForJWT`, `GetWorkloadAccessTokenForUserId` — granted automatically by the AgentCore Runtime L2 construct). This will replace the shared PAT with per-task, repo-scoped, short-lived tokens and set up the same pattern for future integrations (GitLab, Jira, Slack). See [ROADMAP.md Iteration 3c](../guides/ROADMAP.md). +- **Dynamic secret substitution** — the principle that **the LLM and agent context never see raw credentials**. Secrets (e.g. API keys, OAuth tokens) are held by the runtime or gateway and injected only at tool-execution time when a request is made. They do not appear in prompts, conversation history, or logs, which limits exposure from prompt leakage, log ingestion, or context exfiltration. Currently, the GitHub PAT is fetched from Secrets Manager by the agent at runtime and used for git operations and GitHub API calls; the model does not receive the token in its context. **Planned (Iteration 3c):** AgentCore Identity's Token Vault will provide dynamic credential vending for GitHub — the agent will call `GetWorkloadAccessToken` to obtain a scoped, short-lived token at runtime. The GitHub App private key will be stored in Secrets Manager and accessed only by the orchestrator (never by the agent or model). Future Gateway integration will enable credential injection for GitHub API calls without any token in the sandbox.
- **Webhook secret management** — Each webhook integration has a unique 32-byte random secret stored in AWS Secrets Manager (`bgagent/webhook/{webhook_id}`). Secrets are shown to the user only once at creation time. On revocation, secrets are scheduled for deletion with a 7-day recovery window. The webhook task handler caches secrets in-memory with a 5-minute TTL to reduce Secrets Manager API calls while maintaining reasonable secret rotation latency. IAM policies are scoped to the `bgagent/webhook/*` prefix. - **Authorization** — any authenticated user can submit tasks; users can view and cancel only their **own** tasks (enforced by user_id). Webhook management endpoints enforce ownership — a user can only list, view, and revoke their own webhooks (non-owners receive 404, not 403, to avoid leaking webhook existence). @@ -50,7 +50,7 @@ The agent runs with **full permissions inside the sandbox** but cannot escape it - **Per-repo tool profiles:** Stored in the onboarding config and loaded by the orchestrator during context hydration. The agent harness configures the tool set based on the profile. See [REPO_ONBOARDING.md](./REPO_ONBOARDING.md) for per-repo configuration. - **Enforcement mechanism:** Tools are exposed to the agent through **AgentCore Gateway**, which provides built-in mechanisms to enforce access control. The Gateway acts as a managed proxy between the agent and external tools/APIs — only tools registered and authorized in the Gateway are reachable. Per-repo tool profiles map to Gateway tool configurations: the orchestrator registers the allowed tool set for each session, and the Gateway enforces it. This is a platform-level enforcement boundary (not a prompt-level suggestion), meaning the agent cannot bypass it by requesting tools that are not registered. For tools not mediated by the Gateway (e.g. 
direct bash commands), enforcement relies on the sandbox environment (filesystem permissions, network egress rules, and the bash allowlist configured in the agent harness). - **Rationale:** More tools increase the agent's search space, making behavior less predictable and harder to evaluate. A minimal default with opt-in expansion balances capability with reliability. -- **Guardrails** — Amazon Bedrock Guardrails are deployed for task input screening. The `task-input-guardrail` applies a `PROMPT_ATTACK` content filter at `HIGH` strength on task descriptions at submission time. This provides a first layer of defense against prompt injection in user-supplied task descriptions. A second screening point runs during context hydration for PR tasks (`pr_iteration`, `pr_review`), screening the assembled prompt (PR body, review comments, conversation comments, diff summary, task description) before the agent receives it. Both screening points follow a **fail-closed** pattern: if the Bedrock Guardrail API is unavailable, the task is rejected (submission-time returns HTTP 503; hydration-time transitions the task to FAILED). This ensures unscreened content never reaches the agent, even during Bedrock outages. Screening failures are logged with a structured `metric_type: 'guardrail_screening_failure'` field for CloudWatch alerting: +- **Guardrails** — Amazon Bedrock Guardrails are deployed for task input screening. The `task-input-guardrail` applies a `PROMPT_ATTACK` content filter at `HIGH` strength on task descriptions at submission time. This provides a first layer of defense against prompt injection in user-supplied task descriptions. A second screening point runs during context hydration for PR tasks (`pr_iteration`, `pr_review`) and for `new_task` tasks when GitHub issue content is present, screening the assembled prompt before the agent receives it. 
Both screening points follow a **fail-closed** pattern: if the Bedrock Guardrail API is unavailable, the task is rejected (submission-time returns HTTP 503; hydration-time transitions the task to FAILED). This ensures unscreened content never reaches the agent, even during Bedrock outages. Screening failures are logged with a structured `metric_type: 'guardrail_screening_failure'` field for CloudWatch alerting: ``` filter metric_type = "guardrail_screening_failure" | stats count() by bin(5m) ``` @@ -102,6 +102,54 @@ The `functionArn` in `CustomStepConfig` should be validated at CDK synth time to - **Per-repo `egressAllowlist` is a declarative annotation**, not per-session enforcement. All agent sessions share the same VPC and DNS Firewall rules. Per-repo allowlists are aggregated (union) into the platform-wide policy. - **DNS Firewall does not prevent IP-based connections.** A direct connection to an IP address (e.g. `curl https://1.2.3.4/`) bypasses DNS resolution. This is acceptable for the "confused agent" threat model (the agent uses domain names in its tool calls) but does not defend against a sophisticated adversary. Closing this gap would require AWS Network Firewall (SNI-based filtering) at ~$274/month/endpoint. +## Policy enforcement and audit + +The platform enforces policies at multiple points in the task lifecycle. Today, these policies are implemented inline across ~20 files (handlers, constructs, agent code). A centralized policy framework is planned (Iteration 5) to improve auditability, consistency, and change control. 
+ +### Current policy enforcement map + +| Phase | Policy | Enforcement location | Audit trail | +|---|---|---|---| +| **Submission** | Input validation (format, ranges, lengths) | `validation.ts`, `create-task-core.ts` | HTTP 400 response only — no event emitted | +| **Submission** | Repo onboarding gate | `repo-config.ts` → `create-task-core.ts` | HTTP 422 response only — no event emitted | +| **Submission** | Guardrail input screening | `create-task-core.ts` (Bedrock Guardrails) | HTTP 400 response only — no event emitted | +| **Submission** | Idempotency check | `create-task-core.ts` | HTTP 409 response only — no event emitted | +| **Admission** | Concurrency limit | `orchestrator.ts` (`admissionControl`) | `admission_rejected` event emitted | +| **Pre-flight** | GitHub reachability, repo access, PR access | `preflight.ts` | `preflight_failed` event emitted | +| **Hydration** | Guardrail prompt screening (PR + issue content) | `context-hydration.ts` | `guardrail_blocked` event emitted | +| **Hydration** | Budget/quota resolution (3-tier max_turns, 2-tier max_budget_usd) | `orchestrator.ts` (`hydrateAndTransition`) | Values persisted on task record — no policy decision event | +| **Hydration** | Token budget for prompt assembly | `context-hydration.ts` | No event emitted | +| **Session** | Tool access control (pr_review restrictions) | `agent/entrypoint.py` | No event emitted | +| **Session** | Budget enforcement (turns, cost) | Claude Agent SDK | Agent SDK enforces; cost in task result | +| **Finalization** | Build/lint verification | `agent/entrypoint.py` | Results in task record and PR body | +| **Infrastructure** | DNS Firewall egress allowlist | `dns-firewall.ts`, `agent.ts` (CDK synth) | DNS query logs in CloudWatch | +| **Infrastructure** | WAF rate limiting | `task-api.ts` (CDK synth) | WAF logs | +| **State machine** | Valid transition enforcement | `task-status.ts`, `orchestrator.ts` | DynamoDB conditional writes | + +### Audit gaps (planned 
remediation) + +Submission-time policy decisions (validation, onboarding gate, guardrail screening, idempotency) currently return HTTP errors without emitting structured audit events. Budget resolution decisions are persisted but not logged as policy decisions with reason codes. Tool access selection is implicit (hardcoded in agent code) with no audit event. + +**Planned (Iteration 5, Phase 1):** A unified `PolicyDecisionEvent` schema will normalize all policy decisions into structured events with: decision ID, policy name, version, phase, input hash, result, reason codes, and enforcement mode. Enforcement supports three modes: `enforced` (decision is binding — deny blocks, allow proceeds), `observed` (decision is logged but not enforced — shadow mode for safe rollout), and `steered` (decision modifies the input or output rather than blocking — redact PII, sanitize paths, mask secrets). New rules deploy in `observed` mode first; operators validate false-positive rates via `PolicyDecisionEvent` logs, then promote to `enforced` or `steered`. This observe-before-enforce workflow enables gradual rollout of security policies without risking false blocks on legitimate tasks. See [ROADMAP.md Iteration 5](../guides/ROADMAP.md) for the full centralized policy framework design. + +### Policy resolution and authorization (planned) + +**Planned (Iteration 5, Phase 2):** Cedar as the single policy engine for both **operational policy** (budget/quota/tool-access resolution, tool-call interception rules) and **authorization** (multi-tenant access control, extended when multi-user/team lands). Cedar replaces the scattered merge logic across handlers with a unified policy evaluation. A thin `policy.ts` adapter translates Cedar decisions into `PolicyDecision` objects consumed by existing handlers. 
Cedar is preferred over OPA: it is AWS-native, has formal verification guarantees, integrates with AgentCore Gateway, and policies can be evaluated in-process via the Cedar SDK without a separate service dependency. Cedar's binary permit/forbid model supports the three enforcement modes (`enforced`, `observed`, `steered`) via a **virtual-action classification pattern**: the interceptor evaluates against multiple virtual actions (`invoke_tool`, `invoke_tool_steered`, `invoke_tool_denied`) and uses the first permitted action to determine the mode. For example, `forbid(principal, action == Action::"invoke_tool", resource) when { resource.path like ".github/workflows/*" && principal.capability_tier != "elevated" }` blocks the call, while `permit(principal, action == Action::"invoke_tool_steered", resource) when { context.output_contains_pii }` triggers PII redaction instead of blocking. Cedar policies are stored in Amazon Verified Permissions and loaded at hydration/session-start time — policy changes take effect without CDK redeployment. When multi-user/team support lands, the same Cedar policy store expands to cover tenant-specific authorization (user/team/repo scoping, team budgets, risk-based approval requirements). + +### Mid-execution enforcement (planned) + +Today, once an agent session starts, the orchestrator can only observe it via polling (session running or terminated). There is no mechanism to detect or intervene when an agent goes off the rails mid-session — infinite tool-call loops, excessive file writes, or cost runaway. The orchestrator's hard timeout is the only backstop. + +**Planned (Iteration 5):** Two complementary mechanisms address this gap: + +1. **Tool-call interceptor (Guardian pattern)** — A policy-evaluation layer in the agent harness (`entrypoint.py`) that sits between the agent SDK's tool-call decision and actual tool execution. 
Evaluation is split into two stages: a **pre-execution stage** that validates tool inputs before the tool runs (file path deny patterns, bash command allowlist per capability tier, cost threshold checks, and per-repo rules from Blueprint `security` configuration) and blocks disallowed operations before they execute, and a **post-execution stage** that screens tool outputs after the tool runs (PII patterns in file content, secrets in command output, sensitive data leakage) and can redact or flag content before it re-enters the agent context. The interceptor can allow, modify (e.g. redact secrets from output), or deny tool calls. Denied calls return a structured error to the agent, which can retry with a different approach. This follows the Guardian interceptor pattern (Hu et al. 2025) — enforcement happens at tool-call time, not before the session starts (input guardrails) or after it ends (validation pipeline). Combined with per-tool-call structured telemetry (Iteration 3d), every interceptor decision is logged as a `PolicyDecisionEvent`. + +2. **Behavioral circuit breaker** — Lightweight monitoring of tool-call patterns within a session: call frequency (calls per minute), cumulative cost, repeated failures on the same tool, and file mutation rate. When metrics exceed configurable thresholds (e.g. >50 tool calls/minute, >$10 cumulative cost, >5 consecutive failures), the circuit breaker pauses or terminates the session and emits a `circuit_breaker_triggered` event. This catches runaway loops and cost explosions before the hard session timeout. Thresholds are configurable per-repo via Blueprint `security` props. + +These mechanisms are complementary: the interceptor enforces per-call policy (what the agent is allowed to do), while the circuit breaker enforces aggregate behavioral bounds (how the agent is behaving over time). Both operate within the existing agent harness — no sidecar process or external service required. 
For ABCA's single-agent-per-task model, embedded monitoring is simpler and more reliable than an external sidecar; sidecar architecture becomes relevant when multi-agent orchestration lands (Iteration 6). + +See [ROADMAP.md Iteration 5](../guides/ROADMAP.md) (Guardrails, Mid-execution behavioral monitoring). + ## Memory-specific threats ### OWASP ASI06 — Memory and context poisoning @@ -198,11 +246,11 @@ AgentCore Memory has **no native backup mechanism**. This is a significant gap f ## Known limitations -- **Single GitHub OAuth token** — one token may be shared for all users and repos the platform can access. Any authenticated user can trigger agent work against any repo that token can access. There is no per-user repo scoping. -- **Guardrails are input-only** — the `PROMPT_ATTACK` filter screens task descriptions at submission and assembled PR prompts during context hydration. No guardrails are applied to model output during agent execution or to review feedback entering the memory system. For `pr_iteration` and `pr_review` tasks, the assembled user prompt (including PR body, review comments, conversation comments, diff summary, and task description) is screened through the Bedrock Guardrail during hydration; if blocked, the task fails with a descriptive error. Guardrail screening follows a fail-closed pattern: a Bedrock outage blocks task submissions (HTTP 503) and fails PR tasks during hydration. +- **Single GitHub OAuth token (planned mitigation: GitHub App + AgentCore Token Vault)** — one token may be shared for all users and repos the platform can access. Any authenticated user can trigger agent work against any repo that token can access. There is no per-user repo scoping. **Planned mitigation (Iteration 3c):** Replace the shared PAT with a GitHub App integrated via AgentCore Token Vault. Each task receives a short-lived installation token scoped to the target repo only. The Token Vault manages refresh for long-running sessions. 
Combined with SSO (federated identity), tokens can be further scoped to the user's effective GitHub permissions. See [ROADMAP.md Iteration 3c](../guides/ROADMAP.md) for the implementation approach. +- **Guardrails are input-only** — the `PROMPT_ATTACK` filter screens task descriptions at submission and assembled prompts during context hydration (for PR tasks and for `new_task` tasks with issue content). No guardrails are applied to model output during agent execution or to review feedback entering the memory system. For `pr_iteration` and `pr_review` tasks, the assembled user prompt (including PR body, review comments, conversation comments, diff summary, and task description) is screened through the Bedrock Guardrail during hydration. For `new_task` tasks, the assembled prompt is screened when GitHub issue content is present; when no issue content is fetched, hydration-time screening is skipped because the task description was already screened at submission time. If blocked, the task fails with a descriptive error. Guardrail screening follows a fail-closed pattern: a Bedrock outage blocks task submissions (HTTP 503) and fails tasks during hydration. - **No memory content validation** — retrieved memory records are injected into the agent's context without sanitization, injection pattern scanning, or trust scoring. This is the most critical memory security gap (OWASP ASI06). See [MEMORY.md](./MEMORY.md#memory-security-analysis) for the full gap analysis and [ROADMAP.md Iteration 3e](../guides/ROADMAP.md) for the remediation plan. - **No memory provenance or integrity checking** — memory entries carry no source attribution, content hashing, or trust metadata. The system cannot distinguish agent-generated memory from externally-influenced content. -- **GitHub issue content as untrusted input** — issue bodies and comments (attacker-controlled) are injected into the agent's context during hydration for `new_task` tasks without guardrail screening. 
Only the user-supplied `task_description` is screened at submission time; the fetched issue content bypasses both screening points. This is a known gap — extending hydration-time guardrail screening to `new_task` issue content is planned as a follow-up. +- **GitHub issue content as untrusted input** — issue bodies and comments (attacker-controlled) are injected into the agent's context during hydration for `new_task` tasks. The assembled user prompt is now screened through the Bedrock Guardrails `PROMPT_ATTACK` filter during context hydration when issue content is present; if prompt injection is detected, the task fails before reaching the agent. When no issue content is fetched (task_description only), hydration-time screening is skipped because the task description was already screened at submission time. - **PR review comments as untrusted input** — for `pr_iteration` and `pr_review` tasks, review comments, PR body, and conversation comments are fetched and injected into the agent's context. These are attacker-controlled inputs subject to the same prompt injection risks as issue comments. The assembled PR prompt is now screened by the Bedrock Guardrails `PROMPT_ATTACK` filter during context hydration; if prompt injection is detected, the task fails before reaching the agent. For `pr_review` tasks, additional defense-in-depth mitigates residual risk: the agent runs without `Write` or `Edit` tools, so even if injection bypasses the guardrail, the agent cannot modify files or push code. - **No memory rollback or quarantine** — the 365-day AgentCore Memory expiration is the only cleanup mechanism. There is no snapshot, rollback, or quarantine capability for suspected poisoned entries. - **No MFA** — Cognito MFA is disabled (CLI-based auth flow). Should be enabled for production deployments. 
diff --git a/docs/guides/ROADMAP.md b/docs/guides/ROADMAP.md index b1f11eb..2e4efdc 100644 --- a/docs/guides/ROADMAP.md +++ b/docs/guides/ROADMAP.md @@ -18,6 +18,7 @@ These practices apply continuously across iterations and are not treated as one- - `P-ABCA-4` memory fallback guarantee: if task finalization sees `memory_written = false`, fallback episode write is attempted and result is observable. - `P-ABCA-5` branch-name uniqueness: simultaneous tasks for the same repo generate distinct branch names (ULID-based suffix). - **Definition-of-done hook** — New orchestrator/concurrency changes should include: updated property mappings, at least one property-based test where applicable, and invariant notes in `ORCHESTRATOR.md` to keep docs and executable checks aligned. +- **Memory extraction prompt versioning** — Hash memory extraction prompts (in `agent/memory.py`: `write_task_episode`, `write_repo_learnings`) alongside system prompts so changes to extraction logic are tracked by `prompt_version`. This enables correlating memory quality changes with extraction prompt updates in the evaluation pipeline. --- @@ -151,7 +152,21 @@ These practices apply continuously across iterations and are not treated as one- **Goal:** Multi-layered validation catches errors, enforces code quality, and assesses change risk before PRs are created; the platform supports more than one task type; multi-modal input broadens what users can express. -- **Per-repo GitHub credentials (GitHub App)** — Replace the single shared OAuth token with a **GitHub App** installed per-organization or per-repository. Each onboarded repo is associated with a GitHub App installation that grants fine-grained permissions (read/write to that repo only). This eliminates the security gap where any authenticated user can trigger agent work against any repo the shared token can access. Token management (installation token generation, rotation) is handled by the platform, not by the agent. 
AgentCore Identity's token vault can store and refresh installation tokens. This is a prerequisite for any multi-user or multi-team deployment. +- **Per-repo GitHub credentials (GitHub App + AgentCore Token Vault)** — Replace the single shared OAuth token with a **GitHub App** installed per-organization or per-repository, using **AgentCore Identity's Token Vault** for credential management (recommended approach). Each onboarded repo is associated with a GitHub App installation that grants fine-grained permissions (read/write to that repo only). This eliminates the security gap where any authenticated user can trigger agent work against any repo the shared token can access. + + **Implementation approach — AgentCore Token Vault integration:** + 1. **WorkloadIdentity resource** — Create a `CfnWorkloadIdentity` in CDK representing the agent's identity, enabling token exchange with GitHub. + 2. **Token Vault credential provider** — Register the GitHub App's credentials in the AgentCore Token Vault. For server-to-server authentication, the GitHub App uses a private key to sign JWTs that are exchanged for installation tokens via the GitHub API. For the user-authorization OAuth flow (acting on behalf of a user), the App's client ID and client secret are registered as an OAuth credential provider. The Token Vault handles token refresh automatically — no expiry issues for long-running tasks (sessions exceeding 1 hour). + 3. **Orchestrator token generation** — At task hydration time, the orchestrator calls the GitHub API to generate an installation token (1-hour TTL, scoped to the target repo) and passes it to the agent at session start. + 4. **Agent-side token refresh** — For tasks running longer than 1 hour, the agent calls `GetWorkloadAccessToken` (permissions already granted to the runtime execution role: `bedrock-agentcore:GetWorkloadAccessToken`, `GetWorkloadAccessTokenForJWT`, `GetWorkloadAccessTokenForUserId`) to obtain a fresh token from the Token Vault. 
No Secrets Manager reads needed at runtime. + 5. **Blueprint configuration** — Extend `Blueprint` credentials with `githubAppId`, `githubAppPrivateKeySecretArn`, and `githubAppInstallationId` (per-org or per-repo). + 6. **Gateway integration (future)** — Wire an AgentCore Gateway target for GitHub API calls with automatic credential injection, enabling audit trails and Cedar policy enforcement per request. Git transport (clone/push) still requires a token in the remote URL, so Gateway-mediated access applies to API operations only. + + **Why Token Vault over Secrets Manager:** The runtime already has `GetWorkloadAccessToken` permissions (granted by the AgentCore Runtime construct). Token Vault is purpose-built for dynamic credential vending — it manages refresh automatically, supports arbitrary OAuth providers (GitHub, GitLab, Jira, Slack via the same pattern), and keeps credentials out of the sandbox as static secrets. This sets up the pattern for all future third-party integrations. + + **Per-user identity flow (future, connects to SSO):** With a GitHub App, installation tokens can be scoped per-repository and per-permission set. Combined with federated identity (SSO), the orchestrator can look up the user's GitHub identity and generate tokens scoped to the target repo with only the permissions that user would have. Git commits are attributed to the GitHub App acting on behalf of the user. + + This is a prerequisite for any multi-user or multi-team deployment. - [x] **Orchestrator pre-flight checks (fail-closed)** — Add a `pre-flight` step before `start-session` so doomed tasks fail fast without consuming AgentCore runtime. The orchestrator performs lightweight readiness checks with strict timeouts (for example, 5 seconds): verify GitHub API reachability, verify repository existence and credential access (`GET /repos/{owner}/{repo}` or equivalent), and optionally verify AgentCore Runtime availability when a status probe exists. 
If pre-flight fails, the task transitions to `FAILED` immediately with a clear terminal reason (`GITHUB_UNREACHABLE`, `REPO_NOT_FOUND_OR_NO_ACCESS`, `RUNTIME_UNAVAILABLE`), releases the concurrency slot, emits an event/notification, and does **not** invoke the agent. Unlike memory/context hydration (fail-open), pre-flight is explicitly fail-closed: inability to verify repo access blocks execution by design. - [x] **Persistent session storage (cache layer)** — Enabled AgentCore Runtime persistent session storage (preview) for selective cache persistence across stop/resume. A per-session filesystem is mounted at `/mnt/workspace` via `FilesystemConfigurations` (CFN escape hatch on the L2 construct). The S3-backed FUSE mount does not support `flock()` (returns `ENOTRECOVERABLE` / os error 524), so only caches whose tools never call `flock()` go on the mount (`npm_config_cache`, `CLAUDE_CONFIG_DIR`). Caches for tools that use `flock()` stay on local ephemeral disk (`MISE_DATA_DIR=/tmp/mise-data` — mise's pipx backend delegates to `uv` which flocks inside installs/; `UV_CACHE_DIR=/tmp/uv-cache`). Repo clones stay on `/workspace` (local) for the same reason. The `AGENT_WORKSPACE` env var and `{workspace}` system prompt placeholder are wired for a future move to persistent repo clones if the mount adds `flock()` support. Each `runtimeSessionId` gets isolated storage (no cross-task leakage). 14-day TTL; data deleted on runtime version update. See [COMPUTE.md](../design/COMPUTE.md#session-storage-persistent-filesystem). - **Pre-execution task risk classification** — Add a lightweight risk classifier at task submission (before orchestration starts) to drive proportional controls for agent execution. 
Initial implementation can be rule-based and Blueprint-configurable: prompt keywords (for example, `database`, `auth`, `security`, `infrastructure`), metadata from issue labels, and file/path signals when available (for example, `**/migrations/**`, `**/.github/**`, infra directories). Persist `risk_level` (`low` / `medium` / `high` / `critical`) on the task record and use it to set defaults and policy: model tier/cascade, turn and budget defaults, prompt strictness/conservatism, approval requirements before merge, and optional autonomous-execution blocks for `critical` tasks. This is intentionally pre-execution and complements (does not replace) post-execution PR risk/blast-radius analysis. @@ -162,8 +177,11 @@ These practices apply continuously across iterations and are not treated as one- - **PR risk level and validation report** — Every agent-created PR includes a structured **validation report** (as a PR comment or check run) summarizing: Tier 1 results (pass/fail per tool), Tier 2 findings (code quality issues by severity), Tier 3 risk assessment (risk level, blast radius summary, affected modules). The PR is labeled with the computed risk level (`risk:low`, `risk:medium`, `risk:high`, `risk:critical`). Risk level is persisted in the task record for evaluation and trending. See [EVALUATION.md](../design/EVALUATION.md#pr-risk-level). - [x] **Other task types: PR review and PR-iteration** — Support additional task types beyond "implement from issue": **iterate on pull request** (`pr_iteration`) reads review comments and addresses them (implement changes, push updates, post summary). **Review pull request** (`pr_review`) is a read-only task type where the agent analyzes a PR's changes and posts structured review comments via the GitHub Reviews API. The `pr_review` agent runs without `Write` or `Edit` tools (defense-in-depth), skips `ensure_committed` and push, and treats build status as informational only. 
Each review comment uses a structured format: type (comment/question/issue/good_point), severity for issues (minor/medium/major/critical), title, description with memory attribution, proposed fix, and a ready-to-use AI prompt. The CLI exposes `--review-pr` (mutually exclusive with `--pr`). - [x] **Input guardrail screening (Bedrock Guardrails)** — Amazon Bedrock Guardrails screen task descriptions at submission time and assembled PR prompts during context hydration (`pr_iteration`, `pr_review`). Uses `PROMPT_ATTACK` content filter at `HIGH` strength. Fail-closed: Bedrock outages block tasks rather than letting unscreened content through. See [SECURITY.md](../design/SECURITY.md). +- [x] **Guardrail screening for GitHub issue content (`new_task`)** — Bedrock Guardrail screening now covers GitHub issue bodies and comments fetched during context hydration for `new_task` tasks. The assembled user prompt is screened through the `PROMPT_ATTACK` filter when issue content is present; when no issue content is fetched (task_description only), hydration-time screening is skipped because the task description was already screened at submission time. Same fail-closed pattern as PR tasks. See [SECURITY.md](../design/SECURITY.md). - **Multi-modal input** — Accept text and images (or other modalities) in the task payload; pass through to the agent. Gateway and schema support it; agent harness supports it where available. Primary use case: screenshots of bugs, UI mockups, or design specs attached to issues. +**Scope note:** Iteration 3c contains a wide range of items — from security-critical (GitHub App credentials, guardrail screening) to quality-improving (tiered validation, risk classification) to capability-expanding (multi-modal input). Items marked `[x]` are done. The remaining items can be delivered incrementally; the tiered validation pipeline and risk classification in particular can ship independently of per-repo credentials and multi-modal input.
+ **Builds on Iteration 3b:** Memory is operational; this iteration changes the orchestrator blueprint (tiered validation pipeline, new task type) and broadens the input schema. These are independently testable from memory. --- @@ -175,8 +193,11 @@ These practices apply continuously across iterations and are not treated as one- - **Review feedback memory loop (Tier 2)** — Capture PR review comments via GitHub webhook, extract actionable rules via LLM, and persist them as searchable memory so the agent internalizes reviewer preferences over time. This is the primary feedback loop between human reviewers and the agent — no shipping coding agent does this today. Requires a GitHub webhook → API Gateway → Lambda pipeline (separate from agent execution). Two types of extracted knowledge: repo-level rules ("don't use `any` types") and task-specific corrections. See [MEMORY.md](../design/MEMORY.md) (Review feedback memory) and [SECURITY.md](../design/SECURITY.md) (prompt injection via review comments). - **PR outcome tracking** — Track whether agent-created PRs are merged, revised, or rejected via GitHub webhooks (`pull_request.closed` events). A merged PR is a positive signal; closed-without-merge is a negative signal. These outcome signals feed into the evaluation pipeline and enable the episodic memory to learn which approaches succeed. See [MEMORY.md](../design/MEMORY.md) (PR outcome signals) and [EVALUATION.md](../design/EVALUATION.md). - **Evaluation pipeline (basic)** — Automated evaluation of agent runs: failure categorization (reasoning errors, missed instructions, missing tests, timeouts, tool failures). Results are stored and surfaced in observability dashboards. Basic version: rules-based analysis of task outcomes and agent responses. Track memory effectiveness metrics: first-review merge rate, revision cycles, CI pass rate on first push, review comment density, and repeated mistakes. 
Advanced version (ML-based trace analysis, A/B prompt comparison, feedback loop into prompts) is deferred to Iteration 5. See [EVALUATION.md](../design/EVALUATION.md) and [OBSERVABILITY.md](../design/OBSERVABILITY.md). +- **Per-tool-call structured telemetry** — Instrument the agent harness (`entrypoint.py`) to emit structured events for every tool call: tool name, input hash (SHA-256), output hash, duration, cost attribution, and result status. Events flow through the existing `create_event` path and are surfaced in CloudWatch. This is foundational for: (a) the evaluation pipeline (tool-call-level success/failure analysis), (b) the centralized policy framework Phase 1 (tool calls become `PolicyDecisionEvent` sources in Iteration 5), and (c) future mid-execution policy enforcement (tool-call interceptor in Iteration 5). Without per-tool-call telemetry, the platform can only observe sessions as opaque black boxes — model invocation logs capture LLM reasoning but not the tool execution that connects reasoning to action. Informed by the Guardian system's tool-call interception architecture (Hu et al. 2025). See [OBSERVABILITY.md](../design/OBSERVABILITY.md) and [SECURITY.md](../design/SECURITY.md) (Mid-execution enforcement). -**Builds on Iteration 3c:** Validation and PR review task type are in place; this iteration adds new infrastructure (webhook → Lambda → LLM extraction pipeline) and connects the feedback loop. Review feedback requires prompt injection mitigations (see SECURITY.md). +**Prerequisite: 3e Phase 1 (input hardening) ships with this iteration.** The review feedback memory loop writes attacker-controlled content (PR review comments) to persistent memory. Without content sanitization, provenance tagging, and integrity hashing (3e Phase 1), this creates a known attack vector — poisoned review comments stored as persistent rules that influence all future tasks on the repo. 
3e Phase 1 items (memory content sanitization, GitHub issue input sanitization, source provenance on memory writes, content integrity hashing) must be implemented before or concurrently with the review feedback pipeline. See [SECURITY.md](../design/SECURITY.md) (Prompt injection via PR review comments). + +**Builds on Iteration 3c:** Validation and PR review task type are in place; this iteration adds new infrastructure (webhook → Lambda → LLM extraction pipeline) and connects the feedback loop. --- @@ -188,7 +209,9 @@ These practices apply continuously across iterations and are not treated as one- Deep research identified **9 memory-layer security gaps** in the current architecture (see the [Memory Security Analysis](#memory-security-analysis) section in [MEMORY.md](../design/MEMORY.md)). The platform has strong network-layer security (VPC isolation, DNS Firewall, HTTPS-only egress) but lacks memory content validation, provenance tracking, trust scoring, anomaly detection, and rollback capabilities. Research shows that MINJA-style attacks achieve 95%+ injection success rates against undefended agent memory systems, and that emergent self-corruption (hallucination crystallization, error compounding feedback loops) is equally dangerous because it lacks an external attacker signature. -### Phase 1 — Input hardening +### Phase 1 — Input hardening (ships with Iteration 3d) + +**Phase 1 is a prerequisite for Iteration 3d's review feedback memory loop.** Attacker-controlled PR review comments must not enter persistent memory without sanitization, provenance tagging, and integrity checking. These items ship concurrently with 3d, not after it. - [ ] **Memory content sanitization** — Add content validation in `loadMemoryContext()` (`src/handlers/shared/memory.ts`). Scan retrieved memory records for injection patterns (embedded instructions, system prompt overrides, command injection payloads) before including them in the agent's context. 
Implement a `sanitizeMemoryContent()` function that strips or flags suspicious patterns while preserving legitimate repository knowledge. - [ ] **GitHub issue input sanitization** — Add trust-boundary-aware sanitization in `context-hydration.ts` for GitHub issue bodies and comments. These are attacker-controlled inputs that currently flow into the agent's context without differentiation. Strip control characters, embedded instruction patterns, and known injection payloads. Tag the content source as `untrusted-external` in the hydrated context. @@ -221,7 +244,7 @@ Deep research identified **9 memory-layer security gaps** in the current archite - The `MEMORY_REVIEW` task state is a new addition to the state machine (requires orchestrator, API contract, and observability updates). - Trust-scored retrieval changes the memory context budget allocation, which may affect prompt version hashing. -**Builds on Iteration 3d:** Review feedback memory and PR outcome tracking are in place; this iteration hardens the memory system that those components write to. The 4-phase approach allows incremental deployment with measurable security improvement at each phase. +**Builds on Iteration 3d:** Review feedback memory and PR outcome tracking are in place; Phases 2–4 harden the memory system that those components write to. Phase 1 (input hardening) ships with 3d as a prerequisite — see [Iteration 3d](#iteration-3d--review-feedback-loop-and-evaluation). The phased approach allows incremental deployment with measurable security improvement at each phase. --- @@ -255,9 +278,36 @@ Deep research identified **9 memory-layer security gaps** in the current archite - **Full cost management** — per-user and per-team monthly budgets, cost attribution dashboards (cost per task, per repo, per user), alerts when budgets are approaching limits. Token usage and compute cost are tracked per task and aggregated. The control panel (Iter 4) displays cost dashboards. 
- **Adaptive model router with cost-aware cascade** — Per-turn model selection via a lightweight heuristic engine. File reads and simple edits use a cheaper model (Haiku); multi-file refactors use Sonnet; complex reasoning escalates to Opus. Error escalation: if the agent fails twice on the same step, upgrade model for the retry. As the cost budget ceiling approaches, cascade down to cheaper models. Blueprint `modelCascade` config enables per-repo tuning. Potential 30-40% cost reduction on inference-dominated workloads. Requires agent harness changes to support mid-session model switching. - **Advanced evaluation and feedback loop** — Extend the basic evaluation pipeline from Iteration 3d: ML-based or LLM-based trace analysis (not just rules), A/B prompt comparison framework, automated feedback into prompt templates (e.g. "for repo X, always run tests before opening PR"), and per-repo or per-failure-type improvement tracking. Evaluation results can update the repo's agent configuration stored during onboarding. **Optional patterns from adaptive teaching research** (e.g. plan → targeted critique → execution; separate **evaluator** vs **prompt/reflection** roles; fitness from LLM judging plus efficiency metrics; evolution of teaching templates from failed trajectories with Pareto-style candidate sets for diverse failure modes) can inform offline or scheduled improvement of Blueprint prompts and checklists without replacing ABCA's core orchestrator. -- **Formal orchestrator verification (TLA+)** — Add a formal specification of the orchestrator in TLA+ and verify it with TLC model checking. 
Scope includes the task state machine (8 states, valid transitions, terminal states), concurrency admission control (atomic increment + max check), cancellation races (cancel arriving during any orchestration step), reconciler/orchestrator interleavings (counter drift correction while tasks are active), and the polling loop (agent writes terminal status, orchestrator observes and finalizes). Define invariants such as valid-state progression, no illegal transitions, and repo-level safety constraints (for example, at most one active `RUNNING` task per repo when configured). Keep the spec aligned with `src/constructs/task-status.ts` and orchestrator docs so regressions surface as model-check counterexamples before production. -- **Guardrails (output and tool-call)** — Extend Bedrock Guardrails from input screening (implemented in Iteration 3c) to **output filtering** and **agent tool-call guardrails**. Apply content filters to model responses during agent execution, restrict sensitive content generation, and enforce organizational policies (e.g. "do not modify files in `/infrastructure`"). See [SECURITY.md](../design/SECURITY.md). Guardrails configuration can be per-repo (via onboarding) or platform-wide. -- **Capability-based security model** — Fine-grained enforcement beyond Bedrock Guardrails, operating at three levels: (1) **Tool-level capabilities** — Bash command allowlist (git, npm, make permitted; curl, wget blocked), configurable per capability tier (standard / elevated / read-only). (2) **File-system scope** — Blueprint declares include/exclude path patterns; Write/Edit/Read tools are filtered to the declared scope. (3) **Input trust scoring** — Authenticated user input = trusted; external GitHub issues = untrusted; PR review comments entering memory = adversarial. Trust level selects the capability set. Essential once review feedback memory (Iter 3d) introduces attacker-controlled content into the agent's context. 
Blueprint `security` prop configures the capability profile per repo. +- **Formal orchestrator verification (TLA+)** — Add a formal specification of the orchestrator in TLA+ and verify it with TLC model checking. Scope includes the task state machine (8 states, valid transitions, terminal states), concurrency admission control (atomic increment + max check), cancellation races (cancel arriving during any orchestration step), reconciler/orchestrator interleavings (counter drift correction while tasks are active), and the polling loop (agent writes terminal status, orchestrator observes and finalizes). Define invariants such as valid-state progression, no illegal transitions, and repo-level safety constraints (for example, at most one active `RUNNING` task per repo when configured). Keep the spec aligned with `src/constructs/task-status.ts` and orchestrator docs so regressions surface as model-check counterexamples before production. **Note:** The TLA+ specification can be started earlier (e.g. during Iteration 3d) since the state machine and concurrency model are already stable. The spec is documentation that also catches bugs — writing it does not depend on Iteration 5 features. Consider starting the state machine and cancellation models as part of the ongoing engineering practice. +- **Guardrails (output and tool-call) with interceptor pattern** — Extend Bedrock Guardrails from input screening (implemented in Iteration 3c) to **output filtering** and **agent tool-call guardrails**. Apply content filters to model responses during agent execution, restrict sensitive content generation, and enforce organizational policies (e.g. "do not modify files in `/infrastructure`"). Guardrails configuration can be per-repo (via onboarding) or platform-wide. 
+ + **Tool-call interceptor (Guardian pattern):** Implement a policy-evaluation layer in the agent harness (`entrypoint.py`) that intercepts tool calls between the agent SDK's decision and actual execution — enforcement happens at tool-call time, not before the session starts (input guardrails) or after it ends (validation pipeline). Each tool call is evaluated against a policy: file path restrictions (deny writes to `.github/workflows/`, `**/migrations/**`), bash command allowlist per capability tier, cost threshold checks, and per-repo rules from Blueprint `security` configuration. The interceptor can **allow**, **modify** (e.g. redact secrets from output), or **deny** (return structured error to agent, which retries with a different approach). Evaluation is split into two stages: a **pre-execution stage** that validates tool inputs before the tool runs (file path deny patterns, bash command allowlist, cost threshold checks) and blocks disallowed operations before they execute, and a **post-execution stage** that screens tool outputs after the tool runs (PII patterns in file content, secrets in command output, sensitive data leakage) and can redact or flag content before it re-enters the agent context. Combined with per-tool-call structured telemetry (Iteration 3d), every interceptor decision is logged as a `PolicyDecisionEvent`. This pattern is informed by the Guardian system (Hu et al. 2025) — a "guardian agent" that monitors and can intercept tool calls before execution. See [SECURITY.md](../design/SECURITY.md) (Mid-execution enforcement). +- **Mid-execution behavioral monitoring** — Lightweight monitoring of agent behavior within a running session, filling the gap between input guardrails (pre-session) and validation (post-session). A **behavioral circuit breaker** in the agent harness tracks aggregate metrics: tool-call frequency (calls per minute), cumulative session cost, repeated failures on the same tool, and file mutation rate. 
When metrics exceed configurable thresholds (e.g. >50 tool calls/minute, >$10 cumulative cost, >5 consecutive failures on the same tool), the circuit breaker pauses or terminates the session and emits a `circuit_breaker_triggered` event. This catches runaway loops, cost explosions, and stuck agents before the hard session timeout. Thresholds are configurable per-repo via Blueprint `security` props. The circuit breaker operates within the existing agent harness — no sidecar process or external service required. For ABCA's single-agent-per-task model, embedded monitoring is simpler and more reliable than an external sidecar; sidecar architecture becomes relevant when multi-agent orchestration lands (Iteration 6). See [SECURITY.md](../design/SECURITY.md) (Mid-execution enforcement). +- **Centralized policy framework** — Consolidate the platform's distributed policy decisions into a unified policy framework and audit layer. Policy logic today is scattered across 20+ files (input validation in `validation.ts` and `create-task-core.ts`, admission control in `orchestrator.ts`, guardrail screening in `context-hydration.ts`, budget resolution across `validation.ts`/`orchestrator.ts`/`entrypoint.py`, tool access in `entrypoint.py`, network egress in `dns-firewall.ts`/`agent.ts`, state transitions in `task-status.ts`/`orchestrator.ts`). This fragmentation makes it difficult to audit what policies exist, verify consistency, or change policy behavior without touching multiple files. + + **Phase 1 — Policy audit normalization:** + Define a stable `PolicyDecisionEvent` schema: `decision_id` (ULID), `policy_name` (e.g. 
`admission.concurrency`, `budget.max_turns`, `guardrail.input_screening`), `policy_version`, `phase` (`submission` | `admission` | `pre_flight` | `hydration` | `session_start` | `session` | `finalization`), `input_hash` (SHA-256 of the decision input for reproducibility), `result` (`allow` | `deny` | `modify`), `reason_codes[]`, `enforcement` (`enforced` | `observed` | `steered`), and `task_id`. The three enforcement modes serve distinct purposes: `enforced` means the decision is binding (deny blocks, allow proceeds), `observed` means the decision is logged but not enforced (shadow mode for safe rollout), and `steered` means the decision modifies the input or output rather than blocking (redact PII, sanitize paths, mask secrets). New rules deploy in `observed` mode first; operators validate false-positive rates via `PolicyDecisionEvent` logs, then promote to `enforced` or `steered`. This observe-before-enforce workflow enables gradual rollout of security policies without risking false blocks on legitimate tasks. Emit a `policy_decision` event via `emitTaskEvent` at every existing enforcement point. Today, some decisions emit events (`admission_rejected`, `preflight_failed`, `guardrail_blocked`) while others silently return HTTP errors — normalize them all. This is pure instrumentation of existing code paths; no behavior change. + + **Phase 2 — Cedar policy engine:** + Introduce **Cedar** (not OPA) as the single policy engine for both **operational policy** (budget/quota/tool-access resolution, tool-call interception rules) and **authorization** (extended for multi-tenant access control when multi-user/team support lands). Cedar is AWS-native, has formal verification guarantees, and integrates with AgentCore Gateway. 
+ + **Policy resolution:** Cedar replaces the scattered budget/quota/tool-access merge logic (3-tier `max_turns` resolution, 2-tier `max_budget_usd` resolution, tool access determination in `entrypoint.py`, per-repo configuration merge in `loadBlueprintConfig`) with a unified policy evaluation. A thin `policy.ts` adapter module translates Cedar decisions into `PolicyDecision` objects (`PolicyInput` → Cedar evaluation → `PolicyDecision` with computed budgets, tool profile, risk tier, redaction directives) consumed by existing handlers — no new service, no network hop. Input validation (format checks, range checks) remains at the input boundary; Cedar handles resolution and policy composition. + + **Operational tool-call policies** use a **virtual-action classification pattern** to support the three enforcement modes (`enforced`, `observed`, `steered`) within Cedar's binary permit/forbid model. Instead of asking Cedar "allow or deny?", the interceptor evaluates against multiple virtual actions (`invoke_tool`, `invoke_tool_steered`, `invoke_tool_denied`) and uses the first permitted action to determine the mode. For example: `forbid(principal, action == Action::"invoke_tool", resource) when { resource.path like ".github/workflows/*" && principal.capability_tier != "elevated" }` blocks the call, while `permit(principal, action == Action::"invoke_tool_steered", resource) when { context.output_contains_pii }` triggers PII redaction. This keeps Cedar doing what it does best (binary decisions with formal verification) while the interceptor interprets the combination of decisions as allow/steer/deny. + + **Authorization policies (extended with multi-user/team):** When multi-user/team support lands, the same Cedar policy store expands to cover tenant-specific authorization: "users in team X can submit tasks to repos A, B, C", "team Y has a monthly budget of $500", "repos tagged `critical` require `pr_review` before `new_task`". 
This replaces the current single-dimensional ownership check (`record.user_id !== userId`) with multi-dimensional authorization (user, team, repo, action, risk level). No new policy engine — the same Cedar instance grows to cover authorization alongside operational policy. + + **Runtime-configurable policies:** Cedar policies are stored in Amazon Verified Permissions and loaded at hydration/session-start time. Policy changes take effect without CDK redeployment — operators update policies via the Verified Permissions API, and the next task evaluation picks them up. Deployment-time invariants (schema validation, state machine transitions) remain in CDK code. + + Policy versioning, rollback, and observe-before-enforce semantics carry forward from Phase 1. Cedar policies are evaluated at submission, admission, hydration, session (tool-call interception), and finalization. + + **Why not OPA:** OPA uses Rego (a custom DSL) and runs as a sidecar or external service. ABCA's policies change at the same cadence as infrastructure (deployed via CDK). A separate service with a separate language adds operational burden without proportionate benefit for a single-tenant platform. Cedar is a better fit: it's a typed language with formal verification, it's AWS-native (used by Amazon Verified Permissions and AgentCore Gateway), and policies can be evaluated in-process via the Cedar SDK without a separate service. Unlike OPA/Rego (which can return arbitrary JSON), Cedar's binary decisions require the virtual-action pattern for steering — but this keeps policy evaluation formally verifiable, which OPA cannot guarantee. + + **What stays out of the policy framework:** Schema validation (repo format, `max_turns` range, task description length) stays at the input boundary. State machine transitions stay in the orchestrator. DNS Firewall stays in CDK. These are infrastructure invariants, not policy decisions — they don't vary by tenant, user, or context. 
+ + See [SECURITY.md](../design/SECURITY.md) (Policy enforcement and audit). + +- **Capability-based security model** — Fine-grained enforcement beyond Bedrock Guardrails, operating at three levels: (1) **Tool-level capabilities** — Bash command allowlist (git, npm, make permitted; curl, wget blocked), configurable per capability tier (standard / elevated / read-only). (2) **File-system scope** — Blueprint declares include/exclude path patterns; Write/Edit/Read tools are filtered to the declared scope. (3) **Input trust scoring** — Authenticated user input = trusted; external GitHub issues = untrusted; PR review comments entering memory = adversarial. Trust level selects the capability set. Essential once review feedback memory (Iter 3d) introduces attacker-controlled content into the agent's context. Blueprint `security` prop configures the capability profile per repo. Capability tiers become inputs to the centralized policy framework and are governed by Cedar policies (Phase 2). - **Additional execution environment** — Support an alternative to AgentCore Runtime (e.g. ECS/Fargate, EKS) behind the **ComputeStrategy** interface (see [REPO_ONBOARDING.md](../design/REPO_ONBOARDING.md#compute-strategy-interface)). The orchestrator calls abstract methods (`startSession`, `stopSession`, `pollSession`); the implementation maps to AgentCore, Fargate, or EKS. Repos select the strategy via `compute_type` in their blueprint configuration. Reduces vendor lock-in and enables workloads that exceed AgentCore limits (e.g. GPU, larger images, longer sessions). The ComputeStrategy interface contract is defined in Iteration 3a; Iteration 5 adds alternative implementations. - **Full web dashboard** — Extend the control panel from Iteration 4: detailed dashboards (cost, performance, evaluation), reasoning trace viewer or log explorer (linked to OpenTelemetry traces from AgentCore), task submit/cancel from the UI, and admin views (system health, capacity, user management). 
- **Customization (advanced) with tiered tool access** — Agent can be extended with **MCP servers**, **plugins**, and **skills** beyond the basic prompt-from-repo customization in Iteration 3a. Composable tool sets per repo. MCP server discovery and lifecycle management. More tools increase behavioral unpredictability, so use a **tiered tool access model**: a minimal default tool set (bash allowlist, git, verify/lint/test) that all repos get, with MCP servers and plugins as opt-in per repo during onboarding. Per-repo tool profiles are stored in the onboarding config and loaded by the orchestrator. This balances flexibility with predictability. See [SECURITY.md](../design/SECURITY.md) and [REPO_ONBOARDING.md](../design/REPO_ONBOARDING.md). @@ -287,14 +337,14 @@ Deep research identified **9 memory-layer security gaps** in the current archite - **Iteration 1** — Core agent + git (isolated run, CLI submit, branch + PR, minimal task state). - **Iteration 2** — Production orchestrator, API contract, task management (list/status/cancel), durable execution, observability, threat model, network isolation, basic cost guardrails, CI/CD. -- **Iteration 3a** — Repo onboarding, DNS Firewall (domain-level egress filtering), webhook trigger, GitHub Actions, per-repo customization (prompt from repo), data retention, turn/iteration caps, cost budget caps, user prompt guide, agent harness improvements (turn budget, default branch, safety net, lint, softened conventions), operator dashboard, WAF, model invocation logging, input length limits. +- **Iteration 3a** — Repo onboarding, DNS Firewall (domain-level egress filtering), webhook trigger (foundation for GitHub Actions integration in Iteration 6), per-repo customization (prompt from repo), data retention, turn/iteration caps, cost budget caps, user prompt guide, agent harness improvements (turn budget, default branch, safety net, lint, softened conventions), operator dashboard, WAF, model invocation logging, input length limits. 
- **Iteration 3b** ✅ — Memory Tier 1 (repo knowledge, task episodes), insights, agent self-feedback, prompt versioning, per-prompt commit attribution. CDK L2 construct with named semantic + episodic strategies using namespace templates (`/{actorId}/knowledge/`, `/{actorId}/episodes/{sessionId}/`), fail-open memory load/write, orchestrator fallback episode, SHA-256 prompt hashing, git trailer attribution. -- **Iteration 3c** — Per-repo GitHub App credentials, orchestrator pre-flight checks (fail-closed before session start), persistent session storage for select caches (AgentCore Runtime `/mnt/workspace` mount for npm/Claude config; mise/uv/repo on local disk due to FUSE `flock()` limitation), pre-execution task risk classification (model/limits/approval policy selection), tiered validation pipeline (tool validation, code quality analysis, post-execution risk/blast radius analysis), PR risk level, PR review task type (`pr_review` — read-only structured review with tool restriction, defense-in-depth enforcement, CLI `--review-pr` flag), input guardrail screening (Bedrock Guardrails, fail-closed), multi-modal input. -- **Iteration 3d** — Review feedback memory loop (Tier 2), PR outcome tracking, evaluation pipeline (basic). -- **Iteration 3e** — Memory security and integrity: input hardening (content sanitization, provenance tagging, integrity hashing), trust-aware retrieval (trust scoring, temporal decay, guardian validation), detection and response (anomaly detection, circuit breaker, quarantine, rollback), advanced protections (write-ahead validation, behavioral drift detection, cryptographic provenance, red teaming). Addresses OWASP ASI06 (Memory & Context Poisoning). 
+- **Iteration 3c** — Per-repo GitHub App credentials via AgentCore Token Vault (`CfnWorkloadIdentity` + Token Vault credential provider for automatic token refresh; agent uses `GetWorkloadAccessToken` for long-running sessions; sets pattern for GitLab/Jira/Slack integrations), orchestrator pre-flight checks (fail-closed before session start), persistent session storage for select caches (AgentCore Runtime `/mnt/workspace` mount for npm/Claude config; mise/uv/repo on local disk due to FUSE `flock()` limitation), pre-execution task risk classification (model/limits/approval policy selection), tiered validation pipeline (tool validation, code quality analysis, post-execution risk/blast radius analysis), PR risk level, PR review task type (`pr_review` — read-only structured review with tool restriction, defense-in-depth enforcement, CLI `--review-pr` flag), input guardrail screening (Bedrock Guardrails, fail-closed — including GitHub issue content for `new_task`), multi-modal input. +- **Iteration 3d** — Review feedback memory loop (Tier 2), PR outcome tracking, evaluation pipeline (basic), per-tool-call structured telemetry (tool name, input/output hash, duration, cost — foundational for evaluation and Iteration 5 policy enforcement). Co-ships with 3e Phase 1 (memory input hardening: content sanitization, provenance tagging, integrity hashing) as a prerequisite for safely writing attacker-controlled content to memory. +- **Iteration 3e** — Memory security and integrity: Phase 1 (input hardening — content sanitization, provenance tagging, integrity hashing) ships with 3d as a prerequisite; Phases 2–4 follow: trust-aware retrieval (trust scoring, temporal decay, guardian validation), detection and response (anomaly detection, circuit breaker, quarantine, rollback), advanced protections (write-ahead validation, behavioral drift detection, cryptographic provenance, red teaming). Addresses OWASP ASI06 (Memory & Context Poisoning). 
- **Iteration 3bis** (hardening) — Orchestrator IAM grant for Memory (was silently AccessDenied), memory schema versioning (`schema_version: "2"`), Python repo format validation, severity-aware error logging in Python memory, narrowed entrypoint try-catch, orchestrator fallback episode observability, conditional writes in agent task_state.py (ConditionExpression guards), orchestrator Lambda error alarm (CloudWatch, retryAttempts: 0), concurrency counter reconciliation (scheduled Lambda, drift correction), multi-AZ NAT documentation (already configurable), Python unit tests (pytest), entrypoint decomposition (4 extracted subfunctions), dual prompt assembly deprecation docstring, graceful thread drain in server.py (shutdown hook + atexit), dead QUEUED state removal (8 states, 4 active). - **Iteration 4** — Additional git providers, visual proof (screenshots/videos), Slack channel, skills pipeline, user preference memory (Tier 3), control panel (restrict CORS to dashboard origin), real-time event streaming (WebSocket), live session replay and mid-task nudge, browser extension client, MFA for production. -- **Iteration 5** — Snapshot-on-schedule pre-warming, multi-user/team, memory isolation for multi-tenancy, full cost management, adaptive model router with cost-aware cascade, advanced evaluation (optional adaptive-teaching / trajectory-driven prompt patterns), formal orchestrator verification with TLA+/TLC, Bedrock Guardrails output/tool-call (PII, denied topics, output filters) — input screening in 3c, capability-based security model, alternate runtime, advanced customization with tiered tool access (MCP/plugins via AgentCore Gateway), full dashboard, AI-specific WAF rules. 
+- **Iteration 5** — Automated container (devbox) from repo, CI/CD pipeline, snapshot-on-schedule pre-warming, multi-user/team, memory isolation for multi-tenancy, full cost management, adaptive model router with cost-aware cascade, advanced evaluation (optional adaptive-teaching / trajectory-driven prompt patterns), formal orchestrator verification with TLA+/TLC, Bedrock Guardrails output/tool-call with Guardian interceptor pattern (pre/post tool-call evaluation stages — pre-execution validates inputs before tool runs, post-execution screens outputs for PII/secrets/sensitive data before re-entering agent context; per-tool-call policy evaluation between agent decision and execution; PII, denied topics, output filters) — input screening in 3c, mid-execution behavioral monitoring (tool-call frequency circuit breaker, cost runaway detection, aggregate behavioral bounds within agent harness), centralized policy framework (Phase 1: policy audit normalization with `PolicyDecisionEvent` schema across all enforcement points, three enforcement modes — `enforced` | `observed` | `steered` — with observe-before-enforce rollout workflow; Phase 2: Cedar as single policy engine for operational tool-call policy and authorization — virtual-action classification pattern for enforce/observe/steer within Cedar's binary model, replaces scattered budget/quota/tool-access resolution, runtime-configurable policies via Amazon Verified Permissions, extended for multi-tenant authorization when multi-user/team lands, AWS-native with formal verification, integrates with AgentCore Gateway), capability-based security model (tiers feed into policy framework), alternate runtime, advanced customization with tiered tool access (MCP/plugins via AgentCore Gateway), full dashboard, AI-specific WAF rules. - **Iteration 6** — Agent swarm orchestration, skills learning, multi-repo, iterative feedback and multiplayer sessions, HITL approval, scheduled triggers, CDK constructs. 
Design docs to keep in sync: [ARCHITECTURE.md](../design/ARCHITECTURE.md), [ORCHESTRATOR.md](../design/ORCHESTRATOR.md), [API_CONTRACT.md](../design/API_CONTRACT.md), [INPUT_GATEWAY.md](../design/INPUT_GATEWAY.md), [REPO_ONBOARDING.md](../design/REPO_ONBOARDING.md), [MEMORY.md](../design/MEMORY.md), [OBSERVABILITY.md](../design/OBSERVABILITY.md), [COMPUTE.md](../design/COMPUTE.md), [CONTROL_PANEL.md](../design/CONTROL_PANEL.md), [SECURITY.md](../design/SECURITY.md), [EVALUATION.md](../design/EVALUATION.md). diff --git a/docs/src/content/docs/design/Api-contract.md b/docs/src/content/docs/design/Api-contract.md index beaa6f4..b034ff5 100644 --- a/docs/src/content/docs/design/Api-contract.md +++ b/docs/src/content/docs/design/Api-contract.md @@ -621,6 +621,8 @@ Rate limit status is communicated via response headers (see Standard response he | `WEBHOOK_NOT_FOUND` | 404 | Webhook does not exist or belongs to a different user. | | `WEBHOOK_ALREADY_REVOKED` | 409 | Webhook is already revoked. | | `REPO_NOT_ONBOARDED` | 422 | Repository is not registered with the platform. Repos are onboarded via CDK deployment, not via a runtime API. There are no `/v1/repos` endpoints. | +| `GITHUB_UNREACHABLE` | 502 | The GitHub API was unreachable during the orchestrator's pre-flight check. The task fails fast without consuming compute. Transient — retry with backoff. | +| `REPO_NOT_FOUND_OR_NO_ACCESS` | 422 | The target repository does not exist or the configured credentials lack access. Checked during the orchestrator's pre-flight step (`GET /repos/{owner}/{repo}`). Distinct from `REPO_NOT_ONBOARDED` — the repo is onboarded with the platform, but it does not exist on GitHub or the configured credential cannot access it. | | `PR_NOT_FOUND_OR_CLOSED` | 422 | For `pr_iteration` and `pr_review` tasks: the specified PR does not exist, is not open, or is not accessible with the configured GitHub token. Checked during the orchestrator's pre-flight step.
| | `INVALID_STEP_SEQUENCE` | 500 | The blueprint's step sequence is invalid (missing required steps or incorrect ordering). This indicates a CDK configuration error that slipped past synth-time validation. Visible via `GET /v1/tasks/{id}` as `error_code`. See [REPO_ONBOARDING.md](/design/repo-onboarding#step-sequence-validation). | | `GUARDRAIL_BLOCKED` | 400 | Task description was blocked by Bedrock Guardrail content screening (prompt injection detected). Revise the task description and retry. | diff --git a/docs/src/content/docs/design/Architecture.md b/docs/src/content/docs/design/Architecture.md index dd5f330..d6297a1 100644 --- a/docs/src/content/docs/design/Architecture.md +++ b/docs/src/content/docs/design/Architecture.md @@ -133,7 +133,8 @@ Cost efficiency is a design principle. The following estimates are based on **50 | **API Gateway** (REST API, ~2K requests/day) | ~$5–15 | Per-request pricing | | **AgentCore Memory** (events, records, retrieval) | TBD | Pricing not fully public; proportional to usage | | **CloudWatch** (logs, metrics, traces, Transaction Search) | ~$20–50 | Log ingestion + storage | -| **Secrets Manager** (GitHub App keys, webhook secrets) | ~$5–10 | Per-secret/month + API calls | +| **Secrets Manager** (GitHub token or App private key, webhook secrets) | ~$5–10 | Per-secret/month + API calls | +| **AgentCore Identity** (planned — WorkloadIdentity, Token Vault credential provider) | TBD | Token vending API calls; replaces per-task Secrets Manager reads for GitHub tokens | | **S3** (artifacts, memory backups) | ~$1–5 | Storage + requests | | **Total** | **~$700–1,600/month** | | @@ -205,10 +206,17 @@ Each concept has a **source-of-truth document** and one or more documents that r | Agent swarm orchestration | ROADMAP.md (Iter 6) | — | | Adaptive model router | ROADMAP.md (Iter 5) | COST_MODEL.md | | Capability-based security | ROADMAP.md (Iter 5) | SECURITY.md | +| Centralized policy framework | ROADMAP.md (Iter 5), SECURITY.md (Policy 
enforcement and audit) | ORCHESTRATOR.md, OBSERVABILITY.md | +| GitHub App + AgentCore Token Vault | ROADMAP.md (Iter 3c), SECURITY.md (Authentication) | ORCHESTRATOR.md (context hydration), COMPUTE.md | | Live session replay | ROADMAP.md (Iter 4) | API_CONTRACT.md | | PR iteration task type | API_CONTRACT.md, ORCHESTRATOR.md | USER_GUIDE.md, PROMPT_GUIDE.md, SECURITY.md, AGENT_HARNESS.md | | PR review task type | API_CONTRACT.md, ORCHESTRATOR.md | USER_GUIDE.md, PROMPT_GUIDE.md, SECURITY.md, AGENT_HARNESS.md | +| Orchestrator pre-flight checks | ORCHESTRATOR.md (Context hydration, pre-flight sub-step) | API_CONTRACT.md (Error codes: GITHUB_UNREACHABLE, REPO_NOT_FOUND_OR_NO_ACCESS), ROADMAP.md (3c), SECURITY.md | | Bedrock Guardrail input screening | SECURITY.md (Input validation and guardrails) | ORCHESTRATOR.md (Context hydration), API_CONTRACT.md (Error codes), OBSERVABILITY.md (Alarms), ROADMAP.md (3c) | +| Memory input hardening (3e Phase 1) | ROADMAP.md (Iter 3e Phase 1, co-ships with 3d) | MEMORY.md, SECURITY.md (Memory-specific threats) | +| Per-tool-call structured telemetry | ROADMAP.md (Iter 3d) | SECURITY.md (Mid-execution enforcement), EVALUATION.md, OBSERVABILITY.md | +| Mid-execution behavioral monitoring | ROADMAP.md (Iter 5), SECURITY.md (Mid-execution enforcement) | OBSERVABILITY.md | +| Tool-call interceptor (Guardian pattern) | SECURITY.md (Mid-execution enforcement), ROADMAP.md (Iter 5) | REPO_ONBOARDING.md (Blueprint security props) | ### Per-repo model selection diff --git a/docs/src/content/docs/design/Observability.md b/docs/src/content/docs/design/Observability.md index 54bb586..83d8e07 100644 --- a/docs/src/content/docs/design/Observability.md +++ b/docs/src/content/docs/design/Observability.md @@ -133,6 +133,7 @@ Both are one-time, account-level setup steps — not managed by CDK. 
- **Guardrail screening events** — `guardrail_blocked` (content blocked by Bedrock Guardrail during hydration, with metadata: `reason`, `task_type`, `pr_number`, `sources`, `token_estimate`). Screening failures are logged with structured `metric_type` fields (not emitted as task events). - Time in each state (e.g. time in HYDRATING, time RUNNING, cold start to first agent activity). - Correlation with a task id and user id so users and operators can filter by task or user. +- **Planned (Iteration 5, Phase 1): `PolicyDecisionEvent`** — A unified event schema for all policy decisions across the task lifecycle: admission control, budget/quota resolution, guardrail screening, tool-call interception, and finalization. Each event carries: decision ID, policy name, version, phase, input hash, result (`allow` | `deny` | `modify`), reason codes, and enforcement mode (`enforced` | `observed` | `steered`). This normalizes the current mix of structured events (e.g. `admission_rejected`, `guardrail_blocked`) and silent HTTP errors into a single auditable event type. See [ROADMAP.md Iteration 5](/roadmap/roadmap) (Centralized policy framework) and [SECURITY.md](/design/security) (Policy enforcement and audit). ### Agent execution diff --git a/docs/src/content/docs/design/Orchestrator.md b/docs/src/content/docs/design/Orchestrator.md index e621bf4..8d1d88f 100644 --- a/docs/src/content/docs/design/Orchestrator.md +++ b/docs/src/content/docs/design/Orchestrator.md @@ -33,11 +33,11 @@ The orchestrator document describes **behavior** (state machine, admission, canc **Relationship to blueprints.** The orchestrator is a **framework** that enforces platform invariants — the task state machine, event emission, concurrency management, and cancellation handling — and delegates variable work to **blueprint-defined step implementations**. A blueprint defines which steps run, in what order, and how each step is implemented (built-in strategy, Lambda-backed custom step, or custom sequence). 
The default blueprint is defined in this document (Section 4). Per-repo customization (see [REPO_ONBOARDING.md](/design/repo-onboarding)) changes the steps the orchestrator executes, not the framework guarantees it enforces. The orchestrator wraps every step with state transitions, event emission, and cancellation checks — regardless of whether the step is a built-in or a custom Lambda. -### Iteration 1 vs. target state +### Iteration 1 vs. current state -In **Iteration 1** (current), the orchestrator does not exist as a distinct component. The client calls `invoke_agent_runtime` synchronously, the agent runs to completion inside the AgentCore Runtime MicroVM, and the caller infers the result from the response. There is no durable state, no task management, no concurrency control, and no recovery. If the caller disconnects, the session is orphaned. +In **Iteration 1**, the orchestrator did not exist as a distinct component. The client called `invoke_agent_runtime` synchronously, the agent ran to completion inside the AgentCore Runtime MicroVM, and the caller inferred the result from the response. There was no durable state, no task management, no concurrency control, and no recovery. -The **target state** (Iteration 2 and beyond) introduces a durable orchestrator that manages the full task lifecycle. This document designs for the target state; where Iteration 1 constraints apply, they are called out explicitly. +**Current state (Iteration 3+):** The durable orchestrator manages the full task lifecycle with checkpoint/resume (Lambda Durable Functions), the full state machine (8 states), concurrency control, cancellation, context hydration, memory integration, pre-flight checks, and multi-task-type support. This document describes the current architecture; where historical Iteration 1 constraints are referenced (e.g. synchronous invocation model), they are called out explicitly. --- @@ -183,7 +183,7 @@ See the Admission control section for details. 
Validates that the task is allowe #### Step 2: Context hydration (deterministic) -See the Context hydration section for details. Assembles the agent's prompt from multiple sources depending on task type. For `new_task`: user message, GitHub issue (title, body, comments), memory, repo configuration, and platform defaults. For `pr_iteration`: PR metadata, review comments, diff summary, and optional user instructions. An additional **pre-flight** sub-step verifies PR accessibility when `pr_number` is set (see [preflight.ts](../../cdk/src/handlers/shared/preflight.ts)). For PR tasks, the assembled prompt is screened through Amazon Bedrock Guardrails for prompt injection before the agent receives it. The output is a fully assembled prompt, ready to pass to the compute session. +See the Context hydration section for details. Assembles the agent's prompt from multiple sources depending on task type. For `new_task`: user message, GitHub issue (title, body, comments), memory, repo configuration, and platform defaults. For `pr_iteration`: PR metadata, review comments, diff summary, and optional user instructions. An additional **pre-flight** sub-step verifies PR accessibility when `pr_number` is set (see [preflight.ts](../../cdk/src/handlers/shared/preflight.ts)). The assembled prompt is screened through Amazon Bedrock Guardrails for prompt injection before the agent receives it (PR tasks: always screened; `new_task`: screened when issue content is present). The output is a fully assembled prompt, ready to pass to the compute session. #### Step 3: Session start and agent execution (deterministic start + agentic execution) @@ -228,7 +228,7 @@ When the orchestrator loads a task's `blueprint_config`, it resolves the step pi 1. **Load `RepoConfig`** from the `RepoTable` by `repo` (PK). Merge with platform defaults (see [REPO_ONBOARDING.md](/design/repo-onboarding#platform-defaults) for default values and override precedence). 2. 
**Resolve compute strategy** from `compute_type` (default: `agentcore`). The strategy implements the `ComputeStrategy` interface (see [REPO_ONBOARDING.md](/design/repo-onboarding#compute-strategy-interface)). -3. **Build step list.** If `step_sequence` is provided, use it; otherwise use the default sequence (`admission-control` → `hydrate-context` → `start-session` → `await-agent-completion` → `finalize`). For each entry, resolve to a built-in step function or a Lambda invocation wrapper. +3. **Build step list.** If `step_sequence` is provided, use it; otherwise use the default sequence (`admission-control` → `hydrate-context` → `pre-flight` → `start-session` → `await-agent-completion` → `finalize`). The `pre-flight` step runs fail-closed readiness checks (GitHub API reachability, repo access, PR accessibility for PR tasks) before consuming compute — see [ROADMAP.md Iteration 3c](/roadmap/roadmap). For each entry, resolve to a built-in step function or a Lambda invocation wrapper. 4. **Inject custom steps.** If `custom_steps` are defined and no explicit `step_sequence` is provided, insert them at their declared `phase` position (pre-agent steps before `start-session`, post-agent steps after `await-agent-completion`). 5. **Validate.** Check that required steps are present and correctly ordered (see [step sequence validation](/design/repo-onboarding#step-sequence-validation)). If invalid, fail the task with `INVALID_STEP_SEQUENCE`. 6. **Execute.** Iterate the resolved list. For each step: check cancellation, filter `blueprintConfig` to only the fields that step needs (stripping credential ARNs for custom Lambda steps), execute with retry policy, enforce `StepOutput.metadata` size budget (10KB), prune `previousStepResults` to last 5 steps, emit events. Built-in steps that need durable waits (e.g. 
`await-agent-completion`) receive the `DurableContext` and `ComputeStrategy` so they can call `waitForCondition` and `computeStrategy.pollSession()` internally — no name-based special-casing in the framework loop. @@ -257,6 +257,8 @@ Admission control runs immediately after the input gateway dispatches a "create - **Rejected.** Task transitions to `FAILED` with a reason (repo not onboarded, rate limit exceeded, concurrency limit, validation error). No counter change. - **Deduplicated.** Existing task ID returned. No new task created. +**Planned (Iteration 5):** Admission control checks will be governed by Cedar policies as part of the centralized policy framework. Cedar replaces the current inline admission logic with formally verifiable policy evaluation — the same Cedar policy store handles admission, budget/quota resolution, tool-call interception, and (when multi-user/team lands) tenant-scoped authorization. All admission decisions will emit a structured `PolicyDecisionEvent` for audit. See [ROADMAP.md Iteration 5](/roadmap/roadmap) (Centralized policy framework) and [SECURITY.md](/design/security) (Policy enforcement and audit). + --- ## Context hydration @@ -275,7 +277,7 @@ The orchestrator's `hydrateAndTransition()` function calls `hydrateContext()` (` 4. **Assembles the user prompt** based on task type: - **`new_task`**: A structured markdown document with Task ID, Repository, GitHub Issue section, and Task section. The format mirrors the Python `assemble_prompt()` in `agent/entrypoint.py`. - **`pr_iteration`**: Assembled by `assemblePrIterationPrompt()` — includes PR metadata (number, title, body), the diff summary (changed files and patches), review comments (inline and conversation), and optional user instructions from `task_description`. -5. 
**Screens through Bedrock Guardrail** (PR tasks only): For `pr_iteration` and `pr_review` tasks, the assembled user prompt is screened through Amazon Bedrock Guardrails (`screenWithGuardrail()`) using the `PROMPT_ATTACK` content filter. If the guardrail detects prompt injection, `guardrail_blocked` is set on the result and the orchestrator fails the task. If the Bedrock API is unavailable, a `GuardrailScreeningError` is thrown (fail-closed — unscreened content never reaches the agent). Task descriptions for all task types are screened at submission time in `create-task-core.ts`. +5. **Screens through Bedrock Guardrail** (PR tasks; `new_task` when issue content is present): The assembled user prompt is screened through Amazon Bedrock Guardrails (`screenWithGuardrail()`) using the `PROMPT_ATTACK` content filter. For `new_task` tasks without issue content, screening is skipped because the task description was already screened at submission time. If the guardrail detects prompt injection, `guardrail_blocked` is set on the result and the orchestrator fails the task. If the Bedrock API is unavailable, a `GuardrailScreeningError` is thrown (fail-closed — unscreened content never reaches the agent). Task descriptions for all task types are screened at submission time in `create-task-core.ts`. 6. **Returns a `HydratedContext` object** containing `version`, `user_prompt`, `issue`, `sources`, `token_estimate`, `truncated`, and for `pr_iteration`/`pr_review` tasks: `resolved_branch_name` and `resolved_base_branch`. The hydrated context is passed to the agent as a new `hydrated_context` field in the invocation payload, alongside the existing legacy fields (`repo_url`, `task_id`, `branch_name`, `issue_number`, `prompt`). The agent checks for `hydrated_context` with `version == 1`; if present, it uses the pre-assembled `user_prompt` directly and skips in-container GitHub fetching and prompt assembly. If absent (e.g. 
during a deployment rollout or when the secret ARN isn't configured), the agent falls back to its existing behavior. @@ -308,7 +310,7 @@ We evaluated routing GitHub API calls through AgentCore Gateway (with the GitHub 4. **User message.** The free-text task description provided by the user (via CLI `--task` flag or equivalent). May supplement or replace the issue context. -5. **Memory context (Iteration 3+).** Query long-term memory (e.g. AgentCore Memory) for relevant past context: insights from previous tasks on this repo, failure summaries, learned patterns. See [MEMORY.md](/design/memory) for how insights and code attribution feed into hydration. Not yet implemented. +5. **Memory context (Iteration 3b+).** Query long-term memory (AgentCore Memory) for relevant past context: repository knowledge (semantic search) and past task episodes (episodic search). Memory is loaded during context hydration via two parallel `RetrieveMemoryRecordsCommand` calls with a 5-second timeout and 2,000-token budget. See [MEMORY.md](/design/memory) for how insights and code attribution feed into hydration. Tier 1 (repo knowledge + task episodes) is operational since Iteration 3b. Tier 2 (review feedback rules) is planned for Iteration 3d. 6. **Attachments.** Images or files provided by the user (multi-modal input). Passed through to the agent prompt as base64 or URLs. @@ -399,7 +401,7 @@ The orchestrator records the `(task_id, session_id)` mapping in the task record ### Invocation model: synchronous vs. asynchronous -**Iteration 1 (current).** `invoke_agent_runtime` is called synchronously with a long read timeout. The call blocks until the agent finishes. This is simple but limits concurrency: one orchestrator process per task. +**Iteration 1 (historical).** `invoke_agent_runtime` was called synchronously with a long read timeout. The call blocked until the agent finished. This was simple but limited concurrency: one orchestrator process per task. 
**Target state.** The orchestrator uses AgentCore's **asynchronous processing model** ([Runtime async docs](https://docs.aws.amazon.com/bedrock-agentcore/latest/devguide/runtime-long-run.html)). The key capabilities: @@ -424,7 +426,7 @@ The orchestrator needs to know whether the session is still running. Two complem 2. **Re-invocation on the same session (target state).** The orchestrator calls `invoke_agent_runtime` with the same `runtimeSessionId`. Sticky routing ensures the request reaches the same instance. The agent's entrypoint can detect this is a poll (e.g., via a `poll: true` field in the payload or by tracking the initial task) and return the current status without starting a new task. This is a fast, lightweight call that returns immediately. -**Iteration 1.** The `invoke_agent_runtime` call blocks; when it returns, the session is over. No explicit liveness check needed. +**Iteration 1 (historical).** The `invoke_agent_runtime` call blocked; when it returned, the session was over. No explicit liveness check was needed. **Fallback: DynamoDB heartbeat (optional enhancement).** As defense in depth, the agent can write a heartbeat timestamp to DynamoDB every N minutes. The orchestrator reads it during its poll cycle. A missing heartbeat (e.g. none in the last 10 minutes while `/ping` reports `HealthyBusy`) could indicate the agent is stuck but not idle — triggering investigation or forced termination. @@ -434,7 +436,7 @@ AgentCore Runtime terminates sessions after 15 minutes of inactivity (no `/ping` **Mitigation (async model).** In the target state, the agent uses the AgentCore SDK's async task management: `add_async_task` registers a background task, and the SDK automatically reports `HealthyBusy` via `/ping` while any async task is active. AgentCore polls `/ping` and sees the agent is busy, preventing idle termination. When the agent calls `complete_async_task`, the status reverts to `Healthy`. 
The `/ping` endpoint runs on the main thread (or async event loop) while the coding task runs in a separate thread, so `/ping` remains responsive. -**Mitigation (Iteration 1 / current).** The agent container's FastAPI server defines `/ping` as a separate async endpoint. Because the agent task runs in a threadpool worker (not in the asyncio event loop), the `/ping` endpoint remains responsive while the agent works. AgentCore calls `/ping` periodically and the server responds, preventing idle timeout. +**Mitigation (current).** The agent container's FastAPI server defines `/ping` as a separate async endpoint. Because the agent task runs in a threadpool worker (not in the asyncio event loop), the `/ping` endpoint remains responsive while the agent works. AgentCore calls `/ping` periodically and the server responds, preventing idle timeout. **Risk.** If the agent's computation blocks the entire process (not just a thread) — e.g. due to a subprocess that consumes all resources, or the server becomes unresponsive — the `/ping` response may be delayed, triggering idle termination. This risk applies to both models. The defense is to ensure the coding task runs in a separate thread or process and does not starve the main thread. @@ -442,7 +444,7 @@ AgentCore Runtime terminates sessions after 15 minutes of inactivity (no `/ping` When the session ends (agent finishes, crashes, or is terminated), the orchestrator detects this: -- **Iteration 1:** The `invoke_agent_runtime` call returns (it blocks). The response body contains the agent's output (status, PR URL, cost, etc.). +- **Iteration 1 (historical):** The `invoke_agent_runtime` call returned (it blocked). The response body contained the agent's output (status, PR URL, cost, etc.). - **Target state:** The orchestrator polls the agent via re-invocation on the same session (see Invocation model above). 
Completion is detected when: (a) the agent responds with a "completed" or "failed" status in the poll response, or (b) the re-invocation fails because the session was terminated (idle timeout, crash, or 8-hour limit reached). In the durable orchestrator, a `waitForCondition` evaluates the poll result at each interval and resumes the pipeline when the condition is met. See the session monitoring pattern in the Implementation options section. ### External termination (cancellation) @@ -875,6 +877,7 @@ The primary table for task state. DynamoDB. | `cost_usd` | Number (optional) | Agent cost from the SDK result. | | `duration_s` | Number (optional) | Total task duration in seconds. | | `build_passed` | Boolean (optional) | Post-agent build verification result. | +| `lint_passed` | Boolean (optional) | Post-agent lint verification result. Recorded alongside `build_passed` during finalization; surfaced as a span attribute (`lint.passed`) and included in the PR body's verification section. | | `max_turns` | Number (optional) | Maximum agent turns for this task. Set during task creation — either the user-specified value (1–500) or the platform default (100). Included in the orchestrator payload and consumed by the agent SDK's `ClaudeAgentOptions(max_turns=...)`. | | `max_budget_usd` | Number (optional) | Maximum cost budget in USD for this task. Set during task creation — either the user-specified value ($0.01–$100) or the per-repo Blueprint default. When reached, the agent stops regardless of remaining turns. If neither the task nor the Blueprint specifies a value, no budget limit is applied (turn limit and session timeout still apply). Included in the orchestrator payload and consumed by the agent SDK's `ClaudeAgentOptions(max_budget_usd=...)`. | | `blueprint_config` | Map (optional) | Snapshot of the `RepoConfig` record at task creation time (or a reference to it). This ensures tasks are not affected by mid-flight config changes. 
The schema follows the `RepoConfig` interface defined in [REPO_ONBOARDING.md](/design/repo-onboarding#repoconfig-schema). Includes `compute_type`, `runtime_arn`, `model_id`, `max_turns`, `system_prompt_overrides`, `github_token_secret_arn`, `poll_interval_ms`, `custom_steps`, `step_sequence`, and `egress_allowlist`. The `max_turns` value from `blueprint_config` serves as the per-repo default; per-task `max_turns` (from the API request) takes higher priority. `max_budget_usd` follows the same 2-tier override pattern: per-task value takes priority over `blueprint_config.max_budget_usd`; if neither is specified, no budget limit is applied. | diff --git a/docs/src/content/docs/design/Repo-onboarding.md b/docs/src/content/docs/design/Repo-onboarding.md index 817bd5a..ce6b492 100644 --- a/docs/src/content/docs/design/Repo-onboarding.md +++ b/docs/src/content/docs/design/Repo-onboarding.md @@ -47,8 +47,21 @@ interface BlueprintProps { agent?: { modelId?: string; // foundation model override maxTurns?: number; // default turn limit for this repo + maxBudgetUsd?: number; // default cost budget for this repo ($0.01–$100) + memoryTokenBudget?: number; // memory context token budget override (default: 2000) systemPromptOverrides?: string; // additional system prompt instructions }; + // Security (planned — Iteration 5) + security?: { + capabilityTier?: 'standard' | 'elevated' | 'read-only'; // tool access tier + filePathDenyList?: string[]; // deny writes to these paths (e.g. 
'.github/workflows/') + bashAllowlist?: string[]; // allowed bash commands (overrides default tier allowlist) + circuitBreaker?: { // behavioral circuit breaker thresholds + maxCallsPerMinute?: number; // default: 50 + maxCostUsd?: number; // default: 10 + maxConsecutiveFailures?: number; // default: 5 + }; + }; // Credentials credentials?: { githubTokenSecretArn?: string; // per-repo GitHub token @@ -104,6 +117,8 @@ interface RepoConfig { // Agent model_id?: string; max_turns?: number; + max_budget_usd?: number; + memory_token_budget?: number; system_prompt_overrides?: string; // Credentials github_token_secret_arn?: string; @@ -182,6 +197,8 @@ Used when a `RepoConfig` field is absent: | `runtime_arn` | Stack-level `RUNTIME_ARN` env var | CDK stack props | | `model_id` | Claude Sonnet 4 | CDK stack props | | `max_turns` | 100 | Platform constant (`DEFAULT_MAX_TURNS`) | +| `max_budget_usd` | None (no budget limit) | — | +| `memory_token_budget` | 2000 | Platform constant | | `github_token_secret_arn` | Stack-level `GITHUB_TOKEN_SECRET_ARN` | CDK stack props | | `poll_interval_ms` | 30000 | Orchestrator constant | | `system_prompt_overrides` | None | — | @@ -207,7 +224,8 @@ The orchestrator loads the `RepoConfig` in the first step (after `load-task`) an | `load-blueprint` | `compute_type`, `custom_steps`, `step_sequence` (resolves the full step pipeline) | | `admission-control` | `status` (defense-in-depth; already checked at API level) | | `hydrate-context` | `github_token_secret_arn`, `system_prompt_overrides` | -| `start-session` | `compute_type`, `runtime_arn`, `model_id`, `max_turns` | +| `pre-flight` | `github_token_secret_arn` (verifies GitHub API reachability and repo access) | +| `start-session` | `compute_type`, `runtime_arn`, `model_id`, `max_turns`, `max_budget_usd` | | `await-agent-completion` | `poll_interval_ms` | | `finalize` | (custom post-agent steps run before finalize if configured) | | Custom steps (layer 2/3) | `custom_steps[].config` 
(step-specific configuration) | @@ -292,6 +310,7 @@ When a `stepSequence` is provided (Layer 3), the framework validates it at deplo | Step | Why it's required | |---|---| | `admission-control` | Enforces concurrency limits. Omitting it leaks concurrency slots. | +| `pre-flight` | Fail-closed readiness checks (GitHub API reachability, repo access). Omitting it allows doomed tasks to consume compute. | | `start-session` | Starts the compute session. Without it, nothing runs. | | `await-agent-completion` | Polls for session completion. Without it, the orchestrator cannot detect when the agent finishes. | | `finalize` | Releases concurrency slots, emits terminal events, persists outcome. Omitting it leaks concurrency counters and leaves tasks in non-terminal states. | @@ -300,6 +319,7 @@ When a `stepSequence` is provided (Layer 3), the framework validates it at deplo **Ordering constraints:** - `admission-control` must be first. +- `pre-flight` must precede `start-session`. - `start-session` must precede `await-agent-completion`. - `finalize` must be last. - Custom steps can be inserted between any adjacent pair of built-in steps, but cannot precede `admission-control` or follow `finalize`. diff --git a/docs/src/content/docs/design/Security.md b/docs/src/content/docs/design/Security.md index 36b2445..6704993 100644 --- a/docs/src/content/docs/design/Security.md +++ b/docs/src/content/docs/design/Security.md @@ -33,8 +33,8 @@ This aligns with AWS guidance: *Isolate sessions* (1.4) and use session-scoped s ## Authentication and authorization - **Authentication** — CLI users authenticate via Amazon Cognito (JWT). Webhook integrations authenticate via HMAC-SHA256 signatures (per-integration shared secrets stored in Secrets Manager). Each channel uses its own verification mechanism. The input gateway verifies every request before processing. -- **Credentials for the agent** — the agent does not hold long-lived secrets. 
GitHub access uses AgentCore Identity OAuth (token vault, auto-refresh); the agent receives tokens from the vault, not from application code. This reduces credential exposure and supports least-privilege at the identity layer. -**Dynamic secret substitution** — the principle that **the LLM and agent context never see raw credentials**. Secrets (e.g. API keys, OAuth tokens) are held by the runtime or gateway and injected only at tool-execution time when a request is made. They do not appear in prompts, conversation history, or logs, which limits exposure from prompt leakage, log ingestion, or context exfiltration. AgentCore Identity’s token vault provides this for GitHub: the agent calls tools that use the vault; the model never receives the token. +- **Credentials for the agent** — currently, GitHub access uses a shared PAT (or per-repo PAT) stored in Secrets Manager. The orchestrator reads the secret at hydration time and passes it to the agent runtime via environment variable. The runtime execution role has `secretsmanager:GetSecretValue` for the token secret. **Planned (Iteration 3c):** Replace the shared PAT with a **GitHub App** integrated via **AgentCore Identity Token Vault**. A `CfnWorkloadIdentity` resource will represent the agent's identity; the GitHub App's credentials will be registered as a Token Vault credential provider. At task hydration, the orchestrator will generate a short-lived installation token (1-hour TTL, scoped to the target repo) via the GitHub API. For long-running sessions, the agent calls `GetWorkloadAccessToken` to obtain a fresh token — the Token Vault handles refresh automatically. The runtime execution role already has the necessary permissions (`bedrock-agentcore:GetWorkloadAccessToken`, `GetWorkloadAccessTokenForJWT`, `GetWorkloadAccessTokenForUserId` — granted automatically by the AgentCore Runtime L2 construct).
This will replace the shared PAT with per-task, repo-scoped, short-lived tokens and set up the same pattern for future integrations (GitLab, Jira, Slack). See [ROADMAP.md Iteration 3c](/roadmap/roadmap). +- **Dynamic secret substitution** — the principle that **the LLM and agent context never see raw credentials**. Secrets (e.g. API keys, OAuth tokens) are held by the runtime or gateway and injected only at tool-execution time when a request is made. They do not appear in prompts, conversation history, or logs, which limits exposure from prompt leakage, log ingestion, or context exfiltration. Currently, the GitHub PAT is fetched from Secrets Manager by the agent at runtime and used for git operations and GitHub API calls; the model does not receive the token in its context. **Planned (Iteration 3c):** AgentCore Identity's Token Vault will provide dynamic credential vending for GitHub — the agent will call `GetWorkloadAccessToken` to obtain a scoped, short-lived token at runtime. The GitHub App private key will be stored in Secrets Manager and accessed only by the orchestrator (never by the agent or model). Future Gateway integration will enable credential injection for GitHub API calls without any token in the sandbox. - **Webhook secret management** — Each webhook integration has a unique 32-byte random secret stored in AWS Secrets Manager (`bgagent/webhook/{webhook_id}`). Secrets are shown to the user only once at creation time. On revocation, secrets are scheduled for deletion with a 7-day recovery window. The webhook task handler caches secrets in-memory with a 5-minute TTL to reduce Secrets Manager API calls while maintaining reasonable secret rotation latency. IAM policies are scoped to the `bgagent/webhook/*` prefix. - **Authorization** — any authenticated user can submit tasks; users can view and cancel only their **own** tasks (enforced by user_id). 
Webhook management endpoints enforce ownership — a user can only list, view, and revoke their own webhooks (non-owners receive 404, not 403, to avoid leaking webhook existence). @@ -54,7 +54,7 @@ The agent runs with **full permissions inside the sandbox** but cannot escape it - **Per-repo tool profiles:** Stored in the onboarding config and loaded by the orchestrator during context hydration. The agent harness configures the tool set based on the profile. See [REPO_ONBOARDING.md](/design/repo-onboarding) for per-repo configuration. - **Enforcement mechanism:** Tools are exposed to the agent through **AgentCore Gateway**, which provides built-in mechanisms to enforce access control. The Gateway acts as a managed proxy between the agent and external tools/APIs — only tools registered and authorized in the Gateway are reachable. Per-repo tool profiles map to Gateway tool configurations: the orchestrator registers the allowed tool set for each session, and the Gateway enforces it. This is a platform-level enforcement boundary (not a prompt-level suggestion), meaning the agent cannot bypass it by requesting tools that are not registered. For tools not mediated by the Gateway (e.g. direct bash commands), enforcement relies on the sandbox environment (filesystem permissions, network egress rules, and the bash allowlist configured in the agent harness). - **Rationale:** More tools increase the agent's search space, making behavior less predictable and harder to evaluate. A minimal default with opt-in expansion balances capability with reliability. -- **Guardrails** — Amazon Bedrock Guardrails are deployed for task input screening. The `task-input-guardrail` applies a `PROMPT_ATTACK` content filter at `HIGH` strength on task descriptions at submission time. This provides a first layer of defense against prompt injection in user-supplied task descriptions. 
A second screening point runs during context hydration for PR tasks (`pr_iteration`, `pr_review`), screening the assembled prompt (PR body, review comments, conversation comments, diff summary, task description) before the agent receives it. Both screening points follow a **fail-closed** pattern: if the Bedrock Guardrail API is unavailable, the task is rejected (submission-time returns HTTP 503; hydration-time transitions the task to FAILED). This ensures unscreened content never reaches the agent, even during Bedrock outages. Screening failures are logged with a structured `metric_type: 'guardrail_screening_failure'` field for CloudWatch alerting: +- **Guardrails** — Amazon Bedrock Guardrails are deployed for task input screening. The `task-input-guardrail` applies a `PROMPT_ATTACK` content filter at `HIGH` strength on task descriptions at submission time. This provides a first layer of defense against prompt injection in user-supplied task descriptions. A second screening point runs during context hydration for PR tasks (`pr_iteration`, `pr_review`) and for `new_task` tasks when GitHub issue content is present, screening the assembled prompt before the agent receives it. Both screening points follow a **fail-closed** pattern: if the Bedrock Guardrail API is unavailable, the task is rejected (submission-time returns HTTP 503; hydration-time transitions the task to FAILED). This ensures unscreened content never reaches the agent, even during Bedrock outages. Screening failures are logged with a structured `metric_type: 'guardrail_screening_failure'` field for CloudWatch alerting: ``` filter metric_type = "guardrail_screening_failure" | stats count() by bin(5m) ``` @@ -106,6 +106,54 @@ The `functionArn` in `CustomStepConfig` should be validated at CDK synth time to - **Per-repo `egressAllowlist` is a declarative annotation**, not per-session enforcement. All agent sessions share the same VPC and DNS Firewall rules. 
Per-repo allowlists are aggregated (union) into the platform-wide policy. - **DNS Firewall does not prevent IP-based connections.** A direct connection to an IP address (e.g. `curl https://1.2.3.4/`) bypasses DNS resolution. This is acceptable for the "confused agent" threat model (the agent uses domain names in its tool calls) but does not defend against a sophisticated adversary. Closing this gap would require AWS Network Firewall (SNI-based filtering) at ~$274/month/endpoint. +## Policy enforcement and audit + +The platform enforces policies at multiple points in the task lifecycle. Today, these policies are implemented inline across ~20 files (handlers, constructs, agent code). A centralized policy framework is planned (Iteration 5) to improve auditability, consistency, and change control. + +### Current policy enforcement map + +| Phase | Policy | Enforcement location | Audit trail | +|---|---|---|---| +| **Submission** | Input validation (format, ranges, lengths) | `validation.ts`, `create-task-core.ts` | HTTP 400 response only — no event emitted | +| **Submission** | Repo onboarding gate | `repo-config.ts` → `create-task-core.ts` | HTTP 422 response only — no event emitted | +| **Submission** | Guardrail input screening | `create-task-core.ts` (Bedrock Guardrails) | HTTP 400 response only — no event emitted | +| **Submission** | Idempotency check | `create-task-core.ts` | HTTP 409 response only — no event emitted | +| **Admission** | Concurrency limit | `orchestrator.ts` (`admissionControl`) | `admission_rejected` event emitted | +| **Pre-flight** | GitHub reachability, repo access, PR access | `preflight.ts` | `preflight_failed` event emitted | +| **Hydration** | Guardrail prompt screening (PR + issue content) | `context-hydration.ts` | `guardrail_blocked` event emitted | +| **Hydration** | Budget/quota resolution (3-tier max_turns, 2-tier max_budget_usd) | `orchestrator.ts` (`hydrateAndTransition`) | Values persisted on task record — no policy decision 
event | +| **Hydration** | Token budget for prompt assembly | `context-hydration.ts` | No event emitted | +| **Session** | Tool access control (pr_review restrictions) | `agent/entrypoint.py` | No event emitted | +| **Session** | Budget enforcement (turns, cost) | Claude Agent SDK | Agent SDK enforces; cost in task result | +| **Finalization** | Build/lint verification | `agent/entrypoint.py` | Results in task record and PR body | +| **Infrastructure** | DNS Firewall egress allowlist | `dns-firewall.ts`, `agent.ts` (CDK synth) | DNS query logs in CloudWatch | +| **Infrastructure** | WAF rate limiting | `task-api.ts` (CDK synth) | WAF logs | +| **State machine** | Valid transition enforcement | `task-status.ts`, `orchestrator.ts` | DynamoDB conditional writes | + +### Audit gaps (planned remediation) + +Submission-time policy decisions (validation, onboarding gate, guardrail screening, idempotency) currently return HTTP errors without emitting structured audit events. Budget resolution decisions are persisted but not logged as policy decisions with reason codes. Tool access selection is implicit (hardcoded in agent code) with no audit event. + +**Planned (Iteration 5, Phase 1):** A unified `PolicyDecisionEvent` schema will normalize all policy decisions into structured events with: decision ID, policy name, version, phase, input hash, result, reason codes, and enforcement mode. Enforcement supports three modes: `enforced` (decision is binding — deny blocks, allow proceeds), `observed` (decision is logged but not enforced — shadow mode for safe rollout), and `steered` (decision modifies the input or output rather than blocking — redact PII, sanitize paths, mask secrets). New rules deploy in `observed` mode first; operators validate false-positive rates via `PolicyDecisionEvent` logs, then promote to `enforced` or `steered`. This observe-before-enforce workflow enables gradual rollout of security policies without risking false blocks on legitimate tasks. 
See [ROADMAP.md Iteration 5](/roadmap/roadmap) for the full centralized policy framework design. + +### Policy resolution and authorization (planned) + +**Planned (Iteration 5, Phase 2):** Cedar as the single policy engine for both **operational policy** (budget/quota/tool-access resolution, tool-call interception rules) and **authorization** (multi-tenant access control, extended when multi-user/team lands). Cedar replaces the scattered merge logic across handlers with a unified policy evaluation. A thin `policy.ts` adapter translates Cedar decisions into `PolicyDecision` objects consumed by existing handlers. Cedar is preferred over OPA: it is AWS-native, has formal verification guarantees, integrates with AgentCore Gateway, and policies can be evaluated in-process via the Cedar SDK without a separate service dependency. Cedar's binary permit/forbid model supports the three enforcement modes (`enforced`, `observed`, `steered`) via a **virtual-action classification pattern**: the interceptor evaluates against multiple virtual actions (`invoke_tool`, `invoke_tool_steered`, `invoke_tool_denied`) and uses the first permitted action to determine the mode. For example, `forbid(principal, action == Action::"invoke_tool", resource) when { resource.path like ".github/workflows/*" && principal.capability_tier != "elevated" }` blocks the call, while `permit(principal, action == Action::"invoke_tool_steered", resource) when { context.output_contains_pii }` triggers PII redaction instead of blocking. Cedar policies are stored in Amazon Verified Permissions and loaded at hydration/session-start time — policy changes take effect without CDK redeployment. When multi-user/team support lands, the same Cedar policy store expands to cover tenant-specific authorization (user/team/repo scoping, team budgets, risk-based approval requirements). 
+ +### Mid-execution enforcement (planned) + +Today, once an agent session starts, the orchestrator can only observe it via polling (session running or terminated). There is no mechanism to detect or intervene when an agent goes off the rails mid-session — infinite tool-call loops, excessive file writes, or cost runaway. The orchestrator's hard timeout is the only backstop. + +**Planned (Iteration 5):** Two complementary mechanisms address this gap: + +1. **Tool-call interceptor (Guardian pattern)** — A policy-evaluation layer in the agent harness (`entrypoint.py`) that sits between the agent SDK's tool-call decision and actual tool execution. Evaluation is split into two stages: a **pre-execution stage** that validates tool inputs before the tool runs (file path deny patterns, bash command allowlist per capability tier, cost threshold checks, and per-repo rules from Blueprint `security` configuration) and blocks disallowed operations before they execute, and a **post-execution stage** that screens tool outputs after the tool runs (PII patterns in file content, secrets in command output, sensitive data leakage) and can redact or flag content before it re-enters the agent context. The interceptor can allow, modify (e.g. redact secrets from output), or deny tool calls. Denied calls return a structured error to the agent, which can retry with a different approach. This follows the Guardian interceptor pattern (Hu et al. 2025) — enforcement happens at tool-call time, not before the session starts (input guardrails) or after it ends (validation pipeline). Combined with per-tool-call structured telemetry (Iteration 3d), every interceptor decision is logged as a `PolicyDecisionEvent`. + +2. **Behavioral circuit breaker** — Lightweight monitoring of tool-call patterns within a session: call frequency (calls per minute), cumulative cost, repeated failures on the same tool, and file mutation rate. When metrics exceed configurable thresholds (e.g. 
>50 tool calls/minute, >$10 cumulative cost, >5 consecutive failures), the circuit breaker pauses or terminates the session and emits a `circuit_breaker_triggered` event. This catches runaway loops and cost explosions before the hard session timeout. Thresholds are configurable per-repo via Blueprint `security` props. + +These mechanisms are complementary: the interceptor enforces per-call policy (what the agent is allowed to do), while the circuit breaker enforces aggregate behavioral bounds (how the agent is behaving over time). Both operate within the existing agent harness — no sidecar process or external service required. For ABCA's single-agent-per-task model, embedded monitoring is simpler and more reliable than an external sidecar; sidecar architecture becomes relevant when multi-agent orchestration lands (Iteration 6). + +See [ROADMAP.md Iteration 5](/roadmap/roadmap) (Guardrails, Mid-execution behavioral monitoring). + ## Memory-specific threats ### OWASP ASI06 — Memory and context poisoning @@ -202,11 +250,11 @@ AgentCore Memory has **no native backup mechanism**. This is a significant gap f ## Known limitations -- **Single GitHub OAuth token** — one token may be shared for all users and repos the platform can access. Any authenticated user can trigger agent work against any repo that token can access. There is no per-user repo scoping. -- **Guardrails are input-only** — the `PROMPT_ATTACK` filter screens task descriptions at submission and assembled PR prompts during context hydration. No guardrails are applied to model output during agent execution or to review feedback entering the memory system. For `pr_iteration` and `pr_review` tasks, the assembled user prompt (including PR body, review comments, conversation comments, diff summary, and task description) is screened through the Bedrock Guardrail during hydration; if blocked, the task fails with a descriptive error. 
Guardrail screening follows a fail-closed pattern: a Bedrock outage blocks task submissions (HTTP 503) and fails PR tasks during hydration. +- **Single GitHub OAuth token (planned mitigation: GitHub App + AgentCore Token Vault)** — one token may be shared for all users and repos the platform can access. Any authenticated user can trigger agent work against any repo that token can access. There is no per-user repo scoping. **Planned mitigation (Iteration 3c):** Replace the shared PAT with a GitHub App integrated via AgentCore Token Vault. Each task receives a short-lived installation token scoped to the target repo only. The Token Vault manages refresh for long-running sessions. Combined with SSO (federated identity), tokens can be further scoped to the user's effective GitHub permissions. See [ROADMAP.md Iteration 3c](/roadmap/roadmap) for the implementation approach. +- **Guardrails are input-only** — the `PROMPT_ATTACK` filter screens task descriptions at submission and assembled prompts during context hydration (for PR tasks and for `new_task` tasks with issue content). No guardrails are applied to model output during agent execution or to review feedback entering the memory system. For `pr_iteration` and `pr_review` tasks, the assembled user prompt (including PR body, review comments, conversation comments, diff summary, and task description) is screened through the Bedrock Guardrail during hydration. For `new_task` tasks, the assembled prompt is screened when GitHub issue content is present; when no issue content is fetched, hydration-time screening is skipped because the task description was already screened at submission time. If blocked, the task fails with a descriptive error. Guardrail screening follows a fail-closed pattern: a Bedrock outage blocks task submissions (HTTP 503) and fails tasks during hydration. 
- **No memory content validation** — retrieved memory records are injected into the agent's context without sanitization, injection pattern scanning, or trust scoring. This is the most critical memory security gap (OWASP ASI06). See [MEMORY.md](/design/memory#memory-security-analysis) for the full gap analysis and [ROADMAP.md Iteration 3e](/roadmap/roadmap) for the remediation plan. - **No memory provenance or integrity checking** — memory entries carry no source attribution, content hashing, or trust metadata. The system cannot distinguish agent-generated memory from externally-influenced content. -- **GitHub issue content as untrusted input** — issue bodies and comments (attacker-controlled) are injected into the agent's context during hydration for `new_task` tasks without guardrail screening. Only the user-supplied `task_description` is screened at submission time; the fetched issue content bypasses both screening points. This is a known gap — extending hydration-time guardrail screening to `new_task` issue content is planned as a follow-up. +- **GitHub issue content as untrusted input** — issue bodies and comments (attacker-controlled) are injected into the agent's context during hydration for `new_task` tasks. The assembled user prompt is now screened through the Bedrock Guardrails `PROMPT_ATTACK` filter during context hydration when issue content is present; if prompt injection is detected, the task fails before reaching the agent. When no issue content is fetched (task_description only), hydration-time screening is skipped because the task description was already screened at submission time. - **PR review comments as untrusted input** — for `pr_iteration` and `pr_review` tasks, review comments, PR body, and conversation comments are fetched and injected into the agent's context. These are attacker-controlled inputs subject to the same prompt injection risks as issue comments. 
The assembled PR prompt is now screened by the Bedrock Guardrails `PROMPT_ATTACK` filter during context hydration; if prompt injection is detected, the task fails before reaching the agent. For `pr_review` tasks, additional defense-in-depth mitigates residual risk: the agent runs without `Write` or `Edit` tools, so even if injection bypasses the guardrail, the agent cannot modify files or push code. - **No memory rollback or quarantine** — the 365-day AgentCore Memory expiration is the only cleanup mechanism. There is no snapshot, rollback, or quarantine capability for suspected poisoned entries. - **No MFA** — Cognito MFA is disabled (CLI-based auth flow). Should be enabled for production deployments. diff --git a/docs/src/content/docs/roadmap/Roadmap.md b/docs/src/content/docs/roadmap/Roadmap.md index 453d76d..3697860 100644 --- a/docs/src/content/docs/roadmap/Roadmap.md +++ b/docs/src/content/docs/roadmap/Roadmap.md @@ -22,6 +22,7 @@ These practices apply continuously across iterations and are not treated as one- - `P-ABCA-4` memory fallback guarantee: if task finalization sees `memory_written = false`, fallback episode write is attempted and result is observable. - `P-ABCA-5` branch-name uniqueness: simultaneous tasks for the same repo generate distinct branch names (ULID-based suffix). - **Definition-of-done hook** — New orchestrator/concurrency changes should include: updated property mappings, at least one property-based test where applicable, and invariant notes in `ORCHESTRATOR.md` to keep docs and executable checks aligned. +- **Memory extraction prompt versioning** — Hash memory extraction prompts (in `agent/memory.py`: `write_task_episode`, `write_repo_learnings`) alongside system prompts so changes to extraction logic are tracked by `prompt_version`. This enables correlating memory quality changes with extraction prompt updates in the evaluation pipeline. 
--- @@ -155,7 +156,21 @@ These practices apply continuously across iterations and are not treated as one- **Goal:** Multi-layered validation catches errors, enforces code quality, and assesses change risk before PRs are created; the platform supports more than one task type; multi-modal input broadens what users can express. -- **Per-repo GitHub credentials (GitHub App)** — Replace the single shared OAuth token with a **GitHub App** installed per-organization or per-repository. Each onboarded repo is associated with a GitHub App installation that grants fine-grained permissions (read/write to that repo only). This eliminates the security gap where any authenticated user can trigger agent work against any repo the shared token can access. Token management (installation token generation, rotation) is handled by the platform, not by the agent. AgentCore Identity's token vault can store and refresh installation tokens. This is a prerequisite for any multi-user or multi-team deployment. +- **Per-repo GitHub credentials (GitHub App + AgentCore Token Vault)** — Replace the single shared OAuth token with a **GitHub App** installed per-organization or per-repository, using **AgentCore Identity's Token Vault** for credential management (recommended approach). Each onboarded repo is associated with a GitHub App installation that grants fine-grained permissions (read/write to that repo only). This eliminates the security gap where any authenticated user can trigger agent work against any repo the shared token can access. + + **Implementation approach — AgentCore Token Vault integration:** + 1. **WorkloadIdentity resource** — Create a `CfnWorkloadIdentity` in CDK representing the agent's identity, enabling token exchange with GitHub. + 2. **Token Vault credential provider** — Register the GitHub App's credentials in the AgentCore Token Vault. 
For server-to-server authentication, the GitHub App uses a private key to sign JWTs that are exchanged for installation tokens via the GitHub API. For the user-authorization OAuth flow (acting on behalf of a user), the App's client ID and client secret are registered as an OAuth credential provider. The Token Vault handles token refresh automatically — no expiry issues for long-running tasks (sessions exceeding 1 hour). + 3. **Orchestrator token generation** — At task hydration time, the orchestrator calls the GitHub API to generate an installation token (1-hour TTL, scoped to the target repo) and passes it to the agent at session start. + 4. **Agent-side token refresh** — For tasks running longer than 1 hour, the agent calls `GetWorkloadAccessToken` (permissions already granted to the runtime execution role: `bedrock-agentcore:GetWorkloadAccessToken`, `GetWorkloadAccessTokenForJWT`, `GetWorkloadAccessTokenForUserId`) to obtain a fresh token from the Token Vault. No Secrets Manager reads needed at runtime. + 5. **Blueprint configuration** — Extend `Blueprint` credentials with `githubAppId`, `githubAppPrivateKeySecretArn`, and `githubAppInstallationId` (per-org or per-repo). + 6. **Gateway integration (future)** — Wire an AgentCore Gateway target for GitHub API calls with automatic credential injection, enabling audit trails and Cedar policy enforcement per request. Git transport (clone/push) still requires a token in the remote URL, so Gateway-mediated access applies to API operations only. + + **Why Token Vault over Secrets Manager:** The runtime already has `GetWorkloadAccessToken` permissions (granted by the AgentCore Runtime construct). Token Vault is purpose-built for dynamic credential vending — it manages refresh automatically, supports arbitrary OAuth providers (GitHub, GitLab, Jira, Slack via the same pattern), and keeps credentials out of the sandbox as static secrets. This sets up the pattern for all future third-party integrations. 
+ + **Per-user identity flow (future, connects to SSO):** With a GitHub App, installation tokens can be scoped per-repository and per-permission set. Combined with federated identity (SSO), the orchestrator can look up the user's GitHub identity and generate tokens scoped to the target repo with only the permissions that user would have. Git commits are attributed to the GitHub App acting on behalf of the user. + + This is a prerequisite for any multi-user or multi-team deployment. - [x] **Orchestrator pre-flight checks (fail-closed)** — Add a `pre-flight` step before `start-session` so doomed tasks fail fast without consuming AgentCore runtime. The orchestrator performs lightweight readiness checks with strict timeouts (for example, 5 seconds): verify GitHub API reachability, verify repository existence and credential access (`GET /repos/{owner}/{repo}` or equivalent), and optionally verify AgentCore Runtime availability when a status probe exists. If pre-flight fails, the task transitions to `FAILED` immediately with a clear terminal reason (`GITHUB_UNREACHABLE`, `REPO_NOT_FOUND_OR_NO_ACCESS`, `RUNTIME_UNAVAILABLE`), releases the concurrency slot, emits an event/notification, and does **not** invoke the agent. Unlike memory/context hydration (fail-open), pre-flight is explicitly fail-closed: inability to verify repo access blocks execution by design. - [x] **Persistent session storage (cache layer)** — Enabled AgentCore Runtime persistent session storage (preview) for selective cache persistence across stop/resume. A per-session filesystem is mounted at `/mnt/workspace` via `FilesystemConfigurations` (CFN escape hatch on the L2 construct). The S3-backed FUSE mount does not support `flock()` (returns `ENOTRECOVERABLE` / os error 524), so only caches whose tools never call `flock()` go on the mount (`npm_config_cache`, `CLAUDE_CONFIG_DIR`). 
Caches for tools that use `flock()` stay on local ephemeral disk (`MISE_DATA_DIR=/tmp/mise-data` — mise's pipx backend delegates to `uv` which flocks inside installs/; `UV_CACHE_DIR=/tmp/uv-cache`). Repo clones stay on `/workspace` (local) for the same reason. The `AGENT_WORKSPACE` env var and `{workspace}` system prompt placeholder are wired for a future move to persistent repo clones if the mount adds `flock()` support. Each `runtimeSessionId` gets isolated storage (no cross-task leakage). 14-day TTL; data deleted on runtime version update. See [COMPUTE.md](/design/compute#session-storage-persistent-filesystem). - **Pre-execution task risk classification** — Add a lightweight risk classifier at task submission (before orchestration starts) to drive proportional controls for agent execution. Initial implementation can be rule-based and Blueprint-configurable: prompt keywords (for example, `database`, `auth`, `security`, `infrastructure`), metadata from issue labels, and file/path signals when available (for example, `**/migrations/**`, `**/.github/**`, infra directories). Persist `risk_level` (`low` / `medium` / `high` / `critical`) on the task record and use it to set defaults and policy: model tier/cascade, turn and budget defaults, prompt strictness/conservatism, approval requirements before merge, and optional autonomous-execution blocks for `critical` tasks. This is intentionally pre-execution and complements (does not replace) post-execution PR risk/blast-radius analysis. @@ -166,8 +181,11 @@ These practices apply continuously across iterations and are not treated as one- - **PR risk level and validation report** — Every agent-created PR includes a structured **validation report** (as a PR comment or check run) summarizing: Tier 1 results (pass/fail per tool), Tier 2 findings (code quality issues by severity), Tier 3 risk assessment (risk level, blast radius summary, affected modules). 
The PR is labeled with the computed risk level (`risk:low`, `risk:medium`, `risk:high`, `risk:critical`). Risk level is persisted in the task record for evaluation and trending. See [EVALUATION.md](/design/evaluation#pr-risk-level). - [x] **Other task types: PR review and PR-iteration** — Support additional task types beyond "implement from issue": **iterate on pull request** (`pr_iteration`) reads review comments and addresses them (implement changes, push updates, post summary). **Review pull request** (`pr_review`) is a read-only task type where the agent analyzes a PR's changes and posts structured review comments via the GitHub Reviews API. The `pr_review` agent runs without `Write` or `Edit` tools (defense-in-depth), skips `ensure_committed` and push, and treats build status as informational only. Each review comment uses a structured format: type (comment/question/issue/good_point), severity for issues (minor/medium/major/critical), title, description with memory attribution, proposed fix, and a ready-to-use AI prompt. The CLI exposes `--review-pr <pr-url>` (mutually exclusive with `--pr`). - [x] **Input guardrail screening (Bedrock Guardrails)** — Amazon Bedrock Guardrails screen task descriptions at submission time and assembled PR prompts during context hydration (`pr_iteration`, `pr_review`). Uses `PROMPT_ATTACK` content filter at `HIGH` strength. Fail-closed: Bedrock outages block tasks rather than letting unscreened content through. See [SECURITY.md](/design/security). +- [x] **Guardrail screening for GitHub issue content (`new_task`)** — Bedrock Guardrail screening now covers GitHub issue bodies and comments fetched during context hydration for `new_task` tasks. The assembled user prompt is screened through the `PROMPT_ATTACK` filter when issue content is present; when no issue content is fetched (task_description only), hydration-time screening is skipped because the task description was already screened at submission time. 
Same fail-closed pattern as PR tasks. See [SECURITY.md](/design/security). - **Multi-modal input** — Accept text and images (or other modalities) in the task payload; pass through to the agent. Gateway and schema support it; agent harness supports it where available. Primary use case: screenshots of bugs, UI mockups, or design specs attached to issues. +**Scope note:** Iteration 3c contains a wide range of items — from security-critical (GitHub App credentials, guardrail screening) to quality-improving (tiered validation, risk classification) to capability-expanding (multi-modal input). Items marked `[x]` are done. The remaining items can be delivered incrementally; the tiered validation pipeline and risk classification in particular can ship independently of per-repo credentials and multi-modal input. + **Builds on Iteration 3b:** Memory is operational; this iteration changes the orchestrator blueprint (tiered validation pipeline, new task type) and broadens the input schema. These are independently testable from memory. --- @@ -179,8 +197,11 @@ These practices apply continuously across iterations and are not treated as one- - **Review feedback memory loop (Tier 2)** — Capture PR review comments via GitHub webhook, extract actionable rules via LLM, and persist them as searchable memory so the agent internalizes reviewer preferences over time. This is the primary feedback loop between human reviewers and the agent — no shipping coding agent does this today. Requires a GitHub webhook → API Gateway → Lambda pipeline (separate from agent execution). Two types of extracted knowledge: repo-level rules ("don't use `any` types") and task-specific corrections. See [MEMORY.md](/design/memory) (Review feedback memory) and [SECURITY.md](/design/security) (prompt injection via review comments). - **PR outcome tracking** — Track whether agent-created PRs are merged, revised, or rejected via GitHub webhooks (`pull_request.closed` events). 
A merged PR is a positive signal; closed-without-merge is a negative signal. These outcome signals feed into the evaluation pipeline and enable the episodic memory to learn which approaches succeed. See [MEMORY.md](/design/memory) (PR outcome signals) and [EVALUATION.md](/design/evaluation). - **Evaluation pipeline (basic)** — Automated evaluation of agent runs: failure categorization (reasoning errors, missed instructions, missing tests, timeouts, tool failures). Results are stored and surfaced in observability dashboards. Basic version: rules-based analysis of task outcomes and agent responses. Track memory effectiveness metrics: first-review merge rate, revision cycles, CI pass rate on first push, review comment density, and repeated mistakes. Advanced version (ML-based trace analysis, A/B prompt comparison, feedback loop into prompts) is deferred to Iteration 5. See [EVALUATION.md](/design/evaluation) and [OBSERVABILITY.md](/design/observability). +- **Per-tool-call structured telemetry** — Instrument the agent harness (`entrypoint.py`) to emit structured events for every tool call: tool name, input hash (SHA-256), output hash, duration, cost attribution, and result status. Events flow through the existing `create_event` path and are surfaced in CloudWatch. This is foundational for: (a) the evaluation pipeline (tool-call-level success/failure analysis), (b) the centralized policy framework Phase 1 (tool calls become `PolicyDecisionEvent` sources in Iteration 5), and (c) future mid-execution policy enforcement (tool-call interceptor in Iteration 5). Without per-tool-call telemetry, the platform can only observe sessions as opaque black boxes — model invocation logs capture LLM reasoning but not the tool execution that connects reasoning to action. Informed by the Guardian system's tool-call interception architecture (Hu et al. 2025). See [OBSERVABILITY.md](/design/observability) and [SECURITY.md](/design/security) (Mid-execution enforcement). 
-**Builds on Iteration 3c:** Validation and PR review task type are in place; this iteration adds new infrastructure (webhook → Lambda → LLM extraction pipeline) and connects the feedback loop. Review feedback requires prompt injection mitigations (see SECURITY.md). +**Prerequisite: 3e Phase 1 (input hardening) ships with this iteration.** The review feedback memory loop writes attacker-controlled content (PR review comments) to persistent memory. Without content sanitization, provenance tagging, and integrity hashing (3e Phase 1), this creates a known attack vector — poisoned review comments stored as persistent rules that influence all future tasks on the repo. 3e Phase 1 items (memory content sanitization, GitHub issue input sanitization, source provenance on memory writes, content integrity hashing) must be implemented before or concurrently with the review feedback pipeline. See [SECURITY.md](/design/security) (Prompt injection via PR review comments). + +**Builds on Iteration 3c:** Validation and PR review task type are in place; this iteration adds new infrastructure (webhook → Lambda → LLM extraction pipeline) and connects the feedback loop. --- @@ -192,7 +213,9 @@ These practices apply continuously across iterations and are not treated as one- Deep research identified **9 memory-layer security gaps** in the current architecture (see the [Memory Security Analysis](#memory-security-analysis) section in [MEMORY.md](/design/memory)). The platform has strong network-layer security (VPC isolation, DNS Firewall, HTTPS-only egress) but lacks memory content validation, provenance tracking, trust scoring, anomaly detection, and rollback capabilities. Research shows that MINJA-style attacks achieve 95%+ injection success rates against undefended agent memory systems, and that emergent self-corruption (hallucination crystallization, error compounding feedback loops) is equally dangerous because it lacks an external attacker signature. 
-### Phase 1 — Input hardening +### Phase 1 — Input hardening (ships with Iteration 3d) + +**Phase 1 is a prerequisite for Iteration 3d's review feedback memory loop.** Attacker-controlled PR review comments must not enter persistent memory without sanitization, provenance tagging, and integrity checking. These items ship concurrently with 3d, not after it. - [ ] **Memory content sanitization** — Add content validation in `loadMemoryContext()` (`src/handlers/shared/memory.ts`). Scan retrieved memory records for injection patterns (embedded instructions, system prompt overrides, command injection payloads) before including them in the agent's context. Implement a `sanitizeMemoryContent()` function that strips or flags suspicious patterns while preserving legitimate repository knowledge. - [ ] **GitHub issue input sanitization** — Add trust-boundary-aware sanitization in `context-hydration.ts` for GitHub issue bodies and comments. These are attacker-controlled inputs that currently flow into the agent's context without differentiation. Strip control characters, embedded instruction patterns, and known injection payloads. Tag the content source as `untrusted-external` in the hydrated context. @@ -225,7 +248,7 @@ Deep research identified **9 memory-layer security gaps** in the current archite - The `MEMORY_REVIEW` task state is a new addition to the state machine (requires orchestrator, API contract, and observability updates). - Trust-scored retrieval changes the memory context budget allocation, which may affect prompt version hashing. -**Builds on Iteration 3d:** Review feedback memory and PR outcome tracking are in place; this iteration hardens the memory system that those components write to. The 4-phase approach allows incremental deployment with measurable security improvement at each phase. +**Builds on Iteration 3d:** Review feedback memory and PR outcome tracking are in place; Phases 2–4 harden the memory system that those components write to. 
Phase 1 (input hardening) ships with 3d as a prerequisite — see [Iteration 3d](#iteration-3d--review-feedback-loop-and-evaluation). The phased approach allows incremental deployment with measurable security improvement at each phase. --- @@ -259,9 +282,36 @@ Deep research identified **9 memory-layer security gaps** in the current archite - **Full cost management** — per-user and per-team monthly budgets, cost attribution dashboards (cost per task, per repo, per user), alerts when budgets are approaching limits. Token usage and compute cost are tracked per task and aggregated. The control panel (Iter 4) displays cost dashboards. - **Adaptive model router with cost-aware cascade** — Per-turn model selection via a lightweight heuristic engine. File reads and simple edits use a cheaper model (Haiku); multi-file refactors use Sonnet; complex reasoning escalates to Opus. Error escalation: if the agent fails twice on the same step, upgrade model for the retry. As the cost budget ceiling approaches, cascade down to cheaper models. Blueprint `modelCascade` config enables per-repo tuning. Potential 30-40% cost reduction on inference-dominated workloads. Requires agent harness changes to support mid-session model switching. - **Advanced evaluation and feedback loop** — Extend the basic evaluation pipeline from Iteration 3d: ML-based or LLM-based trace analysis (not just rules), A/B prompt comparison framework, automated feedback into prompt templates (e.g. "for repo X, always run tests before opening PR"), and per-repo or per-failure-type improvement tracking. Evaluation results can update the repo's agent configuration stored during onboarding. **Optional patterns from adaptive teaching research** (e.g. 
plan → targeted critique → execution; separate **evaluator** vs **prompt/reflection** roles; fitness from LLM judging plus efficiency metrics; evolution of teaching templates from failed trajectories with Pareto-style candidate sets for diverse failure modes) can inform offline or scheduled improvement of Blueprint prompts and checklists without replacing ABCA's core orchestrator. -- **Formal orchestrator verification (TLA+)** — Add a formal specification of the orchestrator in TLA+ and verify it with TLC model checking. Scope includes the task state machine (8 states, valid transitions, terminal states), concurrency admission control (atomic increment + max check), cancellation races (cancel arriving during any orchestration step), reconciler/orchestrator interleavings (counter drift correction while tasks are active), and the polling loop (agent writes terminal status, orchestrator observes and finalizes). Define invariants such as valid-state progression, no illegal transitions, and repo-level safety constraints (for example, at most one active `RUNNING` task per repo when configured). Keep the spec aligned with `src/constructs/task-status.ts` and orchestrator docs so regressions surface as model-check counterexamples before production. -- **Guardrails (output and tool-call)** — Extend Bedrock Guardrails from input screening (implemented in Iteration 3c) to **output filtering** and **agent tool-call guardrails**. Apply content filters to model responses during agent execution, restrict sensitive content generation, and enforce organizational policies (e.g. "do not modify files in `/infrastructure`"). See [SECURITY.md](/design/security). Guardrails configuration can be per-repo (via onboarding) or platform-wide. 
-- **Capability-based security model** — Fine-grained enforcement beyond Bedrock Guardrails, operating at three levels: (1) **Tool-level capabilities** — Bash command allowlist (git, npm, make permitted; curl, wget blocked), configurable per capability tier (standard / elevated / read-only). (2) **File-system scope** — Blueprint declares include/exclude path patterns; Write/Edit/Read tools are filtered to the declared scope. (3) **Input trust scoring** — Authenticated user input = trusted; external GitHub issues = untrusted; PR review comments entering memory = adversarial. Trust level selects the capability set. Essential once review feedback memory (Iter 3d) introduces attacker-controlled content into the agent's context. Blueprint `security` prop configures the capability profile per repo. +- **Formal orchestrator verification (TLA+)** — Add a formal specification of the orchestrator in TLA+ and verify it with TLC model checking. Scope includes the task state machine (8 states, valid transitions, terminal states), concurrency admission control (atomic increment + max check), cancellation races (cancel arriving during any orchestration step), reconciler/orchestrator interleavings (counter drift correction while tasks are active), and the polling loop (agent writes terminal status, orchestrator observes and finalizes). Define invariants such as valid-state progression, no illegal transitions, and repo-level safety constraints (for example, at most one active `RUNNING` task per repo when configured). Keep the spec aligned with `src/constructs/task-status.ts` and orchestrator docs so regressions surface as model-check counterexamples before production. **Note:** The TLA+ specification can be started earlier (e.g. during Iteration 3d) since the state machine and concurrency model are already stable. The spec is documentation that also catches bugs — writing it does not depend on Iteration 5 features. 
Consider starting the state machine and cancellation models as part of the ongoing engineering practice. +- **Guardrails (output and tool-call) with interceptor pattern** — Extend Bedrock Guardrails from input screening (implemented in Iteration 3c) to **output filtering** and **agent tool-call guardrails**. Apply content filters to model responses during agent execution, restrict sensitive content generation, and enforce organizational policies (e.g. "do not modify files in `/infrastructure`"). Guardrails configuration can be per-repo (via onboarding) or platform-wide. + + **Tool-call interceptor (Guardian pattern):** Implement a policy-evaluation layer in the agent harness (`entrypoint.py`) that intercepts tool calls between the agent SDK's decision and actual execution — enforcement happens at tool-call time, not before the session starts (input guardrails) or after it ends (validation pipeline). Each tool call is evaluated against a policy: file path restrictions (deny writes to `.github/workflows/`, `**/migrations/**`), bash command allowlist per capability tier, cost threshold checks, and per-repo rules from Blueprint `security` configuration. The interceptor can **allow**, **modify** (e.g. redact secrets from output), or **deny** (return a structured error to the agent, which retries with a different approach). Evaluation is split into two stages: a **pre-execution stage** that validates tool inputs before the tool runs (file path deny patterns, bash command allowlist, cost threshold checks) and blocks disallowed operations before they execute, and a **post-execution stage** that screens tool outputs after the tool runs (PII patterns in file content, secrets in command output, sensitive data leakage) and can redact or flag content before it re-enters the agent context. Combined with per-tool-call structured telemetry (Iteration 3d), every interceptor decision is logged as a `PolicyDecisionEvent`. This pattern is informed by the Guardian system (Hu et al. 
2025) — a "guardian agent" that monitors and can intercept tool calls before execution. See [SECURITY.md](/design/security) (Mid-execution enforcement). +- **Mid-execution behavioral monitoring** — Lightweight monitoring of agent behavior within a running session, filling the gap between input guardrails (pre-session) and validation (post-session). A **behavioral circuit breaker** in the agent harness tracks aggregate metrics: tool-call frequency (calls per minute), cumulative session cost, repeated failures on the same tool, and file mutation rate. When metrics exceed configurable thresholds (e.g. >50 tool calls/minute, >$10 cumulative cost, >5 consecutive failures on the same tool), the circuit breaker pauses or terminates the session and emits a `circuit_breaker_triggered` event. This catches runaway loops, cost explosions, and stuck agents before the hard session timeout. Thresholds are configurable per-repo via Blueprint `security` props. The circuit breaker operates within the existing agent harness — no sidecar process or external service required. For ABCA's single-agent-per-task model, embedded monitoring is simpler and more reliable than an external sidecar; sidecar architecture becomes relevant when multi-agent orchestration lands (Iteration 6). See [SECURITY.md](/design/security) (Mid-execution enforcement). +- **Centralized policy framework** — Consolidate the platform's distributed policy decisions into a unified policy framework and audit layer. Policy logic today is scattered across 20+ files (input validation in `validation.ts` and `create-task-core.ts`, admission control in `orchestrator.ts`, guardrail screening in `context-hydration.ts`, budget resolution across `validation.ts`/`orchestrator.ts`/`entrypoint.py`, tool access in `entrypoint.py`, network egress in `dns-firewall.ts`/`agent.ts`, state transitions in `task-status.ts`/`orchestrator.ts`). 
This fragmentation makes it difficult to audit what policies exist, verify consistency, or change policy behavior without touching multiple files. + + **Phase 1 — Policy audit normalization:** + Define a stable `PolicyDecisionEvent` schema: `decision_id` (ULID), `policy_name` (e.g. `admission.concurrency`, `budget.max_turns`, `guardrail.input_screening`), `policy_version`, `phase` (`submission` | `admission` | `pre_flight` | `hydration` | `session_start` | `session` | `finalization`), `input_hash` (SHA-256 of the decision input for reproducibility), `result` (`allow` | `deny` | `modify`), `reason_codes[]`, `enforcement` (`enforced` | `observed` | `steered`), and `task_id`. The three enforcement modes serve distinct purposes: `enforced` means the decision is binding (deny blocks, allow proceeds), `observed` means the decision is logged but not enforced (shadow mode for safe rollout), and `steered` means the decision modifies the input or output rather than blocking (redact PII, sanitize paths, mask secrets). New rules deploy in `observed` mode first; operators validate false-positive rates via `PolicyDecisionEvent` logs, then promote to `enforced` or `steered`. This observe-before-enforce workflow enables gradual rollout of security policies without risking false blocks on legitimate tasks. Emit a `policy_decision` event via `emitTaskEvent` at every existing enforcement point. Today, some decisions emit events (`admission_rejected`, `preflight_failed`, `guardrail_blocked`) while others silently return HTTP errors — normalize them all. This is pure instrumentation of existing code paths; no behavior change. + + **Phase 2 — Cedar policy engine:** + Introduce **Cedar** (not OPA) as the single policy engine for both **operational policy** (budget/quota/tool-access resolution, tool-call interception rules) and **authorization** (extended for multi-tenant access control when multi-user/team support lands). 
Cedar is AWS-native, has formal verification guarantees, and integrates with AgentCore Gateway. + + **Policy resolution:** Cedar replaces the scattered budget/quota/tool-access merge logic (3-tier `max_turns` resolution, 2-tier `max_budget_usd` resolution, tool access determination in `entrypoint.py`, per-repo configuration merge in `loadBlueprintConfig`) with a unified policy evaluation. A thin `policy.ts` adapter module translates Cedar decisions into `PolicyDecision` objects (`PolicyInput` → Cedar evaluation → `PolicyDecision` with computed budgets, tool profile, risk tier, redaction directives) consumed by existing handlers — no new service, no network hop. Input validation (format checks, range checks) remains at the input boundary; Cedar handles resolution and policy composition. + + **Operational tool-call policies** use a **virtual-action classification pattern** to support the three enforcement modes (`enforced`, `observed`, `steered`) within Cedar's binary permit/forbid model. Instead of asking Cedar "allow or deny?", the interceptor evaluates against multiple virtual actions (`invoke_tool`, `invoke_tool_steered`, `invoke_tool_denied`) and uses the first permitted action to determine the mode. For example: `forbid(principal, action == Action::"invoke_tool", resource) when { resource.path like ".github/workflows/*" && principal.capability_tier != "elevated" }` blocks the call, while `permit(principal, action == Action::"invoke_tool_steered", resource) when { context.output_contains_pii }` triggers PII redaction. This keeps Cedar doing what it does best (binary decisions with formal verification) while the interceptor interprets the combination of decisions as allow/steer/deny. 
+ + **Authorization policies (extended with multi-user/team):** When multi-user/team support lands, the same Cedar policy store expands to cover tenant-specific authorization: "users in team X can submit tasks to repos A, B, C", "team Y has a monthly budget of $500", "repos tagged `critical` require `pr_review` before `new_task`". This replaces the current single-dimensional ownership check (`record.user_id !== userId`) with multi-dimensional authorization (user, team, repo, action, risk level). No new policy engine — the same Cedar instance grows to cover authorization alongside operational policy. + + **Runtime-configurable policies:** Cedar policies are stored in Amazon Verified Permissions and loaded at hydration/session-start time. Policy changes take effect without CDK redeployment — operators update policies via the Verified Permissions API, and the next task evaluation picks them up. Deployment-time invariants (schema validation, state machine transitions) remain in CDK code. + + Policy versioning, rollback, and observe-before-enforce semantics carry forward from Phase 1. Cedar policies are evaluated at submission, admission, hydration, session (tool-call interception), and finalization. + + **Why not OPA:** OPA uses Rego (a custom DSL) and runs as a sidecar or external service. ABCA's policies change at the same cadence as infrastructure (deployed via CDK). A separate service with a separate language adds operational burden without proportionate benefit for a single-tenant platform. Cedar is a better fit: it's a typed language with formal verification, it's AWS-native (used by Amazon Verified Permissions and AgentCore Gateway), and policies can be evaluated in-process via the Cedar SDK without a separate service. Unlike OPA/Rego (which can return arbitrary JSON), Cedar's binary decisions require the virtual-action pattern for steering — but this keeps policy evaluation formally verifiable, which OPA cannot guarantee. 
+ + **What stays out of the policy framework:** Schema validation (repo format, `max_turns` range, task description length) stays at the input boundary. State machine transitions stay in the orchestrator. DNS Firewall stays in CDK. These are infrastructure invariants, not policy decisions — they don't vary by tenant, user, or context. + + See [SECURITY.md](/design/security) (Policy enforcement and audit). + +- **Capability-based security model** — Fine-grained enforcement beyond Bedrock Guardrails, operating at three levels: (1) **Tool-level capabilities** — Bash command allowlist (git, npm, make permitted; curl, wget blocked), configurable per capability tier (standard / elevated / read-only). (2) **File-system scope** — Blueprint declares include/exclude path patterns; Write/Edit/Read tools are filtered to the declared scope. (3) **Input trust scoring** — Authenticated user input = trusted; external GitHub issues = untrusted; PR review comments entering memory = adversarial. Trust level selects the capability set. Essential once review feedback memory (Iter 3d) introduces attacker-controlled content into the agent's context. Blueprint `security` prop configures the capability profile per repo. Capability tiers become inputs to the centralized policy framework and are governed by Cedar policies (Phase 2). - **Additional execution environment** — Support an alternative to AgentCore Runtime (e.g. ECS/Fargate, EKS) behind the **ComputeStrategy** interface (see [REPO_ONBOARDING.md](/design/repo-onboarding#compute-strategy-interface)). The orchestrator calls abstract methods (`startSession`, `stopSession`, `pollSession`); the implementation maps to AgentCore, Fargate, or EKS. Repos select the strategy via `compute_type` in their blueprint configuration. Reduces vendor lock-in and enables workloads that exceed AgentCore limits (e.g. GPU, larger images, longer sessions). 
The ComputeStrategy interface contract is defined in Iteration 3a; Iteration 5 adds alternative implementations. - **Full web dashboard** — Extend the control panel from Iteration 4: detailed dashboards (cost, performance, evaluation), reasoning trace viewer or log explorer (linked to OpenTelemetry traces from AgentCore), task submit/cancel from the UI, and admin views (system health, capacity, user management). - **Customization (advanced) with tiered tool access** — Agent can be extended with **MCP servers**, **plugins**, and **skills** beyond the basic prompt-from-repo customization in Iteration 3a. Composable tool sets per repo. MCP server discovery and lifecycle management. More tools increase behavioral unpredictability, so use a **tiered tool access model**: a minimal default tool set (bash allowlist, git, verify/lint/test) that all repos get, with MCP servers and plugins as opt-in per repo during onboarding. Per-repo tool profiles are stored in the onboarding config and loaded by the orchestrator. This balances flexibility with predictability. See [SECURITY.md](/design/security) and [REPO_ONBOARDING.md](/design/repo-onboarding). @@ -291,14 +341,14 @@ Deep research identified **9 memory-layer security gaps** in the current archite - **Iteration 1** — Core agent + git (isolated run, CLI submit, branch + PR, minimal task state). - **Iteration 2** — Production orchestrator, API contract, task management (list/status/cancel), durable execution, observability, threat model, network isolation, basic cost guardrails, CI/CD. -- **Iteration 3a** — Repo onboarding, DNS Firewall (domain-level egress filtering), webhook trigger, GitHub Actions, per-repo customization (prompt from repo), data retention, turn/iteration caps, cost budget caps, user prompt guide, agent harness improvements (turn budget, default branch, safety net, lint, softened conventions), operator dashboard, WAF, model invocation logging, input length limits. 
+- **Iteration 3a** — Repo onboarding, DNS Firewall (domain-level egress filtering), webhook trigger (foundation for GitHub Actions integration in Iteration 6), per-repo customization (prompt from repo), data retention, turn/iteration caps, cost budget caps, user prompt guide, agent harness improvements (turn budget, default branch, safety net, lint, softened conventions), operator dashboard, WAF, model invocation logging, input length limits. - **Iteration 3b** ✅ — Memory Tier 1 (repo knowledge, task episodes), insights, agent self-feedback, prompt versioning, per-prompt commit attribution. CDK L2 construct with named semantic + episodic strategies using namespace templates (`/{actorId}/knowledge/`, `/{actorId}/episodes/{sessionId}/`), fail-open memory load/write, orchestrator fallback episode, SHA-256 prompt hashing, git trailer attribution. -- **Iteration 3c** — Per-repo GitHub App credentials, orchestrator pre-flight checks (fail-closed before session start), persistent session storage for select caches (AgentCore Runtime `/mnt/workspace` mount for npm/Claude config; mise/uv/repo on local disk due to FUSE `flock()` limitation), pre-execution task risk classification (model/limits/approval policy selection), tiered validation pipeline (tool validation, code quality analysis, post-execution risk/blast radius analysis), PR risk level, PR review task type (`pr_review` — read-only structured review with tool restriction, defense-in-depth enforcement, CLI `--review-pr` flag), input guardrail screening (Bedrock Guardrails, fail-closed), multi-modal input. -- **Iteration 3d** — Review feedback memory loop (Tier 2), PR outcome tracking, evaluation pipeline (basic). 
-- **Iteration 3e** — Memory security and integrity: input hardening (content sanitization, provenance tagging, integrity hashing), trust-aware retrieval (trust scoring, temporal decay, guardian validation), detection and response (anomaly detection, circuit breaker, quarantine, rollback), advanced protections (write-ahead validation, behavioral drift detection, cryptographic provenance, red teaming). Addresses OWASP ASI06 (Memory & Context Poisoning). +- **Iteration 3c** — Per-repo GitHub App credentials via AgentCore Token Vault (`CfnWorkloadIdentity` + Token Vault credential provider for automatic token refresh; agent uses `GetWorkloadAccessToken` for long-running sessions; sets pattern for GitLab/Jira/Slack integrations), orchestrator pre-flight checks (fail-closed before session start), persistent session storage for select caches (AgentCore Runtime `/mnt/workspace` mount for npm/Claude config; mise/uv/repo on local disk due to FUSE `flock()` limitation), pre-execution task risk classification (model/limits/approval policy selection), tiered validation pipeline (tool validation, code quality analysis, post-execution risk/blast radius analysis), PR risk level, PR review task type (`pr_review` — read-only structured review with tool restriction, defense-in-depth enforcement, CLI `--review-pr` flag), input guardrail screening (Bedrock Guardrails, fail-closed — including GitHub issue content for `new_task`), multi-modal input. +- **Iteration 3d** — Review feedback memory loop (Tier 2), PR outcome tracking, evaluation pipeline (basic), per-tool-call structured telemetry (tool name, input/output hash, duration, cost — foundational for evaluation and Iteration 5 policy enforcement). Co-ships with 3e Phase 1 (memory input hardening: content sanitization, provenance tagging, integrity hashing) as a prerequisite for safely writing attacker-controlled content to memory. 
+- **Iteration 3e** — Memory security and integrity: Phase 1 (input hardening — content sanitization, provenance tagging, integrity hashing) ships with 3d as a prerequisite; Phases 2–4 follow: trust-aware retrieval (trust scoring, temporal decay, guardian validation), detection and response (anomaly detection, circuit breaker, quarantine, rollback), advanced protections (write-ahead validation, behavioral drift detection, cryptographic provenance, red teaming). Addresses OWASP ASI06 (Memory & Context Poisoning). - **Iteration 3bis** (hardening) — Orchestrator IAM grant for Memory (was silently AccessDenied), memory schema versioning (`schema_version: "2"`), Python repo format validation, severity-aware error logging in Python memory, narrowed entrypoint try-catch, orchestrator fallback episode observability, conditional writes in agent task_state.py (ConditionExpression guards), orchestrator Lambda error alarm (CloudWatch, retryAttempts: 0), concurrency counter reconciliation (scheduled Lambda, drift correction), multi-AZ NAT documentation (already configurable), Python unit tests (pytest), entrypoint decomposition (4 extracted subfunctions), dual prompt assembly deprecation docstring, graceful thread drain in server.py (shutdown hook + atexit), dead QUEUED state removal (8 states, 4 active). - **Iteration 4** — Additional git providers, visual proof (screenshots/videos), Slack channel, skills pipeline, user preference memory (Tier 3), control panel (restrict CORS to dashboard origin), real-time event streaming (WebSocket), live session replay and mid-task nudge, browser extension client, MFA for production. 
-- **Iteration 5** — Snapshot-on-schedule pre-warming, multi-user/team, memory isolation for multi-tenancy, full cost management, adaptive model router with cost-aware cascade, advanced evaluation (optional adaptive-teaching / trajectory-driven prompt patterns), formal orchestrator verification with TLA+/TLC, Bedrock Guardrails output/tool-call (PII, denied topics, output filters) — input screening in 3c, capability-based security model, alternate runtime, advanced customization with tiered tool access (MCP/plugins via AgentCore Gateway), full dashboard, AI-specific WAF rules. +- **Iteration 5** — Automated container (devbox) from repo, CI/CD pipeline, snapshot-on-schedule pre-warming, multi-user/team, memory isolation for multi-tenancy, full cost management, adaptive model router with cost-aware cascade, advanced evaluation (optional adaptive-teaching / trajectory-driven prompt patterns), formal orchestrator verification with TLA+/TLC, Bedrock Guardrails output/tool-call with Guardian interceptor pattern (pre/post tool-call evaluation stages — pre-execution validates inputs before the tool runs, post-execution screens outputs for PII/secrets/sensitive data before re-entering agent context; per-tool-call policy evaluation between agent decision and execution; PII, denied topics, output filters) — input screening in 3c, mid-execution behavioral monitoring (tool-call frequency circuit breaker, cost runaway detection, aggregate behavioral bounds within agent harness), centralized policy framework (Phase 1: policy audit normalization with `PolicyDecisionEvent` schema across all enforcement points, three enforcement modes — `enforced` | `observed` | `steered` — with observe-before-enforce rollout workflow; Phase 2: Cedar as single policy engine for operational tool-call policy and authorization — virtual-action classification pattern for enforce/observe/steer within Cedar's binary model, replaces scattered budget/quota/tool-access resolution, runtime-configurable policies 
via Amazon Verified Permissions, extended for multi-tenant authorization when multi-user/team lands, AWS-native with formal verification, integrates with AgentCore Gateway), capability-based security model (tiers feed into policy framework), alternate runtime, advanced customization with tiered tool access (MCP/plugins via AgentCore Gateway), full dashboard, AI-specific WAF rules. - **Iteration 6** — Agent swarm orchestration, skills learning, multi-repo, iterative feedback and multiplayer sessions, HITL approval, scheduled triggers, CDK constructs. Design docs to keep in sync: [ARCHITECTURE.md](/design/architecture), [ORCHESTRATOR.md](/design/orchestrator), [API_CONTRACT.md](/design/api-contract), [INPUT_GATEWAY.md](/design/input-gateway), [REPO_ONBOARDING.md](/design/repo-onboarding), [MEMORY.md](/design/memory), [OBSERVABILITY.md](/design/observability), [COMPUTE.md](/design/compute), [CONTROL_PANEL.md](/design/control-panel), [SECURITY.md](/design/security), [EVALUATION.md](/design/evaluation). 
diff --git a/package.json b/package.json index 52ebeea..57ecbf2 100644 --- a/package.json +++ b/package.json @@ -20,6 +20,7 @@ "@astrojs/check": "^0.9.8" }, "resolutions": { + "basic-ftp": "^5.2.2", "eslint-plugin-import/minimatch": "^3.1.2", "defu": "^6.1.6", "vite": "^7.3.2", diff --git a/yarn.lock b/yarn.lock index fd218fe..ecdc6cd 100644 --- a/yarn.lock +++ b/yarn.lock @@ -3938,10 +3938,10 @@ baseline-browser-mapping@^2.10.12: resolved "https://registry.yarnpkg.com/baseline-browser-mapping/-/baseline-browser-mapping-2.10.13.tgz#5a154cc4589193015a274e3d18319b0d76b9224e" integrity sha512-BL2sTuHOdy0YT1lYieUxTw/QMtPBC3pmlJC6xk8BBYVv6vcw3SGdKemQ+Xsx9ik2F/lYDO9tqsFQH1r9PFuHKw== -basic-ftp@^5.0.2: - version "5.2.1" - resolved "https://registry.yarnpkg.com/basic-ftp/-/basic-ftp-5.2.1.tgz#818ba176e0e52a9e746e8576331f7e9474b94668" - integrity sha512-0yaL8JdxTknKDILitVpfYfV2Ob6yb3udX/hK97M7I3jOeznBNxQPtVvTUtnhUkyHlxFWyr5Lvknmgzoc7jf+1Q== +basic-ftp@^5.0.2, basic-ftp@^5.2.2: + version "5.2.2" + resolved "https://registry.yarnpkg.com/basic-ftp/-/basic-ftp-5.2.2.tgz#4cb2422deddf432896bdb3c9b8f13b944ad4842c" + integrity sha512-1tDrzKsdCg70WGvbFss/ulVAxupNauGnOlgpyjKzeQxzyllBLS0CGLV7tjIXTK3ZQA9/FBEm9qyFFN1bciA6pw== bcp-47-match@^2.0.0: version "2.0.3"