aws-samples
diff --git a/README.md b/README.md
Lines changed: 2 additions & 2 deletions
diff --git a/cdk/src/handlers/shared/context-hydration.ts b/cdk/src/handlers/shared/context-hydration.ts
Lines changed: 14 additions & 5 deletions
diff --git a/cdk/test/handlers/shared/context-hydration.test.ts b/cdk/test/handlers/shared/context-hydration.test.ts
Lines changed: 65 additions & 17 deletions
diff --git a/docs/design/API_CONTRACT.md b/docs/design/API_CONTRACT.md
Lines changed: 2 additions & 0 deletions
diff --git a/docs/design/ARCHITECTURE.md b/docs/design/ARCHITECTURE.md
Lines changed: 9 additions & 1 deletion
diff --git a/docs/design/OBSERVABILITY.md b/docs/design/OBSERVABILITY.md
Lines changed: 1 addition & 0 deletions
@@ -56,11 +56,11 @@ ABCA is under active development. The platform ships iteratively — each iterat
 |---|---|---|
 | **1** | Done | Agent runs on AWS, CLI submit, branch + PR |
 | **2** | Done | Production orchestrator, API contract, task management, observability, security, webhooks |
-| **3a** | Done | Repo onboarding, per-repo GitHub App credentials, turn caps, prompt guide |
+| **3a** | Done | Repo onboarding, per-repo credentials, turn caps, prompt guide |
 | **3b** | Done | Memory Tier 1, insights, agent self-feedback, prompt versioning, commit attribution |
 | **3bis** | Done | Hardening — reconciler error tracking, error serialization, test coverage gaps |
 | **3c** | WIP | Pre-flight checks, persistent session storage, deterministic validation, PR review task type, multi-modal input, input guardrail screening |
-| **3d** | Planned | Review feedback loop, PR outcome tracking, evaluation pipeline |
+| **3d** | Planned | Review feedback loop, PR outcome tracking, evaluation pipeline, memory input hardening |
 | **4** | Planned | GitLab, visual proof, Slack, control panel, WebSocket streaming |
 | **5** | Planned | Pre-warming, multi-user/team, cost management, output guardrails, alternate runtime |
 | **6** | Planned | Skills learning, multi-repo, iterative feedback, multiplayer, CDK constructs |
 
@@ -793,12 +793,12 @@ export interface HydrateContextOptions {
 
 /**
  * Hydrate context for a task: resolve GitHub token, fetch issue/PR, enforce
- * token budget, assemble the user prompt, and (for PR tasks) screen through
- * Bedrock Guardrail for prompt injection.
+ * token budget, assemble the user prompt, and screen through Bedrock Guardrail
+ * for prompt injection (PR tasks; new_task when issue content is present).
  * @param task - the task record from DynamoDB.
  * @param options - optional per-repo overrides.
- * @returns the hydrated context. For PR tasks, `guardrail_blocked` is set when
- *          the guardrail intervened.
+ * @returns the hydrated context. `guardrail_blocked` is set when the guardrail
+ *          intervened (PR tasks: always screened; new_task: screened when issue content is present).
  * @throws GuardrailScreeningError when the Bedrock Guardrail API call fails
  *         (fail-closed — propagated to prevent unscreened content from reaching the agent).
  */
@@ -990,13 +990,19 @@ export async function hydrateContext(task: TaskRecord, options?: HydrateContextO
       return prContext;
     }
 
-    // Standard task: existing behavior
+    // Standard task
     const budgetResult = enforceTokenBudget(issue, task.task_description, USER_PROMPT_TOKEN_BUDGET);
     issue = budgetResult.issue;
 
     userPrompt = assembleUserPrompt(task.task_id, task.repo, issue, budgetResult.taskDescription);
     const tokenEstimate = estimateTokens(userPrompt);
 
+    // Screen assembled prompt when it includes GitHub issue content (attacker-controlled input).
+    // Skipped when no issue is present — task_description is already screened at submission time.
+    const guardrailAction = issue
+      ? await screenWithGuardrail(userPrompt, task.task_id)
+      : undefined;
+
     return {
       version: 1,
       user_prompt: userPrompt,
@@ -1005,6 +1011,9 @@ export async function hydrateContext(task: TaskRecord, options?: HydrateContextO
       sources,
       token_estimate: tokenEstimate,
       truncated: budgetResult.truncated,
+      ...(guardrailAction === 'GUARDRAIL_INTERVENED' && {
+        guardrail_blocked: 'Task context blocked by content policy',
+      }),
     };
   } catch (err) {
     // Guardrail failures must propagate (fail-closed) — unscreened content must not reach the agent
 
@@ -546,6 +546,8 @@ describe('hydrateContext', () => {
         ok: true,
         json: async () => ({ number: 42, title: 'Bug', body: 'Details', comments: 0 }),
       });
+    // Guardrail screens assembled prompt when issue content is present
+    mockBedrockSend.mockResolvedValueOnce({ action: 'NONE' });
 
     const task = { ...baseTask, issue_number: 42, task_description: 'Fix it' };
     const result = await hydrateContext(task as any);
@@ -571,6 +573,9 @@ describe('hydrateContext', () => {
     expect(result.sources).toContain('task_description');
     expect(result.issue).toBeUndefined();
     expect(result.user_prompt).toContain('Fix it');
+    // No issue content fetched — guardrail should not be called (task_description already screened)
+    expect(result.guardrail_blocked).toBeUndefined();
+    expect(mockBedrockSend).not.toHaveBeenCalled();
   });
 
   test('no issue number — assembles from task description only', async () => {
@@ -628,6 +633,8 @@ describe('hydrateContext', () => {
       ok: true,
       json: async () => ({ number: 10, title: 'Test', body: 'body', comments: 0 }),
     });
+    // Guardrail screens assembled prompt when issue content is present
+    mockBedrockSend.mockResolvedValueOnce({ action: 'NONE' });
 
     const task = { ...baseTask, issue_number: 10, task_description: 'Fix' };
     const result = await hydrateContext(task as any, { githubTokenSecretArn: perRepoArn });
@@ -1027,7 +1034,7 @@ describe('screenWithGuardrail', () => {
 });
 
 // ---------------------------------------------------------------------------
-// hydrateContext — guardrail screening for PR tasks
+// hydrateContext — guardrail screening
 // ---------------------------------------------------------------------------
 
 describe('hydrateContext — guardrail screening', () => {
@@ -1113,29 +1120,70 @@ describe('hydrateContext — guardrail screening', () => {
     expect(mockBedrockSend).toHaveBeenCalledTimes(1);
   });
 
-  test('does not invoke guardrail for new_task type', async () => {
+  // --- new_task guardrail screening ---
+
+  const baseNewTask = {
+    task_id: 'TASK-NEW-001',
+    user_id: 'user-123',
+    status: 'SUBMITTED',
+    repo: 'org/repo',
+    branch_name: 'bgagent/TASK-NEW-001/fix',
+    channel_source: 'api',
+    status_created_at: 'SUBMITTED#2024-01-01T00:00:00Z',
+    created_at: '2024-01-01T00:00:00Z',
+    updated_at: '2024-01-01T00:00:00Z',
+    task_type: 'new_task',
+    task_description: 'Fix it',
+  };
+
+  function mockIssueFetch(): void {
     mockSmSend.mockResolvedValueOnce({ SecretString: 'ghp_test' });
     mockFetch.mockResolvedValueOnce({
       ok: true,
       json: async () => ({ number: 42, title: 'Bug', body: 'Details', comments: 0 }),
     });
+  }
 
-    const newTask = {
-      task_id: 'TASK-NEW-001',
-      user_id: 'user-123',
-      status: 'SUBMITTED',
-      repo: 'org/repo',
-      branch_name: 'bgagent/TASK-NEW-001/fix',
-      channel_source: 'api',
-      status_created_at: 'SUBMITTED#2024-01-01T00:00:00Z',
-      created_at: '2024-01-01T00:00:00Z',
-      updated_at: '2024-01-01T00:00:00Z',
-      task_type: 'new_task',
-      issue_number: 42,
-      task_description: 'Fix it',
-    };
-    const result = await hydrateContext(newTask as any);
+  test('invokes guardrail for new_task with issue content', async () => {
+    mockIssueFetch();
+    mockBedrockSend.mockResolvedValueOnce({ action: 'NONE' });
+
+    const result = await hydrateContext({ ...baseNewTask, issue_number: 42 } as any);
+    expect(result.guardrail_blocked).toBeUndefined();
+    expect(mockBedrockSend).toHaveBeenCalledTimes(1);
+  });
+
+  test('does not invoke guardrail for new_task without issue_number', async () => {
+    const result = await hydrateContext(baseNewTask as any);
     expect(result.guardrail_blocked).toBeUndefined();
     expect(mockBedrockSend).not.toHaveBeenCalled();
   });
+
+  test('returns guardrail_blocked when new_task issue context is blocked', async () => {
+    mockIssueFetch();
+    mockBedrockSend.mockResolvedValueOnce({ action: 'GUARDRAIL_INTERVENED' });
+
+    const result = await hydrateContext({ ...baseNewTask, issue_number: 42 } as any);
+    expect(result.guardrail_blocked).toBe('Task context blocked by content policy');
+    expect(mockBedrockSend).toHaveBeenCalledTimes(1);
+  });
+
+  test('proceeds normally when new_task issue context passes guardrail', async () => {
+    mockIssueFetch();
+    mockBedrockSend.mockResolvedValueOnce({ action: 'NONE' });
+
+    const result = await hydrateContext({ ...baseNewTask, issue_number: 42 } as any);
+    expect(result.guardrail_blocked).toBeUndefined();
+    expect(result.issue).toBeDefined();
+    expect(result.sources).toContain('issue');
+  });
+
+  test('throws when guardrail screening fails for new_task (fail-closed)', async () => {
+    mockIssueFetch();
+    mockBedrockSend.mockRejectedValueOnce(new Error('Bedrock timeout'));
+
+    await expect(
+      hydrateContext({ ...baseNewTask, issue_number: 42 } as any),
+    ).rejects.toThrow('Guardrail screening unavailable: Bedrock timeout');
+  });
 });
@@ -617,6 +617,8 @@ Rate limit status is communicated via response headers (see Standard response he
 | `WEBHOOK_NOT_FOUND` | 404 | Webhook does not exist or belongs to a different user. |
 | `WEBHOOK_ALREADY_REVOKED` | 409 | Webhook is already revoked. |
 | `REPO_NOT_ONBOARDED` | 422 | Repository is not registered with the platform. Repos are onboarded via CDK deployment, not via a runtime API. There are no `/v1/repos` endpoints. |
+| `GITHUB_UNREACHABLE` | 502 | The GitHub API was unreachable during the orchestrator's pre-flight check. The task fails fast without consuming compute. Transient — retry with backoff. |
+| `REPO_NOT_FOUND_OR_NO_ACCESS` | 422 | The target repository does not exist or the configured credentials lack access. Checked during the orchestrator's pre-flight step (`GET /repos/{owner}/{repo}`). Distinct from `REPO_NOT_ONBOARDED` — the repo is onboarded but the credential cannot reach it. |
 | `PR_NOT_FOUND_OR_CLOSED` | 422 | For `pr_iteration` and `pr_review` tasks: the specified PR does not exist, is not open, or is not accessible with the configured GitHub token. Checked during the orchestrator's pre-flight step. |
 | `INVALID_STEP_SEQUENCE` | 500 | The blueprint's step sequence is invalid (missing required steps or incorrect ordering). This indicates a CDK configuration error that slipped past synth-time validation. Visible via `GET /v1/tasks/{id}` as `error_code`. See [REPO_ONBOARDING.md](./REPO_ONBOARDING.md#step-sequence-validation). |
 | `GUARDRAIL_BLOCKED` | 400 | Task description was blocked by Bedrock Guardrail content screening (prompt injection detected). Revise the task description and retry. |
 
@@ -129,7 +129,8 @@ Cost efficiency is a design principle. The following estimates are based on **50
 | **API Gateway** (REST API, ~2K requests/day) | ~$5–15 | Per-request pricing |
 | **AgentCore Memory** (events, records, retrieval) | TBD | Pricing not fully public; proportional to usage |
 | **CloudWatch** (logs, metrics, traces, Transaction Search) | ~$20–50 | Log ingestion + storage |
-| **Secrets Manager** (GitHub App keys, webhook secrets) | ~$5–10 | Per-secret/month + API calls |
+| **Secrets Manager** (GitHub token or App private key, webhook secrets) | ~$5–10 | Per-secret/month + API calls |
+| **AgentCore Identity** (planned — WorkloadIdentity, Token Vault credential provider) | TBD | Token vending API calls; will replace per-task Secrets Manager reads for GitHub tokens |
 | **S3** (artifacts, memory backups) | ~$1–5 | Storage + requests |
 | **Total** | **~$700–1,600/month** | |
 
@@ -201,10 +202,17 @@ Each concept has a **source-of-truth document** and one or more documents that r
 | Agent swarm orchestration | ROADMAP.md (Iter 6) | — |
 | Adaptive model router | ROADMAP.md (Iter 5) | COST_MODEL.md |
 | Capability-based security | ROADMAP.md (Iter 5) | SECURITY.md |
+| Centralized policy framework | ROADMAP.md (Iter 5), SECURITY.md (Policy enforcement and audit) | ORCHESTRATOR.md, OBSERVABILITY.md |
+| GitHub App + AgentCore Token Vault | ROADMAP.md (Iter 3c), SECURITY.md (Authentication) | ORCHESTRATOR.md (context hydration), COMPUTE.md |
 | Live session replay | ROADMAP.md (Iter 4) | API_CONTRACT.md |
 | PR iteration task type | API_CONTRACT.md, ORCHESTRATOR.md | USER_GUIDE.md, PROMPT_GUIDE.md, SECURITY.md, AGENT_HARNESS.md |
 | PR review task type | API_CONTRACT.md, ORCHESTRATOR.md | USER_GUIDE.md, PROMPT_GUIDE.md, SECURITY.md, AGENT_HARNESS.md |
+| Orchestrator pre-flight checks | ORCHESTRATOR.md (Context hydration, pre-flight sub-step) | API_CONTRACT.md (Error codes: GITHUB_UNREACHABLE, REPO_NOT_FOUND_OR_NO_ACCESS, PR_NOT_FOUND_OR_CLOSED), ROADMAP.md (3c), SECURITY.md |
 | Bedrock Guardrail input screening | SECURITY.md (Input validation and guardrails) | ORCHESTRATOR.md (Context hydration), API_CONTRACT.md (Error codes), OBSERVABILITY.md (Alarms), ROADMAP.md (3c) |
+| Memory input hardening (3e Phase 1) | ROADMAP.md (Iter 3e Phase 1, co-ships with 3d) | MEMORY.md, SECURITY.md (Memory-specific threats) |
+| Per-tool-call structured telemetry | ROADMAP.md (Iter 3d) | SECURITY.md (Mid-execution enforcement), EVALUATION.md, OBSERVABILITY.md |
+| Mid-execution behavioral monitoring | ROADMAP.md (Iter 5), SECURITY.md (Mid-execution enforcement) | OBSERVABILITY.md |
+| Tool-call interceptor (Guardian pattern) | SECURITY.md (Mid-execution enforcement), ROADMAP.md (Iter 5) | REPO_ONBOARDING.md (Blueprint security props) |
 
 ### Per-repo model selection
 
 
@@ -129,6 +129,7 @@ Both are one-time, account-level setup steps — not managed by CDK.
 - **Guardrail screening events** — `guardrail_blocked` (content blocked by Bedrock Guardrail during hydration, with metadata: `reason`, `task_type`, `pr_number`, `sources`, `token_estimate`). Screening failures are logged with structured `metric_type` fields (not emitted as task events).
 - Time in each state (e.g. time in HYDRATING, time RUNNING, cold start to first agent activity).
 - Correlation with a task id and user id so users and operators can filter by task or user.
+- **Planned (Iteration 5, Phase 1): `PolicyDecisionEvent`** — A unified event schema for all policy decisions across the task lifecycle: admission control, budget/quota resolution, guardrail screening, tool-call interception, and finalization. Each event carries: decision ID, policy name, version, phase, input hash, result (`allow` | `deny` | `modify`), reason codes, and enforcement mode (`enforced` | `observed` | `steered`). This normalizes the current mix of structured events (e.g. `admission_rejected`, `guardrail_blocked`) and silent HTTP errors into a single auditable event type. See [ROADMAP.md Iteration 5](../guides/ROADMAP.md) (Centralized policy framework) and [SECURITY.md](./SECURITY.md) (Policy enforcement and audit).
 
 ### Agent execution