Skip to content

Commit 808554c

Browse files
authored
Merge pull request #13 from aws-samples/guardrails
chore(guardrails): add more guardrails for input screening
2 parents a3a24b1 + 4d647de commit 808554c

31 files changed

Lines changed: 534 additions & 50 deletions

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,10 @@ ABCA is under active development. The platform ships iteratively — each iterat
5959
| **3a** | Done | Repo onboarding, per-repo GitHub App credentials, turn caps, prompt guide |
6060
| **3b** | Done | Memory Tier 1, insights, agent self-feedback, prompt versioning, commit attribution |
6161
| **3bis** | Done | Hardening — reconciler error tracking, error serialization, test coverage gaps |
62-
| **3c** | WIP | Pre-flight checks, persistent session storage, deterministic validation, PR review task type, multi-modal input |
62+
| **3c** | WIP | Pre-flight checks, persistent session storage, deterministic validation, PR review task type, multi-modal input, input guardrail screening |
6363
| **3d** | Planned | Review feedback loop, PR outcome tracking, evaluation pipeline |
6464
| **4** | Planned | GitLab, visual proof, Slack, control panel, WebSocket streaming |
65-
| **5** | Planned | Pre-warming, multi-user/team, cost management, guardrails, alternate runtime |
65+
| **5** | Planned | Pre-warming, multi-user/team, cost management, output guardrails, alternate runtime |
6666
| **6** | Planned | Skills learning, multi-repo, iterative feedback, multiplayer, CDK constructs |
6767

6868
See the full [ROADMAP](./docs/guides/ROADMAP.md) for details on each iteration.

cdk/src/constructs/task-orchestrator.ts

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
*/
1919

2020
import * as path from 'path';
21-
import { Duration } from 'aws-cdk-lib';
21+
import { Duration, Stack } from 'aws-cdk-lib';
2222
import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch';
2323
import * as dynamodb from 'aws-cdk-lib/aws-dynamodb';
2424
import * as iam from 'aws-cdk-lib/aws-iam';
@@ -100,6 +100,18 @@ export interface TaskOrchestratorProps {
100100
* and writes fallback episodes during finalization.
101101
*/
102102
readonly memoryId?: string;
103+
104+
/**
105+
* Bedrock Guardrail ID used by the orchestrator to screen assembled PR prompts
106+
* for prompt injection during context hydration. The same guardrail is also
107+
* used by the Task API for submission-time task description screening.
108+
*/
109+
readonly guardrailId?: string;
110+
111+
/**
112+
* Bedrock Guardrail version. Required when guardrailId is provided.
113+
*/
114+
readonly guardrailVersion?: string;
103115
}
104116

105117
/**
@@ -125,6 +137,13 @@ export class TaskOrchestrator extends Construct {
125137
constructor(scope: Construct, id: string, props: TaskOrchestratorProps) {
126138
super(scope, id);
127139

140+
if (props.guardrailId && !props.guardrailVersion) {
141+
throw new Error('guardrailVersion is required when guardrailId is provided');
142+
}
143+
if (!props.guardrailId && props.guardrailVersion) {
144+
throw new Error('guardrailId is required when guardrailVersion is provided');
145+
}
146+
128147
const handlersDir = path.join(__dirname, '..', 'handlers');
129148
const maxConcurrent = props.maxConcurrentTasksPerUser ?? 3;
130149

@@ -152,6 +171,8 @@ export class TaskOrchestrator extends Construct {
152171
USER_PROMPT_TOKEN_BUDGET: String(props.userPromptTokenBudget),
153172
}),
154173
...(props.memoryId && { MEMORY_ID: props.memoryId }),
174+
...(props.guardrailId && { GUARDRAIL_ID: props.guardrailId }),
175+
...(props.guardrailVersion && { GUARDRAIL_VERSION: props.guardrailVersion }),
155176
},
156177
bundling: {
157178
externalModules: ['@aws-sdk/*'],
@@ -200,6 +221,20 @@ export class TaskOrchestrator extends Construct {
200221
secret.grantRead(this.fn);
201222
}
202223

224+
// Bedrock Guardrail permissions
225+
if (props.guardrailId) {
226+
this.fn.addToRolePolicy(new iam.PolicyStatement({
227+
actions: ['bedrock:ApplyGuardrail'],
228+
resources: [
229+
Stack.of(this).formatArn({
230+
service: 'bedrock',
231+
resource: 'guardrail',
232+
resourceName: props.guardrailId,
233+
}),
234+
],
235+
}));
236+
}
237+
203238
// Create alias for durable function invocation
204239
const fnAlias = this.fn.currentVersion.addAlias('live');
205240
this.alias = fnAlias;

cdk/src/handlers/orchestrate-task.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,8 @@ const durableHandler: DurableExecutionHandler<OrchestrateTaskEvent, void> = asyn
110110
try {
111111
return await hydrateAndTransition(task, blueprintConfig);
112112
} catch (err) {
113-
// Transition may fail if task was externally cancelled — release concurrency
114-
await failTask(taskId, task.status, `Hydration failed: ${String(err)}`, task.user_id, true);
113+
// Hydration may fail due to external cancellation, guardrail blocking, or guardrail API failure — fail the task and release concurrency
114+
await failTask(taskId, TaskStatus.HYDRATING, `Hydration failed: ${String(err)}`, task.user_id, true);
115115
throw err;
116116
}
117117
});

cdk/src/handlers/shared/context-hydration.ts

Lines changed: 97 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
* SOFTWARE.
1818
*/
1919

20+
import { ApplyGuardrailCommand, BedrockRuntimeClient } from '@aws-sdk/client-bedrock-runtime';
2021
import { GetSecretValueCommand, SecretsManagerClient } from '@aws-sdk/client-secrets-manager';
2122
import { logger } from './logger';
2223
import { loadMemoryContext, type MemoryContext } from './memory';
@@ -85,6 +86,7 @@ export interface HydratedContext {
8586
readonly token_estimate: number;
8687
readonly truncated: boolean;
8788
readonly fallback_error?: string;
89+
readonly guardrail_blocked?: string;
8890
readonly resolved_branch_name?: string;
8991
readonly resolved_base_branch?: string;
9092
}
@@ -96,6 +98,81 @@ export interface HydratedContext {
9698
const GITHUB_TOKEN_SECRET_ARN = process.env.GITHUB_TOKEN_SECRET_ARN;
9799
const USER_PROMPT_TOKEN_BUDGET = Number(process.env.USER_PROMPT_TOKEN_BUDGET ?? '100000');
98100
const GITHUB_API_TIMEOUT_MS = 30_000;
101+
const GUARDRAIL_ID = process.env.GUARDRAIL_ID;
102+
const GUARDRAIL_VERSION = process.env.GUARDRAIL_VERSION;
103+
const bedrockClient = (GUARDRAIL_ID && GUARDRAIL_VERSION) ? new BedrockRuntimeClient({}) : undefined;
104+
if (GUARDRAIL_ID && !GUARDRAIL_VERSION) {
105+
logger.error('GUARDRAIL_ID is set but GUARDRAIL_VERSION is missing — guardrail screening disabled', {
106+
metric_type: 'guardrail_misconfiguration',
107+
});
108+
}
109+
110+
// ---------------------------------------------------------------------------
111+
// Bedrock Guardrail screening
112+
// ---------------------------------------------------------------------------
113+
114+
/**
 * Signals that the Bedrock Guardrail API call itself failed, as opposed to the
 * guardrail returning a verdict. It is a dedicated subclass so callers can
 * recognise it with `instanceof` and re-throw it rather than falling back to
 * content that was never screened (fail-closed).
 */
export class GuardrailScreeningError extends Error {
  /**
   * @param message - human-readable description of the screening failure.
   * @param cause - the underlying error, if any; attached as the standard `cause`.
   */
  constructor(message: string, cause?: Error) {
    // Only pass an options bag when there is a cause, so instances created
    // without one do not carry a `cause` property at all.
    const options = cause ? { cause } : undefined;
    super(message, options);
    this.name = 'GuardrailScreeningError';
  }
}
125+
126+
/**
 * Submit `text` to the configured Bedrock Guardrail as INPUT content and report
 * the verdict.
 *
 * The function is fail-closed: any error raised while calling Bedrock is logged
 * and re-thrown as a GuardrailScreeningError, so callers never proceed with
 * content that could not be screened.
 *
 * @param text - the text to screen.
 * @param taskId - the task ID, used only for log correlation.
 * @returns 'GUARDRAIL_INTERVENED' when the guardrail blocked the text, 'NONE'
 *   when it did not, and undefined when no guardrail is configured (client,
 *   ID, or version missing).
 * @throws GuardrailScreeningError when the ApplyGuardrail call fails.
 */
export async function screenWithGuardrail(text: string, taskId: string): Promise<'GUARDRAIL_INTERVENED' | 'NONE' | undefined> {
  // Screening is opt-in: without a client, ID, and version there is nothing to call.
  if (!(bedrockClient && GUARDRAIL_ID && GUARDRAIL_VERSION)) {
    logger.info('Guardrail screening skipped — guardrail not configured', {
      task_id: taskId,
      metric_type: 'guardrail_screening_skipped',
    });
    return undefined;
  }

  try {
    const screeningRequest = new ApplyGuardrailCommand({
      guardrailIdentifier: GUARDRAIL_ID,
      guardrailVersion: GUARDRAIL_VERSION,
      source: 'INPUT',
      content: [{ text: { text } }],
    });
    const response = await bedrockClient.send(screeningRequest);

    // Any action other than an explicit intervention counts as allowed.
    if (response.action !== 'GUARDRAIL_INTERVENED') {
      return 'NONE';
    }

    logger.warn('Content blocked by guardrail', {
      task_id: taskId,
      guardrail_id: GUARDRAIL_ID,
      guardrail_version: GUARDRAIL_VERSION,
    });
    return 'GUARDRAIL_INTERVENED';
  } catch (err) {
    // Normalise the thrown value once; non-Error throwables are stringified.
    const failure = err instanceof Error ? err : undefined;
    const detail = failure ? failure.message : String(err);

    logger.error('Guardrail screening failed (fail-closed)', {
      task_id: taskId,
      guardrail_id: GUARDRAIL_ID,
      error: detail,
      error_name: failure ? failure.name : undefined,
      metric_type: 'guardrail_screening_failure',
    });

    // Wrap and re-throw so callers can tell screening failures from other errors.
    throw new GuardrailScreeningError(`Guardrail screening unavailable: ${detail}`, failure);
  }
}
99176

100177
// ---------------------------------------------------------------------------
101178
// GitHub token resolution (Secrets Manager with caching)
@@ -715,11 +792,15 @@ export interface HydrateContextOptions {
715792
}
716793

717794
/**
718-
* Hydrate context for a task: resolve GitHub token, fetch issue, enforce
719-
* token budget, and assemble the user prompt.
795+
* Hydrate context for a task: resolve GitHub token, fetch issue/PR, enforce
796+
* token budget, assemble the user prompt, and (for PR tasks) screen through
797+
* Bedrock Guardrail for prompt injection.
720798
* @param task - the task record from DynamoDB.
721799
* @param options - optional per-repo overrides.
722-
* @returns the hydrated context.
800+
* @returns the hydrated context. For PR tasks, `guardrail_blocked` is set when
801+
* the guardrail intervened.
802+
* @throws GuardrailScreeningError when the Bedrock Guardrail API call fails
803+
* (fail-closed — propagated to prevent unscreened content from reaching the agent).
723804
*/
724805
export async function hydrateContext(task: TaskRecord, options?: HydrateContextOptions): Promise<HydratedContext> {
725806
const sources: string[] = [];
@@ -889,7 +970,10 @@ export async function hydrateContext(task: TaskRecord, options?: HydrateContextO
889970
resolvedBranchName = prResult.head_ref;
890971
resolvedBaseBranch = prResult.base_ref;
891972

892-
return {
973+
// Screen assembled PR prompt through Bedrock Guardrail for prompt injection
974+
const guardrailAction = await screenWithGuardrail(userPrompt, task.task_id);
975+
976+
const prContext: HydratedContext = {
893977
version: 1,
894978
user_prompt: userPrompt,
895979
memory_context: memoryContext,
@@ -898,7 +982,12 @@ export async function hydrateContext(task: TaskRecord, options?: HydrateContextO
898982
sources,
899983
token_estimate: estimateTokens(userPrompt),
900984
truncated,
985+
...(guardrailAction === 'GUARDRAIL_INTERVENED' && {
986+
guardrail_blocked: 'PR context blocked by content policy',
987+
}),
901988
};
989+
990+
return prContext;
902991
}
903992

904993
// Standard task: existing behavior
@@ -918,6 +1007,10 @@ export async function hydrateContext(task: TaskRecord, options?: HydrateContextO
9181007
truncated: budgetResult.truncated,
9191008
};
9201009
} catch (err) {
1010+
// Guardrail failures must propagate (fail-closed) — unscreened content must not reach the agent
1011+
if (err instanceof GuardrailScreeningError) {
1012+
throw err;
1013+
}
9211014
// Fallback: minimal context from task_description only
9221015
logger.error('Unexpected error during context hydration', {
9231016
task_id: task.task_id, error: err instanceof Error ? err.message : String(err),

cdk/src/handlers/shared/create-task-core.ts

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,13 @@ export interface TaskCreationContext {
4646

4747
const ddb = DynamoDBDocumentClient.from(new DynamoDBClient({}));
4848
const lambdaClient = process.env.ORCHESTRATOR_FUNCTION_ARN ? new LambdaClient({}) : undefined;
49-
const bedrockClient = process.env.GUARDRAIL_ID ? new BedrockRuntimeClient({}) : undefined;
49+
const bedrockClient = (process.env.GUARDRAIL_ID && process.env.GUARDRAIL_VERSION)
50+
? new BedrockRuntimeClient({}) : undefined;
51+
if (process.env.GUARDRAIL_ID && !process.env.GUARDRAIL_VERSION) {
52+
logger.error('GUARDRAIL_ID is set but GUARDRAIL_VERSION is missing — guardrail screening disabled', {
53+
metric_type: 'guardrail_misconfiguration',
54+
});
55+
}
5056
const TABLE_NAME = process.env.TASK_TABLE_NAME!;
5157
const EVENTS_TABLE_NAME = process.env.TASK_EVENTS_TABLE_NAME!;
5258
const TASK_RETENTION_DAYS = Number(process.env.TASK_RETENTION_DAYS ?? '90');
@@ -117,8 +123,8 @@ export async function createTaskCore(
117123
}
118124
const userMaxBudgetUsd = maxBudgetResult;
119125

120-
// 2. Screen task description with Bedrock Guardrail (fail-open: a Bedrock outage
121-
// should not block all task submissions — log the error and proceed)
126+
// 2. Screen task description with Bedrock Guardrail (fail-closed: unscreened content
127+
// must not reach the agent — a Bedrock outage blocks task submissions)
122128
if (bedrockClient && body.task_description) {
123129
try {
124130
const guardrailResult = await bedrockClient.send(new ApplyGuardrailCommand({
@@ -133,11 +139,13 @@ export async function createTaskCore(
133139
return errorResponse(400, ErrorCode.VALIDATION_ERROR, 'Task description was blocked by content policy.', requestId);
134140
}
135141
} catch (guardrailErr) {
136-
logger.error('Guardrail screening failed — proceeding without screening (fail-open)', {
142+
logger.error('Guardrail screening failed (fail-closed)', {
137143
error: String(guardrailErr),
138144
user_id: context.userId,
139145
request_id: requestId,
146+
metric_type: 'guardrail_screening_failure',
140147
});
148+
return errorResponse(503, ErrorCode.INTERNAL_ERROR, 'Content screening is temporarily unavailable. Please try again later.', requestId);
141149
}
142150
}
143151

cdk/src/handlers/shared/orchestrator.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,26 @@ export async function hydrateAndTransition(task: TaskRecord, blueprintConfig?: B
252252
memoryId: MEMORY_ID,
253253
});
254254

255+
// If guardrail screening blocked the hydrated context, emit audit event and throw
256+
// to trigger task failure (the caller in orchestrate-task.ts catches and transitions to FAILED)
257+
if (hydratedContext.guardrail_blocked) {
258+
try {
259+
await emitTaskEvent(task.task_id, 'guardrail_blocked', {
260+
reason: hydratedContext.guardrail_blocked,
261+
task_type: task.task_type,
262+
pr_number: task.pr_number,
263+
sources: hydratedContext.sources,
264+
token_estimate: hydratedContext.token_estimate,
265+
});
266+
} catch (eventErr) {
267+
logger.error('Failed to emit guardrail_blocked event', {
268+
task_id: task.task_id,
269+
error: eventErr instanceof Error ? eventErr.message : String(eventErr),
270+
});
271+
}
272+
throw new Error(`Guardrail blocked: ${hydratedContext.guardrail_blocked}`);
273+
}
274+
255275
// For PR iteration: resolve actual branch name from PR head_ref
256276
if (hydratedContext.resolved_branch_name) {
257277
try {

cdk/src/stacks/agent.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,8 @@ export class AgentStack extends Stack {
282282
runtimeArn: runtime.agentRuntimeArn,
283283
githubTokenSecretArn: githubTokenSecret.secretArn,
284284
memoryId: agentMemory.memory.memoryId,
285+
guardrailId: inputGuardrail.guardrailId,
286+
guardrailVersion: inputGuardrail.guardrailVersion,
285287
});
286288

287289
// Grant the orchestrator Lambda read+write access to memory

0 commit comments

Comments
 (0)