Skip to content

Commit 80afef1

Browse files
authored
Merge branch 'main' into feat/plugin
2 parents 3c2ad3a + 783e2b7 commit 80afef1

29 files changed

Lines changed: 3300 additions & 710 deletions

.github/workflows/build.yml

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.gitignore

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

agent/uv.lock

Lines changed: 610 additions & 610 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cdk/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
"@aws-cdk/mixins-preview": "2.238.0-alpha.0",
1919
"@aws-sdk/client-bedrock-agentcore": "^3.1021.0",
2020
"@aws-sdk/client-bedrock-runtime": "^3.1021.0",
21+
"@aws-sdk/client-ecs": "^3.1021.0",
2122
"@aws-sdk/client-dynamodb": "^3.1021.0",
2223
"@aws-sdk/client-lambda": "^3.1021.0",
2324
"@aws-sdk/client-secrets-manager": "^3.1021.0",
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
/**
2+
* MIT No Attribution
3+
*
4+
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy of
7+
* the Software without restriction, including without limitation the rights to
8+
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9+
* the Software, and to permit persons to whom the Software is furnished to do so.
10+
*
11+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
12+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
13+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
14+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17+
* SOFTWARE.
18+
*/
19+
20+
import { RemovalPolicy } from 'aws-cdk-lib';
21+
import * as dynamodb from 'aws-cdk-lib/aws-dynamodb';
22+
import * as ec2 from 'aws-cdk-lib/aws-ec2';
23+
import * as ecr_assets from 'aws-cdk-lib/aws-ecr-assets';
24+
import * as ecs from 'aws-cdk-lib/aws-ecs';
25+
import * as iam from 'aws-cdk-lib/aws-iam';
26+
import * as logs from 'aws-cdk-lib/aws-logs';
27+
import * as secretsmanager from 'aws-cdk-lib/aws-secretsmanager';
28+
import { NagSuppressions } from 'cdk-nag';
29+
import { Construct } from 'constructs';
30+
31+
export interface EcsAgentClusterProps {
32+
readonly vpc: ec2.IVpc;
33+
readonly agentImageAsset: ecr_assets.DockerImageAsset;
34+
readonly taskTable: dynamodb.ITable;
35+
readonly taskEventsTable: dynamodb.ITable;
36+
readonly userConcurrencyTable: dynamodb.ITable;
37+
readonly githubTokenSecret: secretsmanager.ISecret;
38+
readonly memoryId?: string;
39+
}
40+
41+
export class EcsAgentCluster extends Construct {
42+
public readonly cluster: ecs.Cluster;
43+
public readonly taskDefinition: ecs.FargateTaskDefinition;
44+
public readonly securityGroup: ec2.SecurityGroup;
45+
public readonly containerName: string;
46+
public readonly taskRoleArn: string;
47+
public readonly executionRoleArn: string;
48+
49+
constructor(scope: Construct, id: string, props: EcsAgentClusterProps) {
50+
super(scope, id);
51+
52+
this.containerName = 'AgentContainer';
53+
54+
// ECS Cluster with Fargate capacity provider and container insights
55+
this.cluster = new ecs.Cluster(this, 'Cluster', {
56+
vpc: props.vpc,
57+
containerInsights: true,
58+
});
59+
60+
// Security group — egress TCP 443 only
61+
this.securityGroup = new ec2.SecurityGroup(this, 'TaskSG', {
62+
vpc: props.vpc,
63+
description: 'ECS Agent Tasks - egress TCP 443 only',
64+
allowAllOutbound: false,
65+
});
66+
67+
this.securityGroup.addEgressRule(
68+
ec2.Peer.anyIpv4(),
69+
ec2.Port.tcp(443),
70+
'Allow HTTPS egress (GitHub API, AWS services)',
71+
);
72+
73+
// CloudWatch log group for agent task output
74+
const logGroup = new logs.LogGroup(this, 'TaskLogGroup', {
75+
retention: logs.RetentionDays.THREE_MONTHS,
76+
removalPolicy: RemovalPolicy.DESTROY,
77+
});
78+
79+
// Task execution role (used by ECS agent to pull images, write logs)
80+
// CDK creates this automatically via taskDefinition, but we need to
81+
// grant additional permissions to the task role.
82+
83+
// Fargate task definition
84+
this.taskDefinition = new ecs.FargateTaskDefinition(this, 'TaskDef', {
85+
cpu: 2048,
86+
memoryLimitMiB: 4096,
87+
runtimePlatform: {
88+
cpuArchitecture: ecs.CpuArchitecture.ARM64,
89+
operatingSystemFamily: ecs.OperatingSystemFamily.LINUX,
90+
},
91+
});
92+
93+
// Container
94+
this.taskDefinition.addContainer(this.containerName, {
95+
image: ecs.ContainerImage.fromDockerImageAsset(props.agentImageAsset),
96+
logging: ecs.LogDrivers.awsLogs({
97+
logGroup,
98+
streamPrefix: 'agent',
99+
}),
100+
environment: {
101+
CLAUDE_CODE_USE_BEDROCK: '1',
102+
TASK_TABLE_NAME: props.taskTable.tableName,
103+
TASK_EVENTS_TABLE_NAME: props.taskEventsTable.tableName,
104+
USER_CONCURRENCY_TABLE_NAME: props.userConcurrencyTable.tableName,
105+
LOG_GROUP_NAME: logGroup.logGroupName,
106+
GITHUB_TOKEN_SECRET_ARN: props.githubTokenSecret.secretArn,
107+
...(props.memoryId && { MEMORY_ID: props.memoryId }),
108+
},
109+
});
110+
111+
// Task role permissions
112+
const taskRole = this.taskDefinition.taskRole;
113+
114+
// DynamoDB read/write on task tables
115+
props.taskTable.grantReadWriteData(taskRole);
116+
props.taskEventsTable.grantReadWriteData(taskRole);
117+
props.userConcurrencyTable.grantReadWriteData(taskRole);
118+
119+
// Secrets Manager read for GitHub token
120+
props.githubTokenSecret.grantRead(taskRole);
121+
122+
// Bedrock model invocation
123+
taskRole.addToPrincipalPolicy(new iam.PolicyStatement({
124+
actions: [
125+
'bedrock:InvokeModel',
126+
'bedrock:InvokeModelWithResponseStream',
127+
],
128+
resources: ['*'],
129+
}));
130+
131+
// CloudWatch Logs write
132+
logGroup.grantWrite(taskRole);
133+
134+
// Expose role ARNs for scoped iam:PassRole in the orchestrator
135+
this.taskRoleArn = taskRole.roleArn;
136+
this.executionRoleArn = this.taskDefinition.executionRole!.roleArn;
137+
138+
NagSuppressions.addResourceSuppressions(this.taskDefinition, [
139+
{
140+
id: 'AwsSolutions-IAM5',
141+
reason: 'DynamoDB index/* wildcards generated by CDK grantReadWriteData; Bedrock InvokeModel requires * resource; Secrets Manager wildcards from CDK grantRead; CloudWatch Logs wildcards from CDK grantWrite',
142+
},
143+
{
144+
id: 'AwsSolutions-ECS2',
145+
reason: 'Environment variables contain table names and configuration, not secrets — GitHub token is fetched from Secrets Manager at runtime',
146+
},
147+
], true);
148+
149+
NagSuppressions.addResourceSuppressions(this.cluster, [
150+
{
151+
id: 'AwsSolutions-ECS4',
152+
reason: 'Container insights is enabled via the containerInsights prop',
153+
},
154+
], true);
155+
}
156+
}

cdk/src/constructs/task-api.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,12 @@ export interface TaskApiProps {
100100
* First ARN is also passed as `RUNTIME_ARN` when the task record has no `agent_runtime_arn`.
101101
*/
102102
readonly agentCoreStopSessionRuntimeArns?: string[];
103+
104+
/**
105+
* ECS cluster ARN for cancel-task to stop ECS-backed tasks.
106+
* When provided, the cancel Lambda gets `ECS_CLUSTER_ARN` env var and `ecs:StopTask` permission.
107+
*/
108+
readonly ecsClusterArn?: string;
103109
}
104110

105111
/**
@@ -329,6 +335,9 @@ export class TaskApi extends Construct {
329335
if (stopSessionArns.length > 0) {
330336
cancelTaskEnv.RUNTIME_ARN = stopSessionArns[0]!;
331337
}
338+
if (props.ecsClusterArn) {
339+
cancelTaskEnv.ECS_CLUSTER_ARN = props.ecsClusterArn;
340+
}
332341

333342
const cancelTaskFn = new lambda.NodejsFunction(this, 'CancelTaskFn', {
334343
entry: path.join(handlersDir, 'cancel-task.ts'),
@@ -363,6 +372,18 @@ export class TaskApi extends Construct {
363372
}));
364373
}
365374

375+
if (props.ecsClusterArn) {
376+
cancelTaskFn.addToRolePolicy(new iam.PolicyStatement({
377+
actions: ['ecs:StopTask'],
378+
resources: ['*'],
379+
conditions: {
380+
ArnEquals: {
381+
'ecs:cluster': props.ecsClusterArn,
382+
},
383+
},
384+
}));
385+
}
386+
366387
// Repo table read for onboarding gate
367388
if (props.repoTable) {
368389
props.repoTable.grantReadData(createTaskFn);

cdk/src/constructs/task-orchestrator.ts

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,21 @@ export interface TaskOrchestratorProps {
112112
* Bedrock Guardrail version. Required when guardrailId is provided.
113113
*/
114114
readonly guardrailVersion?: string;
115+
116+
/**
117+
* ECS Fargate compute strategy configuration.
118+
* When provided, ECS-related env vars and IAM policies are added to the orchestrator.
119+
* All fields are required — this makes the all-or-nothing constraint self-evident at the type level.
120+
*/
121+
readonly ecsConfig?: {
122+
readonly clusterArn: string;
123+
readonly taskDefinitionArn: string;
124+
readonly subnets: string;
125+
readonly securityGroup: string;
126+
readonly containerName: string;
127+
readonly taskRoleArn: string;
128+
readonly executionRoleArn: string;
129+
};
115130
}
116131

117132
/**
@@ -173,6 +188,13 @@ export class TaskOrchestrator extends Construct {
173188
...(props.memoryId && { MEMORY_ID: props.memoryId }),
174189
...(props.guardrailId && { GUARDRAIL_ID: props.guardrailId }),
175190
...(props.guardrailVersion && { GUARDRAIL_VERSION: props.guardrailVersion }),
191+
...(props.ecsConfig && {
192+
ECS_CLUSTER_ARN: props.ecsConfig.clusterArn,
193+
ECS_TASK_DEFINITION_ARN: props.ecsConfig.taskDefinitionArn,
194+
ECS_SUBNETS: props.ecsConfig.subnets,
195+
ECS_SECURITY_GROUP: props.ecsConfig.securityGroup,
196+
ECS_CONTAINER_NAME: props.ecsConfig.containerName,
197+
}),
176198
},
177199
bundling: {
178200
externalModules: ['@aws-sdk/*'],
@@ -213,6 +235,33 @@ export class TaskOrchestrator extends Construct {
213235
resources: runtimeResources,
214236
}));
215237

238+
// ECS compute strategy permissions (only when ECS is configured)
239+
if (props.ecsConfig) {
240+
this.fn.addToRolePolicy(new iam.PolicyStatement({
241+
actions: [
242+
'ecs:RunTask',
243+
'ecs:DescribeTasks',
244+
'ecs:StopTask',
245+
],
246+
resources: ['*'],
247+
conditions: {
248+
ArnEquals: {
249+
'ecs:cluster': props.ecsConfig.clusterArn,
250+
},
251+
},
252+
}));
253+
254+
this.fn.addToRolePolicy(new iam.PolicyStatement({
255+
actions: ['iam:PassRole'],
256+
resources: [props.ecsConfig.taskRoleArn, props.ecsConfig.executionRoleArn],
257+
conditions: {
258+
StringEquals: {
259+
'iam:PassedToService': 'ecs-tasks.amazonaws.com',
260+
},
261+
},
262+
}));
263+
}
264+
216265
// Per-repo Secrets Manager grants (e.g. per-repo GitHub tokens from Blueprints)
217266
for (const [index, secretArn] of (props.additionalSecretArns ?? []).entries()) {
218267
const secret = secretsmanager.Secret.fromSecretCompleteArn(
@@ -264,7 +313,7 @@ export class TaskOrchestrator extends Construct {
264313
},
265314
{
266315
id: 'AwsSolutions-IAM5',
267-
reason: 'DynamoDB index/* wildcards generated by CDK grantReadWriteData; AgentCore runtime/* required for sub-resource invocation; Secrets Manager wildcards generated by CDK grantRead; AgentCore Memory wildcards generated by CDK grantRead/grantWrite',
316+
reason: 'DynamoDB index/* wildcards generated by CDK grantReadWriteData; AgentCore runtime/* required for sub-resource invocation; Secrets Manager wildcards generated by CDK grantRead; AgentCore Memory wildcards generated by CDK grantRead/grantWrite; ECS RunTask/DescribeTasks/StopTask conditioned on cluster ARN; iam:PassRole scoped to ECS task/execution roles and conditioned on ecs-tasks.amazonaws.com',
268317
},
269318
], true);
270319
}

cdk/src/handlers/cancel-task.ts

Lines changed: 52 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import { BedrockAgentCoreClient, StopRuntimeSessionCommand } from '@aws-sdk/client-bedrock-agentcore';
2121
import { DynamoDBClient } from '@aws-sdk/client-dynamodb';
22+
import { ECSClient, StopTaskCommand } from '@aws-sdk/client-ecs';
2223
import { DynamoDBDocumentClient, GetCommand, PutCommand, UpdateCommand } from '@aws-sdk/lib-dynamodb';
2324
import type { APIGatewayProxyEvent, APIGatewayProxyResult } from 'aws-lambda';
2425
import { ulid } from 'ulid';
@@ -31,10 +32,12 @@ import { computeTtlEpoch } from './shared/validation';
3132

3233
const ddb = DynamoDBDocumentClient.from(new DynamoDBClient({}));
3334
const agentCoreClient = new BedrockAgentCoreClient({});
35+
const ecsClient = new ECSClient({});
3436
const TABLE_NAME = process.env.TASK_TABLE_NAME!;
3537
const EVENTS_TABLE_NAME = process.env.TASK_EVENTS_TABLE_NAME!;
3638
const TASK_RETENTION_DAYS = Number(process.env.TASK_RETENTION_DAYS ?? '90');
3739
const RUNTIME_ARN = process.env.RUNTIME_ARN;
40+
const ECS_CLUSTER_ARN = process.env.ECS_CLUSTER_ARN;
3841

3942
/**
4043
* DELETE /v1/tasks/{task_id} — Cancel a task.
@@ -107,19 +110,57 @@ export async function handler(event: APIGatewayProxyEvent): Promise<APIGatewayPr
107110
throw condErr;
108111
}
109112

110-
// 6b. Stop AgentCore runtime session so the container winds down (best-effort)
111-
if (wasRunning && runtimeSessionId && agentRuntimeArn) {
112-
try {
113-
await agentCoreClient.send(new StopRuntimeSessionCommand({
114-
runtimeSessionId: runtimeSessionId,
115-
agentRuntimeArn: agentRuntimeArn,
116-
}));
117-
logger.info('StopRuntimeSession invoked after cancel', { task_id: taskId, request_id: requestId });
118-
} catch (stopErr) {
119-
logger.warn('StopRuntimeSession failed after cancel (session may already be gone)', {
113+
// 6b. Stop the compute session so the container winds down (best-effort)
114+
if (wasRunning && runtimeSessionId) {
115+
const computeType = record.compute_type;
116+
if (computeType === 'ecs') {
117+
// ECS-backed task — stop the Fargate task
118+
const clusterArn = record.compute_metadata?.clusterArn ?? ECS_CLUSTER_ARN;
119+
const taskArn = record.compute_metadata?.taskArn;
120+
if (clusterArn && taskArn) {
121+
try {
122+
await ecsClient.send(new StopTaskCommand({
123+
cluster: clusterArn,
124+
task: taskArn,
125+
reason: 'Cancelled by user',
126+
}));
127+
logger.info('ECS StopTask invoked after cancel', { task_id: taskId, ecs_task_arn: taskArn, request_id: requestId });
128+
} catch (stopErr) {
129+
logger.warn('ECS StopTask failed after cancel (task may already be stopped)', {
130+
task_id: taskId,
131+
request_id: requestId,
132+
error: stopErr instanceof Error ? stopErr.message : String(stopErr),
133+
});
134+
}
135+
} else {
136+
logger.warn('ECS task cancel skipped: missing clusterArn or taskArn in compute_metadata', {
137+
task_id: taskId,
138+
request_id: requestId,
139+
has_cluster: !!clusterArn,
140+
has_task: !!taskArn,
141+
});
142+
}
143+
} else if (agentRuntimeArn) {
144+
// AgentCore-backed task (default)
145+
try {
146+
await agentCoreClient.send(new StopRuntimeSessionCommand({
147+
runtimeSessionId: runtimeSessionId,
148+
agentRuntimeArn: agentRuntimeArn,
149+
}));
150+
logger.info('StopRuntimeSession invoked after cancel', { task_id: taskId, request_id: requestId });
151+
} catch (stopErr) {
152+
logger.warn('StopRuntimeSession failed after cancel (session may already be gone)', {
153+
task_id: taskId,
154+
request_id: requestId,
155+
error: stopErr instanceof Error ? stopErr.message : String(stopErr),
156+
});
157+
}
158+
} else {
159+
logger.warn('Running task has no recognized compute backend to stop', {
120160
task_id: taskId,
121161
request_id: requestId,
122-
error: stopErr instanceof Error ? stopErr.message : String(stopErr),
162+
compute_type: computeType,
163+
has_runtime_arn: !!agentRuntimeArn,
123164
});
124165
}
125166
}

0 commit comments

Comments
 (0)