|
| 1 | +/** |
| 2 | + * MIT No Attribution |
| 3 | + * |
| 4 | + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
| 5 | + * |
| 6 | + * Permission is hereby granted, free of charge, to any person obtaining a copy of |
| 7 | + * the Software without restriction, including without limitation the rights to |
| 8 | + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of |
| 9 | + * the Software, and to permit persons to whom the Software is furnished to do so. |
| 10 | + * |
| 11 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 12 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 13 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 14 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 15 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 16 | + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 17 | + * SOFTWARE. |
| 18 | + */ |
| 19 | + |
import { Duration, RemovalPolicy, Tags } from 'aws-cdk-lib';
import * as autoscaling from 'aws-cdk-lib/aws-autoscaling';
import * as dynamodb from 'aws-cdk-lib/aws-dynamodb';
import * as ec2 from 'aws-cdk-lib/aws-ec2';
import * as ecr_assets from 'aws-cdk-lib/aws-ecr-assets';
import * as iam from 'aws-cdk-lib/aws-iam';
import * as logs from 'aws-cdk-lib/aws-logs';
import * as s3 from 'aws-cdk-lib/aws-s3';
import * as secretsmanager from 'aws-cdk-lib/aws-secretsmanager';
import { NagSuppressions } from 'cdk-nag';
import { Construct } from 'constructs';
| 31 | + |
/**
 * Configuration for {@link Ec2AgentFleet}.
 *
 * The first group of properties wires the fleet to resources owned by the
 * caller; the second group is optional tuning.
 */
export interface Ec2AgentFleetProps {
  // --- Required wiring -----------------------------------------------------

  /** VPC that hosts the fleet's security group and Auto Scaling Group. */
  readonly vpc: ec2.IVpc;

  /**
   * Container image that every instance pulls at boot. The instance role is
   * granted pull access to the asset's ECR repository.
   */
  readonly agentImageAsset: ecr_assets.DockerImageAsset;

  /** Table to which the instance role receives read/write data access. */
  readonly taskTable: dynamodb.ITable;

  /** Table to which the instance role receives read/write data access. */
  readonly taskEventsTable: dynamodb.ITable;

  /** Table to which the instance role receives read/write data access. */
  readonly userConcurrencyTable: dynamodb.ITable;

  /** Secret holding the GitHub token; the instance role may read it. */
  readonly githubTokenSecret: secretsmanager.ISecret;

  // --- Optional tuning -----------------------------------------------------

  /**
   * NOTE(review): not read by the Ec2AgentFleet constructor in the visible
   * code — confirm whether it is consumed elsewhere or can be dropped.
   */
  readonly memoryId?: string;

  /**
   * EC2 instance type for the fleet. The fleet's machine image is the ARM64
   * build of Amazon Linux 2023, so an override should be ARM64-compatible.
   *
   * @default m7g.xlarge
   */
  readonly instanceType?: ec2.InstanceType;

  /**
   * Desired number of instances. The same value is also used as the group's
   * minimum capacity.
   *
   * @default 1
   */
  readonly desiredCapacity?: number;

  /**
   * Upper bound on the number of instances in the group.
   *
   * @default 3
   */
  readonly maxCapacity?: number;
}
| 44 | + |
/**
 * Auto Scaling Group of EC2 instances that pre-pull the agent container image
 * at boot and mark themselves idle via an instance tag.
 *
 * The construct also creates the supporting resources: an HTTPS-egress-only
 * security group, a short-lived payload bucket, a log group, and the instance
 * role with the permissions the instances need.
 */
export class Ec2AgentFleet extends Construct {
  /** Security group attached to every instance; only TCP 443 egress is allowed. */
  public readonly securityGroup: ec2.SecurityGroup;

  /** IAM role assumed by the fleet instances. */
  public readonly instanceRole: iam.Role;

  /** Bucket for payload overflow; objects expire after 7 days. */
  public readonly payloadBucket: s3.Bucket;

  /** The Auto Scaling Group that owns the fleet instances. */
  public readonly autoScalingGroup: autoscaling.AutoScalingGroup;

  /** Key of the tag that identifies instances belonging to this fleet. */
  public readonly fleetTagKey: string;

  /** Value of the fleet tag; equal to this construct's `id`. */
  public readonly fleetTagValue: string;

  /**
   * @param scope Parent construct.
   * @param id Construct id; also used as the fleet tag value.
   * @param props Fleet wiring and sizing, see {@link Ec2AgentFleetProps}.
   */
  constructor(scope: Construct, id: string, props: Ec2AgentFleetProps) {
    super(scope, id);

    this.fleetTagKey = 'bgagent:fleet';
    this.fleetTagValue = id;

    // Security group — egress TCP 443 only
    this.securityGroup = new ec2.SecurityGroup(this, 'FleetSG', {
      vpc: props.vpc,
      description: 'EC2 Agent Fleet - egress TCP 443 only',
      allowAllOutbound: false,
    });

    this.securityGroup.addEgressRule(
      ec2.Peer.anyIpv4(),
      ec2.Port.tcp(443),
      'Allow HTTPS egress (GitHub API, AWS services)',
    );

    // S3 bucket for payload overflow. Contents are disposable: the bucket is
    // emptied and removed with the stack, and objects expire after 7 days.
    this.payloadBucket = new s3.Bucket(this, 'PayloadBucket', {
      removalPolicy: RemovalPolicy.DESTROY,
      autoDeleteObjects: true,
      encryption: s3.BucketEncryption.S3_MANAGED,
      enforceSSL: true,
      blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL,
      lifecycleRules: [
        { expiration: Duration.days(7) },
      ],
    });

    // CloudWatch log group
    const logGroup = new logs.LogGroup(this, 'FleetLogGroup', {
      retention: logs.RetentionDays.THREE_MONTHS,
      removalPolicy: RemovalPolicy.DESTROY,
    });

    // IAM Role for instances
    this.instanceRole = new iam.Role(this, 'InstanceRole', {
      assumedBy: new iam.ServicePrincipal('ec2.amazonaws.com'),
      managedPolicies: [
        iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonSSMManagedInstanceCore'),
      ],
    });

    // DynamoDB read/write on task tables
    props.taskTable.grantReadWriteData(this.instanceRole);
    props.taskEventsTable.grantReadWriteData(this.instanceRole);
    props.userConcurrencyTable.grantReadWriteData(this.instanceRole);

    // Secrets Manager read for GitHub token
    props.githubTokenSecret.grantRead(this.instanceRole);

    // Bedrock model invocation
    this.instanceRole.addToPrincipalPolicy(new iam.PolicyStatement({
      actions: [
        'bedrock:InvokeModel',
        'bedrock:InvokeModelWithResponseStream',
      ],
      resources: ['*'],
    }));

    // CloudWatch Logs write
    logGroup.grantWrite(this.instanceRole);

    // ECR pull: the auth-token call is not resource-scoped, the layer/image
    // calls are limited to the agent image's repository.
    this.instanceRole.addToPrincipalPolicy(new iam.PolicyStatement({
      actions: [
        'ecr:GetAuthorizationToken',
      ],
      resources: ['*'],
    }));
    this.instanceRole.addToPrincipalPolicy(new iam.PolicyStatement({
      actions: [
        'ecr:BatchGetImage',
        'ecr:GetDownloadUrlForLayer',
      ],
      resources: [props.agentImageAsset.repository.repositoryArn],
    }));

    // S3 read on payload bucket
    this.payloadBucket.grantRead(this.instanceRole);

    // EC2 tag management, allowed only on resources that ALREADY carry the
    // fleet tag. Instances therefore cannot apply the fleet tag to themselves;
    // it is propagated from the Auto Scaling Group at launch (see below).
    this.instanceRole.addToPrincipalPolicy(new iam.PolicyStatement({
      actions: ['ec2:CreateTags', 'ec2:DeleteTags'],
      resources: ['*'],
      conditions: {
        StringEquals: {
          [`ec2:ResourceTag/${this.fleetTagKey}`]: this.fleetTagValue,
        },
      },
    }));

    const imageUri = props.agentImageAsset.imageUri;

    // User data: install Docker, pull image, tag as idle.
    // `set -euo pipefail` makes any failing step abort the rest of the script.
    const userData = ec2.UserData.forLinux();
    userData.addCommands(
      '#!/bin/bash',
      'set -euo pipefail',
      '',
      '# Install Docker',
      'dnf install -y docker',
      'systemctl enable docker',
      'systemctl start docker',
      '',
      '# ECR login and pre-pull agent image',
      'REGION=$(ec2-metadata --availability-zone | cut -d" " -f2 | sed \'s/.$//\')',
      `aws ecr get-login-password --region "$REGION" | docker login --username AWS --password-stdin $(echo '${imageUri}' | cut -d/ -f1)`,
      `docker pull '${imageUri}'`,
      '',
      '# Tag self as idle',
      'INSTANCE_ID=$(ec2-metadata -i | cut -d" " -f2)',
      'aws ec2 create-tags --resources "$INSTANCE_ID" --region "$REGION" --tags Key=bgagent:status,Value=idle',
    );

    // Auto Scaling Group. The machine image is the ARM64 build of Amazon
    // Linux 2023, so an overriding instance type should be ARM64-compatible.
    // The minimum capacity intentionally tracks the desired capacity.
    this.autoScalingGroup = new autoscaling.AutoScalingGroup(this, 'ASG', {
      vpc: props.vpc,
      vpcSubnets: { subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS },
      instanceType: props.instanceType ?? new ec2.InstanceType('m7g.xlarge'),
      machineImage: ec2.MachineImage.latestAmazonLinux2023({
        cpuType: ec2.AmazonLinuxCpuType.ARM_64,
      }),
      role: this.instanceRole,
      securityGroup: this.securityGroup,
      userData,
      desiredCapacity: props.desiredCapacity ?? 1,
      minCapacity: props.desiredCapacity ?? 1,
      maxCapacity: props.maxCapacity ?? 3,
      healthCheck: autoscaling.HealthCheck.ec2(),
    });

    // Fleet identification tag. It is set on the ASG and propagated to
    // instances at launch rather than written from user data: the role's
    // CreateTags permission requires the fleet tag to be present already, so
    // a self-tagging call would be denied and (under `set -e`) abort boot
    // before the instance could mark itself idle.
    Tags.of(this.autoScalingGroup).add(this.fleetTagKey, this.fleetTagValue);

    NagSuppressions.addResourceSuppressions(this.instanceRole, [
      {
        id: 'AwsSolutions-IAM4',
        reason: 'AmazonSSMManagedInstanceCore is the AWS-recommended managed policy for SSM-managed instances',
      },
      {
        id: 'AwsSolutions-IAM5',
        reason: 'DynamoDB index/* wildcards generated by CDK grantReadWriteData; Bedrock InvokeModel requires * resource; Secrets Manager wildcards from CDK grantRead; CloudWatch Logs wildcards from CDK grantWrite; ECR GetAuthorizationToken requires * resource; EC2 CreateTags/DeleteTags conditioned on fleet tag; S3 read wildcards from CDK grantRead',
      },
    ], true);

    NagSuppressions.addResourceSuppressions(this.autoScalingGroup, [
      {
        id: 'AwsSolutions-AS3',
        reason: 'ASG scaling notifications are not required for this dev/preview compute backend',
      },
      {
        id: 'AwsSolutions-EC26',
        reason: 'EBS encryption uses default AWS-managed key — sufficient for agent ephemeral workloads',
      },
    ], true);

    NagSuppressions.addResourceSuppressions(this.payloadBucket, [
      {
        id: 'AwsSolutions-S1',
        reason: 'Server access logging not required for ephemeral payload overflow bucket with 7-day lifecycle',
      },
    ], true);
  }
}
0 commit comments