Skip to content

Commit 1f16973

Browse files
fix: ARM64 runtime platform, individual env vars, and error handler reload
- Set runtimePlatform to ARM64/LINUX on Fargate task definition to match the agent Dockerfile's linux/arm64 target platform
- Pass individual env vars (REPO_URL, TASK_DESCRIPTION, etc.) to the container instead of a single TASK_PAYLOAD JSON blob, matching what the agent entrypoint.py get_config() expects
- Fix HandleError to always reload task from DynamoDB for current status instead of using stale state from the SFN input, and handle Error/Cause nested under $.error from the catch path
1 parent 3b38fea commit 1f16973

13 files changed

Lines changed: 100 additions & 40 deletions

src/constructs/fargate-agent-cluster.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,10 @@ export class FargateAgentCluster extends Construct {
108108
cpu: 4096,
109109
memoryLimitMiB: 16384,
110110
ephemeralStorageGiB: 100,
111+
runtimePlatform: {
112+
cpuArchitecture: ecs.CpuArchitecture.ARM64,
113+
operatingSystemFamily: ecs.OperatingSystemFamily.LINUX,
114+
},
111115
});
112116

113117
const logGroup = new logs.LogGroup(this, 'LogGroup', {

src/constructs/task-step-function.ts

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,8 @@ export class TaskStepFunction extends Construct {
107107
const notAdmittedError = new sfn_tasks.LambdaInvoke(this, 'HandleNotAdmitted', {
108108
lambdaFunction: props.handleErrorFn,
109109
payload: sfn.TaskInput.fromObject({
110-
Error: 'AdmissionDenied',
111-
Cause: 'Concurrency limit reached',
110+
'Error': 'AdmissionDenied',
111+
'Cause': 'Concurrency limit reached',
112112
'task_id.$': '$.task.task_id',
113113
'user_id.$': '$.task.user_id',
114114
}),
@@ -158,7 +158,14 @@ export class TaskStepFunction extends Construct {
158158
containerOverrides: [{
159159
containerDefinition: props.containerDefinition,
160160
environment: [
161-
{ name: 'TASK_PAYLOAD', value: sfn.JsonPath.stringAt('$.containerOverrides.environment[0].value') },
161+
{ name: 'REPO_URL', value: sfn.JsonPath.stringAt('$.containerEnv.REPO_URL') },
162+
{ name: 'TASK_DESCRIPTION', value: sfn.JsonPath.stringAt('$.containerEnv.TASK_DESCRIPTION') },
163+
{ name: 'ISSUE_NUMBER', value: sfn.JsonPath.stringAt('$.containerEnv.ISSUE_NUMBER') },
164+
{ name: 'MAX_TURNS', value: sfn.JsonPath.stringAt('$.containerEnv.MAX_TURNS') },
165+
{ name: 'MAX_BUDGET_USD', value: sfn.JsonPath.stringAt('$.containerEnv.MAX_BUDGET_USD') },
166+
{ name: 'ANTHROPIC_MODEL', value: sfn.JsonPath.stringAt('$.containerEnv.ANTHROPIC_MODEL') },
167+
{ name: 'TASK_ID', value: sfn.JsonPath.stringAt('$.containerEnv.TASK_ID') },
168+
{ name: 'SYSTEM_PROMPT_OVERRIDES', value: sfn.JsonPath.stringAt('$.containerEnv.SYSTEM_PROMPT_OVERRIDES') },
162169
],
163170
}],
164171
taskTimeout: sfn.Timeout.duration(Duration.hours(8)),

src/handlers/sfn-steps/admission-control.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@
1717
* SOFTWARE.
1818
*/
1919

20+
import { admissionControl } from '../shared/orchestrator';
2021
import type { BlueprintConfig } from '../shared/repo-config';
2122
import type { TaskRecord } from '../shared/types';
22-
import { admissionControl } from '../shared/orchestrator';
2323

2424
interface AdmissionControlInput {
2525
readonly task: TaskRecord;

src/handlers/sfn-steps/finalize-task.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717
* SOFTWARE.
1818
*/
1919

20-
import type { TaskRecord } from '../shared/types';
2120
import { finalizeTask } from '../shared/orchestrator';
21+
import type { TaskRecord } from '../shared/types';
2222

2323
interface FinalizeTaskInput {
2424
readonly task: TaskRecord;

src/handlers/sfn-steps/handle-error.ts

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,19 @@
1717
* SOFTWARE.
1818
*/
1919

20-
import type { TaskRecord } from '../shared/types';
20+
import type { TaskStatusType } from '../../constructs/task-status';
2121
import { failTask, loadTask } from '../shared/orchestrator';
22+
import type { TaskRecord } from '../shared/types';
2223

23-
interface HandleErrorInput {
24+
interface CatchError {
2425
readonly Error: string;
2526
readonly Cause: string;
27+
}
28+
29+
interface HandleErrorInput {
30+
readonly Error?: string;
31+
readonly Cause?: string;
32+
readonly error?: CatchError;
2633
readonly task_id?: string;
2734
readonly user_id?: string;
2835
readonly task?: TaskRecord;
@@ -34,19 +41,20 @@ interface HandleErrorOutput {
3441
}
3542

3643
export async function handler(event: HandleErrorInput): Promise<HandleErrorOutput> {
37-
const errorMessage = `${event.Error}: ${event.Cause}`;
44+
// Error/Cause may be at top level (direct invoke) or nested under $.error (catch path)
45+
const errorObj = event.error ?? event;
46+
const errorMessage = `${errorObj.Error ?? 'Unknown'}: ${errorObj.Cause ?? 'Unknown'}`;
3847
const taskId = event.task_id ?? event.task?.task_id;
3948
const userId = event.user_id ?? event.task?.user_id;
4049

4150
if (taskId && userId) {
42-
let currentStatus = event.task?.status;
43-
if (!currentStatus) {
44-
try {
45-
const loaded = await loadTask(taskId);
46-
currentStatus = loaded.status;
47-
} catch {
48-
// Task may not exist or may have been deleted
49-
}
51+
// Always reload from DynamoDB to get the actual current status
52+
let currentStatus: TaskStatusType | undefined;
53+
try {
54+
const loaded = await loadTask(taskId);
55+
currentStatus = loaded.status;
56+
} catch {
57+
// Task may not exist or may have been deleted
5058
}
5159
if (currentStatus) {
5260
const concurrencyAcquired = currentStatus !== 'SUBMITTED';

src/handlers/sfn-steps/hydrate-context.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@
1717
* SOFTWARE.
1818
*/
1919

20+
import { hydrateAndTransition } from '../shared/orchestrator';
2021
import type { BlueprintConfig } from '../shared/repo-config';
2122
import type { TaskRecord } from '../shared/types';
22-
import { hydrateAndTransition } from '../shared/orchestrator';
2323

2424
interface HydrateContextInput {
2525
readonly task: TaskRecord;

src/handlers/sfn-steps/load-task.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@
1717
* SOFTWARE.
1818
*/
1919

20+
import { loadBlueprintConfig, loadTask } from '../shared/orchestrator';
2021
import type { BlueprintConfig } from '../shared/repo-config';
2122
import type { TaskRecord } from '../shared/types';
22-
import { loadBlueprintConfig, loadTask } from '../shared/orchestrator';
2323

2424
interface LoadTaskInput {
2525
readonly task_id: string;

src/handlers/sfn-steps/transition-to-running.ts

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,25 +17,32 @@
1717
* SOFTWARE.
1818
*/
1919

20-
import type { BlueprintConfig } from '../shared/repo-config';
21-
import type { TaskRecord } from '../shared/types';
2220
import { TaskStatus } from '../../constructs/task-status';
2321
import { emitTaskEvent, transitionTask } from '../shared/orchestrator';
22+
import type { BlueprintConfig } from '../shared/repo-config';
23+
import type { TaskRecord } from '../shared/types';
2424

2525
interface TransitionToRunningInput {
2626
readonly task: TaskRecord;
2727
readonly blueprintConfig: BlueprintConfig;
2828
readonly payload: Record<string, unknown>;
2929
}
3030

31-
interface ContainerOverrides {
32-
readonly environment: ReadonlyArray<{ readonly name: string; readonly value: string }>;
31+
interface ContainerEnvVars {
32+
readonly REPO_URL: string;
33+
readonly TASK_DESCRIPTION: string;
34+
readonly ISSUE_NUMBER: string;
35+
readonly MAX_TURNS: string;
36+
readonly MAX_BUDGET_USD: string;
37+
readonly ANTHROPIC_MODEL: string;
38+
readonly TASK_ID: string;
39+
readonly SYSTEM_PROMPT_OVERRIDES: string;
3340
}
3441

3542
interface TransitionToRunningOutput {
3643
readonly task: TaskRecord;
3744
readonly payload: Record<string, unknown>;
38-
readonly containerOverrides: ContainerOverrides;
45+
readonly containerEnv: ContainerEnvVars;
3946
}
4047

4148
export async function handler(event: TransitionToRunningInput): Promise<TransitionToRunningOutput> {
@@ -46,11 +53,17 @@ export async function handler(event: TransitionToRunningInput): Promise<Transiti
4653
});
4754
await emitTaskEvent(event.task.task_id, 'task_running', { started_at: startedAt });
4855

49-
const containerOverrides: ContainerOverrides = {
50-
environment: [
51-
{ name: 'TASK_PAYLOAD', value: JSON.stringify(event.payload) },
52-
],
56+
const payload = event.payload;
57+
const containerEnv: ContainerEnvVars = {
58+
REPO_URL: String(payload.repo_url ?? ''),
59+
TASK_DESCRIPTION: String(payload.prompt ?? ''),
60+
ISSUE_NUMBER: String(payload.issue_number ?? ''),
61+
MAX_TURNS: String(payload.max_turns ?? '100'),
62+
MAX_BUDGET_USD: String(payload.max_budget_usd ?? '0'),
63+
ANTHROPIC_MODEL: String(payload.model_id ?? ''),
64+
TASK_ID: String(event.task.task_id ?? ''),
65+
SYSTEM_PROMPT_OVERRIDES: String(payload.system_prompt_overrides ?? ''),
5366
};
5467

55-
return { task: event.task, payload: event.payload, containerOverrides };
68+
return { task: event.task, payload: event.payload, containerEnv };
5669
}

test/handlers/sfn-steps/admission-control.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717
* SOFTWARE.
1818
*/
1919

20-
import type { TaskRecord } from '../../../src/handlers/shared/types';
2120
import type { BlueprintConfig } from '../../../src/handlers/shared/repo-config';
21+
import type { TaskRecord } from '../../../src/handlers/shared/types';
2222

2323
const mockAdmissionControl = jest.fn();
2424

test/handlers/sfn-steps/handle-error.test.ts

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ const TASK: TaskRecord = {
4343

4444
beforeEach(() => jest.resetAllMocks());
4545

46-
test('fails task when task object is provided', async () => {
46+
test('fails task when task object is provided (reloads from DDB)', async () => {
47+
mockLoadTask.mockResolvedValue({ ...TASK, status: 'RUNNING' });
4748
mockFailTask.mockResolvedValue(undefined);
4849

4950
const result = await handler({
@@ -52,12 +53,13 @@ test('fails task when task object is provided', async () => {
5253
task: TASK,
5354
});
5455

56+
expect(mockLoadTask).toHaveBeenCalledWith('task-1');
5557
expect(mockFailTask).toHaveBeenCalledWith(
5658
'task-1',
57-
'HYDRATING',
59+
'RUNNING',
5860
'States.TaskFailed: Container exited with code 1',
5961
'user-1',
60-
true, // concurrencyAcquired: HYDRATING !== SUBMITTED
62+
true, // concurrencyAcquired: RUNNING !== SUBMITTED
6163
);
6264
expect(result).toEqual({ status: 'failed', error: 'States.TaskFailed: Container exited with code 1' });
6365
});
@@ -110,6 +112,7 @@ test('handles loadTask failure gracefully', async () => {
110112
});
111113

112114
test('sets concurrencyAcquired=false when task is SUBMITTED', async () => {
115+
mockLoadTask.mockResolvedValue({ ...TASK, status: 'SUBMITTED' });
113116
mockFailTask.mockResolvedValue(undefined);
114117

115118
await handler({
@@ -118,6 +121,7 @@ test('sets concurrencyAcquired=false when task is SUBMITTED', async () => {
118121
task: { ...TASK, status: 'SUBMITTED' } as TaskRecord,
119122
});
120123

124+
expect(mockLoadTask).toHaveBeenCalledWith('task-1');
121125
expect(mockFailTask).toHaveBeenCalledWith(
122126
'task-1',
123127
'SUBMITTED',
@@ -126,3 +130,22 @@ test('sets concurrencyAcquired=false when task is SUBMITTED', async () => {
126130
false, // concurrencyAcquired: SUBMITTED === SUBMITTED
127131
);
128132
});
133+
134+
test('handles error nested under $.error (catch path)', async () => {
135+
mockLoadTask.mockResolvedValue({ ...TASK, status: 'RUNNING' });
136+
mockFailTask.mockResolvedValue(undefined);
137+
138+
const result = await handler({
139+
error: { Error: 'States.TaskFailed', Cause: 'Container exited' },
140+
task: TASK,
141+
} as unknown as Parameters<typeof handler>[0]);
142+
143+
expect(mockFailTask).toHaveBeenCalledWith(
144+
'task-1',
145+
'RUNNING',
146+
'States.TaskFailed: Container exited',
147+
'user-1',
148+
true,
149+
);
150+
expect(result.error).toContain('States.TaskFailed');
151+
});

0 commit comments

Comments
 (0)