Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 82 additions & 0 deletions src/cli/cloudformation/__tests__/stack-failure.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import { describeStackFailureDetail, formatStackFailureDetail } from '../stack-failure.js';
import type { StackEvent } from '@aws-sdk/client-cloudformation';
import { beforeEach, describe, expect, it, vi } from 'vitest';

// Shared spy standing in for `CloudFormationClient.prototype.send`. It is created via
// `vi.hoisted` so that it already exists when the `vi.mock` factories below run
// (vitest hoists `vi.mock` calls above the imports).
const { mockSend } = vi.hoisted(() => ({
mockSend: vi.fn(),
}));

// Replace the SDK module with minimal stand-ins: every client instance routes `send`
// to the shared spy, and the command simply records the input it was constructed with.
vi.mock('@aws-sdk/client-cloudformation', () => ({
CloudFormationClient: class {
send = mockSend;
},
DescribeStackEventsCommand: class {
constructor(public input: unknown) {}
},
}));

// Stub the project's AWS helper module so `getCredentialProvider` returns an empty object.
vi.mock('../../aws', () => ({
getCredentialProvider: vi.fn().mockReturnValue({}),
}));

// Fixture: a CREATE_FAILED event whose reason is specific to the resource. The tests
// below expect this event's logical id, resource type, and reason to appear in the output.
const ROOT_FAILURE: StackEvent = {
StackId: 'stack-id',
EventId: '1',
StackName: 'my-stack',
Timestamp: new Date(),
LogicalResourceId: 'AgentRuntimeFunction',
ResourceType: 'AWS::Lambda::Function',
ResourceStatus: 'CREATE_FAILED',
ResourceStatusReason: 'Resource handler returned message: "Role arn is invalid"',
};

// Fixture: a CREATE_FAILED event carrying only the generic "Resource creation cancelled"
// reason. The tests below expect it to be filtered out of the formatted detail.
const CASCADE: StackEvent = {
StackId: 'stack-id',
EventId: '2',
StackName: 'my-stack',
Timestamp: new Date(),
LogicalResourceId: 'OtherResource',
ResourceType: 'AWS::IAM::Role',
ResourceStatus: 'CREATE_FAILED',
ResourceStatusReason: 'Resource creation cancelled',
};

describe('formatStackFailureDetail', () => {
  // Every case formats for the same region and stack; only the event list varies.
  const format = (events: StackEvent[]) => formatStackFailureDetail(events, 'us-east-1', 'my-stack');

  it('names the root logical id, resource type, and reason; skips cascade noise; includes a console link', () => {
    const message = format([CASCADE, ROOT_FAILURE]);

    expect(message).not.toBeNull();
    // The root failure is reported as "<LogicalId> (<Type>) failed: <reason>".
    expect(message).toContain('AgentRuntimeFunction (AWS::Lambda::Function) failed: Resource handler returned message');

    // Nothing from the cascade event may leak into the output.
    for (const noise of ['Resource creation cancelled', 'OtherResource']) {
      expect(message).not.toContain(noise);
    }

    // The deep link carries both the console domain and the stack name.
    for (const fragment of ['console.aws.amazon.com', 'my-stack']) {
      expect(message).toContain(fragment);
    }
  });

  it('returns null when no actionable failure reason is present', () => {
    // Cascade-only and empty event lists both yield no detail.
    for (const events of [[CASCADE], []]) {
      expect(format(events)).toBeNull();
    }
  });
});

describe('describeStackFailureDetail', () => {
  beforeEach(() => {
    // Drop recorded calls from the previous test; each test installs its own `send` behavior.
    vi.clearAllMocks();
  });

  it('fetches events via DescribeStackEvents and distills the root failure', async () => {
    mockSend.mockResolvedValue({ StackEvents: [CASCADE, ROOT_FAILURE] });

    const detail = await describeStackFailureDetail('us-east-1', 'my-stack');

    // The lookup must actually go through the client, exactly once, for the requested stack.
    // The mocked command class stores its constructor argument on `input`.
    expect(mockSend).toHaveBeenCalledTimes(1);
    expect(mockSend).toHaveBeenCalledWith(
      expect.objectContaining({ input: expect.objectContaining({ StackName: 'my-stack' }) })
    );

    expect(detail).toContain('AgentRuntimeFunction (AWS::Lambda::Function) failed');
    expect(detail).not.toContain('Resource creation cancelled');
  });

  it('returns null when DescribeStackEvents throws', async () => {
    mockSend.mockRejectedValue(new Error('boom'));
    expect(await describeStackFailureDetail('us-east-1', 'my-stack')).toBeNull();
  });
});
1 change: 1 addition & 0 deletions src/cli/cloudformation/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
export * from './bootstrap';
export * from './outputs';
export * from './stack-discovery';
export * from './stack-failure';
export * from './stack-status';
export * from './types';
66 changes: 66 additions & 0 deletions src/cli/cloudformation/stack-failure.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import { getCredentialProvider } from '../aws';
import { consoleDomain } from '../aws/partition';
import { isFailureEvent } from './types';
import { CloudFormationClient, DescribeStackEventsCommand, type StackEvent } from '@aws-sdk/client-cloudformation';

// Generic reason prefixes that CloudFormation attaches to sibling resources (and to the
// stack itself) after one resource in the stack has failed. They say nothing about the
// root cause, so events carrying them are dropped from failure summaries.
const CASCADE_REASONS = [
  'Resource creation cancelled',
  'Resource update cancelled',
  'The following resource(s) failed to create',
  'The following resource(s) failed to update',
  'The following resource(s) failed to delete',
];

/**
 * Decide whether a status reason is uninformative cascade noise.
 *
 * @param reason - The event's `ResourceStatusReason`, if any.
 * @returns `true` when the reason is missing or empty, or when it begins with one of the
 *   known cascade prefixes (case-sensitive); `false` for any other reason.
 */
function isCascadeNoise(reason?: string): boolean {
  if (reason) {
    for (const noisePrefix of CASCADE_REASONS) {
      if (reason.startsWith(noisePrefix)) {
        return true;
      }
    }
    return false;
  }
  // No reason at all is as unhelpful as a generic one.
  return true;
}

/**
 * Compose the CloudFormation console URL that opens the events view of a stack.
 *
 * @param region - AWS region; used as the host prefix, as the argument to
 *   `consoleDomain`, and as the `region` query parameter.
 * @param stackName - Stack identifier; URI-component-encoded into the `stackId`
 *   parameter of the URL fragment.
 * @returns The console deep link as a string.
 */
export function stackEventsConsoleUrl(region: string, stackName: string): string {
  const stackIdParam = encodeURIComponent(stackName);
  const host = `${region}.${consoleDomain(region)}`;
  return `https://${host}/cloudformation/home?region=${region}#/stacks/events?stackId=${stackIdParam}`;
}

/** Reason found on the stack-level event that opens a user-requested operation. */
const OPERATION_START_REASON = 'User Initiated';

/** Epoch milliseconds of an event, or `undefined` when its timestamp is missing or invalid. */
function stackEventTime(ev: StackEvent): number | undefined {
  if (!(ev.Timestamp instanceof Date)) return undefined;
  const millis = ev.Timestamp.getTime();
  return Number.isNaN(millis) ? undefined : millis;
}

/**
 * Reduce a stack's events to the root resource failure(s) of the most recent
 * operation, skipping the generic cascade noise CloudFormation emits on sibling
 * resources once one fails.
 *
 * A stack's event list covers its whole history, so without scoping, a failure from
 * an earlier deployment would be reported as the cause of the current one. When the
 * list contains stack-level events marked `User Initiated`, only events at or after
 * the latest such marker are considered. Lists without a marker are used in full.
 * Events without a usable timestamp are never dropped by this scoping.
 *
 * Failures are listed oldest first so the original failure precedes any follow-on
 * failures (events without a usable timestamp sort last).
 *
 * @param events - Stack events in any order; the array is not mutated.
 * @param region - AWS region of the stack, used for the console link.
 * @param stackName - Stack name, used for the console link.
 * @returns A multi-line string with one
 *   `<LogicalId> (<ResourceType>) failed: <ResourceStatusReason>` line per root
 *   failure followed by a console deep link, or `null` if no actionable failure
 *   reason is present.
 */
export function formatStackFailureDetail(events: StackEvent[], region: string, stackName: string): string | null {
  // Find when the most recent user-requested operation began.
  let operationStart = Number.NEGATIVE_INFINITY;
  for (const ev of events) {
    if (ev.ResourceType !== 'AWS::CloudFormation::Stack' || ev.ResourceStatusReason !== OPERATION_START_REASON) {
      continue;
    }
    const startedAt = stackEventTime(ev);
    if (startedAt !== undefined && startedAt > operationStart) {
      operationStart = startedAt;
    }
  }

  // Missing timestamps are treated as "latest" so such events are kept and sorted last.
  const timeOf = (ev: StackEvent): number => stackEventTime(ev) ?? Number.POSITIVE_INFINITY;

  const rootFailures = events
    .filter(ev => isFailureEvent(ev) && !isCascadeNoise(ev.ResourceStatusReason))
    .filter(ev => timeOf(ev) >= operationStart)
    // `filter` returned a fresh array, so sorting in place leaves the caller's input untouched.
    .sort((a, b) => {
      const ta = timeOf(a);
      const tb = timeOf(b);
      if (ta === tb) return 0;
      return ta < tb ? -1 : 1;
    });

  if (rootFailures.length === 0) {
    return null;
  }

  const lines = rootFailures.map(ev => {
    const logicalId = ev.LogicalResourceId ?? 'UnknownResource';
    const resourceType = ev.ResourceType ?? 'UnknownType';
    return `${logicalId} (${resourceType}) failed: ${ev.ResourceStatusReason}`;
  });

  lines.push(`See stack events: ${stackEventsConsoleUrl(region, stackName)}`);
  return lines.join('\n');
}

/**
 * Best-effort lookup of why a stack operation failed.
 *
 * Issues a single DescribeStackEvents request for the stack (no pagination) and
 * passes the returned events to `formatStackFailureDetail`.
 *
 * @param region - AWS region the stack lives in.
 * @param stackName - Name of the stack whose events are read.
 * @returns The formatted failure detail, or `null` when the formatter finds no
 *   actionable failure or when anything in the lookup throws (errors are
 *   deliberately swallowed so that enrichment never masks the original failure).
 */
export async function describeStackFailureDetail(region: string, stackName: string): Promise<string | null> {
  try {
    const credentials = getCredentialProvider();
    const client = new CloudFormationClient({ region, credentials });
    const command = new DescribeStackEventsCommand({ StackName: stackName });
    const { StackEvents } = await client.send(command);
    return formatStackFailureDetail(StackEvents ?? [], region, stackName);
  } catch {
    return null;
  }
}
18 changes: 17 additions & 1 deletion src/cli/commands/deploy/actions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { CdkToolkitWrapper, createSwitchableIoHost } from '../../cdk/toolkit-lib
import type { DeployMessage, SwitchableIoHost } from '../../cdk/toolkit-lib';
import {
buildDeployedState,
describeStackFailureDetail,
getStackOutputs,
parseAgentOutputs,
parseConfigBundleOutputs,
Expand Down Expand Up @@ -162,6 +163,10 @@ export async function handleDeploy(options: ValidatedDeployOptions): Promise<Dep
const logger = new ExecLogger({ command: 'deploy' });
const { onProgress } = options;
let currentStepName = '';
// Tracked for the catch block so a CloudFormation failure can be enriched with
// the root resource failure reason (DescribeStackEvents) and a console link.
let failedRegion: string | undefined;
let failedStackName: string | undefined;

const startStep = (name: string) => {
currentStepName = name;
Expand Down Expand Up @@ -199,6 +204,7 @@ export async function handleDeploy(options: ValidatedDeployOptions): Promise<Dep
// calls that don't receive an explicit region option.
// See https://github.com/aws/agentcore-cli/issues/924.
restoreEnv = applyTargetRegionToEnv(target.region);
failedRegion = target.region;
endStep('success');

// Read project spec for gateway information (used later for deploy step name and outputs)
Expand Down Expand Up @@ -431,6 +437,7 @@ export async function handleDeploy(options: ValidatedDeployOptions): Promise<Dep
};
}
const stackName = stackSelection.stackName;
failedStackName = stackName;
endStep('success');

// Check if bootstrap needed
Expand Down Expand Up @@ -950,7 +957,16 @@ export async function handleDeploy(options: ValidatedDeployOptions): Promise<Dep
} catch (err: unknown) {
logger.log(getErrorMessage(err), 'error');
logger.finalize(false);
return { success: false, error: toError(err), logPath: logger.getRelativeLogPath() };
const error = toError(err);
// Enrich CloudFormation/CDK failures with the root resource failure reason and a
// console link so users don't have to dig through stack events manually.
if (failedRegion && failedStackName) {
const detail = await describeStackFailureDetail(failedRegion, failedStackName);
if (detail) {
error.message = `${error.message}\n\n${detail}`;
}
}
return { success: false, error, logPath: logger.getRelativeLogPath() };
} finally {
if (toolkitWrapper) {
await toolkitWrapper.dispose();
Expand Down
12 changes: 12 additions & 0 deletions src/cli/tui/components/DeployStatus.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ interface DeployStatusProps {
hasError: boolean;
hasPostDeployError?: boolean;
postDeployWarnings?: string[];
/** Root CloudFormation resource failure detail (logical id, type, reason + console link). */
failureDetail?: string | null;
}

const PROGRESS_BAR_WIDTH = 20;
Expand Down Expand Up @@ -139,6 +141,7 @@ export function DeployStatus({
hasError,
hasPostDeployError,
postDeployWarnings,
failureDetail,
}: DeployStatusProps) {
// Parse and filter messages to only meaningful resource updates
const parsedResources = messages
Expand Down Expand Up @@ -177,6 +180,15 @@ export function DeployStatus({
))}
</Box>
)}
{hasError && failureDetail && (
<Box flexDirection="column" marginTop={1}>
{failureDetail.split('\n').map((line, i) => (
<Text key={i} color="red">
{line}
</Text>
))}
</Box>
)}
{hasWarning && postDeployWarnings && postDeployWarnings.length > 0 && (
<Box flexDirection="column" marginTop={1}>
{postDeployWarnings.map((w, i) => (
Expand Down
2 changes: 2 additions & 0 deletions src/cli/tui/screens/deploy/DeployScreen.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ export function DeployScreen({
managedMemoryNotice,
postDeployWarnings,
postDeployHasError,
deployFailureDetail,
isDiffLoading,
requestDiff,
hasError,
Expand Down Expand Up @@ -359,6 +360,7 @@ export function DeployScreen({
hasError={hasError}
hasPostDeployError={postDeployHasError}
postDeployWarnings={postDeployWarnings}
failureDetail={deployFailureDetail}
/>
</Box>
)}
Expand Down
17 changes: 17 additions & 0 deletions src/cli/tui/screens/deploy/useDeployFlow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { ConfigIO } from '../../../../lib';
import type { CdkToolkitWrapper, DeployMessage, SwitchableIoHost } from '../../../cdk/toolkit-lib';
import {
buildDeployedState,
describeStackFailureDetail,
getStackOutputs,
parseAgentOutputs,
parseConfigBundleOutputs,
Expand Down Expand Up @@ -108,6 +109,8 @@ interface DeployFlowState {
postDeployWarnings: string[];
/** True if any post-deploy sub-resource operation had errors */
postDeployHasError: boolean;
/** Root CloudFormation resource failure detail (logical id, type, reason + console link) */
deployFailureDetail: string | null;
/** Whether an on-demand diff is currently running */
isDiffLoading: boolean;
/** Request an on-demand diff (lazy: runs once, caches result) */
Expand Down Expand Up @@ -153,6 +156,9 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState
});
const [publishAssetsStep, setPublishAssetsStep] = useState<Step>({ label: 'Publish assets', status: 'pending' });
const [deployStep, setDeployStep] = useState<Step>({ label: 'Deploy to AWS', status: 'pending' });
// Root CloudFormation resource failure detail (logical id, type, reason + console link),
// surfaced in the deploy status box once the CFN apply has started.
const [deployFailureDetail, setDeployFailureDetail] = useState<string | null>(null);
const [persistStateStep, setPersistStateStep] = useState<Step>({
label: 'Persist deployment state',
status: 'pending',
Expand Down Expand Up @@ -894,6 +900,16 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState
setHasTokenExpiredError(true);
}

// Enrich CloudFormation/CDK failures with the root resource failure reason and
// a console link so users don't have to dig through stack events manually.
const failureRegion = context?.awsTargets[0]?.region;
const failureStackName = stackNames[0];
const failureDetail =
hasReceivedCfnEvent.current && failureRegion && failureStackName
? await describeStackFailureDetail(failureRegion, failureStackName)
: null;
setDeployFailureDetail(failureDetail);

// Mark the appropriate step as error based on whether CFn started
if (hasReceivedCfnEvent.current) {
setDeployStep(prev => ({
Expand Down Expand Up @@ -1157,6 +1173,7 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState
managedMemoryNotice,
postDeployWarnings,
postDeployHasError,
deployFailureDetail,
isDiffLoading,
requestDiff,
stackOutputs,
Expand Down
Loading