Skip to content

Commit f46c91c

Browse files
committed
fix: custom auth trigger lambda was not deleted in teardown
1 parent 0ec87e3 commit f46c91c

File tree

1 file changed

+83
-22
lines changed
  • packages/amplify-gen2-migration-e2e-system/src/core

1 file changed

+83
-22
lines changed

packages/amplify-gen2-migration-e2e-system/src/core/app.ts

Lines changed: 83 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -571,41 +571,102 @@ export class App {
571571
// ============================================================
572572

573573
/**
 * Best-effort deletion of a CloudFormation stack that tolerates resources
 * which refuse to delete (the motivating case is a custom resource such as
 * `CustomAuthTriggerResource` whose service-token Lambda or referenced
 * Cognito user pool is already gone).
 *
 * Flow:
 *   1. Send the delete request and wait for it to finish.
 *   2. If the wait does not succeed, first give every nested stack that is
 *      in `DELETE_FAILED` its own cleanup pass. `RetainResources` on a parent
 *      cannot skip a resource that lives inside a nested stack, so the
 *      nested stack has to be deleted with its own `RetainResources` list.
 *   3. Re-read the stack's resources and retry the delete, retaining every
 *      logical ID that is still `DELETE_FAILED`.
 *
 * Failures during the retry are logged and swallowed so teardown can carry
 * on. The initial delete request is not guarded and may reject.
 *
 * @param cfnClient - CloudFormation client used for all calls.
 * @param stackName - Name (or ID) of the stack to delete.
 */
private async deleteStackWithRetainOnFailure(cfnClient: CloudFormationClient, stackName: string): Promise<void> {
  await cfnClient.send(new DeleteStackCommand({ StackName: stackName }));

  const deletedFirstTry = await this.tryWaitForStackDelete(cfnClient, stackName);
  if (deletedFirstTry) {
    return;
  }

  // Nested stacks must be unstuck before the parent retry can succeed.
  await this.cleanupNestedFailedStacks(cfnClient, stackName);

  const stuckLogicalIds = await this.listFailedResources(cfnClient, stackName);
  if (stuckLogicalIds.length > 0) {
    this.logger.info(`Retrying delete of ${stackName} with retained resources: ${stuckLogicalIds.join(', ')}`);
    try {
      await cfnClient.send(new DeleteStackCommand({ StackName: stackName, RetainResources: stuckLogicalIds }));
      const deletedOnRetry = await this.tryWaitForStackDelete(cfnClient, stackName);
      if (!deletedOnRetry) {
        this.logger.info(`Stack ${stackName} retry did not complete within timeout (continuing teardown)`);
      }
    } catch (e) {
      this.logger.info(`Failed to retry stack ${stackName} delete: ${(e as Error).message} (continuing teardown)`);
    }
    return;
  }

  // Nothing is reported as DELETE_FAILED, so there is nothing to retain.
  this.logger.info(`Stack ${stackName} delete did not complete within timeout (continuing teardown)`);
}
608612

613+
/**
 * Recursively delete any nested stacks of `stackName` that are in
 * `DELETE_FAILED`. Each recursive call can itself retain problem leaf
 * resources, so after this returns the parent's retry only needs to retain
 * nested-stack logical IDs that remained stuck.
 *
 * This is best-effort: a failure to describe the parent's resources, or a
 * failure while cleaning one nested stack, is logged and does not stop the
 * remaining nested stacks (or the caller's own retry) from being attempted.
 *
 * NOTE(review): `DescribeStackResources` is documented to return only the
 * first 100 resources of a stack; a failed nested stack beyond that would
 * be missed here. Confirm whether the stacks under test can exceed that.
 *
 * @param cfnClient - CloudFormation client used for all calls.
 * @param stackName - Name (or ID) of the parent stack to inspect.
 */
private async cleanupNestedFailedStacks(cfnClient: CloudFormationClient, stackName: string): Promise<void> {
  let resources;
  try {
    resources = await cfnClient.send(new DescribeStackResourcesCommand({ StackName: stackName }));
  } catch (e) {
    this.logger.info(`Failed to describe resources for ${stackName}: ${(e as Error).message} (continuing teardown)`);
    return;
  }

  for (const nested of resources.StackResources ?? []) {
    const nestedName = nested.PhysicalResourceId;
    const isFailedNestedStack = nested.ResourceType === 'AWS::CloudFormation::Stack' && nested.ResourceStatus === 'DELETE_FAILED';
    if (!isFailedNestedStack || !nestedName) {
      continue;
    }

    this.logger.info(`Recursively cleaning nested stack: ${nestedName}`);
    try {
      // Buckets are emptied first because CloudFormation cannot delete a bucket that still has objects.
      await this.emptyStackBuckets(cfnClient, nestedName);
      await this.deleteStackWithRetainOnFailure(cfnClient, nestedName);
    } catch (e) {
      // One stuck nested stack must not prevent its siblings, or the
      // parent's RetainResources retry, from being attempted.
      const reason = e instanceof Error ? e.message : String(e);
      this.logger.info(`Failed to clean up nested stack ${nestedName}: ${reason} (continuing teardown)`);
    }
  }
}
638+
639+
/**
 * Collect the logical IDs of every resource in the given stack whose status
 * is `DELETE_FAILED`.
 *
 * @param cfnClient - CloudFormation client used to describe the stack.
 * @param stackName - Name (or ID) of the stack to inspect.
 * @returns The matching logical IDs, or an empty array when none match or
 *          when the describe call fails (the failure is logged, not thrown).
 */
private async listFailedResources(cfnClient: CloudFormationClient, stackName: string): Promise<string[]> {
  const failedLogicalIds: string[] = [];
  try {
    const response = await cfnClient.send(new DescribeStackResourcesCommand({ StackName: stackName }));
    for (const resource of response.StackResources ?? []) {
      // Entries without a logical ID cannot be passed to RetainResources, so skip them.
      if (resource.ResourceStatus === 'DELETE_FAILED' && resource.LogicalResourceId) {
        failedLogicalIds.push(resource.LogicalResourceId);
      }
    }
  } catch (e) {
    this.logger.info(`Failed to list failed resources for ${stackName}: ${(e as Error).message} (continuing teardown)`);
    return [];
  }
  return failedLogicalIds;
}
656+
657+
/**
 * Block until the stack's deletion finishes, reporting the outcome as a
 * boolean instead of throwing.
 *
 * The wait is bounded by a `maxWaitTime` of 300 (seconds, per the AWS SDK
 * waiter options).
 *
 * @param cfnClient - CloudFormation client the waiter polls with.
 * @param stackName - Name (or ID) of the stack being deleted.
 * @returns `true` when the waiter resolves; `false` when it rejects for any
 *          reason (timeout, delete failure, or any other error). The caller
 *          decides how to recover.
 */
private async tryWaitForStackDelete(cfnClient: CloudFormationClient, stackName: string): Promise<boolean> {
  let deleted = false;
  try {
    await waitUntilStackDeleteComplete({ client: cfnClient, maxWaitTime: 300 }, { StackName: stackName });
    deleted = true;
  } catch {
    // Intentionally swallowed: the boolean result is the error signal.
  }
  return deleted;
}
669+
609670
/**
610671
* Empty all S3 buckets owned by the given CloudFormation stack.
611672
* CloudFormation cannot delete a bucket with objects, so we must empty them first.

0 commit comments

Comments
 (0)