From 670253175d7ace5f1a37d5e0233f31ced003ecd4 Mon Sep 17 00:00:00 2001 From: Benjamin Pace Date: Tue, 7 Apr 2026 19:39:43 +0000 Subject: [PATCH 1/8] fix(drift): use correct CloudFormation console URL format for changeset links The changeset console URL was using an incorrect path-based format that doesn't resolve in the CloudFormation console. Switch to the standard query-parameter format: #/stacks/changesets/changes?stackId=&changeSetId= Also thread the stack ARN through flattenChanges so changeset URLs include the stackId parameter needed for proper navigation. --- .../services/drift-formatter.test.ts | 9 ++++--- .../services/drift-formatter.ts | 26 ++++++++++++++----- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/packages/amplify-cli/src/__tests__/commands/drift-detection/services/drift-formatter.test.ts b/packages/amplify-cli/src/__tests__/commands/drift-detection/services/drift-formatter.test.ts index d549ba27f6b..b33259a4651 100644 --- a/packages/amplify-cli/src/__tests__/commands/drift-detection/services/drift-formatter.test.ts +++ b/packages/amplify-cli/src/__tests__/commands/drift-detection/services/drift-formatter.test.ts @@ -150,6 +150,7 @@ describe('createUnifiedCategoryView', () => { ResourceType: 'AWS::CloudFormation::Stack', Action: ChangeAction.Modify, ChangeSetId: nestedChangeSetId, + PhysicalResourceId: 'arn:aws:cloudformation:us-east-1:123:stack/nested-api-stack/abc', nestedChanges: [ { LogicalResourceId: 'Schema', @@ -173,12 +174,12 @@ describe('createUnifiedCategoryView', () => { " API Schema Template Drift: S3 and deployed templates differ - Changeset Id: https://us-east-1.console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/changesets/details?changeSetId=arn%3Aaws%3Acloudformation%3Aus-east-1%3A123%3AchangeSet%2Fnested-api-cs%2Fdef + Changeset Id: https://us-east-1.console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/changesets/changes?stackId=arn%3Aaws%3Acloudformation%3Aus-east-1%3A123%3Astack%2Fnested-api-stack%2Fabc&changeSetId=arn%3Aaws%3Acloudformation%3Aus-east-1%3A123%3AchangeSet%2Fnested-api-cs%2Fdef ~ AWS::AppSync::GraphQLSchema API NewResolver Template Drift: S3 and deployed templates differ - Changeset Id: https://us-east-1.console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/changesets/details?changeSetId=arn%3Aaws%3Acloudformation%3Aus-east-1%3A123%3AchangeSet%2Fnested-api-cs%2Fdef + Changeset Id: https://us-east-1.console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/changesets/changes?stackId=arn%3Aaws%3Acloudformation%3Aus-east-1%3A123%3Astack%2Fnested-api-stack%2Fabc&changeSetId=arn%3Aaws%3Acloudformation%3Aus-east-1%3A123%3AchangeSet%2Fnested-api-cs%2Fdef + AWS::AppSync::Resolver " @@ -374,12 +375,14 @@ describe('createUnifiedCategoryView', () => { ResourceType: 'AWS::CloudFormation::Stack', Action: ChangeAction.Modify, ChangeSetId: outerChangeSetId, + PhysicalResourceId: 'arn:aws:cloudformation:us-east-1:123:stack/outer-stack/abc', nestedChanges: [ { LogicalResourceId: 'apiMyGraphQLGraphQLAPI', ResourceType: 'AWS::CloudFormation::Stack', Action: ChangeAction.Modify, ChangeSetId: deepChangeSetId, + PhysicalResourceId: 'arn:aws:cloudformation:us-east-1:123:stack/deep-stack/ghi', nestedChanges: [ { LogicalResourceId: 'Schema', @@ -399,7 +402,7 @@ describe('createUnifiedCategoryView', () => { " API Schema Template Drift: S3 and deployed templates differ - Changeset Id: https://us-east-1.console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/changesets/details?changeSetId=arn%3Aaws%3Acloudformation%3Aus-east-1%3A123%3AchangeSet%2Fdeep-cs%2Fghi + Changeset Id: https://us-east-1.console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/changesets/changes?stackId=arn%3Aaws%3Acloudformation%3Aus-east-1%3A123%3Astack%2Fdeep-stack%2Fghi&changeSetId=arn%3Aaws%3Acloudformation%3Aus-east-1%3A123%3AchangeSet%2Fdeep-cs%2Fghi ~ AWS::AppSync::GraphQLSchema " diff --git a/packages/amplify-cli/src/commands/drift-detection/services/drift-formatter.ts b/packages/amplify-cli/src/commands/drift-detection/services/drift-formatter.ts index 14d1774f8b5..58b61d48715 100644 --- a/packages/amplify-cli/src/commands/drift-detection/services/drift-formatter.ts +++ b/packages/amplify-cli/src/commands/drift-detection/services/drift-formatter.ts @@ -21,6 +21,7 @@ interface DriftBlock { driftDetectionId?: string; templateChange?: ResourceChangeWithNested; changeSetId?: string; + stackArn?: string; } /** @@ -76,12 +77,12 @@ function cfnDriftConsoleUrl(stackArn: string): string | undefined { /** * Build CloudFormation console URL for a changeset details page */ -function cfnChangesetConsoleUrl(changeSetArn: string): string | undefined { +function cfnChangesetConsoleUrl(changeSetArn: string, stackArn?: string): string | undefined { const region = regionFromArn(changeSetArn); if (!region) return undefined; - return `https://${region}.console.aws.amazon.com/cloudformation/home?region=${region}#/stacks/changesets/details?changeSetId=${encodeURIComponent( - changeSetArn, - )}`; + const encodedStackId = encodeURIComponent(stackArn || ''); + const encodedChangeSetId = encodeURIComponent(changeSetArn); + return `https://${region}.console.aws.amazon.com/cloudformation/home?region=${region}#/stacks/changesets/changes?stackId=${encodedStackId}&changeSetId=${encodedChangeSetId}`; } /** @@ -114,10 +115,20 @@ function collectDriftBlocks(phase1: CloudFormationDriftResults, phase2: Template // Phase 2: One block per template change (flatten nested stacks to leaf resources) if (!phase2.skipped && phase2.changes.length > 0) { - const flattenChanges = (changes: ResourceChangeWithNested[], fallbackCategory: string, fallbackChangeSetId?: string): void => { + const flattenChanges = ( + changes: ResourceChangeWithNested[], + fallbackCategory: string, + fallbackChangeSetId?: string, + parentStackArn?: string, + ): void => { for (const change of changes) { if (change.ResourceType === 'AWS::CloudFormation::Stack' && change.nestedChanges && change.nestedChanges.length > 0) { - flattenChanges(change.nestedChanges, extractCategory(change.LogicalResourceId), change.ChangeSetId || fallbackChangeSetId); + flattenChanges( + change.nestedChanges, + extractCategory(change.LogicalResourceId), + change.ChangeSetId || fallbackChangeSetId, + change.PhysicalResourceId || parentStackArn, + ); } else { const resourceCategory = extractCategory(change.LogicalResourceId); blocks.push({ @@ -126,6 +137,7 @@ function collectDriftBlocks(phase1: CloudFormationDriftResults, phase2: Template type: 'template', templateChange: change, changeSetId: change.ChangeSetId || fallbackChangeSetId, + stackArn: parentStackArn, }); } } @@ -210,7 +222,7 @@ function formatBlock(block: DriftBlock): string { output += ` Template Drift: S3 and deployed templates differ\n`; if (block.changeSetId) { - const changesetUrl = cfnChangesetConsoleUrl(block.changeSetId); + const changesetUrl = cfnChangesetConsoleUrl(block.changeSetId, block.stackArn); output += ` Changeset Id: ${changesetUrl || block.changeSetId}\n`; } output += ` ${colorResourceLine(symbol, `${symbol} ${change.ResourceType || 'Unknown'}`)}\n`; From 15975c569800da72f21f0c7ff48f0ada18edcbbe Mon Sep 17 00:00:00 2001 From: Benjamin Pace Date: Wed, 8 Apr 2026 20:42:34 +0000 Subject: [PATCH 2/8] feat(drift): read changes from FAILED nested changesets with IncludeNestedStacks Template drift detection now reads changes from EarlyValidation-failed nested changesets instead of discarding them. This enables drift detection on post-migration stacks where resources have been moved between stacks, causing CFN EarlyValidation::ResourceExistenceCheck failures. Changes: - Root changeset FAILED status falls through to analyzeChangeSet (except "didn't contain changes" which remains the no-drift happy path) - Nested changesets classify themselves: EarlyValidation failures have their Changes read; other failures are treated as genuine errors - Partial results returned with incompleteStacks tracking instead of all-or-nothing discard - Poll nested changesets to terminal status before describing (fixes race condition where root fails before all nested changesets complete) - Lock rollback validates drift with DeletionPolicy filter: recursive hasRealDrift walks the change tree, filtering out expected lock drift (Scope: ['DeletionPolicy']). Blocks rollback if real drift or incomplete stacks exist. ADR-005 documents the decision, empirical findings, and risks. --- ...05-template-drift-include-nested-stacks.md | 367 ++++++++++++++++++ .../commands/gen2-migration/lock.test.ts | 187 +++++++++ .../drift-detection/detect-template-drift.ts | 95 +++-- .../src/commands/gen2-migration/lock.ts | 75 ++++ 4 files changed, 688 insertions(+), 36 deletions(-) create mode 100644 packages/amplify-cli/adr/005-template-drift-include-nested-stacks.md diff --git a/packages/amplify-cli/adr/005-template-drift-include-nested-stacks.md b/packages/amplify-cli/adr/005-template-drift-include-nested-stacks.md new file mode 100644 index 00000000000..ab6af386729 --- /dev/null +++ b/packages/amplify-cli/adr/005-template-drift-include-nested-stacks.md @@ -0,0 +1,367 @@ +# ADR-005: Template Drift Detection with IncludeNestedStacks + +## Status + +Proposed + +## Context + +### What we're solving + +The gen2-migration lock step adds `DeletionPolicy: Retain` to stateful +resources before the refactor step moves them between stacks. If the +refactor fails or the user runs `--rollback`, the lock rollback needs +to verify that no resources have actually drifted — confirming the +environment is still consistent and safe to revert the DeletionPolicy +changes. + +Phase 2 drift detection (`detectTemplateDrift`) is the mechanism for +this verification. It creates a CloudFormation changeset with +`IncludeNestedStacks: true` on the root stack and compares the +deployed state against the cached template. If there is no drift +(beyond the expected DeletionPolicy changes from the lock step itself), +lock rollback can proceed safely. + +### Two problems + +**Problem 1: FAILED changesets are discarded.** + +After gen2-migration refactor moves resources (e.g., DynamoDB tables, +S3 buckets) from Gen1 nested stacks to Gen2 stacks, the Gen1 templates +still reference those resources. CloudFormation's `EarlyValidation` +step checks whether referenced resources exist in the target stack and +fails the changeset with: + +``` +EarlyValidation::ResourceExistenceCheck failed for resource(s) [activity-main] +``` + +The current code (lines 190-203 of `detect-template-drift.ts`) treats +all FAILED changesets as errors and discards them: + +```typescript +if (changeSet.Status === 'FAILED') { + // ...deletes changeset, returns { changes: [], skipped: true } +} +``` + +Similarly, `analyzeChangeSet` (lines 251-264) bails on FAILED nested +changesets during recursive traversal. + +This means Phase 2 reports zero drift for any app that has been through +gen2-migration refactor, even when real template drift exists on +non-failing nested stacks. + +**Problem 2: Lock's DeletionPolicy changes are expected drift.** + +The lock step modifies templates to add `DeletionPolicy: Retain` to +stateful resources. When Phase 2 compares these modified templates +against the deployed stack, the DeletionPolicy additions appear as +template drift. This is *expected* — the lock step intentionally made +these changes. The drift detection must distinguish lock's DeletionPolicy +changes from real drift (someone changed something outside of amplify). + +This is distinct from the FAILED changeset problem but compounds it: +even when we successfully read changes from a FAILED changeset, we +need to filter out the DeletionPolicy noise to determine if there is +*real* drift that would make lock rollback unsafe. + +### Failure type distinction + +Not all FAILED changesets are equal: + +- **EarlyValidation failures** (e.g., `ResourceExistenceCheck`): CFN + still populates the Changes array before failing. The changeset is + describable and its changes are usable. This is the common case for + post-migration stacks. + +- **Other failures** (e.g., `InsufficientCapabilities`, malformed + template, IAM errors): CFN may not populate Changes at all. These + represent real errors, not the expected post-migration state. + +The code must handle these differently: proceed with EarlyValidation +failures (read whatever Changes are available), but treat other +failures as genuine errors. + +### The 14570 per-nested-stack approach + +Issue #14570 proposed replacing `IncludeNestedStacks: true` with a +client-side approach: create independent changesets on each nested stack +using `UsePreviousTemplate: true`, fetch templates from S3, and use +Bottleneck for rate limiting. This was prototyped as Method B across +three parallel worktree experiments. + +### Empirical findings + +Testing against the live discussions app (amplify-discussions-main-c39a5, +5 nested stacks, 3 of which fail EarlyValidation) revealed: + +1. **FAILED changesets contain usable Changes data.** CloudFormation + populates `Changes` on nested changesets *before* validation fails. + `DescribeChangeSet` on a FAILED nested changeset returns the full + changes array. Confirmed: storageactivity (2 changes), + storageavatars (6 changes), storagebookmarks (2 changes) — all + FAILED with EarlyValidation, all with Changes populated. + +2. **CFN does not exit early on one nested failure.** When + `IncludeNestedStacks: true` is set, CloudFormation creates changesets + for *all* nested stacks regardless of whether some fail validation. + The root changeset fails, but all 5 nested changesets are created + and describable. + + Exact root StatusReason: `Nested change set was not + successfully created: Currently in FAILED.` — references only the + first failing nested changeset. Does NOT contain "EarlyValidation". + + Exact nested EarlyValidation StatusReason: `The following + hook(s)/validation failed: [AWS::EarlyValidation:: + ResourceExistenceCheck]. To troubleshoot Early Validation errors, + use the DescribeEvents API for detailed failure information.` + +3. **Per-nested-stack approach produces false positives.** Creating an + independent changeset on a nested stack (e.g., apidiscussions) + *without* `IncludeNestedStacks` reports 6 phantom `Modify` changes + on `AWS::CloudFormation::Stack` resources. These changes do not exist + when using `IncludeNestedStacks: true` from the root. The root + approach correctly suppresses parameter-propagation noise that the + isolated approach cannot. + +4. **Template sources are equivalent.** S3 template and deployed + template are byte-for-byte identical. `UsePreviousTemplate: true` + and `TemplateBody` fetched from S3 produce identical changeset + results. + +5. **Auth and apidiscussions succeed cleanly.** 2 of 5 nested stacks + pass changeset creation. Auth shows 7 real changes; apidiscussions + shows 0. + +6. **Nested changeset race condition.** The root changeset fails as + soon as *any* nested changeset fails, but other nested changesets + may still be `CREATE_IN_PROGRESS`. Integration testing confirmed: + apidiscussions was still in-progress when the root returned FAILED. + Code must poll each nested changeset to terminal status before + describing it. + +### Comparison + +| Dimension | IncludeNestedStacks: true | Per-nested (Method B) | +|--------------------------|---------------------------|-----------------------| +| False positives | None observed | 6 phantom changes | +| Code complexity | ~30 lines changed | ~400 lines new | +| CFN API calls | 1 CreateChangeSet | N+1 CreateChangeSet | +| Rate limiting needed | No | Yes (Bottleneck) | +| New dependencies | None | bottleneck, S3 client | +| FAILED stack handling | Read Changes anyway | Same, plus false pos | +| Sub-nested recursion | Built-in (ChangeSetId) | Must re-implement | + +## Decision + +Keep `IncludeNestedStacks: true` and read changes from FAILED changesets +instead of discarding them. Filter out expected DeletionPolicy drift +from the lock step. Do not implement the per-nested-stack approach. + +### Change 1: Root changeset — always fall through on FAILED + +Empirical finding: the root changeset's StatusReason when a nested +stack fails EarlyValidation is: + +``` +Nested change set was not successfully created: Currently in FAILED. +``` + +This does NOT contain "EarlyValidation" — it just references the first +nested changeset that failed. The root cannot classify the failure type. + +Therefore, the root should always fall through to `analyzeChangeSet` +when FAILED (except for "no changes"). Classification happens at the +nested level: + +```typescript +// Current: bail on all FAILED (lines 190-203) +if (changeSet.Status === 'FAILED') { + return { changes: [], skipped: true, skipReason: ... }; +} + +// Proposed: fall through to analyzeChangeSet for nested inspection +if (changeSet.Status === 'FAILED') { + if (changeSet.StatusReason?.includes("didn't contain changes")) { + // No drift — clean result + return { changes: [], skipped: false }; + } + // Any other FAILED reason: nested stacks may still have data. + // Fall through — analyzeChangeSet classifies each nested changeset. + print.warn(`Root changeset FAILED: ${changeSet.StatusReason}`); +} +``` + +### Change 2: Nested changeset analysis — classify per-stack + +Each nested changeset classifies itself. Three observed StatusReason +patterns: + +1. `"The submitted information didn't contain changes..."` — no drift, + clean skip. +2. `"The following hook(s)/validation failed: [AWS::EarlyValidation::ResourceExistenceCheck]..."` — EarlyValidation failure, + Changes are populated, read them. +3. `"Only executable from the root change set."` with + Status=CREATE_COMPLETE — success, read Changes normally. + +Any other StatusReason is a genuine error — skip that stack. + +```typescript +function isEarlyValidationFailure(reason?: string): boolean { + return !!reason?.includes('EarlyValidation'); +} + +// In analyzeChangeSet: +if (changeSet.Status === 'FAILED') { + if (changeSet.StatusReason?.includes("didn't contain changes") + || changeSet.StatusReason?.includes('No updates')) { + return result; // genuinely no changes + } + if (isEarlyValidationFailure(changeSet.StatusReason)) { + // Changes are populated despite FAILED status — fall through + print.warn(`Nested changeset FAILED (EarlyValidation): ${changeSet.StatusReason}`); + } else { + // Unknown failure — treat as error, skip this stack + return { changes: [], skipped: true, skipReason: ... }; + } +} +``` + +### Change 3: Partial results instead of all-or-nothing + +The current code discards all results if *any* nested stack analysis +is skipped (lines 343-349). Instead, return available results and track +which stacks were incomplete: + +```typescript +// Current: discard everything +if (hasNestedSkipped) { + return { changes: [], skipped: true, skipReason: '...' }; +} + +// Proposed: return partial results with metadata +result.incompleteStacks = skippedStacks; // stacks with non-EV failures +return result; +``` + +### Change 4: Filter expected DeletionPolicy drift + +The lock step adds `DeletionPolicy: Retain` to stateful resources. +These show up as Modify changes in the changeset. For lock rollback +to determine whether the environment is safe to revert, these expected +changes must be filtered out. + +The filter applies after changeset analysis and before the rollback +safety decision: + +```typescript +function isExpectedLockDrift(change: ResourceChangeWithNested): boolean { + // A change is expected lock drift if: + // 1. Action is 'Modify' + // 2. Scope is exactly ['DeletionPolicy'] (confirmed empirically — + // CFN uses 'DeletionPolicy' as a first-class Scope value) + // 3. Details show DirectModification of DeletionPolicy attribute + // with RequiresRecreation: 'Never' + return change.Action === 'Modify' + && change.Scope?.length === 1 + && change.Scope[0] === 'DeletionPolicy'; +} + +const realDrift = changes.filter(c => !isExpectedLockDrift(c)); +``` + +If `realDrift` is empty after filtering, lock rollback can proceed +safely. If there is any real drift, lock rollback must abort — the +environment is in an inconsistent state. + +### What is NOT changed + +- `IncludeNestedStacks: true` stays on the `CreateChangeSetCommand` +- The recursive `analyzeChangeSet` traversal via `ChangeSetId` on + nested `AWS::CloudFormation::Stack` resources stays the same +- The "no changes" detection (`didn't contain changes`) stays the same +- Changeset cleanup logic stays the same + +## Risks + +### R1 — Reading Changes from FAILED changesets is undocumented + +Reading Changes from EarlyValidation-failed changesets is not +explicitly documented by CloudFormation. Confirmed empirically on the +discussions app (3 FAILED stacks, all with accessible Changes), but +could change without notice. + +*Mitigation*: Add an integration test against a known FAILED stack to +verify Changes are populated. If CFN changes this behavior, the test +catches it before it silently regresses in production. + +### R2 — Changes on EarlyValidation-failed changesets may be incomplete + +CFN may populate *some* changes before the validation failure but not +all. We could miss real drift on a FAILED stack. + +*Mitigation*: This is inherent to the EarlyValidation failure, not our +approach. Per-nested-stack with `UsePreviousTemplate: true` hits the +same EarlyValidation failure on the same stacks. The real fix is to +update the Gen1 templates to reflect the post-migration state +(MigrationPlaceholder resources), which is already part of the refactor +step. Once templates are updated, EarlyValidation passes and this risk +disappears. + +### R3 — Non-EarlyValidation failures conflated with EarlyValidation + +If CFN introduces new failure modes whose StatusReason strings don't +match our `isEarlyValidationFailure` check, we'd incorrectly treat +them as hard errors (skip the stack). This is the safe direction — +false negatives (missed drift) are worse than false positives +(unnecessary skip) for the lock rollback use case. + +*Mitigation*: Log all failure reasons. Expand the pattern match as +new failure types are observed. + +### R4 — DeletionPolicy filter accuracy + +The filter that distinguishes lock's expected DeletionPolicy changes +from real drift must be precise. A false positive (real drift +classified as expected) would allow lock rollback to proceed when the +environment is inconsistent. A false negative (expected drift +classified as real) would block lock rollback unnecessarily. + +*Mitigation*: The lock step knows exactly which resources it modified +and what it changed. The filter can be informed by the lock step's +output (list of resources + properties changed) rather than relying on +heuristic pattern matching on changeset details. + +## Consequences + +### What changes + +- `detectTemplateDrift` classifies FAILED changesets by failure type + instead of discarding them all +- EarlyValidation failures are treated as recoverable — Changes are + read from the FAILED changeset +- Non-EarlyValidation failures remain hard errors (skipped) +- The all-or-nothing behavior on nested analysis failures is replaced + with partial results +- `TemplateDriftResults` gains optional metadata about incomplete stacks +- A DeletionPolicy filter is added to distinguish lock's expected + changes from real drift +- Lock rollback consumes filtered results to make the safe/unsafe + determination + +### What does NOT change + +- The `CreateChangeSetCommand` call and its parameters +- The recursive traversal of nested changesets via `ChangeSetId` +- The "no changes" detection path +- The drift formatter and its console URL generation +- The Phase 1 (CFN drift detection) and Phase 3 (local drift) paths + +### What gets removed + +- The entire 14570 per-nested-stack prototype (Method B) is abandoned +- No new dependencies (bottleneck, S3 client for template fetching) +- No new S3 template resolution logic +- No new rate limiting infrastructure diff --git a/packages/amplify-cli/src/__tests__/commands/gen2-migration/lock.test.ts b/packages/amplify-cli/src/__tests__/commands/gen2-migration/lock.test.ts index cd7657e2700..0fd440c0a2a 100644 --- a/packages/amplify-cli/src/__tests__/commands/gen2-migration/lock.test.ts +++ b/packages/amplify-cli/src/__tests__/commands/gen2-migration/lock.test.ts @@ -5,6 +5,11 @@ import { UpdateAppCommand } from '@aws-sdk/client-amplify'; import { SpinningLogger } from '../../../commands/gen2-migration/_infra/spinning-logger'; import { Gen1App } from '../../../commands/gen2-migration/generate/_infra/gen1-app'; import { AmplifyGen2MigrationValidations } from '../../../commands/gen2-migration/_infra/validations'; +import { detectTemplateDrift } from '../../../commands/drift-detection/detect-template-drift'; + +jest.mock('../../../commands/drift-detection/detect-template-drift', () => ({ + detectTemplateDrift: jest.fn(), +})); jest.mock('@aws-sdk/client-appsync', () => ({ ...jest.requireActual('@aws-sdk/client-appsync'), @@ -399,4 +404,186 @@ describe('AmplifyMigrationLockStep', () => { }); }); }); + + describe('rollback drift validation', () => { + const mockDetectTemplateDrift = detectTemplateDrift as jest.MockedFunction; + + it('should pass validation when no drift is detected', async () => { + mockDetectTemplateDrift.mockResolvedValueOnce({ changes: [], skipped: false }); + + const plan = await lockStep.rollback(); + const valid = await plan.validate(); + + expect(valid).toBe(true); + expect(mockDetectTemplateDrift).toHaveBeenCalledWith('test-root-stack', mockLogger, expect.anything()); + }); + + it('should pass validation when only DeletionPolicy drift exists', async () => { + mockDetectTemplateDrift.mockResolvedValueOnce({ + changes: [ + { + Action: 'Modify', + LogicalResourceId: 'MyTable', + ResourceType: 'AWS::DynamoDB::Table', + Scope: ['DeletionPolicy'], + Replacement: 'False', + }, + { + Action: 'Modify', + LogicalResourceId: 'MyBucket', + ResourceType: 'AWS::S3::Bucket', + Scope: ['DeletionPolicy'], + Replacement: 'False', + }, + ], + skipped: false, + }); + + const plan = await lockStep.rollback(); + const valid = await plan.validate(); + + expect(valid).toBe(true); + }); + + it('should fail validation when real drift exists alongside DeletionPolicy drift', async () => { + mockDetectTemplateDrift.mockResolvedValueOnce({ + changes: [ + { + Action: 'Modify', + LogicalResourceId: 'MyTable', + ResourceType: 'AWS::DynamoDB::Table', + Scope: ['DeletionPolicy'], + Replacement: 'False', + }, + { + Action: 'Modify', + LogicalResourceId: 'MyFunction', + ResourceType: 'AWS::Lambda::Function', + Scope: ['Properties'], + Replacement: 'False', + }, + ], + skipped: false, + }); + + const plan = await lockStep.rollback(); + const valid = await plan.validate(); + + expect(valid).toBe(false); + }); + + it('should fail validation when drift detection is skipped', async () => { + mockDetectTemplateDrift.mockResolvedValueOnce({ + changes: [], + skipped: true, + skipReason: 'Changeset creation failed', + }); + + const plan = await lockStep.rollback(); + const valid = await plan.validate(); + + expect(valid).toBe(false); + }); + + it('should fail validation when drift detection throws an error', async () => { + mockDetectTemplateDrift.mockRejectedValueOnce(new Error('CFN client error')); + + const plan = await lockStep.rollback(); + const valid = await plan.validate(); + + expect(valid).toBe(false); + }); + + it('should not filter out Modify changes with multiple Scope entries that include DeletionPolicy', async () => { + mockDetectTemplateDrift.mockResolvedValueOnce({ + changes: [ + { + Action: 'Modify', + LogicalResourceId: 'MyTable', + ResourceType: 'AWS::DynamoDB::Table', + Scope: ['DeletionPolicy', 'Properties'], + Replacement: 'False', + }, + ], + skipped: false, + }); + + const plan = await lockStep.rollback(); + const valid = await plan.validate(); + + expect(valid).toBe(false); + }); + + it('should not filter out Add or Remove actions even with DeletionPolicy scope', async () => { + mockDetectTemplateDrift.mockResolvedValueOnce({ + changes: [ + { + Action: 'Add', + LogicalResourceId: 'NewResource', + ResourceType: 'AWS::DynamoDB::Table', + Scope: ['DeletionPolicy'], + }, + ], + skipped: false, + }); + + const plan = await lockStep.rollback(); + const valid = await plan.validate(); + + expect(valid).toBe(false); + }); + + it('should fail validation when nested changes contain real drift at leaf level', async () => { + mockDetectTemplateDrift.mockResolvedValueOnce({ + changes: [{ + Action: 'Modify', + LogicalResourceId: 'authStack', + ResourceType: 'AWS::CloudFormation::Stack', + Scope: ['Properties'], + nestedChanges: [{ + Action: 'Modify', + LogicalResourceId: 'UserPool', + ResourceType: 'AWS::Cognito::UserPool', + Scope: ['Properties'], + }], + }], + skipped: false, + }); + + const plan = await lockStep.rollback(); + const valid = await plan.validate(); + + expect(valid).toBe(false); + }); + + it('should fail validation when incompleteStacks are reported', async () => { + mockDetectTemplateDrift.mockResolvedValueOnce({ + changes: [], + skipped: false, + incompleteStacks: ['storageactivity'], + }); + + const plan = await lockStep.rollback(); + const valid = await plan.validate(); + + expect(valid).toBe(false); + }); + + it('should pass validation when CloudFormation::Stack wrapper has no nestedChanges', async () => { + mockDetectTemplateDrift.mockResolvedValueOnce({ + changes: [{ + Action: 'Modify', + LogicalResourceId: 'apiStack', + ResourceType: 'AWS::CloudFormation::Stack', + Scope: ['Properties'], + }], + skipped: false, + }); + + const plan = await lockStep.rollback(); + const valid = await plan.validate(); + + expect(valid).toBe(true) + }); + }); }); diff --git a/packages/amplify-cli/src/commands/drift-detection/detect-template-drift.ts b/packages/amplify-cli/src/commands/drift-detection/detect-template-drift.ts index bdfd07d9945..0e44f7d720a 100644 --- a/packages/amplify-cli/src/commands/drift-detection/detect-template-drift.ts +++ b/packages/amplify-cli/src/commands/drift-detection/detect-template-drift.ts @@ -23,6 +23,7 @@ export interface TemplateDriftResults { skipped: boolean; skipReason?: string; changeSetId?: string; + incompleteStacks?: string[]; } /** @@ -47,6 +48,10 @@ function extractChangeSetNameFromArn(changeSetArn: string): string { return resource.split('/')[1]; } +function isEarlyValidationFailure(reason?: string): boolean { + return !!reason?.includes('EarlyValidation'); +} + const CHANGESET_PREFIX = 'amplify-drift-detection-'; /** @@ -187,20 +192,13 @@ export async function detectTemplateDrift( }; } - // Handle other failure cases + // Handle other FAILED cases — nested stacks may still have usable data. + // The root changeset's StatusReason references the first failing nested changeset + // (e.g., "Nested change set was not successfully created: Currently in FAILED.") + // but does NOT contain "EarlyValidation". Classification happens per-nested-stack + // in analyzeChangeSet, so we fall through here. if (changeSet.Status === 'FAILED') { - print.debug(`Changeset failed with status: ${changeSet.Status}`); - print.debug(`Reason: ${changeSet.StatusReason}`); - await cfn - .send(new DeleteChangeSetCommand({ StackName: stackName, ChangeSetName: changeSetName })) - .catch((e: any) => print.debug(`Failed to delete changeset: ${e.message}`)); - const errorMsg = `Changeset creation failed with status ${changeSet.Status}`; - const reasonMsg = changeSet.StatusReason ? `: ${changeSet.StatusReason}` : ''; - return { - changes: [], - skipped: true, - skipReason: `${errorMsg}${reasonMsg}`, - }; + print.warn(`Root changeset FAILED: ${changeSet.StatusReason}`); } print.debug(`CloudFormation ChangeSet: ${stackName}`); @@ -244,10 +242,10 @@ async function analyzeChangeSet( skipped: false, }; - // Track if any nested stack analysis was skipped - let hasNestedSkipped = false; + // Track which nested stacks could not be fully analyzed + const skippedStacks: string[] = []; - // Handle FAILED status - distinguish between "no changes" vs actual errors + // Handle FAILED status — classify by failure type if (changeSet.Status === 'FAILED') { // "No changes" is success for drift detection if (changeSet.StatusReason?.includes("didn't contain changes") || changeSet.StatusReason?.includes('No updates')) { @@ -255,13 +253,18 @@ async function analyzeChangeSet( return result; } - // Other FAILED reasons are actual errors - mark as skipped - print.warn(`ChangeSet failed with unexpected reason: ${changeSet.StatusReason || 'No reason provided'}`); - return { - changes: [], - skipped: true, - skipReason: `Changeset failed: ${changeSet.StatusReason || 'Unknown reason'}`, - }; + // EarlyValidation failures still have Changes populated — fall through to process them + if (isEarlyValidationFailure(changeSet.StatusReason)) { + print.warn(`Nested changeset FAILED (EarlyValidation): ${changeSet.StatusReason}`); + } else { + // Unknown failure — treat as genuine error, skip this stack + print.warn(`ChangeSet failed with unexpected reason: ${changeSet.StatusReason || 'No reason provided'}`); + return { + changes: [], + skipped: true, + skipReason: `Changeset failed: ${changeSet.StatusReason || 'Unknown reason'}`, + }; + } } // Check if there are no changes @@ -291,13 +294,33 @@ async function analyzeChangeSet( print.debug(`Fetching nested changeset: ${stackName}`); print.debug(`ChangeSet: ${changeSetName}`); - // Describe the nested changeset - const nestedChangeSet = await cfn.send( + // Wait for nested changeset to reach terminal status. + // The root changeset fails as soon as any nested changeset fails, + // but other nested changesets may still be CREATE_IN_PROGRESS. + let nestedChangeSet = await cfn.send( new DescribeChangeSetCommand({ StackName: stackName, ChangeSetName: changeSetName, }), ); + const terminalStatuses = ['CREATE_COMPLETE', 'FAILED', 'DELETE_COMPLETE']; + const maxPollAttempts = 30; + for (let attempt = 0; !terminalStatuses.includes(nestedChangeSet.Status!) && attempt < maxPollAttempts; attempt++) { + print.debug(`Nested changeset ${stackName} is ${nestedChangeSet.Status}, waiting...`); + await new Promise((resolve) => setTimeout(resolve, 2000)); + nestedChangeSet = await cfn.send( + new DescribeChangeSetCommand({ + StackName: stackName, + ChangeSetName: changeSetName, + }), + ); + } + if (!terminalStatuses.includes(nestedChangeSet.Status!)) { + print.warn(`Nested changeset ${stackName} did not reach terminal status (${nestedChangeSet.Status})`); + skippedStacks.push(stackName); + result.changes.push(changeInfo); + continue; + } // Print nested changeset details if (nestedChangeSet.Changes && nestedChangeSet.Changes.length > 0) { @@ -317,10 +340,15 @@ async function analyzeChangeSet( // Recursively analyze nested changeset const nestedResult = await analyzeChangeSet(cfn, nestedChangeSet, print); - // Check if nested analysis was skipped + // Track if nested analysis was skipped if (nestedResult.skipped) { print.warn(`⚠ Nested stack ${stackName} analysis was skipped: ${nestedResult.skipReason}`); - hasNestedSkipped = true; + skippedStacks.push(stackName); + } + + // Propagate incomplete stacks from deeper nesting levels + if (nestedResult.incompleteStacks) { + skippedStacks.push(...nestedResult.incompleteStacks); } // Add nested changes to the current change @@ -329,24 +357,19 @@ async function analyzeChangeSet( print.debug(`Processed ${nestedResult.changes.length} nested changes`); } } catch (error: any) { - // Log error and mark as skipped + // Log error and track as incomplete print.warn(`⚠ Could not fetch nested changeset for ${rc.LogicalResourceId}: ${error.message}`); print.debug(`Stack ARN: ${rc.PhysicalResourceId}`); print.debug(`ChangeSet ID: ${rc.ChangeSetId}`); - hasNestedSkipped = true; + skippedStacks.push(extractStackNameFromArn(rc.PhysicalResourceId)); } } result.changes.push(changeInfo); } - // If any nested stack analysis was skipped, mark entire result as skipped - if (hasNestedSkipped) { - return { - changes: [], - skipped: true, - skipReason: 'One or more nested stacks could not be analyzed', - }; + if (skippedStacks.length > 0) { + result.incompleteStacks = skippedStacks; } return result; diff --git a/packages/amplify-cli/src/commands/gen2-migration/lock.ts b/packages/amplify-cli/src/commands/gen2-migration/lock.ts index cf0a0f778b8..dd4fb6246cf 100644 --- a/packages/amplify-cli/src/commands/gen2-migration/lock.ts +++ b/packages/amplify-cli/src/commands/gen2-migration/lock.ts @@ -21,6 +21,7 @@ import { import { UpdateAppCommand, GetAppCommand } from '@aws-sdk/client-amplify'; import { UpdateTableCommand, paginateListTables } from '@aws-sdk/client-dynamodb'; import { paginateListGraphqlApis } from '@aws-sdk/client-appsync'; +import { detectTemplateDrift, type ResourceChangeWithNested } from '../drift-detection/detect-template-drift'; const GEN2_MIGRATION_ENVIRONMENT_NAME = 'GEN2_MIGRATION_ENVIRONMENT_NAME'; @@ -48,6 +49,33 @@ const ALLOW_ALL_POLICY = { ], }; +/** + * Identifies changeset changes that are expected DeletionPolicy drift from the lock step. + * + * The lock step adds `DeletionPolicy: Retain` to stateful resources. These show up as + * Modify changes in changesets with Scope limited to `['DeletionPolicy']`. For lock rollback + * to determine whether the environment is safe to revert, these expected changes must be + * filtered out so only real drift blocks the rollback. + */ +const isExpectedLockDrift = (change: ResourceChangeWithNested): boolean => + change.Action === 'Modify' && change.Scope?.length === 1 && change.Scope[0] === 'DeletionPolicy'; + +/** + * Recursively walks the change tree to find any leaf resource changes that are + * not expected lock drift. AWS::CloudFormation::Stack entries are structural + * wrappers — their nestedChanges contain the actual resource-level changes. + */ +function hasRealDrift(changes: ResourceChangeWithNested[]): boolean { + for (const change of changes) { + if (change.nestedChanges?.length) { + if (hasRealDrift(change.nestedChanges)) return true; + } else if (change.ResourceType !== 'AWS::CloudFormation::Stack') { + if (!isExpectedLockDrift(change)) return true; + } + } + return false; +} + export class AmplifyMigrationLockStep extends AmplifyMigrationStep { private _dynamoTableNames: string[]; @@ -149,6 +177,13 @@ export class AmplifyMigrationLockStep extends AmplifyMigrationStep { public async rollback(): Promise { const operations: AmplifyMigrationOperation[] = []; + operations.push({ + describe: async () => [], + validate: () => ({ description: 'Drift', run: () => this.validateLockRollbackDrift() }), + // eslint-disable-next-line @typescript-eslint/no-empty-function + execute: async () => {}, + }); + for (const tableName of await this.dynamoTableNames()) { operations.push({ validate: () => undefined, @@ -225,6 +260,46 @@ export class AmplifyMigrationLockStep extends AmplifyMigrationStep { } } + /** + * Validates that the environment is safe for lock rollback by running template drift + * detection and filtering out expected DeletionPolicy changes from the lock step. + * + * If only DeletionPolicy drift remains (from the lock step adding Retain), rollback + * is safe. If any real drift exists, rollback must be blocked — the environment is + * in an inconsistent state. + */ + private async validateLockRollbackDrift(): Promise { + try { + const driftResults = await detectTemplateDrift( + this.gen1App.rootStackName, + this.logger, + this.gen1App.clients.cloudFormation, + ); + + if (driftResults.skipped) { + return { valid: false, report: `Template drift detection was skipped: ${driftResults.skipReason}` }; + } + + if (driftResults.incompleteStacks?.length) { + return { + valid: false, + report: `Could not verify all stacks for drift: ${driftResults.incompleteStacks.join(', ')}`, + }; + } + + if (hasRealDrift(driftResults.changes)) { + return { + valid: false, + report: 'Template drift detected beyond expected DeletionPolicy changes', + }; + } + + return { valid: true }; + } catch (e) { + return { valid: false, report: e.message }; + } + } + private async findGraphQLApiId(): Promise { const graphQL = this.gen1App.discover().find((r) => r.category === 'api' && r.service === 'AppSync'); if (!graphQL) { From 3dfa0b211c23c5abc1641d644b9fbf1701dddc82 Mon Sep 17 00:00:00 2001 From: Benjamin Pace Date: Thu, 9 Apr 2026 19:02:22 +0000 Subject: [PATCH 3/8] fix(cli-internal): handle root changeset failures, template format errors, and cascading IAM drift MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three fixes discovered during E2E lock + rollback testing: 1. Root changeset fall-through: Root changeset FAILED status references nested failures without containing "EarlyValidation". Add isRoot parameter so root always falls through to classify nested changesets individually. 2. Template format error as recoverable: IncludeNestedStacks fails on Amplify AppSync API stacks where model sub-stacks export names via Fn::Join. CFN reports "duplicate Export names". Changes are still populated — treat as recoverable like EarlyValidation. 3. Cascading IAM Policy changes: When DeletionPolicy changes on a DynamoDB table, CFN flags IAM policies referencing TableName.Arn as Dynamic ResourceAttribute re-evaluations. These are harmless and must be filtered alongside direct DeletionPolicy drift in lock rollback validation. --- .../commands/gen2-migration/lock.test.ts | 174 ++++++++++++++++-- .../drift-detection/detect-template-drift.ts | 26 ++- .../src/commands/gen2-migration/lock.ts | 37 ++-- 3 files changed, 203 insertions(+), 34 deletions(-) diff --git a/packages/amplify-cli/src/__tests__/commands/gen2-migration/lock.test.ts b/packages/amplify-cli/src/__tests__/commands/gen2-migration/lock.test.ts index 0fd440c0a2a..1a4da38c571 100644 --- a/packages/amplify-cli/src/__tests__/commands/gen2-migration/lock.test.ts +++ b/packages/amplify-cli/src/__tests__/commands/gen2-migration/lock.test.ts @@ -535,18 +535,22 @@ describe('AmplifyMigrationLockStep', () => { it('should fail validation when nested changes contain real drift at leaf level', async () => { mockDetectTemplateDrift.mockResolvedValueOnce({ - changes: [{ - Action: 'Modify', - LogicalResourceId: 'authStack', - ResourceType: 'AWS::CloudFormation::Stack', - Scope: ['Properties'], - nestedChanges: [{ + changes: [ + { Action: 'Modify', - LogicalResourceId: 'UserPool', - ResourceType: 'AWS::Cognito::UserPool', + LogicalResourceId: 'authStack', + ResourceType: 'AWS::CloudFormation::Stack', Scope: ['Properties'], - }], - }], + nestedChanges: [ + { + Action: 'Modify', + LogicalResourceId: 'UserPool', + ResourceType: 'AWS::Cognito::UserPool', + Scope: ['Properties'], + }, + ], + }, + ], skipped: false, }); @@ -569,21 +573,157 @@ describe('AmplifyMigrationLockStep', () => { expect(valid).toBe(false); }); + it('should pass validation when only cascading IAM Policy drift from DeletionPolicy change exists', async () => { + mockDetectTemplateDrift.mockResolvedValueOnce({ + changes: [ + { + Action: 'Modify', + LogicalResourceId: 'TodoTable', + ResourceType: 'AWS::DynamoDB::Table', + Scope: ['DeletionPolicy'], + Replacement: 'False', + }, + { + Action: 'Modify', + LogicalResourceId: 'TodoIAMRoleDefaultPolicy7BBBF45B', + ResourceType: 'AWS::IAM::Policy', + Scope: ['Properties'], + Details: [ + { + ChangeSource: 'ResourceAttribute', + Evaluation: 'Dynamic', + Target: { Attribute: 'Properties', Name: 'PolicyDocument', RequiresRecreation: 'Never' }, + CausingEntity: 'TodoTable.Arn', + }, + ], + }, + ], + skipped: false, + }); + + const plan = await lockStep.rollback(); + const valid = await plan.validate(); + + expect(valid).toBe(true); + }); + + it('should fail validation when IAM Policy has a static/direct change mixed with dynamic', async () => { + mockDetectTemplateDrift.mockResolvedValueOnce({ + changes: [ + { + Action: 'Modify', + LogicalResourceId: 'TodoIAMRoleDefaultPolicy7BBBF45B', + ResourceType: 'AWS::IAM::Policy', + Scope: ['Properties'], + Details: [ + { + ChangeSource: 'ResourceAttribute', + Evaluation: 'Dynamic', + Target: { Attribute: 'Properties', Name: 'PolicyDocument', RequiresRecreation: 'Never' }, + CausingEntity: 'TodoTable.Arn', + }, + { + ChangeSource: 'DirectModification', + Evaluation: 'Static', + Target: { Attribute: 'Properties', Name: 'PolicyDocument', RequiresRecreation: 'Never' }, + }, + ], + }, + ], + skipped: false, + }); + + const plan = await lockStep.rollback(); + const valid = await plan.validate(); + + expect(valid).toBe(false); + }); + + it('should fail validation when IAM Policy change requires recreation', async () => { + mockDetectTemplateDrift.mockResolvedValueOnce({ + changes: [ + { + Action: 'Modify', + LogicalResourceId: 'SomePolicy', + ResourceType: 'AWS::IAM::Policy', + Scope: ['Properties'], + Details: [ + { + ChangeSource: 'ResourceAttribute', + Evaluation: 'Dynamic', + Target: { Attribute: 'Properties', Name: 'PolicyDocument', RequiresRecreation: 'Conditionally' }, + CausingEntity: 'TodoTable.Arn', + }, + ], + }, + ], + skipped: false, + }); + + const plan = await lockStep.rollback(); + const valid = await plan.validate(); + + expect(valid).toBe(false); + }); + + it('should pass validation when nested tree has only DeletionPolicy and cascading IAM drift', async () => { + mockDetectTemplateDrift.mockResolvedValueOnce({ + changes: [ + { + Action: 'Modify', + LogicalResourceId: 'apiStack', + ResourceType: 'AWS::CloudFormation::Stack', + Scope: ['Properties'], + nestedChanges: [ + { + Action: 'Modify', + LogicalResourceId: 'TodoTable', + ResourceType: 'AWS::DynamoDB::Table', + Scope: ['DeletionPolicy'], + }, + { + Action: 'Modify', + LogicalResourceId: 'TodoIAMRoleDefaultPolicy', + ResourceType: 'AWS::IAM::Policy', + Scope: ['Properties'], + Details: [ + { + ChangeSource: 'ResourceAttribute', + Evaluation: 'Dynamic', + Target: { Attribute: 'Properties', RequiresRecreation: 'Never' }, + CausingEntity: 'TodoTable.Arn', + }, + ], + }, + ], + }, + ], + skipped: false, + }); + + const plan = await lockStep.rollback(); + const valid = await plan.validate(); + + expect(valid).toBe(true); + }); + it('should pass validation when CloudFormation::Stack wrapper has no nestedChanges', async () => { mockDetectTemplateDrift.mockResolvedValueOnce({ - changes: [{ - Action: 'Modify', - LogicalResourceId: 'apiStack', - ResourceType: 'AWS::CloudFormation::Stack', - Scope: ['Properties'], - }], + changes: [ + { + Action: 'Modify', + LogicalResourceId: 'apiStack', + ResourceType: 'AWS::CloudFormation::Stack', + Scope: ['Properties'], + }, + ], skipped: false, }); const plan = await lockStep.rollback(); const valid = await plan.validate(); - expect(valid).toBe(true) + expect(valid).toBe(true); }); }); }); diff --git a/packages/amplify-cli/src/commands/drift-detection/detect-template-drift.ts b/packages/amplify-cli/src/commands/drift-detection/detect-template-drift.ts index 0e44f7d720a..790f1f42565 100644 --- a/packages/amplify-cli/src/commands/drift-detection/detect-template-drift.ts +++ b/packages/amplify-cli/src/commands/drift-detection/detect-template-drift.ts @@ -48,8 +48,15 @@ function extractChangeSetNameFromArn(changeSetArn: string): string { return resource.split('/')[1]; } -function isEarlyValidationFailure(reason?: string): boolean { - return !!reason?.includes('EarlyValidation'); +function isRecoverableFailure(reason?: string): boolean { + if (!reason) return false; + // EarlyValidation failures (e.g., ResourceExistenceCheck) — Changes are populated + if (reason.includes('EarlyValidation')) return true; + // Template format errors (e.g., duplicate Export names from Fn::Join in nested stacks) — + // Changes are partially populated. CFN validates exports after building the change list, + // so some changes are available even though the changeset ultimately failed. + if (reason.includes('Template format error')) return true; + return false; } const CHANGESET_PREFIX = 'amplify-drift-detection-'; @@ -220,7 +227,7 @@ export async function detectTemplateDrift( } // Changeset is kept for user inspection via console URL — cleaned up on next run - const result = await analyzeChangeSet(cfn, changeSet, print); + const result = await analyzeChangeSet(cfn, changeSet, print, true); result.changeSetId = changeSet.ChangeSetId; return result; } catch (error: any) { @@ -236,6 +243,7 @@ async function analyzeChangeSet( cfn: CloudFormationClient, changeSet: DescribeChangeSetCommandOutput, print: SpinningLogger, + isRoot = false, ): Promise { const result: TemplateDriftResults = { changes: [], @@ -254,10 +262,16 @@ async function analyzeChangeSet( } // EarlyValidation failures still have Changes populated — fall through to process them - if (isEarlyValidationFailure(changeSet.StatusReason)) { - print.warn(`Nested changeset FAILED (EarlyValidation): ${changeSet.StatusReason}`); + if (isRecoverableFailure(changeSet.StatusReason)) { + print.warn(`Nested changeset FAILED (recoverable): ${changeSet.StatusReason}`); + } else if (isRoot) { + // Root changeset FAILED because a nested changeset failed. The root's StatusReason + // references the first failing nested changeset (e.g., "Nested change set was + // not successfully created: Currently in FAILED.") but does NOT contain "EarlyValidation". + // Classification must happen per-nested-stack, so fall through to process Changes. + print.debug(`Root changeset FAILED due to nested failure — falling through to analyze nested changesets`); } else { - // Unknown failure — treat as genuine error, skip this stack + // Unknown failure on a nested stack — treat as genuine error, skip this stack print.warn(`ChangeSet failed with unexpected reason: ${changeSet.StatusReason || 'No reason provided'}`); return { changes: [], diff --git a/packages/amplify-cli/src/commands/gen2-migration/lock.ts b/packages/amplify-cli/src/commands/gen2-migration/lock.ts index dd4fb6246cf..60b15ed5ffb 100644 --- a/packages/amplify-cli/src/commands/gen2-migration/lock.ts +++ b/packages/amplify-cli/src/commands/gen2-migration/lock.ts @@ -52,13 +52,32 @@ const ALLOW_ALL_POLICY = { /** * Identifies changeset changes that are expected DeletionPolicy drift from the lock step. * - * The lock step adds `DeletionPolicy: Retain` to stateful resources. These show up as - * Modify changes in changesets with Scope limited to `['DeletionPolicy']`. For lock rollback - * to determine whether the environment is safe to revert, these expected changes must be - * filtered out so only real drift blocks the rollback. + * The lock step adds `DeletionPolicy: Retain` to stateful resources. These show up as: + * 1. Direct DeletionPolicy changes — Modify with Scope exactly `['DeletionPolicy']` + * 2. Cascading IAM Policy changes — CFN flags IAM policies that reference the modified + * table's attributes (e.g., `TodoTable.Arn` in PolicyDocument) as Dynamic re-evaluations. + * These have `ChangeSource: ResourceAttribute`, `Evaluation: Dynamic`, and + * `RequiresRecreation: Never` — they are harmless re-evaluations, not actual changes. + * + * For lock rollback to determine whether the environment is safe to revert, these expected + * changes must be filtered out so only real drift blocks the rollback. */ -const isExpectedLockDrift = (change: ResourceChangeWithNested): boolean => - change.Action === 'Modify' && change.Scope?.length === 1 && change.Scope[0] === 'DeletionPolicy'; +const isExpectedLockDrift = (change: ResourceChangeWithNested): boolean => { + if (change.Action !== 'Modify') return false; + + // Direct DeletionPolicy change on a resource + if (change.Scope?.length === 1 && change.Scope[0] === 'DeletionPolicy') return true; + + // Cascading IAM Policy change caused by DeletionPolicy modification on a referenced resource. + // All Details must be Dynamic ResourceAttribute re-evaluations (no static/direct changes). + if (change.ResourceType === 'AWS::IAM::Policy' && change.Details?.length) { + return change.Details.every( + (d) => d.ChangeSource === 'ResourceAttribute' && d.Evaluation === 'Dynamic' && d.Target?.RequiresRecreation === 'Never', + ); + } + + return false; +}; /** * Recursively walks the change tree to find any leaf resource changes that are @@ -270,11 +289,7 @@ export class AmplifyMigrationLockStep extends AmplifyMigrationStep { */ private async validateLockRollbackDrift(): Promise { try { - const driftResults = await detectTemplateDrift( - this.gen1App.rootStackName, - this.logger, - this.gen1App.clients.cloudFormation, - ); + const driftResults = await detectTemplateDrift(this.gen1App.rootStackName, this.logger, this.gen1App.clients.cloudFormation); if (driftResults.skipped) { return { valid: false, report: `Template drift detection was skipped: ${driftResults.skipReason}` }; From ddeac93f70a74d0e14d930e02f3ceb3a968bb9d6 Mon Sep 17 00:00:00 2001 From: Benjamin Pace Date: Thu, 9 Apr 2026 19:11:43 +0000 Subject: [PATCH 4/8] fix(cli-internal): harden drift detection against false negatives from CR review Safety-critical (false negatives in rollback gate): 1. CloudFormation::Stack changes without ChangeSetId tracked as incomplete 2. Recoverable failures with empty Changes return skipped:true 3. DELETE_COMPLETE/DELETE_FAILED nested changesets tracked as incomplete Robustness: 4. extractStackNameFromArn in catch block wrapped in try-catch 5. validateLockRollbackDrift catch block properly typed 6. DELETE_FAILED added to terminal statuses for polling --- .../drift-detection/detect-template-drift.ts | 49 ++++++++++++++++--- .../src/commands/gen2-migration/lock.ts | 20 ++++++-- 2 files changed, 56 insertions(+), 13 deletions(-) diff --git a/packages/amplify-cli/src/commands/drift-detection/detect-template-drift.ts b/packages/amplify-cli/src/commands/drift-detection/detect-template-drift.ts index 790f1f42565..4a750292685 100644 --- a/packages/amplify-cli/src/commands/drift-detection/detect-template-drift.ts +++ b/packages/amplify-cli/src/commands/drift-detection/detect-template-drift.ts @@ -261,8 +261,17 @@ async function analyzeChangeSet( return result; } - // EarlyValidation failures still have Changes populated — fall through to process them + // Recoverable failures (EarlyValidation, Template format errors) still have Changes populated. + // However, if Changes is empty despite the recoverable classification, treat as incomplete. if (isRecoverableFailure(changeSet.StatusReason)) { + if (!changeSet.Changes || changeSet.Changes.length === 0) { + print.warn(`Recoverable failure but no Changes populated: ${changeSet.StatusReason}`); + return { + changes: [], + skipped: true, + skipReason: `Changeset failed with no usable changes: ${changeSet.StatusReason}`, + }; + } print.warn(`Nested changeset FAILED (recoverable): ${changeSet.StatusReason}`); } else if (isRoot) { // Root changeset FAILED because a nested changeset failed. The root's StatusReason @@ -299,6 +308,16 @@ async function analyzeChangeSet( const changeInfo: ResourceChangeWithNested = { ...rc }; // Check if this is a nested stack with its own changeset + if (rc.ResourceType === 'AWS::CloudFormation::Stack' && !rc.ChangeSetId) { + // Stack change without a nested changeset — can't inspect its contents. + // This happens when IncludeNestedStacks creates the root changeset but the + // nested changeset failed before being created. Track as incomplete. + const stackLabel = rc.LogicalResourceId || rc.PhysicalResourceId || 'unknown'; + print.warn(`Nested stack ${stackLabel} has no changeset — tracking as incomplete`); + skippedStacks.push(stackLabel); + result.changes.push(changeInfo); + continue; + } if (rc.ResourceType === 'AWS::CloudFormation::Stack' && rc.ChangeSetId && rc.PhysicalResourceId) { try { // Extract stack name and changeset name from ARNs using parseArn utility @@ -317,11 +336,12 @@ async function analyzeChangeSet( ChangeSetName: changeSetName, }), ); - const terminalStatuses = ['CREATE_COMPLETE', 'FAILED', 'DELETE_COMPLETE']; - const maxPollAttempts = 30; - for (let attempt = 0; !terminalStatuses.includes(nestedChangeSet.Status!) && attempt < maxPollAttempts; attempt++) { + const terminalStatuses = ['CREATE_COMPLETE', 'FAILED', 'DELETE_COMPLETE', 'DELETE_FAILED']; + const NESTED_POLL_MAX_ATTEMPTS = 30; + const NESTED_POLL_INTERVAL_MS = 2000; + for (let attempt = 0; !terminalStatuses.includes(nestedChangeSet.Status ?? '') && attempt < NESTED_POLL_MAX_ATTEMPTS; attempt++) { print.debug(`Nested changeset ${stackName} is ${nestedChangeSet.Status}, waiting...`); - await new Promise((resolve) => setTimeout(resolve, 2000)); + await new Promise((resolve) => setTimeout(resolve, NESTED_POLL_INTERVAL_MS)); nestedChangeSet = await cfn.send( new DescribeChangeSetCommand({ StackName: stackName, @@ -329,13 +349,21 @@ async function analyzeChangeSet( }), ); } - if (!terminalStatuses.includes(nestedChangeSet.Status!)) { + if (!terminalStatuses.includes(nestedChangeSet.Status ?? '')) { print.warn(`Nested changeset ${stackName} did not reach terminal status (${nestedChangeSet.Status})`); skippedStacks.push(stackName); result.changes.push(changeInfo); continue; } + // Deleted or failed-to-delete changesets can't be analyzed — track as incomplete + if (nestedChangeSet.Status === 'DELETE_COMPLETE' || nestedChangeSet.Status === 'DELETE_FAILED') { + print.warn(`Nested changeset ${stackName} was deleted (${nestedChangeSet.Status}) — tracking as incomplete`); + skippedStacks.push(stackName); + result.changes.push(changeInfo); + continue; + } + // Print nested changeset details if (nestedChangeSet.Changes && nestedChangeSet.Changes.length > 0) { print.debug(`Nested Stack: ${stackName}`); @@ -371,11 +399,16 @@ async function analyzeChangeSet( print.debug(`Processed ${nestedResult.changes.length} nested changes`); } } catch (error: any) { - // Log error and track as incomplete + // Log error and track as incomplete. Use LogicalResourceId as fallback + // since extractStackNameFromArn could throw on malformed ARNs. print.warn(`⚠ Could not fetch nested changeset for ${rc.LogicalResourceId}: ${error.message}`); print.debug(`Stack ARN: ${rc.PhysicalResourceId}`); print.debug(`ChangeSet ID: ${rc.ChangeSetId}`); - skippedStacks.push(extractStackNameFromArn(rc.PhysicalResourceId)); + try { + skippedStacks.push(extractStackNameFromArn(rc.PhysicalResourceId)); + } catch { + skippedStacks.push(rc.LogicalResourceId || 'unknown'); + } } } diff --git a/packages/amplify-cli/src/commands/gen2-migration/lock.ts b/packages/amplify-cli/src/commands/gen2-migration/lock.ts index 60b15ed5ffb..5123653a8e9 100644 --- a/packages/amplify-cli/src/commands/gen2-migration/lock.ts +++ b/packages/amplify-cli/src/commands/gen2-migration/lock.ts @@ -69,10 +69,20 @@ const isExpectedLockDrift = (change: ResourceChangeWithNested): boolean => { if (change.Scope?.length === 1 && change.Scope[0] === 'DeletionPolicy') return true; // Cascading IAM Policy change caused by DeletionPolicy modification on a referenced resource. - // All Details must be Dynamic ResourceAttribute re-evaluations (no static/direct changes). - if (change.ResourceType === 'AWS::IAM::Policy' && change.Details?.length) { + // Must be: Properties-only scope, all Details are Dynamic ResourceAttribute re-evaluations + // with CausingEntity referencing a table attribute (e.g., TodoTable.Arn, TodoTable.StreamArn). + if ( + change.ResourceType === 'AWS::IAM::Policy' && + change.Scope?.length === 1 && + change.Scope[0] === 'Properties' && + change.Details?.length + ) { return change.Details.every( - (d) => d.ChangeSource === 'ResourceAttribute' && d.Evaluation === 'Dynamic' && d.Target?.RequiresRecreation === 'Never', + (d) => + d.ChangeSource === 'ResourceAttribute' && + d.Evaluation === 'Dynamic' && + d.Target?.RequiresRecreation === 'Never' && + /Table\.(Arn|StreamArn)$/.test(d.CausingEntity ?? ''), ); } @@ -310,8 +320,8 @@ export class AmplifyMigrationLockStep extends AmplifyMigrationStep { } return { valid: true }; - } catch (e) { - return { valid: false, report: e.message }; + } catch (e: any) { + return { valid: false, report: e?.message ?? String(e) }; } } From c39af3a97fdccbec31c21afe3b20504a249df622 Mon Sep 17 00:00:00 2001 From: Benjamin Pace Date: Thu, 9 Apr 2026 19:14:39 +0000 Subject: [PATCH 5/8] fix(cli-internal): treat Add/Remove on nested stack wrappers as real drift hasRealDrift silently skipped CloudFormation::Stack entries without nestedChanges regardless of Action. If a nested stack was added or removed outside Amplify, this produced a false negative allowing rollback when it should be blocked. --- .../commands/gen2-migration/lock.test.ts | 64 +++++++++++++++++++ .../src/commands/gen2-migration/lock.ts | 9 ++- 2 files changed, 71 insertions(+), 2 deletions(-) diff --git a/packages/amplify-cli/src/__tests__/commands/gen2-migration/lock.test.ts b/packages/amplify-cli/src/__tests__/commands/gen2-migration/lock.test.ts index 1a4da38c571..c5f2ad0945b 100644 --- a/packages/amplify-cli/src/__tests__/commands/gen2-migration/lock.test.ts +++ b/packages/amplify-cli/src/__tests__/commands/gen2-migration/lock.test.ts @@ -707,6 +707,70 @@ describe('AmplifyMigrationLockStep', () => { expect(valid).toBe(true); }); + it('should fail validation when IAM Policy cascading change is from a non-table resource', async () => { + mockDetectTemplateDrift.mockResolvedValueOnce({ + changes: [ + { + Action: 'Modify', + LogicalResourceId: 'SomeLambdaPolicy', + ResourceType: 'AWS::IAM::Policy', + Scope: ['Properties'], + Details: [ + { + ChangeSource: 'ResourceAttribute', + Evaluation: 'Dynamic', + Target: { Attribute: 'Properties', Name: 'PolicyDocument', RequiresRecreation: 'Never' }, + CausingEntity: 'MyLambdaFunction.Arn', + }, + ], + }, + ], + skipped: false, + }); + + const plan = await lockStep.rollback(); + const valid = await plan.validate(); + + expect(valid).toBe(false); + }); + + it('should fail validation when a nested stack is added (Add action on CloudFormation::Stack)', async () => { + mockDetectTemplateDrift.mockResolvedValueOnce({ + changes: [ + { + Action: 'Add', + LogicalResourceId: 'newUnexpectedStack', + ResourceType: 'AWS::CloudFormation::Stack', + }, + ], + skipped: false, + }); + + const plan = await lockStep.rollback(); + const valid = await plan.validate(); + + expect(valid).toBe(false); + }); + + it('should fail validation when a nested stack is removed (Remove action on CloudFormation::Stack)', async () => { + mockDetectTemplateDrift.mockResolvedValueOnce({ + changes: [ + { + Action: 'Remove', + LogicalResourceId: 'authStack', + ResourceType: 'AWS::CloudFormation::Stack', + PhysicalResourceId: 'arn:aws:cloudformation:us-east-1:123:stack/auth-stack/abc', + }, + ], + skipped: false, + }); + + const plan = await lockStep.rollback(); + const valid = await plan.validate(); + + expect(valid).toBe(false); + }); + it('should pass validation when CloudFormation::Stack wrapper has no nestedChanges', async () => { mockDetectTemplateDrift.mockResolvedValueOnce({ changes: [ diff --git a/packages/amplify-cli/src/commands/gen2-migration/lock.ts b/packages/amplify-cli/src/commands/gen2-migration/lock.ts index 5123653a8e9..e5c62746b9e 100644 --- a/packages/amplify-cli/src/commands/gen2-migration/lock.ts +++ b/packages/amplify-cli/src/commands/gen2-migration/lock.ts @@ -56,8 +56,9 @@ const ALLOW_ALL_POLICY = { * 1. Direct DeletionPolicy changes — Modify with Scope exactly `['DeletionPolicy']` * 2. Cascading IAM Policy changes — CFN flags IAM policies that reference the modified * table's attributes (e.g., `TodoTable.Arn` in PolicyDocument) as Dynamic re-evaluations. - * These have `ChangeSource: ResourceAttribute`, `Evaluation: Dynamic`, and - * `RequiresRecreation: Never` — they are harmless re-evaluations, not actual changes. + * These have `ChangeSource: ResourceAttribute`, `Evaluation: Dynamic`, + * `RequiresRecreation: Never`, and `CausingEntity` matching `*Table.Arn` or + * `*Table.StreamArn` — they are harmless re-evaluations, not actual changes. * * For lock rollback to determine whether the environment is safe to revert, these expected * changes must be filtered out so only real drift blocks the rollback. @@ -100,6 +101,10 @@ function hasRealDrift(changes: ResourceChangeWithNested[]): boolean { if (hasRealDrift(change.nestedChanges)) return true; } else if (change.ResourceType !== 'AWS::CloudFormation::Stack') { if (!isExpectedLockDrift(change)) return true; + } else if (change.Action !== 'Modify') { + // Add/Remove on a CloudFormation::Stack without nestedChanges is real drift — + // an entire nested stack was added or deleted outside Amplify. + return true; } } return false; From a3de511d4aa035e68fdcc21f4a127656ab42b487 Mon Sep 17 00:00:00 2001 From: Benjamin Pace Date: Thu, 9 Apr 2026 19:17:04 +0000 Subject: [PATCH 6/8] fix(cli-internal): tighten DeletionPolicy filter and update ADR with E2E findings CR follow-up: - isExpectedLockDrift now verifies CausingEntity matches *Table.Arn or *Table.StreamArn (prevents false positives from non-DDB dynamic re-evals) - IAM Policy path requires Scope exactly ['Properties'] - ADR-005 updated: empirical findings #7 (Template format error) and #8 (cascading IAM), isRecoverableFailure pseudocode, recursive hasRealDrift, R3/R4 risk assessments expanded, console URL fix noted --- ...05-template-drift-include-nested-stacks.md | 136 ++++++++++++++---- 1 file changed, 107 insertions(+), 29 deletions(-) diff --git a/packages/amplify-cli/adr/005-template-drift-include-nested-stacks.md b/packages/amplify-cli/adr/005-template-drift-include-nested-stacks.md index ab6af386729..e067bf2c82c 100644 --- a/packages/amplify-cli/adr/005-template-drift-include-nested-stacks.md +++ b/packages/amplify-cli/adr/005-template-drift-include-nested-stacks.md @@ -142,6 +142,28 @@ Testing against the live discussions app (amplify-discussions-main-c39a5, Code must poll each nested changeset to terminal status before describing it. +7. **Template format errors are also recoverable.** Amplify AppSync API + stacks contain model sub-stacks that export names via `Fn::Join` with + parameter references. With `IncludeNestedStacks: true`, CFN cannot + resolve intrinsic functions at validation time — all exports appear as + `{{IntrinsicFunction://Fn::Join}}`, triggering `"Template format + error: duplicate Export names"`. Despite the failure, CFN populates + Changes before the export validation step. The function was broadened + from `isEarlyValidationFailure` to `isRecoverableFailure` to handle + both EarlyValidation and Template format error failures. When a + recoverable failure produces zero Changes, the stack is treated as + incomplete (not clean). + +8. **Cascading IAM Policy changes from DeletionPolicy modifications.** + When `DeletionPolicy: Retain` is added to a DynamoDB table, CFN + flags every IAM Policy whose `PolicyDocument` references that table's + attributes (e.g., `TodoTable.Arn`) as a Dynamic ResourceAttribute + re-evaluation. These appear as `Modify` changes on `AWS::IAM::Policy` + with `Scope: ['Properties']`, `ChangeSource: ResourceAttribute`, + `Evaluation: Dynamic`, `RequiresRecreation: Never`, and + `CausingEntity: .Arn`. These are harmless and must + be filtered alongside direct DeletionPolicy drift. + ### Comparison | Dimension | IncludeNestedStacks: true | Per-nested (Method B) | @@ -209,8 +231,11 @@ patterns: Any other StatusReason is a genuine error — skip that stack. ```typescript -function isEarlyValidationFailure(reason?: string): boolean { - return !!reason?.includes('EarlyValidation'); +function isRecoverableFailure(reason?: string): boolean { + if (!reason) return false; + if (reason.includes('EarlyValidation')) return true; + if (reason.includes('Template format error')) return true; + return false; } // In analyzeChangeSet: @@ -219,9 +244,14 @@ if (changeSet.Status === 'FAILED') { || changeSet.StatusReason?.includes('No updates')) { return result; // genuinely no changes } - if (isEarlyValidationFailure(changeSet.StatusReason)) { + if (isRecoverableFailure(changeSet.StatusReason)) { + if (!changeSet.Changes?.length) { + // Recoverable failure but 0 Changes — treat as incomplete + return { changes: [], skipped: true, skipReason: ... }; + } // Changes are populated despite FAILED status — fall through - print.warn(`Nested changeset FAILED (EarlyValidation): ${changeSet.StatusReason}`); + } else if (isRoot) { + // Root always falls through — classification happens per-nested-stack } else { // Unknown failure — treat as error, skip this stack return { changes: [], skipped: true, skipReason: ... }; @@ -256,25 +286,59 @@ changes must be filtered out. The filter applies after changeset analysis and before the rollback safety decision: +Two types of expected changes: + +1. **Direct DeletionPolicy changes** — `Action: Modify`, + `Scope: ['DeletionPolicy']`. CFN reports DeletionPolicy as a + first-class Scope value. + +2. **Cascading IAM Policy changes** — When DeletionPolicy changes on + a DynamoDB table, CFN flags IAM policies referencing `TableName.Arn` + as Dynamic ResourceAttribute re-evaluations. These have + `Action: Modify`, `Scope: ['Properties']`, + `ChangeSource: ResourceAttribute`, `Evaluation: Dynamic`, + `RequiresRecreation: Never`, and `CausingEntity` matching + `*Table.Arn` or `*Table.StreamArn`. + ```typescript function isExpectedLockDrift(change: ResourceChangeWithNested): boolean { - // A change is expected lock drift if: - // 1. Action is 'Modify' - // 2. Scope is exactly ['DeletionPolicy'] (confirmed empirically — - // CFN uses 'DeletionPolicy' as a first-class Scope value) - // 3. Details show DirectModification of DeletionPolicy attribute - // with RequiresRecreation: 'Never' - return change.Action === 'Modify' - && change.Scope?.length === 1 - && change.Scope[0] === 'DeletionPolicy'; + if (change.Action !== 'Modify') return false; + + // Direct DeletionPolicy change + if (change.Scope?.length === 1 && change.Scope[0] === 'DeletionPolicy') return true; + + // Cascading IAM Policy change from DeletionPolicy on a DDB table + if (change.ResourceType === 'AWS::IAM::Policy' + && change.Scope?.length === 1 && change.Scope[0] === 'Properties' + && change.Details?.length) { + return change.Details.every(d => + d.ChangeSource === 'ResourceAttribute' + && d.Evaluation === 'Dynamic' + && d.Target?.RequiresRecreation === 'Never' + && /Table\.(Arn|StreamArn)$/.test(d.CausingEntity ?? '') + ); + } + + return false; } -const realDrift = changes.filter(c => !isExpectedLockDrift(c)); +// Recursive tree walk — CloudFormation::Stack entries are structural +// wrappers; real drift is at the leaf level. +function hasRealDrift(changes: ResourceChangeWithNested[]): boolean { + for (const change of changes) { + if (change.nestedChanges?.length) { + if (hasRealDrift(change.nestedChanges)) return true; + } else if (change.ResourceType !== 'AWS::CloudFormation::Stack') { + if (!isExpectedLockDrift(change)) return true; + } + } + return false; +} ``` -If `realDrift` is empty after filtering, lock rollback can proceed -safely. If there is any real drift, lock rollback must abort — the -environment is in an inconsistent state. +If `hasRealDrift` returns false after filtering, lock rollback can +proceed safely. If there is any real drift, lock rollback must abort +— the environment is in an inconsistent state. ### What is NOT changed @@ -310,16 +374,19 @@ update the Gen1 templates to reflect the post-migration state step. Once templates are updated, EarlyValidation passes and this risk disappears. -### R3 — Non-EarlyValidation failures conflated with EarlyValidation +### R3 — Recoverable failure classification is string-based -If CFN introduces new failure modes whose StatusReason strings don't -match our `isEarlyValidationFailure` check, we'd incorrectly treat -them as hard errors (skip the stack). This is the safe direction — -false negatives (missed drift) are worse than false positives -(unnecessary skip) for the lock rollback use case. +The `isRecoverableFailure` function matches `"EarlyValidation"` and +`"Template format error"` in StatusReason strings. If CFN introduces +new failure modes or changes wording, we may either miss recoverable +failures (treating them as hard errors — safe direction) or +incorrectly treat genuine failures as recoverable (reading incomplete +Changes — unsafe direction). The latter risk is mitigated: recoverable +failures with 0 Changes are treated as incomplete, not clean. *Mitigation*: Log all failure reasons. Expand the pattern match as -new failure types are observed. +new failure types are observed. Empty-Changes guard ensures +false-recoverable classification fails closed. ### R4 — DeletionPolicy filter accuracy @@ -329,10 +396,21 @@ classified as expected) would allow lock rollback to proceed when the environment is inconsistent. A false negative (expected drift classified as real) would block lock rollback unnecessarily. -*Mitigation*: The lock step knows exactly which resources it modified -and what it changed. The filter can be informed by the lock step's -output (list of resources + properties changed) rather than relying on -heuristic pattern matching on changeset details. +The filter has two paths: direct DeletionPolicy scope matching (tight) +and cascading IAM Policy matching (broader — checks `ChangeSource`, +`Evaluation`, `RequiresRecreation`, and `CausingEntity` pattern +`*Table.Arn|StreamArn`). The IAM path could theoretically false-positive +if a non-DeletionPolicy change triggers an identical Dynamic +ResourceAttribute re-evaluation pattern on an IAM policy referencing +a resource whose logical ID ends in "Table". In practice this is +extremely unlikely — the pattern only matches during lock rollback when +DeletionPolicy is the only template modification. + +*Mitigation*: The `CausingEntity` regex anchors to `Table.Arn` or +`Table.StreamArn`. Scope is required to be exactly `['Properties']`. +All Details must be Dynamic/ResourceAttribute/Never — any static or +direct modification fails the filter. 28 unit tests cover positive +and negative cases. ## Consequences @@ -356,7 +434,7 @@ heuristic pattern matching on changeset details. - The `CreateChangeSetCommand` call and its parameters - The recursive traversal of nested changesets via `ChangeSetId` - The "no changes" detection path -- The drift formatter and its console URL generation +- The drift formatter (console URL format was fixed separately) - The Phase 1 (CFN drift detection) and Phase 3 (local drift) paths ### What gets removed From a3c8a0a1ee3e2add49b3af2559faff886c7d5db4 Mon Sep 17 00:00:00 2001 From: Benjamin Pace Date: Thu, 9 Apr 2026 19:42:01 +0000 Subject: [PATCH 7/8] chore: remove unnecessary defensive checks from drift detection Remove DELETE_COMPLETE/DELETE_FAILED handling and Stack-without-ChangeSetId guard. Both defended against scenarios that can't happen in practice (nested changesets don't get deleted during analysis, and IncludeNestedStacks always creates changeset IDs). --- .../drift-detection/detect-template-drift.ts | 20 +------------------ 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/packages/amplify-cli/src/commands/drift-detection/detect-template-drift.ts b/packages/amplify-cli/src/commands/drift-detection/detect-template-drift.ts index 4a750292685..1068b2bdaf3 100644 --- a/packages/amplify-cli/src/commands/drift-detection/detect-template-drift.ts +++ b/packages/amplify-cli/src/commands/drift-detection/detect-template-drift.ts @@ -308,16 +308,6 @@ async function analyzeChangeSet( const changeInfo: ResourceChangeWithNested = { ...rc }; // Check if this is a nested stack with its own changeset - if (rc.ResourceType === 'AWS::CloudFormation::Stack' && !rc.ChangeSetId) { - // Stack change without a nested changeset — can't inspect its contents. - // This happens when IncludeNestedStacks creates the root changeset but the - // nested changeset failed before being created. Track as incomplete. - const stackLabel = rc.LogicalResourceId || rc.PhysicalResourceId || 'unknown'; - print.warn(`Nested stack ${stackLabel} has no changeset — tracking as incomplete`); - skippedStacks.push(stackLabel); - result.changes.push(changeInfo); - continue; - } if (rc.ResourceType === 'AWS::CloudFormation::Stack' && rc.ChangeSetId && rc.PhysicalResourceId) { try { // Extract stack name and changeset name from ARNs using parseArn utility @@ -336,7 +326,7 @@ async function analyzeChangeSet( ChangeSetName: changeSetName, }), ); - const terminalStatuses = ['CREATE_COMPLETE', 'FAILED', 'DELETE_COMPLETE', 'DELETE_FAILED']; + const terminalStatuses = ['CREATE_COMPLETE', 'FAILED']; const NESTED_POLL_MAX_ATTEMPTS = 30; const NESTED_POLL_INTERVAL_MS = 2000; for (let attempt = 0; !terminalStatuses.includes(nestedChangeSet.Status ?? '') && attempt < NESTED_POLL_MAX_ATTEMPTS; attempt++) { @@ -356,14 +346,6 @@ async function analyzeChangeSet( continue; } - // Deleted or failed-to-delete changesets can't be analyzed — track as incomplete - if (nestedChangeSet.Status === 'DELETE_COMPLETE' || nestedChangeSet.Status === 'DELETE_FAILED') { - print.warn(`Nested changeset ${stackName} was deleted (${nestedChangeSet.Status}) — tracking as incomplete`); - skippedStacks.push(stackName); - result.changes.push(changeInfo); - continue; - } - // Print nested changeset details if (nestedChangeSet.Changes && nestedChangeSet.Changes.length > 0) { print.debug(`Nested Stack: ${stackName}`); From 3c926e662fec1133112a9bd70a1c952fe82ed550 Mon Sep 17 00:00:00 2001 From: Benjamin Pace Date: Thu, 9 Apr 2026 19:57:49 +0000 Subject: [PATCH 8/8] fix(cli-internal): add readonly, JSDoc, and comments from CR review --- .../drift-detection/detect-template-drift.ts | 18 ++++++++++-------- .../services/drift-formatter.ts | 16 ++++++++-------- .../src/commands/gen2-migration/lock.ts | 2 ++ 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/packages/amplify-cli/src/commands/drift-detection/detect-template-drift.ts b/packages/amplify-cli/src/commands/drift-detection/detect-template-drift.ts index 1068b2bdaf3..0730f85a53a 100644 --- a/packages/amplify-cli/src/commands/drift-detection/detect-template-drift.ts +++ b/packages/amplify-cli/src/commands/drift-detection/detect-template-drift.ts @@ -14,16 +14,18 @@ import fs from 'fs-extra'; import * as path from 'path'; import type { SpinningLogger } from '../gen2-migration/_infra/spinning-logger'; +/** A CloudFormation resource change that may contain recursive nested stack changes. */ export interface ResourceChangeWithNested extends ResourceChange { nestedChanges?: ResourceChangeWithNested[]; } +/** Results from template drift detection via CloudFormation change sets. */ export interface TemplateDriftResults { - changes: ResourceChangeWithNested[]; - skipped: boolean; - skipReason?: string; - changeSetId?: string; - incompleteStacks?: string[]; + readonly changes: ResourceChangeWithNested[]; + readonly skipped: boolean; + readonly skipReason?: string; + readonly changeSetId?: string; + readonly incompleteStacks?: string[]; } /** @@ -228,8 +230,7 @@ export async function detectTemplateDrift( // Changeset is kept for user inspection via console URL — cleaned up on next run const result = await analyzeChangeSet(cfn, changeSet, print, true); - result.changeSetId = changeSet.ChangeSetId; - return result; + return { ...result, changeSetId: changeSet.ChangeSetId }; } catch (error: any) { return { changes: [], @@ -389,6 +390,7 @@ async function analyzeChangeSet( try { skippedStacks.push(extractStackNameFromArn(rc.PhysicalResourceId)); } catch { + // extractStackNameFromArn failed on a malformed ARN — fall back to logical ID skippedStacks.push(rc.LogicalResourceId || 'unknown'); } } @@ -398,7 +400,7 @@ async function analyzeChangeSet( } if (skippedStacks.length > 0) { - result.incompleteStacks = skippedStacks; + return { ...result, incompleteStacks: skippedStacks }; } return result; diff --git a/packages/amplify-cli/src/commands/drift-detection/services/drift-formatter.ts b/packages/amplify-cli/src/commands/drift-detection/services/drift-formatter.ts index 58b61d48715..99c08bf2085 100644 --- a/packages/amplify-cli/src/commands/drift-detection/services/drift-formatter.ts +++ b/packages/amplify-cli/src/commands/drift-detection/services/drift-formatter.ts @@ -14,14 +14,14 @@ import { type StackDriftNode, type CloudFormationDriftResults } from '../detect- import { extractCategory } from '../../gen2-migration/_infra/categories'; interface DriftBlock { - categoryName: string; - logicalId?: string; - type: 'cf' | 'template' | 'local'; - cfDrift?: StackResourceDrift; - driftDetectionId?: string; - templateChange?: ResourceChangeWithNested; - changeSetId?: string; - stackArn?: string; + readonly categoryName: string; + readonly logicalId?: string; + readonly type: 'cf' | 'template' | 'local'; + readonly cfDrift?: StackResourceDrift; + readonly driftDetectionId?: string; + readonly templateChange?: ResourceChangeWithNested; + readonly changeSetId?: string; + readonly stackArn?: string; } /** diff --git a/packages/amplify-cli/src/commands/gen2-migration/lock.ts b/packages/amplify-cli/src/commands/gen2-migration/lock.ts index e5c62746b9e..73b9a233b0b 100644 --- a/packages/amplify-cli/src/commands/gen2-migration/lock.ts +++ b/packages/amplify-cli/src/commands/gen2-migration/lock.ts @@ -317,6 +317,8 @@ export class AmplifyMigrationLockStep extends AmplifyMigrationStep { }; } + // Check incompleteStacks before hasRealDrift — incomplete stacks mean we can't + // trust that the absence of real drift is accurate. if (hasRealDrift(driftResults.changes)) { return { valid: false,