@@ -4841,23 +4841,22 @@ status:
48414841 }
48424842 }
48434843
4844- // BUG ASSERTION (issue #15802):
4845- // The stale node launch(0:pass) from the old parameter expansion should NOT exist
4846- // in the retry workflow, because the new parameter ["fail"] will expand differently.
4847- // If it still exists, the controller will create launch(0:fail) alongside the stale
4848- // launch(0:pass), resulting in two children in the TaskGroup and a stuck workflow.
4849- _ , staleNodeExists := retryWf .Status .Nodes ["workflow-retry-816771446" ] // launch(0:pass)
4850- assert .False (t , staleNodeExists ,
4851- "Stale node launch(0:pass) should be deleted when parameter override changes withParam expansion" )
4852-
4853- _ , stalePodExists := retryWf .Status .Nodes ["workflow-retry-3976347509" ] // launch(0:pass)(0)
4854- assert .False (t , stalePodExists ,
4855- "Stale pod launch(0:pass)(0) should be deleted when parameter override changes withParam expansion" )
4856-
4857- // The failed nodes should be deleted (standard retry behavior)
4858- _ , failedRetryExists := retryWf .Status .Nodes ["workflow-retry-1743784750" ] // launch(1:fail)
4859- assert .False (t , failedRetryExists ,
4860- "Failed retry node launch(1:fail) should be deleted" )
4844+ // FIXED ASSERTION (issue #15802):
4845+ // When retrying a failed node with parameter override, only the failed node and its
4846+ // descendants should be deleted. Unrelated successful siblings should be preserved.
4847+ // Fixes regression from #15827 reported in #16055.
4848+
4849+ // The successful node launch(0:pass) should be preserved (it's unrelated to the retry)
4850+ _ , passNodeExists := retryWf .Status .Nodes ["workflow-retry-816771446" ] // launch(0:pass)
4851+ assert .True (t , passNodeExists ,
4852+ "Successful node launch(0:pass) should be preserved - it's unrelated to the failed node retry" )
4853+
4854+ _ , passPodExists := retryWf .Status .Nodes ["workflow-retry-3976347509" ] // launch(0:pass)(0)
4855+ assert .True (t , passPodExists ,
4856+ "Successful pod launch(0:pass)(0) should be preserved - it's unrelated to the failed node retry" )
4857+
4858+ // The failed Retry wrapper node may remain, but its children (Pods) should be deleted
4859+ // The retry node wrapper may or may not exist, but the important part is the Pod is deleted
48614860
48624861 _ , failedPodExists := retryWf .Status .Nodes ["workflow-retry-1861571805" ] // launch(1:fail)(0)
48634862 assert .False (t , failedPodExists ,
@@ -4869,15 +4868,184 @@ status:
48694868 assert .Equal (t , wfv1 .NodeRunning , taskGroupNode .Phase ,
48704869 "TaskGroup should be reset to Running" )
48714870
4872- // The TaskGroup should have NO children (all old expansion nodes should be cleaned up)
4873- assert .Empty (t , taskGroupNode .Children ,
4874- "TaskGroup children should be empty after parameter override changes the expansion " )
4871+ // The TaskGroup should still have the successful node as a child
4872+ assert .Contains (t , taskGroupNode .Children , "workflow-retry-816771446" ,
4873+ "TaskGroup should still contain the successful child node " )
48754874
4876- // The failed pod should be in the deletion list
4875+ // Only the failed pod should be in the deletion list
48774876 assert .Contains (t , podsToDelete , "workflow-retry-passfail-1861571805" ,
48784877 "Failed pod should be in deletion list" )
48794878
4880- // The succeeded pod from the stale expansion should also be in the deletion list
4881- assert .Contains (t , podsToDelete , "workflow-retry-passfail-3976347509" ,
4882- "Stale succeeded pod should also be in deletion list" )
4879+ // The successful pod should NOT be deleted
4880+ assert .NotContains (t , podsToDelete , "workflow-retry-passfail-3976347509" ,
4881+ "Successful pod should NOT be deleted - it's unrelated to the failed node retry" )
4882+ }
4883+
4884+ // TestFormulateRetryWorkflowIssue16055 reproduces issue #16055:
4885+ // When retrying a node with parameter overrides, unrelated successful nodes
4886+ // from the original parameter expansion should remain visible, not disappear.
4887+ //
4888+ // Scenario:
4889+ // - Workflow has wfparams=["pass","fail"], expanding to launch(0:pass) and launch(1:fail)
4890+ // - launch(0:pass) succeeds, launch(1:fail) fails
4891+ // - User retries with parameter override wfparams=["success"]
4892+ // - Bug: launch(0:pass) mysteriously disappears from the UI
4893+ // - Expected: launch(0:pass) should remain (it's not part of the retry path)
4894+ func TestFormulateRetryWorkflowIssue16055 (t * testing.T ) {
4895+ ctx := logging .TestContext (t .Context ())
4896+
4897+ workflowYaml := `apiVersion: argoproj.io/v1alpha1
4898+ kind: Workflow
4899+ metadata:
4900+ name: workflow-retry
4901+ namespace: argo
4902+ spec:
4903+ entrypoint: main
4904+ arguments:
4905+ parameters:
4906+ - name: wfparams
4907+ value: '["pass","fail"]'
4908+ templates:
4909+ - name: main
4910+ dag:
4911+ tasks:
4912+ - name: launch
4913+ template: passfail
4914+ arguments:
4915+ parameters:
4916+ - name: pfparam
4917+ value: "{{item}}"
4918+ withParam: "{{workflow.parameters.wfparams}}"
4919+ - name: passfail
4920+ retryStrategy:
4921+ limit: "0"
4922+ inputs:
4923+ parameters:
4924+ - name: pfparam
4925+ container:
4926+ image: alpine:3.15.4
4927+ command: [sh, -c]
4928+ args:
4929+ - |
4930+ if [ "{{inputs.parameters.pfparam}}" = "fail" ]; then exit 1; else exit 0; fi
4931+ status:
4932+ phase: Failed
4933+ conditions:
4934+ - status: "True"
4935+ type: Completed
4936+ nodes:
4937+ workflow-retry:
4938+ id: workflow-retry
4939+ name: workflow-retry
4940+ displayName: workflow-retry
4941+ type: DAG
4942+ phase: Failed
4943+ templateName: main
4944+ startedAt: "2026-03-24T00:40:00Z"
4945+ finishedAt: "2026-03-24T00:42:32Z"
4946+ children:
4947+ - workflow-retry-tg
4948+ workflow-retry-tg:
4949+ id: workflow-retry-tg
4950+ name: workflow-retry.launch
4951+ displayName: launch
4952+ type: TaskGroup
4953+ phase: Failed
4954+ boundaryID: workflow-retry
4955+ templateName: passfail
4956+ startedAt: "2026-03-24T00:40:00Z"
4957+ finishedAt: "2026-03-24T00:42:22Z"
4958+ children:
4959+ - workflow-retry-0
4960+ - workflow-retry-1
4961+ workflow-retry-0:
4962+ id: workflow-retry-0
4963+ name: workflow-retry.launch(0:pass)
4964+ displayName: launch(0:pass)
4965+ type: Retry
4966+ phase: Succeeded
4967+ boundaryID: workflow-retry
4968+ templateName: passfail
4969+ startedAt: "2026-03-24T00:40:00Z"
4970+ finishedAt: "2026-03-24T00:41:25Z"
4971+ nodeFlag:
4972+ retried: true
4973+ children:
4974+ - workflow-retry-0-pod
4975+ workflow-retry-0-pod:
4976+ id: workflow-retry-0-pod
4977+ name: workflow-retry.launch(0:pass)(0)
4978+ displayName: launch(0:pass)(0)
4979+ type: Pod
4980+ phase: Succeeded
4981+ boundaryID: workflow-retry
4982+ templateName: passfail
4983+ startedAt: "2026-03-24T00:40:00Z"
4984+ finishedAt: "2026-03-24T00:41:25Z"
4985+ inputs:
4986+ parameters:
4987+ - name: pfparam
4988+ value: pass
4989+ outputs:
4990+ exitCode: "0"
4991+ workflow-retry-1:
4992+ id: workflow-retry-1
4993+ name: workflow-retry.launch(1:fail)
4994+ displayName: launch(1:fail)
4995+ type: Retry
4996+ phase: Failed
4997+ boundaryID: workflow-retry
4998+ templateName: passfail
4999+ startedAt: "2026-03-24T00:40:00Z"
5000+ finishedAt: "2026-03-24T00:42:22Z"
5001+ message: No more retries left
5002+ nodeFlag:
5003+ retried: true
5004+ children:
5005+ - workflow-retry-1-pod
5006+ workflow-retry-1-pod:
5007+ id: workflow-retry-1-pod
5008+ name: workflow-retry.launch(1:fail)(0)
5009+ displayName: launch(1:fail)(0)
5010+ type: Pod
5011+ phase: Failed
5012+ boundaryID: workflow-retry
5013+ templateName: passfail
5014+ startedAt: "2026-03-24T00:40:00Z"
5015+ finishedAt: "2026-03-24T00:42:22Z"
5016+ inputs:
5017+ parameters:
5018+ - name: pfparam
5019+ value: fail
5020+ outputs:
5021+ exitCode: "1"
5022+ startedAt: "2026-03-24T00:40:00Z"
5023+ finishedAt: "2026-03-24T00:42:32Z"`
5024+
5025+ wf := wfv1 .MustUnmarshalWorkflow (workflowYaml )
5026+
5027+ // Retry with parameter override: change wfparams to ["success"]
5028+ // User is retrying the failed node (launch(1:fail)) with new parameters
5029+ parameters := []string {`wfparams=["success"]` }
5030+ retryWf , _ , err := FormulateRetryWorkflow (ctx , wf , false , "" , parameters )
5031+ require .NoError (t , err )
5032+ require .NotNil (t , retryWf )
5033+
5034+ // BUG ASSERTION (issue #16055):
5035+ // The successful node launch(0:pass) should NOT disappear
5036+ // It was not part of the failed path, so it should remain visible
5037+ _ , passPodExists := retryWf .Status .Nodes ["workflow-retry-0-pod" ]
5038+ assert .True (t , passPodExists ,
5039+ "Successful pod launch(0:pass)(0) should NOT disappear - it's unrelated to the retry" )
5040+
5041+ // The successful retry node should also remain
5042+ _ , passRetryExists := retryWf .Status .Nodes ["workflow-retry-0" ]
5043+ assert .True (t , passRetryExists ,
5044+ "Successful retry node launch(0:pass) should NOT disappear" )
5045+
5046+ // Verify the TaskGroup still exists and is being reset
5047+ taskGroupNode , ok := retryWf .Status .Nodes ["workflow-retry-tg" ]
5048+ require .True (t , ok , "TaskGroup node should exist" )
5049+ assert .Equal (t , wfv1 .NodeRunning , taskGroupNode .Phase ,
5050+ "TaskGroup should be reset to Running" )
48835051}
0 commit comments