Skip to content

Commit d2e5ecb

Browse files
fix: Allow Retrying status while preventing RollingOut loop
Generated-by: Claude
1 parent 451e43a commit d2e5ecb

File tree

5 files changed

+41
-24
lines changed

5 files changed

+41
-24
lines changed

internal/operator-controller/controllers/clusterextension_controller_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -785,7 +785,9 @@ func TestClusterExtensionBoxcutterApplierFailsDoesNotLeakDeprecationErrors(t *te
785785
// the same inputs the runtime would see.
786786
d.RevisionStatesGetter = &MockRevisionStatesGetter{
787787
RevisionStates: &controllers.RevisionStates{
788-
RollingOut: []*controllers.RevisionMetadata{{}},
788+
RollingOut: []*controllers.RevisionMetadata{{
789+
BundleMetadata: ocv1.BundleMetadata{Version: "1.0.0"},
790+
}},
789791
},
790792
}
791793
d.Resolver = resolve.Func(func(ctx context.Context, ext *ocv1.ClusterExtension, installedBundle *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) {

internal/operator-controller/controllers/clusterextension_reconcile_steps.go

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -140,15 +140,28 @@ func ResolveBundle(r resolve.Resolver, c client.Client) ReconcileStepFunc {
140140
return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) {
141141
l := log.FromContext(ctx)
142142

143-
// If already rolling out, use existing revision and set deprecation to Unknown (no catalog check)
143+
// If already rolling out, check if the rolling-out revision matches the current spec.
144+
// If spec has changed (e.g., version updated), we need to resolve a new bundle instead of
145+
// reusing the rolling-out revision.
144146
if len(state.revisionStates.RollingOut) > 0 {
145-
installedBundleName := ""
146-
if state.revisionStates.Installed != nil {
147-
installedBundleName = state.revisionStates.Installed.Name
147+
rollingOutRevision := state.revisionStates.RollingOut[0]
148+
specVersion := ""
149+
if ext.Spec.Source.Catalog != nil {
150+
specVersion = ext.Spec.Source.Catalog.Version
148151
}
149-
SetDeprecationStatus(ext, installedBundleName, nil, false)
150-
state.resolvedRevisionMetadata = state.revisionStates.RollingOut[0]
151-
return nil, nil
152+
153+
// If spec version matches rolling-out revision version (or no version specified),
154+
// reuse the existing rolling-out revision
155+
if specVersion == "" || specVersion == rollingOutRevision.Version {
156+
installedBundleName := ""
157+
if state.revisionStates.Installed != nil {
158+
installedBundleName = state.revisionStates.Installed.Name
159+
}
160+
SetDeprecationStatus(ext, installedBundleName, nil, false)
161+
state.resolvedRevisionMetadata = rollingOutRevision
162+
return nil, nil
163+
}
164+
// Spec version changed - fall through to resolve new bundle from catalog
152165
}
153166

154167
// Resolve a new bundle from the catalog

internal/operator-controller/controllers/clusterobjectset_controller.go

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -672,27 +672,27 @@ func setRetryingConditions(cos *ocv1.ClusterObjectSet, message string) {
672672
// markAsProgressing sets the Progressing condition to True with the given reason.
673673
//
674674
// Once ProgressDeadlineExceeded has been set for the current generation, this function
675-
// keeps that condition sticky for non-terminal progressing reasons that would otherwise
676-
// cause the reconciler to oscillate between Progressing=True and
677-
// ProgressDeadlineExceeded=False for the same generation.
675+
// blocks only the RollingOut reason to prevent a reconcile loop for Active revisions.
676+
// The loop would occur when reconcile() sees in-transition objects and sets
677+
// Progressing=True/RollingOut, then the progress deadline check immediately resets it
678+
// to ProgressDeadlineExceeded, causing a status-only update that triggers another reconcile.
678679
//
679-
// In particular, both RollingOut and Retrying are blocked for Active revisions.
680-
// Allowing those reasons to overwrite ProgressDeadlineExceeded can trigger unnecessary
681-
// status-only reconciles and interfere with retry/requeue behavior due to the progress
682-
// deadline check resetting the condition.
680+
// Archived revisions are exempt because they skip progress deadline enforcement entirely,
681+
// so their status updates won't be overridden.
683682
//
684-
// Archived revisions are exempt from this guard because the progress deadline check
685-
// does not apply to them, allowing archival teardown retries to show Retrying status.
683+
// Other reasons like Succeeded and Retrying are always allowed because:
684+
// - Succeeded represents terminal success (rollout eventually completed)
685+
// - Retrying represents transient errors that need to be visible in status
686686
//
687-
// If the generation changed (spec update) since ProgressDeadlineExceeded was recorded,
688-
// the guard allows the update through so the condition reflects the new generation.
687+
// If the generation changed since ProgressDeadlineExceeded was recorded, the guard
688+
// allows all updates through so the condition reflects the new generation.
689689
func markAsProgressing(cos *ocv1.ClusterObjectSet, reason, message string) {
690690
if cnd := meta.FindStatusCondition(cos.Status.Conditions, ocv1.ClusterObjectSetTypeProgressing); cnd != nil &&
691691
cnd.Status == metav1.ConditionFalse && cnd.Reason == ocv1.ReasonProgressDeadlineExceeded &&
692692
cnd.ObservedGeneration == cos.Generation &&
693-
(reason == ocv1.ReasonRollingOut || reason == ocv1.ClusterObjectSetReasonRetrying) &&
693+
reason == ocv1.ReasonRollingOut &&
694694
cos.Spec.LifecycleState != ocv1.ClusterObjectSetLifecycleStateArchived {
695-
log.Log.V(1).Info("skipping markAsProgressing: ProgressDeadlineExceeded is sticky for non-terminal progressing reasons",
695+
log.Log.V(1).Info("skipping markAsProgressing: ProgressDeadlineExceeded is sticky for RollingOut on Active revisions",
696696
"requestedReason", reason, "revision", cos.Name, "lifecycleState", cos.Spec.LifecycleState)
697697
return
698698
}

internal/operator-controller/controllers/clusterobjectset_controller_internal_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ func Test_markAsProgressing_doesNotOverwriteProgressDeadlineExceeded(t *testing.
300300
expectReason: ocv1.ReasonSucceeded,
301301
},
302302
{
303-
name: "does not overwrite ProgressDeadlineExceeded with Retrying (same generation)",
303+
name: "allows Retrying to overwrite ProgressDeadlineExceeded (same generation)",
304304
generation: 1,
305305
existingConditions: []metav1.Condition{
306306
{
@@ -311,8 +311,8 @@ func Test_markAsProgressing_doesNotOverwriteProgressDeadlineExceeded(t *testing.
311311
},
312312
},
313313
reason: ocv1.ClusterObjectSetReasonRetrying,
314-
expectProgressing: metav1.ConditionFalse,
315-
expectReason: ocv1.ReasonProgressDeadlineExceeded,
314+
expectProgressing: metav1.ConditionTrue,
315+
expectReason: ocv1.ClusterObjectSetReasonRetrying,
316316
},
317317
{
318318
name: "allows RollingOut when generation changed since ProgressDeadlineExceeded",

internal/operator-controller/controllers/clusterobjectset_controller_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1083,6 +1083,8 @@ func Test_ClusterObjectSetReconciler_Reconcile_ArchivalAfterProgressDeadlineExce
10831083

10841084
ext := newTestClusterExtension()
10851085
rev1 := newTestClusterObjectSet(t, clusterObjectSetName, ext, testScheme)
1086+
rev1.Spec.ProgressDeadlineMinutes = 1
1087+
rev1.CreationTimestamp = metav1.NewTime(time.Now().Add(-2 * time.Minute))
10861088
rev1.Finalizers = []string{"olm.operatorframework.io/teardown"}
10871089
meta.SetStatusCondition(&rev1.Status.Conditions, metav1.Condition{
10881090
Type: ocv1.ClusterObjectSetTypeProgressing,

0 commit comments

Comments
 (0)