Skip to content

Commit 7452c0b

Browse files
fix: Allow spec changes after ProgressDeadlineExceeded
This commit addresses the reconcile loop and archival timeout issues when ProgressDeadlineExceeded is set on a ClusterObjectSet. Generated-By: Claude RollingOut revisions are sorted oldest→newest, so checking the first element (index 0) could cause unnecessary re-resolution when a newer rolling-out revision already matches the current spec. Now checks the last element (latest rolling-out revision) before deciding whether to resolve a new bundle from catalog. This prevents creating extra revisions when multiple rolling-out revisions exist (e.g., older revision stuck, newer one rolling out). Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent 4510b1b commit 7452c0b

File tree

6 files changed

+387
-27
lines changed

6 files changed

+387
-27
lines changed

internal/operator-controller/controllers/clusterextension_controller_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -785,7 +785,9 @@ func TestClusterExtensionBoxcutterApplierFailsDoesNotLeakDeprecationErrors(t *te
785785
// the same inputs the runtime would see.
786786
d.RevisionStatesGetter = &MockRevisionStatesGetter{
787787
RevisionStates: &controllers.RevisionStates{
788-
RollingOut: []*controllers.RevisionMetadata{{}},
788+
RollingOut: []*controllers.RevisionMetadata{{
789+
BundleMetadata: ocv1.BundleMetadata{Version: "1.0.0"},
790+
}},
789791
},
790792
}
791793
d.Resolver = resolve.Func(func(ctx context.Context, ext *ocv1.ClusterExtension, installedBundle *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) {

internal/operator-controller/controllers/clusterextension_reconcile_steps.go

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -140,15 +140,32 @@ func ResolveBundle(r resolve.Resolver, c client.Client) ReconcileStepFunc {
140140
return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) {
141141
l := log.FromContext(ctx)
142142

143-
// If already rolling out, use existing revision and set deprecation to Unknown (no catalog check)
143+
// If already rolling out, check if the latest rolling-out revision matches the current spec.
144+
// If spec has changed (e.g., version updated), we need to resolve a new bundle instead of
145+
// reusing the rolling-out revision.
146+
//
147+
// Note: RollingOut slice is sorted oldest→newest, so use the last element (most recent).
148+
// This avoids re-resolving when a newer revision already matches the spec, even if an
149+
// older revision is still stuck rolling out.
144150
if len(state.revisionStates.RollingOut) > 0 {
145-
installedBundleName := ""
146-
if state.revisionStates.Installed != nil {
147-
installedBundleName = state.revisionStates.Installed.Name
151+
latestRollingOutRevision := state.revisionStates.RollingOut[len(state.revisionStates.RollingOut)-1]
152+
specVersion := ""
153+
if ext.Spec.Source.Catalog != nil {
154+
specVersion = ext.Spec.Source.Catalog.Version
148155
}
149-
SetDeprecationStatus(ext, installedBundleName, nil, false)
150-
state.resolvedRevisionMetadata = state.revisionStates.RollingOut[0]
151-
return nil, nil
156+
157+
// If spec version matches latest rolling-out revision version (or no version specified),
158+
// reuse the existing rolling-out revision
159+
if specVersion == "" || specVersion == latestRollingOutRevision.Version {
160+
installedBundleName := ""
161+
if state.revisionStates.Installed != nil {
162+
installedBundleName = state.revisionStates.Installed.Name
163+
}
164+
SetDeprecationStatus(ext, installedBundleName, nil, false)
165+
state.resolvedRevisionMetadata = latestRollingOutRevision
166+
return nil, nil
167+
}
168+
// Spec version changed - fall through to resolve new bundle from catalog
152169
}
153170

154171
// Resolve a new bundle from the catalog

internal/operator-controller/controllers/clusterobjectset_controller.go

Lines changed: 30 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ import (
3131
"sigs.k8s.io/controller-runtime/pkg/builder"
3232
"sigs.k8s.io/controller-runtime/pkg/client"
3333
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
34-
"sigs.k8s.io/controller-runtime/pkg/event"
3534
"sigs.k8s.io/controller-runtime/pkg/handler"
3635
"sigs.k8s.io/controller-runtime/pkg/log"
3736
"sigs.k8s.io/controller-runtime/pkg/predicate"
@@ -81,7 +80,10 @@ func (c *ClusterObjectSetReconciler) Reconcile(ctx context.Context, req ctrl.Req
8180
reconciledRev := existingRev.DeepCopy()
8281
res, reconcileErr := c.reconcile(ctx, reconciledRev)
8382

84-
if pd := existingRev.Spec.ProgressDeadlineMinutes; pd > 0 {
83+
// Progress deadline enforcement only applies to Active revisions, not Archived ones.
84+
// Archived revisions may need to retry teardown operations and should not have their
85+
// Progressing condition or requeue behavior overridden by the deadline check.
86+
if pd := existingRev.Spec.ProgressDeadlineMinutes; pd > 0 && reconciledRev.Spec.LifecycleState != ocv1.ClusterObjectSetLifecycleStateArchived {
8587
cnd := meta.FindStatusCondition(reconciledRev.Status.Conditions, ocv1.ClusterObjectSetTypeProgressing)
8688
isStillProgressing := cnd != nil && cnd.Status == metav1.ConditionTrue && cnd.Reason != ocv1.ReasonSucceeded
8789
succeeded := meta.IsStatusConditionTrue(reconciledRev.Status.Conditions, ocv1.ClusterObjectSetTypeSucceeded)
@@ -344,29 +346,12 @@ type Sourcoser interface {
344346
}
345347

346348
func (c *ClusterObjectSetReconciler) SetupWithManager(mgr ctrl.Manager) error {
347-
skipProgressDeadlineExceededPredicate := predicate.Funcs{
348-
UpdateFunc: func(e event.UpdateEvent) bool {
349-
rev, ok := e.ObjectNew.(*ocv1.ClusterObjectSet)
350-
if !ok {
351-
return true
352-
}
353-
// allow deletions to happen
354-
if !rev.DeletionTimestamp.IsZero() {
355-
return true
356-
}
357-
if cnd := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterObjectSetTypeProgressing); cnd != nil && cnd.Status == metav1.ConditionFalse && cnd.Reason == ocv1.ReasonProgressDeadlineExceeded {
358-
return false
359-
}
360-
return true
361-
},
362-
}
363349
c.Clock = clock.RealClock{}
364350
return ctrl.NewControllerManagedBy(mgr).
365351
For(
366352
&ocv1.ClusterObjectSet{},
367353
builder.WithPredicates(
368354
predicate.ResourceVersionChangedPredicate{},
369-
skipProgressDeadlineExceededPredicate,
370355
),
371356
).
372357
WatchesRawSource(
@@ -684,7 +669,33 @@ func setRetryingConditions(cos *ocv1.ClusterObjectSet, message string) {
684669
}
685670
}
686671

672+
// markAsProgressing sets the Progressing condition to True with the given reason.
673+
//
674+
// Once ProgressDeadlineExceeded has been set for the current generation, this function
675+
// blocks only the RollingOut reason to prevent a reconcile loop for Active revisions.
676+
// The loop would occur when reconcile() sees in-transition objects and sets
677+
// Progressing=True/RollingOut, then the progress deadline check immediately resets it
678+
// to ProgressDeadlineExceeded, causing a status-only update that triggers another reconcile.
679+
//
680+
// Archived revisions are exempt because they skip progress deadline enforcement entirely,
681+
// so their status updates won't be overridden.
682+
//
683+
// Other reasons like Succeeded and Retrying are always allowed because:
684+
// - Succeeded represents terminal success (rollout eventually completed)
685+
// - Retrying represents transient errors that need to be visible in status
686+
//
687+
// If the generation changed since ProgressDeadlineExceeded was recorded, the guard
688+
// allows all updates through so the condition reflects the new generation.
687689
func markAsProgressing(cos *ocv1.ClusterObjectSet, reason, message string) {
690+
if cnd := meta.FindStatusCondition(cos.Status.Conditions, ocv1.ClusterObjectSetTypeProgressing); cnd != nil &&
691+
cnd.Status == metav1.ConditionFalse && cnd.Reason == ocv1.ReasonProgressDeadlineExceeded &&
692+
cnd.ObservedGeneration == cos.Generation &&
693+
reason == ocv1.ReasonRollingOut &&
694+
cos.Spec.LifecycleState != ocv1.ClusterObjectSetLifecycleStateArchived {
695+
log.Log.V(1).Info("skipping markAsProgressing: ProgressDeadlineExceeded is sticky for RollingOut on Active revisions",
696+
"requestedReason", reason, "revision", cos.Name, "lifecycleState", cos.Spec.LifecycleState)
697+
return
698+
}
688699
meta.SetStatusCondition(&cos.Status.Conditions, metav1.Condition{
689700
Type: ocv1.ClusterObjectSetTypeProgressing,
690701
Status: metav1.ConditionTrue,

internal/operator-controller/controllers/clusterobjectset_controller_internal_test.go

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"time"
77

88
"github.com/stretchr/testify/require"
9+
"k8s.io/apimachinery/pkg/api/meta"
910
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1011
"k8s.io/apimachinery/pkg/runtime"
1112
"k8s.io/apimachinery/pkg/runtime/schema"
@@ -235,3 +236,113 @@ func (m *mockTrackingCacheInternal) Watch(ctx context.Context, user client.Objec
235236
func (m *mockTrackingCacheInternal) Source(h handler.EventHandler, predicates ...predicate.Predicate) source.Source {
236237
return nil
237238
}
239+
240+
func Test_markAsProgressing_doesNotOverwriteProgressDeadlineExceeded(t *testing.T) {
241+
for _, tc := range []struct {
242+
name string
243+
generation int64
244+
existingConditions []metav1.Condition
245+
reason string
246+
expectProgressing metav1.ConditionStatus
247+
expectReason string
248+
}{
249+
{
250+
name: "sets Progressing when no condition exists",
251+
generation: 1,
252+
existingConditions: nil,
253+
reason: ocv1.ReasonRollingOut,
254+
expectProgressing: metav1.ConditionTrue,
255+
expectReason: ocv1.ReasonRollingOut,
256+
},
257+
{
258+
name: "sets Progressing when currently RollingOut",
259+
generation: 1,
260+
existingConditions: []metav1.Condition{
261+
{
262+
Type: ocv1.ClusterObjectSetTypeProgressing,
263+
Status: metav1.ConditionTrue,
264+
Reason: ocv1.ReasonRollingOut,
265+
ObservedGeneration: 1,
266+
},
267+
},
268+
reason: ocv1.ReasonSucceeded,
269+
expectProgressing: metav1.ConditionTrue,
270+
expectReason: ocv1.ReasonSucceeded,
271+
},
272+
{
273+
name: "does not overwrite ProgressDeadlineExceeded with RollingOut (same generation)",
274+
generation: 1,
275+
existingConditions: []metav1.Condition{
276+
{
277+
Type: ocv1.ClusterObjectSetTypeProgressing,
278+
Status: metav1.ConditionFalse,
279+
Reason: ocv1.ReasonProgressDeadlineExceeded,
280+
ObservedGeneration: 1,
281+
},
282+
},
283+
reason: ocv1.ReasonRollingOut,
284+
expectProgressing: metav1.ConditionFalse,
285+
expectReason: ocv1.ReasonProgressDeadlineExceeded,
286+
},
287+
{
288+
name: "allows Succeeded to overwrite ProgressDeadlineExceeded",
289+
generation: 1,
290+
existingConditions: []metav1.Condition{
291+
{
292+
Type: ocv1.ClusterObjectSetTypeProgressing,
293+
Status: metav1.ConditionFalse,
294+
Reason: ocv1.ReasonProgressDeadlineExceeded,
295+
ObservedGeneration: 1,
296+
},
297+
},
298+
reason: ocv1.ReasonSucceeded,
299+
expectProgressing: metav1.ConditionTrue,
300+
expectReason: ocv1.ReasonSucceeded,
301+
},
302+
{
303+
name: "allows Retrying to overwrite ProgressDeadlineExceeded (same generation)",
304+
generation: 1,
305+
existingConditions: []metav1.Condition{
306+
{
307+
Type: ocv1.ClusterObjectSetTypeProgressing,
308+
Status: metav1.ConditionFalse,
309+
Reason: ocv1.ReasonProgressDeadlineExceeded,
310+
ObservedGeneration: 1,
311+
},
312+
},
313+
reason: ocv1.ClusterObjectSetReasonRetrying,
314+
expectProgressing: metav1.ConditionTrue,
315+
expectReason: ocv1.ClusterObjectSetReasonRetrying,
316+
},
317+
{
318+
name: "allows RollingOut when generation changed since ProgressDeadlineExceeded",
319+
generation: 2,
320+
existingConditions: []metav1.Condition{
321+
{
322+
Type: ocv1.ClusterObjectSetTypeProgressing,
323+
Status: metav1.ConditionFalse,
324+
Reason: ocv1.ReasonProgressDeadlineExceeded,
325+
ObservedGeneration: 1,
326+
},
327+
},
328+
reason: ocv1.ReasonRollingOut,
329+
expectProgressing: metav1.ConditionTrue,
330+
expectReason: ocv1.ReasonRollingOut,
331+
},
332+
} {
333+
t.Run(tc.name, func(t *testing.T) {
334+
cos := &ocv1.ClusterObjectSet{
335+
ObjectMeta: metav1.ObjectMeta{Generation: tc.generation},
336+
Status: ocv1.ClusterObjectSetStatus{
337+
Conditions: tc.existingConditions,
338+
},
339+
}
340+
markAsProgressing(cos, tc.reason, "test message")
341+
342+
cnd := meta.FindStatusCondition(cos.Status.Conditions, ocv1.ClusterObjectSetTypeProgressing)
343+
require.NotNil(t, cnd)
344+
require.Equal(t, tc.expectProgressing, cnd.Status)
345+
require.Equal(t, tc.expectReason, cnd.Reason)
346+
})
347+
}
348+
}

0 commit comments

Comments
 (0)