Skip to content

Commit 76f5430

Browse files
fix: Allow spec changes after ProgressDeadlineExceeded
Generated-by: Cursor:Claude
1 parent 4510b1b commit 76f5430

4 files changed

Lines changed: 355 additions & 18 deletions

File tree

internal/operator-controller/controllers/clusterobjectset_controller.go

Lines changed: 30 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ import (
3131
"sigs.k8s.io/controller-runtime/pkg/builder"
3232
"sigs.k8s.io/controller-runtime/pkg/client"
3333
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
34-
"sigs.k8s.io/controller-runtime/pkg/event"
3534
"sigs.k8s.io/controller-runtime/pkg/handler"
3635
"sigs.k8s.io/controller-runtime/pkg/log"
3736
"sigs.k8s.io/controller-runtime/pkg/predicate"
@@ -344,29 +343,12 @@ type Sourcoser interface {
344343
}
345344

346345
func (c *ClusterObjectSetReconciler) SetupWithManager(mgr ctrl.Manager) error {
347-
skipProgressDeadlineExceededPredicate := predicate.Funcs{
348-
UpdateFunc: func(e event.UpdateEvent) bool {
349-
rev, ok := e.ObjectNew.(*ocv1.ClusterObjectSet)
350-
if !ok {
351-
return true
352-
}
353-
// allow deletions to happen
354-
if !rev.DeletionTimestamp.IsZero() {
355-
return true
356-
}
357-
if cnd := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterObjectSetTypeProgressing); cnd != nil && cnd.Status == metav1.ConditionFalse && cnd.Reason == ocv1.ReasonProgressDeadlineExceeded {
358-
return false
359-
}
360-
return true
361-
},
362-
}
363346
c.Clock = clock.RealClock{}
364347
return ctrl.NewControllerManagedBy(mgr).
365348
For(
366349
&ocv1.ClusterObjectSet{},
367350
builder.WithPredicates(
368351
predicate.ResourceVersionChangedPredicate{},
369-
skipProgressDeadlineExceededPredicate,
370352
),
371353
).
372354
WatchesRawSource(
@@ -684,7 +666,37 @@ func setRetryingConditions(cos *ocv1.ClusterObjectSet, message string) {
684666
}
685667
}
686668

669+
// markAsProgressing sets the Progressing condition to True with the given reason.
670+
//
671+
// Once ProgressDeadlineExceeded has been set for the current generation, this function
672+
// becomes a no-op for every reason except Succeeded. This prevents a reconcile loop
673+
// where the reconciler would set Progressing=True on each run only for the deadline
674+
// check to immediately reset it back to ProgressDeadlineExceeded, producing an
675+
// unnecessary status update that triggers another reconcile.
676+
//
677+
// If the generation changed (spec update) since ProgressDeadlineExceeded was recorded,
678+
// the guard allows the update through so the condition reflects the new generation.
679+
//
680+
// The ReasonSucceeded carve-out (reason != ReasonSucceeded) exists because objects may
681+
// eventually become ready even after the deadline. This allows a revision that exceeded
682+
// its deadline to still transition to Succeeded if the rollout eventually completes,
683+
// providing a complete lifecycle: RollingOut → ProgressDeadlineExceeded → Succeeded.
684+
// Without this exception, a slow-but-eventually-successful rollout would be stuck at
685+
// ProgressDeadlineExceeded forever, even after all objects become healthy.
686+
//
687+
// NOTE: new reasons added in the future will be silently blocked by default due to
688+
// the `reason != ReasonSucceeded` check. This is the safe default, but it means any
689+
// new reason that should override ProgressDeadlineExceeded must be explicitly added
690+
// to the guard condition below.
687691
func markAsProgressing(cos *ocv1.ClusterObjectSet, reason, message string) {
692+
if cnd := meta.FindStatusCondition(cos.Status.Conditions, ocv1.ClusterObjectSetTypeProgressing); cnd != nil &&
693+
cnd.Status == metav1.ConditionFalse && cnd.Reason == ocv1.ReasonProgressDeadlineExceeded &&
694+
cnd.ObservedGeneration == cos.Generation &&
695+
reason != ocv1.ReasonSucceeded {
696+
log.Log.V(1).Info("skipping markAsProgressing: ProgressDeadlineExceeded already set",
697+
"requestedReason", reason, "revision", cos.Name)
698+
return
699+
}
688700
meta.SetStatusCondition(&cos.Status.Conditions, metav1.Condition{
689701
Type: ocv1.ClusterObjectSetTypeProgressing,
690702
Status: metav1.ConditionTrue,

internal/operator-controller/controllers/clusterobjectset_controller_internal_test.go

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"time"
77

88
"github.com/stretchr/testify/require"
9+
"k8s.io/apimachinery/pkg/api/meta"
910
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1011
"k8s.io/apimachinery/pkg/runtime"
1112
"k8s.io/apimachinery/pkg/runtime/schema"
@@ -235,3 +236,113 @@ func (m *mockTrackingCacheInternal) Watch(ctx context.Context, user client.Objec
235236
func (m *mockTrackingCacheInternal) Source(h handler.EventHandler, predicates ...predicate.Predicate) source.Source {
236237
return nil
237238
}
239+
240+
func Test_markAsProgressing_doesNotOverwriteProgressDeadlineExceeded(t *testing.T) {
241+
for _, tc := range []struct {
242+
name string
243+
generation int64
244+
existingConditions []metav1.Condition
245+
reason string
246+
expectProgressing metav1.ConditionStatus
247+
expectReason string
248+
}{
249+
{
250+
name: "sets Progressing when no condition exists",
251+
generation: 1,
252+
existingConditions: nil,
253+
reason: ocv1.ReasonRollingOut,
254+
expectProgressing: metav1.ConditionTrue,
255+
expectReason: ocv1.ReasonRollingOut,
256+
},
257+
{
258+
name: "sets Progressing when currently RollingOut",
259+
generation: 1,
260+
existingConditions: []metav1.Condition{
261+
{
262+
Type: ocv1.ClusterObjectSetTypeProgressing,
263+
Status: metav1.ConditionTrue,
264+
Reason: ocv1.ReasonRollingOut,
265+
ObservedGeneration: 1,
266+
},
267+
},
268+
reason: ocv1.ReasonSucceeded,
269+
expectProgressing: metav1.ConditionTrue,
270+
expectReason: ocv1.ReasonSucceeded,
271+
},
272+
{
273+
name: "does not overwrite ProgressDeadlineExceeded with RollingOut (same generation)",
274+
generation: 1,
275+
existingConditions: []metav1.Condition{
276+
{
277+
Type: ocv1.ClusterObjectSetTypeProgressing,
278+
Status: metav1.ConditionFalse,
279+
Reason: ocv1.ReasonProgressDeadlineExceeded,
280+
ObservedGeneration: 1,
281+
},
282+
},
283+
reason: ocv1.ReasonRollingOut,
284+
expectProgressing: metav1.ConditionFalse,
285+
expectReason: ocv1.ReasonProgressDeadlineExceeded,
286+
},
287+
{
288+
name: "allows Succeeded to overwrite ProgressDeadlineExceeded",
289+
generation: 1,
290+
existingConditions: []metav1.Condition{
291+
{
292+
Type: ocv1.ClusterObjectSetTypeProgressing,
293+
Status: metav1.ConditionFalse,
294+
Reason: ocv1.ReasonProgressDeadlineExceeded,
295+
ObservedGeneration: 1,
296+
},
297+
},
298+
reason: ocv1.ReasonSucceeded,
299+
expectProgressing: metav1.ConditionTrue,
300+
expectReason: ocv1.ReasonSucceeded,
301+
},
302+
{
303+
name: "does not overwrite ProgressDeadlineExceeded with Retrying (same generation)",
304+
generation: 1,
305+
existingConditions: []metav1.Condition{
306+
{
307+
Type: ocv1.ClusterObjectSetTypeProgressing,
308+
Status: metav1.ConditionFalse,
309+
Reason: ocv1.ReasonProgressDeadlineExceeded,
310+
ObservedGeneration: 1,
311+
},
312+
},
313+
reason: ocv1.ClusterObjectSetReasonRetrying,
314+
expectProgressing: metav1.ConditionFalse,
315+
expectReason: ocv1.ReasonProgressDeadlineExceeded,
316+
},
317+
{
318+
name: "allows RollingOut when generation changed since ProgressDeadlineExceeded",
319+
generation: 2,
320+
existingConditions: []metav1.Condition{
321+
{
322+
Type: ocv1.ClusterObjectSetTypeProgressing,
323+
Status: metav1.ConditionFalse,
324+
Reason: ocv1.ReasonProgressDeadlineExceeded,
325+
ObservedGeneration: 1,
326+
},
327+
},
328+
reason: ocv1.ReasonRollingOut,
329+
expectProgressing: metav1.ConditionTrue,
330+
expectReason: ocv1.ReasonRollingOut,
331+
},
332+
} {
333+
t.Run(tc.name, func(t *testing.T) {
334+
cos := &ocv1.ClusterObjectSet{
335+
ObjectMeta: metav1.ObjectMeta{Generation: tc.generation},
336+
Status: ocv1.ClusterObjectSetStatus{
337+
Conditions: tc.existingConditions,
338+
},
339+
}
340+
markAsProgressing(cos, tc.reason, "test message")
341+
342+
cnd := meta.FindStatusCondition(cos.Status.Conditions, ocv1.ClusterObjectSetTypeProgressing)
343+
require.NotNil(t, cnd)
344+
require.Equal(t, tc.expectProgressing, cnd.Status)
345+
require.Equal(t, tc.expectReason, cnd.Reason)
346+
})
347+
}
348+
}

internal/operator-controller/controllers/clusterobjectset_controller_test.go

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1070,6 +1070,185 @@ func Test_ClusterObjectSetReconciler_Reconcile_ProgressDeadline(t *testing.T) {
10701070
}
10711071
}
10721072

1073+
// Test_ClusterObjectSetReconciler_Reconcile_ArchivalAfterProgressDeadlineExceeded verifies that
1074+
// a COS with ProgressDeadlineExceeded can still be archived. It simulates the real scenario:
1075+
// the COS starts as Active with ProgressDeadlineExceeded, then a spec patch sets
1076+
// lifecycleState to Archived (as a succeeding revision would do), and a subsequent
1077+
// reconcile processes the archival.
1078+
func Test_ClusterObjectSetReconciler_Reconcile_ArchivalAfterProgressDeadlineExceeded(t *testing.T) {
1079+
const clusterObjectSetName = "test-ext-1"
1080+
1081+
testScheme := newScheme(t)
1082+
require.NoError(t, corev1.AddToScheme(testScheme))
1083+
1084+
ext := newTestClusterExtension()
1085+
rev1 := newTestClusterObjectSet(t, clusterObjectSetName, ext, testScheme)
1086+
rev1.Finalizers = []string{"olm.operatorframework.io/teardown"}
1087+
meta.SetStatusCondition(&rev1.Status.Conditions, metav1.Condition{
1088+
Type: ocv1.ClusterObjectSetTypeProgressing,
1089+
Status: metav1.ConditionFalse,
1090+
Reason: ocv1.ReasonProgressDeadlineExceeded,
1091+
Message: "Revision has not rolled out for 1 minute(s).",
1092+
ObservedGeneration: rev1.Generation,
1093+
})
1094+
1095+
testClient := fake.NewClientBuilder().
1096+
WithScheme(testScheme).
1097+
WithStatusSubresource(&ocv1.ClusterObjectSet{}).
1098+
WithObjects(rev1, ext).
1099+
Build()
1100+
1101+
// Simulate the patch that a succeeding revision would apply.
1102+
patch := []byte(`{"spec":{"lifecycleState":"Archived"}}`)
1103+
require.NoError(t, testClient.Patch(t.Context(),
1104+
&ocv1.ClusterObjectSet{ObjectMeta: metav1.ObjectMeta{Name: clusterObjectSetName}},
1105+
client.RawPatch(types.MergePatchType, patch),
1106+
))
1107+
1108+
mockEngine := &mockRevisionEngine{
1109+
teardown: func(ctx context.Context, rev machinerytypes.Revision, opts ...machinerytypes.RevisionTeardownOption) (machinery.RevisionTeardownResult, error) {
1110+
return &mockRevisionTeardownResult{isComplete: true}, nil
1111+
},
1112+
}
1113+
1114+
result, err := (&controllers.ClusterObjectSetReconciler{
1115+
Client: testClient,
1116+
RevisionEngineFactory: &mockRevisionEngineFactory{engine: mockEngine},
1117+
TrackingCache: &mockTrackingCache{client: testClient},
1118+
}).Reconcile(t.Context(), ctrl.Request{
1119+
NamespacedName: types.NamespacedName{Name: clusterObjectSetName},
1120+
})
1121+
require.NoError(t, err)
1122+
require.Equal(t, ctrl.Result{}, result)
1123+
1124+
rev := &ocv1.ClusterObjectSet{}
1125+
require.NoError(t, testClient.Get(t.Context(), client.ObjectKey{Name: clusterObjectSetName}, rev))
1126+
1127+
cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterObjectSetTypeProgressing)
1128+
require.NotNil(t, cond)
1129+
require.Equal(t, metav1.ConditionFalse, cond.Status)
1130+
require.Equal(t, ocv1.ClusterObjectSetReasonArchived, cond.Reason)
1131+
1132+
cond = meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterObjectSetTypeAvailable)
1133+
require.NotNil(t, cond)
1134+
require.Equal(t, metav1.ConditionUnknown, cond.Status)
1135+
require.Equal(t, ocv1.ClusterObjectSetReasonArchived, cond.Reason)
1136+
}
1137+
1138+
// Test_ClusterObjectSetReconciler_Reconcile_ProgressDeadlineExceeded_StaysSticky verifies that
1139+
// once ProgressDeadlineExceeded is set, it is not overwritten when the reconciler runs again
1140+
// and sees objects still in transition. The Progressing condition should stay at
1141+
// ProgressDeadlineExceeded instead of being set back to RollingOut.
1142+
func Test_ClusterObjectSetReconciler_Reconcile_ProgressDeadlineExceeded_StaysSticky(t *testing.T) {
1143+
const clusterObjectSetName = "test-ext-1"
1144+
1145+
testScheme := newScheme(t)
1146+
require.NoError(t, corev1.AddToScheme(testScheme))
1147+
1148+
ext := newTestClusterExtension()
1149+
rev1 := newTestClusterObjectSet(t, clusterObjectSetName, ext, testScheme)
1150+
rev1.Spec.ProgressDeadlineMinutes = 1
1151+
rev1.CreationTimestamp = metav1.NewTime(time.Now().Add(-5 * time.Minute))
1152+
meta.SetStatusCondition(&rev1.Status.Conditions, metav1.Condition{
1153+
Type: ocv1.ClusterObjectSetTypeProgressing,
1154+
Status: metav1.ConditionFalse,
1155+
Reason: ocv1.ReasonProgressDeadlineExceeded,
1156+
Message: "Revision has not rolled out for 1 minute(s).",
1157+
ObservedGeneration: rev1.Generation,
1158+
})
1159+
1160+
testClient := fake.NewClientBuilder().
1161+
WithScheme(testScheme).
1162+
WithStatusSubresource(&ocv1.ClusterObjectSet{}).
1163+
WithObjects(rev1, ext).
1164+
Build()
1165+
1166+
mockEngine := &mockRevisionEngine{
1167+
reconcile: func(ctx context.Context, rev machinerytypes.Revision, opts ...machinerytypes.RevisionReconcileOption) (machinery.RevisionResult, error) {
1168+
return &mockRevisionResult{inTransition: true}, nil
1169+
},
1170+
}
1171+
1172+
result, err := (&controllers.ClusterObjectSetReconciler{
1173+
Client: testClient,
1174+
RevisionEngineFactory: &mockRevisionEngineFactory{engine: mockEngine},
1175+
TrackingCache: &mockTrackingCache{client: testClient},
1176+
}).Reconcile(t.Context(), ctrl.Request{
1177+
NamespacedName: types.NamespacedName{Name: clusterObjectSetName},
1178+
})
1179+
require.NoError(t, err)
1180+
require.Equal(t, ctrl.Result{}, result)
1181+
1182+
rev := &ocv1.ClusterObjectSet{}
1183+
require.NoError(t, testClient.Get(t.Context(), client.ObjectKey{Name: clusterObjectSetName}, rev))
1184+
1185+
cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterObjectSetTypeProgressing)
1186+
require.NotNil(t, cond)
1187+
require.Equal(t, metav1.ConditionFalse, cond.Status, "Progressing should stay False")
1188+
require.Equal(t, ocv1.ReasonProgressDeadlineExceeded, cond.Reason, "Reason should stay ProgressDeadlineExceeded")
1189+
}
1190+
1191+
// Test_ClusterObjectSetReconciler_Reconcile_ProgressDeadlineExceeded_SucceededOverrides verifies
1192+
// that if a revision's objects eventually complete rolling out after the deadline was exceeded,
1193+
// the COS can still transition to Succeeded. ProgressDeadlineExceeded should not prevent a
1194+
// revision from completing when its objects become ready.
1195+
func Test_ClusterObjectSetReconciler_Reconcile_ProgressDeadlineExceeded_SucceededOverrides(t *testing.T) {
1196+
const clusterObjectSetName = "test-ext-1"
1197+
1198+
testScheme := newScheme(t)
1199+
require.NoError(t, corev1.AddToScheme(testScheme))
1200+
1201+
ext := newTestClusterExtension()
1202+
rev1 := newTestClusterObjectSet(t, clusterObjectSetName, ext, testScheme)
1203+
rev1.Spec.ProgressDeadlineMinutes = 1
1204+
rev1.CreationTimestamp = metav1.NewTime(time.Now().Add(-5 * time.Minute))
1205+
meta.SetStatusCondition(&rev1.Status.Conditions, metav1.Condition{
1206+
Type: ocv1.ClusterObjectSetTypeProgressing,
1207+
Status: metav1.ConditionFalse,
1208+
Reason: ocv1.ReasonProgressDeadlineExceeded,
1209+
Message: "Revision has not rolled out for 1 minute(s).",
1210+
ObservedGeneration: rev1.Generation,
1211+
})
1212+
1213+
testClient := fake.NewClientBuilder().
1214+
WithScheme(testScheme).
1215+
WithStatusSubresource(&ocv1.ClusterObjectSet{}).
1216+
WithObjects(rev1, ext).
1217+
Build()
1218+
1219+
mockEngine := &mockRevisionEngine{
1220+
reconcile: func(ctx context.Context, rev machinerytypes.Revision, opts ...machinerytypes.RevisionReconcileOption) (machinery.RevisionResult, error) {
1221+
return &mockRevisionResult{isComplete: true}, nil
1222+
},
1223+
}
1224+
1225+
result, err := (&controllers.ClusterObjectSetReconciler{
1226+
Client: testClient,
1227+
RevisionEngineFactory: &mockRevisionEngineFactory{engine: mockEngine},
1228+
TrackingCache: &mockTrackingCache{client: testClient},
1229+
}).Reconcile(t.Context(), ctrl.Request{
1230+
NamespacedName: types.NamespacedName{Name: clusterObjectSetName},
1231+
})
1232+
require.NoError(t, err)
1233+
require.Equal(t, ctrl.Result{}, result)
1234+
1235+
rev := &ocv1.ClusterObjectSet{}
1236+
require.NoError(t, testClient.Get(t.Context(), client.ObjectKey{Name: clusterObjectSetName}, rev))
1237+
1238+
cond := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterObjectSetTypeProgressing)
1239+
require.NotNil(t, cond)
1240+
require.Equal(t, metav1.ConditionTrue, cond.Status, "Progressing should transition to True")
1241+
require.Equal(t, ocv1.ReasonSucceeded, cond.Reason, "Reason should be Succeeded")
1242+
1243+
cond = meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterObjectSetTypeAvailable)
1244+
require.NotNil(t, cond)
1245+
require.Equal(t, metav1.ConditionTrue, cond.Status)
1246+
1247+
cond = meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterObjectSetTypeSucceeded)
1248+
require.NotNil(t, cond)
1249+
require.Equal(t, metav1.ConditionTrue, cond.Status)
1250+
}
1251+
10731252
func newTestClusterExtension() *ocv1.ClusterExtension {
10741253
return &ocv1.ClusterExtension{
10751254
ObjectMeta: metav1.ObjectMeta{

0 commit comments

Comments
 (0)