Skip to content

Commit 489155d

Browse files
fix: Allow spec changes after ProgressDeadlineExceeded
This commit addresses the reconcile loop and archival timeout issues when ProgressDeadlineExceeded is set on a ClusterObjectSet. Generated-By: Claude
1 parent b6dfd40 commit 489155d

File tree

9 files changed

+489
-55
lines changed

9 files changed

+489
-55
lines changed

internal/operator-controller/controllers/boxcutter_reconcile_steps.go

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,11 @@ func (d *BoxcutterRevisionStatesGetter) GetRevisionStates(ctx context.Context, e
6262
// is fairly decoupled from this code where we get the annotations back out. We may want to co-locate
6363
// the set/get logic a bit better to make it more maintainable and less likely to get out of sync.
6464
rm := &RevisionMetadata{
65-
RevisionName: rev.Name,
66-
Package: rev.Annotations[labels.PackageNameKey],
67-
Image: rev.Annotations[labels.BundleReferenceKey],
68-
Conditions: rev.Status.Conditions,
65+
RevisionName: rev.Name,
66+
Package: rev.Annotations[labels.PackageNameKey],
67+
Image: rev.Annotations[labels.BundleReferenceKey],
68+
ResolutionDigest: rev.Annotations[labels.ResolutionDigestKey],
69+
Conditions: rev.Status.Conditions,
6970
BundleMetadata: ocv1.BundleMetadata{
7071
Name: rev.Annotations[labels.BundleNameKey],
7172
Version: rev.Annotations[labels.BundleVersionKey],
@@ -100,10 +101,11 @@ func ApplyBundleWithBoxcutter(apply func(ctx context.Context, contentFS fs.FS, e
100101
return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) {
101102
l := log.FromContext(ctx)
102103
revisionAnnotations := map[string]string{
103-
labels.BundleNameKey: state.resolvedRevisionMetadata.Name,
104-
labels.PackageNameKey: state.resolvedRevisionMetadata.Package,
105-
labels.BundleVersionKey: state.resolvedRevisionMetadata.Version,
106-
labels.BundleReferenceKey: state.resolvedRevisionMetadata.Image,
104+
labels.BundleNameKey: state.resolvedRevisionMetadata.Name,
105+
labels.PackageNameKey: state.resolvedRevisionMetadata.Package,
106+
labels.BundleVersionKey: state.resolvedRevisionMetadata.Version,
107+
labels.BundleReferenceKey: state.resolvedRevisionMetadata.Image,
108+
labels.ResolutionDigestKey: state.resolvedRevisionMetadata.ResolutionDigest,
107109
}
108110
objLbls := map[string]string{
109111
labels.OwnerKindKey: ocv1.ClusterExtensionKind,

internal/operator-controller/controllers/clusterextension_controller.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -499,9 +499,10 @@ func clusterExtensionRequestsForCatalog(c client.Reader, logger logr.Logger) crh
499499
}
500500

501501
type RevisionMetadata struct {
502-
RevisionName string
503-
Package string
504-
Image string
502+
RevisionName string
503+
Package string
504+
Image string
505+
ResolutionDigest string
505506
ocv1.BundleMetadata
506507
Conditions []metav1.Condition
507508
}
@@ -535,8 +536,9 @@ func (d *HelmRevisionStatesGetter) GetRevisionStates(ctx context.Context, ext *o
535536
for _, rel := range relhis {
536537
if rel.Info != nil && rel.Info.Status == release.StatusDeployed {
537538
rs.Installed = &RevisionMetadata{
538-
Package: rel.Labels[labels.PackageNameKey],
539-
Image: rel.Labels[labels.BundleReferenceKey],
539+
Package: rel.Labels[labels.PackageNameKey],
540+
Image: rel.Labels[labels.BundleReferenceKey],
541+
ResolutionDigest: rel.Labels[labels.ResolutionDigestKey],
540542
BundleMetadata: ocv1.BundleMetadata{
541543
Name: rel.Labels[labels.BundleNameKey],
542544
Version: rel.Labels[labels.BundleVersionKey],

internal/operator-controller/controllers/clusterextension_controller_test.go

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -785,7 +785,12 @@ func TestClusterExtensionBoxcutterApplierFailsDoesNotLeakDeprecationErrors(t *te
785785
// the same inputs the runtime would see.
786786
d.RevisionStatesGetter = &MockRevisionStatesGetter{
787787
RevisionStates: &controllers.RevisionStates{
788-
RollingOut: []*controllers.RevisionMetadata{{}},
788+
RollingOut: []*controllers.RevisionMetadata{{
789+
// Different digest from current spec - ensures we call the resolver
790+
// (This simulates a rolling-out revision from a previous reconcile)
791+
ResolutionDigest: "different-digest-from-previous-reconcile",
792+
BundleMetadata: ocv1.BundleMetadata{Version: "1.0.0"},
793+
}},
789794
},
790795
}
791796
d.Resolver = resolve.Func(func(ctx context.Context, ext *ocv1.ClusterExtension, installedBundle *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) {
@@ -845,21 +850,18 @@ func TestClusterExtensionBoxcutterApplierFailsDoesNotLeakDeprecationErrors(t *te
845850

846851
deprecatedCond := apimeta.FindStatusCondition(clusterExtension.Status.Conditions, ocv1.TypeDeprecated)
847852
require.NotNil(t, deprecatedCond)
848-
require.Equal(t, metav1.ConditionUnknown, deprecatedCond.Status, "no catalog data during rollout, so Unknown")
849-
require.Equal(t, ocv1.ReasonDeprecationStatusUnknown, deprecatedCond.Reason)
850-
require.Equal(t, "deprecation status unknown: catalog data unavailable", deprecatedCond.Message)
853+
require.Equal(t, metav1.ConditionFalse, deprecatedCond.Status, "catalog data available from resolution, not deprecated")
854+
require.Equal(t, ocv1.ReasonNotDeprecated, deprecatedCond.Reason)
851855

852856
packageCond := apimeta.FindStatusCondition(clusterExtension.Status.Conditions, ocv1.TypePackageDeprecated)
853857
require.NotNil(t, packageCond)
854-
require.Equal(t, metav1.ConditionUnknown, packageCond.Status, "no catalog data during rollout, so Unknown")
855-
require.Equal(t, ocv1.ReasonDeprecationStatusUnknown, packageCond.Reason)
856-
require.Equal(t, "deprecation status unknown: catalog data unavailable", packageCond.Message)
858+
require.Equal(t, metav1.ConditionFalse, packageCond.Status, "catalog data available from resolution, package not deprecated")
859+
require.Equal(t, ocv1.ReasonNotDeprecated, packageCond.Reason)
857860

858861
channelCond := apimeta.FindStatusCondition(clusterExtension.Status.Conditions, ocv1.TypeChannelDeprecated)
859862
require.NotNil(t, channelCond)
860-
require.Equal(t, metav1.ConditionUnknown, channelCond.Status, "no catalog data during rollout, so Unknown")
861-
require.Equal(t, ocv1.ReasonDeprecationStatusUnknown, channelCond.Reason)
862-
require.Equal(t, "deprecation status unknown: catalog data unavailable", channelCond.Message)
863+
require.Equal(t, metav1.ConditionFalse, channelCond.Status, "catalog data available from resolution, channel not deprecated")
864+
require.Equal(t, ocv1.ReasonNotDeprecated, channelCond.Reason)
863865

864866
bundleCond := apimeta.FindStatusCondition(clusterExtension.Status.Conditions, ocv1.TypeBundleDeprecated)
865867
require.NotNil(t, bundleCond)

internal/operator-controller/controllers/clusterextension_reconcile_steps.go

Lines changed: 95 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,12 @@ package controllers
1818

1919
import (
2020
"context"
21+
"crypto/sha256"
22+
"encoding/base64"
23+
"encoding/json"
2124
"errors"
2225
"fmt"
26+
"slices"
2327

2428
apierrors "k8s.io/apimachinery/pkg/api/errors"
2529
apimeta "k8s.io/apimachinery/pkg/api/meta"
@@ -37,6 +41,58 @@ import (
3741
imageutil "github.com/operator-framework/operator-controller/internal/shared/util/image"
3842
)
3943

44+
// resolutionInputs captures the catalog filter fields that affect bundle resolution.
45+
// Changes to any of these fields require re-resolution.
46+
type resolutionInputs struct {
47+
PackageName string `json:"packageName"`
48+
Version string `json:"version"`
49+
Channels []string `json:"channels,omitempty"`
50+
Selector string `json:"selector,omitempty"`
51+
UpgradeConstraintPolicy string `json:"upgradeConstraintPolicy,omitempty"`
52+
}
53+
54+
// calculateResolutionDigest computes a SHA256 hash of the catalog filter inputs
55+
// that affect bundle resolution. This digest enables detection of spec changes that
56+
// require re-resolution versus when a rolling-out revision can be reused.
57+
//
58+
// The digest is base64 URL-safe encoded (43 characters) to fit within Kubernetes
59+
// label value limits (63 characters max) when stored in Helm release metadata.
60+
func calculateResolutionDigest(catalog *ocv1.CatalogFilter) (string, error) {
61+
if catalog == nil {
62+
return "", nil
63+
}
64+
65+
// Sort channels for deterministic hashing
66+
channels := slices.Clone(catalog.Channels)
67+
slices.Sort(channels)
68+
69+
inputs := resolutionInputs{
70+
PackageName: catalog.PackageName,
71+
Version: catalog.Version,
72+
Channels: channels,
73+
UpgradeConstraintPolicy: string(catalog.UpgradeConstraintPolicy),
74+
}
75+
76+
// Convert selector to canonical string representation for deterministic hashing
77+
if catalog.Selector != nil {
78+
selector, err := metav1.LabelSelectorAsSelector(catalog.Selector)
79+
if err != nil {
80+
return "", fmt.Errorf("converting selector: %w", err)
81+
}
82+
inputs.Selector = selector.String()
83+
}
84+
85+
// Marshal to JSON for consistent hashing
86+
inputsBytes, err := json.Marshal(inputs)
87+
if err != nil {
88+
return "", fmt.Errorf("marshaling resolution inputs: %w", err)
89+
}
90+
91+
// Compute SHA256 hash and encode as base64 URL-safe (43 chars, fits in label value limit)
92+
hash := sha256.Sum256(inputsBytes)
93+
return base64.RawURLEncoding.EncodeToString(hash[:]), nil
94+
}
95+
4096
func HandleFinalizers(f finalizer.Finalizer) ReconcileStepFunc {
4197
return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) {
4298
l := log.FromContext(ctx)
@@ -140,15 +196,32 @@ func ResolveBundle(r resolve.Resolver, c client.Client) ReconcileStepFunc {
140196
return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) {
141197
l := log.FromContext(ctx)
142198

143-
// If already rolling out, use existing revision and set deprecation to Unknown (no catalog check)
199+
// If already rolling out, check if the latest rolling-out revision matches the current spec.
200+
// If spec has changed (e.g., version, channels, selector, upgradeConstraintPolicy), we need
201+
// to resolve a new bundle instead of reusing the rolling-out revision.
202+
//
203+
// Note: RollingOut slice is sorted oldest→newest, so use the last element (most recent).
204+
// This avoids re-resolving when a newer revision already matches the spec, even if an
205+
// older revision is still stuck rolling out.
144206
if len(state.revisionStates.RollingOut) > 0 {
145-
installedBundleName := ""
146-
if state.revisionStates.Installed != nil {
147-
installedBundleName = state.revisionStates.Installed.Name
207+
latestRollingOutRevision := state.revisionStates.RollingOut[len(state.revisionStates.RollingOut)-1]
208+
209+
// Calculate digest of current spec's catalog filter
210+
currentDigest, err := calculateResolutionDigest(ext.Spec.Source.Catalog)
211+
if err != nil {
212+
l.Error(err, "failed to calculate resolution digest from spec")
213+
// On digest calculation error, fall through to re-resolve for safety
214+
} else if currentDigest == latestRollingOutRevision.ResolutionDigest {
215+
// Resolution inputs haven't changed - reuse the rolling-out revision
216+
installedBundleName := ""
217+
if state.revisionStates.Installed != nil {
218+
installedBundleName = state.revisionStates.Installed.Name
219+
}
220+
SetDeprecationStatus(ext, installedBundleName, nil, false)
221+
state.resolvedRevisionMetadata = latestRollingOutRevision
222+
return nil, nil
148223
}
149-
SetDeprecationStatus(ext, installedBundleName, nil, false)
150-
state.resolvedRevisionMetadata = state.revisionStates.RollingOut[0]
151-
return nil, nil
224+
// Resolution inputs changed - fall through to resolve new bundle from catalog
152225
}
153226

154227
// Resolve a new bundle from the catalog
@@ -188,9 +261,17 @@ func ResolveBundle(r resolve.Resolver, c client.Client) ReconcileStepFunc {
188261
return handleResolutionError(ctx, c, state, ext, err)
189262
}
190263

264+
// Calculate digest of the resolution inputs for change detection
265+
digest, err := calculateResolutionDigest(ext.Spec.Source.Catalog)
266+
if err != nil {
267+
l.Error(err, "failed to calculate resolution digest, continuing without it")
268+
digest = ""
269+
}
270+
191271
state.resolvedRevisionMetadata = &RevisionMetadata{
192-
Package: resolvedBundle.Package,
193-
Image: resolvedBundle.Image,
272+
Package: resolvedBundle.Package,
273+
Image: resolvedBundle.Image,
274+
ResolutionDigest: digest,
194275
// TODO: Right now, operator-controller only supports registry+v1 bundles and has no concept
195276
// of a "release" field. If/when we add a release field concept or a new bundle format
196277
// we need to re-evaluate use of `AsLegacyRegistryV1Version` so that we avoid propagating
@@ -409,10 +490,11 @@ func ApplyBundle(a Applier) ReconcileStepFunc {
409490
return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) {
410491
l := log.FromContext(ctx)
411492
revisionAnnotations := map[string]string{
412-
labels.BundleNameKey: state.resolvedRevisionMetadata.Name,
413-
labels.PackageNameKey: state.resolvedRevisionMetadata.Package,
414-
labels.BundleVersionKey: state.resolvedRevisionMetadata.Version,
415-
labels.BundleReferenceKey: state.resolvedRevisionMetadata.Image,
493+
labels.BundleNameKey: state.resolvedRevisionMetadata.Name,
494+
labels.PackageNameKey: state.resolvedRevisionMetadata.Package,
495+
labels.BundleVersionKey: state.resolvedRevisionMetadata.Version,
496+
labels.BundleReferenceKey: state.resolvedRevisionMetadata.Image,
497+
labels.ResolutionDigestKey: state.resolvedRevisionMetadata.ResolutionDigest,
416498
}
417499
objLbls := map[string]string{
418500
labels.OwnerKindKey: ocv1.ClusterExtensionKind,

internal/operator-controller/controllers/clusterobjectset_controller.go

Lines changed: 30 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ import (
3131
"sigs.k8s.io/controller-runtime/pkg/builder"
3232
"sigs.k8s.io/controller-runtime/pkg/client"
3333
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
34-
"sigs.k8s.io/controller-runtime/pkg/event"
3534
"sigs.k8s.io/controller-runtime/pkg/handler"
3635
"sigs.k8s.io/controller-runtime/pkg/log"
3736
"sigs.k8s.io/controller-runtime/pkg/predicate"
@@ -81,7 +80,10 @@ func (c *ClusterObjectSetReconciler) Reconcile(ctx context.Context, req ctrl.Req
8180
reconciledRev := existingRev.DeepCopy()
8281
res, reconcileErr := c.reconcile(ctx, reconciledRev)
8382

84-
if pd := existingRev.Spec.ProgressDeadlineMinutes; pd > 0 {
83+
// Progress deadline enforcement only applies to Active revisions, not Archived ones.
84+
// Archived revisions may need to retry teardown operations and should not have their
85+
// Progressing condition or requeue behavior overridden by the deadline check.
86+
if pd := existingRev.Spec.ProgressDeadlineMinutes; pd > 0 && reconciledRev.Spec.LifecycleState != ocv1.ClusterObjectSetLifecycleStateArchived {
8587
cnd := meta.FindStatusCondition(reconciledRev.Status.Conditions, ocv1.ClusterObjectSetTypeProgressing)
8688
isStillProgressing := cnd != nil && cnd.Status == metav1.ConditionTrue && cnd.Reason != ocv1.ReasonSucceeded
8789
succeeded := meta.IsStatusConditionTrue(reconciledRev.Status.Conditions, ocv1.ClusterObjectSetTypeSucceeded)
@@ -333,29 +335,12 @@ type Sourcoser interface {
333335
}
334336

335337
func (c *ClusterObjectSetReconciler) SetupWithManager(mgr ctrl.Manager) error {
336-
skipProgressDeadlineExceededPredicate := predicate.Funcs{
337-
UpdateFunc: func(e event.UpdateEvent) bool {
338-
rev, ok := e.ObjectNew.(*ocv1.ClusterObjectSet)
339-
if !ok {
340-
return true
341-
}
342-
// allow deletions to happen
343-
if !rev.DeletionTimestamp.IsZero() {
344-
return true
345-
}
346-
if cnd := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterObjectSetTypeProgressing); cnd != nil && cnd.Status == metav1.ConditionFalse && cnd.Reason == ocv1.ReasonProgressDeadlineExceeded {
347-
return false
348-
}
349-
return true
350-
},
351-
}
352338
c.Clock = clock.RealClock{}
353339
return ctrl.NewControllerManagedBy(mgr).
354340
For(
355341
&ocv1.ClusterObjectSet{},
356342
builder.WithPredicates(
357343
predicate.ResourceVersionChangedPredicate{},
358-
skipProgressDeadlineExceededPredicate,
359344
),
360345
).
361346
WatchesRawSource(
@@ -638,7 +623,33 @@ func setRetryingConditions(cos *ocv1.ClusterObjectSet, message string) {
638623
}
639624
}
640625

626+
// markAsProgressing sets the Progressing condition to True with the given reason.
627+
//
628+
// Once ProgressDeadlineExceeded has been set for the current generation, this function
629+
// blocks only the RollingOut reason to prevent a reconcile loop for Active revisions.
630+
// The loop would occur when reconcile() sees in-transition objects and sets
631+
// Progressing=True/RollingOut, then the progress deadline check immediately resets it
632+
// to ProgressDeadlineExceeded, causing a status-only update that triggers another reconcile.
633+
//
634+
// Archived revisions are exempt because they skip progress deadline enforcement entirely,
635+
// so their status updates won't be overridden.
636+
//
637+
// Other reasons like Succeeded and Retrying are always allowed because:
638+
// - Succeeded represents terminal success (rollout eventually completed)
639+
// - Retrying represents transient errors that need to be visible in status
640+
//
641+
// If the generation changed since ProgressDeadlineExceeded was recorded, the guard
642+
// allows all updates through so the condition reflects the new generation.
641643
func markAsProgressing(cos *ocv1.ClusterObjectSet, reason, message string) {
644+
if cnd := meta.FindStatusCondition(cos.Status.Conditions, ocv1.ClusterObjectSetTypeProgressing); cnd != nil &&
645+
cnd.Status == metav1.ConditionFalse && cnd.Reason == ocv1.ReasonProgressDeadlineExceeded &&
646+
cnd.ObservedGeneration == cos.Generation &&
647+
reason == ocv1.ReasonRollingOut &&
648+
cos.Spec.LifecycleState != ocv1.ClusterObjectSetLifecycleStateArchived {
649+
log.Log.V(1).Info("skipping markAsProgressing: ProgressDeadlineExceeded is sticky for RollingOut on Active revisions",
650+
"requestedReason", reason, "revision", cos.Name, "lifecycleState", cos.Spec.LifecycleState)
651+
return
652+
}
642653
meta.SetStatusCondition(&cos.Status.Conditions, metav1.Condition{
643654
Type: ocv1.ClusterObjectSetTypeProgressing,
644655
Status: metav1.ConditionTrue,

0 commit comments

Comments
 (0)