Skip to content

Commit dce6d68

Browse files
(fix) catalog deletion resilience support
Enables installed extensions to continue working when their source catalog becomes unavailable or is deleted. When resolution fails because the catalog is unavailable, the operator now continues reconciling with the currently installed bundle instead of failing.

Changes:
- Resolution falls back to the installed bundle when the catalog is unavailable
- Unpacking is skipped when maintaining the current installed state
- Helm and Boxcutter appliers handle a nil contentFS gracefully
- Version upgrades are properly blocked without catalog access

This ensures workloads remain stable and operational even when the catalog they were installed from is temporarily unavailable or deleted, while appropriately preventing version changes that require catalog access.
1 parent 0cf8c11 commit dce6d68

3 files changed

Lines changed: 77 additions & 12 deletions

File tree

internal/operator-controller/applier/boxcutter.go

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -317,22 +317,34 @@ func (bc *Boxcutter) createOrUpdate(ctx context.Context, obj client.Object) erro
317317
}
318318

319319
func (bc *Boxcutter) apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExtension, objectLabels, revisionAnnotations map[string]string) (bool, string, error) {
320-
// Generate desired revision
321-
desiredRevision, err := bc.RevisionGenerator.GenerateRevision(ctx, contentFS, ext, objectLabels, revisionAnnotations)
320+
// List all existing revisions
321+
existingRevisions, err := bc.getExistingRevisions(ctx, ext.GetName())
322322
if err != nil {
323323
return false, "", err
324324
}
325325

326-
if err := controllerutil.SetControllerReference(ext, desiredRevision, bc.Scheme); err != nil {
327-
return false, "", fmt.Errorf("set ownerref: %w", err)
326+
// If contentFS is nil, we're maintaining the current state without catalog access.
327+
// In this case, we should use the existing installed revision without generating a new one.
328+
if contentFS == nil {
329+
if len(existingRevisions) == 0 {
330+
return false, "", fmt.Errorf("no bundle content available and no existing revisions found")
331+
}
332+
// Use the most recent revision and just reconcile it (don't create a new one)
333+
// Boxcutter's ClusterExtensionRevision reconciliation will handle maintaining the resources
334+
// Return success since we're maintaining the current state
335+
return true, "", nil
328336
}
329337

330-
// List all existing revisions
331-
existingRevisions, err := bc.getExistingRevisions(ctx, ext.GetName())
338+
// Generate desired revision
339+
desiredRevision, err := bc.RevisionGenerator.GenerateRevision(ctx, contentFS, ext, objectLabels, revisionAnnotations)
332340
if err != nil {
333341
return false, "", err
334342
}
335343

344+
if err := controllerutil.SetControllerReference(ext, desiredRevision, bc.Scheme); err != nil {
345+
return false, "", fmt.Errorf("set ownerref: %w", err)
346+
}
347+
336348
currentRevision := &ocv1.ClusterExtensionRevision{}
337349
state := StateNeedsInstall
338350
// check if we can update the current revision.

internal/operator-controller/applier/helm.go

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,44 @@ func (h *Helm) runPreAuthorizationChecks(ctx context.Context, ext *ocv1.ClusterE
103103
}
104104

105105
func (h *Helm) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExtension, objectLabels map[string]string, storageLabels map[string]string) (bool, string, error) {
106+
ac, err := h.ActionClientGetter.ActionClientFor(ctx, ext)
107+
if err != nil {
108+
return false, "", err
109+
}
110+
111+
// If contentFS is nil, we're maintaining the current state without catalog access.
112+
// In this case, reconcile the existing Helm release if it exists.
113+
if contentFS == nil {
114+
rel, err := ac.Get(ext.GetName())
115+
if errors.Is(err, driver.ErrReleaseNotFound) {
116+
return false, "", fmt.Errorf("no bundle content available and no existing release found")
117+
}
118+
if err != nil {
119+
return false, "", fmt.Errorf("getting current release: %w", err)
120+
}
121+
122+
// Reconcile the existing release to ensure resources are maintained
123+
if err := ac.Reconcile(rel); err != nil {
124+
return false, "", err
125+
}
126+
127+
// Watch the release objects to ensure they're maintained
128+
relObjects, err := util.ManifestObjects(strings.NewReader(rel.Manifest), fmt.Sprintf("%s-release-manifest", rel.Name))
129+
if err != nil {
130+
return true, "", err
131+
}
132+
klog.FromContext(ctx).Info("watching managed objects")
133+
cache, err := h.Manager.Get(ctx, ext)
134+
if err != nil {
135+
return true, "", err
136+
}
137+
if err := cache.Watch(ctx, h.Watcher, relObjects...); err != nil {
138+
return true, "", err
139+
}
140+
141+
return true, "", nil
142+
}
143+
106144
chrt, err := h.buildHelmChart(contentFS, ext)
107145
if err != nil {
108146
return false, "", err
@@ -121,11 +159,6 @@ func (h *Helm) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExte
121159
}
122160
}
123161

124-
ac, err := h.ActionClientGetter.ActionClientFor(ctx, ext)
125-
if err != nil {
126-
return false, "", err
127-
}
128-
129162
rel, desiredRel, state, err := h.getReleaseState(ac, ext, chrt, values, post)
130163
if err != nil {
131164
return false, "", fmt.Errorf("failed to get release state using server-side dry-run: %w", err)

internal/operator-controller/controllers/clusterextension_reconcile_steps.go

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,17 @@ func ResolveBundle(r resolve.Resolver) ReconcileStepFunc {
9595
}
9696
resolvedBundle, resolvedBundleVersion, resolvedDeprecation, err := r.Resolve(ctx, ext, bm)
9797
if err != nil {
98-
// Note: We don't distinguish between resolution-specific errors and generic errors
98+
// If resolution fails but we have an installed bundle, we can continue
99+
// reconciling with the installed bundle to maintain the current state.
100+
// This enables workload resilience when the catalog becomes unavailable.
101+
if state.revisionStates.Installed != nil {
102+
l.Info("resolution failed but continuing with installed bundle", "error", err, "installedBundle", state.revisionStates.Installed.BundleMetadata)
103+
setStatusProgressing(ext, err)
104+
setInstalledStatusFromRevisionStates(ext, state.revisionStates)
105+
state.resolvedRevisionMetadata = state.revisionStates.Installed
106+
return nil, nil
107+
}
108+
// No installed bundle and resolution failed - cannot proceed
99109
setStatusProgressing(ext, err)
100110
setInstalledStatusFromRevisionStates(ext, state.revisionStates)
101111
ensureAllConditionsWithReason(ext, ocv1.ReasonFailed, err.Error())
@@ -137,6 +147,16 @@ func ResolveBundle(r resolve.Resolver) ReconcileStepFunc {
137147
func UnpackBundle(i imageutil.Puller, cache imageutil.Cache) ReconcileStepFunc {
138148
return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) {
139149
l := log.FromContext(ctx)
150+
151+
// Skip unpacking if we're using an already-installed bundle
152+
// (e.g., when catalog is unavailable but we're maintaining current state)
153+
if state.revisionStates.Installed != nil &&
154+
state.resolvedRevisionMetadata.BundleMetadata == state.revisionStates.Installed.BundleMetadata {
155+
l.Info("skipping unpack - using installed bundle content")
156+
// imageFS will remain nil - the applier will use the existing installed content
157+
return nil, nil
158+
}
159+
140160
l.Info("unpacking resolved bundle")
141161
imageFS, _, _, err := i.Pull(ctx, ext.GetName(), state.resolvedRevisionMetadata.Image, cache)
142162
if err != nil {

0 commit comments

Comments
 (0)