Skip to content

Commit e14ff89

Browse files
(fix) catalog deletion resilience support
Enables installed extensions to continue working when their source catalog becomes unavailable or is deleted. When resolution fails due to catalog unavailability, the operator now continues reconciling with the currently installed bundle instead of failing. Changes: - Resolution falls back to installed bundle when catalog unavailable - Unpacking skipped when maintaining current installed state - Helm and Boxcutter appliers handle nil contentFS gracefully - Version upgrades properly blocked without catalog access This ensures workloads remain stable and operational even when the catalog they were installed from is temporarily unavailable or deleted, while appropriately preventing version changes that require catalog access.
1 parent 44f5686 commit e14ff89

7 files changed

Lines changed: 383 additions & 15 deletions

File tree

cmd/operator-controller/main.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -626,7 +626,7 @@ func (c *boxcutterReconcilerConfigurator) Configure(ceReconciler *controllers.Cl
626626
controllers.HandleFinalizers(c.finalizers),
627627
controllers.MigrateStorage(storageMigrator),
628628
controllers.RetrieveRevisionStates(revisionStatesGetter),
629-
controllers.ResolveBundle(c.resolver),
629+
controllers.ResolveBundle(c.resolver, c.mgr.GetClient()),
630630
controllers.UnpackBundle(c.imagePuller, c.imageCache),
631631
controllers.ApplyBundleWithBoxcutter(appl),
632632
}
@@ -737,7 +737,7 @@ func (c *helmReconcilerConfigurator) Configure(ceReconciler *controllers.Cluster
737737
ceReconciler.ReconcileSteps = []controllers.ReconcileStepFunc{
738738
controllers.HandleFinalizers(c.finalizers),
739739
controllers.RetrieveRevisionStates(revisionStatesGetter),
740-
controllers.ResolveBundle(c.resolver),
740+
controllers.ResolveBundle(c.resolver, c.mgr.GetClient()),
741741
controllers.UnpackBundle(c.imagePuller, c.imageCache),
742742
controllers.ApplyBundle(appl),
743743
}

internal/operator-controller/applier/boxcutter.go

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -303,22 +303,35 @@ func (bc *Boxcutter) createOrUpdate(ctx context.Context, obj client.Object) erro
303303
}
304304

305305
func (bc *Boxcutter) apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExtension, objectLabels, revisionAnnotations map[string]string) (bool, string, error) {
306-
// Generate desired revision
307-
desiredRevision, err := bc.RevisionGenerator.GenerateRevision(ctx, contentFS, ext, objectLabels, revisionAnnotations)
306+
// List all existing revisions
307+
existingRevisions, err := bc.getExistingRevisions(ctx, ext.GetName())
308308
if err != nil {
309309
return false, "", err
310310
}
311311

312-
if err := controllerutil.SetControllerReference(ext, desiredRevision, bc.Scheme); err != nil {
313-
return false, "", fmt.Errorf("set ownerref: %w", err)
312+
// If contentFS is nil, we're maintaining the current state without catalog access.
313+
// In this case, we should use the existing installed revision without generating a new one.
314+
if contentFS == nil {
315+
if len(existingRevisions) == 0 {
316+
return false, "", fmt.Errorf("cannot maintain workload: no catalog content available and no previously installed revision found")
317+
}
318+
// Use the most recent revision and rely on its existing controller loop (don't create a new one).
319+
// Returning true here signals that the rollout has succeeded using the current revision; the
320+
// ClusterExtensionRevision controller will continue to reconcile and maintain the resources
321+
// independently of this apply call.
322+
return true, "", nil
314323
}
315324

316-
// List all existing revisions
317-
existingRevisions, err := bc.getExistingRevisions(ctx, ext.GetName())
325+
// Generate desired revision
326+
desiredRevision, err := bc.RevisionGenerator.GenerateRevision(ctx, contentFS, ext, objectLabels, revisionAnnotations)
318327
if err != nil {
319328
return false, "", err
320329
}
321330

331+
if err := controllerutil.SetControllerReference(ext, desiredRevision, bc.Scheme); err != nil {
332+
return false, "", fmt.Errorf("set ownerref: %w", err)
333+
}
334+
322335
currentRevision := &ocv1.ClusterExtensionRevision{}
323336
state := StateNeedsInstall
324337
// check if we can update the current revision.

internal/operator-controller/applier/helm.go

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,16 @@ func (h *Helm) runPreAuthorizationChecks(ctx context.Context, ext *ocv1.ClusterE
103103
}
104104

105105
func (h *Helm) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExtension, objectLabels map[string]string, storageLabels map[string]string) (bool, string, error) {
106+
// If contentFS is nil, we're maintaining the current state without catalog access.
107+
// In this case, reconcile the existing Helm release if it exists.
108+
if contentFS == nil {
109+
ac, err := h.ActionClientGetter.ActionClientFor(ctx, ext)
110+
if err != nil {
111+
return false, "", err
112+
}
113+
return h.reconcileExistingRelease(ctx, ac, ext)
114+
}
115+
106116
chrt, err := h.buildHelmChart(contentFS, ext)
107117
if err != nil {
108118
return false, "", err
@@ -197,6 +207,45 @@ func (h *Helm) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExte
197207
return true, "", nil
198208
}
199209

210+
// reconcileExistingRelease reconciles an existing Helm release without catalog access.
211+
// This is used when the catalog is unavailable but we need to maintain the current installation.
212+
// It reconciles the release and sets up watchers to ensure resources are maintained.
213+
func (h *Helm) reconcileExistingRelease(ctx context.Context, ac helmclient.ActionInterface, ext *ocv1.ClusterExtension) (bool, string, error) {
214+
rel, err := ac.Get(ext.GetName())
215+
if errors.Is(err, driver.ErrReleaseNotFound) {
216+
return false, "", fmt.Errorf("cannot maintain workload: no catalog content available and no previously installed Helm release found")
217+
}
218+
if err != nil {
219+
return false, "", fmt.Errorf("getting current release: %w", err)
220+
}
221+
222+
// Reconcile the existing release to ensure resources are maintained
223+
if err := ac.Reconcile(rel); err != nil {
224+
// Reconcile failed - resources NOT maintained
225+
// Return false (rollout failed) with error
226+
return false, "", err
227+
}
228+
229+
// At this point: Reconcile succeeded - resources ARE maintained
230+
// The operations below are for setting up monitoring (watches).
231+
// If they fail, the resources are still successfully reconciled and maintained,
232+
// so we return true (rollout succeeded) even though monitoring setup failed.
233+
relObjects, err := util.ManifestObjects(strings.NewReader(rel.Manifest), fmt.Sprintf("%s-release-manifest", rel.Name))
234+
if err != nil {
235+
return true, "", err
236+
}
237+
klog.FromContext(ctx).Info("watching managed objects")
238+
cache, err := h.Manager.Get(ctx, ext)
239+
if err != nil {
240+
return true, "", err
241+
}
242+
if err := cache.Watch(ctx, h.Watcher, relObjects...); err != nil {
243+
return true, "", err
244+
}
245+
246+
return true, "", nil
247+
}
248+
200249
func (h *Helm) buildHelmChart(bundleFS fs.FS, ext *ocv1.ClusterExtension) (*chart.Chart, error) {
201250
if h.HelmChartProvider == nil {
202251
return nil, errors.New("HelmChartProvider is nil")

internal/operator-controller/controllers/clusterextension_controller.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,8 @@ func (r *ClusterExtensionReconciler) Reconcile(ctx context.Context, req ctrl.Req
168168

169169
// ensureAllConditionsWithReason checks that all defined condition types exist in the given ClusterExtension,
170170
// and assigns a specified reason and custom message to any missing condition.
171+
//
172+
//nolint:unparam // reason parameter is designed to be flexible, even if current callers use the same value
171173
func ensureAllConditionsWithReason(ext *ocv1.ClusterExtension, reason v1alpha1.ConditionReason, message string) {
172174
for _, condType := range conditionsets.ConditionTypes {
173175
cond := apimeta.FindStatusCondition(ext.Status.Conditions, condType)

internal/operator-controller/controllers/clusterextension_controller_test.go

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1611,3 +1611,189 @@ func TestGetInstalledBundleHistory(t *testing.T) {
16111611
}
16121612
}
16131613
}
1614+
1615+
// TestResolutionFallbackToInstalledBundle tests the catalog deletion resilience fallback logic
1616+
func TestResolutionFallbackToInstalledBundle(t *testing.T) {
1617+
t.Run("falls back when catalog unavailable and no version change", func(t *testing.T) {
1618+
cl, reconciler := newClientAndReconciler(t, func(d *deps) {
1619+
// Resolver fails (simulating catalog unavailable)
1620+
d.Resolver = resolve.Func(func(_ context.Context, _ *ocv1.ClusterExtension, _ *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) {
1621+
return nil, nil, nil, fmt.Errorf("catalog unavailable")
1622+
})
1623+
// Applier succeeds (resources maintained)
1624+
d.Applier = &MockApplier{
1625+
installCompleted: true,
1626+
installStatus: "",
1627+
err: nil,
1628+
}
1629+
d.RevisionStatesGetter = &MockRevisionStatesGetter{
1630+
RevisionStates: &controllers.RevisionStates{
1631+
Installed: &controllers.RevisionMetadata{
1632+
Package: "test-pkg",
1633+
BundleMetadata: ocv1.BundleMetadata{Name: "test.1.0.0", Version: "1.0.0"},
1634+
Image: "test-image:1.0.0",
1635+
},
1636+
},
1637+
}
1638+
})
1639+
1640+
ctx := context.Background()
1641+
extKey := types.NamespacedName{Name: fmt.Sprintf("test-%s", rand.String(8))}
1642+
1643+
// Create ClusterExtension with no version specified
1644+
ext := &ocv1.ClusterExtension{
1645+
ObjectMeta: metav1.ObjectMeta{Name: extKey.Name},
1646+
Spec: ocv1.ClusterExtensionSpec{
1647+
Source: ocv1.SourceConfig{
1648+
SourceType: "Catalog",
1649+
Catalog: &ocv1.CatalogFilter{
1650+
PackageName: "test-pkg",
1651+
// No version - should fall back
1652+
},
1653+
},
1654+
Namespace: "default",
1655+
ServiceAccount: ocv1.ServiceAccountReference{Name: "default"},
1656+
},
1657+
}
1658+
require.NoError(t, cl.Create(ctx, ext))
1659+
1660+
// Reconcile should succeed (fallback to installed, then apply succeeds)
1661+
// Catalog watch will trigger reconciliation when catalog becomes available again
1662+
res, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey})
1663+
require.NoError(t, err)
1664+
require.Equal(t, ctrl.Result{}, res)
1665+
1666+
// Verify status shows successful reconciliation
1667+
require.NoError(t, cl.Get(ctx, extKey, ext))
1668+
1669+
// Progressing should be Succeeded (apply completed successfully)
1670+
progCond := apimeta.FindStatusCondition(ext.Status.Conditions, ocv1.TypeProgressing)
1671+
require.NotNil(t, progCond)
1672+
require.Equal(t, metav1.ConditionTrue, progCond.Status)
1673+
require.Equal(t, ocv1.ReasonSucceeded, progCond.Reason)
1674+
1675+
// Installed should be True (maintaining current version)
1676+
instCond := apimeta.FindStatusCondition(ext.Status.Conditions, ocv1.TypeInstalled)
1677+
require.NotNil(t, instCond)
1678+
require.Equal(t, metav1.ConditionTrue, instCond.Status)
1679+
require.Equal(t, ocv1.ReasonSucceeded, instCond.Reason)
1680+
})
1681+
1682+
t.Run("fails when version upgrade requested without catalog", func(t *testing.T) {
1683+
cl, reconciler := newClientAndReconciler(t, func(d *deps) {
1684+
d.Resolver = resolve.Func(func(_ context.Context, _ *ocv1.ClusterExtension, _ *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) {
1685+
return nil, nil, nil, fmt.Errorf("catalog unavailable")
1686+
})
1687+
d.RevisionStatesGetter = &MockRevisionStatesGetter{
1688+
RevisionStates: &controllers.RevisionStates{
1689+
Installed: &controllers.RevisionMetadata{
1690+
BundleMetadata: ocv1.BundleMetadata{Name: "test.1.0.0", Version: "1.0.0"},
1691+
},
1692+
},
1693+
}
1694+
})
1695+
1696+
ctx := context.Background()
1697+
extKey := types.NamespacedName{Name: fmt.Sprintf("test-%s", rand.String(8))}
1698+
1699+
// Create ClusterExtension requesting version upgrade
1700+
ext := &ocv1.ClusterExtension{
1701+
ObjectMeta: metav1.ObjectMeta{Name: extKey.Name},
1702+
Spec: ocv1.ClusterExtensionSpec{
1703+
Source: ocv1.SourceConfig{
1704+
SourceType: "Catalog",
1705+
Catalog: &ocv1.CatalogFilter{
1706+
PackageName: "test-pkg",
1707+
Version: "1.0.1", // Requesting upgrade
1708+
},
1709+
},
1710+
Namespace: "default",
1711+
ServiceAccount: ocv1.ServiceAccountReference{Name: "default"},
1712+
},
1713+
}
1714+
require.NoError(t, cl.Create(ctx, ext))
1715+
1716+
// Reconcile should fail (can't upgrade without catalog)
1717+
res, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey})
1718+
require.Error(t, err)
1719+
require.Equal(t, ctrl.Result{}, res)
1720+
1721+
// Verify status shows Retrying
1722+
require.NoError(t, cl.Get(ctx, extKey, ext))
1723+
cond := apimeta.FindStatusCondition(ext.Status.Conditions, ocv1.TypeProgressing)
1724+
require.NotNil(t, cond)
1725+
require.Equal(t, metav1.ConditionTrue, cond.Status)
1726+
require.Equal(t, ocv1.ReasonRetrying, cond.Reason)
1727+
})
1728+
1729+
t.Run("auto-updates when catalog becomes available after fallback", func(t *testing.T) {
1730+
resolveAttempt := 0
1731+
cl, reconciler := newClientAndReconciler(t, func(d *deps) {
1732+
// First attempt: catalog unavailable, then becomes available
1733+
d.Resolver = resolve.Func(func(_ context.Context, _ *ocv1.ClusterExtension, _ *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) {
1734+
resolveAttempt++
1735+
if resolveAttempt == 1 {
1736+
// First reconcile: catalog unavailable
1737+
return nil, nil, nil, fmt.Errorf("catalog temporarily unavailable")
1738+
}
1739+
// Second reconcile (triggered by catalog watch): catalog available with new version
1740+
v := bundle.VersionRelease{Version: bsemver.MustParse("2.0.0")}
1741+
return &declcfg.Bundle{
1742+
Name: "test.2.0.0",
1743+
Package: "test-pkg",
1744+
Image: "test-image:2.0.0",
1745+
}, &v, nil, nil
1746+
})
1747+
d.RevisionStatesGetter = &MockRevisionStatesGetter{
1748+
RevisionStates: &controllers.RevisionStates{
1749+
Installed: &controllers.RevisionMetadata{
1750+
Package: "test-pkg",
1751+
BundleMetadata: ocv1.BundleMetadata{Name: "test.1.0.0", Version: "1.0.0"},
1752+
Image: "test-image:1.0.0",
1753+
},
1754+
},
1755+
}
1756+
d.ImagePuller = &imageutil.MockPuller{ImageFS: fstest.MapFS{}}
1757+
d.Applier = &MockApplier{installCompleted: true}
1758+
})
1759+
1760+
ctx := context.Background()
1761+
extKey := types.NamespacedName{Name: fmt.Sprintf("test-%s", rand.String(8))}
1762+
1763+
ext := &ocv1.ClusterExtension{
1764+
ObjectMeta: metav1.ObjectMeta{Name: extKey.Name},
1765+
Spec: ocv1.ClusterExtensionSpec{
1766+
Source: ocv1.SourceConfig{
1767+
SourceType: "Catalog",
1768+
Catalog: &ocv1.CatalogFilter{
1769+
PackageName: "test-pkg",
1770+
// No version - auto-update to latest
1771+
},
1772+
},
1773+
Namespace: "default",
1774+
ServiceAccount: ocv1.ServiceAccountReference{Name: "default"},
1775+
},
1776+
}
1777+
require.NoError(t, cl.Create(ctx, ext))
1778+
1779+
// First reconcile: catalog unavailable, falls back to v1.0.0
1780+
res, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey})
1781+
require.NoError(t, err)
1782+
require.Equal(t, ctrl.Result{}, res)
1783+
1784+
require.NoError(t, cl.Get(ctx, extKey, ext))
1785+
require.Equal(t, "1.0.0", ext.Status.Install.Bundle.Version)
1786+
1787+
// Second reconcile: simulating catalog watch trigger, catalog now available with v2.0.0
1788+
res, err = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey})
1789+
require.NoError(t, err)
1790+
require.Equal(t, ctrl.Result{}, res)
1791+
1792+
// Should have upgraded to v2.0.0
1793+
require.NoError(t, cl.Get(ctx, extKey, ext))
1794+
require.Equal(t, "2.0.0", ext.Status.Install.Bundle.Version)
1795+
1796+
// Verify resolution was attempted twice (fallback, then success)
1797+
require.Equal(t, 2, resolveAttempt)
1798+
})
1799+
}

0 commit comments

Comments
 (0)