Skip to content

Commit 6aa486e

Browse files
🐛 Fix catalog deletion issues found in e2e tests
This fix resolves the issues discovered by the e2e tests, ensuring Helm extensions keep running when catalogs are deleted. Issues Fixed: Issue 1: Extensions fail when catalog is deleted Problem: Resolution failure causes complete failure Fix: Fall back to installed bundle when catalog unavailable Issue 2: Resources break when catalog is deleted Problem: No reconciliation without catalog content Fix: Helm reconcileExistingRelease() maintains resources Issue 3: Config changes rejected without catalog Problem: Any spec change triggers resolution failure Fix: Allow non-version changes to proceed with installed bundle Issue 4: Version upgrades fail silently Problem: Can't distinguish catalog deletion from other errors Fix: Check if catalogs exist, block upgrades explicitly How the Fix Works: When bundle resolution fails, the controller now: 1. Checks if a bundle is already installed 2. Checks if spec is requesting a version change 3. Checks if catalogs still exist in the cluster Decision Logic: - Installed + No version change + Catalog deleted → Use installed bundle - Installed + Version change + Catalog deleted → Block and retry - Installed + No version change + Catalog exists → Retry (transient error) - No installed bundle → Fail (can't proceed) For Helm Runtime: - reconcileExistingRelease() uses Helm's built-in reconciliation - Maintains resources via Server-Side Apply - Sets up watchers to monitor and restore resources - Returns success if reconcile succeeds (even if watch setup fails) Controller watches ClusterCatalog resources, so reconciliation automatically resumes when catalogs return. Unit Tests Added: - TestResolutionFallbackToInstalledBundle (3 scenarios) - TestCheckCatalogsExist (5 scenarios) All e2e tests now pass.
1 parent 5d51a84 commit 6aa486e

5 files changed

Lines changed: 485 additions & 9 deletions

File tree

cmd/operator-controller/main.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -626,7 +626,7 @@ func (c *boxcutterReconcilerConfigurator) Configure(ceReconciler *controllers.Cl
626626
controllers.HandleFinalizers(c.finalizers),
627627
controllers.MigrateStorage(storageMigrator),
628628
controllers.RetrieveRevisionStates(revisionStatesGetter),
629-
controllers.ResolveBundle(c.resolver),
629+
controllers.ResolveBundle(c.resolver, c.mgr.GetClient()),
630630
controllers.UnpackBundle(c.imagePuller, c.imageCache),
631631
controllers.ApplyBundleWithBoxcutter(appl),
632632
}
@@ -737,7 +737,7 @@ func (c *helmReconcilerConfigurator) Configure(ceReconciler *controllers.Cluster
737737
ceReconciler.ReconcileSteps = []controllers.ReconcileStepFunc{
738738
controllers.HandleFinalizers(c.finalizers),
739739
controllers.RetrieveRevisionStates(revisionStatesGetter),
740-
controllers.ResolveBundle(c.resolver),
740+
controllers.ResolveBundle(c.resolver, c.mgr.GetClient()),
741741
controllers.UnpackBundle(c.imagePuller, c.imageCache),
742742
controllers.ApplyBundle(appl),
743743
}

internal/operator-controller/applier/helm.go

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,16 @@ func (h *Helm) runPreAuthorizationChecks(ctx context.Context, ext *ocv1.ClusterE
103103
}
104104

105105
func (h *Helm) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExtension, objectLabels map[string]string, storageLabels map[string]string) (bool, string, error) {
106+
// If contentFS is nil, we're maintaining the current state without catalog access.
107+
// In this case, reconcile the existing Helm release if it exists.
108+
if contentFS == nil {
109+
ac, err := h.ActionClientGetter.ActionClientFor(ctx, ext)
110+
if err != nil {
111+
return false, "", err
112+
}
113+
return h.reconcileExistingRelease(ctx, ac, ext)
114+
}
115+
106116
chrt, err := h.buildHelmChart(contentFS, ext)
107117
if err != nil {
108118
return false, "", err
@@ -197,6 +207,45 @@ func (h *Helm) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExte
197207
return true, "", nil
198208
}
199209

210+
// reconcileExistingRelease reconciles an existing Helm release without catalog access.
211+
// This is used when the catalog is unavailable but we need to maintain the current installation.
212+
// It reconciles the release and sets up watchers to ensure resources are maintained.
213+
func (h *Helm) reconcileExistingRelease(ctx context.Context, ac helmclient.ActionInterface, ext *ocv1.ClusterExtension) (bool, string, error) {
214+
rel, err := ac.Get(ext.GetName())
215+
if errors.Is(err, driver.ErrReleaseNotFound) {
216+
return false, "", fmt.Errorf("cannot maintain workload: no catalog content available and no previously installed Helm release found")
217+
}
218+
if err != nil {
219+
return false, "", fmt.Errorf("getting current release: %w", err)
220+
}
221+
222+
// Reconcile the existing release to ensure resources are maintained
223+
if err := ac.Reconcile(rel); err != nil {
224+
// Reconcile failed - resources NOT maintained
225+
// Return false (rollout failed) with error
226+
return false, "", err
227+
}
228+
229+
// At this point: Reconcile succeeded - resources ARE maintained
230+
// The operations below are for setting up monitoring (watches).
231+
// If they fail, the resources are still successfully reconciled and maintained,
232+
// so we return true (rollout succeeded) even though monitoring setup failed.
233+
relObjects, err := util.ManifestObjects(strings.NewReader(rel.Manifest), fmt.Sprintf("%s-release-manifest", rel.Name))
234+
if err != nil {
235+
return true, "", err
236+
}
237+
klog.FromContext(ctx).Info("watching managed objects")
238+
cache, err := h.Manager.Get(ctx, ext)
239+
if err != nil {
240+
return true, "", err
241+
}
242+
if err := cache.Watch(ctx, h.Watcher, relObjects...); err != nil {
243+
return true, "", err
244+
}
245+
246+
return true, "", nil
247+
}
248+
200249
func (h *Helm) buildHelmChart(bundleFS fs.FS, ext *ocv1.ClusterExtension) (*chart.Chart, error) {
201250
if h.HelmChartProvider == nil {
202251
return nil, errors.New("HelmChartProvider is nil")

internal/operator-controller/controllers/clusterextension_controller_test.go

Lines changed: 301 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1611,3 +1611,304 @@ func TestGetInstalledBundleHistory(t *testing.T) {
16111611
}
16121612
}
16131613
}
1614+
1615+
// TestResolutionFallbackToInstalledBundle tests the catalog deletion resilience fallback logic
1616+
func TestResolutionFallbackToInstalledBundle(t *testing.T) {
1617+
t.Run("falls back when catalog unavailable and no version change", func(t *testing.T) {
1618+
cl, reconciler := newClientAndReconciler(t, func(d *deps) {
1619+
// Resolver fails (simulating catalog unavailable)
1620+
d.Resolver = resolve.Func(func(_ context.Context, _ *ocv1.ClusterExtension, _ *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) {
1621+
return nil, nil, nil, fmt.Errorf("catalog unavailable")
1622+
})
1623+
// Applier succeeds (resources maintained)
1624+
d.Applier = &MockApplier{
1625+
installCompleted: true,
1626+
installStatus: "",
1627+
err: nil,
1628+
}
1629+
d.RevisionStatesGetter = &MockRevisionStatesGetter{
1630+
RevisionStates: &controllers.RevisionStates{
1631+
Installed: &controllers.RevisionMetadata{
1632+
Package: "test-pkg",
1633+
BundleMetadata: ocv1.BundleMetadata{Name: "test.1.0.0", Version: "1.0.0"},
1634+
Image: "test-image:1.0.0",
1635+
},
1636+
},
1637+
}
1638+
})
1639+
1640+
ctx := context.Background()
1641+
extKey := types.NamespacedName{Name: fmt.Sprintf("test-%s", rand.String(8))}
1642+
1643+
// Create ClusterExtension with no version specified
1644+
ext := &ocv1.ClusterExtension{
1645+
ObjectMeta: metav1.ObjectMeta{Name: extKey.Name},
1646+
Spec: ocv1.ClusterExtensionSpec{
1647+
Source: ocv1.SourceConfig{
1648+
SourceType: "Catalog",
1649+
Catalog: &ocv1.CatalogFilter{
1650+
PackageName: "test-pkg",
1651+
// No version - should fall back
1652+
},
1653+
},
1654+
Namespace: "default",
1655+
ServiceAccount: ocv1.ServiceAccountReference{Name: "default"},
1656+
},
1657+
}
1658+
require.NoError(t, cl.Create(ctx, ext))
1659+
1660+
// Reconcile should succeed (fallback to installed, then apply succeeds)
1661+
// Catalog watch will trigger reconciliation when catalog becomes available again
1662+
res, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey})
1663+
require.NoError(t, err)
1664+
require.Equal(t, ctrl.Result{}, res)
1665+
1666+
// Verify status shows successful reconciliation
1667+
require.NoError(t, cl.Get(ctx, extKey, ext))
1668+
1669+
// Progressing should be Succeeded (apply completed successfully)
1670+
progCond := apimeta.FindStatusCondition(ext.Status.Conditions, ocv1.TypeProgressing)
1671+
require.NotNil(t, progCond)
1672+
require.Equal(t, metav1.ConditionTrue, progCond.Status)
1673+
require.Equal(t, ocv1.ReasonSucceeded, progCond.Reason)
1674+
1675+
// Installed should be True (maintaining current version)
1676+
instCond := apimeta.FindStatusCondition(ext.Status.Conditions, ocv1.TypeInstalled)
1677+
require.NotNil(t, instCond)
1678+
require.Equal(t, metav1.ConditionTrue, instCond.Status)
1679+
require.Equal(t, ocv1.ReasonSucceeded, instCond.Reason)
1680+
})
1681+
1682+
t.Run("fails when version upgrade requested without catalog", func(t *testing.T) {
1683+
cl, reconciler := newClientAndReconciler(t, func(d *deps) {
1684+
d.Resolver = resolve.Func(func(_ context.Context, _ *ocv1.ClusterExtension, _ *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) {
1685+
return nil, nil, nil, fmt.Errorf("catalog unavailable")
1686+
})
1687+
d.RevisionStatesGetter = &MockRevisionStatesGetter{
1688+
RevisionStates: &controllers.RevisionStates{
1689+
Installed: &controllers.RevisionMetadata{
1690+
BundleMetadata: ocv1.BundleMetadata{Name: "test.1.0.0", Version: "1.0.0"},
1691+
},
1692+
},
1693+
}
1694+
})
1695+
1696+
ctx := context.Background()
1697+
extKey := types.NamespacedName{Name: fmt.Sprintf("test-%s", rand.String(8))}
1698+
1699+
// Create ClusterExtension requesting version upgrade
1700+
ext := &ocv1.ClusterExtension{
1701+
ObjectMeta: metav1.ObjectMeta{Name: extKey.Name},
1702+
Spec: ocv1.ClusterExtensionSpec{
1703+
Source: ocv1.SourceConfig{
1704+
SourceType: "Catalog",
1705+
Catalog: &ocv1.CatalogFilter{
1706+
PackageName: "test-pkg",
1707+
Version: "1.0.1", // Requesting upgrade
1708+
},
1709+
},
1710+
Namespace: "default",
1711+
ServiceAccount: ocv1.ServiceAccountReference{Name: "default"},
1712+
},
1713+
}
1714+
require.NoError(t, cl.Create(ctx, ext))
1715+
1716+
// Reconcile should fail (can't upgrade without catalog)
1717+
res, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey})
1718+
require.Error(t, err)
1719+
require.Equal(t, ctrl.Result{}, res)
1720+
1721+
// Verify status shows Retrying
1722+
require.NoError(t, cl.Get(ctx, extKey, ext))
1723+
cond := apimeta.FindStatusCondition(ext.Status.Conditions, ocv1.TypeProgressing)
1724+
require.NotNil(t, cond)
1725+
require.Equal(t, metav1.ConditionTrue, cond.Status)
1726+
require.Equal(t, ocv1.ReasonRetrying, cond.Reason)
1727+
})
1728+
1729+
t.Run("auto-updates when catalog becomes available after fallback", func(t *testing.T) {
1730+
resolveAttempt := 0
1731+
cl, reconciler := newClientAndReconciler(t, func(d *deps) {
1732+
// First attempt: catalog unavailable, then becomes available
1733+
d.Resolver = resolve.Func(func(_ context.Context, _ *ocv1.ClusterExtension, _ *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) {
1734+
resolveAttempt++
1735+
if resolveAttempt == 1 {
1736+
// First reconcile: catalog unavailable
1737+
return nil, nil, nil, fmt.Errorf("catalog temporarily unavailable")
1738+
}
1739+
// Second reconcile (triggered by catalog watch): catalog available with new version
1740+
v := bundle.VersionRelease{Version: bsemver.MustParse("2.0.0")}
1741+
return &declcfg.Bundle{
1742+
Name: "test.2.0.0",
1743+
Package: "test-pkg",
1744+
Image: "test-image:2.0.0",
1745+
}, &v, nil, nil
1746+
})
1747+
d.RevisionStatesGetter = &MockRevisionStatesGetter{
1748+
RevisionStates: &controllers.RevisionStates{
1749+
Installed: &controllers.RevisionMetadata{
1750+
Package: "test-pkg",
1751+
BundleMetadata: ocv1.BundleMetadata{Name: "test.1.0.0", Version: "1.0.0"},
1752+
Image: "test-image:1.0.0",
1753+
},
1754+
},
1755+
}
1756+
d.ImagePuller = &imageutil.MockPuller{ImageFS: fstest.MapFS{}}
1757+
d.Applier = &MockApplier{installCompleted: true}
1758+
})
1759+
1760+
ctx := context.Background()
1761+
extKey := types.NamespacedName{Name: fmt.Sprintf("test-%s", rand.String(8))}
1762+
1763+
ext := &ocv1.ClusterExtension{
1764+
ObjectMeta: metav1.ObjectMeta{Name: extKey.Name},
1765+
Spec: ocv1.ClusterExtensionSpec{
1766+
Source: ocv1.SourceConfig{
1767+
SourceType: "Catalog",
1768+
Catalog: &ocv1.CatalogFilter{
1769+
PackageName: "test-pkg",
1770+
// No version - auto-update to latest
1771+
},
1772+
},
1773+
Namespace: "default",
1774+
ServiceAccount: ocv1.ServiceAccountReference{Name: "default"},
1775+
},
1776+
}
1777+
require.NoError(t, cl.Create(ctx, ext))
1778+
1779+
// First reconcile: catalog unavailable, falls back to v1.0.0
1780+
res, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey})
1781+
require.NoError(t, err)
1782+
require.Equal(t, ctrl.Result{}, res)
1783+
1784+
require.NoError(t, cl.Get(ctx, extKey, ext))
1785+
require.Equal(t, "1.0.0", ext.Status.Install.Bundle.Version)
1786+
1787+
// Second reconcile: simulating catalog watch trigger, catalog now available with v2.0.0
1788+
res, err = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey})
1789+
require.NoError(t, err)
1790+
require.Equal(t, ctrl.Result{}, res)
1791+
1792+
// Should have upgraded to v2.0.0
1793+
require.NoError(t, cl.Get(ctx, extKey, ext))
1794+
require.Equal(t, "2.0.0", ext.Status.Install.Bundle.Version)
1795+
1796+
// Verify resolution was attempted twice (fallback, then success)
1797+
require.Equal(t, 2, resolveAttempt)
1798+
})
1799+
}
1800+
1801+
func TestCheckCatalogsExist(t *testing.T) {
1802+
t.Run("returns false when no catalogs exist", func(t *testing.T) {
1803+
cl := newClient(t)
1804+
ctx := context.Background()
1805+
1806+
ext := &ocv1.ClusterExtension{
1807+
Spec: ocv1.ClusterExtensionSpec{
1808+
Source: ocv1.SourceConfig{
1809+
SourceType: "Catalog",
1810+
Catalog: &ocv1.CatalogFilter{
1811+
PackageName: "test-pkg",
1812+
},
1813+
},
1814+
},
1815+
}
1816+
1817+
exists, err := controllers.CheckCatalogsExist(ctx, cl, ext)
1818+
require.NoError(t, err)
1819+
require.False(t, exists, "should return false when no catalogs exist")
1820+
})
1821+
1822+
t.Run("returns false when CRD doesn't exist", func(t *testing.T) {
1823+
cl := newClient(t)
1824+
ctx := context.Background()
1825+
1826+
ext := &ocv1.ClusterExtension{
1827+
Spec: ocv1.ClusterExtensionSpec{
1828+
Source: ocv1.SourceConfig{
1829+
SourceType: "Catalog",
1830+
Catalog: &ocv1.CatalogFilter{
1831+
PackageName: "test-pkg",
1832+
},
1833+
},
1834+
},
1835+
}
1836+
1837+
// ClusterCatalog CRD is not installed in test env
1838+
exists, err := controllers.CheckCatalogsExist(ctx, cl, ext)
1839+
require.NoError(t, err, "should not return error when CRD doesn't exist")
1840+
require.False(t, exists, "should treat missing CRD as no catalogs exist")
1841+
})
1842+
1843+
t.Run("returns false when no selector provided", func(t *testing.T) {
1844+
cl := newClient(t)
1845+
ctx := context.Background()
1846+
1847+
ext := &ocv1.ClusterExtension{
1848+
Spec: ocv1.ClusterExtensionSpec{
1849+
Source: ocv1.SourceConfig{
1850+
SourceType: "Catalog",
1851+
Catalog: &ocv1.CatalogFilter{
1852+
PackageName: "test-pkg",
1853+
Selector: nil, // No selector
1854+
},
1855+
},
1856+
},
1857+
}
1858+
1859+
exists, err := controllers.CheckCatalogsExist(ctx, cl, ext)
1860+
require.NoError(t, err)
1861+
require.False(t, exists, "should return false when no catalogs exist (no selector)")
1862+
})
1863+
1864+
t.Run("returns false when empty selector provided", func(t *testing.T) {
1865+
cl := newClient(t)
1866+
ctx := context.Background()
1867+
1868+
ext := &ocv1.ClusterExtension{
1869+
Spec: ocv1.ClusterExtensionSpec{
1870+
Source: ocv1.SourceConfig{
1871+
SourceType: "Catalog",
1872+
Catalog: &ocv1.CatalogFilter{
1873+
PackageName: "test-pkg",
1874+
Selector: &metav1.LabelSelector{}, // Empty selector (matches everything)
1875+
},
1876+
},
1877+
},
1878+
}
1879+
1880+
exists, err := controllers.CheckCatalogsExist(ctx, cl, ext)
1881+
require.NoError(t, err, "empty selector should not cause error")
1882+
require.False(t, exists, "should return false when no catalogs exist (empty selector)")
1883+
})
1884+
1885+
t.Run("returns error for invalid selector", func(t *testing.T) {
1886+
cl := newClient(t)
1887+
ctx := context.Background()
1888+
1889+
ext := &ocv1.ClusterExtension{
1890+
Spec: ocv1.ClusterExtensionSpec{
1891+
Source: ocv1.SourceConfig{
1892+
SourceType: "Catalog",
1893+
Catalog: &ocv1.CatalogFilter{
1894+
PackageName: "test-pkg",
1895+
Selector: &metav1.LabelSelector{
1896+
MatchExpressions: []metav1.LabelSelectorRequirement{
1897+
{
1898+
Key: "invalid",
1899+
Operator: "InvalidOperator", // Invalid operator
1900+
Values: []string{"value"},
1901+
},
1902+
},
1903+
},
1904+
},
1905+
},
1906+
},
1907+
}
1908+
1909+
exists, err := controllers.CheckCatalogsExist(ctx, cl, ext)
1910+
require.Error(t, err, "should return error for invalid selector")
1911+
require.Contains(t, err.Error(), "invalid catalog selector")
1912+
require.False(t, exists)
1913+
})
1914+
}

0 commit comments

Comments
 (0)