Skip to content

Commit c8fa0a0

Browse files
(chore): add e2e tests for workload resilience when catalog is deleted
1 parent 8167ff8 commit c8fa0a0

3 files changed

Lines changed: 138 additions & 1 deletion

File tree

internal/operator-controller/applier/boxcutter_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1190,10 +1190,12 @@ func Test_PreAuthorizer_Integration(t *testing.T) {
11901190
RevisionGenerator: dummyGenerator,
11911191
PreAuthorizer: tc.preAuthorizer(t),
11921192
}
1193-
err := boxcutter.Apply(t.Context(), dummyBundleFs, ext, nil, revisionAnnotations)
1193+
completed, status, err := boxcutter.Apply(t.Context(), dummyBundleFs, ext, nil, revisionAnnotations)
11941194
if tc.validate != nil {
11951195
tc.validate(t, err)
11961196
}
1197+
_ = completed
1198+
_ = status
11971199
})
11981200
}
11991201
}

test/e2e/features/recover.feature

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,3 +149,108 @@ Feature: Recover cluster extension from errors that might occur during its lifet
149149
Then ClusterExtension is available
150150
And ClusterExtension reports Progressing as True with Reason Succeeded
151151
And ClusterExtension reports Installed as True
152+
153+
# CATALOG DELETION RESILIENCE SCENARIOS
154+
155+
Scenario: Auto-healing continues working after catalog deletion
156+
# This test proves that extensions continue to auto-heal (restore deleted resources) even when
157+
# their source catalog is unavailable. We verify this by:
158+
# 1. Deleting the catalog
159+
# 2. Manually deleting a managed resource (configmap)
160+
# 3. Verifying the resource is automatically restored
161+
#
162+
# Why this proves auto-healing works:
163+
# - If the controller stopped reconciling, the configmap would stay deleted
164+
# - Resource restoration is an observable event that PROVES active reconciliation
165+
# - The deployment staying healthy proves the workload continues running
166+
Given ServiceAccount "olm-sa" with needed permissions is available in ${TEST_NAMESPACE}
167+
And ClusterExtension is applied
168+
"""
169+
apiVersion: olm.operatorframework.io/v1
170+
kind: ClusterExtension
171+
metadata:
172+
name: ${NAME}
173+
spec:
174+
namespace: ${TEST_NAMESPACE}
175+
serviceAccount:
176+
name: olm-sa
177+
source:
178+
sourceType: Catalog
179+
catalog:
180+
packageName: test
181+
selector:
182+
matchLabels:
183+
"olm.operatorframework.io/metadata.name": test-catalog
184+
"""
185+
And ClusterExtension is rolled out
186+
And ClusterExtension is available
187+
And resource "deployment/test-operator" is available
188+
And resource "configmap/test-configmap" is available
189+
When ClusterCatalog "test" is deleted
190+
And resource "configmap/test-configmap" is removed
191+
Then resource "configmap/test-configmap" is eventually restored
192+
And resource "deployment/test-operator" is available
193+
194+
Scenario: Spec changes are allowed when catalog is unavailable
195+
# This test proves that users can modify extension configuration (non-version changes) even when
196+
# the catalog is missing. We verify this by:
197+
# 1. Deleting the catalog
198+
# 2. Changing the preflight configuration in the ClusterExtension spec
199+
# 3. Verifying the controller accepts and reconciles the change successfully
200+
#
201+
# Why this proves spec changes work without catalog:
202+
# - If the controller rejected the change, Progressing would show Retrying or Failed
203+
# - Reconciliation completing (observedGeneration == generation) proves the spec was processed
204+
# - Progressing=Succeeded proves the controller didn't block on missing catalog
205+
# - Extension staying Available proves workload continues running
206+
#
207+
# Note: We test install.preflight config (not bundle config) because test-operator supports
208+
# AllNamespaces mode and doesn't require configuration. This works for both Helm and Boxcutter.
209+
Given ServiceAccount "olm-sa" with needed permissions is available in ${TEST_NAMESPACE}
210+
And ClusterExtension is applied
211+
"""
212+
apiVersion: olm.operatorframework.io/v1
213+
kind: ClusterExtension
214+
metadata:
215+
name: ${NAME}
216+
spec:
217+
namespace: ${TEST_NAMESPACE}
218+
serviceAccount:
219+
name: olm-sa
220+
source:
221+
sourceType: Catalog
222+
catalog:
223+
packageName: test
224+
selector:
225+
matchLabels:
226+
"olm.operatorframework.io/metadata.name": test-catalog
227+
"""
228+
And ClusterExtension is rolled out
229+
And ClusterExtension is available
230+
And ClusterCatalog "test" is deleted
231+
When ClusterExtension is updated to add preflight config
232+
"""
233+
apiVersion: olm.operatorframework.io/v1
234+
kind: ClusterExtension
235+
metadata:
236+
name: ${NAME}
237+
spec:
238+
namespace: ${TEST_NAMESPACE}
239+
serviceAccount:
240+
name: olm-sa
241+
install:
242+
preflight:
243+
crdUpgradeSafety:
244+
enforcement: None
245+
source:
246+
sourceType: Catalog
247+
catalog:
248+
packageName: test
249+
selector:
250+
matchLabels:
251+
"olm.operatorframework.io/metadata.name": test-catalog
252+
"""
253+
And ClusterExtension latest generation has been reconciled
254+
And ClusterExtension reports Progressing as True with Reason Succeeded
255+
Then ClusterExtension is available
256+
And ClusterExtension reports Installed as True

test/e2e/steps/steps.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ func RegisterSteps(sc *godog.ScenarioContext) {
5656
sc.Step(`^(?i)ClusterExtension is updated(?:\s+.*)?$`, ResourceIsApplied)
5757
sc.Step(`^(?i)ClusterExtension is available$`, ClusterExtensionIsAvailable)
5858
sc.Step(`^(?i)ClusterExtension is rolled out$`, ClusterExtensionIsRolledOut)
59+
sc.Step(`^(?i)ClusterExtension (?:latest generation )?has (?:been )?reconciled(?: the latest generation)?$`, ClusterExtensionReconciledLatestGeneration)
5960
sc.Step(`^(?i)ClusterExtension reports "([^"]+)" as active revision(s?)$`, ClusterExtensionReportsActiveRevisions)
6061
sc.Step(`^(?i)ClusterExtension reports ([[:alnum:]]+) as ([[:alnum:]]+) with Reason ([[:alnum:]]+) and Message:$`, ClusterExtensionReportsCondition)
6162
sc.Step(`^(?i)ClusterExtension reports ([[:alnum:]]+) as ([[:alnum:]]+) with Reason ([[:alnum:]]+) and Message includes:$`, ClusterExtensionReportsConditionWithMessageFragment)
@@ -89,6 +90,7 @@ func RegisterSteps(sc *godog.ScenarioContext) {
8990
sc.Step(`^(?i)ClusterCatalog "([^"]+)" serves bundles$`, CatalogServesBundles)
9091
sc.Step(`^"([^"]+)" catalog image version "([^"]+)" is also tagged as "([^"]+)"$`, TagCatalogImage)
9192
sc.Step(`^(?i)ClusterCatalog "([^"]+)" image version "([^"]+)" is also tagged as "([^"]+)"$`, TagCatalogImage)
93+
sc.Step(`^(?i)ClusterCatalog "([^"]+)" is deleted$`, CatalogIsDeleted)
9294

9395
sc.Step(`^(?i)operator "([^"]+)" target namespace is "([^"]+)"$`, OperatorTargetNamespace)
9496
sc.Step(`^(?i)Prometheus metrics are returned in the response$`, PrometheusMetricsAreReturned)
@@ -246,6 +248,25 @@ func ClusterExtensionIsAvailable(ctx context.Context) error {
246248
return nil
247249
}
248250

251+
func ClusterExtensionReconciledLatestGeneration(ctx context.Context) error {
252+
sc := scenarioCtx(ctx)
253+
waitFor(ctx, func() bool {
254+
// Get both generation and observedGeneration in a single kubectl call
255+
output, err := k8sClient("get", "clusterextension", sc.clusterExtensionName,
256+
"-o", "jsonpath={.metadata.generation},{.status.conditions[?(@.type=='Progressing')].observedGeneration}")
257+
if err != nil || output == "" {
258+
return false
259+
}
260+
parts := strings.Split(output, ",")
261+
if len(parts) != 2 || parts[0] == "" || parts[1] == "" {
262+
return false
263+
}
264+
// Both exist and are equal means reconciliation happened
265+
return parts[0] == parts[1]
266+
})
267+
return nil
268+
}
269+
249270
func ClusterExtensionIsRolledOut(ctx context.Context) error {
250271
sc := scenarioCtx(ctx)
251272
require.Eventually(godog.T(ctx), func() bool {
@@ -727,6 +748,15 @@ func TagCatalogImage(name, oldTag, newTag string) error {
727748
return crane.Tag(imageRef, newTag, crane.Insecure)
728749
}
729750

751+
func CatalogIsDeleted(ctx context.Context, catalogName string) error {
752+
catalogFullName := fmt.Sprintf("%s-catalog", catalogName)
753+
_, err := k8sClient("delete", "clustercatalog", catalogFullName, "--ignore-not-found=true", "--wait=true")
754+
if err != nil {
755+
return fmt.Errorf("failed to delete catalog: %v", err)
756+
}
757+
return nil
758+
}
759+
730760
func PrometheusMetricsAreReturned(ctx context.Context) error {
731761
sc := scenarioCtx(ctx)
732762
for podName, mr := range sc.metricsResponse {

0 commit comments

Comments
 (0)