Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion internal/operator-controller/applier/boxcutter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1190,10 +1190,12 @@ func Test_PreAuthorizer_Integration(t *testing.T) {
RevisionGenerator: dummyGenerator,
PreAuthorizer: tc.preAuthorizer(t),
}
err := boxcutter.Apply(t.Context(), dummyBundleFs, ext, nil, revisionAnnotations)
completed, status, err := boxcutter.Apply(t.Context(), dummyBundleFs, ext, nil, revisionAnnotations)
if tc.validate != nil {
tc.validate(t, err)
}
_ = completed
_ = status
})
}
}
Expand Down
102 changes: 102 additions & 0 deletions test/e2e/features/recover.feature
Original file line number Diff line number Diff line change
Expand Up @@ -149,3 +149,105 @@ Feature: Recover cluster extension from errors that might occur during its lifet
Then ClusterExtension is available
And ClusterExtension reports Progressing as True with Reason Succeeded
And ClusterExtension reports Installed as True

# CATALOG DELETION RESILIENCE SCENARIOS

Scenario: Auto-healing continues working after catalog deletion
# This test proves that extensions continue to auto-heal (restore deleted resources) even when
# their source catalog is unavailable. We verify this by:
# 1. Deleting the catalog
# 2. Manually deleting a managed resource (configmap)
# 3. Verifying the resource is automatically restored
#
# Why this proves auto-healing works:
# - If the controller stopped reconciling, the configmap would stay deleted
# - Resource restoration is an observable event that PROVES active reconciliation
# - The deployment staying healthy proves the workload continues running
Given ServiceAccount "olm-sa" with needed permissions is available in ${TEST_NAMESPACE}
And ClusterExtension is applied
"""
apiVersion: olm.operatorframework.io/v1
kind: ClusterExtension
metadata:
name: ${NAME}
spec:
namespace: ${TEST_NAMESPACE}
serviceAccount:
name: olm-sa
source:
sourceType: Catalog
catalog:
packageName: test
selector:
matchLabels:
"olm.operatorframework.io/metadata.name": test-catalog
"""
And ClusterExtension is rolled out
And ClusterExtension is available
And resource "deployment/test-operator" is available
And resource "configmap/test-configmap" is available
When ClusterCatalog "test" is deleted
And resource "configmap/test-configmap" is removed
Then resource "configmap/test-configmap" is eventually restored
And resource "deployment/test-operator" is available

Scenario: Spec changes are allowed when catalog is unavailable
# This test proves that users can modify extension configuration (non-version changes) even when
# the catalog is missing. We verify this by:
# 1. Deleting the catalog
# 2. Changing the preflight configuration in the ClusterExtension spec
# 3. Verifying the controller accepts and reconciles the change successfully
#
# Why this proves spec changes work without catalog:
# - If the controller rejected the change, Progressing would show Retrying or Failed
# - Reconciliation completing (observedGeneration == generation) proves the spec was processed
# - Progressing=Succeeded proves the controller didn't block on missing catalog
# - Extension staying Available proves workload continues running
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pedjak ^ I added documentation to clarify how and why it works in both cases.

Given ServiceAccount "olm-sa" with needed permissions is available in ${TEST_NAMESPACE}
And ClusterExtension is applied
"""
apiVersion: olm.operatorframework.io/v1
kind: ClusterExtension
metadata:
name: ${NAME}
spec:
namespace: ${TEST_NAMESPACE}
serviceAccount:
name: olm-sa
source:
sourceType: Catalog
catalog:
packageName: test
selector:
matchLabels:
"olm.operatorframework.io/metadata.name": test-catalog
"""
And ClusterExtension is rolled out
And ClusterExtension is available
And ClusterCatalog "test" is deleted
When ClusterExtension is updated to add preflight config
"""
apiVersion: olm.operatorframework.io/v1
kind: ClusterExtension
metadata:
name: ${NAME}
spec:
namespace: ${TEST_NAMESPACE}
serviceAccount:
name: olm-sa
install:
preflight:
crdUpgradeSafety:
enforcement: None
source:
sourceType: Catalog
catalog:
packageName: test
selector:
matchLabels:
"olm.operatorframework.io/metadata.name": test-catalog
"""
And ClusterExtension latest generation has been reconciled
And ClusterExtension reports Progressing as True with Reason Succeeded
Then ClusterExtension is available
And ClusterExtension reports Installed as True
30 changes: 30 additions & 0 deletions test/e2e/steps/steps.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ func RegisterSteps(sc *godog.ScenarioContext) {
sc.Step(`^(?i)ClusterExtension is updated(?:\s+.*)?$`, ResourceIsApplied)
sc.Step(`^(?i)ClusterExtension is available$`, ClusterExtensionIsAvailable)
sc.Step(`^(?i)ClusterExtension is rolled out$`, ClusterExtensionIsRolledOut)
sc.Step(`^(?i)ClusterExtension (?:latest generation )?has (?:been )?reconciled(?: the latest generation)?$`, ClusterExtensionReconciledLatestGeneration)
sc.Step(`^(?i)ClusterExtension reports "([^"]+)" as active revision(s?)$`, ClusterExtensionReportsActiveRevisions)
sc.Step(`^(?i)ClusterExtension reports ([[:alnum:]]+) as ([[:alnum:]]+) with Reason ([[:alnum:]]+) and Message:$`, ClusterExtensionReportsCondition)
sc.Step(`^(?i)ClusterExtension reports ([[:alnum:]]+) as ([[:alnum:]]+) with Reason ([[:alnum:]]+) and Message includes:$`, ClusterExtensionReportsConditionWithMessageFragment)
Expand Down Expand Up @@ -89,6 +90,7 @@ func RegisterSteps(sc *godog.ScenarioContext) {
sc.Step(`^(?i)ClusterCatalog "([^"]+)" serves bundles$`, CatalogServesBundles)
sc.Step(`^"([^"]+)" catalog image version "([^"]+)" is also tagged as "([^"]+)"$`, TagCatalogImage)
sc.Step(`^(?i)ClusterCatalog "([^"]+)" image version "([^"]+)" is also tagged as "([^"]+)"$`, TagCatalogImage)
sc.Step(`^(?i)ClusterCatalog "([^"]+)" is deleted$`, CatalogIsDeleted)

sc.Step(`^(?i)operator "([^"]+)" target namespace is "([^"]+)"$`, OperatorTargetNamespace)
sc.Step(`^(?i)Prometheus metrics are returned in the response$`, PrometheusMetricsAreReturned)
Expand Down Expand Up @@ -246,6 +248,25 @@ func ClusterExtensionIsAvailable(ctx context.Context) error {
return nil
}

// ClusterExtensionReconciledLatestGeneration checks that the scenario's
// ClusterExtension has been reconciled at its current generation, i.e. that
// metadata.generation equals the observedGeneration recorded on the
// Progressing condition.
//
// The check is handed to waitFor (presumably it polls until the predicate
// holds or a timeout elapses — confirm against its definition). This function
// itself always returns nil.
func ClusterExtensionReconciledLatestGeneration(ctx context.Context) error {
	// One kubectl call yields "<generation>,<observedGeneration>".
	const generationsPath = "jsonpath={.metadata.generation},{.status.conditions[?(@.type=='Progressing')].observedGeneration}"

	scenario := scenarioCtx(ctx)
	reconciled := func() bool {
		out, err := k8sClient("get", "clusterextension", scenario.clusterExtensionName, "-o", generationsPath)
		if err != nil {
			return false
		}
		// A missing separator, or an empty value on either side, means the
		// status has not been populated yet. Any additional comma ends up in
		// observed and therefore can never compare equal to generation.
		generation, observed, found := strings.Cut(out, ",")
		if !found || generation == "" || observed == "" {
			return false
		}
		// Matching values mean the latest spec has been processed.
		return generation == observed
	}
	waitFor(ctx, reconciled)
	return nil
}

func ClusterExtensionIsRolledOut(ctx context.Context) error {
sc := scenarioCtx(ctx)
require.Eventually(godog.T(ctx), func() bool {
Expand Down Expand Up @@ -727,6 +748,15 @@ func TagCatalogImage(name, oldTag, newTag string) error {
return crane.Tag(imageRef, newTag, crane.Insecure)
}

// CatalogIsDeleted deletes the ClusterCatalog named "<catalogName>-catalog"
// through k8sClient.
//
// The delete is issued with --ignore-not-found=true, so an already-absent
// catalog is not treated as an error, and with --wait=true so the call blocks
// on the deletion. ctx is accepted for the step signature but is not used
// here.
//
// It returns nil on success; otherwise the underlying error is wrapped (with
// %w, so errors.Is / errors.As still work) together with the full catalog
// name.
func CatalogIsDeleted(ctx context.Context, catalogName string) error {
	catalogFullName := fmt.Sprintf("%s-catalog", catalogName)
	if _, err := k8sClient("delete", "clustercatalog", catalogFullName, "--ignore-not-found=true", "--wait=true"); err != nil {
		return fmt.Errorf("failed to delete catalog %q: %w", catalogFullName, err)
	}
	return nil
}

func PrometheusMetricsAreReturned(ctx context.Context) error {
sc := scenarioCtx(ctx)
for podName, mr := range sc.metricsResponse {
Expand Down