Skip to content

Commit d619f4e

Browse files
mclasmeier (Moritz Clasmeier) and claude
authored
Enable OLM tests in CI with cluster resource dump on failure (#146)
Co-authored-by: Moritz Clasmeier <mclasmeier@redhat.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 2f6373e commit d619f4e

9 files changed

Lines changed: 205 additions & 13 deletions

File tree

.github/workflows/create-dev-cluster.yml

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,13 @@ on:
66
cluster-name:
77
required: true
88
type: string
9+
flavor:
10+
required: true
11+
type: string
12+
args:
13+
required: false
14+
type: string
15+
default: ''
916
outputs:
1017
cluster-name:
1118
description: "Name of the created cluster"
@@ -19,9 +26,9 @@ jobs:
1926
steps:
2027
- uses: stackrox/actions/infra/create-cluster@v1
2128
with:
22-
flavor: gke-default
29+
flavor: ${{ inputs.flavor }}
2330
name: ${{ inputs.cluster-name }}
24-
args: machine-type=e2-standard-4,nodes=3,gcp-image-type=ubuntu_containerd
31+
args: ${{ inputs.args }}
2532
lifespan: "2h"
2633
wait: true
2734
token: ${{ secrets.INFRA_CI_TOKEN }}

.github/workflows/e2e-tests.yml

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,14 @@ on:
99
image:
1010
required: true
1111
type: string
12+
cluster-type:
13+
required: false
14+
type: string
15+
default: 'gke'
16+
skip-olm-tests:
17+
required: false
18+
type: string
19+
default: 'true'
1220
env:
1321
REGISTRY: quay.io
1422
IMAGE_NAME: rhacs-eng/roxie
@@ -23,7 +31,6 @@ jobs:
2331
KUBECONFIG: /github/home/artifacts/kubeconfig
2432
INFRA_TOKEN: ${{ secrets.INFRA_CI_TOKEN }}
2533
INFRACTL: bin/infractl -k -e localhost:8443
26-
USE_GKE_GCLOUD_AUTH_PLUGIN: "True"
2734
steps:
2835
- name: Checkout
2936
uses: actions/checkout@v6
@@ -65,15 +72,21 @@ jobs:
6572
roxctl version
6673
6774
- name: Authenticate to GCloud
75+
if: inputs.cluster-type == 'gke'
6876
uses: google-github-actions/auth@v3
6977
with:
7078
credentials_json: ${{ secrets.ROXIE_CI_AUTOMATION_GCP_SA }}
7179

7280
- name: Set up Cloud SDK
81+
if: inputs.cluster-type == 'gke'
7382
uses: "google-github-actions/setup-gcloud@v3"
7483
with:
7584
install_components: "gke-gcloud-auth-plugin"
7685

86+
- name: Configure GKE auth plugin
87+
if: inputs.cluster-type == 'gke'
88+
run: echo "USE_GKE_GCLOUD_AUTH_PLUGIN=True" >> "$GITHUB_ENV"
89+
7790
- name: Download production infractl
7891
uses: stackrox/actions/infra/install-infractl@v1
7992

@@ -89,7 +102,7 @@ jobs:
89102
env:
90103
REGISTRY_USERNAME: ${{ secrets.QUAY_RHACS_ENG_RO_USERNAME }}
91104
REGISTRY_PASSWORD: ${{ secrets.QUAY_RHACS_ENG_RO_PASSWORD }}
92-
SKIP_OLM_TESTS: "true"
105+
SKIP_OLM_TESTS: ${{ inputs.skip-olm-tests == 'true' && 'true' || '' }}
93106
run: |
94107
make run-test-e2e
95108

.github/workflows/main-push.yml

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,9 @@ jobs:
1515
create-dev-cluster:
1616
uses: ./.github/workflows/create-dev-cluster.yml
1717
with:
18-
cluster-name: infra-roxie-main-${{ github.run_number }}
18+
cluster-name: infra-roxie-main-${{ github.run_number }}-gke
19+
flavor: gke-default
20+
args: machine-type=e2-standard-4,nodes=3,gcp-image-type=ubuntu_containerd
1921
secrets: inherit
2022

2123
build-roxie-image:

.github/workflows/pr.yml

Lines changed: 54 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -12,10 +12,38 @@ jobs:
1212
unit-tests:
1313
uses: ./.github/workflows/unit-tests.yml
1414

15-
create-dev-cluster:
15+
check-olm-label:
16+
runs-on: ubuntu-latest
17+
outputs:
18+
has-label: ${{ steps.check.outputs.has-label }}
19+
steps:
20+
- name: Check for olm-tests label
21+
id: check
22+
run: |
23+
has_label="${{ contains(github.event.pull_request.labels.*.name, 'olm-tests') }}"
24+
echo "has-label=${has_label}" >> "$GITHUB_OUTPUT"
25+
if [ "$has_label" = "true" ]; then
26+
echo "::notice::olm-tests label is set — OpenShift cluster will be created"
27+
else
28+
echo "::notice::olm-tests label is not set — skipping OpenShift cluster"
29+
fi
30+
31+
create-gke-cluster:
1632
uses: ./.github/workflows/create-dev-cluster.yml
1733
with:
18-
cluster-name: infra-roxie-pr-${{ github.event.pull_request.number }}
34+
cluster-name: infra-roxie-pr-${{ github.event.pull_request.number }}-gke
35+
flavor: gke-default
36+
args: machine-type=e2-standard-4,nodes=3,gcp-image-type=ubuntu_containerd
37+
secrets: inherit
38+
39+
create-openshift-cluster:
40+
needs: check-olm-label
41+
if: needs.check-olm-label.outputs.has-label == 'true'
42+
uses: ./.github/workflows/create-dev-cluster.yml
43+
with:
44+
cluster-name: infra-roxie-pr-${{ github.event.pull_request.number }}-openshift
45+
flavor: ocp-4
46+
args: master-node-type=e2-standard-4,worker-node-type=e2-standard-8,master-node-count=3,worker-node-count=3
1947
secrets: inherit
2048

2149
build-roxie-image:
@@ -26,17 +54,35 @@ jobs:
2654
secrets: inherit
2755

2856
e2e-tests:
29-
needs: [ create-dev-cluster, build-roxie-image ]
57+
needs: [ create-gke-cluster, build-roxie-image ]
58+
uses: ./.github/workflows/e2e-tests.yml
59+
with:
60+
cluster-name: ${{ needs.create-gke-cluster.outputs.cluster-name }}
61+
image: ${{ needs.build-roxie-image.outputs.image }}
62+
secrets: inherit
63+
64+
e2e-tests-openshift:
65+
needs: [ create-openshift-cluster, build-roxie-image ]
3066
uses: ./.github/workflows/e2e-tests.yml
3167
with:
32-
cluster-name: ${{ needs.create-dev-cluster.outputs.cluster-name }}
68+
cluster-name: ${{ needs.create-openshift-cluster.outputs.cluster-name }}
3369
image: ${{ needs.build-roxie-image.outputs.image }}
70+
cluster-type: openshift
71+
skip-olm-tests: 'false'
72+
secrets: inherit
73+
74+
delete-gke-cluster:
75+
if: ${{ always() && needs.create-gke-cluster.result == 'success' }}
76+
needs: [ create-gke-cluster, e2e-tests ]
77+
uses: ./.github/workflows/delete-dev-cluster.yml
78+
with:
79+
cluster-name: ${{ needs.create-gke-cluster.outputs.cluster-name }}
3480
secrets: inherit
3581

36-
delete-dev-cluster:
37-
if: ${{ always() && needs.create-dev-cluster.result == 'success' }}
38-
needs: [ create-dev-cluster, e2e-tests ]
82+
delete-openshift-cluster:
83+
if: ${{ always() && needs.create-openshift-cluster.result == 'success' }}
84+
needs: [ create-openshift-cluster, e2e-tests-openshift ]
3985
uses: ./.github/workflows/delete-dev-cluster.yml
4086
with:
41-
cluster-name: ${{ needs.create-dev-cluster.outputs.cluster-name }}
87+
cluster-name: ${{ needs.create-openshift-cluster.outputs.cluster-name }}
4288
secrets: inherit

internal/deployer/deployer.go

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -132,6 +132,9 @@ func (d *Deployer) deleteCentralResources(ctx context.Context) error {
132132
} else {
133133
d.logger.Info("Deletion of Central resources requested, but Central CR is not present anymore")
134134
}
135+
if d.verbose {
136+
d.logger.Dim("Deleted Central CR")
137+
}
135138

136139
for _, resource := range []ResourceToDelete{
137140
{Name: "central-db", Kind: "pvc"},

tests/e2e/basic_test.go

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,8 @@ import (
1111

1212
// TestDeployBothSimple tests deploying both components together (simplest scenario)
1313
func TestDeployBothSimple(t *testing.T) {
14+
dumpClusterStateOnFailure(t)
15+
1416
// Create temporary envrc file
1517
envrcFile, err := os.CreateTemp(t.TempDir(), ".envrc.roxie-test-*")
1618
if err != nil {

tests/e2e/e2e_test.go

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,8 @@ func TestMain(m *testing.M) {
3939
}
4040

4141
func TestDeployBothComponentsTogetherInSingleNamespace(t *testing.T) {
42+
dumpClusterStateOnFailure(t)
43+
4244
// Create temporary envrc file.
4345
envrcFile, err := os.CreateTemp(t.TempDir(), ".envrc.roxie-test-*")
4446
if err != nil {

tests/e2e/helpers.go

Lines changed: 109 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -239,6 +239,115 @@ func verifySecuredClusterNotInstalled(t *testing.T, namespace string) {
239239
}
240240
}
241241

242+
var clusterDumpNamespaces = []string{
243+
"rhacs-operator-system",
244+
"acs-central",
245+
"acs-sensor",
246+
"stackrox",
247+
}
248+
249+
func dumpClusterStateOnFailure(t *testing.T) {
250+
t.Helper()
251+
t.Cleanup(func() {
252+
if !t.Failed() {
253+
return
254+
}
255+
dumpClusterResources(t)
256+
})
257+
}
258+
259+
func dumpClusterResources(t *testing.T) {
260+
t.Helper()
261+
fmt.Fprintf(os.Stderr, "=== CLUSTER RESOURCE DUMP (test %s failed) ===\n", t.Name())
262+
263+
runKubectlDump("get", "namespaces")
264+
265+
for _, ns := range clusterDumpNamespaces {
266+
fmt.Fprintf(os.Stderr, "--- Namespace: %s ---\n", ns)
267+
runKubectlDump("get", "pods", "-n", ns, "-o", "wide")
268+
runKubectlDump("describe", "pods", "-n", ns)
269+
runKubectlDump("get", "deployments", "-n", ns, "-o", "wide")
270+
runKubectlDump("describe", "deployments", "-n", ns)
271+
runKubectlDump("get", "daemonsets", "-n", ns, "-o", "wide")
272+
runKubectlDump("describe", "daemonsets", "-n", ns)
273+
runKubectlDump("get", "events", "-n", ns, "--sort-by=.lastTimestamp")
274+
dumpLogsForFailingPods(ns)
275+
}
276+
277+
dumpACSCustomResources()
278+
dumpOLMResources()
279+
280+
fmt.Fprintln(os.Stderr, "=== END CLUSTER RESOURCE DUMP ===")
281+
}
282+
283+
func dumpACSCustomResources() {
284+
fmt.Fprintln(os.Stderr, "--- ACS Custom Resources ---")
285+
for _, ns := range clusterDumpNamespaces {
286+
runKubectlDump("get", "centrals.platform.stackrox.io", "-n", ns, "-o", "yaml")
287+
runKubectlDump("get", "securedclusters.platform.stackrox.io", "-n", ns, "-o", "yaml")
288+
}
289+
}
290+
291+
func dumpOLMResources() {
292+
cmd := exec.Command("kubectl", "api-resources", "--api-group=operators.coreos.com", "-o", "name")
293+
output, err := cmd.Output()
294+
if err != nil || strings.TrimSpace(string(output)) == "" {
295+
fmt.Fprintln(os.Stderr, "[dump] OLM not installed, skipping OLM resource dump")
296+
return
297+
}
298+
299+
fmt.Fprintln(os.Stderr, "--- OLM Resources ---")
300+
operatorNamespace := "rhacs-operator-system"
301+
runKubectlDump("get", "subscriptions.operators.coreos.com", "-n", operatorNamespace, "-o", "wide")
302+
runKubectlDump("describe", "subscriptions.operators.coreos.com", "-n", operatorNamespace)
303+
runKubectlDump("get", "installplans.operators.coreos.com", "-n", operatorNamespace, "-o", "wide")
304+
runKubectlDump("describe", "installplans.operators.coreos.com", "-n", operatorNamespace)
305+
runKubectlDump("get", "catalogsources.operators.coreos.com", "-n", operatorNamespace, "-o", "wide")
306+
runKubectlDump("describe", "catalogsources.operators.coreos.com", "-n", operatorNamespace)
307+
runKubectlDump("get", "clusterserviceversions.operators.coreos.com", "-n", operatorNamespace, "-o", "wide")
308+
runKubectlDump("describe", "clusterserviceversions.operators.coreos.com", "-n", operatorNamespace)
309+
runKubectlDump("get", "operatorgroups.operators.coreos.com", "-n", operatorNamespace, "-o", "wide")
310+
runKubectlDump("describe", "operatorgroups.operators.coreos.com", "-n", operatorNamespace)
311+
}
312+
313+
func runKubectlDump(args ...string) {
314+
fmt.Fprintf(os.Stderr, "## kubectl %s\n", strings.Join(args, " "))
315+
cmd := exec.Command("kubectl", args...)
316+
cmd.Stdout = os.Stderr
317+
cmd.Stderr = os.Stderr
318+
if err := cmd.Run(); err != nil {
319+
fmt.Fprintf(os.Stderr, "kubectl failed: %v\n", err)
320+
}
321+
fmt.Fprintln(os.Stderr)
322+
}
323+
324+
func dumpLogsForFailingPods(namespace string) {
325+
cmd := exec.Command("kubectl", "get", "pods", "-n", namespace,
326+
"-o", "jsonpath={range .items[*]}{.metadata.name}{\"\\t\"}{.status.phase}{\"\\n\"}{end}")
327+
output, err := cmd.Output()
328+
if err != nil {
329+
fmt.Fprintf(os.Stderr, "[dump] failed to list pods in %s: %v\n", namespace, err)
330+
return
331+
}
332+
333+
for line := range strings.SplitSeq(strings.TrimSpace(string(output)), "\n") {
334+
if line == "" {
335+
continue
336+
}
337+
parts := strings.SplitN(line, "\t", 2)
338+
if len(parts) != 2 {
339+
continue
340+
}
341+
podName, phase := parts[0], parts[1]
342+
if phase == "Running" || phase == "Succeeded" {
343+
continue
344+
}
345+
fmt.Fprintf(os.Stderr, "[dump] logs for pod %s/%s (phase=%s):\n", namespace, podName, phase)
346+
runKubectlDump("logs", "-n", namespace, podName, "--all-containers", "--tail=100")
347+
runKubectlDump("logs", "-n", namespace, podName, "--all-containers", "--previous", "--tail=50")
348+
}
349+
}
350+
242351
func verifyAnnotation(t *testing.T, resourceType, resourceName, namespace, annotationKey, expectedValue string) {
243352
t.Helper()
244353

tests/e2e/olm_switch_test.go

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,8 @@ func verifyOperatorDeploymentExists(t *testing.T) {
6565

6666
// TestOLMToNonOLMSwitch tests switching from OLM operator to non-OLM operator
6767
func TestOLMToNonOLMSwitch(t *testing.T) {
68+
dumpClusterStateOnFailure(t)
69+
6870
if os.Getenv("SKIP_OLM_TESTS") != "" {
6971
t.Skip("SKIP_OLM_TESTS is set")
7072
}
@@ -113,6 +115,8 @@ func TestOLMToNonOLMSwitch(t *testing.T) {
113115

114116
// TestNonOLMToOLMSwitch tests switching from non-OLM operator to OLM operator
115117
func TestNonOLMToOLMSwitch(t *testing.T) {
118+
dumpClusterStateOnFailure(t)
119+
116120
if os.Getenv("SKIP_OLM_TESTS") != "" {
117121
t.Skip("SKIP_OLM_TESTS is set")
118122
}
@@ -161,6 +165,8 @@ func TestNonOLMToOLMSwitch(t *testing.T) {
161165

162166
// TestOLMOperatorVersionUpgrade tests that OLM operator version mismatches trigger teardown and redeploy
163167
func TestOLMOperatorVersionUpgrade(t *testing.T) {
168+
dumpClusterStateOnFailure(t)
169+
164170
if os.Getenv("SKIP_OLM_TESTS") != "" {
165171
t.Skip("SKIP_OLM_TESTS is set")
166172
}
@@ -223,6 +229,8 @@ func TestOLMOperatorVersionUpgrade(t *testing.T) {
223229

224230
// TestSecuredClusterWithOLMSwitch tests that secured-cluster deployment also respects OLM mode switches
225231
func TestSecuredClusterWithOLMSwitch(t *testing.T) {
232+
dumpClusterStateOnFailure(t)
233+
226234
if os.Getenv("SKIP_OLM_TESTS") != "" {
227235
t.Skip("SKIP_OLM_TESTS is set")
228236
}

0 commit comments

Comments
 (0)