Skip to content

Commit 0185b5e

Browse files
author
Moritz Clasmeier
committed
Dump cluster state on failure
1 parent 37772a6 commit 0185b5e

4 files changed

Lines changed: 121 additions & 0 deletions

File tree

tests/e2e/basic_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ import (
1111

1212
// TestDeployBothSimple tests deploying both components together (simplest scenario)
1313
func TestDeployBothSimple(t *testing.T) {
14+
dumpClusterOnFailure(t)
15+
1416
// Create temporary envrc file
1517
envrcFile, err := os.CreateTemp(t.TempDir(), ".envrc.roxie-test-*")
1618
if err != nil {

tests/e2e/e2e_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ func TestMain(m *testing.M) {
3939
}
4040

4141
func TestDeployBothComponentsTogetherInSingleNamespace(t *testing.T) {
42+
dumpClusterOnFailure(t)
43+
4244
// Create temporary envrc file.
4345
envrcFile, err := os.CreateTemp(t.TempDir(), ".envrc.roxie-test-*")
4446
if err != nil {

tests/e2e/helpers.go

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,115 @@ func verifySecuredClusterNotInstalled(t *testing.T, namespace string) {
239239
}
240240
}
241241

242+
// clusterDumpNamespaces lists the namespaces whose resources are inspected
// when a cluster dump is written after a test failure.
var clusterDumpNamespaces = []string{
	"rhacs-operator-system",
	"acs-central",
	"acs-sensor",
	"stackrox",
}
248+
249+
func dumpClusterOnFailure(t *testing.T) {
250+
t.Helper()
251+
t.Cleanup(func() {
252+
if !t.Failed() {
253+
return
254+
}
255+
dumpClusterResources(t)
256+
})
257+
}
258+
259+
func dumpClusterResources(t *testing.T) {
260+
t.Helper()
261+
fmt.Fprintf(os.Stderr, "=== CLUSTER RESOURCE DUMP (test %s failed) ===\n", t.Name())
262+
263+
runKubectlDump("get", "namespaces")
264+
265+
for _, ns := range clusterDumpNamespaces {
266+
fmt.Fprintf(os.Stderr, "--- Namespace: %s ---\n", ns)
267+
runKubectlDump("get", "pods", "-n", ns, "-o", "wide")
268+
runKubectlDump("describe", "pods", "-n", ns)
269+
runKubectlDump("get", "deployments", "-n", ns, "-o", "wide")
270+
runKubectlDump("describe", "deployments", "-n", ns)
271+
runKubectlDump("get", "daemonsets", "-n", ns, "-o", "wide")
272+
runKubectlDump("describe", "daemonsets", "-n", ns)
273+
runKubectlDump("get", "events", "-n", ns, "--sort-by=.lastTimestamp")
274+
dumpLogsForFailingPods(ns)
275+
}
276+
277+
dumpACSCustomResources()
278+
dumpOLMResources()
279+
280+
fmt.Fprintln(os.Stderr, "=== END CLUSTER RESOURCE DUMP ===")
281+
}
282+
283+
func dumpACSCustomResources() {
284+
fmt.Fprintln(os.Stderr, "--- ACS Custom Resources ---")
285+
for _, ns := range clusterDumpNamespaces {
286+
runKubectlDump("get", "centrals.platform.stackrox.io", "-n", ns, "-o", "yaml")
287+
runKubectlDump("get", "securedclusters.platform.stackrox.io", "-n", ns, "-o", "yaml")
288+
}
289+
}
290+
291+
func dumpOLMResources() {
292+
cmd := exec.Command("kubectl", "api-resources", "--api-group=operators.coreos.com", "-o", "name")
293+
output, err := cmd.Output()
294+
if err != nil || strings.TrimSpace(string(output)) == "" {
295+
fmt.Fprintln(os.Stderr, "[dump] OLM not installed, skipping OLM resource dump")
296+
return
297+
}
298+
299+
fmt.Fprintln(os.Stderr, "--- OLM Resources ---")
300+
olmNamespace := "rhacs-operator-system"
301+
runKubectlDump("get", "subscriptions.operators.coreos.com", "-n", olmNamespace, "-o", "wide")
302+
runKubectlDump("describe", "subscriptions.operators.coreos.com", "-n", olmNamespace)
303+
runKubectlDump("get", "installplans.operators.coreos.com", "-n", olmNamespace, "-o", "wide")
304+
runKubectlDump("describe", "installplans.operators.coreos.com", "-n", olmNamespace)
305+
runKubectlDump("get", "catalogsources.operators.coreos.com", "-n", olmNamespace, "-o", "wide")
306+
runKubectlDump("describe", "catalogsources.operators.coreos.com", "-n", olmNamespace)
307+
runKubectlDump("get", "clusterserviceversions.operators.coreos.com", "-n", olmNamespace, "-o", "wide")
308+
runKubectlDump("describe", "clusterserviceversions.operators.coreos.com", "-n", olmNamespace)
309+
runKubectlDump("get", "operatorgroups.operators.coreos.com", "-n", olmNamespace, "-o", "wide")
310+
runKubectlDump("describe", "operatorgroups.operators.coreos.com", "-n", olmNamespace)
311+
}
312+
313+
// runKubectlDump runs "kubectl <args...>" and streams both its stdout and
// stderr to this process's stderr. The command line is echoed first as a
// "## kubectl ..." header, and a blank line is written afterwards. A failing
// command is reported on stderr but otherwise ignored, so one failed query
// does not stop the rest of a dump.
func runKubectlDump(args ...string) {
	sink := os.Stderr
	fmt.Fprintf(sink, "## kubectl %s\n", strings.Join(args, " "))

	kubectl := exec.Command("kubectl", args...)
	kubectl.Stdout, kubectl.Stderr = sink, sink

	runErr := kubectl.Run()
	if runErr != nil {
		fmt.Fprintf(sink, "kubectl failed: %v\n", runErr)
	}

	fmt.Fprintln(sink)
}
323+
324+
func dumpLogsForFailingPods(namespace string) {
325+
cmd := exec.Command("kubectl", "get", "pods", "-n", namespace,
326+
"-o", "jsonpath={range .items[*]}{.metadata.name}{\"\\t\"}{.status.phase}{\"\\n\"}{end}")
327+
output, err := cmd.Output()
328+
if err != nil {
329+
fmt.Fprintf(os.Stderr, "[dump] failed to list pods in %s: %v\n", namespace, err)
330+
return
331+
}
332+
333+
for line := range strings.SplitSeq(strings.TrimSpace(string(output)), "\n") {
334+
if line == "" {
335+
continue
336+
}
337+
parts := strings.SplitN(line, "\t", 2)
338+
if len(parts) != 2 {
339+
continue
340+
}
341+
podName, phase := parts[0], parts[1]
342+
if phase == "Running" || phase == "Succeeded" {
343+
continue
344+
}
345+
fmt.Fprintf(os.Stderr, "[dump] logs for pod %s/%s (phase=%s):\n", namespace, podName, phase)
346+
runKubectlDump("logs", "-n", namespace, podName, "--all-containers", "--tail=100")
347+
runKubectlDump("logs", "-n", namespace, podName, "--all-containers", "--previous", "--tail=50")
348+
}
349+
}
350+
242351
func verifyAnnotation(t *testing.T, resourceType, resourceName, namespace, annotationKey, expectedValue string) {
243352
t.Helper()
244353

tests/e2e/olm_switch_test.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ func verifyOperatorDeploymentExists(t *testing.T) {
6565

6666
// TestOLMToNonOLMSwitch tests switching from OLM operator to non-OLM operator
6767
func TestOLMToNonOLMSwitch(t *testing.T) {
68+
dumpClusterOnFailure(t)
69+
6870
if os.Getenv("SKIP_OLM_TESTS") != "" {
6971
t.Skip("SKIP_OLM_TESTS is set")
7072
}
@@ -113,6 +115,8 @@ func TestOLMToNonOLMSwitch(t *testing.T) {
113115

114116
// TestNonOLMToOLMSwitch tests switching from non-OLM operator to OLM operator
115117
func TestNonOLMToOLMSwitch(t *testing.T) {
118+
dumpClusterOnFailure(t)
119+
116120
if os.Getenv("SKIP_OLM_TESTS") != "" {
117121
t.Skip("SKIP_OLM_TESTS is set")
118122
}
@@ -161,6 +165,8 @@ func TestNonOLMToOLMSwitch(t *testing.T) {
161165

162166
// TestOLMOperatorVersionUpgrade tests that OLM operator version mismatches trigger teardown and redeploy
163167
func TestOLMOperatorVersionUpgrade(t *testing.T) {
168+
dumpClusterOnFailure(t)
169+
164170
if os.Getenv("SKIP_OLM_TESTS") != "" {
165171
t.Skip("SKIP_OLM_TESTS is set")
166172
}
@@ -223,6 +229,8 @@ func TestOLMOperatorVersionUpgrade(t *testing.T) {
223229

224230
// TestSecuredClusterWithOLMSwitch tests that secured-cluster deployment also respects OLM mode switches
225231
func TestSecuredClusterWithOLMSwitch(t *testing.T) {
232+
dumpClusterOnFailure(t)
233+
226234
if os.Getenv("SKIP_OLM_TESTS") != "" {
227235
t.Skip("SKIP_OLM_TESTS is set")
228236
}

0 commit comments

Comments
 (0)