Skip to content

Commit fadad70

Browse files
author
Moritz Clasmeier
committed
Dump cluster state on failure
1 parent 7bd230b commit fadad70

4 files changed

Lines changed: 121 additions & 0 deletions

File tree

tests/e2e/basic_test.go

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,8 @@ import (
1111

1212
// TestDeployBothSimple tests deploying both components together (simplest scenario)
1313
func TestDeployBothSimple(t *testing.T) {
14+
dumpClusterOnFailure(t)
15+
1416
// Create temporary envrc file
1517
envrcFile, err := os.CreateTemp(t.TempDir(), ".envrc.roxie-test-*")
1618
if err != nil {

tests/e2e/e2e_test.go

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,8 @@ func TestMain(m *testing.M) {
3939
}
4040

4141
func TestDeployBothComponentsTogetherInSingleNamespace(t *testing.T) {
42+
dumpClusterOnFailure(t)
43+
4244
// Create temporary envrc file.
4345
envrcFile, err := os.CreateTemp(t.TempDir(), ".envrc.roxie-test-*")
4446
if err != nil {

tests/e2e/helpers.go

Lines changed: 109 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -238,6 +238,115 @@ func verifySecuredClusterNotInstalled(t *testing.T, namespace string) {
238238
}
239239
}
240240

241+
// clusterDumpNamespaces enumerates the namespaces that the cluster dump
// helpers iterate over when collecting diagnostics.
var clusterDumpNamespaces = []string{
	"rhacs-operator-system",
	"acs-central",
	"acs-sensor",
	"stackrox",
}
247+
248+
func dumpClusterOnFailure(t *testing.T) {
249+
t.Helper()
250+
t.Cleanup(func() {
251+
if !t.Failed() {
252+
return
253+
}
254+
dumpClusterResources(t)
255+
})
256+
}
257+
258+
func dumpClusterResources(t *testing.T) {
259+
t.Helper()
260+
fmt.Fprintf(os.Stderr, "=== CLUSTER RESOURCE DUMP (test %s failed) ===\n", t.Name())
261+
262+
runKubectlDump("get", "namespaces")
263+
264+
for _, ns := range clusterDumpNamespaces {
265+
fmt.Fprintf(os.Stderr, "--- Namespace: %s ---\n", ns)
266+
runKubectlDump("get", "pods", "-n", ns, "-o", "wide")
267+
runKubectlDump("describe", "pods", "-n", ns)
268+
runKubectlDump("get", "deployments", "-n", ns, "-o", "wide")
269+
runKubectlDump("describe", "deployments", "-n", ns)
270+
runKubectlDump("get", "daemonsets", "-n", ns, "-o", "wide")
271+
runKubectlDump("describe", "daemonsets", "-n", ns)
272+
runKubectlDump("get", "events", "-n", ns, "--sort-by=.lastTimestamp")
273+
dumpLogsForFailingPods(ns)
274+
}
275+
276+
dumpACSCustomResources()
277+
dumpOLMResources()
278+
279+
fmt.Fprintln(os.Stderr, "=== END CLUSTER RESOURCE DUMP ===")
280+
}
281+
282+
func dumpACSCustomResources() {
283+
fmt.Fprintln(os.Stderr, "--- ACS Custom Resources ---")
284+
for _, ns := range clusterDumpNamespaces {
285+
runKubectlDump("get", "centrals.platform.stackrox.io", "-n", ns, "-o", "yaml")
286+
runKubectlDump("get", "securedclusters.platform.stackrox.io", "-n", ns, "-o", "yaml")
287+
}
288+
}
289+
290+
func dumpOLMResources() {
291+
cmd := exec.Command("kubectl", "api-resources", "--api-group=operators.coreos.com", "-o", "name")
292+
output, err := cmd.Output()
293+
if err != nil || strings.TrimSpace(string(output)) == "" {
294+
fmt.Fprintln(os.Stderr, "[dump] OLM not installed, skipping OLM resource dump")
295+
return
296+
}
297+
298+
fmt.Fprintln(os.Stderr, "--- OLM Resources ---")
299+
olmNamespace := "rhacs-operator-system"
300+
runKubectlDump("get", "subscriptions.operators.coreos.com", "-n", olmNamespace, "-o", "wide")
301+
runKubectlDump("describe", "subscriptions.operators.coreos.com", "-n", olmNamespace)
302+
runKubectlDump("get", "installplans.operators.coreos.com", "-n", olmNamespace, "-o", "wide")
303+
runKubectlDump("describe", "installplans.operators.coreos.com", "-n", olmNamespace)
304+
runKubectlDump("get", "catalogsources.operators.coreos.com", "-n", olmNamespace, "-o", "wide")
305+
runKubectlDump("describe", "catalogsources.operators.coreos.com", "-n", olmNamespace)
306+
runKubectlDump("get", "clusterserviceversions.operators.coreos.com", "-n", olmNamespace, "-o", "wide")
307+
runKubectlDump("describe", "clusterserviceversions.operators.coreos.com", "-n", olmNamespace)
308+
runKubectlDump("get", "operatorgroups.operators.coreos.com", "-n", olmNamespace, "-o", "wide")
309+
runKubectlDump("describe", "operatorgroups.operators.coreos.com", "-n", olmNamespace)
310+
}
311+
312+
// runKubectlDump runs kubectl with args and streams both its stdout and its
// stderr to this process's stderr. The invocation is preceded by a
// "## kubectl ..." header line and followed by a blank line. A failing
// command is reported on stderr and otherwise ignored, so callers can keep
// dumping.
func runKubectlDump(args ...string) {
	sink := os.Stderr
	fmt.Fprintf(sink, "## kubectl %s\n", strings.Join(args, " "))

	kubectl := exec.Command("kubectl", args...)
	kubectl.Stdout, kubectl.Stderr = sink, sink

	err := kubectl.Run()
	if err != nil {
		fmt.Fprintf(sink, "kubectl failed: %v\n", err)
	}
	fmt.Fprintln(sink)
}
322+
323+
func dumpLogsForFailingPods(namespace string) {
324+
cmd := exec.Command("kubectl", "get", "pods", "-n", namespace,
325+
"-o", "jsonpath={range .items[*]}{.metadata.name}{\"\\t\"}{.status.phase}{\"\\n\"}{end}")
326+
output, err := cmd.Output()
327+
if err != nil {
328+
fmt.Fprintf(os.Stderr, "[dump] failed to list pods in %s: %v\n", namespace, err)
329+
return
330+
}
331+
332+
for line := range strings.SplitSeq(strings.TrimSpace(string(output)), "\n") {
333+
if line == "" {
334+
continue
335+
}
336+
parts := strings.SplitN(line, "\t", 2)
337+
if len(parts) != 2 {
338+
continue
339+
}
340+
podName, phase := parts[0], parts[1]
341+
if phase == "Running" || phase == "Succeeded" {
342+
continue
343+
}
344+
fmt.Fprintf(os.Stderr, "[dump] logs for pod %s/%s (phase=%s):\n", namespace, podName, phase)
345+
runKubectlDump("logs", "-n", namespace, podName, "--all-containers", "--tail=100")
346+
runKubectlDump("logs", "-n", namespace, podName, "--all-containers", "--previous", "--tail=50")
347+
}
348+
}
349+
241350
func verifyAnnotation(t *testing.T, resourceType, resourceName, namespace, annotationKey, expectedValue string) {
242351
t.Helper()
243352

tests/e2e/olm_switch_test.go

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,8 @@ func verifyOperatorDeploymentExists(t *testing.T) {
6565

6666
// TestOLMToNonOLMSwitch tests switching from OLM operator to non-OLM operator
6767
func TestOLMToNonOLMSwitch(t *testing.T) {
68+
dumpClusterOnFailure(t)
69+
6870
if os.Getenv("SKIP_OLM_TESTS") != "" {
6971
t.Skip("SKIP_OLM_TESTS is set")
7072
}
@@ -113,6 +115,8 @@ func TestOLMToNonOLMSwitch(t *testing.T) {
113115

114116
// TestNonOLMToOLMSwitch tests switching from non-OLM operator to OLM operator
115117
func TestNonOLMToOLMSwitch(t *testing.T) {
118+
dumpClusterOnFailure(t)
119+
116120
if os.Getenv("SKIP_OLM_TESTS") != "" {
117121
t.Skip("SKIP_OLM_TESTS is set")
118122
}
@@ -161,6 +165,8 @@ func TestNonOLMToOLMSwitch(t *testing.T) {
161165

162166
// TestOLMOperatorVersionUpgrade tests that OLM operator version mismatches trigger teardown and redeploy
163167
func TestOLMOperatorVersionUpgrade(t *testing.T) {
168+
dumpClusterOnFailure(t)
169+
164170
if os.Getenv("SKIP_OLM_TESTS") != "" {
165171
t.Skip("SKIP_OLM_TESTS is set")
166172
}
@@ -223,6 +229,8 @@ func TestOLMOperatorVersionUpgrade(t *testing.T) {
223229

224230
// TestSecuredClusterWithOLMSwitch tests that secured-cluster deployment also respects OLM mode switches
225231
func TestSecuredClusterWithOLMSwitch(t *testing.T) {
232+
dumpClusterOnFailure(t)
233+
226234
if os.Getenv("SKIP_OLM_TESTS") != "" {
227235
t.Skip("SKIP_OLM_TESTS is set")
228236
}

0 commit comments

Comments
 (0)