Skip to content

Commit b453e4b

Browse files
committed
Fix OCP-44493 test event matching and add err variable
- Add the missing err variable declaration after removing the featureSet skip
- Fix event matching to look for Killing and Started events in the proper format
- Update the matching to follow the actual oc describe pod event output format
- Increase the timeout to 5 minutes for probe failure detection
1 parent 286851d commit b453e4b

1 file changed

Lines changed: 47 additions & 34 deletions

File tree

test/extended/node/node_e2e/node.go

Lines changed: 47 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -174,14 +174,7 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager",
174174
//automates: https://issues.redhat.com/browse/OCPBUGS-44493
175175
g.It("[OTP] add configurable terminationGracePeriod to liveness and startup probes [OCP-44493]", ote.Informing(), func() {
176176
ctx := context.Background()
177-
178-
g.By("Check if featureSet is empty in cluster")
179-
featureSet, err := oc.AsAdmin().WithoutNamespace().Run("get").Args("featuregate", "cluster", "-o=jsonpath={.spec.featureSet}").Output()
180-
o.Expect(err).NotTo(o.HaveOccurred(), "failed to get featuregate")
181-
e2e.Logf("featureSet is: %s", featureSet)
182-
if featureSet != "" {
183-
g.Skip("featureSet is not empty, skipping test")
184-
}
177+
var err error
185178

186179
oc.SetupProject()
187180
namespace := oc.Namespace()
@@ -218,58 +211,78 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager",
218211
}
219212

220213
// Helper to verify probe termination period
221-
verifyProbeTermination := func(podName string, expectedTerminationSec int) error {
222-
return wait.PollUntilContextTimeout(ctx, 10*time.Second, 4*time.Minute, true, func(ctx context.Context) (bool, error) {
214+
verifyProbeTermination := func(podName, containerName string, expectedTerminationSec int) error {
215+
return wait.PollUntilContextTimeout(ctx, 10*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) {
223216
podDesc, err := oc.AsAdmin().WithoutNamespace().Run("describe").Args("pod", podName, "-n", namespace).Output()
224217
if err != nil {
225218
e2e.Logf("Error describing pod: %v", err)
226219
return false, nil
227220
}
228221

229-
// Look for probe failure and container start events
230-
probeFailLine := ""
231-
containerStartLine := ""
222+
// Look for probe failure (killing) and container restart events
223+
// Event format: "Normal Killing <time> kubelet Container <name> failed <probe> probe, will be restarted"
224+
// Event format: "Normal Started <time> kubelet Container started"
225+
killingLine := ""
226+
restartLine := ""
227+
228+
inEvents := false
232229
for _, line := range strings.Split(podDesc, "\n") {
233-
if strings.Contains(line, "Container") && strings.Contains(line, "failed") && strings.Contains(line, "probe") && strings.Contains(line, "will be restarted") {
234-
probeFailLine = line
230+
if strings.Contains(line, "Events:") {
231+
inEvents = true
232+
continue
233+
}
234+
if !inEvents {
235+
continue
236+
}
237+
238+
// Look for killing event with container name
239+
if strings.Contains(line, "Killing") && strings.Contains(line, containerName) &&
240+
strings.Contains(line, "failed") && strings.Contains(line, "probe") &&
241+
strings.Contains(line, "will be restarted") {
242+
killingLine = line
235243
}
236-
if strings.Contains(line, "Started container") {
237-
containerStartLine = line
244+
// Look for Started event after Killing
245+
if killingLine != "" && strings.Contains(line, "Started") && strings.Contains(line, "Container started") {
246+
restartLine = line
247+
break
238248
}
239249
}
240250

241-
if probeFailLine == "" || containerStartLine == "" {
242-
e2e.Logf("Waiting for probe failure and container start events")
251+
if killingLine == "" || restartLine == "" {
252+
e2e.Logf("Waiting for probe failure (killing) and container restart events")
243253
return false, nil
244254
}
245255

246-
e2e.Logf("Probe failure event: %s", probeFailLine)
247-
e2e.Logf("Container start event: %s", containerStartLine)
256+
e2e.Logf("Killing event: %s", killingLine)
257+
e2e.Logf("Restart event: %s", restartLine)
248258

249259
// Extract timestamps (format: "1m30s" or "45s")
250-
probeFailFields := strings.Fields(probeFailLine)
251-
containerStartFields := strings.Fields(containerStartLine)
252-
if len(probeFailFields) < 3 || len(containerStartFields) < 3 {
260+
// Event format: "Normal Killing 2m30s kubelet Container..."
261+
killingFields := strings.Fields(killingLine)
262+
restartFields := strings.Fields(restartLine)
263+
if len(killingFields) < 3 || len(restartFields) < 3 {
253264
e2e.Logf("Unable to parse event timestamps")
254265
return false, nil
255266
}
256267

257-
probeFailTime := probeFailFields[2]
258-
containerStartTime := containerStartFields[2]
268+
killingTime := killingFields[2]
269+
restartTime := restartFields[2]
259270

260-
probeFailSec, err := parseDurationToSeconds(probeFailTime)
271+
killingSec, err := parseDurationToSeconds(killingTime)
261272
if err != nil {
262-
e2e.Logf("Error parsing probe fail time: %v", err)
273+
e2e.Logf("Error parsing killing time: %v", err)
263274
return false, nil
264275
}
265276

266-
containerStartSec, err := parseDurationToSeconds(containerStartTime)
277+
restartSec, err := parseDurationToSeconds(restartTime)
267278
if err != nil {
268-
e2e.Logf("Error parsing container start time: %v", err)
279+
e2e.Logf("Error parsing restart time: %v", err)
269280
return false, nil
270281
}
271282

272-
timeDiff := probeFailSec - containerStartSec
283+
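// Both values are event ages (time elapsed since the event), so the earlier Killing event has the larger age
284+
// Subtracting the restart age from the killing age gives the seconds elapsed between the kill and the restart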
285+
timeDiff := killingSec - restartSec
273286
e2e.Logf("Time difference: %d seconds (expected: %d ±10 seconds)", timeDiff, expectedTerminationSec)
274287

275288
// Allow range: [expectedTerminationSec-3, expectedTerminationSec+10]
@@ -331,7 +344,7 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager",
331344
o.Expect(err).NotTo(o.HaveOccurred(), "failed to create liveness probe pod")
332345
g.DeferCleanup(oc.KubeClient().CoreV1().Pods(namespace).Delete, ctx, "liveness-probe", metav1.DeleteOptions{})
333346

334-
err = verifyProbeTermination("liveness-probe", 10)
347+
err = verifyProbeTermination("liveness-probe", "test", 10)
335348
o.Expect(err).NotTo(o.HaveOccurred(), "liveness probe termination grace period not honored")
336349

337350
g.By("Test startup probe with probe-level terminationGracePeriodSeconds")
@@ -382,7 +395,7 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager",
382395
o.Expect(err).NotTo(o.HaveOccurred(), "failed to create startup probe pod")
383396
g.DeferCleanup(oc.KubeClient().CoreV1().Pods(namespace).Delete, ctx, "startup-probe", metav1.DeleteOptions{})
384397

385-
err = verifyProbeTermination("startup-probe", 10)
398+
err = verifyProbeTermination("startup-probe", "teststartup", 10)
386399
o.Expect(err).NotTo(o.HaveOccurred(), "startup probe termination grace period not honored")
387400

388401
g.By("Test liveness probe without probe-level terminationGracePeriodSeconds (should use pod-level)")
@@ -433,7 +446,7 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager",
433446
o.Expect(err).NotTo(o.HaveOccurred(), "failed to create liveness probe pod without probe termination")
434447
g.DeferCleanup(oc.KubeClient().CoreV1().Pods(namespace).Delete, ctx, "liveness-probe-no-term", metav1.DeleteOptions{})
435448

436-
err = verifyProbeTermination("liveness-probe-no-term", 60)
449+
err = verifyProbeTermination("liveness-probe-no-term", "test", 60)
437450
o.Expect(err).NotTo(o.HaveOccurred(), "liveness probe should use pod-level termination grace period when probe-level not set")
438451
})
439452
})

0 commit comments

Comments
 (0)