Skip to content

Commit caa5bd1

Browse files
committed
Migrate OCP-44493: configurable terminationGracePeriod for liveness and startup probes
Migrates the test from openshift-tests-private to origin. The test validates probe-level terminationGracePeriodSeconds for: - Liveness probes with probe-level terminationGracePeriodSeconds (10s) - Startup probes with probe-level terminationGracePeriodSeconds (10s) - Liveness probes without a probe-level value (falls back to the pod-level 60s) The test creates pods with failing probes and verifies that the time between the probe failure (Killing event) and the container restart (Started event) matches the expected termination grace period within an acceptable range. Event matching queries the Events API for the pod and looks for: - Killing events that mention the container name - Started events that follow the restart Updates: - Add test to test/extended/node/node_e2e/node.go - Document test in test/extended/node/README.md Relates: https://issues.redhat.com/browse/OCPBUGS-44493 Signed-off-by: Bhargavi Gudi <BhargaviGudi@users.noreply.github.com>
1 parent 38c4fba commit caa5bd1

2 files changed

Lines changed: 219 additions & 0 deletions

File tree

test/extended/node/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ This directory contains OpenShift end-to-end tests for node-related features.
1919
- **image_volume.go** - Tests mounting container images as volumes in pods, including subPath and error handling
2020
- **node_swap.go** - Tests default kubelet swap settings (failSwapOn and swapBehavior) and rejection of user overrides
2121
- **zstd_chunked.go** - Tests building and running images with zstd:chunked compression format
22+
- **node_e2e/node.go** - Tests probe-level terminationGracePeriodSeconds for liveness and startup probes, including fallback to the pod-level value (OCP-44493) [Lifecycle:informing]
2223

2324
## Directory Structure
2425

test/extended/node/node_e2e/node.go

Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,17 @@ import (
99

1010
g "github.com/onsi/ginkgo/v2"
1111
o "github.com/onsi/gomega"
12+
ote "github.com/openshift-eng/openshift-tests-extension/pkg/ginkgo"
1213

1314
configv1 "github.com/openshift/api/config/v1"
1415
"github.com/openshift/origin/test/extended/imagepolicy"
16+
corev1 "k8s.io/api/core/v1"
1517
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
18+
"k8s.io/apimachinery/pkg/util/intstr"
1619
utilrand "k8s.io/apimachinery/pkg/util/rand"
1720
"k8s.io/apimachinery/pkg/util/wait"
1821
e2e "k8s.io/kubernetes/test/e2e/framework"
22+
"k8s.io/utils/ptr"
1923

2024
nodeutils "github.com/openshift/origin/test/extended/node"
2125
exutil "github.com/openshift/origin/test/extended/util"
@@ -164,6 +168,220 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager",
164168
e2e.Logf("/dev/fuse mount output: %s", output)
165169
o.Expect(output).To(o.ContainSubstring("fuse"), "dev fuse is not mounted inside pod")
166170
})
171+
172+
//author: bgudi@redhat.com
173+
g.It("[OTP] add configurable terminationGracePeriodSeconds to liveness and startup probes [OCP-44493]", ote.Informing(), func() {
174+
ctx := context.Background()
175+
var err error
176+
177+
oc.SetupProject()
178+
namespace := oc.Namespace()
179+
180+
// Helper to verify probe termination period by checking Events API
181+
verifyProbeTermination := func(podName, containerName string, expectedTerminationSec int) error {
182+
return wait.PollUntilContextTimeout(ctx, 10*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) {
183+
// Get events using the Events API
184+
events, err := oc.KubeClient().CoreV1().Events(namespace).List(ctx, metav1.ListOptions{
185+
FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.kind=Pod", podName),
186+
})
187+
if err != nil {
188+
e2e.Logf("Error getting events: %v", err)
189+
return false, nil
190+
}
191+
192+
// Look for probe failure (Killing) and container restart (Started) events
193+
var killingEvent, startedEvent *corev1.Event
194+
for i := range events.Items {
195+
event := &events.Items[i]
196+
if event.Reason == "Killing" && strings.Contains(event.Message, containerName) &&
197+
strings.Contains(event.Message, "failed") && strings.Contains(event.Message, "probe") {
198+
if killingEvent == nil || event.LastTimestamp.Time.After(killingEvent.LastTimestamp.Time) {
199+
killingEvent = event
200+
}
201+
}
202+
if event.Reason == "Started" && strings.Contains(event.Message, "Started container") {
203+
// Find Started event after the Killing event
204+
if killingEvent != nil && event.FirstTimestamp.Time.After(killingEvent.LastTimestamp.Time) {
205+
if startedEvent == nil || event.FirstTimestamp.Time.Before(startedEvent.FirstTimestamp.Time) {
206+
startedEvent = event
207+
}
208+
}
209+
}
210+
}
211+
212+
if killingEvent == nil || startedEvent == nil {
213+
e2e.Logf("Waiting for probe failure (Killing) and container restart (Started) events")
214+
return false, nil
215+
}
216+
217+
e2e.Logf("Killing event: %s at %v", killingEvent.Message, killingEvent.LastTimestamp)
218+
e2e.Logf("Started event: %s at %v", startedEvent.Message, startedEvent.FirstTimestamp)
219+
220+
// Calculate time difference in seconds
221+
timeDiff := int(startedEvent.FirstTimestamp.Sub(killingEvent.LastTimestamp.Time).Seconds())
222+
e2e.Logf("Time difference: %d seconds (expected: %d ±10 seconds)", timeDiff, expectedTerminationSec)
223+
224+
// Use gomega BeNumerically for range checking
225+
o.Expect(timeDiff).To(o.BeNumerically(">=", expectedTerminationSec-3),
226+
"Time difference is less than expected minimum")
227+
o.Expect(timeDiff).To(o.BeNumerically("<=", expectedTerminationSec+10),
228+
"Time difference is greater than expected maximum")
229+
230+
e2e.Logf("Termination grace period check passed")
231+
return true, nil
232+
})
233+
}
234+
235+
g.By("Test liveness probe with probe-level terminationGracePeriodSeconds")
236+
livenessPod := &corev1.Pod{
237+
ObjectMeta: metav1.ObjectMeta{
238+
Name: "liveness-probe",
239+
Namespace: namespace,
240+
},
241+
Spec: corev1.PodSpec{
242+
TerminationGracePeriodSeconds: ptr.To[int64](60),
243+
SecurityContext: &corev1.PodSecurityContext{
244+
RunAsNonRoot: ptr.To(true),
245+
SeccompProfile: &corev1.SeccompProfile{
246+
Type: corev1.SeccompProfileTypeRuntimeDefault,
247+
},
248+
},
249+
Containers: []corev1.Container{
250+
{
251+
Name: "test",
252+
Image: "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0",
253+
SecurityContext: &corev1.SecurityContext{
254+
AllowPrivilegeEscalation: ptr.To(false),
255+
Capabilities: &corev1.Capabilities{
256+
Drop: []corev1.Capability{"ALL"},
257+
},
258+
},
259+
Command: []string{"sh", "-c", "sleep 100000000"},
260+
Ports: []corev1.ContainerPort{
261+
{ContainerPort: 8080},
262+
},
263+
LivenessProbe: &corev1.Probe{
264+
ProbeHandler: corev1.ProbeHandler{
265+
HTTPGet: &corev1.HTTPGetAction{
266+
Path: "/healthz",
267+
Port: intstr.FromInt(8080),
268+
},
269+
},
270+
FailureThreshold: 1,
271+
PeriodSeconds: 60,
272+
TerminationGracePeriodSeconds: ptr.To[int64](10),
273+
},
274+
},
275+
},
276+
},
277+
}
278+
279+
_, err = oc.KubeClient().CoreV1().Pods(namespace).Create(ctx, livenessPod, metav1.CreateOptions{})
280+
o.Expect(err).NotTo(o.HaveOccurred(), "failed to create liveness probe pod")
281+
282+
err = verifyProbeTermination("liveness-probe", "test", 10)
283+
o.Expect(err).NotTo(o.HaveOccurred(), "liveness probe termination grace period not honored")
284+
285+
g.By("Test startup probe with probe-level terminationGracePeriodSeconds")
286+
startupPod := &corev1.Pod{
287+
ObjectMeta: metav1.ObjectMeta{
288+
Name: "startup-probe",
289+
Namespace: namespace,
290+
},
291+
Spec: corev1.PodSpec{
292+
TerminationGracePeriodSeconds: ptr.To[int64](60),
293+
SecurityContext: &corev1.PodSecurityContext{
294+
RunAsNonRoot: ptr.To(true),
295+
SeccompProfile: &corev1.SeccompProfile{
296+
Type: corev1.SeccompProfileTypeRuntimeDefault,
297+
},
298+
},
299+
Containers: []corev1.Container{
300+
{
301+
Name: "teststartup",
302+
Image: "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0",
303+
SecurityContext: &corev1.SecurityContext{
304+
AllowPrivilegeEscalation: ptr.To(false),
305+
Capabilities: &corev1.Capabilities{
306+
Drop: []corev1.Capability{"ALL"},
307+
},
308+
},
309+
Command: []string{"sh", "-c", "sleep 100000000"},
310+
Ports: []corev1.ContainerPort{
311+
{ContainerPort: 8080},
312+
},
313+
StartupProbe: &corev1.Probe{
314+
ProbeHandler: corev1.ProbeHandler{
315+
HTTPGet: &corev1.HTTPGetAction{
316+
Path: "/healthz",
317+
Port: intstr.FromInt(8080),
318+
},
319+
},
320+
FailureThreshold: 1,
321+
PeriodSeconds: 60,
322+
TerminationGracePeriodSeconds: ptr.To[int64](10),
323+
},
324+
},
325+
},
326+
},
327+
}
328+
329+
_, err = oc.KubeClient().CoreV1().Pods(namespace).Create(ctx, startupPod, metav1.CreateOptions{})
330+
o.Expect(err).NotTo(o.HaveOccurred(), "failed to create startup probe pod")
331+
332+
err = verifyProbeTermination("startup-probe", "teststartup", 10)
333+
o.Expect(err).NotTo(o.HaveOccurred(), "startup probe termination grace period not honored")
334+
335+
g.By("Test liveness probe without probe-level terminationGracePeriodSeconds (should use pod-level)")
336+
livenessPodNoProbeTerm := &corev1.Pod{
337+
ObjectMeta: metav1.ObjectMeta{
338+
Name: "liveness-probe-no-term",
339+
Namespace: namespace,
340+
},
341+
Spec: corev1.PodSpec{
342+
TerminationGracePeriodSeconds: ptr.To[int64](60),
343+
SecurityContext: &corev1.PodSecurityContext{
344+
RunAsNonRoot: ptr.To(true),
345+
SeccompProfile: &corev1.SeccompProfile{
346+
Type: corev1.SeccompProfileTypeRuntimeDefault,
347+
},
348+
},
349+
Containers: []corev1.Container{
350+
{
351+
Name: "test",
352+
Image: "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0",
353+
SecurityContext: &corev1.SecurityContext{
354+
AllowPrivilegeEscalation: ptr.To(false),
355+
Capabilities: &corev1.Capabilities{
356+
Drop: []corev1.Capability{"ALL"},
357+
},
358+
},
359+
Command: []string{"sh", "-c", "sleep 100000000"},
360+
Ports: []corev1.ContainerPort{
361+
{ContainerPort: 8080},
362+
},
363+
LivenessProbe: &corev1.Probe{
364+
ProbeHandler: corev1.ProbeHandler{
365+
HTTPGet: &corev1.HTTPGetAction{
366+
Path: "/healthz",
367+
Port: intstr.FromInt(8080),
368+
},
369+
},
370+
FailureThreshold: 1,
371+
PeriodSeconds: 60,
372+
// No TerminationGracePeriodSeconds - should use pod-level (60s)
373+
},
374+
},
375+
},
376+
},
377+
}
378+
379+
_, err = oc.KubeClient().CoreV1().Pods(namespace).Create(ctx, livenessPodNoProbeTerm, metav1.CreateOptions{})
380+
o.Expect(err).NotTo(o.HaveOccurred(), "failed to create liveness probe pod without probe termination")
381+
382+
err = verifyProbeTermination("liveness-probe-no-term", "test", 60)
383+
o.Expect(err).NotTo(o.HaveOccurred(), "liveness probe should use pod-level termination grace period when probe-level not set")
384+
})
167385
})
168386

169387
// author: asahay@redhat.com

0 commit comments

Comments
 (0)