Skip to content

Commit a708334

Browse files
committed
Migrate OCP-44493: configurable terminationGracePeriod for liveness and startup probes
Migrates the test from openshift-tests-private to origin. The test validates probe-level terminationGracePeriodSeconds for: - Liveness probes with probe-level terminationGracePeriodSeconds (10s) - Startup probes with probe-level terminationGracePeriodSeconds (10s) - Liveness probes without a probe-level value (falls back to the pod-level 60s). The test creates pods with failing probes and verifies that the time between the probe failure (Killing event) and the container restart (Started event) matches the expected termination grace period within an acceptable range. Event matching logic lists the pod's events through the Events API and looks for: - Killing events that mention the container name - Started events after the restart. Updates: - Add test to test/extended/node/node_e2e/node.go - Document test in test/extended/node/README.md. Relates: https://issues.redhat.com/browse/OCPBUGS-44493 Signed-off-by: Bhargavi Gudi <BhargaviGudi@users.noreply.github.com>
1 parent 00c4cba commit a708334

3 files changed

Lines changed: 267 additions & 0 deletions

File tree

test/extended/node/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ This directory contains OpenShift end-to-end tests for node-related features.
1919
- **image_volume.go** - Tests mounting container images as volumes in pods, including subPath and error handling
2020
- **node_swap.go** - Tests default kubelet swap settings (failSwapOn and swapBehavior) and rejection of user overrides
2121
- **zstd_chunked.go** - Tests building and running images with zstd:chunked compression format
22+
- **node_e2e/probe_termination.go** - Tests probe-level terminationGracePeriodSeconds (OCP-44493), the configurable termination grace period for liveness and startup probes. Includes 3 test cases: probe-level config for a liveness probe, probe-level config for a startup probe, and fallback to the pod-level config when the probe-level value is not set. All three cases are tagged [Lifecycle:informing].
2223

2324
## Directory Structure
2425

test/extended/node/node_e2e/node.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager",
164164
e2e.Logf("/dev/fuse mount output: %s", output)
165165
o.Expect(output).To(o.ContainSubstring("fuse"), "dev fuse is not mounted inside pod")
166166
})
167+
167168
})
168169

169170
// author: asahay@redhat.com
Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
package node
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"strings"
7+
"time"
8+
9+
g "github.com/onsi/ginkgo/v2"
10+
o "github.com/onsi/gomega"
11+
ote "github.com/openshift-eng/openshift-tests-extension/pkg/ginkgo"
12+
13+
corev1 "k8s.io/api/core/v1"
14+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
15+
"k8s.io/apimachinery/pkg/util/intstr"
16+
"k8s.io/apimachinery/pkg/util/wait"
17+
e2e "k8s.io/kubernetes/test/e2e/framework"
18+
"k8s.io/utils/ptr"
19+
20+
exutil "github.com/openshift/origin/test/extended/util"
21+
)
22+
23+
var _ = g.Describe("[sig-node] Probe configuration", func() {
24+
var (
25+
oc = exutil.NewCLIWithoutNamespace("probe-termination")
26+
)
27+
28+
//author: bgudi@redhat.com
29+
g.It("[OTP] Liveness probe should respect probe-level terminationGracePeriodSeconds [OCP-44493]", ote.Informing(), func() {
30+
ctx := context.Background()
31+
32+
oc.SetupProject()
33+
namespace := oc.Namespace()
34+
35+
g.By("Create pod with liveness probe having probe-level terminationGracePeriodSeconds=10s")
36+
pod := &corev1.Pod{
37+
ObjectMeta: metav1.ObjectMeta{
38+
Name: "liveness-probe-level",
39+
Namespace: namespace,
40+
},
41+
Spec: corev1.PodSpec{
42+
TerminationGracePeriodSeconds: ptr.To[int64](60),
43+
SecurityContext: &corev1.PodSecurityContext{
44+
RunAsNonRoot: ptr.To(true),
45+
SeccompProfile: &corev1.SeccompProfile{
46+
Type: corev1.SeccompProfileTypeRuntimeDefault,
47+
},
48+
},
49+
Containers: []corev1.Container{
50+
{
51+
Name: "test",
52+
Image: "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0",
53+
SecurityContext: &corev1.SecurityContext{
54+
AllowPrivilegeEscalation: ptr.To(false),
55+
Capabilities: &corev1.Capabilities{
56+
Drop: []corev1.Capability{"ALL"},
57+
},
58+
},
59+
Command: []string{"sh", "-c", "sleep 100000000"},
60+
Ports: []corev1.ContainerPort{
61+
{ContainerPort: 8080},
62+
},
63+
LivenessProbe: &corev1.Probe{
64+
ProbeHandler: corev1.ProbeHandler{
65+
HTTPGet: &corev1.HTTPGetAction{
66+
Path: "/healthz",
67+
Port: intstr.FromInt(8080),
68+
},
69+
},
70+
FailureThreshold: 1,
71+
PeriodSeconds: 60,
72+
TerminationGracePeriodSeconds: ptr.To[int64](10),
73+
},
74+
},
75+
},
76+
},
77+
}
78+
79+
_, err := oc.KubeClient().CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{})
80+
o.Expect(err).NotTo(o.HaveOccurred(), "failed to create liveness probe pod")
81+
82+
g.By("Verify probe-level terminationGracePeriodSeconds is honored (10s)")
83+
timeDiff, err := verifyProbeTermination(ctx, oc, namespace, "liveness-probe-level", "test", 10)
84+
o.Expect(err).NotTo(o.HaveOccurred(), "failed to get probe termination events")
85+
o.Expect(timeDiff).To(o.BeNumerically(">=", 10-3), "time difference is less than expected minimum")
86+
o.Expect(timeDiff).To(o.BeNumerically("<=", 10+10), "time difference is greater than expected maximum")
87+
})
88+
89+
//author: bgudi@redhat.com
90+
g.It("[OTP] Startup probe should respect probe-level terminationGracePeriodSeconds [OCP-44493]", ote.Informing(), func() {
91+
ctx := context.Background()
92+
93+
oc.SetupProject()
94+
namespace := oc.Namespace()
95+
96+
g.By("Create pod with startup probe having probe-level terminationGracePeriodSeconds=10s")
97+
pod := &corev1.Pod{
98+
ObjectMeta: metav1.ObjectMeta{
99+
Name: "startup-probe-level",
100+
Namespace: namespace,
101+
},
102+
Spec: corev1.PodSpec{
103+
TerminationGracePeriodSeconds: ptr.To[int64](60),
104+
SecurityContext: &corev1.PodSecurityContext{
105+
RunAsNonRoot: ptr.To(true),
106+
SeccompProfile: &corev1.SeccompProfile{
107+
Type: corev1.SeccompProfileTypeRuntimeDefault,
108+
},
109+
},
110+
Containers: []corev1.Container{
111+
{
112+
Name: "teststartup",
113+
Image: "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0",
114+
SecurityContext: &corev1.SecurityContext{
115+
AllowPrivilegeEscalation: ptr.To(false),
116+
Capabilities: &corev1.Capabilities{
117+
Drop: []corev1.Capability{"ALL"},
118+
},
119+
},
120+
Command: []string{"sh", "-c", "sleep 100000000"},
121+
Ports: []corev1.ContainerPort{
122+
{ContainerPort: 8080},
123+
},
124+
StartupProbe: &corev1.Probe{
125+
ProbeHandler: corev1.ProbeHandler{
126+
HTTPGet: &corev1.HTTPGetAction{
127+
Path: "/healthz",
128+
Port: intstr.FromInt(8080),
129+
},
130+
},
131+
FailureThreshold: 1,
132+
PeriodSeconds: 60,
133+
TerminationGracePeriodSeconds: ptr.To[int64](10),
134+
},
135+
},
136+
},
137+
},
138+
}
139+
140+
_, err := oc.KubeClient().CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{})
141+
o.Expect(err).NotTo(o.HaveOccurred(), "failed to create startup probe pod")
142+
143+
g.By("Verify probe-level terminationGracePeriodSeconds is honored (10s)")
144+
timeDiff, err := verifyProbeTermination(ctx, oc, namespace, "startup-probe-level", "teststartup", 10)
145+
o.Expect(err).NotTo(o.HaveOccurred(), "failed to get probe termination events")
146+
o.Expect(timeDiff).To(o.BeNumerically(">=", 10-3), "time difference is less than expected minimum")
147+
o.Expect(timeDiff).To(o.BeNumerically("<=", 10+10), "time difference is greater than expected maximum")
148+
})
149+
150+
//author: bgudi@redhat.com
151+
g.It("[OTP] Liveness probe should fall back to pod-level terminationGracePeriodSeconds when probe-level is not set [OCP-44493]", ote.Informing(), func() {
152+
ctx := context.Background()
153+
154+
oc.SetupProject()
155+
namespace := oc.Namespace()
156+
157+
g.By("Create pod with liveness probe without probe-level terminationGracePeriodSeconds")
158+
pod := &corev1.Pod{
159+
ObjectMeta: metav1.ObjectMeta{
160+
Name: "liveness-pod-level",
161+
Namespace: namespace,
162+
},
163+
Spec: corev1.PodSpec{
164+
TerminationGracePeriodSeconds: ptr.To[int64](60),
165+
SecurityContext: &corev1.PodSecurityContext{
166+
RunAsNonRoot: ptr.To(true),
167+
SeccompProfile: &corev1.SeccompProfile{
168+
Type: corev1.SeccompProfileTypeRuntimeDefault,
169+
},
170+
},
171+
Containers: []corev1.Container{
172+
{
173+
Name: "test",
174+
Image: "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0",
175+
SecurityContext: &corev1.SecurityContext{
176+
AllowPrivilegeEscalation: ptr.To(false),
177+
Capabilities: &corev1.Capabilities{
178+
Drop: []corev1.Capability{"ALL"},
179+
},
180+
},
181+
Command: []string{"sh", "-c", "sleep 100000000"},
182+
Ports: []corev1.ContainerPort{
183+
{ContainerPort: 8080},
184+
},
185+
LivenessProbe: &corev1.Probe{
186+
ProbeHandler: corev1.ProbeHandler{
187+
HTTPGet: &corev1.HTTPGetAction{
188+
Path: "/healthz",
189+
Port: intstr.FromInt(8080),
190+
},
191+
},
192+
FailureThreshold: 1,
193+
PeriodSeconds: 60,
194+
// No TerminationGracePeriodSeconds - should use pod-level (60s)
195+
},
196+
},
197+
},
198+
},
199+
}
200+
201+
_, err := oc.KubeClient().CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{})
202+
o.Expect(err).NotTo(o.HaveOccurred(), "failed to create liveness probe pod without probe-level termination")
203+
204+
g.By("Verify pod-level terminationGracePeriodSeconds is used (60s)")
205+
timeDiff, err := verifyProbeTermination(ctx, oc, namespace, "liveness-pod-level", "test", 60)
206+
o.Expect(err).NotTo(o.HaveOccurred(), "failed to get probe termination events")
207+
o.Expect(timeDiff).To(o.BeNumerically(">=", 60-3), "time difference is less than expected minimum")
208+
o.Expect(timeDiff).To(o.BeNumerically("<=", 60+10), "time difference is greater than expected maximum")
209+
})
210+
})
211+
212+
// verifyProbeTermination verifies that the probe termination grace period is honored
213+
// by checking the time difference between probe failure (Killing) and container restart (Started) events
214+
// Returns the time difference in seconds, or an error if events are not found
215+
func verifyProbeTermination(ctx context.Context, oc *exutil.CLI, namespace, podName, containerName string, expectedTerminationSec int) (int, error) {
216+
var timeDiff int
217+
err := wait.PollUntilContextTimeout(ctx, 10*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) {
218+
// Get events using the Events API
219+
events, err := oc.KubeClient().CoreV1().Events(namespace).List(ctx, metav1.ListOptions{
220+
FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.kind=Pod", podName),
221+
})
222+
if err != nil {
223+
e2e.Logf("Error getting events: %v", err)
224+
return false, nil
225+
}
226+
227+
// Look for probe failure (Killing) and container restart (Started) events
228+
var killingEvent, startedEvent *corev1.Event
229+
for i := range events.Items {
230+
event := &events.Items[i]
231+
if event.Reason == "Killing" && strings.Contains(event.Message, containerName) &&
232+
strings.Contains(event.Message, "failed") && strings.Contains(event.Message, "probe") {
233+
if killingEvent == nil || event.LastTimestamp.Time.After(killingEvent.LastTimestamp.Time) {
234+
killingEvent = event
235+
}
236+
}
237+
if event.Reason == "Started" && strings.Contains(event.Message, "Started container") {
238+
// Find Started event after the Killing event
239+
if killingEvent != nil && event.FirstTimestamp.Time.After(killingEvent.LastTimestamp.Time) {
240+
if startedEvent == nil || event.FirstTimestamp.Time.Before(startedEvent.FirstTimestamp.Time) {
241+
startedEvent = event
242+
}
243+
}
244+
}
245+
}
246+
247+
if killingEvent == nil || startedEvent == nil {
248+
e2e.Logf("Waiting for probe failure (Killing) and container restart (Started) events")
249+
return false, nil
250+
}
251+
252+
e2e.Logf("Killing event: %s at %v", killingEvent.Message, killingEvent.LastTimestamp)
253+
e2e.Logf("Started event: %s at %v", startedEvent.Message, startedEvent.FirstTimestamp)
254+
255+
// Calculate time difference in seconds
256+
timeDiff = int(startedEvent.FirstTimestamp.Sub(killingEvent.LastTimestamp.Time).Seconds())
257+
e2e.Logf("Time difference: %d seconds (expected: %d ±10 seconds)", timeDiff, expectedTerminationSec)
258+
259+
return true, nil
260+
})
261+
if err != nil {
262+
return 0, err
263+
}
264+
return timeDiff, nil
265+
}

0 commit comments

Comments
 (0)