@@ -4,15 +4,19 @@ import (
44 "context"
55 "fmt"
66 "path/filepath"
7+ "strconv"
78 "strings"
89 "time"
910
1011 g "github.com/onsi/ginkgo/v2"
1112 o "github.com/onsi/gomega"
13+ ote "github.com/openshift-eng/openshift-tests-extension/pkg/ginkgo"
1214
1315 configv1 "github.com/openshift/api/config/v1"
1416 "github.com/openshift/origin/test/extended/imagepolicy"
17+ corev1 "k8s.io/api/core/v1"
1518 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
19+ "k8s.io/apimachinery/pkg/util/intstr"
1620 utilrand "k8s.io/apimachinery/pkg/util/rand"
1721 "k8s.io/apimachinery/pkg/util/wait"
1822 e2e "k8s.io/kubernetes/test/e2e/framework"
@@ -164,6 +168,284 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager",
164168 e2e .Logf ("/dev/fuse mount output: %s" , output )
165169 o .Expect (output ).To (o .ContainSubstring ("fuse" ), "dev fuse is not mounted inside pod" )
166170 })
171+
172+ //author: minmli@redhat.com
173+ //migrated from openshift-tests-private
174+ //automates: https://issues.redhat.com/browse/OCPBUGS-44493
175+ g .It ("[OTP] add configurable terminationGracePeriodSeconds to liveness and startup probes [OCP-44493]" , ote .Informing (), func () {
176+ ctx := context .Background ()
177+ var err error
178+
179+ oc .SetupProject ()
180+ namespace := oc .Namespace ()
181+
// parseDurationToSeconds converts an event age as printed in the Age column
// of `oc describe pod` (for example "45s", "1m30s" or "1h5m") into whole
// seconds.
//
// Parsing is delegated to time.ParseDuration, so every unit it understands is
// accepted and any input that is not a duration (an empty string, a
// placeholder such as "<unknown>", a bare number without a unit) is reported
// as an error instead of being silently treated as 0 seconds. Sub-second
// remainders are truncated.
//
// NOTE(review): ages that use a day or year unit (e.g. "2d3h") are not
// understood by time.ParseDuration and yield an error; that is acceptable
// here because callers only compare events that are a few minutes old.
var parseDurationToSeconds = func(durationStr string) (int, error) {
	d, err := time.ParseDuration(durationStr)
	if err != nil {
		return 0, fmt.Errorf("parsing event age %q: %w", durationStr, err)
	}
	return int(d / time.Second), nil
}
212+
213+ // Helper to verify probe termination period
214+ verifyProbeTermination := func (podName , containerName string , expectedTerminationSec int ) error {
215+ return wait .PollUntilContextTimeout (ctx , 10 * time .Second , 5 * time .Minute , true , func (ctx context.Context ) (bool , error ) {
216+ podDesc , err := oc .AsAdmin ().WithoutNamespace ().Run ("describe" ).Args ("pod" , podName , "-n" , namespace ).Output ()
217+ if err != nil {
218+ e2e .Logf ("Error describing pod: %v" , err )
219+ return false , nil
220+ }
221+
222+ // Look for probe failure (killing) and container restart events
223+ // Event format: "Normal Killing <time> kubelet Container <name> failed <probe> probe, will be restarted"
224+ // Event format: "Normal Started <time> kubelet Container started"
225+ killingLine := ""
226+ restartLine := ""
227+
228+ inEvents := false
229+ for _ , line := range strings .Split (podDesc , "\n " ) {
230+ if strings .Contains (line , "Events:" ) {
231+ inEvents = true
232+ continue
233+ }
234+ if ! inEvents {
235+ continue
236+ }
237+
238+ // Look for killing event with container name
239+ if strings .Contains (line , "Killing" ) && strings .Contains (line , containerName ) &&
240+ strings .Contains (line , "failed" ) && strings .Contains (line , "probe" ) &&
241+ strings .Contains (line , "will be restarted" ) {
242+ killingLine = line
243+ }
244+ // Look for Started event after Killing
245+ if killingLine != "" && strings .Contains (line , "Started" ) && strings .Contains (line , "Container started" ) {
246+ restartLine = line
247+ break
248+ }
249+ }
250+
251+ if killingLine == "" || restartLine == "" {
252+ e2e .Logf ("Waiting for probe failure (killing) and container restart events" )
253+ return false , nil
254+ }
255+
256+ e2e .Logf ("Killing event: %s" , killingLine )
257+ e2e .Logf ("Restart event: %s" , restartLine )
258+
259+ // Extract timestamps (format: "1m30s" or "45s")
260+ // Event format: "Normal Killing 2m30s kubelet Container..."
261+ killingFields := strings .Fields (killingLine )
262+ restartFields := strings .Fields (restartLine )
263+ if len (killingFields ) < 3 || len (restartFields ) < 3 {
264+ e2e .Logf ("Unable to parse event timestamps" )
265+ return false , nil
266+ }
267+
268+ killingTime := killingFields [2 ]
269+ restartTime := restartFields [2 ]
270+
271+ killingSec , err := parseDurationToSeconds (killingTime )
272+ if err != nil {
273+ e2e .Logf ("Error parsing killing time: %v" , err )
274+ return false , nil
275+ }
276+
277+ restartSec , err := parseDurationToSeconds (restartTime )
278+ if err != nil {
279+ e2e .Logf ("Error parsing restart time: %v" , err )
280+ return false , nil
281+ }
282+
283+ // Time difference: killing happened earlier, restart happened later
284+ // So we need to calculate how long between them
285+ timeDiff := killingSec - restartSec
286+ e2e .Logf ("Time difference: %d seconds (expected: %d ±10 seconds)" , timeDiff , expectedTerminationSec )
287+
288+ // Allow range: [expectedTerminationSec-3, expectedTerminationSec+10]
289+ if timeDiff >= (expectedTerminationSec - 3 ) && timeDiff <= (expectedTerminationSec + 10 ) {
290+ e2e .Logf ("Termination grace period check passed" )
291+ return true , nil
292+ }
293+
294+ e2e .Logf ("Time difference %d is outside expected range [%d, %d]" , timeDiff , expectedTerminationSec - 3 , expectedTerminationSec + 10 )
295+ return false , nil
296+ })
297+ }
298+
299+ g .By ("Test liveness probe with probe-level terminationGracePeriodSeconds" )
300+ livenessPod := & corev1.Pod {
301+ ObjectMeta : metav1.ObjectMeta {
302+ Name : "liveness-probe" ,
303+ Namespace : namespace ,
304+ },
305+ Spec : corev1.PodSpec {
306+ TerminationGracePeriodSeconds : & []int64 {60 }[0 ],
307+ SecurityContext : & corev1.PodSecurityContext {
308+ RunAsNonRoot : & []bool {true }[0 ],
309+ SeccompProfile : & corev1.SeccompProfile {
310+ Type : corev1 .SeccompProfileTypeRuntimeDefault ,
311+ },
312+ },
313+ Containers : []corev1.Container {
314+ {
315+ Name : "test" ,
316+ Image : "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0" ,
317+ SecurityContext : & corev1.SecurityContext {
318+ AllowPrivilegeEscalation : & []bool {false }[0 ],
319+ Capabilities : & corev1.Capabilities {
320+ Drop : []corev1.Capability {"ALL" },
321+ },
322+ },
323+ Command : []string {"sh" , "-c" , "sleep 100000000" },
324+ Ports : []corev1.ContainerPort {
325+ {ContainerPort : 8080 },
326+ },
327+ LivenessProbe : & corev1.Probe {
328+ ProbeHandler : corev1.ProbeHandler {
329+ HTTPGet : & corev1.HTTPGetAction {
330+ Path : "/healthz" ,
331+ Port : intstr .FromInt (8080 ),
332+ },
333+ },
334+ FailureThreshold : 1 ,
335+ PeriodSeconds : 60 ,
336+ TerminationGracePeriodSeconds : & []int64 {10 }[0 ],
337+ },
338+ },
339+ },
340+ },
341+ }
342+
343+ _ , err = oc .KubeClient ().CoreV1 ().Pods (namespace ).Create (ctx , livenessPod , metav1.CreateOptions {})
344+ o .Expect (err ).NotTo (o .HaveOccurred (), "failed to create liveness probe pod" )
345+
346+ err = verifyProbeTermination ("liveness-probe" , "test" , 10 )
347+ o .Expect (err ).NotTo (o .HaveOccurred (), "liveness probe termination grace period not honored" )
348+
349+ g .By ("Test startup probe with probe-level terminationGracePeriodSeconds" )
350+ startupPod := & corev1.Pod {
351+ ObjectMeta : metav1.ObjectMeta {
352+ Name : "startup-probe" ,
353+ Namespace : namespace ,
354+ },
355+ Spec : corev1.PodSpec {
356+ TerminationGracePeriodSeconds : & []int64 {60 }[0 ],
357+ SecurityContext : & corev1.PodSecurityContext {
358+ RunAsNonRoot : & []bool {true }[0 ],
359+ SeccompProfile : & corev1.SeccompProfile {
360+ Type : corev1 .SeccompProfileTypeRuntimeDefault ,
361+ },
362+ },
363+ Containers : []corev1.Container {
364+ {
365+ Name : "teststartup" ,
366+ Image : "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0" ,
367+ SecurityContext : & corev1.SecurityContext {
368+ AllowPrivilegeEscalation : & []bool {false }[0 ],
369+ Capabilities : & corev1.Capabilities {
370+ Drop : []corev1.Capability {"ALL" },
371+ },
372+ },
373+ Command : []string {"sh" , "-c" , "sleep 100000000" },
374+ Ports : []corev1.ContainerPort {
375+ {ContainerPort : 8080 },
376+ },
377+ StartupProbe : & corev1.Probe {
378+ ProbeHandler : corev1.ProbeHandler {
379+ HTTPGet : & corev1.HTTPGetAction {
380+ Path : "/healthz" ,
381+ Port : intstr .FromInt (8080 ),
382+ },
383+ },
384+ FailureThreshold : 1 ,
385+ PeriodSeconds : 60 ,
386+ TerminationGracePeriodSeconds : & []int64 {10 }[0 ],
387+ },
388+ },
389+ },
390+ },
391+ }
392+
393+ _ , err = oc .KubeClient ().CoreV1 ().Pods (namespace ).Create (ctx , startupPod , metav1.CreateOptions {})
394+ o .Expect (err ).NotTo (o .HaveOccurred (), "failed to create startup probe pod" )
395+
396+ err = verifyProbeTermination ("startup-probe" , "teststartup" , 10 )
397+ o .Expect (err ).NotTo (o .HaveOccurred (), "startup probe termination grace period not honored" )
398+
399+ g .By ("Test liveness probe without probe-level terminationGracePeriodSeconds (should use pod-level)" )
400+ livenessPodNoProbeTerm := & corev1.Pod {
401+ ObjectMeta : metav1.ObjectMeta {
402+ Name : "liveness-probe-no-term" ,
403+ Namespace : namespace ,
404+ },
405+ Spec : corev1.PodSpec {
406+ TerminationGracePeriodSeconds : & []int64 {60 }[0 ],
407+ SecurityContext : & corev1.PodSecurityContext {
408+ RunAsNonRoot : & []bool {true }[0 ],
409+ SeccompProfile : & corev1.SeccompProfile {
410+ Type : corev1 .SeccompProfileTypeRuntimeDefault ,
411+ },
412+ },
413+ Containers : []corev1.Container {
414+ {
415+ Name : "test" ,
416+ Image : "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0" ,
417+ SecurityContext : & corev1.SecurityContext {
418+ AllowPrivilegeEscalation : & []bool {false }[0 ],
419+ Capabilities : & corev1.Capabilities {
420+ Drop : []corev1.Capability {"ALL" },
421+ },
422+ },
423+ Command : []string {"sh" , "-c" , "sleep 100000000" },
424+ Ports : []corev1.ContainerPort {
425+ {ContainerPort : 8080 },
426+ },
427+ LivenessProbe : & corev1.Probe {
428+ ProbeHandler : corev1.ProbeHandler {
429+ HTTPGet : & corev1.HTTPGetAction {
430+ Path : "/healthz" ,
431+ Port : intstr .FromInt (8080 ),
432+ },
433+ },
434+ FailureThreshold : 1 ,
435+ PeriodSeconds : 60 ,
436+ // No TerminationGracePeriodSeconds - should use pod-level (60s)
437+ },
438+ },
439+ },
440+ },
441+ }
442+
443+ _ , err = oc .KubeClient ().CoreV1 ().Pods (namespace ).Create (ctx , livenessPodNoProbeTerm , metav1.CreateOptions {})
444+ o .Expect (err ).NotTo (o .HaveOccurred (), "failed to create liveness probe pod without probe termination" )
445+
446+ err = verifyProbeTermination ("liveness-probe-no-term" , "test" , 60 )
447+ o .Expect (err ).NotTo (o .HaveOccurred (), "liveness probe should use pod-level termination grace period when probe-level not set" )
448+ })
167449})
168450
169451// author: asahay@redhat.com
0 commit comments