@@ -4,15 +4,19 @@ import (
44 "context"
55 "fmt"
66 "path/filepath"
7+ "strconv"
78 "strings"
89 "time"
910
1011 g "github.com/onsi/ginkgo/v2"
1112 o "github.com/onsi/gomega"
13+ ote "github.com/openshift-eng/openshift-tests-extension/pkg/ginkgo"
1214
1315 configv1 "github.com/openshift/api/config/v1"
1416 "github.com/openshift/origin/test/extended/imagepolicy"
17+ corev1 "k8s.io/api/core/v1"
1518 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
19+ "k8s.io/apimachinery/pkg/util/intstr"
1620 utilrand "k8s.io/apimachinery/pkg/util/rand"
1721 "k8s.io/apimachinery/pkg/util/wait"
1822 e2e "k8s.io/kubernetes/test/e2e/framework"
@@ -164,6 +168,287 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager",
164168 e2e .Logf ("/dev/fuse mount output: %s" , output )
165169 o .Expect (output ).To (o .ContainSubstring ("fuse" ), "dev fuse is not mounted inside pod" )
166170 })
171+
172+ //author: minmli@redhat.com
173+ //migrated from openshift-tests-private
174+ //automates: https://issues.redhat.com/browse/OCPBUGS-44493
175+ g .It ("[OTP] add configurable terminationGracePeriod to liveness and startup probes [OCP-44493]" , ote .Informing (), func () {
176+ ctx := context .Background ()
177+ var err error
178+
179+ oc .SetupProject ()
180+ namespace := oc .Namespace ()
181+
182+ // Helper function to parse duration string like "1m30s" or "45s" to seconds
183+ parseDurationToSeconds := func (durationStr string ) (int , error ) {
184+ var totalSeconds int
185+ if strings .Contains (durationStr , "m" ) {
186+ parts := strings .Split (durationStr , "m" )
187+ minutes , err := strconv .Atoi (parts [0 ])
188+ if err != nil {
189+ return 0 , err
190+ }
191+ totalSeconds = minutes * 60
192+ if len (parts ) > 1 && strings .Contains (parts [1 ], "s" ) {
193+ secStr := strings .TrimSuffix (parts [1 ], "s" )
194+ if secStr != "" {
195+ seconds , err := strconv .Atoi (secStr )
196+ if err != nil {
197+ return 0 , err
198+ }
199+ totalSeconds += seconds
200+ }
201+ }
202+ } else if strings .Contains (durationStr , "s" ) {
203+ secStr := strings .TrimSuffix (durationStr , "s" )
204+ seconds , err := strconv .Atoi (secStr )
205+ if err != nil {
206+ return 0 , err
207+ }
208+ totalSeconds = seconds
209+ }
210+ return totalSeconds , nil
211+ }
212+
213+ // Helper to verify probe termination period
214+ verifyProbeTermination := func (podName , containerName string , expectedTerminationSec int ) error {
215+ return wait .PollUntilContextTimeout (ctx , 10 * time .Second , 5 * time .Minute , true , func (ctx context.Context ) (bool , error ) {
216+ podDesc , err := oc .AsAdmin ().WithoutNamespace ().Run ("describe" ).Args ("pod" , podName , "-n" , namespace ).Output ()
217+ if err != nil {
218+ e2e .Logf ("Error describing pod: %v" , err )
219+ return false , nil
220+ }
221+
222+ // Look for probe failure (killing) and container restart events
223+ // Event format: "Normal Killing <time> kubelet Container <name> failed <probe> probe, will be restarted"
224+ // Event format: "Normal Started <time> kubelet Container started"
225+ killingLine := ""
226+ restartLine := ""
227+
228+ inEvents := false
229+ for _ , line := range strings .Split (podDesc , "\n " ) {
230+ if strings .Contains (line , "Events:" ) {
231+ inEvents = true
232+ continue
233+ }
234+ if ! inEvents {
235+ continue
236+ }
237+
238+ // Look for killing event with container name
239+ if strings .Contains (line , "Killing" ) && strings .Contains (line , containerName ) &&
240+ strings .Contains (line , "failed" ) && strings .Contains (line , "probe" ) &&
241+ strings .Contains (line , "will be restarted" ) {
242+ killingLine = line
243+ }
244+ // Look for Started event after Killing
245+ if killingLine != "" && strings .Contains (line , "Started" ) && strings .Contains (line , "Container started" ) {
246+ restartLine = line
247+ break
248+ }
249+ }
250+
251+ if killingLine == "" || restartLine == "" {
252+ e2e .Logf ("Waiting for probe failure (killing) and container restart events" )
253+ return false , nil
254+ }
255+
256+ e2e .Logf ("Killing event: %s" , killingLine )
257+ e2e .Logf ("Restart event: %s" , restartLine )
258+
259+ // Extract timestamps (format: "1m30s" or "45s")
260+ // Event format: "Normal Killing 2m30s kubelet Container..."
261+ killingFields := strings .Fields (killingLine )
262+ restartFields := strings .Fields (restartLine )
263+ if len (killingFields ) < 3 || len (restartFields ) < 3 {
264+ e2e .Logf ("Unable to parse event timestamps" )
265+ return false , nil
266+ }
267+
268+ killingTime := killingFields [2 ]
269+ restartTime := restartFields [2 ]
270+
271+ killingSec , err := parseDurationToSeconds (killingTime )
272+ if err != nil {
273+ e2e .Logf ("Error parsing killing time: %v" , err )
274+ return false , nil
275+ }
276+
277+ restartSec , err := parseDurationToSeconds (restartTime )
278+ if err != nil {
279+ e2e .Logf ("Error parsing restart time: %v" , err )
280+ return false , nil
281+ }
282+
283+ // Time difference: killing happened earlier, restart happened later
284+ // So we need to calculate how long between them
285+ timeDiff := killingSec - restartSec
286+ e2e .Logf ("Time difference: %d seconds (expected: %d ±10 seconds)" , timeDiff , expectedTerminationSec )
287+
288+ // Allow range: [expectedTerminationSec-3, expectedTerminationSec+10]
289+ if timeDiff >= (expectedTerminationSec - 3 ) && timeDiff <= (expectedTerminationSec + 10 ) {
290+ e2e .Logf ("Termination grace period check passed" )
291+ return true , nil
292+ }
293+
294+ e2e .Logf ("Time difference %d is outside expected range [%d, %d]" , timeDiff , expectedTerminationSec - 3 , expectedTerminationSec + 10 )
295+ return false , nil
296+ })
297+ }
298+
299+ g .By ("Test liveness probe with probe-level terminationGracePeriodSeconds" )
300+ livenessPod := & corev1.Pod {
301+ ObjectMeta : metav1.ObjectMeta {
302+ Name : "liveness-probe" ,
303+ Namespace : namespace ,
304+ },
305+ Spec : corev1.PodSpec {
306+ TerminationGracePeriodSeconds : & []int64 {60 }[0 ],
307+ SecurityContext : & corev1.PodSecurityContext {
308+ RunAsNonRoot : & []bool {true }[0 ],
309+ SeccompProfile : & corev1.SeccompProfile {
310+ Type : corev1 .SeccompProfileTypeRuntimeDefault ,
311+ },
312+ },
313+ Containers : []corev1.Container {
314+ {
315+ Name : "test" ,
316+ Image : "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0" ,
317+ SecurityContext : & corev1.SecurityContext {
318+ AllowPrivilegeEscalation : & []bool {false }[0 ],
319+ Capabilities : & corev1.Capabilities {
320+ Drop : []corev1.Capability {"ALL" },
321+ },
322+ },
323+ Command : []string {"bash" , "-c" , "sleep 100000000" },
324+ Ports : []corev1.ContainerPort {
325+ {ContainerPort : 8080 },
326+ },
327+ LivenessProbe : & corev1.Probe {
328+ ProbeHandler : corev1.ProbeHandler {
329+ HTTPGet : & corev1.HTTPGetAction {
330+ Path : "/healthz" ,
331+ Port : intstr .FromInt (8080 ),
332+ },
333+ },
334+ FailureThreshold : 1 ,
335+ PeriodSeconds : 60 ,
336+ TerminationGracePeriodSeconds : & []int64 {10 }[0 ],
337+ },
338+ },
339+ },
340+ },
341+ }
342+
343+ _ , err = oc .KubeClient ().CoreV1 ().Pods (namespace ).Create (ctx , livenessPod , metav1.CreateOptions {})
344+ o .Expect (err ).NotTo (o .HaveOccurred (), "failed to create liveness probe pod" )
345+ g .DeferCleanup (oc .KubeClient ().CoreV1 ().Pods (namespace ).Delete , ctx , "liveness-probe" , metav1.DeleteOptions {})
346+
347+ err = verifyProbeTermination ("liveness-probe" , "test" , 10 )
348+ o .Expect (err ).NotTo (o .HaveOccurred (), "liveness probe termination grace period not honored" )
349+
350+ g .By ("Test startup probe with probe-level terminationGracePeriodSeconds" )
351+ startupPod := & corev1.Pod {
352+ ObjectMeta : metav1.ObjectMeta {
353+ Name : "startup-probe" ,
354+ Namespace : namespace ,
355+ },
356+ Spec : corev1.PodSpec {
357+ TerminationGracePeriodSeconds : & []int64 {60 }[0 ],
358+ SecurityContext : & corev1.PodSecurityContext {
359+ RunAsNonRoot : & []bool {true }[0 ],
360+ SeccompProfile : & corev1.SeccompProfile {
361+ Type : corev1 .SeccompProfileTypeRuntimeDefault ,
362+ },
363+ },
364+ Containers : []corev1.Container {
365+ {
366+ Name : "teststartup" ,
367+ Image : "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0" ,
368+ SecurityContext : & corev1.SecurityContext {
369+ AllowPrivilegeEscalation : & []bool {false }[0 ],
370+ Capabilities : & corev1.Capabilities {
371+ Drop : []corev1.Capability {"ALL" },
372+ },
373+ },
374+ Command : []string {"bash" , "-c" , "sleep 100000000" },
375+ Ports : []corev1.ContainerPort {
376+ {ContainerPort : 8080 },
377+ },
378+ StartupProbe : & corev1.Probe {
379+ ProbeHandler : corev1.ProbeHandler {
380+ HTTPGet : & corev1.HTTPGetAction {
381+ Path : "/healthz" ,
382+ Port : intstr .FromInt (8080 ),
383+ },
384+ },
385+ FailureThreshold : 1 ,
386+ PeriodSeconds : 60 ,
387+ TerminationGracePeriodSeconds : & []int64 {10 }[0 ],
388+ },
389+ },
390+ },
391+ },
392+ }
393+
394+ _ , err = oc .KubeClient ().CoreV1 ().Pods (namespace ).Create (ctx , startupPod , metav1.CreateOptions {})
395+ o .Expect (err ).NotTo (o .HaveOccurred (), "failed to create startup probe pod" )
396+ g .DeferCleanup (oc .KubeClient ().CoreV1 ().Pods (namespace ).Delete , ctx , "startup-probe" , metav1.DeleteOptions {})
397+
398+ err = verifyProbeTermination ("startup-probe" , "teststartup" , 10 )
399+ o .Expect (err ).NotTo (o .HaveOccurred (), "startup probe termination grace period not honored" )
400+
401+ g .By ("Test liveness probe without probe-level terminationGracePeriodSeconds (should use pod-level)" )
402+ livenessPodNoProbeTerm := & corev1.Pod {
403+ ObjectMeta : metav1.ObjectMeta {
404+ Name : "liveness-probe-no-term" ,
405+ Namespace : namespace ,
406+ },
407+ Spec : corev1.PodSpec {
408+ TerminationGracePeriodSeconds : & []int64 {60 }[0 ],
409+ SecurityContext : & corev1.PodSecurityContext {
410+ RunAsNonRoot : & []bool {true }[0 ],
411+ SeccompProfile : & corev1.SeccompProfile {
412+ Type : corev1 .SeccompProfileTypeRuntimeDefault ,
413+ },
414+ },
415+ Containers : []corev1.Container {
416+ {
417+ Name : "test" ,
418+ Image : "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0" ,
419+ SecurityContext : & corev1.SecurityContext {
420+ AllowPrivilegeEscalation : & []bool {false }[0 ],
421+ Capabilities : & corev1.Capabilities {
422+ Drop : []corev1.Capability {"ALL" },
423+ },
424+ },
425+ Command : []string {"bash" , "-c" , "sleep 100000000" },
426+ Ports : []corev1.ContainerPort {
427+ {ContainerPort : 8080 },
428+ },
429+ LivenessProbe : & corev1.Probe {
430+ ProbeHandler : corev1.ProbeHandler {
431+ HTTPGet : & corev1.HTTPGetAction {
432+ Path : "/healthz" ,
433+ Port : intstr .FromInt (8080 ),
434+ },
435+ },
436+ FailureThreshold : 1 ,
437+ PeriodSeconds : 60 ,
438+ // No TerminationGracePeriodSeconds - should use pod-level (60s)
439+ },
440+ },
441+ },
442+ },
443+ }
444+
445+ _ , err = oc .KubeClient ().CoreV1 ().Pods (namespace ).Create (ctx , livenessPodNoProbeTerm , metav1.CreateOptions {})
446+ o .Expect (err ).NotTo (o .HaveOccurred (), "failed to create liveness probe pod without probe termination" )
447+ g .DeferCleanup (oc .KubeClient ().CoreV1 ().Pods (namespace ).Delete , ctx , "liveness-probe-no-term" , metav1.DeleteOptions {})
448+
449+ err = verifyProbeTermination ("liveness-probe-no-term" , "test" , 60 )
450+ o .Expect (err ).NotTo (o .HaveOccurred (), "liveness probe should use pod-level termination grace period when probe-level not set" )
451+ })
167452})
168453
169454// author: asahay@redhat.com
0 commit comments