@@ -4,15 +4,19 @@ import (
44 "context"
55 "fmt"
66 "path/filepath"
7+ "strconv"
78 "strings"
89 "time"
910
1011 g "github.com/onsi/ginkgo/v2"
1112 o "github.com/onsi/gomega"
13+ ote "github.com/openshift-eng/openshift-tests-extension/pkg/ginkgo"
1214
1315 configv1 "github.com/openshift/api/config/v1"
1416 "github.com/openshift/origin/test/extended/imagepolicy"
17+ corev1 "k8s.io/api/core/v1"
1518 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
19+ "k8s.io/apimachinery/pkg/util/intstr"
1620 utilrand "k8s.io/apimachinery/pkg/util/rand"
1721 "k8s.io/apimachinery/pkg/util/wait"
1822 e2e "k8s.io/kubernetes/test/e2e/framework"
@@ -164,6 +168,274 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager",
164168 e2e .Logf ("/dev/fuse mount output: %s" , output )
165169 o .Expect (output ).To (o .ContainSubstring ("fuse" ), "dev fuse is not mounted inside pod" )
166170 })
171+
172+ //author: minmli@redhat.com
173+ //migrated from openshift-tests-private
174+ //automates: https://issues.redhat.com/browse/OCPBUGS-44493
175+ g .It ("[OTP] add configurable terminationGracePeriod to liveness and startup probes [OCP-44493]" , ote .Informing (), func () {
176+ ctx := context .Background ()
177+
178+ g .By ("Check if featureSet is empty in cluster" )
179+ featureSet , err := oc .AsAdmin ().WithoutNamespace ().Run ("get" ).Args ("featuregate" , "cluster" , "-o=jsonpath={.spec.featureSet}" ).Output ()
180+ o .Expect (err ).NotTo (o .HaveOccurred (), "failed to get featuregate" )
181+ e2e .Logf ("featureSet is: %s" , featureSet )
182+ if featureSet != "" {
183+ g .Skip ("featureSet is not empty, skipping test" )
184+ }
185+
186+ oc .SetupProject ()
187+ namespace := oc .Namespace ()
188+
189+ // Helper function to parse duration string like "1m30s" or "45s" to seconds
190+ parseDurationToSeconds := func (durationStr string ) (int , error ) {
191+ var totalSeconds int
192+ if strings .Contains (durationStr , "m" ) {
193+ parts := strings .Split (durationStr , "m" )
194+ minutes , err := strconv .Atoi (parts [0 ])
195+ if err != nil {
196+ return 0 , err
197+ }
198+ totalSeconds = minutes * 60
199+ if len (parts ) > 1 && strings .Contains (parts [1 ], "s" ) {
200+ secStr := strings .TrimSuffix (parts [1 ], "s" )
201+ if secStr != "" {
202+ seconds , err := strconv .Atoi (secStr )
203+ if err != nil {
204+ return 0 , err
205+ }
206+ totalSeconds += seconds
207+ }
208+ }
209+ } else if strings .Contains (durationStr , "s" ) {
210+ secStr := strings .TrimSuffix (durationStr , "s" )
211+ seconds , err := strconv .Atoi (secStr )
212+ if err != nil {
213+ return 0 , err
214+ }
215+ totalSeconds = seconds
216+ }
217+ return totalSeconds , nil
218+ }
219+
220+ // Helper to verify probe termination period
221+ verifyProbeTermination := func (podName string , expectedTerminationSec int ) error {
222+ return wait .PollUntilContextTimeout (ctx , 10 * time .Second , 4 * time .Minute , true , func (ctx context.Context ) (bool , error ) {
223+ podDesc , err := oc .AsAdmin ().WithoutNamespace ().Run ("describe" ).Args ("pod" , podName , "-n" , namespace ).Output ()
224+ if err != nil {
225+ e2e .Logf ("Error describing pod: %v" , err )
226+ return false , nil
227+ }
228+
229+ // Look for probe failure and container start events
230+ probeFailLine := ""
231+ containerStartLine := ""
232+ for _ , line := range strings .Split (podDesc , "\n " ) {
233+ if strings .Contains (line , "Container" ) && strings .Contains (line , "failed" ) && strings .Contains (line , "probe" ) && strings .Contains (line , "will be restarted" ) {
234+ probeFailLine = line
235+ }
236+ if strings .Contains (line , "Started container" ) {
237+ containerStartLine = line
238+ }
239+ }
240+
241+ if probeFailLine == "" || containerStartLine == "" {
242+ e2e .Logf ("Waiting for probe failure and container start events" )
243+ return false , nil
244+ }
245+
246+ e2e .Logf ("Probe failure event: %s" , probeFailLine )
247+ e2e .Logf ("Container start event: %s" , containerStartLine )
248+
249+ // Extract timestamps (format: "1m30s" or "45s")
250+ probeFailFields := strings .Fields (probeFailLine )
251+ containerStartFields := strings .Fields (containerStartLine )
252+ if len (probeFailFields ) < 3 || len (containerStartFields ) < 3 {
253+ e2e .Logf ("Unable to parse event timestamps" )
254+ return false , nil
255+ }
256+
257+ probeFailTime := probeFailFields [2 ]
258+ containerStartTime := containerStartFields [2 ]
259+
260+ probeFailSec , err := parseDurationToSeconds (probeFailTime )
261+ if err != nil {
262+ e2e .Logf ("Error parsing probe fail time: %v" , err )
263+ return false , nil
264+ }
265+
266+ containerStartSec , err := parseDurationToSeconds (containerStartTime )
267+ if err != nil {
268+ e2e .Logf ("Error parsing container start time: %v" , err )
269+ return false , nil
270+ }
271+
272+ timeDiff := probeFailSec - containerStartSec
273+ e2e .Logf ("Time difference: %d seconds (expected: %d ±10 seconds)" , timeDiff , expectedTerminationSec )
274+
275+ // Allow range: [expectedTerminationSec-3, expectedTerminationSec+10]
276+ if timeDiff >= (expectedTerminationSec - 3 ) && timeDiff <= (expectedTerminationSec + 10 ) {
277+ e2e .Logf ("Termination grace period check passed" )
278+ return true , nil
279+ }
280+
281+ e2e .Logf ("Time difference %d is outside expected range [%d, %d]" , timeDiff , expectedTerminationSec - 3 , expectedTerminationSec + 10 )
282+ return false , nil
283+ })
284+ }
285+
286+ g .By ("Test liveness probe with probe-level terminationGracePeriodSeconds" )
287+ livenessPod := & corev1.Pod {
288+ ObjectMeta : metav1.ObjectMeta {
289+ Name : "liveness-probe" ,
290+ Namespace : namespace ,
291+ },
292+ Spec : corev1.PodSpec {
293+ TerminationGracePeriodSeconds : & []int64 {60 }[0 ],
294+ SecurityContext : & corev1.PodSecurityContext {
295+ RunAsNonRoot : & []bool {true }[0 ],
296+ SeccompProfile : & corev1.SeccompProfile {
297+ Type : corev1 .SeccompProfileTypeRuntimeDefault ,
298+ },
299+ },
300+ Containers : []corev1.Container {
301+ {
302+ Name : "test" ,
303+ Image : "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0" ,
304+ SecurityContext : & corev1.SecurityContext {
305+ AllowPrivilegeEscalation : & []bool {false }[0 ],
306+ Capabilities : & corev1.Capabilities {
307+ Drop : []corev1.Capability {"ALL" },
308+ },
309+ },
310+ Command : []string {"bash" , "-c" , "sleep 100000000" },
311+ Ports : []corev1.ContainerPort {
312+ {ContainerPort : 8080 },
313+ },
314+ LivenessProbe : & corev1.Probe {
315+ ProbeHandler : corev1.ProbeHandler {
316+ HTTPGet : & corev1.HTTPGetAction {
317+ Path : "/healthz" ,
318+ Port : intstr .FromInt (8080 ),
319+ },
320+ },
321+ FailureThreshold : 1 ,
322+ PeriodSeconds : 60 ,
323+ TerminationGracePeriodSeconds : & []int64 {10 }[0 ],
324+ },
325+ },
326+ },
327+ },
328+ }
329+
330+ _ , err = oc .KubeClient ().CoreV1 ().Pods (namespace ).Create (ctx , livenessPod , metav1.CreateOptions {})
331+ o .Expect (err ).NotTo (o .HaveOccurred (), "failed to create liveness probe pod" )
332+ g .DeferCleanup (oc .KubeClient ().CoreV1 ().Pods (namespace ).Delete , ctx , "liveness-probe" , metav1.DeleteOptions {})
333+
334+ err = verifyProbeTermination ("liveness-probe" , 10 )
335+ o .Expect (err ).NotTo (o .HaveOccurred (), "liveness probe termination grace period not honored" )
336+
337+ g .By ("Test startup probe with probe-level terminationGracePeriodSeconds" )
338+ startupPod := & corev1.Pod {
339+ ObjectMeta : metav1.ObjectMeta {
340+ Name : "startup-probe" ,
341+ Namespace : namespace ,
342+ },
343+ Spec : corev1.PodSpec {
344+ TerminationGracePeriodSeconds : & []int64 {60 }[0 ],
345+ SecurityContext : & corev1.PodSecurityContext {
346+ RunAsNonRoot : & []bool {true }[0 ],
347+ SeccompProfile : & corev1.SeccompProfile {
348+ Type : corev1 .SeccompProfileTypeRuntimeDefault ,
349+ },
350+ },
351+ Containers : []corev1.Container {
352+ {
353+ Name : "teststartup" ,
354+ Image : "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0" ,
355+ SecurityContext : & corev1.SecurityContext {
356+ AllowPrivilegeEscalation : & []bool {false }[0 ],
357+ Capabilities : & corev1.Capabilities {
358+ Drop : []corev1.Capability {"ALL" },
359+ },
360+ },
361+ Command : []string {"bash" , "-c" , "sleep 100000000" },
362+ Ports : []corev1.ContainerPort {
363+ {ContainerPort : 8080 },
364+ },
365+ StartupProbe : & corev1.Probe {
366+ ProbeHandler : corev1.ProbeHandler {
367+ HTTPGet : & corev1.HTTPGetAction {
368+ Path : "/healthz" ,
369+ Port : intstr .FromInt (8080 ),
370+ },
371+ },
372+ FailureThreshold : 1 ,
373+ PeriodSeconds : 60 ,
374+ TerminationGracePeriodSeconds : & []int64 {10 }[0 ],
375+ },
376+ },
377+ },
378+ },
379+ }
380+
381+ _ , err = oc .KubeClient ().CoreV1 ().Pods (namespace ).Create (ctx , startupPod , metav1.CreateOptions {})
382+ o .Expect (err ).NotTo (o .HaveOccurred (), "failed to create startup probe pod" )
383+ g .DeferCleanup (oc .KubeClient ().CoreV1 ().Pods (namespace ).Delete , ctx , "startup-probe" , metav1.DeleteOptions {})
384+
385+ err = verifyProbeTermination ("startup-probe" , 10 )
386+ o .Expect (err ).NotTo (o .HaveOccurred (), "startup probe termination grace period not honored" )
387+
388+ g .By ("Test liveness probe without probe-level terminationGracePeriodSeconds (should use pod-level)" )
389+ livenessPodNoProbeTerm := & corev1.Pod {
390+ ObjectMeta : metav1.ObjectMeta {
391+ Name : "liveness-probe-no-term" ,
392+ Namespace : namespace ,
393+ },
394+ Spec : corev1.PodSpec {
395+ TerminationGracePeriodSeconds : & []int64 {60 }[0 ],
396+ SecurityContext : & corev1.PodSecurityContext {
397+ RunAsNonRoot : & []bool {true }[0 ],
398+ SeccompProfile : & corev1.SeccompProfile {
399+ Type : corev1 .SeccompProfileTypeRuntimeDefault ,
400+ },
401+ },
402+ Containers : []corev1.Container {
403+ {
404+ Name : "test" ,
405+ Image : "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0" ,
406+ SecurityContext : & corev1.SecurityContext {
407+ AllowPrivilegeEscalation : & []bool {false }[0 ],
408+ Capabilities : & corev1.Capabilities {
409+ Drop : []corev1.Capability {"ALL" },
410+ },
411+ },
412+ Command : []string {"bash" , "-c" , "sleep 100000000" },
413+ Ports : []corev1.ContainerPort {
414+ {ContainerPort : 8080 },
415+ },
416+ LivenessProbe : & corev1.Probe {
417+ ProbeHandler : corev1.ProbeHandler {
418+ HTTPGet : & corev1.HTTPGetAction {
419+ Path : "/healthz" ,
420+ Port : intstr .FromInt (8080 ),
421+ },
422+ },
423+ FailureThreshold : 1 ,
424+ PeriodSeconds : 60 ,
425+ // No TerminationGracePeriodSeconds - should use pod-level (60s)
426+ },
427+ },
428+ },
429+ },
430+ }
431+
432+ _ , err = oc .KubeClient ().CoreV1 ().Pods (namespace ).Create (ctx , livenessPodNoProbeTerm , metav1.CreateOptions {})
433+ o .Expect (err ).NotTo (o .HaveOccurred (), "failed to create liveness probe pod without probe termination" )
434+ g .DeferCleanup (oc .KubeClient ().CoreV1 ().Pods (namespace ).Delete , ctx , "liveness-probe-no-term" , metav1.DeleteOptions {})
435+
436+ err = verifyProbeTermination ("liveness-probe-no-term" , 60 )
437+ o .Expect (err ).NotTo (o .HaveOccurred (), "liveness probe should use pod-level termination grace period when probe-level not set" )
438+ })
167439})
168440
169441// author: asahay@redhat.com
0 commit comments