@@ -174,14 +174,7 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager",
174174 //automates: https://issues.redhat.com/browse/OCPBUGS-44493
175175 g .It ("[OTP] add configurable terminationGracePeriod to liveness and startup probes [OCP-44493]" , ote .Informing (), func () {
176176 ctx := context .Background ()
177-
178- g .By ("Check if featureSet is empty in cluster" )
179- featureSet , err := oc .AsAdmin ().WithoutNamespace ().Run ("get" ).Args ("featuregate" , "cluster" , "-o=jsonpath={.spec.featureSet}" ).Output ()
180- o .Expect (err ).NotTo (o .HaveOccurred (), "failed to get featuregate" )
181- e2e .Logf ("featureSet is: %s" , featureSet )
182- if featureSet != "" {
183- g .Skip ("featureSet is not empty, skipping test" )
184- }
177+ var err error
185178
186179 oc .SetupProject ()
187180 namespace := oc .Namespace ()
@@ -218,58 +211,78 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager",
218211 }
219212
220213 // Helper to verify probe termination period
221- verifyProbeTermination := func (podName string , expectedTerminationSec int ) error {
222- return wait .PollUntilContextTimeout (ctx , 10 * time .Second , 4 * time .Minute , true , func (ctx context.Context ) (bool , error ) {
214+ verifyProbeTermination := func (podName , containerName string , expectedTerminationSec int ) error {
215+ return wait .PollUntilContextTimeout (ctx , 10 * time .Second , 5 * time .Minute , true , func (ctx context.Context ) (bool , error ) {
223216 podDesc , err := oc .AsAdmin ().WithoutNamespace ().Run ("describe" ).Args ("pod" , podName , "-n" , namespace ).Output ()
224217 if err != nil {
225218 e2e .Logf ("Error describing pod: %v" , err )
226219 return false , nil
227220 }
228221
229- // Look for probe failure and container start events
230- probeFailLine := ""
231- containerStartLine := ""
222+ // Look for probe failure (killing) and container restart events
223+ // Event format: "Normal Killing <time> kubelet Container <name> failed <probe> probe, will be restarted"
224+ // Event format: "Normal Started <time> kubelet Container started"
225+ killingLine := ""
226+ restartLine := ""
227+
228+ inEvents := false
232229 for _ , line := range strings .Split (podDesc , "\n " ) {
233- if strings .Contains (line , "Container" ) && strings .Contains (line , "failed" ) && strings .Contains (line , "probe" ) && strings .Contains (line , "will be restarted" ) {
234- probeFailLine = line
230+ if strings .Contains (line , "Events:" ) {
231+ inEvents = true
232+ continue
233+ }
234+ if ! inEvents {
235+ continue
236+ }
237+
238+ // Look for killing event with container name
239+ if strings .Contains (line , "Killing" ) && strings .Contains (line , containerName ) &&
240+ strings .Contains (line , "failed" ) && strings .Contains (line , "probe" ) &&
241+ strings .Contains (line , "will be restarted" ) {
242+ killingLine = line
235243 }
236- if strings .Contains (line , "Started container" ) {
237- containerStartLine = line
244+ // Look for Started event after Killing
245+ if killingLine != "" && strings .Contains (line , "Started" ) && strings .Contains (line , "Container started" ) {
246+ restartLine = line
247+ break
238248 }
239249 }
240250
241- if probeFailLine == "" || containerStartLine == "" {
242- e2e .Logf ("Waiting for probe failure and container start events" )
251+ if killingLine == "" || restartLine == "" {
252+ e2e .Logf ("Waiting for probe failure (killing) and container restart events" )
243253 return false , nil
244254 }
245255
246- e2e .Logf ("Probe failure event: %s" , probeFailLine )
247- e2e .Logf ("Container start event: %s" , containerStartLine )
256+ e2e .Logf ("Killing event: %s" , killingLine )
257+ e2e .Logf ("Restart event: %s" , restartLine )
248258
249259 // Extract event ages (relative durations such as "1m30s" or "45s"), not absolute timestamps
250- probeFailFields := strings .Fields (probeFailLine )
251- containerStartFields := strings .Fields (containerStartLine )
252- if len (probeFailFields ) < 3 || len (containerStartFields ) < 3 {
260+ // Event format: "Normal Killing 2m30s kubelet Container..."
261+ killingFields := strings .Fields (killingLine )
262+ restartFields := strings .Fields (restartLine )
263+ if len (killingFields ) < 3 || len (restartFields ) < 3 {
253264 e2e .Logf ("Unable to parse event timestamps" )
254265 return false , nil
255266 }
256267
257- probeFailTime := probeFailFields [2 ]
258- containerStartTime := containerStartFields [2 ]
268+ killingTime := killingFields [2 ]
269+ restartTime := restartFields [2 ]
259270
260- probeFailSec , err := parseDurationToSeconds (probeFailTime )
271+ killingSec , err := parseDurationToSeconds (killingTime )
261272 if err != nil {
262- e2e .Logf ("Error parsing probe fail time: %v" , err )
273+ e2e .Logf ("Error parsing killing time: %v" , err )
263274 return false , nil
264275 }
265276
266- containerStartSec , err := parseDurationToSeconds (containerStartTime )
277+ restartSec , err := parseDurationToSeconds (restartTime )
267278 if err != nil {
268- e2e .Logf ("Error parsing container start time: %v" , err )
279+ e2e .Logf ("Error parsing restart time: %v" , err )
269280 return false , nil
270281 }
271282
272- timeDiff := probeFailSec - containerStartSec
283+ // Event ages count back from now, so the earlier Killing event has the larger age
284+ // Subtracting the later restart's age gives the seconds elapsed between kill and restart
285+ timeDiff := killingSec - restartSec
273286 e2e .Logf ("Time difference: %d seconds (expected: %d ±10 seconds)" , timeDiff , expectedTerminationSec )
274287
275288 // Allow range: [expectedTerminationSec-3, expectedTerminationSec+10]
@@ -331,7 +344,7 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager",
331344 o .Expect (err ).NotTo (o .HaveOccurred (), "failed to create liveness probe pod" )
332345 g .DeferCleanup (oc .KubeClient ().CoreV1 ().Pods (namespace ).Delete , ctx , "liveness-probe" , metav1.DeleteOptions {})
333346
334- err = verifyProbeTermination ("liveness-probe" , 10 )
347+ err = verifyProbeTermination ("liveness-probe" , "test" , 10 )
335348 o .Expect (err ).NotTo (o .HaveOccurred (), "liveness probe termination grace period not honored" )
336349
337350 g .By ("Test startup probe with probe-level terminationGracePeriodSeconds" )
@@ -382,7 +395,7 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager",
382395 o .Expect (err ).NotTo (o .HaveOccurred (), "failed to create startup probe pod" )
383396 g .DeferCleanup (oc .KubeClient ().CoreV1 ().Pods (namespace ).Delete , ctx , "startup-probe" , metav1.DeleteOptions {})
384397
385- err = verifyProbeTermination ("startup-probe" , 10 )
398+ err = verifyProbeTermination ("startup-probe" , "teststartup" , 10 )
386399 o .Expect (err ).NotTo (o .HaveOccurred (), "startup probe termination grace period not honored" )
387400
388401 g .By ("Test liveness probe without probe-level terminationGracePeriodSeconds (should use pod-level)" )
@@ -433,7 +446,7 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager",
433446 o .Expect (err ).NotTo (o .HaveOccurred (), "failed to create liveness probe pod without probe termination" )
434447 g .DeferCleanup (oc .KubeClient ().CoreV1 ().Pods (namespace ).Delete , ctx , "liveness-probe-no-term" , metav1.DeleteOptions {})
435448
436- err = verifyProbeTermination ("liveness-probe-no-term" , 60 )
449+ err = verifyProbeTermination ("liveness-probe-no-term" , "test" , 60 )
437450 o .Expect (err ).NotTo (o .HaveOccurred (), "liveness probe should use pod-level termination grace period when probe-level not set" )
438451 })
439452})
0 commit comments