|
1 | 1 | package node |
2 | 2 |
|
3 | 3 | import ( |
| 4 | + "context" |
4 | 5 | "path/filepath" |
| 6 | + "strconv" |
5 | 7 | "strings" |
6 | 8 | "time" |
7 | 9 |
|
8 | 10 | g "github.com/onsi/ginkgo/v2" |
9 | 11 | o "github.com/onsi/gomega" |
| 12 | + ote "github.com/openshift-eng/openshift-tests-extension/pkg/ginkgo" |
| 13 | + corev1 "k8s.io/api/core/v1" |
| 14 | + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" |
| 15 | + "k8s.io/apimachinery/pkg/util/intstr" |
10 | 16 | "k8s.io/apimachinery/pkg/util/wait" |
11 | 17 | e2e "k8s.io/kubernetes/test/e2e/framework" |
12 | 18 |
|
@@ -157,4 +163,272 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager", |
157 | 163 | e2e.Logf("/dev/fuse mount output: %s", output) |
158 | 164 | o.Expect(output).To(o.ContainSubstring("fuse"), "dev fuse is not mounted inside pod") |
159 | 165 | }) |
| 166 | + |
| 167 | + //author: minmli@redhat.com |
| 168 | + //migrated from openshift-tests-private |
| 169 | + //automates: https://issues.redhat.com/browse/OCPBUGS-44493 |
| 170 | + g.It("[OTP] add configurable terminationGracePeriod to liveness and startup probes [OCP-44493]", ote.Informing(), func() { |
| 171 | + ctx := context.Background() |
| 172 | + |
| 173 | + g.By("Check if featureSet is empty in cluster") |
| 174 | + featureSet, err := oc.AsAdmin().WithoutNamespace().Run("get").Args("featuregate", "cluster", "-o=jsonpath={.spec.featureSet}").Output() |
| 175 | + o.Expect(err).NotTo(o.HaveOccurred(), "failed to get featuregate") |
| 176 | + e2e.Logf("featureSet is: %s", featureSet) |
| 177 | + if featureSet != "" { |
| 178 | + g.Skip("featureSet is not empty, skipping test") |
| 179 | + } |
| 180 | + |
| 181 | + oc.SetupProject() |
| 182 | + namespace := oc.Namespace() |
| 183 | + |
| 184 | + // Helper function to parse duration string like "1m30s" or "45s" to seconds |
| 185 | + parseDurationToSeconds := func(durationStr string) (int, error) { |
| 186 | + var totalSeconds int |
| 187 | + if strings.Contains(durationStr, "m") { |
| 188 | + parts := strings.Split(durationStr, "m") |
| 189 | + minutes, err := strconv.Atoi(parts[0]) |
| 190 | + if err != nil { |
| 191 | + return 0, err |
| 192 | + } |
| 193 | + totalSeconds = minutes * 60 |
| 194 | + if len(parts) > 1 && strings.Contains(parts[1], "s") { |
| 195 | + secStr := strings.TrimSuffix(parts[1], "s") |
| 196 | + if secStr != "" { |
| 197 | + seconds, err := strconv.Atoi(secStr) |
| 198 | + if err != nil { |
| 199 | + return 0, err |
| 200 | + } |
| 201 | + totalSeconds += seconds |
| 202 | + } |
| 203 | + } |
| 204 | + } else if strings.Contains(durationStr, "s") { |
| 205 | + secStr := strings.TrimSuffix(durationStr, "s") |
| 206 | + seconds, err := strconv.Atoi(secStr) |
| 207 | + if err != nil { |
| 208 | + return 0, err |
| 209 | + } |
| 210 | + totalSeconds = seconds |
| 211 | + } |
| 212 | + return totalSeconds, nil |
| 213 | + } |
| 214 | + |
| 215 | + // Helper to verify probe termination period |
| 216 | + verifyProbeTermination := func(podName string, expectedTerminationSec int) error { |
| 217 | + return wait.PollUntilContextTimeout(ctx, 10*time.Second, 4*time.Minute, true, func(ctx context.Context) (bool, error) { |
| 218 | + podDesc, err := oc.AsAdmin().WithoutNamespace().Run("describe").Args("pod", podName, "-n", namespace).Output() |
| 219 | + if err != nil { |
| 220 | + e2e.Logf("Error describing pod: %v", err) |
| 221 | + return false, nil |
| 222 | + } |
| 223 | + |
| 224 | + // Look for probe failure and container start events |
| 225 | + probeFailLine := "" |
| 226 | + containerStartLine := "" |
| 227 | + for _, line := range strings.Split(podDesc, "\n") { |
| 228 | + if strings.Contains(line, "Container") && strings.Contains(line, "failed") && strings.Contains(line, "probe") && strings.Contains(line, "will be restarted") { |
| 229 | + probeFailLine = line |
| 230 | + } |
| 231 | + if strings.Contains(line, "Started container") { |
| 232 | + containerStartLine = line |
| 233 | + } |
| 234 | + } |
| 235 | + |
| 236 | + if probeFailLine == "" || containerStartLine == "" { |
| 237 | + e2e.Logf("Waiting for probe failure and container start events") |
| 238 | + return false, nil |
| 239 | + } |
| 240 | + |
| 241 | + e2e.Logf("Probe failure event: %s", probeFailLine) |
| 242 | + e2e.Logf("Container start event: %s", containerStartLine) |
| 243 | + |
| 244 | + // Extract timestamps (format: "1m30s" or "45s") |
| 245 | + probeFailFields := strings.Fields(probeFailLine) |
| 246 | + containerStartFields := strings.Fields(containerStartLine) |
| 247 | + if len(probeFailFields) < 3 || len(containerStartFields) < 3 { |
| 248 | + e2e.Logf("Unable to parse event timestamps") |
| 249 | + return false, nil |
| 250 | + } |
| 251 | + |
| 252 | + probeFailTime := probeFailFields[2] |
| 253 | + containerStartTime := containerStartFields[2] |
| 254 | + |
| 255 | + probeFailSec, err := parseDurationToSeconds(probeFailTime) |
| 256 | + if err != nil { |
| 257 | + e2e.Logf("Error parsing probe fail time: %v", err) |
| 258 | + return false, nil |
| 259 | + } |
| 260 | + |
| 261 | + containerStartSec, err := parseDurationToSeconds(containerStartTime) |
| 262 | + if err != nil { |
| 263 | + e2e.Logf("Error parsing container start time: %v", err) |
| 264 | + return false, nil |
| 265 | + } |
| 266 | + |
| 267 | + timeDiff := probeFailSec - containerStartSec |
| 268 | + e2e.Logf("Time difference: %d seconds (expected: %d ±10 seconds)", timeDiff, expectedTerminationSec) |
| 269 | + |
| 270 | + // Allow range: [expectedTerminationSec-3, expectedTerminationSec+10] |
| 271 | + if timeDiff >= (expectedTerminationSec-3) && timeDiff <= (expectedTerminationSec+10) { |
| 272 | + e2e.Logf("Termination grace period check passed") |
| 273 | + return true, nil |
| 274 | + } |
| 275 | + |
| 276 | + e2e.Logf("Time difference %d is outside expected range [%d, %d]", timeDiff, expectedTerminationSec-3, expectedTerminationSec+10) |
| 277 | + return false, nil |
| 278 | + }) |
| 279 | + } |
| 280 | + |
| 281 | + g.By("Test liveness probe with probe-level terminationGracePeriodSeconds") |
| 282 | + livenessPod := &corev1.Pod{ |
| 283 | + ObjectMeta: metav1.ObjectMeta{ |
| 284 | + Name: "liveness-probe", |
| 285 | + Namespace: namespace, |
| 286 | + }, |
| 287 | + Spec: corev1.PodSpec{ |
| 288 | + TerminationGracePeriodSeconds: &[]int64{60}[0], |
| 289 | + SecurityContext: &corev1.PodSecurityContext{ |
| 290 | + RunAsNonRoot: &[]bool{true}[0], |
| 291 | + SeccompProfile: &corev1.SeccompProfile{ |
| 292 | + Type: corev1.SeccompProfileTypeRuntimeDefault, |
| 293 | + }, |
| 294 | + }, |
| 295 | + Containers: []corev1.Container{ |
| 296 | + { |
| 297 | + Name: "test", |
| 298 | + Image: "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0", |
| 299 | + SecurityContext: &corev1.SecurityContext{ |
| 300 | + AllowPrivilegeEscalation: &[]bool{false}[0], |
| 301 | + Capabilities: &corev1.Capabilities{ |
| 302 | + Drop: []corev1.Capability{"ALL"}, |
| 303 | + }, |
| 304 | + }, |
| 305 | + Command: []string{"bash", "-c", "sleep 100000000"}, |
| 306 | + Ports: []corev1.ContainerPort{ |
| 307 | + {ContainerPort: 8080}, |
| 308 | + }, |
| 309 | + LivenessProbe: &corev1.Probe{ |
| 310 | + ProbeHandler: corev1.ProbeHandler{ |
| 311 | + HTTPGet: &corev1.HTTPGetAction{ |
| 312 | + Path: "/healthz", |
| 313 | + Port: intstr.FromInt(8080), |
| 314 | + }, |
| 315 | + }, |
| 316 | + FailureThreshold: 1, |
| 317 | + PeriodSeconds: 60, |
| 318 | + TerminationGracePeriodSeconds: &[]int64{10}[0], |
| 319 | + }, |
| 320 | + }, |
| 321 | + }, |
| 322 | + }, |
| 323 | + } |
| 324 | + |
| 325 | + _, err = oc.KubeClient().CoreV1().Pods(namespace).Create(ctx, livenessPod, metav1.CreateOptions{}) |
| 326 | + o.Expect(err).NotTo(o.HaveOccurred(), "failed to create liveness probe pod") |
| 327 | + g.DeferCleanup(oc.KubeClient().CoreV1().Pods(namespace).Delete, ctx, "liveness-probe", metav1.DeleteOptions{}) |
| 328 | + |
| 329 | + err = verifyProbeTermination("liveness-probe", 10) |
| 330 | + o.Expect(err).NotTo(o.HaveOccurred(), "liveness probe termination grace period not honored") |
| 331 | + |
| 332 | + g.By("Test startup probe with probe-level terminationGracePeriodSeconds") |
| 333 | + startupPod := &corev1.Pod{ |
| 334 | + ObjectMeta: metav1.ObjectMeta{ |
| 335 | + Name: "startup-probe", |
| 336 | + Namespace: namespace, |
| 337 | + }, |
| 338 | + Spec: corev1.PodSpec{ |
| 339 | + TerminationGracePeriodSeconds: &[]int64{60}[0], |
| 340 | + SecurityContext: &corev1.PodSecurityContext{ |
| 341 | + RunAsNonRoot: &[]bool{true}[0], |
| 342 | + SeccompProfile: &corev1.SeccompProfile{ |
| 343 | + Type: corev1.SeccompProfileTypeRuntimeDefault, |
| 344 | + }, |
| 345 | + }, |
| 346 | + Containers: []corev1.Container{ |
| 347 | + { |
| 348 | + Name: "teststartup", |
| 349 | + Image: "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0", |
| 350 | + SecurityContext: &corev1.SecurityContext{ |
| 351 | + AllowPrivilegeEscalation: &[]bool{false}[0], |
| 352 | + Capabilities: &corev1.Capabilities{ |
| 353 | + Drop: []corev1.Capability{"ALL"}, |
| 354 | + }, |
| 355 | + }, |
| 356 | + Command: []string{"bash", "-c", "sleep 100000000"}, |
| 357 | + Ports: []corev1.ContainerPort{ |
| 358 | + {ContainerPort: 8080}, |
| 359 | + }, |
| 360 | + StartupProbe: &corev1.Probe{ |
| 361 | + ProbeHandler: corev1.ProbeHandler{ |
| 362 | + HTTPGet: &corev1.HTTPGetAction{ |
| 363 | + Path: "/healthz", |
| 364 | + Port: intstr.FromInt(8080), |
| 365 | + }, |
| 366 | + }, |
| 367 | + FailureThreshold: 1, |
| 368 | + PeriodSeconds: 60, |
| 369 | + TerminationGracePeriodSeconds: &[]int64{10}[0], |
| 370 | + }, |
| 371 | + }, |
| 372 | + }, |
| 373 | + }, |
| 374 | + } |
| 375 | + |
| 376 | + _, err = oc.KubeClient().CoreV1().Pods(namespace).Create(ctx, startupPod, metav1.CreateOptions{}) |
| 377 | + o.Expect(err).NotTo(o.HaveOccurred(), "failed to create startup probe pod") |
| 378 | + g.DeferCleanup(oc.KubeClient().CoreV1().Pods(namespace).Delete, ctx, "startup-probe", metav1.DeleteOptions{}) |
| 379 | + |
| 380 | + err = verifyProbeTermination("startup-probe", 10) |
| 381 | + o.Expect(err).NotTo(o.HaveOccurred(), "startup probe termination grace period not honored") |
| 382 | + |
| 383 | + g.By("Test liveness probe without probe-level terminationGracePeriodSeconds (should use pod-level)") |
| 384 | + livenessPodNoProbeTerm := &corev1.Pod{ |
| 385 | + ObjectMeta: metav1.ObjectMeta{ |
| 386 | + Name: "liveness-probe-no-term", |
| 387 | + Namespace: namespace, |
| 388 | + }, |
| 389 | + Spec: corev1.PodSpec{ |
| 390 | + TerminationGracePeriodSeconds: &[]int64{60}[0], |
| 391 | + SecurityContext: &corev1.PodSecurityContext{ |
| 392 | + RunAsNonRoot: &[]bool{true}[0], |
| 393 | + SeccompProfile: &corev1.SeccompProfile{ |
| 394 | + Type: corev1.SeccompProfileTypeRuntimeDefault, |
| 395 | + }, |
| 396 | + }, |
| 397 | + Containers: []corev1.Container{ |
| 398 | + { |
| 399 | + Name: "test", |
| 400 | + Image: "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0", |
| 401 | + SecurityContext: &corev1.SecurityContext{ |
| 402 | + AllowPrivilegeEscalation: &[]bool{false}[0], |
| 403 | + Capabilities: &corev1.Capabilities{ |
| 404 | + Drop: []corev1.Capability{"ALL"}, |
| 405 | + }, |
| 406 | + }, |
| 407 | + Command: []string{"bash", "-c", "sleep 100000000"}, |
| 408 | + Ports: []corev1.ContainerPort{ |
| 409 | + {ContainerPort: 8080}, |
| 410 | + }, |
| 411 | + LivenessProbe: &corev1.Probe{ |
| 412 | + ProbeHandler: corev1.ProbeHandler{ |
| 413 | + HTTPGet: &corev1.HTTPGetAction{ |
| 414 | + Path: "/healthz", |
| 415 | + Port: intstr.FromInt(8080), |
| 416 | + }, |
| 417 | + }, |
| 418 | + FailureThreshold: 1, |
| 419 | + PeriodSeconds: 60, |
| 420 | + // No TerminationGracePeriodSeconds - should use pod-level (60s) |
| 421 | + }, |
| 422 | + }, |
| 423 | + }, |
| 424 | + }, |
| 425 | + } |
| 426 | + |
| 427 | + _, err = oc.KubeClient().CoreV1().Pods(namespace).Create(ctx, livenessPodNoProbeTerm, metav1.CreateOptions{}) |
| 428 | + o.Expect(err).NotTo(o.HaveOccurred(), "failed to create liveness probe pod without probe termination") |
| 429 | + g.DeferCleanup(oc.KubeClient().CoreV1().Pods(namespace).Delete, ctx, "liveness-probe-no-term", metav1.DeleteOptions{}) |
| 430 | + |
| 431 | + err = verifyProbeTermination("liveness-probe-no-term", 60) |
| 432 | + o.Expect(err).NotTo(o.HaveOccurred(), "liveness probe should use pod-level termination grace period when probe-level not set") |
| 433 | + }) |
160 | 434 | }) |
0 commit comments