|
| 1 | +package node |
| 2 | + |
| 3 | +import ( |
| 4 | + "context" |
| 5 | + "fmt" |
| 6 | + "strings" |
| 7 | + "time" |
| 8 | + |
| 9 | + g "github.com/onsi/ginkgo/v2" |
| 10 | + o "github.com/onsi/gomega" |
| 11 | + ote "github.com/openshift-eng/openshift-tests-extension/pkg/ginkgo" |
| 12 | + |
| 13 | + corev1 "k8s.io/api/core/v1" |
| 14 | + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" |
| 15 | + "k8s.io/apimachinery/pkg/util/intstr" |
| 16 | + "k8s.io/apimachinery/pkg/util/wait" |
| 17 | + e2e "k8s.io/kubernetes/test/e2e/framework" |
| 18 | + "k8s.io/utils/ptr" |
| 19 | + |
| 20 | + exutil "github.com/openshift/origin/test/extended/util" |
| 21 | +) |
| 22 | + |
| 23 | +var _ = g.Describe("[sig-node] Probe configuration", func() { |
| 24 | + var ( |
| 25 | + oc = exutil.NewCLIWithoutNamespace("probe-termination") |
| 26 | + ) |
| 27 | + |
| 28 | + //author: bgudi@redhat.com |
| 29 | + g.It("[OTP] Liveness probe should respect probe-level terminationGracePeriodSeconds [OCP-44493]", ote.Informing(), func() { |
| 30 | + ctx := context.Background() |
| 31 | + |
| 32 | + oc.SetupProject() |
| 33 | + namespace := oc.Namespace() |
| 34 | + |
| 35 | + g.By("Create pod with liveness probe having probe-level terminationGracePeriodSeconds=10s") |
| 36 | + pod := &corev1.Pod{ |
| 37 | + ObjectMeta: metav1.ObjectMeta{ |
| 38 | + Name: "liveness-probe-level", |
| 39 | + Namespace: namespace, |
| 40 | + }, |
| 41 | + Spec: corev1.PodSpec{ |
| 42 | + TerminationGracePeriodSeconds: ptr.To[int64](60), |
| 43 | + SecurityContext: &corev1.PodSecurityContext{ |
| 44 | + RunAsNonRoot: ptr.To(true), |
| 45 | + SeccompProfile: &corev1.SeccompProfile{ |
| 46 | + Type: corev1.SeccompProfileTypeRuntimeDefault, |
| 47 | + }, |
| 48 | + }, |
| 49 | + Containers: []corev1.Container{ |
| 50 | + { |
| 51 | + Name: "test", |
| 52 | + Image: "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0", |
| 53 | + SecurityContext: &corev1.SecurityContext{ |
| 54 | + AllowPrivilegeEscalation: ptr.To(false), |
| 55 | + Capabilities: &corev1.Capabilities{ |
| 56 | + Drop: []corev1.Capability{"ALL"}, |
| 57 | + }, |
| 58 | + }, |
| 59 | + Command: []string{"sh", "-c", "sleep 100000000"}, |
| 60 | + Ports: []corev1.ContainerPort{ |
| 61 | + {ContainerPort: 8080}, |
| 62 | + }, |
| 63 | + LivenessProbe: &corev1.Probe{ |
| 64 | + ProbeHandler: corev1.ProbeHandler{ |
| 65 | + HTTPGet: &corev1.HTTPGetAction{ |
| 66 | + Path: "/healthz", |
| 67 | + Port: intstr.FromInt(8080), |
| 68 | + }, |
| 69 | + }, |
| 70 | + FailureThreshold: 1, |
| 71 | + PeriodSeconds: 60, |
| 72 | + TerminationGracePeriodSeconds: ptr.To[int64](10), |
| 73 | + }, |
| 74 | + }, |
| 75 | + }, |
| 76 | + }, |
| 77 | + } |
| 78 | + |
| 79 | + _, err := oc.KubeClient().CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{}) |
| 80 | + o.Expect(err).NotTo(o.HaveOccurred(), "failed to create liveness probe pod") |
| 81 | + |
| 82 | + g.By("Verify probe-level terminationGracePeriodSeconds is honored (10s)") |
| 83 | + timeDiff, err := verifyProbeTermination(ctx, oc, namespace, "liveness-probe-level", "test", 10) |
| 84 | + o.Expect(err).NotTo(o.HaveOccurred(), "failed to get probe termination events") |
| 85 | + o.Expect(timeDiff).To(o.BeNumerically(">=", 10-3), "time difference is less than expected minimum") |
| 86 | + o.Expect(timeDiff).To(o.BeNumerically("<=", 10+10), "time difference is greater than expected maximum") |
| 87 | + }) |
| 88 | + |
| 89 | + //author: bgudi@redhat.com |
| 90 | + g.It("[OTP] Startup probe should respect probe-level terminationGracePeriodSeconds [OCP-44493]", ote.Informing(), func() { |
| 91 | + ctx := context.Background() |
| 92 | + |
| 93 | + oc.SetupProject() |
| 94 | + namespace := oc.Namespace() |
| 95 | + |
| 96 | + g.By("Create pod with startup probe having probe-level terminationGracePeriodSeconds=10s") |
| 97 | + pod := &corev1.Pod{ |
| 98 | + ObjectMeta: metav1.ObjectMeta{ |
| 99 | + Name: "startup-probe-level", |
| 100 | + Namespace: namespace, |
| 101 | + }, |
| 102 | + Spec: corev1.PodSpec{ |
| 103 | + TerminationGracePeriodSeconds: ptr.To[int64](60), |
| 104 | + SecurityContext: &corev1.PodSecurityContext{ |
| 105 | + RunAsNonRoot: ptr.To(true), |
| 106 | + SeccompProfile: &corev1.SeccompProfile{ |
| 107 | + Type: corev1.SeccompProfileTypeRuntimeDefault, |
| 108 | + }, |
| 109 | + }, |
| 110 | + Containers: []corev1.Container{ |
| 111 | + { |
| 112 | + Name: "teststartup", |
| 113 | + Image: "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0", |
| 114 | + SecurityContext: &corev1.SecurityContext{ |
| 115 | + AllowPrivilegeEscalation: ptr.To(false), |
| 116 | + Capabilities: &corev1.Capabilities{ |
| 117 | + Drop: []corev1.Capability{"ALL"}, |
| 118 | + }, |
| 119 | + }, |
| 120 | + Command: []string{"sh", "-c", "sleep 100000000"}, |
| 121 | + Ports: []corev1.ContainerPort{ |
| 122 | + {ContainerPort: 8080}, |
| 123 | + }, |
| 124 | + StartupProbe: &corev1.Probe{ |
| 125 | + ProbeHandler: corev1.ProbeHandler{ |
| 126 | + HTTPGet: &corev1.HTTPGetAction{ |
| 127 | + Path: "/healthz", |
| 128 | + Port: intstr.FromInt(8080), |
| 129 | + }, |
| 130 | + }, |
| 131 | + FailureThreshold: 1, |
| 132 | + PeriodSeconds: 60, |
| 133 | + TerminationGracePeriodSeconds: ptr.To[int64](10), |
| 134 | + }, |
| 135 | + }, |
| 136 | + }, |
| 137 | + }, |
| 138 | + } |
| 139 | + |
| 140 | + _, err := oc.KubeClient().CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{}) |
| 141 | + o.Expect(err).NotTo(o.HaveOccurred(), "failed to create startup probe pod") |
| 142 | + |
| 143 | + g.By("Verify probe-level terminationGracePeriodSeconds is honored (10s)") |
| 144 | + timeDiff, err := verifyProbeTermination(ctx, oc, namespace, "startup-probe-level", "teststartup", 10) |
| 145 | + o.Expect(err).NotTo(o.HaveOccurred(), "failed to get probe termination events") |
| 146 | + o.Expect(timeDiff).To(o.BeNumerically(">=", 10-3), "time difference is less than expected minimum") |
| 147 | + o.Expect(timeDiff).To(o.BeNumerically("<=", 10+10), "time difference is greater than expected maximum") |
| 148 | + }) |
| 149 | + |
| 150 | + //author: bgudi@redhat.com |
| 151 | + g.It("[OTP] Liveness probe should fall back to pod-level terminationGracePeriodSeconds when probe-level is not set [OCP-44493]", ote.Informing(), func() { |
| 152 | + ctx := context.Background() |
| 153 | + |
| 154 | + oc.SetupProject() |
| 155 | + namespace := oc.Namespace() |
| 156 | + |
| 157 | + g.By("Create pod with liveness probe without probe-level terminationGracePeriodSeconds") |
| 158 | + pod := &corev1.Pod{ |
| 159 | + ObjectMeta: metav1.ObjectMeta{ |
| 160 | + Name: "liveness-pod-level", |
| 161 | + Namespace: namespace, |
| 162 | + }, |
| 163 | + Spec: corev1.PodSpec{ |
| 164 | + TerminationGracePeriodSeconds: ptr.To[int64](60), |
| 165 | + SecurityContext: &corev1.PodSecurityContext{ |
| 166 | + RunAsNonRoot: ptr.To(true), |
| 167 | + SeccompProfile: &corev1.SeccompProfile{ |
| 168 | + Type: corev1.SeccompProfileTypeRuntimeDefault, |
| 169 | + }, |
| 170 | + }, |
| 171 | + Containers: []corev1.Container{ |
| 172 | + { |
| 173 | + Name: "test", |
| 174 | + Image: "quay.io/openshifttest/nginx-alpine@sha256:04f316442d48ba60e3ea0b5a67eb89b0b667abf1c198a3d0056ca748736336a0", |
| 175 | + SecurityContext: &corev1.SecurityContext{ |
| 176 | + AllowPrivilegeEscalation: ptr.To(false), |
| 177 | + Capabilities: &corev1.Capabilities{ |
| 178 | + Drop: []corev1.Capability{"ALL"}, |
| 179 | + }, |
| 180 | + }, |
| 181 | + Command: []string{"sh", "-c", "sleep 100000000"}, |
| 182 | + Ports: []corev1.ContainerPort{ |
| 183 | + {ContainerPort: 8080}, |
| 184 | + }, |
| 185 | + LivenessProbe: &corev1.Probe{ |
| 186 | + ProbeHandler: corev1.ProbeHandler{ |
| 187 | + HTTPGet: &corev1.HTTPGetAction{ |
| 188 | + Path: "/healthz", |
| 189 | + Port: intstr.FromInt(8080), |
| 190 | + }, |
| 191 | + }, |
| 192 | + FailureThreshold: 1, |
| 193 | + PeriodSeconds: 60, |
| 194 | + // No TerminationGracePeriodSeconds - should use pod-level (60s) |
| 195 | + }, |
| 196 | + }, |
| 197 | + }, |
| 198 | + }, |
| 199 | + } |
| 200 | + |
| 201 | + _, err := oc.KubeClient().CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{}) |
| 202 | + o.Expect(err).NotTo(o.HaveOccurred(), "failed to create liveness probe pod without probe-level termination") |
| 203 | + |
| 204 | + g.By("Verify pod-level terminationGracePeriodSeconds is used (60s)") |
| 205 | + timeDiff, err := verifyProbeTermination(ctx, oc, namespace, "liveness-pod-level", "test", 60) |
| 206 | + o.Expect(err).NotTo(o.HaveOccurred(), "failed to get probe termination events") |
| 207 | + o.Expect(timeDiff).To(o.BeNumerically(">=", 60-3), "time difference is less than expected minimum") |
| 208 | + o.Expect(timeDiff).To(o.BeNumerically("<=", 60+10), "time difference is greater than expected maximum") |
| 209 | + }) |
| 210 | +}) |
| 211 | + |
| 212 | +// verifyProbeTermination verifies that the probe termination grace period is honored |
| 213 | +// by checking the time difference between probe failure (Killing) and container restart (Started) events |
| 214 | +// Returns the time difference in seconds, or an error if events are not found |
| 215 | +func verifyProbeTermination(ctx context.Context, oc *exutil.CLI, namespace, podName, containerName string, expectedTerminationSec int) (int, error) { |
| 216 | + var timeDiff int |
| 217 | + err := wait.PollUntilContextTimeout(ctx, 10*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) { |
| 218 | + // Get events using the Events API |
| 219 | + events, err := oc.KubeClient().CoreV1().Events(namespace).List(ctx, metav1.ListOptions{ |
| 220 | + FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.kind=Pod", podName), |
| 221 | + }) |
| 222 | + if err != nil { |
| 223 | + e2e.Logf("Error getting events: %v", err) |
| 224 | + return false, nil |
| 225 | + } |
| 226 | + |
| 227 | + // Look for probe failure (Killing) and container restart (Started) events |
| 228 | + var killingEvent, startedEvent *corev1.Event |
| 229 | + for i := range events.Items { |
| 230 | + event := &events.Items[i] |
| 231 | + if event.Reason == "Killing" && strings.Contains(event.Message, containerName) && |
| 232 | + strings.Contains(event.Message, "failed") && strings.Contains(event.Message, "probe") { |
| 233 | + if killingEvent == nil || event.LastTimestamp.Time.After(killingEvent.LastTimestamp.Time) { |
| 234 | + killingEvent = event |
| 235 | + } |
| 236 | + } |
| 237 | + if event.Reason == "Started" && strings.Contains(event.Message, "Started container") { |
| 238 | + // Find Started event after the Killing event |
| 239 | + if killingEvent != nil && event.FirstTimestamp.Time.After(killingEvent.LastTimestamp.Time) { |
| 240 | + if startedEvent == nil || event.FirstTimestamp.Time.Before(startedEvent.FirstTimestamp.Time) { |
| 241 | + startedEvent = event |
| 242 | + } |
| 243 | + } |
| 244 | + } |
| 245 | + } |
| 246 | + |
| 247 | + if killingEvent == nil || startedEvent == nil { |
| 248 | + e2e.Logf("Waiting for probe failure (Killing) and container restart (Started) events") |
| 249 | + return false, nil |
| 250 | + } |
| 251 | + |
| 252 | + e2e.Logf("Killing event: %s at %v", killingEvent.Message, killingEvent.LastTimestamp) |
| 253 | + e2e.Logf("Started event: %s at %v", startedEvent.Message, startedEvent.FirstTimestamp) |
| 254 | + |
| 255 | + // Calculate time difference in seconds |
| 256 | + timeDiff = int(startedEvent.FirstTimestamp.Sub(killingEvent.LastTimestamp.Time).Seconds()) |
| 257 | + e2e.Logf("Time difference: %d seconds (expected: %d ±10 seconds)", timeDiff, expectedTerminationSec) |
| 258 | + |
| 259 | + return true, nil |
| 260 | + }) |
| 261 | + if err != nil { |
| 262 | + return 0, err |
| 263 | + } |
| 264 | + return timeDiff, nil |
| 265 | +} |
0 commit comments