From 1b6be7ba0ea5c4684929b53fbb9906988c1e8b99 Mon Sep 17 00:00:00 2001 From: Martin Schuppert Date: Thu, 24 Jul 2025 16:17:05 +0200 Subject: [PATCH 1/2] Fix OpenStackClient pod relocation during node failures These changes ensure OpenStackClient pods are automatically rescheduled when nodes fail, instead of requiring manual intervention to delete stuck pods. The 120-second tolerations provide faster failover compared to the 5min default, while the stuck pod detection handles edge cases where normal eviction fails. - Adds tolerations for faster pod eviction (120s vs 5min default) * Handle node.kubernetes.io/not-ready taints * Handle node.kubernetes.io/unreachable taints - Force delete stuck pods with grace period 0 Note: - going lower than 120s could be too aggressive and result in pod eviction e.g. during a network issue, or kubelet restarts - in a follow-up, the same tolerations should be added to the operator controller manager deployments, since the openstack-operator-controller-manager is the one handling the openstackclient pod. 
Jira: OSPRH-18450 Signed-off-by: Martin Schuppert --- controllers/client/openstackclient_controller.go | 12 ++++++++++++ pkg/openstackclient/funcs.go | 14 ++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/controllers/client/openstackclient_controller.go b/controllers/client/openstackclient_controller.go index 268976bc5d..180b39c28f 100644 --- a/controllers/client/openstackclient_controller.go +++ b/controllers/client/openstackclient_controller.go @@ -378,6 +378,18 @@ func (r *OpenStackClientReconciler) Reconcile(ctx context.Context, req ctrl.Requ ) } + // if pod is stuck in terminating state for more than 3 minutes, force delete it + if osclient.DeletionTimestamp != nil { + terminatingDuration := time.Since(osclient.DeletionTimestamp.Time) + if terminatingDuration > time.Minute*3 { + // Force delete only truly stuck pods + err := r.Client.Delete(ctx, osclient, client.GracePeriodSeconds(0)) + if err != nil { + return ctrl.Result{}, fmt.Errorf("Failed to force delete pod: %w", err) + } + } + } + podReady := false for _, condition := range osclient.Status.Conditions { diff --git a/pkg/openstackclient/funcs.go b/pkg/openstackclient/funcs.go index 14bfe5c258..994b48221e 100644 --- a/pkg/openstackclient/funcs.go +++ b/pkg/openstackclient/funcs.go @@ -95,6 +95,20 @@ func ClientPodSpec( VolumeMounts: volumeMounts, }, }, + Tolerations: []corev1.Toleration{ + { + Key: "node.kubernetes.io/not-ready", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: &[]int64{120}[0], + }, + { + Key: "node.kubernetes.io/unreachable", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: &[]int64{120}[0], + }, + }, } if instance.Spec.NodeSelector != nil { From c9d18237b6566d6f5c65cce084919cc268155ca5 Mon Sep 17 00:00:00 2001 From: Martin Schuppert Date: Thu, 24 Jul 2025 17:56:48 +0200 Subject: [PATCH 2/2] Add node failure tolerations to all service operators This change adds 120s 
tolerations for node.kubernetes.io/not-ready and unreachable taints to reduce pod failover time during a node failure. The total eviction time is ~160s (versus 5min+ with the default). 120s was chosen to prevent pod rescheduling, e.g. on kubelet restarts or network issues. Jira: OSPRH-18450 Signed-off-by: Martin Schuppert --- bindata/operator/managers.yaml | 9 +++++++++ bindata/operator/operator.yaml | 9 +++++++++ bindata/operator/rabbit.yaml | 9 +++++++++ config/manager/manager.yaml | 9 +++++++++ config/operator/deployment/deployment.yaml | 9 +++++++++ config/operator/managers.yaml | 9 +++++++++ config/operator/rabbit.yaml | 9 +++++++++ 7 files changed, 63 insertions(+) diff --git a/bindata/operator/managers.yaml b/bindata/operator/managers.yaml index 53c3122486..a42ba323fa 100644 --- a/bindata/operator/managers.yaml +++ b/bindata/operator/managers.yaml @@ -85,6 +85,15 @@ spec: runAsNonRoot: true serviceAccountName: {{ .Name }}-operator-controller-manager terminationGracePeriodSeconds: 10 + tolerations: + - key: "node.kubernetes.io/not-ready" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 120 + - key: "node.kubernetes.io/unreachable" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 120 {{- if isEnvVarTrue .Deployment.Manager.Env "ENABLE_WEBHOOKS" }} volumes: - name: cert diff --git a/bindata/operator/operator.yaml b/bindata/operator/operator.yaml index c2468ba251..f58c18c29b 100644 --- a/bindata/operator/operator.yaml +++ b/bindata/operator/operator.yaml @@ -133,6 +133,15 @@ spec: runAsNonRoot: true serviceAccountName: openstack-operator-controller-manager terminationGracePeriodSeconds: 10 + tolerations: + - effect: NoExecute + key: node.kubernetes.io/not-ready + operator: Exists + tolerationSeconds: 120 + - effect: NoExecute + key: node.kubernetes.io/unreachable + operator: Exists + tolerationSeconds: 120 volumes: - name: cert secret: diff --git a/bindata/operator/rabbit.yaml b/bindata/operator/rabbit.yaml index 1b5fe2ecde..a1d4ee3e15 100644 --- 
a/bindata/operator/rabbit.yaml +++ b/bindata/operator/rabbit.yaml @@ -46,3 +46,12 @@ spec: memory: {{ .RabbitmqOperator.Deployment.Manager.Resources.Requests.Memory }} serviceAccountName: rabbitmq-cluster-operator-controller-manager terminationGracePeriodSeconds: 10 + tolerations: + - key: "node.kubernetes.io/not-ready" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 120 + - key: "node.kubernetes.io/unreachable" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 120 diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 1a6cc835b0..90925bae6e 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -72,3 +72,12 @@ spec: customRequests: replace_me #NOTE: this is used via the Makefile to inject a custom template that kustomize won't allow serviceAccountName: openstack-operator-controller-manager terminationGracePeriodSeconds: 10 + tolerations: + - key: "node.kubernetes.io/not-ready" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 120 + - key: "node.kubernetes.io/unreachable" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 120 diff --git a/config/operator/deployment/deployment.yaml b/config/operator/deployment/deployment.yaml index d1195ef5d5..fa44c8bde8 100644 --- a/config/operator/deployment/deployment.yaml +++ b/config/operator/deployment/deployment.yaml @@ -106,3 +106,12 @@ spec: memory: 128Mi serviceAccountName: openstack-operator-controller-operator terminationGracePeriodSeconds: 10 + tolerations: + - key: "node.kubernetes.io/not-ready" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 120 + - key: "node.kubernetes.io/unreachable" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 120 diff --git a/config/operator/managers.yaml b/config/operator/managers.yaml index 53c3122486..a42ba323fa 100644 --- a/config/operator/managers.yaml +++ b/config/operator/managers.yaml @@ -85,6 +85,15 @@ spec: runAsNonRoot: true serviceAccountName: {{ 
.Name }}-operator-controller-manager terminationGracePeriodSeconds: 10 + tolerations: + - key: "node.kubernetes.io/not-ready" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 120 + - key: "node.kubernetes.io/unreachable" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 120 {{- if isEnvVarTrue .Deployment.Manager.Env "ENABLE_WEBHOOKS" }} volumes: - name: cert diff --git a/config/operator/rabbit.yaml b/config/operator/rabbit.yaml index 1b5fe2ecde..a1d4ee3e15 100644 --- a/config/operator/rabbit.yaml +++ b/config/operator/rabbit.yaml @@ -46,3 +46,12 @@ spec: memory: {{ .RabbitmqOperator.Deployment.Manager.Resources.Requests.Memory }} serviceAccountName: rabbitmq-cluster-operator-controller-manager terminationGracePeriodSeconds: 10 + tolerations: + - key: "node.kubernetes.io/not-ready" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 120 + - key: "node.kubernetes.io/unreachable" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 120