From c55ec272c31d65a5550dac8f1c9c881969384f62 Mon Sep 17 00:00:00 2001
From: Martin Schuppert
Date: Tue, 5 Aug 2025 09:00:29 +0200
Subject: [PATCH] Add tolerations customization interface for service operators

Adds the ability for service operators to customize pod tolerations,
similar to how resource limits/requests are currently handled.

Features:
- Add Tolerations field to ContainerSpec API type
- Implement merge behavior: custom tolerations are merged with defaults,
  overriding by key when the same key exists
- Set global default tolerations (node.kubernetes.io/not-ready and
  node.kubernetes.io/unreachable with 120s timeout) in controller
- Update deployment templates (managers.yaml, operator.yaml) to render
  custom tolerations from Deployment struct
- Add test coverage for merge logic and override behavior

Example usage:
```yaml
operatorOverrides:
- name: "keystone"
  controllerManager:
    tolerations:
    - key: "node.kubernetes.io/not-ready" # Override default timeout
      operator: "Exists"
      effect: "NoExecute"
      tolerationSeconds: 600
    - key: "node.example.com/gpu" # Add new toleration
      operator: "Equal"
      value: "nvidia"
      effect: "NoSchedule"
```

The merge behavior ensures operators get both default tolerations
(unless overridden by matching key) plus any additional custom ones,
providing flexibility while maintaining safe defaults.
Jira: OSPRH-18693 Assisted-by: claude-4-sonnet Signed-off-by: Martin Schuppert --- Makefile | 5 +- .../operator.openstack.org_openstacks.yaml | 16 + apis/operator/v1beta1/openstack_types.go | 23 + .../operator/v1beta1/zz_generated.deepcopy.go | 8 + bindata/operator/managers.yaml | 23 +- bindata/operator/operator.yaml | 25 +- .../operator.openstack.org_openstacks.yaml | 16 + config/manager/manager.yaml | 10 +- config/operator/managers.yaml | 23 +- ...v1beta1_openstack_tolerations_example.yaml | 76 ++++ controllers/operator/openstack_controller.go | 1 + pkg/operator/override.go | 33 ++ pkg/operator/override_test.go | 400 +++++++++++++++++- 13 files changed, 616 insertions(+), 43 deletions(-) create mode 100644 config/samples/operator_v1beta1_openstack_tolerations_example.yaml diff --git a/Makefile b/Makefile index 2aa64dd7e2..ae872ab9cf 100644 --- a/Makefile +++ b/Makefile @@ -156,6 +156,7 @@ bindata: kustomize yq ## Call sync bindata script sed -i bindata/operator/operator.yaml -e "/customLimits/c\\ cpu: {{ .OpenStackOperator.Deployment.Manager.Resources.Limits.CPU }}\n memory: {{ .OpenStackOperator.Deployment.Manager.Resources.Limits.Memory }}" sed -i bindata/operator/operator.yaml -e "/customRequests/c\\ cpu: {{ .OpenStackOperator.Deployment.Manager.Resources.Requests.CPU }}\n memory: {{ .OpenStackOperator.Deployment.Manager.Resources.Requests.Memory }}" sed -i bindata/operator/operator.yaml -e "s|kube-rbac-proxy:replace_me.*|'{{ .OpenStackOperator.Deployment.KubeRbacProxy.Image }}'|" + sed -i bindata/operator/operator.yaml -e "/customTolerations/c\\ tolerations:\n{{- range .OpenStackOperator.Deployment.Tolerations }}\n - key: \"{{ .Key }}\"\n{{- if .Operator }}\n operator: \"{{ .Operator }}\"\n{{- end }}\n{{- if .Value }}\n value: \"{{ .Value }}\"\n{{- end }}\n{{- if .Effect }}\n effect: \"{{ .Effect }}\"\n{{- end }}\n{{- if .TolerationSeconds }}\n tolerationSeconds: {{ .TolerationSeconds }}\n{{- end }}\n{{- end }}" cp config/operator/managers.yaml 
bindata/operator/ cp config/operator/rabbit.yaml bindata/operator/ $(KUSTOMIZE) build config/rbac > bindata/rbac/rbac.yaml @@ -203,7 +204,7 @@ ginkgo-run: ## Run ginkgo. source hack/export_related_images.sh && \ KUBEBUILDER_ASSETS="$(shell $(ENVTEST) -v debug --bin-dir $(LOCALBIN) use $(ENVTEST_K8S_VERSION) -p path)" \ OPERATOR_TEMPLATES="$(PWD)/templates" \ - $(GINKGO) --trace --cover --coverpkg=./pkg/operator,./pkg/openstack,./pkg/openstackclient,./pkg/util,./pkg/dataplane/...,./controllers/...,./apis/client/v1beta1,./apis/core/v1beta1,./apis/dataplane/v1beta1 --coverprofile cover.out --covermode=atomic ${PROC_CMD} $(GINKGO_ARGS) $(GINKGO_TESTS) + $(GINKGO) --trace --cover --coverpkg=./pkg/...,./controllers/...,./apis/... --coverprofile cover.out --covermode=atomic ${PROC_CMD} $(GINKGO_ARGS) $(GINKGO_TESTS) .PHONY: test-all test-all: test golint golangci golangci-lint ## Run all tests. @@ -302,7 +303,7 @@ CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen ENVTEST ?= $(LOCALBIN)/setup-envtest CRD_MARKDOWN ?= $(LOCALBIN)/crd-to-markdown GINKGO ?= $(LOCALBIN)/ginkgo -GINKGO_TESTS ?= ./tests/... ./apis/client/... ./apis/core/... ./apis/dataplane/... ./pkg/dataplane/... +GINKGO_TESTS ?= ./tests/... ./apis/client/... ./apis/core/... ./apis/dataplane/... ./pkg/... 
KUTTL ?= $(LOCALBIN)/kubectl-kuttl diff --git a/apis/bases/operator.openstack.org_openstacks.yaml b/apis/bases/operator.openstack.org_openstacks.yaml index eaef6c42db..78d156b307 100644 --- a/apis/bases/operator.openstack.org_openstacks.yaml +++ b/apis/bases/operator.openstack.org_openstacks.yaml @@ -70,6 +70,22 @@ spec: x-kubernetes-int-or-string: true type: object type: object + tolerations: + items: + properties: + effect: + type: string + key: + type: string + operator: + type: string + tolerationSeconds: + format: int64 + type: integer + value: + type: string + type: object + type: array type: object name: enum: diff --git a/apis/operator/v1beta1/openstack_types.go b/apis/operator/v1beta1/openstack_types.go index 868b65a774..e6b4f6c8b8 100644 --- a/apis/operator/v1beta1/openstack_types.go +++ b/apis/operator/v1beta1/openstack_types.go @@ -21,6 +21,7 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" ) const ( @@ -69,6 +70,23 @@ var ( DefaultRbacProxyMemoryLimit resource.Quantity = resource.MustParse("128Mi") // DefaultRbacProxyMemoryRequests - Default kube rbac proxy container memory requests DefaultRbacProxyMemoryRequests resource.Quantity = resource.MustParse("64Mi") + + // DefaultTolerations - Default tolerations for all operators + DefaultTolerations = []corev1.Toleration{ + { + Key: corev1.TaintNodeNotReady, // "node.kubernetes.io/not-ready" + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: ptr.To[int64](120), + }, + { + Key: corev1.TaintNodeUnreachable, // "node.kubernetes.io/unreachable" + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: ptr.To[int64](120), + }, + } + // OperatorList - list of all operators with optional different defaults then the above. 
// NOTE: test-operator was deployed as a independant package so it may or may not be installed // NOTE: depending on how watcher-operator is released for FR2 and then in FR3 it may need to be @@ -210,6 +228,11 @@ type ContainerSpec struct { // Resources - Compute Resources for the service operator controller manager // https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ Resources corev1.ResourceRequirements `json:"resources,omitempty"` + + // +kubebuilder:validation:Optional + // Tolerations - Tolerations for the service operator controller manager + // https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/ + Tolerations []corev1.Toleration `json:"tolerations,omitempty"` } // OpenStackStatus defines the observed state of OpenStack diff --git a/apis/operator/v1beta1/zz_generated.deepcopy.go b/apis/operator/v1beta1/zz_generated.deepcopy.go index 7bfa5c84de..7cd2cf52f1 100644 --- a/apis/operator/v1beta1/zz_generated.deepcopy.go +++ b/apis/operator/v1beta1/zz_generated.deepcopy.go @@ -22,6 +22,7 @@ package v1beta1 import ( "github.com/openstack-k8s-operators/lib-common/modules/common/condition" + "k8s.io/api/core/v1" runtime "k8s.io/apimachinery/pkg/runtime" ) @@ -29,6 +30,13 @@ import ( func (in *ContainerSpec) DeepCopyInto(out *ContainerSpec) { *out = *in in.Resources.DeepCopyInto(&out.Resources) + if in.Tolerations != nil { + in, out := &in.Tolerations, &out.Tolerations + *out = make([]v1.Toleration, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ContainerSpec. 
diff --git a/bindata/operator/managers.yaml b/bindata/operator/managers.yaml index a42ba323fa..8b13757f05 100644 --- a/bindata/operator/managers.yaml +++ b/bindata/operator/managers.yaml @@ -86,14 +86,21 @@ spec: serviceAccountName: {{ .Name }}-operator-controller-manager terminationGracePeriodSeconds: 10 tolerations: - - key: "node.kubernetes.io/not-ready" - operator: "Exists" - effect: "NoExecute" - tolerationSeconds: 120 - - key: "node.kubernetes.io/unreachable" - operator: "Exists" - effect: "NoExecute" - tolerationSeconds: 120 +{{- range .Deployment.Tolerations }} + - key: "{{ .Key }}" +{{- if .Operator }} + operator: "{{ .Operator }}" +{{- end }} +{{- if .Value }} + value: "{{ .Value }}" +{{- end }} +{{- if .Effect }} + effect: "{{ .Effect }}" +{{- end }} +{{- if .TolerationSeconds }} + tolerationSeconds: {{ .TolerationSeconds }} +{{- end }} +{{- end }} {{- if isEnvVarTrue .Deployment.Manager.Env "ENABLE_WEBHOOKS" }} volumes: - name: cert diff --git a/bindata/operator/operator.yaml b/bindata/operator/operator.yaml index f58c18c29b..5d43029d7a 100644 --- a/bindata/operator/operator.yaml +++ b/bindata/operator/operator.yaml @@ -129,19 +129,26 @@ spec: memory: 64Mi securityContext: allowPrivilegeEscalation: false + tolerations: +{{- range .OpenStackOperator.Deployment.Tolerations }} + - key: "{{ .Key }}" +{{- if .Operator }} + operator: "{{ .Operator }}" +{{- end }} +{{- if .Value }} + value: "{{ .Value }}" +{{- end }} +{{- if .Effect }} + effect: "{{ .Effect }}" +{{- end }} +{{- if .TolerationSeconds }} + tolerationSeconds: {{ .TolerationSeconds }} +{{- end }} +{{- end }} securityContext: runAsNonRoot: true serviceAccountName: openstack-operator-controller-manager terminationGracePeriodSeconds: 10 - tolerations: - - effect: NoExecute - key: node.kubernetes.io/not-ready - operator: Exists - tolerationSeconds: 120 - - effect: NoExecute - key: node.kubernetes.io/unreachable - operator: Exists - tolerationSeconds: 120 volumes: - name: cert secret: diff --git 
a/config/crd/bases/operator.openstack.org_openstacks.yaml b/config/crd/bases/operator.openstack.org_openstacks.yaml index eaef6c42db..78d156b307 100644 --- a/config/crd/bases/operator.openstack.org_openstacks.yaml +++ b/config/crd/bases/operator.openstack.org_openstacks.yaml @@ -70,6 +70,22 @@ spec: x-kubernetes-int-or-string: true type: object type: object + tolerations: + items: + properties: + effect: + type: string + key: + type: string + operator: + type: string + tolerationSeconds: + format: int64 + type: integer + value: + type: string + type: object + type: array type: object name: enum: diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 90925bae6e..0e9d9b8608 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -72,12 +72,4 @@ spec: customRequests: replace_me #NOTE: this is used via the Makefile to inject a custom template that kustomize won't allow serviceAccountName: openstack-operator-controller-manager terminationGracePeriodSeconds: 10 - tolerations: - - key: "node.kubernetes.io/not-ready" - operator: "Exists" - effect: "NoExecute" - tolerationSeconds: 120 - - key: "node.kubernetes.io/unreachable" - operator: "Exists" - effect: "NoExecute" - tolerationSeconds: 120 + customTolerations: replace_me #NOTE: this is used via the Makefile to inject a custom template that kustomize won't allow diff --git a/config/operator/managers.yaml b/config/operator/managers.yaml index a42ba323fa..8b13757f05 100644 --- a/config/operator/managers.yaml +++ b/config/operator/managers.yaml @@ -86,14 +86,21 @@ spec: serviceAccountName: {{ .Name }}-operator-controller-manager terminationGracePeriodSeconds: 10 tolerations: - - key: "node.kubernetes.io/not-ready" - operator: "Exists" - effect: "NoExecute" - tolerationSeconds: 120 - - key: "node.kubernetes.io/unreachable" - operator: "Exists" - effect: "NoExecute" - tolerationSeconds: 120 +{{- range .Deployment.Tolerations }} + - key: "{{ .Key }}" +{{- if .Operator }} + operator: 
"{{ .Operator }}" +{{- end }} +{{- if .Value }} + value: "{{ .Value }}" +{{- end }} +{{- if .Effect }} + effect: "{{ .Effect }}" +{{- end }} +{{- if .TolerationSeconds }} + tolerationSeconds: {{ .TolerationSeconds }} +{{- end }} +{{- end }} {{- if isEnvVarTrue .Deployment.Manager.Env "ENABLE_WEBHOOKS" }} volumes: - name: cert diff --git a/config/samples/operator_v1beta1_openstack_tolerations_example.yaml b/config/samples/operator_v1beta1_openstack_tolerations_example.yaml new file mode 100644 index 0000000000..c11b75d664 --- /dev/null +++ b/config/samples/operator_v1beta1_openstack_tolerations_example.yaml @@ -0,0 +1,76 @@ +apiVersion: operator.openstack.org/v1beta1 +kind: OpenStack +metadata: + labels: + app.kubernetes.io/name: openstack + app.kubernetes.io/instance: openstack-sample + app.kubernetes.io/part-of: openstack-operator + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/created-by: openstack-operator + name: openstack-sample +spec: + # Example showing how to customize tolerations for different service operators + # + # MERGE BEHAVIOR: + # - Custom tolerations are MERGED with the default tolerations + # - If a custom toleration has the same KEY as a default, it OVERRIDES the default + # - If a custom toleration has a different KEY, it is ADDED to the defaults + # + # Default tolerations (applied automatically unless overridden): + # - key: "node.kubernetes.io/not-ready" + # operator: "Exists" + # effect: "NoExecute" + # tolerationSeconds: 120 + # - key: "node.kubernetes.io/unreachable" + # operator: "Exists" + # effect: "NoExecute" + # tolerationSeconds: 120 + operatorOverrides: + - name: "keystone" + # Custom tolerations for keystone operator pods + controllerManager: + tolerations: + - key: "example.com/special-nodes" + operator: "Equal" + value: "keystone" + effect: "NoSchedule" + - key: "node.kubernetes.io/memory-pressure" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 300 + - name: "nova" + # Example: Override default 
tolerations and add new ones + # Result will be: + # 1. node.kubernetes.io/not-ready (OVERRIDDEN - 600s instead of 120s) + # 2. node.kubernetes.io/unreachable (OVERRIDDEN - 400s instead of 120s) + # 3. node.example.com/compute (ADDED - new toleration) + controllerManager: + tolerations: + - key: "node.kubernetes.io/not-ready" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 600 # Override default 120s + - key: "node.kubernetes.io/unreachable" + operator: "Exists" + effect: "NoExecute" + tolerationSeconds: 400 # Override default 120s + - key: "node.example.com/compute" # Add new toleration + operator: "Equal" + value: "true" + effect: "NoSchedule" + - name: "glance" + # Custom resource limits AND tolerations example + controllerManager: + resources: + limits: + cpu: "2" + memory: "4Gi" + requests: + cpu: "1" + memory: "2Gi" + tolerations: + - key: "storage-node" + operator: "Equal" + value: "true" + effect: "NoSchedule" + # Note: Operators not listed (like mariadb, neutron, etc.) 
will use the default tolerations diff --git a/controllers/operator/openstack_controller.go b/controllers/operator/openstack_controller.go index fa03f6cb14..59f9d65092 100644 --- a/controllers/operator/openstack_controller.go +++ b/controllers/operator/openstack_controller.go @@ -566,6 +566,7 @@ func (r *OpenStackReconciler) applyOperator(ctx context.Context, instance *opera }, }, KubeRbacProxy: kubeRbacProxyContainer, + Tolerations: operatorv1beta1.DefaultTolerations, }, } diff --git a/pkg/operator/override.go b/pkg/operator/override.go index 3cea549c12..6de1efb6b7 100644 --- a/pkg/operator/override.go +++ b/pkg/operator/override.go @@ -38,6 +38,7 @@ type Deployment struct { Replicas *int32 Manager Container KubeRbacProxy Container + Tolerations []corev1.Toleration } // Container - @@ -108,6 +109,38 @@ func SetOverrides(opOvr operatorv1beta1.OperatorSpec, op *Operator) { op.Deployment.Manager.Resources.Requests.Memory = opOvr.ControllerManager.Resources.Requests.Memory().String() } } + if len(opOvr.ControllerManager.Tolerations) > 0 { + op.Deployment.Tolerations = mergeTolerations(op.Deployment.Tolerations, opOvr.ControllerManager.Tolerations) + } +} + +// mergeTolerations merges custom tolerations with default tolerations. +// If a custom toleration has the same key as a default one, it overrides the default. +// Otherwise, the custom toleration is added to the list. 
+func mergeTolerations(defaults, custom []corev1.Toleration) []corev1.Toleration { + if len(custom) == 0 { + return defaults + } + + // Start with a copy of defaults + merged := make([]corev1.Toleration, len(defaults)) + copy(merged, defaults) + + // For each custom toleration, check if it should override a default one + for _, customTol := range custom { + + f := func(c corev1.Toleration) bool { + return c.Key == customTol.Key + } + idx := slices.IndexFunc(merged, f) + if idx >= 0 { + merged[idx] = customTol + } else { + merged = append(merged, customTol) + } + } + + return merged } func GetOperator(operators []Operator, name string) (int, Operator) { diff --git a/pkg/operator/override_test.go b/pkg/operator/override_test.go index d797d0b3f4..09195e7249 100644 --- a/pkg/operator/override_test.go +++ b/pkg/operator/override_test.go @@ -104,6 +104,22 @@ func TestApplyOperatorOverrides(t *testing.T) { }, } + // Define custom tolerations for testing + customTolerations := []corev1.Toleration{ + { + Key: "example.com/special-node", + Operator: corev1.TolerationOpEqual, + Value: "special", + Effect: corev1.TaintEffectNoSchedule, + }, + { + Key: corev1.TaintNodeNotReady, // "node.kubernetes.io/not-ready", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: ptr.To[int64](300), + }, + } + // A list of potential overrides that can be applied allOverrides := []operatorv1beta1.OperatorSpec{ { @@ -140,6 +156,12 @@ func TestApplyOperatorOverrides(t *testing.T) { }, }, }, + { + Name: "nova", + ControllerManager: operatorv1beta1.ContainerSpec{ + Tolerations: customTolerations, + }, + }, } // --- Define Test Cases --- @@ -149,9 +171,10 @@ func TestApplyOperatorOverrides(t *testing.T) { initialOp *Operator overrideList []operatorv1beta1.OperatorSpec // We will assert on specific fields instead of the whole struct - expectedReplicas *int32 - expectedLimits *ResourceList - expectedRequests *ResourceList + expectedReplicas *int32 + 
expectedLimits *ResourceList + expectedRequests *ResourceList + expectedTolerations []corev1.Toleration }{ { name: "Scenario 1: Override is found and applied (Keystone)", @@ -218,14 +241,30 @@ func TestApplyOperatorOverrides(t *testing.T) { }, { name: "Scenario 4: Operator not in override list, no changes applied", - operatorName: "nova", + operatorName: "neutron", initialOp: &Operator{ - Name: "nova", + Name: "neutron", Deployment: Deployment{Replicas: ptr.To[int32](1)}, }, overrideList: allOverrides, expectedReplicas: ptr.To[int32](1), // Expect this to remain unchanged }, + { + name: "Scenario 5: Tolerations override is applied (Nova)", + operatorName: "nova", + initialOp: &Operator{ + Name: "nova", + Deployment: Deployment{ + Replicas: ptr.To[int32](1), + Manager: Container{ + Resources: defaultResources, + }, + }, + }, + overrideList: allOverrides, + expectedReplicas: ptr.To[int32](1), + expectedTolerations: customTolerations, + }, } // --- Run Test Cases --- @@ -265,11 +304,358 @@ func TestApplyOperatorOverrides(t *testing.T) { } // Assert Requests - if tc.expectedLimits != nil { + if tc.expectedRequests != nil { if !reflect.DeepEqual(tc.initialOp.Deployment.Manager.Resources.Requests, tc.expectedRequests) { - t.Errorf("wrong resource limits:\n got: %+v\nwant: %+v", tc.initialOp.Deployment.Manager.Resources.Requests, tc.expectedRequests) + t.Errorf("wrong resource requests:\n got: %+v\nwant: %+v", tc.initialOp.Deployment.Manager.Resources.Requests, tc.expectedRequests) } } + + // Assert Tolerations + if tc.expectedTolerations != nil { + if !reflect.DeepEqual(tc.initialOp.Deployment.Tolerations, tc.expectedTolerations) { + t.Errorf("wrong tolerations:\n got: %+v\nwant: %+v", tc.initialOp.Deployment.Tolerations, tc.expectedTolerations) + } + } + }) + } +} + +// --- Test specifically for tolerations functionality --- + +func TestTolerationsOverride(t *testing.T) { + testTolerations := []corev1.Toleration{ + { + Key: "node.example.com/gpu", + Operator: 
corev1.TolerationOpEqual, + Value: "nvidia", + Effect: corev1.TaintEffectNoSchedule, + }, + { + Key: corev1.TaintNodeMemoryPressure, // "node.kubernetes.io/memory-pressure", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: ptr.To[int64](600), + }, + } + + // Default tolerations for testing merge behavior + defaultTolerations := []corev1.Toleration{ + { + Key: corev1.TaintNodeNotReady, // "node.kubernetes.io/not-ready", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: ptr.To[int64](120), + }, + { + Key: corev1.TaintNodeUnreachable, // "node.kubernetes.io/unreachable", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: ptr.To[int64](120), + }, + } + + testCases := []struct { + name string + operatorSpec operatorv1beta1.OperatorSpec + initialTolerations []corev1.Toleration + expectedTolerations []corev1.Toleration + }{ + { + name: "Add tolerations to empty list", + operatorSpec: operatorv1beta1.OperatorSpec{ + Name: "test-operator", + ControllerManager: operatorv1beta1.ContainerSpec{ + Tolerations: testTolerations, + }, + }, + initialTolerations: nil, + expectedTolerations: testTolerations, + }, + { + name: "No custom tolerations, keep defaults unchanged", + operatorSpec: operatorv1beta1.OperatorSpec{ + Name: "test-operator", + ControllerManager: operatorv1beta1.ContainerSpec{ + // No tolerations specified + }, + }, + initialTolerations: defaultTolerations, + expectedTolerations: defaultTolerations, + }, + { + name: "Merge custom tolerations with defaults (different keys)", + operatorSpec: operatorv1beta1.OperatorSpec{ + Name: "test-operator", + ControllerManager: operatorv1beta1.ContainerSpec{ + Tolerations: testTolerations, // Different keys than defaults + }, + }, + initialTolerations: defaultTolerations, + expectedTolerations: append(defaultTolerations, testTolerations...), + }, + { + name: "Override default 
tolerations (same key)", + operatorSpec: operatorv1beta1.OperatorSpec{ + Name: "test-operator", + ControllerManager: operatorv1beta1.ContainerSpec{ + Tolerations: []corev1.Toleration{ + { + Key: corev1.TaintNodeNotReady, // "node.kubernetes.io/not-ready", // Same key as default + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: ptr.To[int64](600), // Different value + }, + }, + }, + }, + initialTolerations: defaultTolerations, + expectedTolerations: []corev1.Toleration{ + { + Key: corev1.TaintNodeNotReady, // "node.kubernetes.io/not-ready", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: ptr.To[int64](600), // Overridden value + }, + { + Key: corev1.TaintNodeUnreachable, // "node.kubernetes.io/unreachable", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: ptr.To[int64](120), // Unchanged default + }, + }, + }, + { + name: "Mixed scenario: override one default, add new custom", + operatorSpec: operatorv1beta1.OperatorSpec{ + Name: "test-operator", + ControllerManager: operatorv1beta1.ContainerSpec{ + Tolerations: []corev1.Toleration{ + { + Key: corev1.TaintNodeNotReady, // "node.kubernetes.io/not-ready", // Override default + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: ptr.To[int64](300), + }, + { + Key: "node.example.com/gpu", // Add new + Operator: corev1.TolerationOpEqual, + Value: "nvidia", + Effect: corev1.TaintEffectNoSchedule, + }, + }, + }, + }, + initialTolerations: defaultTolerations, + expectedTolerations: []corev1.Toleration{ + { + Key: corev1.TaintNodeNotReady, // "node.kubernetes.io/not-ready", // Overridden + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: ptr.To[int64](300), + }, + { + Key: corev1.TaintNodeUnreachable, // "node.kubernetes.io/unreachable", // Unchanged default + Operator: 
corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: ptr.To[int64](120), + }, + { + Key: "node.example.com/gpu", // New addition + Operator: corev1.TolerationOpEqual, + Value: "nvidia", + Effect: corev1.TaintEffectNoSchedule, + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + op := &Operator{ + Name: tc.operatorSpec.Name, + Deployment: Deployment{ + Tolerations: tc.initialTolerations, + }, + } + + SetOverrides(tc.operatorSpec, op) + + if !reflect.DeepEqual(op.Deployment.Tolerations, tc.expectedTolerations) { + t.Errorf("wrong tolerations after override:\n got: %+v\nwant: %+v", op.Deployment.Tolerations, tc.expectedTolerations) + } }) } } + +// --- Test for mergeTolerations function --- + +func TestMergeTolerations(t *testing.T) { + defaultTolerations := []corev1.Toleration{ + { + Key: corev1.TaintNodeNotReady, // "node.kubernetes.io/not-ready", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: ptr.To[int64](120), + }, + { + Key: corev1.TaintNodeUnreachable, // "node.kubernetes.io/unreachable", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: ptr.To[int64](120), + }, + } + + testCases := []struct { + name string + defaults []corev1.Toleration + custom []corev1.Toleration + expected []corev1.Toleration + }{ + { + name: "Empty custom tolerations should return defaults", + defaults: defaultTolerations, + custom: []corev1.Toleration{}, + expected: defaultTolerations, + }, + { + name: "Nil custom tolerations should return defaults", + defaults: defaultTolerations, + custom: nil, + expected: defaultTolerations, + }, + { + name: "Add new toleration to defaults", + defaults: defaultTolerations, + custom: []corev1.Toleration{ + { + Key: "node.example.com/gpu", + Operator: corev1.TolerationOpEqual, + Value: "nvidia", + Effect: corev1.TaintEffectNoSchedule, + }, + }, + expected: []corev1.Toleration{ + { 
+ Key: corev1.TaintNodeNotReady, // "node.kubernetes.io/not-ready", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: ptr.To[int64](120), + }, + { + Key: corev1.TaintNodeUnreachable, // "node.kubernetes.io/unreachable", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: ptr.To[int64](120), + }, + { + Key: "node.example.com/gpu", + Operator: corev1.TolerationOpEqual, + Value: "nvidia", + Effect: corev1.TaintEffectNoSchedule, + }, + }, + }, + { + name: "Override existing toleration", + defaults: defaultTolerations, + custom: []corev1.Toleration{ + { + Key: corev1.TaintNodeNotReady, // "node.kubernetes.io/not-ready", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: ptr.To[int64](600), + }, + }, + expected: []corev1.Toleration{ + { + Key: corev1.TaintNodeNotReady, // "node.kubernetes.io/not-ready", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: ptr.To[int64](600), // Overridden + }, + { + Key: corev1.TaintNodeUnreachable, // "node.kubernetes.io/unreachable", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: ptr.To[int64](120), // Unchanged + }, + }, + }, + { + name: "Mixed: override one, add one", + defaults: defaultTolerations, + custom: []corev1.Toleration{ + { + Key: corev1.TaintNodeNotReady, // "node.kubernetes.io/not-ready", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: ptr.To[int64](300), + }, + { + Key: "node.example.com/special", + Operator: corev1.TolerationOpEqual, + Value: "true", + Effect: corev1.TaintEffectNoSchedule, + }, + }, + expected: []corev1.Toleration{ + { + Key: corev1.TaintNodeNotReady, // "node.kubernetes.io/not-ready", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: ptr.To[int64](300), // Overridden 
+ }, + { + Key: corev1.TaintNodeUnreachable, // "node.kubernetes.io/unreachable", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: ptr.To[int64](120), // Unchanged + }, + { + Key: "node.example.com/special", + Operator: corev1.TolerationOpEqual, + Value: "true", + Effect: corev1.TaintEffectNoSchedule, // Added + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := mergeTolerations(tc.defaults, tc.custom) + if !reflect.DeepEqual(result, tc.expected) { + t.Errorf("mergeTolerations() failed:\n got: %+v\nwant: %+v", result, tc.expected) + } + }) + } +} + +// --- Test for global defaults initialization --- + +func TestGlobalTolerationsDefaults(t *testing.T) { + // Test that the default tolerations are correctly defined + if len(operatorv1beta1.DefaultTolerations) != 2 { + t.Errorf("Expected 2 default tolerations, got %d", len(operatorv1beta1.DefaultTolerations)) + } + + // Verify the specific default tolerations + expectedDefaults := []corev1.Toleration{ + { + Key: corev1.TaintNodeNotReady, // "node.kubernetes.io/not-ready", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: ptr.To[int64](120), + }, + { + Key: corev1.TaintNodeUnreachable, // "node.kubernetes.io/unreachable", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoExecute, + TolerationSeconds: ptr.To[int64](120), + }, + } + + if !reflect.DeepEqual(operatorv1beta1.DefaultTolerations, expectedDefaults) { + t.Errorf("Default tolerations don't match expected:\n got: %+v\nwant: %+v", operatorv1beta1.DefaultTolerations, expectedDefaults) + } +}