Skip to content

Commit 35b23d8

Browse files
committed
Add tolerations customization interface for service operators
Adds the ability for service operators to customize pod tolerations, similar to how resource limits/requests are currently handled.

Features:
- Add Tolerations field to ContainerSpec API type
- Implement merge behavior: custom tolerations are merged with defaults, overriding by key when same key exists
- Set global default tolerations (node.kubernetes.io/not-ready and node.kubernetes.io/unreachable with 120s timeout) in controller
- Update deployment templates (managers.yaml, operator.yaml) to render custom tolerations from Deployment struct
- Add test coverage for merge logic and override behavior

Example usage:

```yaml
operatorOverrides:
- name: "keystone"
  controllerManager:
    tolerations:
    - key: "node.kubernetes.io/not-ready"  # Override default timeout
      operator: "Exists"
      effect: "NoExecute"
      tolerationSeconds: 600
    - key: "node.example.com/gpu"  # Add new toleration
      operator: "Equal"
      value: "nvidia"
      effect: "NoSchedule"
```

The merge behavior ensures operators get both default tolerations (unless overridden by matching key) plus any additional custom ones, providing flexibility while maintaining safe defaults.

Assisted-by: claude-4-sonnet
Signed-off-by: Martin Schuppert <mschuppert@redhat.com>
1 parent 69d50de commit 35b23d8

13 files changed

Lines changed: 614 additions & 41 deletions

File tree

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ bindata: kustomize yq ## Call sync bindata script
156156
sed -i bindata/operator/operator.yaml -e "/customLimits/c\\ cpu: {{ .OpenStackOperator.Deployment.Manager.Resources.Limits.CPU }}\n memory: {{ .OpenStackOperator.Deployment.Manager.Resources.Limits.Memory }}"
157157
sed -i bindata/operator/operator.yaml -e "/customRequests/c\\ cpu: {{ .OpenStackOperator.Deployment.Manager.Resources.Requests.CPU }}\n memory: {{ .OpenStackOperator.Deployment.Manager.Resources.Requests.Memory }}"
158158
sed -i bindata/operator/operator.yaml -e "s|kube-rbac-proxy:replace_me.*|'{{ .OpenStackOperator.Deployment.KubeRbacProxy.Image }}'|"
159+
sed -i bindata/operator/operator.yaml -e "/customTolerations/c\\ tolerations:\n{{- range .OpenStackOperator.Deployment.Tolerations }}\n - key: \"{{ .Key }}\"\n{{- if .Operator }}\n operator: \"{{ .Operator }}\"\n{{- end }}\n{{- if .Value }}\n value: \"{{ .Value }}\"\n{{- end }}\n{{- if .Effect }}\n effect: \"{{ .Effect }}\"\n{{- end }}\n{{- if .TolerationSeconds }}\n tolerationSeconds: {{ .TolerationSeconds }}\n{{- end }}\n{{- end }}"
159160
cp config/operator/managers.yaml bindata/operator/
160161
cp config/operator/rabbit.yaml bindata/operator/
161162
$(KUSTOMIZE) build config/rbac > bindata/rbac/rbac.yaml

apis/bases/operator.openstack.org_openstacks.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,22 @@ spec:
7070
x-kubernetes-int-or-string: true
7171
type: object
7272
type: object
73+
tolerations:
74+
items:
75+
properties:
76+
effect:
77+
type: string
78+
key:
79+
type: string
80+
operator:
81+
type: string
82+
tolerationSeconds:
83+
format: int64
84+
type: integer
85+
value:
86+
type: string
87+
type: object
88+
type: array
7389
type: object
7490
name:
7591
enum:

apis/operator/v1beta1/openstack_types.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
corev1 "k8s.io/api/core/v1"
2222
"k8s.io/apimachinery/pkg/api/resource"
2323
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
24+
"k8s.io/utils/ptr"
2425
)
2526

2627
const (
@@ -69,6 +70,23 @@ var (
6970
DefaultRbacProxyMemoryLimit resource.Quantity = resource.MustParse("128Mi")
7071
// DefaultRbacProxyMemoryRequests - Default kube rbac proxy container memory requests
7172
DefaultRbacProxyMemoryRequests resource.Quantity = resource.MustParse("64Mi")
73+
74+
// DefaultTolerations - Default tolerations for all operators
75+
DefaultTolerations = []corev1.Toleration{
76+
{
77+
Key: "node.kubernetes.io/not-ready",
78+
Operator: corev1.TolerationOpExists,
79+
Effect: corev1.TaintEffectNoExecute,
80+
TolerationSeconds: ptr.To[int64](120),
81+
},
82+
{
83+
Key: "node.kubernetes.io/unreachable",
84+
Operator: corev1.TolerationOpExists,
85+
Effect: corev1.TaintEffectNoExecute,
86+
TolerationSeconds: ptr.To[int64](120),
87+
},
88+
}
89+
7290
// OperatorList - list of all operators with optional different defaults than the above.
7391
// NOTE: test-operator was deployed as an independent package so it may or may not be installed
7492
// NOTE: depending on how watcher-operator is released for FR2 and then in FR3 it may need to be
@@ -210,6 +228,11 @@ type ContainerSpec struct {
210228
// Resources - Compute Resources for the service operator controller manager
211229
// https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
212230
Resources corev1.ResourceRequirements `json:"resources,omitempty"`
231+
232+
// +kubebuilder:validation:Optional
233+
// Tolerations - Tolerations for the service operator controller manager
234+
// https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/
235+
Tolerations []corev1.Toleration `json:"tolerations,omitempty"`
213236
}
214237

215238
// OpenStackStatus defines the observed state of OpenStack

apis/operator/v1beta1/zz_generated.deepcopy.go

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

bindata/operator/managers.yaml

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -86,14 +86,21 @@ spec:
8686
serviceAccountName: {{ .Name }}-operator-controller-manager
8787
terminationGracePeriodSeconds: 10
8888
tolerations:
89-
- key: "node.kubernetes.io/not-ready"
90-
operator: "Exists"
91-
effect: "NoExecute"
92-
tolerationSeconds: 120
93-
- key: "node.kubernetes.io/unreachable"
94-
operator: "Exists"
95-
effect: "NoExecute"
96-
tolerationSeconds: 120
89+
{{- range .Deployment.Tolerations }}
90+
- key: "{{ .Key }}"
91+
{{- if .Operator }}
92+
operator: "{{ .Operator }}"
93+
{{- end }}
94+
{{- if .Value }}
95+
value: "{{ .Value }}"
96+
{{- end }}
97+
{{- if .Effect }}
98+
effect: "{{ .Effect }}"
99+
{{- end }}
100+
{{- if .TolerationSeconds }}
101+
tolerationSeconds: {{ .TolerationSeconds }}
102+
{{- end }}
103+
{{- end }}
97104
{{- if isEnvVarTrue .Deployment.Manager.Env "ENABLE_WEBHOOKS" }}
98105
volumes:
99106
- name: cert

bindata/operator/operator.yaml

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -129,19 +129,26 @@ spec:
129129
memory: 64Mi
130130
securityContext:
131131
allowPrivilegeEscalation: false
132+
tolerations:
133+
{{- range .OpenStackOperator.Deployment.Tolerations }}
134+
- key: "{{ .Key }}"
135+
{{- if .Operator }}
136+
operator: "{{ .Operator }}"
137+
{{- end }}
138+
{{- if .Value }}
139+
value: "{{ .Value }}"
140+
{{- end }}
141+
{{- if .Effect }}
142+
effect: "{{ .Effect }}"
143+
{{- end }}
144+
{{- if .TolerationSeconds }}
145+
tolerationSeconds: {{ .TolerationSeconds }}
146+
{{- end }}
147+
{{- end }}
132148
securityContext:
133149
runAsNonRoot: true
134150
serviceAccountName: openstack-operator-controller-manager
135151
terminationGracePeriodSeconds: 10
136-
tolerations:
137-
- effect: NoExecute
138-
key: node.kubernetes.io/not-ready
139-
operator: Exists
140-
tolerationSeconds: 120
141-
- effect: NoExecute
142-
key: node.kubernetes.io/unreachable
143-
operator: Exists
144-
tolerationSeconds: 120
145152
volumes:
146153
- name: cert
147154
secret:

config/crd/bases/operator.openstack.org_openstacks.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,22 @@ spec:
7070
x-kubernetes-int-or-string: true
7171
type: object
7272
type: object
73+
tolerations:
74+
items:
75+
properties:
76+
effect:
77+
type: string
78+
key:
79+
type: string
80+
operator:
81+
type: string
82+
tolerationSeconds:
83+
format: int64
84+
type: integer
85+
value:
86+
type: string
87+
type: object
88+
type: array
7389
type: object
7490
name:
7591
enum:

config/manager/manager.yaml

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -72,12 +72,4 @@ spec:
7272
customRequests: replace_me #NOTE: this is used via the Makefile to inject a custom template that kustomize won't allow
7373
serviceAccountName: openstack-operator-controller-manager
7474
terminationGracePeriodSeconds: 10
75-
tolerations:
76-
- key: "node.kubernetes.io/not-ready"
77-
operator: "Exists"
78-
effect: "NoExecute"
79-
tolerationSeconds: 120
80-
- key: "node.kubernetes.io/unreachable"
81-
operator: "Exists"
82-
effect: "NoExecute"
83-
tolerationSeconds: 120
75+
customTolerations: replace_me #NOTE: this is used via the Makefile to inject a custom template that kustomize won't allow

config/operator/managers.yaml

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -86,14 +86,21 @@ spec:
8686
serviceAccountName: {{ .Name }}-operator-controller-manager
8787
terminationGracePeriodSeconds: 10
8888
tolerations:
89-
- key: "node.kubernetes.io/not-ready"
90-
operator: "Exists"
91-
effect: "NoExecute"
92-
tolerationSeconds: 120
93-
- key: "node.kubernetes.io/unreachable"
94-
operator: "Exists"
95-
effect: "NoExecute"
96-
tolerationSeconds: 120
89+
{{- range .Deployment.Tolerations }}
90+
- key: "{{ .Key }}"
91+
{{- if .Operator }}
92+
operator: "{{ .Operator }}"
93+
{{- end }}
94+
{{- if .Value }}
95+
value: "{{ .Value }}"
96+
{{- end }}
97+
{{- if .Effect }}
98+
effect: "{{ .Effect }}"
99+
{{- end }}
100+
{{- if .TolerationSeconds }}
101+
tolerationSeconds: {{ .TolerationSeconds }}
102+
{{- end }}
103+
{{- end }}
97104
{{- if isEnvVarTrue .Deployment.Manager.Env "ENABLE_WEBHOOKS" }}
98105
volumes:
99106
- name: cert
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
apiVersion: operator.openstack.org/v1beta1
2+
kind: OpenStack
3+
metadata:
4+
labels:
5+
app.kubernetes.io/name: openstack
6+
app.kubernetes.io/instance: openstack-sample
7+
app.kubernetes.io/part-of: openstack-operator
8+
app.kubernetes.io/managed-by: kustomize
9+
app.kubernetes.io/created-by: openstack-operator
10+
name: openstack-sample
11+
spec:
12+
# Example showing how to customize tolerations for different service operators
13+
#
14+
# MERGE BEHAVIOR:
15+
# - Custom tolerations are MERGED with the default tolerations
16+
# - If a custom toleration has the same KEY as a default, it OVERRIDES the default
17+
# - If a custom toleration has a different KEY, it is ADDED to the defaults
18+
#
19+
# Default tolerations (applied automatically unless overridden):
20+
# - key: "node.kubernetes.io/not-ready"
21+
# operator: "Exists"
22+
# effect: "NoExecute"
23+
# tolerationSeconds: 120
24+
# - key: "node.kubernetes.io/unreachable"
25+
# operator: "Exists"
26+
# effect: "NoExecute"
27+
# tolerationSeconds: 120
28+
operatorOverrides:
29+
- name: "keystone"
30+
# Custom tolerations for keystone operator pods
31+
controllerManager:
32+
tolerations:
33+
- key: "example.com/special-nodes"
34+
operator: "Equal"
35+
value: "keystone"
36+
effect: "NoSchedule"
37+
- key: "node.kubernetes.io/memory-pressure"
38+
operator: "Exists"
39+
effect: "NoExecute"
40+
tolerationSeconds: 300
41+
- name: "nova"
42+
# Example: Override default tolerations and add new ones
43+
# Result will be:
44+
# 1. node.kubernetes.io/not-ready (OVERRIDDEN - 600s instead of 120s)
45+
# 2. node.kubernetes.io/unreachable (OVERRIDDEN - 400s instead of 120s)
46+
# 3. node.example.com/compute (ADDED - new toleration)
47+
controllerManager:
48+
tolerations:
49+
- key: "node.kubernetes.io/not-ready"
50+
operator: "Exists"
51+
effect: "NoExecute"
52+
tolerationSeconds: 600 # Override default 120s
53+
- key: "node.kubernetes.io/unreachable"
54+
operator: "Exists"
55+
effect: "NoExecute"
56+
tolerationSeconds: 400 # Override default 120s
57+
- key: "node.example.com/compute" # Add new toleration
58+
operator: "Equal"
59+
value: "true"
60+
effect: "NoSchedule"
61+
- name: "glance"
62+
# Custom resource limits AND tolerations example
63+
controllerManager:
64+
resources:
65+
limits:
66+
cpu: "2"
67+
memory: "4Gi"
68+
requests:
69+
cpu: "1"
70+
memory: "2Gi"
71+
tolerations:
72+
- key: "storage-node"
73+
operator: "Equal"
74+
value: "true"
75+
effect: "NoSchedule"
76+
# Note: Operators not listed (like mariadb, neutron, etc.) will use the default tolerations

0 commit comments

Comments
 (0)