Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions bindata/operator/managers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,15 @@ spec:
runAsNonRoot: true
serviceAccountName: {{ .Name }}-operator-controller-manager
terminationGracePeriodSeconds: 10
tolerations:
- key: "node.kubernetes.io/not-ready"
operator: "Exists"
effect: "NoExecute"
tolerationSeconds: 120
- key: "node.kubernetes.io/unreachable"
operator: "Exists"
effect: "NoExecute"
tolerationSeconds: 120
{{- if isEnvVarTrue .Deployment.Manager.Env "ENABLE_WEBHOOKS" }}
volumes:
- name: cert
Expand Down
9 changes: 9 additions & 0 deletions bindata/operator/operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,15 @@ spec:
runAsNonRoot: true
serviceAccountName: openstack-operator-controller-manager
terminationGracePeriodSeconds: 10
tolerations:
- effect: NoExecute
key: node.kubernetes.io/not-ready
operator: Exists
tolerationSeconds: 120
- effect: NoExecute
key: node.kubernetes.io/unreachable
operator: Exists
tolerationSeconds: 120
volumes:
- name: cert
secret:
Expand Down
9 changes: 9 additions & 0 deletions bindata/operator/rabbit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,12 @@ spec:
memory: {{ .RabbitmqOperator.Deployment.Manager.Resources.Requests.Memory }}
serviceAccountName: rabbitmq-cluster-operator-controller-manager
terminationGracePeriodSeconds: 10
tolerations:
- key: "node.kubernetes.io/not-ready"
operator: "Exists"
effect: "NoExecute"
tolerationSeconds: 120
- key: "node.kubernetes.io/unreachable"
operator: "Exists"
effect: "NoExecute"
tolerationSeconds: 120
9 changes: 9 additions & 0 deletions config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,12 @@ spec:
customRequests: replace_me #NOTE: this is used via the Makefile to inject a custom template that kustomize won't allow
serviceAccountName: openstack-operator-controller-manager
terminationGracePeriodSeconds: 10
tolerations:
- key: "node.kubernetes.io/not-ready"
operator: "Exists"
effect: "NoExecute"
tolerationSeconds: 120
- key: "node.kubernetes.io/unreachable"
operator: "Exists"
effect: "NoExecute"
tolerationSeconds: 120
9 changes: 9 additions & 0 deletions config/operator/deployment/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,12 @@ spec:
memory: 128Mi
serviceAccountName: openstack-operator-controller-operator
terminationGracePeriodSeconds: 10
tolerations:
- key: "node.kubernetes.io/not-ready"
operator: "Exists"
effect: "NoExecute"
tolerationSeconds: 120
- key: "node.kubernetes.io/unreachable"
operator: "Exists"
effect: "NoExecute"
tolerationSeconds: 120
9 changes: 9 additions & 0 deletions config/operator/managers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,15 @@ spec:
runAsNonRoot: true
serviceAccountName: {{ .Name }}-operator-controller-manager
terminationGracePeriodSeconds: 10
tolerations:
- key: "node.kubernetes.io/not-ready"
operator: "Exists"
effect: "NoExecute"
tolerationSeconds: 120

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wondering whether we should provide an interface for customizing the tolerations, as we did for the resource limits/requests.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, we could do that as a follow-up, I suppose.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good. It is probably better to do it in a follow-up instead of increasing the size of this PR.

- key: "node.kubernetes.io/unreachable"
operator: "Exists"
effect: "NoExecute"
tolerationSeconds: 120
{{- if isEnvVarTrue .Deployment.Manager.Env "ENABLE_WEBHOOKS" }}
volumes:
- name: cert
Expand Down
9 changes: 9 additions & 0 deletions config/operator/rabbit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,12 @@ spec:
memory: {{ .RabbitmqOperator.Deployment.Manager.Resources.Requests.Memory }}
serviceAccountName: rabbitmq-cluster-operator-controller-manager
terminationGracePeriodSeconds: 10
tolerations:
- key: "node.kubernetes.io/not-ready"
operator: "Exists"
effect: "NoExecute"
tolerationSeconds: 120
- key: "node.kubernetes.io/unreachable"
operator: "Exists"
effect: "NoExecute"
tolerationSeconds: 120
12 changes: 12 additions & 0 deletions controllers/client/openstackclient_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,18 @@ func (r *OpenStackClientReconciler) Reconcile(ctx context.Context, req ctrl.Requ
)
}

// if pod is stuck in terminating state for more than 3 minutes, force delete it
if osclient.DeletionTimestamp != nil {
terminatingDuration := time.Since(osclient.DeletionTimestamp.Time)
if terminatingDuration > time.Minute*3 {
// Force delete only truly stuck pods
err := r.Client.Delete(ctx, osclient, client.GracePeriodSeconds(0))
if err != nil {
return ctrl.Result{}, fmt.Errorf("Failed to force delete pod: %w", err)
}
}
}

podReady := false

for _, condition := range osclient.Status.Conditions {
Expand Down
14 changes: 14 additions & 0 deletions pkg/openstackclient/funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,20 @@ func ClientPodSpec(
VolumeMounts: volumeMounts,
},
},
Tolerations: []corev1.Toleration{
{
Key: "node.kubernetes.io/not-ready",
Operator: corev1.TolerationOpExists,
Effect: corev1.TaintEffectNoExecute,
TolerationSeconds: &[]int64{120}[0],
},
{
Key: "node.kubernetes.io/unreachable",
Operator: corev1.TolerationOpExists,
Effect: corev1.TaintEffectNoExecute,
TolerationSeconds: &[]int64{120}[0],
},
},
}

if instance.Spec.NodeSelector != nil {
Expand Down