Skip to content

Commit a59f97d

Browse files
committed
Maintenance: Ensure there is always a blocking pod
We observed that, while a node was being terminated, the blocking pod briefly disappeared due to some taint, which then allowed Gardener to tear the host down even though VMs were still active. To counter that, tolerate any taint, regardless of key, for both the NoExecute and NoSchedule effects. Also change the deployment strategy to a rolling update, so that instead of deleting the blocking pod first (which allows a teardown until the pod has been re-created), the new pod is created first and only then is the old one torn down.
1 parent 645017d commit a59f97d

1 file changed

Lines changed: 12 additions & 3 deletions

File tree

internal/controller/maintenance_controller.go

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -186,14 +186,20 @@ func (r *MaintenanceController) ensureSignallingDeployment(ctx context.Context,
186186
}
187187

188188
var one int64 = 1
189+
zeroStr := intstr.FromInt(0)
190+
oneStr := intstr.FromInt(1)
189191

190192
deployment.Spec = appsv1.DeploymentSpec{
191193
Replicas: &scale,
192194
Selector: &metav1.LabelSelector{
193195
MatchLabels: labels,
194196
},
195197
Strategy: appsv1.DeploymentStrategy{
196-
Type: appsv1.RecreateDeploymentStrategyType,
198+
Type: appsv1.RollingUpdateDeploymentStrategyType,
199+
RollingUpdate: &appsv1.RollingUpdateDeployment{
200+
MaxUnavailable: &zeroStr,
201+
MaxSurge: &oneStr,
202+
},
197203
},
198204
Template: corev1.PodTemplateSpec{
199205
ObjectMeta: metav1.ObjectMeta{
@@ -207,9 +213,12 @@ func (r *MaintenanceController) ensureSignallingDeployment(ctx context.Context,
207213
TerminationGracePeriodSeconds: &one, // busybox sleep doesn't handle TERM so well as pid 1
208214
Tolerations: []corev1.Toleration{
209215
{
210-
Key: labelCriticalComponentsNotReady,
216+
Effect: corev1.TaintEffectNoExecute,
217+
Operator: corev1.TolerationOpExists,
218+
},
219+
{
211220
Effect: corev1.TaintEffectNoSchedule,
212-
Operator: corev1.TolerationOpEqual,
221+
Operator: corev1.TolerationOpExists,
213222
},
214223
},
215224
Containers: []corev1.Container{

0 commit comments

Comments
 (0)