Skip to content

Commit 06e5a9a

Browse files
committed
EvictionController: Retry on re-enablement failures
We will likely encounter transient errors, and we should not stop deleting the eviction because of that.
1 parent 3471a3c commit 06e5a9a

1 file changed

Lines changed: 43 additions & 21 deletions

File tree

internal/controller/eviction_controller.go

Lines changed: 43 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,13 @@ func (r *EvictionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
7676

7777
// Being deleted
7878
if !eviction.DeletionTimestamp.IsZero() {
79-
err := r.handleFinalizer(ctx, eviction)
79+
retry, err := r.handleFinalizer(ctx, eviction)
80+
if err != nil {
81+
return ctrl.Result{}, err
82+
}
83+
if retry {
84+
return ctrl.Result{RequeueAfter: defaultWaitTime}, err
85+
}
8086
log.Info("deleted")
8187
return ctrl.Result{}, err
8288
}
@@ -364,82 +370,98 @@ func (r *EvictionReconciler) evictionReason(eviction *kvmv1.Eviction) string {
364370
return fmt.Sprintf("Eviction %v/%v: %v", eviction.Namespace, eviction.Name, eviction.Spec.Reason)
365371
}
366372

367-
func (r *EvictionReconciler) handleFinalizer(ctx context.Context, eviction *kvmv1.Eviction) error {
373+
func (r *EvictionReconciler) handleFinalizer(ctx context.Context, eviction *kvmv1.Eviction) (bool, error) {
368374
if !controllerutil.ContainsFinalizer(eviction, evictionFinalizerName) {
369-
return nil
375+
return false, nil
370376
}
371377

378+
// As long as we didn't succeed to re-enable the hypervisor, which includes
379+
// - the hypervisor being gone, because it has been torn down
380+
// - the hypervisor having been enabled by someone else
372381
if !meta.IsStatusConditionTrue(eviction.Status.Conditions, kvmv1.ConditionTypeHypervisorReEnabled) {
373-
if err := r.enableHypervisorService(ctx, eviction); err != nil {
374-
if errors.Is(err, openstack.ErrNoHypervisor) {
375-
log := logger.FromContext(ctx)
376-
log.Info("Can't enable host, it is gone")
377-
} else {
378-
return err
379-
}
382+
retry, err := r.enableHypervisorService(ctx, eviction)
383+
if err != nil || retry {
384+
return retry, err
380385
}
381386
}
382387

383388
evictionBase := eviction.DeepCopy()
384389
controllerutil.RemoveFinalizer(eviction, evictionFinalizerName)
385-
return r.Patch(ctx, eviction, client.MergeFromWithOptions(evictionBase, client.MergeFromWithOptimisticLock{}))
390+
return false, r.Patch(ctx, eviction, client.MergeFromWithOptions(evictionBase, client.MergeFromWithOptimisticLock{}))
386391
}
387392

388-
func (r *EvictionReconciler) enableHypervisorService(ctx context.Context, eviction *kvmv1.Eviction) error {
393+
func (r *EvictionReconciler) enableHypervisorService(ctx context.Context, eviction *kvmv1.Eviction) (bool, error) {
389394
log := logger.FromContext(ctx)
390395

391396
hypervisor, err := openstack.GetHypervisorByName(ctx, r.computeClient, eviction.Spec.Hypervisor, false)
392397
if err != nil {
398+
changed := false
399+
retry := true
393400
if errors.Is(err, openstack.ErrNoHypervisor) {
394-
meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
401+
changed = meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
395402
Type: kvmv1.ConditionTypeHypervisorReEnabled,
396403
Status: metav1.ConditionTrue,
397404
Message: "Hypervisor is gone, no need to re-enable",
398405
Reason: kvmv1.ConditionReasonSucceeded,
399406
})
407+
retry = false
400408
} else {
401-
// update the condition to reflect the error, but do not fail the reconciliation
402-
meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
409+
// update the condition to reflect the error, and retry the reconciliation
410+
changed = meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
403411
Type: kvmv1.ConditionTypeHypervisorReEnabled,
404412
Status: metav1.ConditionFalse,
405413
Message: fmt.Sprintf("failed to get hypervisor due to %s", err),
406414
Reason: kvmv1.ConditionReasonFailed,
407415
})
408416
}
409-
return r.Status().Update(ctx, eviction)
417+
if changed {
418+
return retry, r.Status().Update(ctx, eviction)
419+
} else {
420+
return retry, nil
421+
}
410422
}
411423

412424
if hypervisor.Service.DisabledReason != r.evictionReason(eviction) {
413-
meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
425+
changed := meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
414426
Type: kvmv1.ConditionTypeHypervisorReEnabled,
415427
Status: metav1.ConditionTrue,
416428
Message: fmt.Sprintf("Hypervisor already re-enabled for reason: %s",
417429
hypervisor.Service.DisabledReason),
418430
Reason: kvmv1.ConditionReasonSucceeded,
419431
})
420-
return r.Status().Update(ctx, eviction)
432+
if changed {
433+
return false, r.Status().Update(ctx, eviction)
434+
} else {
435+
return false, nil
436+
}
421437
}
422438

423439
enableService := services.UpdateOpts{Status: services.ServiceEnabled}
424440
log.Info("Enabling hypervisor", "id", hypervisor.Service.ID)
425441
_, err = services.Update(ctx, r.computeClient, hypervisor.Service.ID, enableService).Extract()
426442

443+
changed := false
427444
if err != nil {
428-
meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
445+
changed = meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
429446
Type: kvmv1.ConditionTypeHypervisorReEnabled,
430447
Status: metav1.ConditionFalse,
431448
Message: fmt.Sprintf("failed to enable hypervisor due to %s", err),
432449
Reason: kvmv1.ConditionReasonFailed,
433450
})
434451
} else {
435-
meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
452+
changed = meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
436453
Type: kvmv1.ConditionTypeHypervisorReEnabled,
437454
Status: metav1.ConditionTrue,
438455
Message: "Hypervisor re-enabled successfully",
439456
Reason: kvmv1.ConditionReasonSucceeded,
440457
})
441458
}
442-
return r.Status().Update(ctx, eviction)
459+
460+
if changed {
461+
return err != nil, r.Status().Update(ctx, eviction)
462+
} else {
463+
return err != nil, nil
464+
}
443465
}
444466

445467
// disableHypervisor disables the hypervisor service and adds a finalizer to the eviction

0 commit comments

Comments
 (0)