Skip to content

Commit d7d1714

Browse files
committed
Eviction -> Evicting, introduce addition conditions, remove Pending
This Commit renames the primary status condition `Eviction` to `Evicting` to make it clear it's meaning an eviction is running or not. Additional Condition introduced: * ConditionTypePreflight: True if preflight is completed, like OS hypervisor check * ConditionTypeHypervisorDisabled: true if hypervisor has been successfully disabled * ConditionTypeHypervisorReEnabled: true if hypervisor has been successfully re-enabled (if needed) * Evicting: the primary status that show if the eviction is running or not Also, the "pending" status has been removed and unit tests has been adapted. Minor cleanup of functions to make it more expressive about what's happening (e.g. condition updates).
1 parent a2e31f2 commit d7d1714

5 files changed

Lines changed: 187 additions & 96 deletions

File tree

api/v1/eviction_types.go

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,17 @@ type EvictionSpec struct {
3838
}
3939

4040
const (
41+
// ConditionTypePreflight is a condition for preflight checks, e.g. OS Hypervisor validation
42+
ConditionTypePreflight = "Preflight"
43+
44+
// ConditionTypeHypervisorReEnabled is the type of condition for hypervisor re-enabled status
45+
ConditionTypeHypervisorReEnabled = "HypervisorReEnabled"
46+
47+
// ConditionTypeHypervisorDisabled is the type of condition for hypervisor disabled status
48+
ConditionTypeHypervisorDisabled = "HypervisorDisabled"
49+
4150
// ConditionTypeEviction is the type of condition for eviction status
42-
ConditionTypeEviction = "Eviction"
51+
ConditionTypeEviction = "Evicting"
4352

4453
// ConditionReasonRunning means the eviction is currently running
4554
ConditionReasonRunning string = "Running"
@@ -70,7 +79,7 @@ type EvictionStatus struct {
7079
// +kubebuilder:resource:scope=Cluster,shortName=evi
7180
// +kubebuilder:printcolumn:JSONPath=".spec.hypervisor",name="Hypervisor",type="string"
7281
// +kubebuilder:printcolumn:JSONPath=".spec.reason",name="Reason",type="string"
73-
// +kubebuilder:printcolumn:JSONPath=".status.conditions[?(@.type==\"Eviction\")].reason",name="State",type="string"
82+
// +kubebuilder:printcolumn:JSONPath=".status.conditions[?(@.type==\"Evicting\")].reason",name="State",type="string"
7483
// +kubebuilder:printcolumn:JSONPath=".metadata.creationTimestamp",name="Created",type="date"
7584

7685
// Eviction is the Schema for the evictions API

charts/openstack-hypervisor-operator/crds/eviction-crd.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ spec:
2222
- jsonPath: .spec.reason
2323
name: Reason
2424
type: string
25-
- jsonPath: .status.conditions[?(@.type=="Eviction")].reason
25+
- jsonPath: .status.conditions[?(@.type=="Evicting")].reason
2626
name: State
2727
type: string
2828
- jsonPath: .metadata.creationTimestamp

config/crd/bases/kvm.cloud.sap_evictions.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ spec:
2323
- jsonPath: .spec.reason
2424
name: Reason
2525
type: string
26-
- jsonPath: .status.conditions[?(@.type=="Eviction")].reason
26+
- jsonPath: .status.conditions[?(@.type=="Evicting")].reason
2727
name: State
2828
type: string
2929
- jsonPath: .metadata.creationTimestamp

internal/controller/eviction_controller.go

Lines changed: 108 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,11 @@ func (r *EvictionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
8484

8585
statusCondition := meta.FindStatusCondition(eviction.Status.Conditions, kvmv1.ConditionTypeEviction)
8686
if statusCondition == nil {
87-
// No condition, so we are in the initial state
88-
return r.handlePending(ctx, eviction)
87+
// No status condition, so we need to add it
88+
if r.addCondition(ctx, eviction, metav1.ConditionTrue, "Running", kvmv1.ConditionReasonRunning) {
89+
log.Info("running")
90+
return ctrl.Result{}, nil
91+
}
8992
} else if statusCondition.Status == metav1.ConditionTrue {
9093
// We are running, so we need to evict the next instance
9194
return r.handleRunning(ctx, eviction)
@@ -104,6 +107,11 @@ func (r *EvictionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
104107
}
105108

106109
func (r *EvictionReconciler) handleRunning(ctx context.Context, eviction *kvmv1.Eviction) (reconcile.Result, error) {
110+
if !meta.IsStatusConditionTrue(eviction.Status.Conditions, kvmv1.ConditionTypePreflight) {
111+
// Ensure the hypervisor is disabled and we have the preflight condition
112+
return r.handlePreflight(ctx, eviction)
113+
}
114+
107115
// That should leave us with "Running" and the hypervisor should be deactivated
108116
if len(eviction.Status.OutstandingInstances) > 0 {
109117
return r.evictNext(ctx, eviction)
@@ -137,7 +145,7 @@ func (r *EvictionReconciler) getOwnerNode(ctx context.Context, eviction *kvmv1.E
137145
return nil, nil
138146
}
139147

140-
func (r *EvictionReconciler) handlePending(ctx context.Context, eviction *kvmv1.Eviction) (reconcile.Result, error) {
148+
func (r *EvictionReconciler) handlePreflight(ctx context.Context, eviction *kvmv1.Eviction) (reconcile.Result, error) {
141149
hypervisorName := eviction.Spec.Hypervisor
142150

143151
// Does the hypervisor even exist? Is it enabled/disabled?
@@ -155,8 +163,13 @@ func (r *EvictionReconciler) handlePending(ctx context.Context, eviction *kvmv1.
155163
if expectHypervisor {
156164
// Abort eviction
157165
err = fmt.Errorf("failed to get hypervisor %w", err)
158-
r.addErrorCondition(ctx, eviction, err)
159-
return ctrl.Result{}, err
166+
meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
167+
Type: kvmv1.ConditionTypeEviction,
168+
Status: metav1.ConditionFalse,
169+
Message: err.Error(),
170+
Reason: kvmv1.ConditionReasonFailed,
171+
})
172+
return ctrl.Result{}, r.Status().Update(ctx, eviction)
160173
} else {
161174
// That is (likely) an eviction for a node that never registered
162175
// so we are good to go
@@ -184,20 +197,12 @@ func (r *EvictionReconciler) handlePending(ctx context.Context, eviction *kvmv1.
184197
Message: err.Error(),
185198
Reason: kvmv1.ConditionReasonFailed,
186199
})
187-
log.Info("Update", "status", eviction.Status)
188-
if err = r.Status().Update(ctx, eviction); err != nil {
189-
return ctrl.Result{}, err
190-
}
191-
192-
return ctrl.Result{}, nil
200+
return ctrl.Result{}, r.Status().Update(ctx, eviction)
193201
}
194202

195-
crdModified, err := r.disableHypervisor(ctx, hypervisor, eviction)
196-
if err != nil {
197-
return ctrl.Result{}, fmt.Errorf("could not disable the hypervisor %v due to %w", hypervisorName, err)
198-
}
199-
if crdModified {
200-
return ctrl.Result{}, nil
203+
if !meta.IsStatusConditionTrue(eviction.Status.Conditions, kvmv1.ConditionTypeHypervisorDisabled) {
204+
// Hypervisor is not disabled/ensured, so we need to disable it
205+
return ctrl.Result{}, r.disableHypervisor(ctx, hypervisor, eviction)
201206
}
202207

203208
// Fetch all virtual machines on the hypervisor
@@ -218,12 +223,11 @@ func (r *EvictionReconciler) handlePending(ctx context.Context, eviction *kvmv1.
218223
eviction.Status.HypervisorServiceId = hypervisor.ID
219224
eviction.Status.OutstandingRamMb = hypervisor.MemoryMbUsed
220225
meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
221-
Type: kvmv1.ConditionTypeEviction,
226+
Type: kvmv1.ConditionTypePreflight,
222227
Status: metav1.ConditionTrue,
223-
Message: "eviction started",
224-
Reason: kvmv1.ConditionReasonRunning,
228+
Message: "Preflight checks passed, hypervisor is disabled and ready for eviction",
229+
Reason: kvmv1.ConditionReasonSuceeded,
225230
})
226-
227231
return ctrl.Result{}, r.Status().Update(ctx, eviction)
228232
}
229233

@@ -336,74 +340,124 @@ func (r *EvictionReconciler) handleFinalizer(ctx context.Context, eviction *kvmv
336340
return nil
337341
}
338342

339-
err := r.enableHypervisorService(ctx, eviction)
340-
if err != nil {
341-
if errors.Is(err, openstack.ErrNoHypervisor) {
342-
log := logger.FromContext(ctx)
343-
log.Info("Can't enable host, it is gone")
344-
} else {
345-
return err
343+
if !meta.IsStatusConditionTrue(eviction.Status.Conditions, kvmv1.ConditionTypeHypervisorReEnabled) {
344+
if err := r.enableHypervisorService(ctx, eviction); err != nil {
345+
if errors.Is(err, openstack.ErrNoHypervisor) {
346+
log := logger.FromContext(ctx)
347+
log.Info("Can't enable host, it is gone")
348+
} else {
349+
return err
350+
}
346351
}
347352
}
348353

349354
evictionBase := eviction.DeepCopy()
350355
controllerutil.RemoveFinalizer(eviction, evictionFinalizerName)
351-
err = r.Patch(ctx, eviction, client.MergeFromWithOptions(evictionBase, client.MergeFromWithOptimisticLock{}))
352-
return err
356+
return r.Patch(ctx, eviction, client.MergeFromWithOptions(evictionBase, client.MergeFromWithOptimisticLock{}))
353357
}
354358

355359
func (r *EvictionReconciler) enableHypervisorService(ctx context.Context, eviction *kvmv1.Eviction) error {
360+
log := logger.FromContext(ctx)
361+
356362
hypervisor, err := openstack.GetHypervisorByName(ctx, r.computeClient, eviction.Spec.Hypervisor, false)
357363
if err != nil {
358-
err2 := fmt.Errorf("failed to get hypervisor due to %w", err)
359-
// Abort eviction
360-
r.addErrorCondition(ctx, eviction, err2)
361-
return err
364+
if errors.Is(err, openstack.ErrNoHypervisor) {
365+
meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
366+
Type: kvmv1.ConditionTypeHypervisorReEnabled,
367+
Status: metav1.ConditionTrue,
368+
Message: "Hypervisor is gone, no need to re-enable",
369+
Reason: kvmv1.ConditionReasonSuceeded,
370+
})
371+
} else {
372+
// update the condition to reflect the error, but do not fail the reconciliation
373+
meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
374+
Type: kvmv1.ConditionTypeHypervisorReEnabled,
375+
Status: metav1.ConditionFalse,
376+
Message: fmt.Sprintf("failed to get hypervisor due to %s", err),
377+
Reason: kvmv1.ConditionReasonFailed,
378+
})
379+
}
380+
return r.Status().Update(ctx, eviction)
362381
}
363382

364383
if hypervisor.Service.DisabledReason != r.evictionReason(eviction) {
365-
return nil
384+
meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
385+
Type: kvmv1.ConditionTypeHypervisorReEnabled,
386+
Status: metav1.ConditionTrue,
387+
Message: fmt.Sprintf("Hypervisor already re-enabled for reason: %s",
388+
hypervisor.Service.DisabledReason),
389+
Reason: kvmv1.ConditionReasonSuceeded,
390+
})
391+
return r.Status().Update(ctx, eviction)
366392
}
367393

368-
log := logger.FromContext(ctx)
369394
enableService := services.UpdateOpts{Status: services.ServiceEnabled}
370395
log.Info("Enabling hypervisor", "id", hypervisor.Service.ID)
371396
_, err = services.Update(ctx, r.computeClient, hypervisor.Service.ID, enableService).Extract()
372-
return err
397+
398+
if err != nil {
399+
meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
400+
Type: kvmv1.ConditionTypeHypervisorReEnabled,
401+
Status: metav1.ConditionFalse,
402+
Message: fmt.Sprintf("failed to enable hypervisor due to %s", err),
403+
Reason: kvmv1.ConditionReasonFailed,
404+
})
405+
} else {
406+
meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
407+
Type: kvmv1.ConditionTypeHypervisorReEnabled,
408+
Status: metav1.ConditionTrue,
409+
Message: "Hypervisor re-enabled successfully",
410+
Reason: kvmv1.ConditionReasonSuceeded,
411+
})
412+
}
413+
return r.Status().Update(ctx, eviction)
373414
}
374415

375-
func (r *EvictionReconciler) disableHypervisor(ctx context.Context, hypervisor *openstack.Hypervisor, eviction *kvmv1.Eviction) (bool, error) {
416+
// disableHypervisor disables the hypervisor service and adds a finalizer to the eviction
417+
// will add Condition HypervisorDisabled to the eviction status with the outcome
418+
func (r *EvictionReconciler) disableHypervisor(ctx context.Context, hypervisor *openstack.Hypervisor, eviction *kvmv1.Eviction) error {
376419
evictionReason := r.evictionReason(eviction)
377420
disabledReason := hypervisor.Service.DisabledReason
378421

379422
if disabledReason != nil && disabledReason != "" && disabledReason != evictionReason {
380423
// Disabled for another reason already
381-
return r.addProgressCondition(ctx, eviction, "Found host already disabled", kvmv1.ConditionReasonRunning), nil
424+
meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
425+
Type: kvmv1.ConditionTypeHypervisorDisabled,
426+
Status: metav1.ConditionTrue,
427+
Message: fmt.Sprintf("Hypervisor already disabled for reason %q", disabledReason),
428+
Reason: kvmv1.ConditionReasonSuceeded,
429+
})
430+
return r.Status().Update(ctx, eviction)
382431
}
383432

384433
if !controllerutil.ContainsFinalizer(eviction, evictionFinalizerName) {
385434
evictionBase := eviction.DeepCopy()
386435
controllerutil.AddFinalizer(eviction, evictionFinalizerName)
387-
return true, r.Patch(ctx, eviction, client.MergeFromWithOptions(evictionBase, client.MergeFromWithOptimisticLock{}))
388-
}
389-
390-
if hypervisor.Service.DisabledReason == evictionReason {
391-
return false, nil
436+
return r.Patch(ctx, eviction, client.MergeFromWithOptions(evictionBase, client.MergeFromWithOptimisticLock{}))
392437
}
393438

394439
disableService := services.UpdateOpts{Status: services.ServiceDisabled,
395440
DisabledReason: r.evictionReason(eviction)}
396441

397442
_, err := services.Update(ctx, r.computeClient, hypervisor.Service.ID, disableService).Extract()
398443
if err != nil {
399-
return r.addErrorCondition(ctx, eviction, err), err
400-
}
401-
402-
if r.addProgressCondition(ctx, eviction, "Host disabled", kvmv1.ConditionReasonRunning) {
403-
return true, nil
444+
// We expect OpenStack calls to be transient errors, so we retry for the next reconciliation
445+
meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
446+
Type: kvmv1.ConditionTypeHypervisorDisabled,
447+
Status: metav1.ConditionFalse,
448+
Message: fmt.Sprintf("Failed to disable hypervisor: %v", err),
449+
Reason: kvmv1.ConditionReasonFailed,
450+
})
451+
return r.Status().Update(ctx, eviction)
404452
}
405453

406-
return false, nil
454+
meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
455+
Type: kvmv1.ConditionTypeHypervisorDisabled,
456+
Status: metav1.ConditionTrue,
457+
Message: "Hypervisor disabled successfully",
458+
Reason: kvmv1.ConditionReasonSuceeded,
459+
})
460+
return r.Status().Update(ctx, eviction)
407461
}
408462

409463
func (r *EvictionReconciler) liveMigrate(ctx context.Context, uuid string, eviction *kvmv1.Eviction) error {
@@ -416,7 +470,7 @@ func (r *EvictionReconciler) liveMigrate(ctx context.Context, uuid string, evict
416470
res := servers.LiveMigrate(ctx, r.computeClient, uuid, liveMigrateOpts)
417471
if res.Err != nil {
418472
err := fmt.Errorf("failed to evict VM %s due to %w", uuid, res.Err)
419-
r.addErrorCondition(ctx, eviction, err)
473+
r.addCondition(ctx, eviction, metav1.ConditionFalse, err.Error(), kvmv1.ConditionReasonFailed)
420474
return res.Err
421475
}
422476

@@ -430,7 +484,7 @@ func (r *EvictionReconciler) coldMigrate(ctx context.Context, uuid string, evict
430484
res := servers.Migrate(ctx, r.computeClient, uuid)
431485
if res.Err != nil {
432486
err := fmt.Errorf("failed to evict stopped server %s due to %w", uuid, res.Err)
433-
r.addErrorCondition(ctx, eviction, err)
487+
r.addCondition(ctx, eviction, metav1.ConditionFalse, err.Error(), kvmv1.ConditionReasonFailed)
434488
return err
435489
}
436490

@@ -439,7 +493,8 @@ func (r *EvictionReconciler) coldMigrate(ctx context.Context, uuid string, evict
439493
}
440494

441495
// addCondition adds a condition to the Eviction status and updates the status
442-
func (r *EvictionReconciler) addCondition(ctx context.Context, eviction *kvmv1.Eviction, status metav1.ConditionStatus, message string, reason string) bool {
496+
func (r *EvictionReconciler) addCondition(ctx context.Context, eviction *kvmv1.Eviction,
497+
status metav1.ConditionStatus, message string, reason string) bool {
443498
if !meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
444499
Type: kvmv1.ConditionTypeEviction,
445500
Status: status,
@@ -458,14 +513,6 @@ func (r *EvictionReconciler) addCondition(ctx context.Context, eviction *kvmv1.E
458513
return true
459514
}
460515

461-
func (r *EvictionReconciler) addProgressCondition(ctx context.Context, eviction *kvmv1.Eviction, message string, reason string) bool {
462-
return r.addCondition(ctx, eviction, metav1.ConditionTrue, message, reason)
463-
}
464-
465-
func (r *EvictionReconciler) addErrorCondition(ctx context.Context, eviction *kvmv1.Eviction, err error) bool {
466-
return r.addCondition(ctx, eviction, metav1.ConditionFalse, err.Error(), kvmv1.ConditionReasonFailed)
467-
}
468-
469516
// SetupWithManager sets up the controller with the Manager.
470517
func (r *EvictionReconciler) SetupWithManager(mgr ctrl.Manager) error {
471518
ctx := context.Background()

0 commit comments

Comments
 (0)