Skip to content

Commit 7ea28e6

Browse files
committed
[onboarding-controller] use RetryOnConflict, add error for test fail
1 parent c3c0000 commit 7ea28e6

1 file changed

Lines changed: 21 additions & 9 deletions

File tree

internal/controller/onboarding_controller.go

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,9 @@ func (r *OnboardingController) Reconcile(ctx context.Context, req ctrl.Request)
114114
computeHost := hv.Name
115115
// We bail out here, because the OpenStack API is not well suited to polling
116116
if hv.Status.HypervisorID == "" || hv.Status.ServiceID == "" {
117-
if err := r.ensureNovaProperties(ctx, hv); err != nil {
117+
if err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
118+
return r.ensureNovaProperties(ctx, hv)
119+
}); err != nil {
118120
if errors.Is(err, errRequeue) {
119121
return ctrl.Result{RequeueAfter: defaultWaitTime}, nil
120122
}
@@ -161,7 +163,7 @@ func (r *OnboardingController) Reconcile(ctx context.Context, req ctrl.Request)
161163
return r.smokeTest(ctx, node, hv, computeHost)
162164
}
163165
default:
164-
// No idea how we ended up here.
166+
// Nothing to be done
165167
return ctrl.Result{}, nil
166168
}
167169
}
@@ -221,19 +223,31 @@ func (r *OnboardingController) smokeTest(ctx context.Context, node *corev1.Node,
221223

222224
switch server.Status {
223225
case "ERROR":
224-
if err := servers.Delete(ctx, r.testComputeClient, server.ID).ExtractErr(); err != nil {
225-
log.Error(err, "failed to delete test instance", "id", server.ID)
226+
// servers.List doesn't provide the fault field, so fetch the server again
227+
id := server.ID
228+
server, err = servers.Get(ctx, r.testComputeClient, id).Extract()
229+
if err != nil {
230+
// should not happen
231+
log.Error(err, "failed to get test instance, instance vanished", "id", id)
232+
return ctrl.Result{RequeueAfter: defaultWaitTime}, nil
226233
}
234+
227235
// Set condition back to testing
228236
meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{
229237
Type: ConditionTypeOnboarding,
230238
Status: metav1.ConditionTrue,
231239
Reason: ConditionReasonTesting,
232-
Message: "Server ended up in error state, retrying",
240+
Message: "Server ended up in error state: " + server.Fault.Message,
233241
})
234-
if err := r.Status().Update(ctx, hv); err != nil {
242+
if err = r.Status().Update(ctx, hv); err != nil {
235243
return ctrl.Result{}, err
236244
}
245+
246+
// now delete the server and requeue
247+
if err = servers.Delete(ctx, r.testComputeClient, id).ExtractErr(); err != nil {
248+
log.Error(err, "failed to delete test instance", "id", id)
249+
}
250+
237251
return ctrl.Result{RequeueAfter: defaultWaitTime}, nil
238252
case "ACTIVE":
239253
consoleOutput, err := servers.ShowConsoleOutput(ctx, r.testComputeClient, server.ID, servers.ShowConsoleOutputOpts{Length: 11}).Extract()
@@ -358,9 +372,7 @@ func (r *OnboardingController) ensureNovaProperties(ctx context.Context, hv *kvm
358372

359373
hv.Status.HypervisorID = myHypervisor.ID
360374
hv.Status.ServiceID = myHypervisor.Service.ID
361-
return retry.RetryOnConflict(retry.DefaultRetry, func() error {
362-
return r.Status().Update(ctx, hv)
363-
})
375+
return r.Status().Update(ctx, hv)
364376
}
365377

366378
func (r *OnboardingController) createOrGetTestServer(ctx context.Context, zone, computeHost string, nodeUid types.UID) (*servers.Server, error) {

0 commit comments

Comments
 (0)