@@ -114,7 +114,9 @@ func (r *OnboardingController) Reconcile(ctx context.Context, req ctrl.Request)
114114 computeHost := hv .Name
115115 // We bail out here, because the OpenStack API is not well suited to polling
116116 if hv .Status .HypervisorID == "" || hv .Status .ServiceID == "" {
117- if err := r .ensureNovaProperties (ctx , hv ); err != nil {
117+ if err := retry .RetryOnConflict (retry .DefaultRetry , func () error {
118+ return r .ensureNovaProperties (ctx , hv )
119+ }); err != nil {
118120 if errors .Is (err , errRequeue ) {
119121 return ctrl.Result {RequeueAfter : defaultWaitTime }, nil
120122 }
@@ -161,7 +163,7 @@ func (r *OnboardingController) Reconcile(ctx context.Context, req ctrl.Request)
161163 return r .smokeTest (ctx , node , hv , computeHost )
162164 }
163165 default :
164- // No idea how we ended up here.
166+ // Nothing to be done
165167 return ctrl.Result {}, nil
166168 }
167169}
@@ -221,19 +223,31 @@ func (r *OnboardingController) smokeTest(ctx context.Context, node *corev1.Node,
221223
222224 switch server .Status {
223225 case "ERROR" :
224- if err := servers .Delete (ctx , r .testComputeClient , server .ID ).ExtractErr (); err != nil {
225- log .Error (err , "failed to delete test instance" , "id" , server .ID )
226+ // servers.List doesn't provide the fault field, so fetch the server again
227+ id := server .ID
228+ server , err = servers .Get (ctx , r .testComputeClient , id ).Extract ()
229+ if err != nil {
230+ // should not happen
231+ log .Error (err , "failed to get test instance, instance vanished" , "id" , id )
232+ return ctrl.Result {RequeueAfter : defaultWaitTime }, nil
226233 }
234+
227235 // Set condition back to testing
228236 meta .SetStatusCondition (& hv .Status .Conditions , metav1.Condition {
229237 Type : ConditionTypeOnboarding ,
230238 Status : metav1 .ConditionTrue ,
231239 Reason : ConditionReasonTesting ,
232- Message : "Server ended up in error state, retrying" ,
240+ Message : "Server ended up in error state: " + server . Fault . Message ,
233241 })
234- if err : = r .Status ().Update (ctx , hv ); err != nil {
242+ if err = r .Status ().Update (ctx , hv ); err != nil {
235243 return ctrl.Result {}, err
236244 }
245+
246+ // now delete the server and requeue
247+ if err = servers .Delete (ctx , r .testComputeClient , id ).ExtractErr (); err != nil {
248+ log .Error (err , "failed to delete test instance" , "id" , id )
249+ }
250+
237251 return ctrl.Result {RequeueAfter : defaultWaitTime }, nil
238252 case "ACTIVE" :
239253 consoleOutput , err := servers .ShowConsoleOutput (ctx , r .testComputeClient , server .ID , servers.ShowConsoleOutputOpts {Length : 11 }).Extract ()
@@ -358,9 +372,7 @@ func (r *OnboardingController) ensureNovaProperties(ctx context.Context, hv *kvm
358372
359373 hv .Status .HypervisorID = myHypervisor .ID
360374 hv .Status .ServiceID = myHypervisor .Service .ID
361- return retry .RetryOnConflict (retry .DefaultRetry , func () error {
362- return r .Status ().Update (ctx , hv )
363- })
375+ return r .Status ().Update (ctx , hv )
364376}
365377
366378func (r * OnboardingController ) createOrGetTestServer (ctx context.Context , zone , computeHost string , nodeUid types.UID ) (* servers.Server , error ) {
0 commit comments