Skip to content

Commit ebc9e83

Browse files
committed
requeue logic for pending NAD
The operator currently returns a misleading reconcile error when Pod interfaces do not yet have assigned IPs. This change introduces a requeue mechanism that gives the NetworkAttachmentDefinitions (NADs) time to be fully provisioned.
1 parent 395fa31 commit ebc9e83

2 files changed

Lines changed: 42 additions & 16 deletions

File tree

OWNERS_ALIASES

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,12 @@ aliases:
77
- stuggi
88
test-approvers:
99
- lpiwowar
10+
- adrianfusco
1011
- arxcruz
1112
- kstrenkova
1213
test-reviewers:
1314
- evallesp
14-
- frenzyfriday
15-
- sdatko
16-
- adrianfusco
15+
- rebtoor
16+
- Valkyrie00
17+
- imatza-rh
18+
- posikoya

internal/controller/common.go

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -598,17 +598,21 @@ func (r *Reconciler) ReleaseLock(ctx context.Context, instance client.Object) (b
598598
return false, ErrFailedToDeleteLock
599599
}
600600

601-
// PodExists checks if a pod exists for the given instance and workflow step
602-
func (r *Reconciler) PodExists(ctx context.Context, instance client.Object, workflowStepIndex int) bool {
603-
pod := &corev1.Pod{}
601+
// GetPodIfExists returns the pod for the given instance and workflow step, or (nil, nil) if it does not exist
602+
func (r *Reconciler) GetPodIfExists(
603+
ctx context.Context,
604+
instance client.Object,
605+
workflowStepIndex int,
606+
) (*corev1.Pod, error) {
604607
podName := r.GetPodName(instance, workflowStepIndex)
605-
objectKey := client.ObjectKey{Namespace: instance.GetNamespace(), Name: podName}
606-
err := r.Client.Get(ctx, objectKey, pod)
607-
if err != nil && k8s_errors.IsNotFound(err) {
608-
return false
608+
pod, err := r.GetPod(ctx, podName, instance.GetNamespace())
609+
if err != nil {
610+
if k8s_errors.IsNotFound(err) {
611+
return nil, nil
612+
}
613+
return nil, err
609614
}
610-
611-
return true
615+
return pod, nil
612616
}
613617

614618
// GetCommonRbacRules returns the common RBAC rules for test operations, with optional privileged permissions
@@ -690,8 +694,9 @@ func (r *Reconciler) VerifyNetworkAttachments(
690694
conditions *condition.Conditions,
691695
networkAttachmentStatus *map[string][]string,
692696
) (ctrl.Result, error) {
693-
if !r.PodExists(ctx, instance, workflowStepIndex) {
694-
return ctrl.Result{}, nil
697+
pod, err := r.GetPodIfExists(ctx, instance, workflowStepIndex)
698+
if pod == nil {
699+
return ctrl.Result{}, err
695700
}
696701

697702
networkReady, status, err := nad.VerifyNetworkStatusFromAnnotation(
@@ -713,14 +718,33 @@ func (r *Reconciler) VerifyNetworkAttachments(
713718
condition.NetworkAttachmentsReadyMessage)
714719
} else {
715720
err := fmt.Errorf("%w: %s", ErrNetworkAttachmentsMismatch, networkAttachments)
721+
722+
// maxWaitTime is how long to wait for the NAD before escalating to a hard error
723+
const maxWaitTime = 5 * time.Minute
724+
elaspedTime := time.Since(pod.GetCreationTimestamp().Time)
725+
if elaspedTime > maxWaitTime {
726+
conditions.Set(condition.FalseCondition(
727+
condition.NetworkAttachmentsReadyCondition,
728+
condition.ErrorReason,
729+
condition.SeverityError,
730+
condition.NetworkAttachmentsReadyErrorMessage,
731+
fmt.Errorf("timed out waiting for network attachments: %w", err).Error()))
732+
733+
return ctrl.Result{}, err
734+
}
735+
716736
conditions.Set(condition.FalseCondition(
717737
condition.NetworkAttachmentsReadyCondition,
718-
condition.ErrorReason,
738+
"NetworkAttachmentsWaiting",
719739
condition.SeverityWarning,
720740
condition.NetworkAttachmentsReadyErrorMessage,
721741
err.Error()))
722742

723-
return ctrl.Result{}, err
743+
Log.Info("Waiting for network attachments to become ready",
744+
"elaspedTime", elaspedTime,
745+
"maxWaitTime", maxWaitTime)
746+
747+
return ctrl.Result{RequeueAfter: time.Second * 10}, nil
724748
}
725749

726750
return ctrl.Result{}, nil

0 commit comments

Comments
 (0)