@@ -209,8 +209,7 @@ func (d *daemon) getIbSriovNetwork(networkID string) (string, *utils.IbSriovCniS
209209 // Check if this network's resource is managed by this daemon
210210 resourceName := netAttInfo .Annotations ["k8s.v1.cni.cncf.io/resourceName" ]
211211 if resourceName == "" || ! d .config .IsManagedResource (resourceName ) {
212- // TODO(Nik) dev qol, check if someone else manages this resource or if it is orphan
213- // checkResourceOwner(networkNamespace, networkName)
212+ // TODO(Nik) qol: check whether someone else manages this resource or whether it is orphaned
214213 return "" , nil , fmt .Errorf ("network %s uses resource %s which is not managed by this daemon" , networkName , resourceName )
215214 }
216215
@@ -254,8 +253,8 @@ func getPodNetworkInfo(netName string, pod *kapi.Pod, netMap networksMap) (*podN
254253
255254// addPodFinalizer adds the GUID cleanup finalizer to a pod
256255func (d * daemon ) addPodFinalizer (pod * kapi.Pod , networkName string ) error {
256+ podFinalizer := fmt .Sprintf ("%s-%s" , PodGUIDFinalizer , networkName )
257257 return wait .ExponentialBackoff (backoffValues , func () (bool , error ) {
258- podFinalizer := fmt .Sprintf ("%s-%s" , PodGUIDFinalizer , networkName )
259258 if err := d .kubeClient .AddFinalizerToPod (pod , podFinalizer ); err != nil {
260259 log .Warn ().Msgf ("failed to add finalizer to pod %s/%s: %v" ,
261260 pod .Namespace , pod .Name , err )
@@ -267,8 +266,8 @@ func (d *daemon) addPodFinalizer(pod *kapi.Pod, networkName string) error {
267266
268267// removePodFinalizer removes the GUID cleanup finalizer from a pod
269268func (d * daemon ) removePodFinalizer (pod * kapi.Pod , networkName string ) error {
269+ podFinalizer := fmt .Sprintf ("%s-%s" , PodGUIDFinalizer , networkName )
270270 return wait .ExponentialBackoff (backoffValues , func () (bool , error ) {
271- podFinalizer := fmt .Sprintf ("%s-%s" , PodGUIDFinalizer , networkName )
272271 if err := d .kubeClient .RemoveFinalizerFromPod (pod , podFinalizer ); err != nil {
273272 log .Warn ().Msgf ("failed to remove finalizer from pod %s/%s: %v" ,
274273 pod .Namespace , pod .Name , err )
@@ -480,44 +479,102 @@ func syncGUIDPool(smClient plugins.SubnetManagerClient, guidPool guid.Pool) erro
480479
481480// Update and set Pod's network annotation.
482481// If failed to update annotation, pod's GUID added into the list to be removed from Pkey.
483- func (d * daemon ) updatePodNetworkAnnotation (pi * podNetworkInfo , removedList * []net.HardwareAddr ) error {
482+ func (d * daemon ) updatePodNetworkAnnotation (pi * podNetworkInfo , removedList * []net.HardwareAddr ) {
484483 if pi .ibNetwork .CNIArgs == nil {
485484 pi .ibNetwork .CNIArgs = & map [string ]interface {}{}
486485 }
487486
488487 (* pi .ibNetwork .CNIArgs )[utils .InfiniBandAnnotation ] = utils .ConfiguredInfiniBandPod
489- netAnnotations , err := json .Marshal (pi .networks )
490- if err != nil {
491- return fmt .Errorf ("failed to dump networks %+v of pod into json with error: %v" , pi .networks , err )
492- }
493-
494- pi .pod .Annotations [v1 .NetworkAttachmentAnnot ] = string (netAnnotations )
495488
496489 // Try to set pod's annotations in backoff loop
497- if err = wait .ExponentialBackoff (backoffValues , func () (bool , error ) {
498- log .Info ().Msgf ("updatePodNetworkAnnotation(): Updating pod annotation for pod: %s with anootation: %s" , pi .pod .Name , pi .pod .Annotations )
490+ if err := wait .ExponentialBackoff (backoffValues , func () (bool , error ) {
491+
492+ // Get latest annotations state to avoid conflicts
493+ latestPodAnnotations , networks , err := d .getLatestPodAnnotations (pi .pod )
494+ if err != nil {
495+ log .Warn ().Msgf ("failed to get latest pod annotations for %s/%s: %v" , pi .pod .Namespace , pi .pod .Name , err )
496+ return false , nil
497+ }
498+
499+ targetNetwork , err := utils .GetPodNetwork (networks , pi .ibNetwork .Name )
500+ if err != nil {
501+ return false , fmt .Errorf ("failed to locate network %s in pod %s/%s annotations: %v" , pi .ibNetwork .Name , pi .pod .Namespace , pi .pod .Name , err )
502+ }
503+
504+ err = updateInfiniBandNetwork (targetNetwork , pi .ibNetwork )
505+ if err != nil {
506+ return false , fmt .Errorf ("failed to update infiniband network for pod %s/%s: %v" , pi .pod .Namespace , pi .pod .Name , err )
507+ }
508+
509+ netAnnotations , err := json .Marshal (networks )
510+ if err != nil {
511+ return false , fmt .Errorf ("failed to marshal updated networks for pod %s/%s: %v" , pi .pod .Namespace , pi .pod .Name , err )
512+ }
513+
514+ if latestPodAnnotations == nil {
515+ return false , fmt .Errorf ("latestPodAnnotations is nil for pod %s/%s" , pi .pod .Namespace , pi .pod .Name )
516+ }
517+
518+ latestPodAnnotations [v1 .NetworkAttachmentAnnot ] = string (netAnnotations )
519+ pi .pod .Annotations = latestPodAnnotations
520+
521+ log .Info ().Msgf ("updatePodNetworkAnnotation(): Updating pod annotation for pod: %s/%s" , pi .pod .Namespace , pi .pod .Name )
499522 if err = d .kubeClient .SetAnnotationsOnPod (pi .pod , pi .pod .Annotations ); err != nil {
500523 if kerrors .IsNotFound (err ) {
501524 return false , err
502525 }
503- log .Warn ().Msgf ("failed to update pod annotations with err: %v" , err )
526+ if kerrors .IsConflict (err ) {
527+ log .Warn ().Msgf ("conflict while updating pod annotations for %s/%s, will retry" , pi .pod .Namespace , pi .pod .Name )
528+ return false , nil
529+ }
530+ log .Warn ().Msgf ("failed to update pod annotations for %s/%s with err: %v" , pi .pod .Namespace , pi .pod .Name , err )
504531 return false , nil
505532 }
506- log .Info ().Msgf ("updatePodNetworkAnnotation(): Success on updating pod annotation for pod: %s with anootation: %s" , pi .pod .Name , pi .pod .Annotations )
533+
534+ log .Info ().Msgf ("updatePodNetworkAnnotation(): Success on updating pod annotation for pod: %s/%s with annotations: %s" , pi .pod .Namespace , pi .pod .Name , pi .pod .Annotations )
507535 return true , nil
508536 }); err != nil {
509- log .Error ().Msgf ("failed to update pod annotations" )
537+ log .Error ().Msgf ("failed to update pod annotations for %s/%s with error: %v" , pi . pod . Namespace , pi . pod . Name , err )
510538
511539 if err = d .guidPool .ReleaseGUID (pi .addr .String ()); err != nil {
512- log .Warn ().Msgf ("failed to release guid \" %s\" from removed pod \" %s\" in namespace " +
513- "\" %s\" with error: %v" , pi .addr .String (), pi .pod .Name , pi .pod .Namespace , err )
540+ log .Warn ().Msgf ("failed to release guid \" %s\" from removed pod \" %s\" in namespace \" %s\" with error: %v" , pi .addr .String (), pi .pod .Name , pi .pod .Namespace , err )
514541 } else {
515542 delete (d .guidPodNetworkMap , pi .addr .String ())
516543 }
517544
518545 * removedList = append (* removedList , pi .addr )
519546 }
547+ }
548+
549+ // Retrieves the latest annotations for a pod and returns the annotations and the pod networks.
550+ func (d * daemon ) getLatestPodAnnotations (pod * kapi.Pod ) (map [string ]string , []* v1.NetworkSelectionElement , error ) {
551+ latestPod , err := d .kubeClient .GetPod (pod .Namespace , pod .Name )
552+ if err != nil {
553+ return nil , nil , err
554+ }
555+
556+ networks , err := netAttUtils .ParsePodNetworkAnnotation (latestPod )
557+ if err != nil {
558+ return nil , nil , err
559+ }
520560
561+ return latestPod .Annotations , networks , nil
562+ }
563+
564+ // Replaces target network with source network, erroring if source is already configured.
565+ func updateInfiniBandNetwork (target * v1.NetworkSelectionElement , source * v1.NetworkSelectionElement ) error {
566+ if target == nil || source == nil {
567+ return fmt .Errorf ("target or source network is nil" )
568+ }
569+
570+ if target .CNIArgs != nil {
571+ if (* target .CNIArgs )[utils .InfiniBandAnnotation ] == utils .ConfiguredInfiniBandPod {
572+ return fmt .Errorf ("target network is already configured" )
573+ }
574+ }
575+
576+ target .InfinibandGUIDRequest = source .InfinibandGUIDRequest
577+ target .CNIArgs = source .CNIArgs
521578 return nil
522579}
523580
@@ -609,10 +666,7 @@ func (d *daemon) AddPeriodicUpdate() {
609666 var removedGUIDList []net.HardwareAddr
610667 for _ , pi := range passedPods {
611668 log .Info ().Msgf ("Updating annotations for the pod %s, network %s" , pi .pod .Name , pi .ibNetwork .Name )
612- err = d .updatePodNetworkAnnotation (pi , & removedGUIDList )
613- if err != nil {
614- log .Error ().Msgf ("%v" , err )
615- }
669+ d .updatePodNetworkAnnotation (pi , & removedGUIDList )
616670 }
617671
618672 if ibCniSpec .PKey != "" && len (removedGUIDList ) != 0 {
0 commit comments