@@ -24,7 +24,6 @@ import (
2424 "k8s.io/apimachinery/pkg/types"
2525 "k8s.io/apimachinery/pkg/util/wait"
2626 "k8s.io/client-go/rest"
27- "k8s.io/utils/ptr"
2827 cr "sigs.k8s.io/controller-runtime"
2928 crclient "sigs.k8s.io/controller-runtime/pkg/client"
3029)
@@ -449,239 +448,6 @@ func WaitForPodVolumeBackup(ctx context.Context, c crclient.Client, log logrus.F
449448 return true , nil
450449}
451450
452- // WaitForPausedPropagated waits for the HostedControlPlane (HCP) associated with the given HostedCluster (HC)
453- // to be paused. It polls the status of the HCP at regular intervals until the HCP is found to be paused or the
454- // specified timeout is reached.
455- func WaitForPausedPropagated (ctx context.Context , c crclient.Client , log logrus.FieldLogger , hc * hyperv1.HostedCluster , timeout time.Duration , paused string ) error {
456- if timeout == 0 {
457- timeout = defaultWaitForPausedTimeout
458- }
459- hcpNamespace := GetHCPNamespace (hc .Name , hc .Namespace )
460-
461- log = log .WithFields (logrus.Fields {
462- "namespace" : hc .Namespace ,
463- "name" : hc .Name ,
464- })
465-
466- waitCtx , cancel := context .WithTimeout (ctx , timeout )
467- defer cancel ()
468-
469- err := wait .PollUntilContextCancel (waitCtx , 5 * time .Second , true , func (ctx context.Context ) (bool , error ) {
470- hcp := & hyperv1.HostedControlPlane {}
471- if err := c .Get (ctx , types.NamespacedName {Name : hc .Name , Namespace : hcpNamespace }, hcp ); err != nil {
472- if apierrors .IsNotFound (err ) {
473- return true , nil
474- }
475- log .Info (err , "failed to get HostedControlPlane" )
476- return false , err
477- }
478- log .Infof ("waiting for HCP to set PausedUntil to %s" , paused )
479-
480- // For unpause (paused == ""), we expect PausedUntil to be nil
481- // For pause (paused != ""), we expect PausedUntil to equal paused
482- if paused == "" {
483- if hcp .Spec .PausedUntil == nil {
484- log .Debug ("HostedControlPlane is unpaused (PausedUntil is nil)" )
485- return true , nil
486- }
487- } else {
488- if hcp .Spec .PausedUntil != nil && * hcp .Spec .PausedUntil == paused {
489- log .Debug ("HostedControlPlane is paused" )
490- return true , nil
491- }
492- }
493-
494- return false , nil
495- })
496-
497- if err != nil {
498- log .Errorf ("giving up, HCP was not updated in the expected timeout: %v" , err )
499- return err
500- }
501-
502- return err
503- }
504-
505- // updateHypershiftResource updates Hypershift resources (HostedCluster or NodePool) with pause functionality and audit annotations.
506- // This unified function handles both resource types and includes atomic annotation updates.
507- func updateHypershiftResource (
508- ctx context.Context ,
509- c crclient.Client ,
510- log logrus.FieldLogger ,
511- paused string ,
512- namespaces []string ,
513- resourceType string ,
514- listObjFactory func () crclient.ObjectList ,
515- itemsExtractor func (crclient.ObjectList ) []crclient.Object ,
516- pausedUntilGetter func (crclient.Object ) * string ,
517- pausedUntilSetter func (crclient.Object , * string ),
518- postUpdateHook func (context.Context , crclient.Client , logrus.FieldLogger , crclient.Object , time.Duration , string ) error ,
519- ) error {
520- // Get the appropriate list object
521- listObj := listObjFactory ()
522-
523- // Search through namespaces for resources
524- var items []crclient.Object
525- for _ , ns := range namespaces {
526- if err := c .List (ctx , listObj , crclient .InNamespace (ns )); err != nil {
527- return fmt .Errorf ("failed to list %s in namespace %s: %w" , resourceType , ns , err )
528- }
529-
530- items = itemsExtractor (listObj )
531- if len (items ) > 0 {
532- log .Debugf ("found %s in namespace %s" , resourceType , ns )
533- break
534- }
535- }
536- for _ , item := range items {
537- // Create a retry loop with improved backoff for better conflict resolution
538- backoff := wait.Backoff {
539- Steps : 10 , // Increased from 5 to 10
540- Duration : 500 * time .Millisecond , // Reduced initial duration for faster retries
541- Factor : 2.0 ,
542- Jitter : 0.1 ,
543- Cap : 30 * time .Second , // Cap maximum wait time to 30 seconds
544- }
545-
546- err := wait .ExponentialBackoff (backoff , func () (bool , error ) {
547- // Get the latest version of the resource
548- current := item .DeepCopyObject ().(crclient.Object )
549- if err := c .Get (ctx , crclient .ObjectKeyFromObject (item ), current ); err != nil {
550- return false , err
551- }
552-
553- // Check if update is needed
554- currentPausedUntil := pausedUntilGetter (current )
555- var needsUpdate bool
556- if paused == "" {
557- // For unpause: we need to set pausedUntil to nil, so update needed if it's currently not nil
558- needsUpdate = currentPausedUntil != nil
559- } else {
560- // For pause: we need to set pausedUntil to the paused value
561- needsUpdate = currentPausedUntil == nil || * currentPausedUntil != paused
562- }
563-
564- if needsUpdate {
565- switch paused {
566- case "" :
567- log .Infof ("setting PauseUntil to nil (unpause) in %s %s" , resourceType , current .GetName ())
568- pausedUntilSetter (current , nil )
569- removePauseAuditAnnotations (current )
570- case "true" :
571- log .Infof ("setting PauseUntil to %s in %s %s" , paused , resourceType , current .GetName ())
572- pausedUntilSetter (current , ptr .To (paused ))
573- addPauseAuditAnnotations (current )
574- }
575-
576- // Perform the atomic update
577- if err := c .Update (ctx , current ); err != nil {
578- if apierrors .IsConflict (err ) {
579- log .Infof ("Conflict detected updating %s %s, retrying..." , resourceType , current .GetName ())
580- return false , nil
581- }
582- return false , err
583- }
584-
585- // Call post-update hook if provided (for HostedCluster propagation validation)
586- if postUpdateHook != nil {
587- if err := postUpdateHook (ctx , c , log , current , defaultWaitForPausedTimeout , paused ); err != nil {
588- return false , err
589- }
590- }
591-
592- log .Infof ("Successfully set %s %s PausedUntil to %s" , resourceType , current .GetName (), paused )
593- } else {
594- log .Debugf ("%s %s already has PausedUntil set to %s, no update needed" , resourceType , current .GetName (), paused )
595- }
596-
597- return true , nil
598- })
599-
600- if err != nil {
601- return fmt .Errorf ("failed to update %s %s after retries: %w" , resourceType , item .GetName (), err )
602- }
603- }
604-
605- return nil
606- }
607-
608- // UpdateHostedCluster updates the HostedCluster's necessary fields in the specified namespaces.
609- func UpdateHostedCluster (ctx context.Context , c crclient.Client , log logrus.FieldLogger , paused string , namespaces []string ) error {
610- return updateHypershiftResource (
611- ctx , c , log , paused , namespaces , "HostedCluster" ,
612-
613- // listObjFactory - creates a new HostedClusterList
614- func () crclient.ObjectList {
615- return & hyperv1.HostedClusterList {Items : []hyperv1.HostedCluster {}}
616- },
617-
618- // itemsExtractor - extracts HostedCluster items as crclient.Object slice
619- func (list crclient.ObjectList ) []crclient.Object {
620- hcList := list .(* hyperv1.HostedClusterList )
621- items := make ([]crclient.Object , len (hcList .Items ))
622- for i := range hcList .Items {
623- items [i ] = & hcList .Items [i ]
624- }
625- return items
626- },
627-
628- // pausedUntilGetter - gets PausedUntil from HostedCluster
629- func (obj crclient.Object ) * string {
630- hc := obj .(* hyperv1.HostedCluster )
631- return hc .Spec .PausedUntil
632- },
633-
634- // pausedUntilSetter - sets PausedUntil on HostedCluster
635- func (obj crclient.Object , paused * string ) {
636- hc := obj .(* hyperv1.HostedCluster )
637- hc .Spec .PausedUntil = paused
638- },
639-
640- // postUpdateHook - validates propagation for HostedClusters
641- func (ctx context.Context , c crclient.Client , log logrus.FieldLogger , obj crclient.Object , timeout time.Duration , paused string ) error {
642- hc := obj .(* hyperv1.HostedCluster )
643- log .Debug ("checking paused state propagation" )
644- return WaitForPausedPropagated (ctx , c , log , hc , timeout , paused )
645- },
646- )
647- }
648-
649- // UpdateNodepools updates the NodePool's necessary fields in the specified namespaces.
650- func UpdateNodepools (ctx context.Context , c crclient.Client , log logrus.FieldLogger , paused string , namespaces []string ) error {
651- return updateHypershiftResource (
652- ctx , c , log , paused , namespaces , "NodePool" ,
653-
654- // listObjFactory - creates a new NodePoolList
655- func () crclient.ObjectList {
656- return & hyperv1.NodePoolList {}
657- },
658-
659- // itemsExtractor - extracts NodePool items as crclient.Object slice
660- func (list crclient.ObjectList ) []crclient.Object {
661- npList := list .(* hyperv1.NodePoolList )
662- items := make ([]crclient.Object , len (npList .Items ))
663- for i := range npList .Items {
664- items [i ] = & npList .Items [i ]
665- }
666- return items
667- },
668-
669- // pausedUntilGetter - gets PausedUntil from NodePool
670- func (obj crclient.Object ) * string {
671- np := obj .(* hyperv1.NodePool )
672- return np .Spec .PausedUntil
673- },
674-
675- // pausedUntilSetter - sets PausedUntil on NodePool
676- func (obj crclient.Object , paused * string ) {
677- np := obj .(* hyperv1.NodePool )
678- np .Spec .PausedUntil = paused
679- },
680-
681- // postUpdateHook - NodePools don't need propagation validation
682- nil ,
683- )
684- }
685451
686452// GetCurrentNamespace reads the namespace from the Kubernetes service account
687453// token file and returns it as a string. The file is expected to be located at
@@ -730,20 +496,6 @@ func RemoveAnnotation(metadata metav1.Object, key string) {
730496 metadata .SetAnnotations (annotations )
731497}
732498
733- // addPauseAuditAnnotations adds OADP audit annotations when pausing a resource.
734- // It adds both the paused-by and paused-at annotations atomically.
735- func addPauseAuditAnnotations (obj metav1.Object ) {
736- timestamp := time .Now ().Format (time .RFC3339 )
737- AddAnnotation (obj , OADPPausedByAnnotation , HypershiftOADPPluginName )
738- AddAnnotation (obj , OADPPausedAtAnnotation , timestamp )
739- }
740-
741- // removePauseAuditAnnotations removes OADP audit annotations when unpausing a resource.
742- // It removes both the paused-by and paused-at annotations.
743- func removePauseAuditAnnotations (obj metav1.Object ) {
744- RemoveAnnotation (obj , OADPPausedByAnnotation )
745- RemoveAnnotation (obj , OADPPausedAtAnnotation )
746- }
747499
748500// AddLabel adds a label with the specified key and value to the given metadata object.
749501// If the metadata object does not have any labels, a new map is created to store the label.
0 commit comments