@@ -61,7 +61,7 @@ const (
6161)
6262
6363//+kubebuilder:rbac:groups=config.openshift.io,resources=clusterversions,verbs=get;list;watch;update;patch
64- //+kubebuilder:rbac:groups=machineconfiguration.openshift.io,resources=machineconfigpools,verbs=get;list;watch
64+ //+kubebuilder:rbac:groups=machineconfiguration.openshift.io,resources=machineconfigpools,verbs=get;list;watch;update;patch
6565
6666//+kubebuilder:rbac:groups=managedupgrade.appuio.io,resources=upgradejobs,verbs=get;list;watch;create;update;patch;delete
6767//+kubebuilder:rbac:groups=managedupgrade.appuio.io,resources=upgradejobs/status,verbs=get;update;patch
@@ -113,7 +113,12 @@ func (r *UpgradeJobReconciler) Reconcile(ctx context.Context, req ctrl.Request)
113113 // Don't execute hooks created after the job was finished.
114114 _ , efaerr := r .executeHooks (ctx , & uj , managedupgradev1beta1 .EventFailure , noTrackingKey , eventInfoWithReason (fc .Reason ), fc .LastTransitionTime .Time )
115115 _ , efierr := r .executeHooks (ctx , & uj , managedupgradev1beta1 .EventFinish , noTrackingKey , eventInfoWithReason (fc .Reason ), fc .LastTransitionTime .Time )
116- return ctrl.Result {}, multierr .Combine (efaerr , efierr , r .cleanupLock (ctx , uj ))
116+ return ctrl.Result {}, multierr .Combine (
117+ efaerr ,
118+ efierr ,
119+ r .pauseMachinePoolsOnFailure (ctx , uj ),
120+ r .cleanupLock (ctx , uj ),
121+ )
117122 }
118123
119124 cont , err := r .executeHooks (ctx , & uj , managedupgradev1beta1 .EventCreate , noTrackingKey , eventInfoWithReason ("" ), time.Time {})
@@ -1106,6 +1111,53 @@ func (r *UpgradeJobReconciler) cleanupMachineConfigPools(ctx context.Context, uj
11061111 return multierr .Combine (errs ... )
11071112}
11081113
1114+ func (r * UpgradeJobReconciler ) pauseMachinePoolsOnFailure (ctx context.Context , uj managedupgradev1beta1.UpgradeJob ) error {
1115+ l := log .FromContext (ctx ).WithName ("UpgradeJobReconciler.pauseMachinePoolsOnFailure" )
1116+
1117+ if ! uj .Spec .PauseMachineConfigPoolsOnFailure .Enabled {
1118+ return nil
1119+ }
1120+ if cond := apimeta .FindStatusCondition (uj .Status .Conditions , managedupgradev1beta1 .UpgradeJobConditionMachineConfigPoolsPaused ); cond != nil &&
1121+ cond .Reason == managedupgradev1beta1 .UpgradeJobReasonPausedOnFailure && cond .Status == metav1 .ConditionTrue {
1122+ l .Info ("machine config pools already paused on failure" )
1123+ return nil
1124+ }
1125+
1126+ selector , err := metav1 .LabelSelectorAsSelector (& uj .Spec .PauseMachineConfigPoolsOnFailure .Selector )
1127+ if err != nil {
1128+ return fmt .Errorf ("failed to parse machine config pool selector: %w" , err )
1129+ }
1130+ var mcpl machineconfigurationv1.MachineConfigPoolList
1131+ if err := r .List (ctx , & mcpl , client.MatchingLabelsSelector {Selector : selector }); err != nil {
1132+ return fmt .Errorf ("failed to list machine config pools: %w" , err )
1133+ }
1134+ var errs []error
1135+ for _ , mcp := range mcpl .Items {
1136+ if mcp .Spec .Paused {
1137+ continue
1138+ }
1139+ if mcp .Status .MachineCount == mcp .Status .UpdatedMachineCount {
1140+ continue
1141+ }
1142+ l .Info ("pausing machine config pool due to upgrade job failure" , "pool" , mcp .Name )
1143+ mcp .Spec .Paused = true
1144+ if err := r .Update (ctx , & mcp ); err != nil {
1145+ errs = append (errs , fmt .Errorf ("failed to pause machine config pool %q: %w" , mcp .Name , err ))
1146+ }
1147+ }
1148+ if changed := r .setStatusCondition (& uj .Status .Conditions , metav1.Condition {
1149+ Type : managedupgradev1beta1 .UpgradeJobConditionMachineConfigPoolsPaused ,
1150+ Status : metav1 .ConditionTrue ,
1151+ Reason : managedupgradev1beta1 .UpgradeJobReasonPausedOnFailure ,
1152+ }); changed {
1153+ if err := r .Status ().Update (ctx , & uj ); err != nil {
1154+ errs = append (errs , fmt .Errorf ("failed to update upgrade job status: %w" , err ))
1155+ }
1156+ }
1157+
1158+ return multierr .Combine (errs ... )
1159+ }
1160+
11091161func eventInfoWithReason (reason string ) map [string ]any {
11101162 return map [string ]any {
11111163 "reason" : reason ,
0 commit comments