@@ -20,6 +20,7 @@ import (
2020 "context"
2121 "fmt"
2222 "reflect"
23+ "strings"
2324 "time"
2425
2526 appsv1 "k8s.io/api/apps/v1"
@@ -31,6 +32,7 @@ import (
3132 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3233 "k8s.io/apimachinery/pkg/labels"
3334 "k8s.io/apimachinery/pkg/runtime"
35+ "k8s.io/apimachinery/pkg/util/sets"
3436 "k8s.io/client-go/tools/record"
3537 "k8s.io/klog/v2"
3638
@@ -141,7 +143,7 @@ func (r *NUMAResourcesOperatorReconciler) Reconcile(ctx context.Context, req ctr
141143
142144 initialStatus := * instance .Status .DeepCopy ()
143145 if len (initialStatus .Conditions ) == 0 {
144- instance .Status .Conditions = status .DefaultBaseConditions ( time . Now () )
146+ instance .Status .Conditions = status .NewNUMAResourcesOperatorConditions ( )
145147 }
146148
147149 if req .Name != objectnames .DefaultNUMAResourcesOperatorCrName {
@@ -241,7 +243,7 @@ func (r *NUMAResourcesOperatorReconciler) reconcileResourceAPI(ctx context.Conte
241243func (r * NUMAResourcesOperatorReconciler ) reconcileResourceMachineConfig (ctx context.Context , instance * nropv1.NUMAResourcesOperator , existing * rtestate.ExistingManifests , trees []nodegroupv1.Tree ) intreconcile.Step {
242244 // we need to sync machine configs first and wait for the MachineConfigPool updates
243245 // before checking additional components for updates
244- mcpUpdatedFunc , err := r .syncMachineConfigs (ctx , instance , existing , trees )
246+ mcpUpdatedFunc , pausedMCPs , err := r .syncMachineConfigs (ctx , instance , existing , trees )
245247 if err != nil {
246248 r .Recorder .Eventf (instance , corev1 .EventTypeWarning , "FailedMCSync" , "Failed to set up machine configuration for worker nodes: %v" , err )
247249 err = fmt .Errorf ("failed to sync machine configs: %w" , err )
@@ -251,7 +253,7 @@ func (r *NUMAResourcesOperatorReconciler) reconcileResourceMachineConfig(ctx con
251253
252254 // MCO needs to update the SELinux context removal and other stuff, and need to trigger a reboot.
253255 // It can take a while.
254- mcpStatuses , mcpNamePending := syncMachineConfigPoolsStatuses (instance .Name , trees , r .ForwardMCPConds , mcpUpdatedFunc )
256+ mcpStatuses , mcpNamePending := syncMachineConfigPoolsStatuses (instance .Name , trees , r .ForwardMCPConds , mcpUpdatedFunc , pausedMCPs )
255257 instance .Status .MachineConfigPools = mcpStatuses
256258
257259 if mcpNamePending != "" {
@@ -260,6 +262,8 @@ func (r *NUMAResourcesOperatorReconciler) reconcileResourceMachineConfig(ctx con
260262 }
261263 instance .Status .MachineConfigPools = syncMachineConfigPoolNodeGroupConfigStatuses (instance .Status .MachineConfigPools , trees )
262264
265+ updateMachineConfigPoolPausedCondition (instance .Status .Conditions , instance .Generation , pausedMCPs )
266+
263267 return intreconcile .StepSuccess ()
264268}
265269
@@ -399,7 +403,7 @@ func (r *NUMAResourcesOperatorReconciler) syncNodeResourceTopologyAPI(ctx contex
399403 return (updatedCount == len (objStates )), err
400404}
401405
402- func (r * NUMAResourcesOperatorReconciler ) syncMachineConfigs (ctx context.Context , instance * nropv1.NUMAResourcesOperator , existing * rtestate.ExistingManifests , trees []nodegroupv1.Tree ) (rtestate.MCPWaitForUpdatedFunc , error ) {
406+ func (r * NUMAResourcesOperatorReconciler ) syncMachineConfigs (ctx context.Context , instance * nropv1.NUMAResourcesOperator , existing * rtestate.ExistingManifests , trees []nodegroupv1.Tree ) (rtestate.MCPWaitForUpdatedFunc , sets. Set [ string ], error ) {
403407 klog .V (4 ).InfoS ("Machine Config Sync start" , "trees" , len (trees ))
404408 defer klog .V (4 ).Info ("Machine Config Sync stop" )
405409
@@ -409,7 +413,7 @@ func (r *NUMAResourcesOperatorReconciler) syncMachineConfigs(ctx context.Context
409413 // In case of operator upgrade from 4.1X → 4.18, it's necessary to remove the old MachineConfig,
410414 // unless an emergency annotation is provided which forces the operator to use custom policy
411415
412- objStates , waitFunc := existing .MachineConfigsState (r .RTEManifests )
416+ objStates , waitFunc , pausedMCPs := existing .MachineConfigsState (r .RTEManifests )
413417 for _ , objState := range objStates {
414418 klog .InfoS ("objState" , "desired" , objState .Desired , "existing" , objState .Existing , "createOrUpdate" , objState .IsCreateOrUpdate ())
415419 if objState .IsCreateOrUpdate () {
@@ -429,10 +433,10 @@ func (r *NUMAResourcesOperatorReconciler) syncMachineConfigs(ctx context.Context
429433 break
430434 }
431435 }
432- return waitFunc , err
436+ return waitFunc , pausedMCPs , err
433437}
434438
435- func syncMachineConfigPoolsStatuses (instanceName string , trees []nodegroupv1.Tree , forwardMCPConds bool , updatedFunc rtestate.MCPWaitForUpdatedFunc ) ([]nropv1.MachineConfigPool , string ) {
439+ func syncMachineConfigPoolsStatuses (instanceName string , trees []nodegroupv1.Tree , forwardMCPConds bool , updatedFunc rtestate.MCPWaitForUpdatedFunc , pausedMCPs sets. Set [ string ] ) ([]nropv1.MachineConfigPool , string ) {
436440 klog .V (4 ).InfoS ("Machine Config Status Sync start" , "trees" , len (trees ))
437441 defer klog .V (4 ).Info ("Machine Config Status Sync stop" )
438442
@@ -441,6 +445,11 @@ func syncMachineConfigPoolsStatuses(instanceName string, trees []nodegroupv1.Tre
441445 for _ , mcp := range tree .MachineConfigPools {
442446 mcpStatuses = append (mcpStatuses , extractMCPStatus (mcp , forwardMCPConds ))
443447
448+ if pausedMCPs .Has (mcp .Name ) {
449+ klog .V (5 ).InfoS ("Paused MachineConfigPool detected" , "name" , mcp .Name )
450+ continue
451+ }
452+
444453 isUpdated := updatedFunc (instanceName , mcp )
445454 klog .V (5 ).InfoS ("Machine Config Pool state" , "name" , mcp .Name , "instance" , instanceName , "updated" , isUpdated )
446455
@@ -813,3 +822,21 @@ func getTreesByNodeGroup(ctx context.Context, cli client.Client, nodeGroups []nr
813822 return nil , fmt .Errorf ("unsupported platform" )
814823 }
815824}
825+
826+ func updateMachineConfigPoolPausedCondition (conditions []metav1.Condition , generation int64 , pausedMCPs sets.Set [string ]) {
827+ pausedStatus := metav1 .ConditionFalse
828+ message := ""
829+ if pausedMCPs .Len () > 0 {
830+ pausedStatus = metav1 .ConditionTrue
831+ message = "detected paused MCPs: " + strings .Join (pausedMCPs .UnsortedList (), ", " )
832+ }
833+ condition := metav1.Condition {
834+ Type : status .ConditionMachineConfigPoolPaused ,
835+ Status : pausedStatus ,
836+ Reason : status .ConditionMachineConfigPoolPaused ,
837+ Message : message ,
838+ ObservedGeneration : generation ,
839+ LastTransitionTime : metav1 .Now (),
840+ }
841+ status .UpdateConditionsInPlace (conditions , condition , time.Time {})
842+ }
0 commit comments