@@ -20,6 +20,7 @@ import (
2020 "context"
2121 "fmt"
2222 "reflect"
23+ "strings"
2324 "time"
2425
2526 appsv1 "k8s.io/api/apps/v1"
@@ -31,6 +32,7 @@ import (
3132 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3233 "k8s.io/apimachinery/pkg/labels"
3334 "k8s.io/apimachinery/pkg/runtime"
35+ "k8s.io/apimachinery/pkg/util/sets"
3436 "k8s.io/client-go/tools/record"
3537 "k8s.io/klog/v2"
3638
@@ -141,7 +143,7 @@ func (r *NUMAResourcesOperatorReconciler) Reconcile(ctx context.Context, req ctr
141143
142144 initialStatus := * instance .Status .DeepCopy ()
143145 if len (initialStatus .Conditions ) == 0 {
144- instance .Status .Conditions = status .DefaultBaseConditions ( time . Now () )
146+ instance .Status .Conditions = status .NewNUMAResourcesOperatorConditions ( )
145147 }
146148
147149 if req .Name != objectnames .DefaultNUMAResourcesOperatorCrName {
@@ -236,7 +238,7 @@ func (r *NUMAResourcesOperatorReconciler) reconcileResourceAPI(ctx context.Conte
236238func (r * NUMAResourcesOperatorReconciler ) reconcileResourceMachineConfig (ctx context.Context , instance * nropv1.NUMAResourcesOperator , existing * rtestate.ExistingManifests , trees []nodegroupv1.Tree ) intreconcile.Step {
237239 // we need to sync machine configs first and wait for the MachineConfigPool updates
238240 // before checking additional components for updates
239- mcpUpdatedFunc , err := r .syncMachineConfigs (ctx , instance , existing , trees )
241+ mcpUpdatedFunc , pausedMCPs , err := r .syncMachineConfigs (ctx , instance , existing , trees )
240242 if err != nil {
241243 r .Recorder .Eventf (instance , corev1 .EventTypeWarning , "FailedMCSync" , "Failed to set up machine configuration for worker nodes: %v" , err )
242244 err = fmt .Errorf ("failed to sync machine configs: %w" , err )
@@ -246,7 +248,7 @@ func (r *NUMAResourcesOperatorReconciler) reconcileResourceMachineConfig(ctx con
246248
247249 // MCO needs to update the SELinux context removal and other stuff, and need to trigger a reboot.
248250 // It can take a while.
249- mcpStatuses , mcpNamePending := syncMachineConfigPoolsStatuses (instance .Name , trees , r .ForwardMCPConds , mcpUpdatedFunc )
251+ mcpStatuses , mcpNamePending := syncMachineConfigPoolsStatuses (instance .Name , trees , r .ForwardMCPConds , mcpUpdatedFunc , pausedMCPs )
250252 instance .Status .MachineConfigPools = mcpStatuses
251253
252254 if mcpNamePending != "" {
@@ -255,6 +257,8 @@ func (r *NUMAResourcesOperatorReconciler) reconcileResourceMachineConfig(ctx con
255257 }
256258 instance .Status .MachineConfigPools = syncMachineConfigPoolNodeGroupConfigStatuses (instance .Status .MachineConfigPools , trees )
257259
260+ instance .Status .Conditions = updateMachineConfigPoolPausedCondition (instance .Status .Conditions , instance .Generation , pausedMCPs )
261+
258262 return intreconcile .StepSuccess ()
259263}
260264
@@ -390,7 +394,7 @@ func (r *NUMAResourcesOperatorReconciler) syncNodeResourceTopologyAPI(ctx contex
390394 return (updatedCount == len (objStates )), err
391395}
392396
393- func (r * NUMAResourcesOperatorReconciler ) syncMachineConfigs (ctx context.Context , instance * nropv1.NUMAResourcesOperator , existing * rtestate.ExistingManifests , trees []nodegroupv1.Tree ) (rtestate.MCPWaitForUpdatedFunc , error ) {
397+ func (r * NUMAResourcesOperatorReconciler ) syncMachineConfigs (ctx context.Context , instance * nropv1.NUMAResourcesOperator , existing * rtestate.ExistingManifests , trees []nodegroupv1.Tree ) (rtestate.MCPWaitForUpdatedFunc , sets. Set [ string ], error ) {
394398 klog .V (4 ).InfoS ("Machine Config Sync start" , "trees" , len (trees ))
395399 defer klog .V (4 ).Info ("Machine Config Sync stop" )
396400
@@ -400,7 +404,7 @@ func (r *NUMAResourcesOperatorReconciler) syncMachineConfigs(ctx context.Context
400404 // In case of operator upgrade from 4.1X → 4.18, it's necessary to remove the old MachineConfig,
401405 // unless an emergency annotation is provided which forces the operator to use custom policy
402406
403- objStates , waitFunc := existing .MachineConfigsState (r .RTEManifests )
407+ objStates , waitFunc , pausedMCPs := existing .MachineConfigsState (r .RTEManifests )
404408 for _ , objState := range objStates {
405409 klog .InfoS ("objState" , "desired" , objState .Desired , "existing" , objState .Existing , "createOrUpdate" , objState .IsCreateOrUpdate ())
406410 if objState .IsCreateOrUpdate () {
@@ -420,10 +424,10 @@ func (r *NUMAResourcesOperatorReconciler) syncMachineConfigs(ctx context.Context
420424 break
421425 }
422426 }
423- return waitFunc , err
427+ return waitFunc , pausedMCPs , err
424428}
425429
426- func syncMachineConfigPoolsStatuses (instanceName string , trees []nodegroupv1.Tree , forwardMCPConds bool , updatedFunc rtestate.MCPWaitForUpdatedFunc ) ([]nropv1.MachineConfigPool , string ) {
430+ func syncMachineConfigPoolsStatuses (instanceName string , trees []nodegroupv1.Tree , forwardMCPConds bool , updatedFunc rtestate.MCPWaitForUpdatedFunc , pausedMCPs sets. Set [ string ] ) ([]nropv1.MachineConfigPool , string ) {
427431 klog .V (4 ).InfoS ("Machine Config Status Sync start" , "trees" , len (trees ))
428432 defer klog .V (4 ).Info ("Machine Config Status Sync stop" )
429433
@@ -432,6 +436,11 @@ func syncMachineConfigPoolsStatuses(instanceName string, trees []nodegroupv1.Tre
432436 for _ , mcp := range tree .MachineConfigPools {
433437 mcpStatuses = append (mcpStatuses , extractMCPStatus (mcp , forwardMCPConds ))
434438
439+ if pausedMCPs .Has (mcp .Name ) {
440+ klog .V (5 ).InfoS ("Paused MachineConfigPool detected" , "name" , mcp .Name )
441+ continue
442+ }
443+
435444 isUpdated := updatedFunc (instanceName , mcp )
436445 klog .V (5 ).InfoS ("Machine Config Pool state" , "name" , mcp .Name , "instance" , instanceName , "updated" , isUpdated )
437446
@@ -802,3 +811,22 @@ func getTreesByNodeGroup(ctx context.Context, cli client.Client, nodeGroups []nr
802811 return nil , fmt .Errorf ("unsupported platform" )
803812 }
804813}
814+
815+ func updateMachineConfigPoolPausedCondition (conditions []metav1.Condition , generation int64 , pausedMCPs sets.Set [string ]) []metav1.Condition {
816+ pausedStatus := metav1 .ConditionFalse
817+ message := ""
818+ if pausedMCPs .Len () > 0 {
819+ pausedStatus = metav1 .ConditionTrue
820+ message = "detected paused MCPs: " + strings .Join (pausedMCPs .UnsortedList (), ", " )
821+ }
822+ condition := metav1.Condition {
823+ Type : status .ConditionMachineConfigPoolPaused ,
824+ Status : pausedStatus ,
825+ Reason : status .ConditionMachineConfigPoolPaused ,
826+ Message : message ,
827+ ObservedGeneration : generation ,
828+ LastTransitionTime : metav1 .Now (),
829+ }
830+ conditions , _ = status .ComputeConditions (conditions , condition , time .Now ())
831+ return conditions
832+ }
0 commit comments