@@ -48,7 +48,9 @@ import (
4848 "github.com/k8stopologyawareschedwg/deployer/pkg/deployer/platform"
4949
5050 nropv1 "github.com/openshift-kni/numaresources-operator/api/v1"
51+ intkubeletconfig "github.com/openshift-kni/numaresources-operator/internal/kubeletconfig"
5152 "github.com/openshift-kni/numaresources-operator/internal/machineconfigpools"
53+ intreconcile "github.com/openshift-kni/numaresources-operator/internal/reconcile"
5254 "github.com/openshift-kni/numaresources-operator/pkg/apply"
5355 "github.com/openshift-kni/numaresources-operator/pkg/kubeletconfig"
5456 "github.com/openshift-kni/numaresources-operator/pkg/objectnames"
@@ -57,7 +59,8 @@ import (
5759)
5860
5961const (
// Retry periods used by the kubeletconfig controller.
60- kubeletConfigRetryPeriod = 30 * time .Second
62+ kubeletConfigRetryPeriod = 30 * time .Second
// MachineConfigPoolPausedRetryPeriod is the RequeueAfter delay applied to the
// reconcile result when the MachineConfigPool matching a KubeletConfig is
// paused.
63+ MachineConfigPoolPausedRetryPeriod = 2 * time .Minute
6164)
6265
6366const (
@@ -116,22 +119,20 @@ func (r *KubeletConfigReconciler) Reconcile(ctx context.Context, req ctrl.Reques
116119
117120 // KubeletConfig changes are expected to be sporadic, yet are important enough
118121 // to be made visible at kubernetes level. So we generate events to handle them
119- cm , err := r .reconcileConfigMap (ctx , instance , req .NamespacedName )
120- if err != nil {
121- var klErr * InvalidKubeletConfig
122- if errors .As (err , & klErr ) {
123- r .Recorder .Event (instance , "Normal" , "ProcessSkip" , "ignored kubelet config " + klErr .ObjectName )
124- return ctrl.Result {}, nil
125- }
126-
127- klog .ErrorS (err , "failed to reconcile configmap" , "controller" , "kubeletconfig" )
122+ cm , step := r .reconcileConfigMap (ctx , instance , req .NamespacedName )
123+ if step .Error != nil && step .ConditionInfo .Reason != intreconcile .EventProcessSkip {
124+ klog .ErrorS (step .Error , "failed to reconcile configmap" , "controller" , "kubeletconfig" )
125+ r .Recorder .Event (instance , step .ConditionInfo .Type , step .ConditionInfo .Reason , step .ConditionInfo .Message )
126+ return step .Result , step .Error
127+ }
128128
129- r . Recorder . Event ( instance , "Warning" , "ProcessFailed" , "Failed to update RTE config from kubelet config " + req . NamespacedName . String ())
130- return ctrl. Result {}, err
129+ if step . ConditionInfo . Reason == intreconcile . EventProcessSuccess {
130+ step = step . WithMessage ( fmt . Sprintf ( "Updated RTE config %s/%s from kubelet config %s" , cm . Namespace , cm . Name , req . NamespacedName . String ()))
131131 }
132132
133- r .Recorder .Event (instance , "Normal" , "ProcessOK" , fmt .Sprintf ("Updated RTE config %s/%s from kubelet config %s" , cm .Namespace , cm .Name , req .NamespacedName .String ()))
134- return ctrl.Result {}, nil
133+ r .Recorder .Event (instance , step .ConditionInfo .Type , step .ConditionInfo .Reason , step .ConditionInfo .Message )
134+
135+ return step .Result , nil
135136}
136137
137138func (r * KubeletConfigReconciler ) SetupWithManager (mgr ctrl.Manager ) error {
@@ -197,25 +198,29 @@ func (e *InvalidKubeletConfig) Unwrap() error {
197198 return e .Err
198199}
199200
200- func (r * KubeletConfigReconciler ) reconcileConfigMap (ctx context.Context , instance * nropv1.NUMAResourcesOperator , kcKey client.ObjectKey ) (* corev1.ConfigMap , error ) {
201+ func (r * KubeletConfigReconciler ) reconcileConfigMap (ctx context.Context , instance * nropv1.NUMAResourcesOperator , kcKey client.ObjectKey ) (* corev1.ConfigMap , intreconcile. Step ) {
201202 // first check if the ConfigMap should be deleted
202203 // to save all the additional work related for create/update
203204 cm , deleted , err := r .deleteConfigMap (ctx , instance , kcKey )
204205 if deleted {
205- return cm , err
206+ return cm , intreconcile . StepWarning ( fmt . Errorf ( "Failed to update RTE config from kubelet config %s: %v" , kcKey . Name , err ))
206207 }
207208
208- kcHandler , err := r .makeKCHandlerForPlatform (ctx , instance , kcKey )
209- if err != nil {
210- return nil , err
209+ kcHandler , step := r .makeKCHandlerForPlatform (ctx , instance , kcKey )
210+ if step . Error != nil {
211+ return nil , step
211212 }
213+
212214 kubeletConfig , err := kubeletconfig .MCOKubeletConfToKubeletConf (kcHandler .mcoKc )
213215 if err != nil {
214216 klog .ErrorS (err , "cannot extract KubeletConfiguration from MCO KubeletConfig" , "name" , kcKey .Name )
215- return nil , err
217+ return nil , FailedConfigMapUpdateStep ( kcKey . Name , err )
216218 }
217-
218- return r .syncConfigMap (ctx , kubeletConfig , instance , kcHandler )
219+ cm , err = r .syncConfigMap (ctx , kubeletConfig , instance , kcHandler )
220+ if err != nil {
221+ return cm , FailedConfigMapUpdateStep (kcKey .Name , err )
222+ }
223+ return cm , step
219224}
220225
221226func (r * KubeletConfigReconciler ) syncConfigMap (ctx context.Context , kubeletConfig * kubeletconfigv1beta1.KubeletConfiguration , instance * nropv1.NUMAResourcesOperator , kcHandler * kubeletConfigHandler ) (* corev1.ConfigMap , error ) {
@@ -244,63 +249,106 @@ func (r *KubeletConfigReconciler) syncConfigMap(ctx context.Context, kubeletConf
244249 return rendered , nil
245250}
246251
247- func (r * KubeletConfigReconciler ) makeKCHandlerForPlatform (ctx context.Context , instance * nropv1.NUMAResourcesOperator , kcKey client.ObjectKey ) (* kubeletConfigHandler , error ) {
252+ func (r * KubeletConfigReconciler ) makeKCHandlerForPlatform (ctx context.Context , instance * nropv1.NUMAResourcesOperator , kcKey client.ObjectKey ) (* kubeletConfigHandler , intreconcile. Step ) {
248253 switch r .Platform {
249254 case platform .OpenShift :
250255 mcoKc := & mcov1.KubeletConfig {}
251256 if err := r .Client .Get (ctx , kcKey , mcoKc ); err != nil {
252- return nil , err
257+ return nil , FailedConfigMapUpdateStep ( kcKey . Name , err )
253258 }
254259
255260 mcps , err := machineconfigpools .GetListByNodeGroupsV1 (ctx , r .Client , instance .Spec .NodeGroups )
256261 if err != nil {
257- return nil , err
262+ return nil , FailedConfigMapUpdateStep ( kcKey . Name , err )
258263 }
259264
260265 mcp , err := machineconfigpools .FindBySelector (mcps , mcoKc .Spec .MachineConfigPoolSelector )
261266 if err != nil {
262267 klog .ErrorS (err , "cannot find a matching mcp for MCO KubeletConfig" , "name" , kcKey .Name )
263268 var notFound * machineconfigpools.NotFound
264269 if errors .As (err , & notFound ) {
265- return nil , & InvalidKubeletConfig {
266- ObjectName : kcKey .Name ,
267- Err : notFound ,
268- }
270+ return nil , intreconcile .StepNormalSkip (fmt .Errorf ("%s: %v" , kcKey , notFound ))
269271 }
270- return nil , err
272+ return nil , FailedConfigMapUpdateStep ( kcKey . Name , err )
271273 }
272274
273275 klog .V (3 ).InfoS ("matched MCP to MCO KubeletConfig" , "kubeletconfig name" , kcKey .Name , "MCP name" , mcp .Name )
274276
275277 // nothing we care about, and we can't do much anyway
276278 if mcoKc .Spec .KubeletConfig == nil {
277279 klog .InfoS ("detected KubeletConfig with empty payload, ignoring" , "name" , kcKey .Name )
278- return nil , & InvalidKubeletConfig {ObjectName : kcKey .Name }
280+ return nil , intreconcile .StepNormalSkip (fmt .Errorf ("Invalid KubeletConfig %s" , kcKey .Name ))
281+ }
282+
283+ if mcp .Spec .Paused {
284+ klog .InfoS ("detected paused MCP" , "name" , mcp .Name )
285+ //if the CM exists -> just skip;
286+ //if the CM does not exist -> create it based on the current active machineConfig
287+
288+ expectedCMName := objectnames .GetComponentName (instance .Name , mcp .Name )
289+ existingCM := & corev1.ConfigMap {}
290+ if err := r .Client .Get (ctx , client.ObjectKey {Namespace : r .Namespace , Name : expectedCMName }, existingCM ); err != nil {
291+ if apierrors .IsNotFound (err ) {
292+ currentConfigName := mcp .Status .Configuration .Name
293+ currentConfigObj := & mcov1.MachineConfig {}
294+ if err := r .Client .Get (ctx , client.ObjectKey {Name : currentConfigName }, currentConfigObj ); err != nil {
295+ klog .ErrorS (err , "cannot find the current machineConfig" , "name" , currentConfigName )
296+ return nil , stepNormalSkipForPausedupdates ("failed to find the current machineConfig %s: %v" , currentConfigName , err )
297+ }
298+
299+ // use local version of github.com/openshift/machine-config-operator/pkg/controller/common.ParseAndConvertConfig
300+ _ , dataInBytes , err := intkubeletconfig .ParseKubeletConfigRawData (currentConfigObj .Spec .Config .Raw )
301+ if err != nil {
302+ klog .ErrorS (err , "cannot parse the current machineConfig" , "name" , currentConfigName )
303+ return nil , stepNormalSkipForPausedupdates ("failed to parse the current machineConfig %s: %v" , currentConfigName , err )
304+ }
305+
306+ decodeKc , err := intkubeletconfig .DecodeKubeletConfigurationFromData (dataInBytes )
307+ if err != nil {
308+ klog .ErrorS (err , "cannot decode the current KubeletConfig data from MachineConfig" , "name" , currentConfigName )
309+ return nil , stepNormalSkipForPausedupdates ("failed to decode the current KubeletConfig data from MachineConfig %s: %v" , currentConfigName , err )
310+ }
311+
312+ successStepWithRetry := intreconcile .StepNormalSucess (fmt .Sprintf ("Created ConfigMap based on the current machineConfig for paused MCP %s" , mcp .Name ))
313+ successStepWithRetry .Result = ctrl.Result {Requeue : true , RequeueAfter : MachineConfigPoolPausedRetryPeriod }
314+ return & kubeletConfigHandler {
315+ ownerObject : existingCM ,
316+ mcoKc : decodeKc ,
317+ poolName : mcp .Name ,
318+ setCtrlRef : controllerutil .SetControllerReference ,
319+ }, successStepWithRetry
320+ }
321+ }
322+
323+ step := intreconcile .StepNormalSkip (fmt .Errorf ("MachineConfigPool of KubeletConfig %s is paused" , kcKey .Name ))
324+ step .Result = ctrl.Result {Requeue : true , RequeueAfter : MachineConfigPoolPausedRetryPeriod }
325+ return nil , step
279326 }
327+
280328 return & kubeletConfigHandler {
281329 ownerObject : mcoKc ,
282330 mcoKc : mcoKc ,
283331 poolName : mcp .Name ,
284332 setCtrlRef : controllerutil .SetControllerReference ,
285- }, nil
333+ }, intreconcile . StepNormalSucess ( "" )
286334
287335 case platform .HyperShift :
288336 cmKc := & corev1.ConfigMap {}
289337 if err := r .Client .Get (ctx , kcKey , cmKc ); err != nil {
290- return nil , err
338+ return nil , FailedConfigMapUpdateStep ( kcKey . Name , err )
291339 }
292340
293341 nodePoolName := cmKc .Labels [HyperShiftNodePoolLabel ]
294342 kcData := cmKc .Data [HyperShiftConfigMapConfigKey ]
295343 mcoKc , err := kubeletconfig .DecodeFromData ([]byte (kcData ), r .Scheme )
296344 if err != nil {
297- return nil , err
345+ return nil , FailedConfigMapUpdateStep ( kcKey . Name , err )
298346 }
299347
300348 // nothing we care about, and we can't do much anyway
301349 if mcoKc .Spec .KubeletConfig == nil {
302350 klog .InfoS ("detected KubeletConfig with empty payload, ignoring" , "name" , kcKey .Name )
303- return nil , & InvalidKubeletConfig { ObjectName : kcKey .Name }
351+ return nil , intreconcile . StepNormalSkip ( fmt . Errorf ( "Invalid KubeletConfig %s" , kcKey .Name ))
304352 }
305353 return & kubeletConfigHandler {
306354 ownerObject : cmKc ,
@@ -312,9 +360,19 @@ func (r *KubeletConfigReconciler) makeKCHandlerForPlatform(ctx context.Context,
312360 setCtrlRef : func (owner , controlled metav1.Object , scheme * runtime.Scheme , opts ... controllerutil.OwnerReferenceOption ) error {
313361 return nil
314362 },
315- }, nil
363+ }, intreconcile . StepNormalSucess ( "" )
316364 }
317- return nil , fmt .Errorf ("unsupported platform: %s" , r .Platform )
365+ return nil , FailedConfigMapUpdateStep (kcKey .Name , fmt .Errorf ("unsupported platform: %s" , r .Platform ))
366+ }
367+
368+ func stepNormalSkipForPausedupdates (s string , args ... any ) intreconcile.Step {
369+ step := intreconcile .StepNormalSkip (fmt .Errorf (s , args ... ))
370+ step .Result = ctrl.Result {Requeue : true , RequeueAfter : MachineConfigPoolPausedRetryPeriod }
371+ return step
372+ }
373+
374+ func FailedConfigMapUpdateStep (objName string , err error ) intreconcile.Step {
375+ return intreconcile .StepWarning (fmt .Errorf ("Failed to update RTE config from kubelet config %s: %v" , objName , err ))
318376}
319377
320378func (r * KubeletConfigReconciler ) deleteConfigMap (ctx context.Context , instance * nropv1.NUMAResourcesOperator , kcKey client.ObjectKey ) (* corev1.ConfigMap , bool , error ) {
0 commit comments