From 82314e372d16d1067983d467002d23fbfa602698 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Fri, 15 May 2026 14:57:49 +0200 Subject: [PATCH 01/12] api: helper: add Name() helper for nodegroup.Tree Exposing the name is useful for logging purposes; no logic is planned upon the name of the objects. Signed-off-by: Francesco Romani --- api/v1/helper/nodegroup/nodegroup.go | 10 ++++++ api/v1/helper/nodegroup/nodegroup_test.go | 39 +++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/api/v1/helper/nodegroup/nodegroup.go b/api/v1/helper/nodegroup/nodegroup.go index 5e1fb9eb67..9fb729e4de 100644 --- a/api/v1/helper/nodegroup/nodegroup.go +++ b/api/v1/helper/nodegroup/nodegroup.go @@ -40,6 +40,16 @@ type Tree struct { MachineConfigPools []*mcov1.MachineConfigPool } +func (ttr Tree) Name() string { + if ttr.NodeGroup == nil { + return "" + } + if ttr.NodeGroup.PoolName == nil { + return "" + } + return *ttr.NodeGroup.PoolName +} + // Clone creates a deepcopy of a Tree func (ttr Tree) Clone() Tree { ret := Tree{ diff --git a/api/v1/helper/nodegroup/nodegroup_test.go b/api/v1/helper/nodegroup/nodegroup_test.go index 82058a9c79..677160b7b1 100644 --- a/api/v1/helper/nodegroup/nodegroup_test.go +++ b/api/v1/helper/nodegroup/nodegroup_test.go @@ -35,6 +35,45 @@ import ( nropv1 "github.com/openshift-kni/numaresources-operator/api/v1" ) +func TestTreeName(t *testing.T) { + type testCase struct { + name string + tree Tree + expected string + } + testCases := []testCase{ + { + name: "zero value tree", + tree: Tree{}, + expected: "", + }, + { + name: "nil PoolName", + tree: Tree{ + NodeGroup: &nropv1.NodeGroup{}, + }, + expected: "", + }, + { + name: "with PoolName", + tree: Tree{ + NodeGroup: &nropv1.NodeGroup{ + PoolName: ptr.To("worker-cnf"), + }, + }, + expected: "worker-cnf", + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + got := tc.tree.Name() + if got != tc.expected { + t.Errorf("expected %q, got %q", tc.expected, got) + } + }) + } +} + func TestFindTreesOpenshift(t *testing.T) { mcpList := mcov1.MachineConfigPoolList{ Items: []mcov1.MachineConfigPool{ From a49b057925c5aaa955420fd7f5e8acf21a5591fb Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Fri, 15 May 2026 14:59:18 +0200 Subject: [PATCH 02/12] status: conditioninfo: add UpdateMessage() this allow to update an existing message, because, by design, WithMessage() want to avoid overwriting. Will be used to summarize the status in the upcoming "vertical" split of the NodeGroups reconciliation AI-attribution: AIA PAI CeNc Hin R claude-4.6-opus-1M v1.0 Signed-off-by: Francesco Romani --- pkg/status/conditioninfo/conditioninfo.go | 12 ++++++++++++ pkg/status/conditioninfo/conditioninfo_test.go | 14 ++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/pkg/status/conditioninfo/conditioninfo.go b/pkg/status/conditioninfo/conditioninfo.go index 32d417bcd7..72e383c820 100644 --- a/pkg/status/conditioninfo/conditioninfo.go +++ b/pkg/status/conditioninfo/conditioninfo.go @@ -49,6 +49,18 @@ func (ci ConditionInfo) WithMessage(message string) ConditionInfo { return ret } +// UpdateMessage overrides the ConditionInfo message with the given value. +// If a message is already set, the new value is prepended to it. +func (ci ConditionInfo) UpdateMessage(message string) ConditionInfo { + ret := ci + if ret.Message != "" { + ret.Message = message + "; " + ret.Message + } else { + ret.Message = message + } + return ret +} + // Available returns a ConditionInfo ready to build // an Available condition func Available() ConditionInfo { diff --git a/pkg/status/conditioninfo/conditioninfo_test.go b/pkg/status/conditioninfo/conditioninfo_test.go index a46808e265..8cbd477a93 100644 --- a/pkg/status/conditioninfo/conditioninfo_test.go +++ b/pkg/status/conditioninfo/conditioninfo_test.go @@ -49,6 +49,20 @@ func TestProgressing(t *testing.T) { assert.Equal(t, cond.Type, status.ConditionProgressing) } +func TestUpdateMessageEmpty(t *testing.T) { + cond := ConditionInfo{} + cond2 := cond.UpdateMessage("foobar") + assert.Empty(t, cond.Message) // original object not mutated + assert.Equal(t, cond2.Message, "foobar") +} + +func TestUpdateMessageExisting(t *testing.T) { + cond := ConditionInfo{Message: "existing error"} + cond2 := cond.UpdateMessage("summary") + assert.Equal(t, cond.Message, "existing error") // original object not mutated + assert.Equal(t, cond2.Message, "summary; existing error") +} + func TestDegradedFromError(t *testing.T) { cond1 := DegradedFromError(nil) assert.Equal(t, cond1.Type, status.ConditionDegraded) From 4ea4bf5cfd18cf1f1445e3cead2e4eb5df1aeee6 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Fri, 15 May 2026 15:00:34 +0200 Subject: [PATCH 03/12] reconcile: step: add helpers add helpers which we will use in the "vertical split" of nodegroup reconciliation. Naming explanation: the split is about how the reconciliation makes progress. In the model we had till now, retroactively called "horizontal split", each NodeGroup must make progress on each reconciliation stage before any of these can progress to the next stage. With the upcoming proposed approach, the "vertical split", each NodeGroup will advance independently on the reconciliation step, and only the global reported status is set pulled to the slowest state. AI-attribution: AIA PAI Ce Hin R claude-4.6-opus-1M v1.0 Signed-off-by: Francesco Romani --- internal/reconcile/step.go | 19 +++++++++++++++++++ internal/reconcile/step_test.go | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/internal/reconcile/step.go b/internal/reconcile/step.go index 57b2425838..dd3a7a975b 100644 --- a/internal/reconcile/step.go +++ b/internal/reconcile/step.go @@ -43,6 +43,16 @@ func (rs Step) EarlyStop() bool { return rs.ConditionInfo.Type != status.ConditionAvailable } +// Ongoing returns true if the reconciliation is still ongoing/progressing +func (rs Step) Ongoing() bool { + return rs.Result.RequeueAfter > 0 +} + +// Failed returns true if the reconciliation was not succesfull +func (rs Step) Failed() bool { + return rs.Error != nil +} + // WithReason set the existing reason with the given value, // if not set already and returns a new updated Step func (rs Step) WithReason(reason string) Step { @@ -63,6 +73,15 @@ func (rs Step) WithMessage(message string) Step { } } +// UpdateMessage overrides the message, prepending to any existing one, and returns a new updated Step +func (rs Step) UpdateMessage(message string) Step { + return Step{ + Result: rs.Result, + ConditionInfo: rs.ConditionInfo.UpdateMessage(message), + Error: rs.Error, + } +} + // StepSuccess returns a Step which tells the caller // the reconciliation attempt was completed successfully func StepSuccess() Step { diff --git a/internal/reconcile/step_test.go b/internal/reconcile/step_test.go index 26e681def4..2800274063 100644 --- a/internal/reconcile/step_test.go +++ b/internal/reconcile/step_test.go @@ -41,3 +41,35 @@ func TestStepFailed(t *testing.T) { assert.False(t, st.Done()) assert.True(t, st.EarlyStop()) } + +func TestStepOngoingIsOngoing(t *testing.T) { + st := StepOngoing(5 * time.Second) + assert.True(t, st.Ongoing()) + assert.False(t, st.Failed()) +} + +func TestStepFailedIsFailed(t *testing.T) { + st := StepFailed(errors.New("fake error")) + assert.True(t, st.Failed()) + assert.False(t, st.Ongoing()) +} + +func TestStepSuccessIsNeitherOngoingNorFailed(t *testing.T) { + st := StepSuccess() + assert.False(t, st.Ongoing()) + assert.False(t, st.Failed()) +} + +func TestStepUpdateMessageEmpty(t *testing.T) { + st := StepOngoing(5 * time.Second) + st2 := st.UpdateMessage("summary") + assert.Empty(t, st.ConditionInfo.Message) + assert.Equal(t, st2.ConditionInfo.Message, "summary") +} + +func TestStepUpdateMessageExisting(t *testing.T) { + st := StepFailed(errors.New("fake error")) + st2 := st.UpdateMessage("summary") + assert.Equal(t, st.ConditionInfo.Message, "fake error") + assert.Equal(t, st2.ConditionInfo.Message, "summary; fake error") +} From ef65aeb7cf27e543b24a6c530be35c582cc22cfc Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Fri, 15 May 2026 15:28:30 +0200 Subject: [PATCH 04/12] controller: chore: move helper around to reduce the diff noise. No changes in behavior, trivial code movement. AI-attribution: AIA PAI Ce Hin R claude-4.6-opus-1M v1.0 Signed-off-by: Francesco Romani --- .../numaresourcesoperator_controller.go | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/internal/controller/numaresourcesoperator_controller.go b/internal/controller/numaresourcesoperator_controller.go index ccf3e2c60b..27e4995cdc 100644 --- a/internal/controller/numaresourcesoperator_controller.go +++ b/internal/controller/numaresourcesoperator_controller.go @@ -451,17 +451,6 @@ func syncMachineConfigPoolsStatuses(instanceName string, trees []nodegroupv1.Tre return mcpStatuses, "" } -func extractMCPStatus(mcp *machineconfigv1.MachineConfigPool, forwardMCPConds bool) nropv1.MachineConfigPool { - mcpStatus := nropv1.MachineConfigPool{ - Name: mcp.Name, - } - if !forwardMCPConds { - return mcpStatus - } - mcpStatus.Conditions = mcp.Status.Conditions - return mcpStatus -} - func syncMachineConfigPoolNodeGroupConfigStatuses(mcpStatuses []nropv1.MachineConfigPool, trees []nodegroupv1.Tree) []nropv1.MachineConfigPool { klog.V(4).InfoS("Machine Config Pool Node Group Status Sync start", "mcpStatuses", len(mcpStatuses), "trees", len(trees)) defer klog.V(4).Info("Machine Config Pool Node Group Status Sync stop") @@ -491,15 +480,6 @@ func syncMachineConfigPoolNodeGroupConfigStatuses(mcpStatuses []nropv1.MachineCo return updatedMcpStatuses } -func getMachineConfigPoolStatusByName(mcpStatuses []nropv1.MachineConfigPool, name string) nropv1.MachineConfigPool { - for _, mcpStatus := range mcpStatuses { - if mcpStatus.Name == name { - return mcpStatus - } - } - return nropv1.MachineConfigPool{Name: name} -} - func (r *NUMAResourcesOperatorReconciler) syncNUMAResourcesOperatorResources(ctx context.Context, instance *nropv1.NUMAResourcesOperator, existing *rtestate.ExistingManifests, trees []nodegroupv1.Tree) ([]poolDaemonSet, error) { klog.V(4).InfoS("RTESync start", "trees", len(trees)) defer klog.V(4).Info("RTESync stop") @@ -837,3 +817,23 @@ func getTreesByNodeGroup(ctx context.Context, cli client.Client, nodeGroups []nr return nil, fmt.Errorf("unsupported platform") } } + +func extractMCPStatus(mcp *machineconfigv1.MachineConfigPool, forwardMCPConds bool) nropv1.MachineConfigPool { + mcpStatus := nropv1.MachineConfigPool{ + Name: mcp.Name, + } + if !forwardMCPConds { + return mcpStatus + } + mcpStatus.Conditions = mcp.Status.Conditions + return mcpStatus +} + +func getMachineConfigPoolStatusByName(mcpStatuses []nropv1.MachineConfigPool, name string) nropv1.MachineConfigPool { + for _, mcpStatus := range mcpStatuses { + if mcpStatus.Name == name { + return mcpStatus + } + } + return nropv1.MachineConfigPool{Name: name} +} From 1b8da71f28f28ea54375f5eb7e15b7bf85443a7d Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Fri, 15 May 2026 16:00:28 +0200 Subject: [PATCH 05/12] objectstate: rte: add per-tree functions with compat wrappers Add per-tree variants of the key ExistingManifests methods: - TreeAgnostic(mf): returns tree-independent object states - PerTreeState(mf, tree): returns object states for a single tree - FromClientTreeAgnostic(): creates ExistingManifests without tree data - PerTree(ctx, cli, tree): creates a per-tree ExistingManifests clone - MachineConfigsStateForTree(mf, tree): per-tree machine config state The existing State(), FromClient(), and MachineConfigsState() are kept as compatibility wrappers that delegate to the new per-tree functions, so the controller remains unchanged. AI-attribution: AIA PAI CeNc Hin R claude-4.6-opus-1M v1.0 Signed-off-by: Francesco Romani --- pkg/objectstate/rte/machineconfigpool.go | 98 +++++++++++++----------- pkg/objectstate/rte/rte.go | 62 ++++++++++----- 2 files changed, 97 insertions(+), 63 deletions(-) diff --git a/pkg/objectstate/rte/machineconfigpool.go b/pkg/objectstate/rte/machineconfigpool.go index 4994e9890c..e47eddb192 100644 --- a/pkg/objectstate/rte/machineconfigpool.go +++ b/pkg/objectstate/rte/machineconfigpool.go @@ -137,65 +137,73 @@ type MachineConfigObjectState struct { } func (em *ExistingManifests) MachineConfigsState(mf Manifests) ([]MachineConfigObjectState, sets.Set[string]) { + var allRet []MachineConfigObjectState + allPaused := sets.New[string]() + for _, tree := range em.trees { + ret, paused := em.MachineConfigsStateForTree(mf, tree) + allRet = append(allRet, ret...) + allPaused = allPaused.Union(paused) + } + return allRet, allPaused +} + +func (em *ExistingManifests) MachineConfigsStateForTree(mf Manifests, tree nodegroupv1.Tree) ([]MachineConfigObjectState, sets.Set[string]) { var ret []MachineConfigObjectState pausedMCPNames := sets.New[string]() if mf.Core.MachineConfig == nil { return ret, pausedMCPNames } - for _, tree := range em.trees { - isCustomPolicy := annotations.IsCustomPolicyEnabled(tree.NodeGroup.Annotations) - for _, mcp := range tree.MachineConfigPools { - // do not update state when MachineConfigPool is paused - if mcp.Spec.Paused { - pausedMCPNames.Insert(mcp.Name) - continue - } - - mcName := objectnames.GetMachineConfigName(em.instance.Name, mcp.Name) - if mcp.Spec.MachineConfigSelector == nil { - klog.Warningf("the machine config pool %q does not have machine config selector", mcp.Name) - continue - } - - existingMachineConfig, ok := em.machineConfigs[mcName] - if !ok { - klog.Warningf("failed to find machine config %q in namespace %q", mcName, em.namespace) - continue - } - - if !isCustomPolicy { - // caution here: we want a *nil interface value*, not an *interface which points to nil*. - // the latter would lead to apparently correct code leading to runtime panics. See: - // https://trstringer.com/go-nil-interface-and-interface-with-nil-concrete-value/ - // (and many other docs like this) - ret = append(ret, MachineConfigObjectState{ - ObjectState: objectstate.ObjectState{ - Existing: existingMachineConfig.machineConfig, - Error: existingMachineConfig.machineConfigError, - Desired: nil, - }, - PoolName: mcp.Name, - WaitForUpdated: IsMachineConfigPoolUpdatedAfterDeletion, - }) - continue - } - - desiredMachineConfig := mf.Core.MachineConfig.DeepCopy() - desiredMachineConfig.Name = mcName - desiredMachineConfig.Labels = GetMachineConfigLabel(mcp) + isCustomPolicy := annotations.IsCustomPolicyEnabled(tree.NodeGroup.Annotations) + for _, mcp := range tree.MachineConfigPools { + if mcp.Spec.Paused { + pausedMCPNames.Insert(mcp.Name) + continue + } + mcName := objectnames.GetMachineConfigName(em.instance.Name, mcp.Name) + if mcp.Spec.MachineConfigSelector == nil { + klog.Warningf("the machine config pool %q does not have machine config selector", mcp.Name) + continue + } + + existingMachineConfig, ok := em.machineConfigs[mcName] + if !ok { + klog.Warningf("failed to find machine config %q in namespace %q", mcName, em.namespace) + continue + } + + if !isCustomPolicy { + // caution here: we want a *nil interface value*, not an *interface which points to nil*. + // the latter would lead to apparently correct code leading to runtime panics. See: + // https://trstringer.com/go-nil-interface-and-interface-with-nil-concrete-value/ + // (and many other docs like this) ret = append(ret, MachineConfigObjectState{ ObjectState: objectstate.ObjectState{ Existing: existingMachineConfig.machineConfig, Error: existingMachineConfig.machineConfigError, - Desired: desiredMachineConfig, - Compare: compare.Object, - Merge: merge.ObjectForUpdate, + Desired: nil, }, PoolName: mcp.Name, - WaitForUpdated: IsMachineConfigPoolUpdated, + WaitForUpdated: IsMachineConfigPoolUpdatedAfterDeletion, }) + continue } + + desiredMachineConfig := mf.Core.MachineConfig.DeepCopy() + desiredMachineConfig.Name = mcName + desiredMachineConfig.Labels = GetMachineConfigLabel(mcp) + + ret = append(ret, MachineConfigObjectState{ + ObjectState: objectstate.ObjectState{ + Existing: existingMachineConfig.machineConfig, + Error: existingMachineConfig.machineConfigError, + Desired: desiredMachineConfig, + Compare: compare.Object, + Merge: merge.ObjectForUpdate, + }, + PoolName: mcp.Name, + WaitForUpdated: IsMachineConfigPoolUpdated, + }) } return ret, pausedMCPNames } diff --git a/pkg/objectstate/rte/rte.go b/pkg/objectstate/rte/rte.go index 5c59715e10..18cee89238 100644 --- a/pkg/objectstate/rte/rte.go +++ b/pkg/objectstate/rte/rte.go @@ -169,6 +169,15 @@ func SkipManifestUpdate(mcpName string, gdm *GeneratedDesiredManifest) error { } func (em *ExistingManifests) State(mf Manifests) []objectstate.ObjectState { + ret := em.TreeAgnostic(mf) + klog.V(4).InfoS("RTE manifests processing trees", "method", em.helper.Name()) + for _, tree := range em.trees { + ret = append(ret, em.PerTreeState(mf, tree)...) + } + return ret +} + +func (em *ExistingManifests) TreeAgnostic(mf Manifests) []objectstate.ObjectState { ret := []objectstate.ObjectState{ { Existing: em.existing.Core.ServiceAccount, @@ -247,14 +256,6 @@ func (em *ExistingManifests) State(mf Manifests) []objectstate.ObjectState { }) } - klog.V(4).InfoS("RTE manifests processing trees", "method", em.helper.Name()) - - for _, tree := range em.trees { - ret = append(ret, em.helper.FindState(mf, tree)...) - } - - // extra: metrics - ret = append(ret, objectstate.ObjectState{ Existing: em.existing.Metrics.Service, Error: em.errs.Metrics.Service, @@ -265,12 +266,26 @@ func (em *ExistingManifests) State(mf Manifests) []objectstate.ObjectState { return ret } +func (em *ExistingManifests) PerTreeState(mf Manifests, tree nodegroupv1.Tree) []objectstate.ObjectState { + return em.helper.FindState(mf, tree) +} + func (em *ExistingManifests) WithManifestsUpdater(updater GenerateDesiredManifestUpdater) *ExistingManifests { em.updater = updater return em } func FromClient(ctx context.Context, cli client.Client, plat platform.Platform, mf Manifests, instance *nropv1.NUMAResourcesOperator, trees []nodegroupv1.Tree, namespace string) *ExistingManifests { + ret := FromClientTreeAgnostic(ctx, cli, plat, mf, instance, namespace) + ret.trees = trees + klog.V(4).InfoS("RTE manifests processing trees", "method", ret.helper.Name()) + for _, tree := range trees { + ret.helper.UpdateFromClient(ctx, cli, tree) + } + return ret +} + +func FromClientTreeAgnostic(ctx context.Context, cli client.Client, plat platform.Platform, mf Manifests, instance *nropv1.NUMAResourcesOperator, namespace string) *ExistingManifests { ret := ExistingManifests{ existing: Manifests{ Core: rtemanifests.New(plat), @@ -278,7 +293,6 @@ func FromClient(ctx context.Context, cli client.Client, plat platform.Platform, daemonSets: make(map[string]daemonSetManifest), plat: plat, instance: instance, - trees: trees, namespace: namespace, updater: SkipManifestUpdate, } @@ -299,7 +313,6 @@ func FromClient(ctx context.Context, cli client.Client, plat platform.Platform, } } - // objects that should present in the single replica ro := &rbacv1.Role{} if ok := getObject(ctx, cli, keyFor(mf.Core.Role), ro, &ret.errs.Core.Role); ok { ret.existing.Core.Role = ro @@ -325,8 +338,6 @@ func FromClient(ctx context.Context, cli client.Client, plat platform.Platform, ret.existing.Core.ServiceAccount = sa } - klog.V(4).InfoS("RTE manifests processing trees", "method", ret.helper.Name()) - if plat != platform.Kubernetes { scc := &securityv1.SecurityContextConstraints{} if ok := getObject(ctx, cli, keyFor(mf.Core.SecurityContextConstraint), scc, &ret.errs.Core.SCC); ok { @@ -340,12 +351,6 @@ func FromClient(ctx context.Context, cli client.Client, plat platform.Platform, ret.machineConfigs = make(map[string]machineConfigManifest) } - // should have the amount of resources equals to the amount of node groups - for _, tree := range trees { - ret.helper.UpdateFromClient(ctx, cli, tree) - } - - // extra: metrics ser := &corev1.Service{} if ok := getObject(ctx, cli, keyFor(mf.Metrics.Service), ser, &ret.errs.Metrics.Service); ok { ret.existing.Metrics.Service = ser @@ -366,6 +371,27 @@ func FromClient(ctx context.Context, cli client.Client, plat platform.Platform, return &ret } +func (em *ExistingManifests) PerTree(ctx context.Context, cli client.Client, tree nodegroupv1.Tree) *ExistingManifests { + ret := &ExistingManifests{ + existing: em.existing, + errs: em.errs, + daemonSets: make(map[string]daemonSetManifest), + machineConfigs: make(map[string]machineConfigManifest), + plat: em.plat, + instance: em.instance, + namespace: em.namespace, + updater: em.updater, + } + if em.plat == platform.OpenShift { + ret.helper = machineConfigPoolFinder{em: ret, instance: em.instance, namespace: em.namespace} + } else { + ret.helper = nodeGroupFinder{em: ret, instance: em.instance, namespace: em.namespace} + } + klog.V(4).InfoS("RTE manifests processing tree", "method", ret.helper.Name()) + ret.helper.UpdateFromClient(ctx, cli, tree) + return ret +} + // getObject is a shortcut to don't type the error twice func getObject(ctx context.Context, cli client.Client, key client.ObjectKey, obj client.Object, err *error) bool { *err = cli.Get(ctx, key, obj) From b0e9726924db311fb3b7d57a3198b673b7066b71 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Wed, 27 May 2026 11:24:19 +0200 Subject: [PATCH 06/12] nrop: chore: change signature to enable future refactoring trivial signature change to enable upcoming refactoring. No intended change in behavior. AI-attribution: AIA PAI Ce Hin R claude-4.6-opus-1M v1.0 Signed-off-by: Francesco Romani --- .../controller/numaresourcesoperator_controller.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/internal/controller/numaresourcesoperator_controller.go b/internal/controller/numaresourcesoperator_controller.go index 27e4995cdc..db0d0da9a4 100644 --- a/internal/controller/numaresourcesoperator_controller.go +++ b/internal/controller/numaresourcesoperator_controller.go @@ -235,7 +235,7 @@ func (r *NUMAResourcesOperatorReconciler) reconcileResourceAPI(ctx context.Conte func (r *NUMAResourcesOperatorReconciler) reconcileResourceMachineConfig(ctx context.Context, instance *nropv1.NUMAResourcesOperator, existing *rtestate.ExistingManifests, trees []nodegroupv1.Tree) intreconcile.Step { // we need to sync machine configs first and wait for the MachineConfigPool updates // before checking additional components for updates - waitByPool, pausedMCPNames, err := r.syncMachineConfigs(ctx, instance, existing, trees) + waitByPool, pausedMCPNames, err := r.syncMachineConfigs(ctx, instance, existing, trees...) if err != nil { r.Recorder.Eventf(instance, corev1.EventTypeWarning, "FailedMCSync", "Failed to set up machine configuration for worker nodes: %v", err) err = fmt.Errorf("failed to sync machine configs: %w", err) @@ -245,7 +245,7 @@ func (r *NUMAResourcesOperatorReconciler) reconcileResourceMachineConfig(ctx con // MCO needs to update the SELinux context removal and other stuff, and need to trigger a reboot. // It can take a while. - mcpStatuses, mcpNamePending := syncMachineConfigPoolsStatuses(instance.Name, trees, r.ForwardMCPConds, waitByPool) + mcpStatuses, mcpNamePending := syncMachineConfigPoolsStatuses(instance.Name, r.ForwardMCPConds, waitByPool, trees...) instance.Status.MachineConfigPools = mcpStatuses instance.Status.Conditions = updateMachineConfigPoolPausedCondition(instance.Status.Conditions, instance.Generation, pausedMCPNames) @@ -253,7 +253,7 @@ func (r *NUMAResourcesOperatorReconciler) reconcileResourceMachineConfig(ctx con // the Machine Config Pool still did not apply the machine config, wait for one minute return intreconcile.StepOngoing(numaResourcesRetryPeriod).WithReason("MachineConfigPoolIsUpdating").WithMessage(mcpNamePending + " is updating") } - instance.Status.MachineConfigPools = syncMachineConfigPoolNodeGroupConfigStatuses(instance.Status.MachineConfigPools, trees) + instance.Status.MachineConfigPools = syncMachineConfigPoolNodeGroupConfigStatuses(instance.Status.MachineConfigPools, trees...) return intreconcile.StepSuccess() } @@ -390,7 +390,7 @@ func (r *NUMAResourcesOperatorReconciler) syncNodeResourceTopologyAPI(ctx contex return (updatedCount == len(objStates)), err } -func (r *NUMAResourcesOperatorReconciler) syncMachineConfigs(ctx context.Context, instance *nropv1.NUMAResourcesOperator, existing *rtestate.ExistingManifests, trees []nodegroupv1.Tree) (map[string]rtestate.MCPWaitForUpdatedFunc, sets.Set[string], error) { +func (r *NUMAResourcesOperatorReconciler) syncMachineConfigs(ctx context.Context, instance *nropv1.NUMAResourcesOperator, existing *rtestate.ExistingManifests, trees ...nodegroupv1.Tree) (map[string]rtestate.MCPWaitForUpdatedFunc, sets.Set[string], error) { klog.V(4).InfoS("Machine Config Sync start", "trees", len(trees)) defer klog.V(4).Info("Machine Config Sync stop") @@ -426,7 +426,7 @@ func (r *NUMAResourcesOperatorReconciler) syncMachineConfigs(ctx context.Context return waitByPool, pausedMCPNames, err } -func syncMachineConfigPoolsStatuses(instanceName string, trees []nodegroupv1.Tree, forwardMCPConds bool, waitByPool map[string]rtestate.MCPWaitForUpdatedFunc) ([]nropv1.MachineConfigPool, string) { +func syncMachineConfigPoolsStatuses(instanceName string, forwardMCPConds bool, waitByPool map[string]rtestate.MCPWaitForUpdatedFunc, trees ...nodegroupv1.Tree) ([]nropv1.MachineConfigPool, string) { klog.V(4).InfoS("Machine Config Pools Status Sync start", "trees", len(trees)) defer klog.V(4).Info("Machine Config Pools Status Sync stop") @@ -451,7 +451,7 @@ func syncMachineConfigPoolsStatuses(instanceName string, trees []nodegroupv1.Tre return mcpStatuses, "" } -func syncMachineConfigPoolNodeGroupConfigStatuses(mcpStatuses []nropv1.MachineConfigPool, trees []nodegroupv1.Tree) []nropv1.MachineConfigPool { +func syncMachineConfigPoolNodeGroupConfigStatuses(mcpStatuses []nropv1.MachineConfigPool, trees ...nodegroupv1.Tree) []nropv1.MachineConfigPool { klog.V(4).InfoS("Machine Config Pool Node Group Status Sync start", "mcpStatuses", len(mcpStatuses), "trees", len(trees)) defer klog.V(4).Info("Machine Config Pool Node Group Status Sync stop") From 670c1f4d9b1ca1156a0854b2a2edfaa1d361f6aa Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Fri, 15 May 2026 16:29:56 +0200 Subject: [PATCH 07/12] controller: nrop: add per-tree reconcile functions and helpers Add the building blocks for per-tree reconciliation without changing existing code paths: - perTreeResult type to capture per-tree outcomes - reconcilePerTreeMachineConfig: per-tree machine config reconciliation - reconcilePerTreeDaemonSet: per-tree daemonset reconciliation - setupTreeAgnosticManifests: extracted tree-agnostic manifest setup - applyObjects: generic object state applier - collectDaemonSets, reduceTreeResults, shouldReplaceStep, treeSummaryMessage: aggregation helpers These functions are not yet wired into the reconcile loop; they will replace the existing all-trees-at-once functions in a follow-up. AI-attribution: AIA PAI CeNc Hin R claude-4.6-opus-1M v1.0 Signed-off-by: Francesco Romani --- .../numaresourcesoperator_controller.go | 197 +++++++++++++++++- 1 file changed, 195 insertions(+), 2 deletions(-) diff --git a/internal/controller/numaresourcesoperator_controller.go b/internal/controller/numaresourcesoperator_controller.go index db0d0da9a4..8abe9bcfbc 100644 --- a/internal/controller/numaresourcesoperator_controller.go +++ b/internal/controller/numaresourcesoperator_controller.go @@ -64,6 +64,7 @@ import ( "github.com/openshift-kni/numaresources-operator/pkg/images" "github.com/openshift-kni/numaresources-operator/pkg/loglevel" "github.com/openshift-kni/numaresources-operator/pkg/objectnames" + "github.com/openshift-kni/numaresources-operator/pkg/objectstate" apistate "github.com/openshift-kni/numaresources-operator/pkg/objectstate/api" rtestate "github.com/openshift-kni/numaresources-operator/pkg/objectstate/rte" rteupdate "github.com/openshift-kni/numaresources-operator/pkg/objectupdate/rte" @@ -261,7 +262,7 @@ func (r *NUMAResourcesOperatorReconciler) reconcileResourceMachineConfig(ctx con func (r *NUMAResourcesOperatorReconciler) reconcileResourceDaemonSet(ctx context.Context, instance *nropv1.NUMAResourcesOperator, existing *rtestate.ExistingManifests, trees []nodegroupv1.Tree) ([]poolDaemonSet, intreconcile.Step) { daemonSetsInfoPerPool, err := r.syncNUMAResourcesOperatorResources(ctx, instance, existing, trees) if err != nil { - r.Recorder.Eventf(instance, corev1.EventTypeWarning, "FailedRTECreate", "Failed to create Resource-Topology-Exporter DaemonSets: %v", err) + r.Recorder.Eventf(instance, corev1.EventTypeWarning, "FailedRTECreate", "Failed to create RTE DaemonSets: %v", err) err = fmt.Errorf("FailedRTESync: %w", err) return nil, intreconcile.StepFailed(err) } @@ -270,7 +271,7 @@ func (r *NUMAResourcesOperatorReconciler) reconcileResourceDaemonSet(ctx context return nil, intreconcile.StepSuccess() } - r.Recorder.Eventf(instance, corev1.EventTypeNormal, "SuccessfulRTECreate", "Created Resource-Topology-Exporter DaemonSets") + r.Recorder.Eventf(instance, corev1.EventTypeNormal, "SuccessfulRTECreate", "Created RTE DaemonSets") dssWithReadyStatus, dsNamePending, err := r.syncDaemonSetsStatuses(ctx, r.Client, daemonSetsInfoPerPool) instance.Status.DaemonSets = dssWithReadyStatus @@ -837,3 +838,195 @@ func getMachineConfigPoolStatusByName(mcpStatuses []nropv1.MachineConfigPool, na } return nropv1.MachineConfigPool{Name: name} } + +type perTreeResult struct { + dsInfo []poolDaemonSet + mcpStatuses []nropv1.MachineConfigPool + pausedMCPNames sets.Set[string] + step intreconcile.Step +} + +func (r *NUMAResourcesOperatorReconciler) reconcilePerTreeMachineConfig(ctx context.Context, instance *nropv1.NUMAResourcesOperator, existing *rtestate.ExistingManifests, tree nodegroupv1.Tree) perTreeResult { + result := perTreeResult{} + + waitByPool, pausedMCPNames, err := r.syncMachineConfigs(ctx, instance, existing, tree) + result.pausedMCPNames = pausedMCPNames + if err != nil { + r.Recorder.Eventf(instance, corev1.EventTypeWarning, "FailedMCSync", "Failed to set up machine configuration for worker nodes: %v", err) + result.step = intreconcile.StepFailed(fmt.Errorf("failed to sync machine configs: %w", err)) + return result + } + r.Recorder.Eventf(instance, corev1.EventTypeNormal, "SuccessfulMCSync", "Enabled machine configuration for worker nodes") + + mcpStatuses, mcpNamePending := syncMachineConfigPoolsStatuses(instance.Name, r.ForwardMCPConds, waitByPool, tree) + result.mcpStatuses = mcpStatuses + + if mcpNamePending != "" { + result.step = intreconcile.StepOngoing(numaResourcesRetryPeriod).WithReason("MachineConfigPoolIsUpdating").WithMessage(mcpNamePending + " is updating") + return result + } + result.mcpStatuses = syncMachineConfigPoolNodeGroupConfigStatuses(result.mcpStatuses, tree) + + if result.pausedMCPNames.Len() > 0 { + result.step = intreconcile.StepOngoing(0) + } else { + result.step = intreconcile.StepSuccess() + } + return result +} + +func (r *NUMAResourcesOperatorReconciler) reconcilePerTreeDaemonSet(ctx context.Context, instance *nropv1.NUMAResourcesOperator, existing *rtestate.ExistingManifests, tree nodegroupv1.Tree) perTreeResult { + result := perTreeResult{} + + existing = existing.WithManifestsUpdater(func(poolName string, gdm *rtestate.GeneratedDesiredManifest) error { + err := daemonsetUpdater(poolName, gdm, r.RTEMetricsTLS) + if err != nil { + return err + } + result.dsInfo = append(result.dsInfo, poolDaemonSet{poolName, namespacedname.FromObject(gdm.DaemonSet)}) + return nil + }) + + if err := r.applyObjects(ctx, instance, existing.PerTreeState(r.RTEManifests, tree)); err != nil { + r.Recorder.Eventf(instance, corev1.EventTypeWarning, "FailedRTECreate", "Failed to create RTE DaemonSets: %v", err) + result.step = intreconcile.StepFailed(fmt.Errorf("FailedRTESync: %w", err)) + return result + } + r.Recorder.Eventf(instance, corev1.EventTypeNormal, "SuccessfulRTECreate", "Created RTE DaemonSets") + + for _, dsInfo := range result.dsInfo { + ds := appsv1.DaemonSet{} + dsKey := client.ObjectKey{ + Namespace: dsInfo.DaemonSet.Namespace, + Name: dsInfo.DaemonSet.Name, + } + if err := r.Client.Get(ctx, dsKey, &ds); err != nil { + result.step = intreconcile.StepFailed(err) + return result + } + if !isDaemonSetReady(&ds) { + result.step = intreconcile.StepOngoing(5 * time.Second).WithReason("DaemonSetIsUpdating").WithMessage(dsKey.String() + " is updating") + return result + } + } + + result.step = intreconcile.StepSuccess() + return result +} + +func (r *NUMAResourcesOperatorReconciler) setupTreeAgnosticManifests(ctx context.Context, instance *nropv1.NUMAResourcesOperator) error { + rteupdate.DaemonSetRolloutSettings(r.RTEManifests.Core.DaemonSet) + err := rteupdate.DaemonSetAffinitySettings(r.RTEManifests.Core.DaemonSet, r.RTEManifests.Core.DaemonSet.Spec.Template.Labels) + if err != nil { + klog.ErrorS(err, "failed to update RTE affinity settings") + } + + err = rteupdate.DaemonSetUserImageSettings(r.RTEManifests.Core.DaemonSet, instance.Spec.ExporterImage, r.Images.Preferred(), r.ImagePullPolicy) + if err != nil { + return err + } + + err = rteupdate.DaemonSetPauseContainerSettings(r.RTEManifests.Core.DaemonSet) + if err != nil { + return err + } + + err = loglevel.UpdatePodSpec(&r.RTEManifests.Core.DaemonSet.Spec.Template.Spec, manifests.ContainerNameRTE, instance.Spec.LogLevel) + if err != nil { + return err + } + + rteupdate.SecurityContextConstraint(r.RTEManifests.Core.SecurityContextConstraint, true) // force to legacy context + + return nil +} + +func (r *NUMAResourcesOperatorReconciler) applyObjects(ctx context.Context, instance *nropv1.NUMAResourcesOperator, objStates []objectstate.ObjectState) error { + klog.V(4).InfoS("Applying objects", "count", len(objStates)) + defer klog.V(4).InfoS("Applyied objects", "count", len(objStates)) + for _, objState := range objStates { + if objState.Error != nil { + klog.Warningf("error loading object: %v", objState.Error) + } + if objState.UpdateError != nil { + return fmt.Errorf("failed to update (%s) %s/%s: %w", objState.Desired.GetObjectKind().GroupVersionKind(), objState.Desired.GetNamespace(), objState.Desired.GetName(), objState.UpdateError) + } + err := controllerutil.SetControllerReference(instance, objState.Desired, r.Scheme) + if err != nil { + return fmt.Errorf("failed to set controller reference to %s %s: %w", objState.Desired.GetNamespace(), objState.Desired.GetName(), err) + } + _, _, err = apply.ApplyObject(ctx, r.Client, objState) + if err != nil { + return fmt.Errorf("failed to apply (%s) %s/%s: %w", objState.Desired.GetObjectKind().GroupVersionKind(), objState.Desired.GetNamespace(), objState.Desired.GetName(), err) + } + } + return nil +} + +func collectDaemonSets(dsInfos []poolDaemonSet) []nropv1.NamespacedName { + dssReady := make([]nropv1.NamespacedName, 0, len(dsInfos)) + for _, dsInfo := range dsInfos { + dssReady = append(dssReady, dsInfo.DaemonSet) + } + return dssReady +} + +func reducePerTreeResults(results []perTreeResult) perTreeResult { + acc := perTreeResult{ + pausedMCPNames: sets.New[string](), + step: intreconcile.StepSuccess(), + } + + var errorCount, ongoingCount int + for _, result := range results { + acc.mcpStatuses = append(acc.mcpStatuses, result.mcpStatuses...) + acc.pausedMCPNames = acc.pausedMCPNames.Union(result.pausedMCPNames) + + if result.step.Done() { + acc.dsInfo = append(acc.dsInfo, result.dsInfo...) + continue + } + + if result.step.Failed() { + errorCount++ + } else if result.step.Ongoing() { + ongoingCount++ + } + + if shouldReplaceStep(acc.step, result.step) { + acc.step = result.step + } + } + if !acc.step.Done() { + acc.step = acc.step.UpdateMessage(treeSummaryMessage(len(results), errorCount, ongoingCount)) + } + return acc +} + +func treeSummaryMessage(total, errors, ongoing int) string { + done := total - errors - ongoing + parts := make([]string, 0, 3) + if done > 0 { + parts = append(parts, fmt.Sprintf("%d/%d completed", done, total)) + } + if ongoing > 0 { + parts = append(parts, fmt.Sprintf("%d/%d updating", ongoing, total)) + } + if errors > 0 { + parts = append(parts, fmt.Sprintf("%d/%d failed", errors, total)) + } + return strings.Join(parts, ", ") +} + +func shouldReplaceStep(current, candidate intreconcile.Step) bool { + if current.Done() { + return true + } + if candidate.Failed() && !current.Failed() { + return true + } + if candidate.Ongoing() && current.Ongoing() { + return candidate.Result.RequeueAfter < current.Result.RequeueAfter + } + return false +} From 8196535a98f60355c62eb2046a1f4297e323ef49 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Wed, 27 May 2026 13:16:23 +0200 Subject: [PATCH 08/12] controller: nrop: wire reconcileResource to use per-tree functions Rewrite reconcileResource to use the per-tree reconcile functions introduced in the previous commit instead of the old horizontal functions. Use FromClientTreeAgnostic for tree-agnostic resource setup and PerTree for per-tree processing. Move dangling resource cleanup into reconcileResource and use horizontal orchestration: all trees complete machine config step first, then all trees complete daemonset step. AI-attribution: AIA PAI Ce Hin R claude-4.6-opus-1M v1.0 Signed-off-by: Francesco Romani --- .../numaresourcesoperator_controller.go | 202 ++++-------------- .../numaresourcesoperator_controller_test.go | 22 +- 2 files changed, 45 insertions(+), 179 deletions(-) diff --git a/internal/controller/numaresourcesoperator_controller.go b/internal/controller/numaresourcesoperator_controller.go index 8abe9bcfbc..7190613511 100644 --- a/internal/controller/numaresourcesoperator_controller.go +++ b/internal/controller/numaresourcesoperator_controller.go @@ -233,105 +233,67 @@ func (r *NUMAResourcesOperatorReconciler) reconcileResourceAPI(ctx context.Conte return intreconcile.StepSuccess() } -func (r *NUMAResourcesOperatorReconciler) reconcileResourceMachineConfig(ctx context.Context, instance *nropv1.NUMAResourcesOperator, existing *rtestate.ExistingManifests, trees []nodegroupv1.Tree) intreconcile.Step { - // we need to sync machine configs first and wait for the MachineConfigPool updates - // before checking additional components for updates - waitByPool, pausedMCPNames, err := r.syncMachineConfigs(ctx, instance, existing, trees...) - if err != nil { - r.Recorder.Eventf(instance, corev1.EventTypeWarning, "FailedMCSync", "Failed to set up machine configuration for worker nodes: %v", err) - err = fmt.Errorf("failed to sync machine configs: %w", err) - return intreconcile.StepFailed(err) +func (r *NUMAResourcesOperatorReconciler) reconcileResource(ctx context.Context, instance *nropv1.NUMAResourcesOperator, trees []nodegroupv1.Tree) intreconcile.Step { + if step := r.reconcileResourceAPI(ctx, instance, trees); step.EarlyStop() { + return step } - r.Recorder.Eventf(instance, corev1.EventTypeNormal, "SuccessfulMCSync", "Enabled machine configuration for worker nodes") - // MCO needs to update the SELinux context removal and other stuff, and need to trigger a reboot. - // It can take a while. - mcpStatuses, mcpNamePending := syncMachineConfigPoolsStatuses(instance.Name, r.ForwardMCPConds, waitByPool, trees...) - instance.Status.MachineConfigPools = mcpStatuses - instance.Status.Conditions = updateMachineConfigPoolPausedCondition(instance.Status.Conditions, instance.Generation, pausedMCPNames) - - if mcpNamePending != "" { - // the Machine Config Pool still did not apply the machine config, wait for one minute - return intreconcile.StepOngoing(numaResourcesRetryPeriod).WithReason("MachineConfigPoolIsUpdating").WithMessage(mcpNamePending + " is updating") + existing := rtestate.FromClientTreeAgnostic(ctx, r.Client, r.Platform, r.RTEManifests, instance, r.Namespace) + if err := r.setupTreeAgnosticManifests(ctx, instance); err != nil { + r.Recorder.Eventf(instance, corev1.EventTypeWarning, "FailedRTESetup", "Failed to setup Resource-Topology-Exporter tree-agnostic resources: %v", err) + return intreconcile.StepFailed(fmt.Errorf("FailedSharedResourceSync: %w", err)) } - instance.Status.MachineConfigPools = syncMachineConfigPoolNodeGroupConfigStatuses(instance.Status.MachineConfigPools, trees...) - - return intreconcile.StepSuccess() -} - -func (r *NUMAResourcesOperatorReconciler) reconcileResourceDaemonSet(ctx context.Context, instance *nropv1.NUMAResourcesOperator, existing *rtestate.ExistingManifests, trees []nodegroupv1.Tree) ([]poolDaemonSet, intreconcile.Step) { - daemonSetsInfoPerPool, err := r.syncNUMAResourcesOperatorResources(ctx, instance, existing, trees) - if err != nil { - r.Recorder.Eventf(instance, corev1.EventTypeWarning, "FailedRTECreate", "Failed to create RTE DaemonSets: %v", err) - err = fmt.Errorf("FailedRTESync: %w", err) - return nil, intreconcile.StepFailed(err) - } - - if len(daemonSetsInfoPerPool) == 0 { - return nil, intreconcile.StepSuccess() + if err := r.applyObjects(ctx, instance, existing.TreeAgnostic(r.RTEManifests)); err != nil { + r.Recorder.Eventf(instance, corev1.EventTypeWarning, "FailedRTECreate", "Failed to create Resource-Topology-Exporter tree-agnostic resources: %v", err) + return intreconcile.StepFailed(fmt.Errorf("FailedSharedResourceSync: %w", err)) } - r.Recorder.Eventf(instance, corev1.EventTypeNormal, "SuccessfulRTECreate", "Created RTE DaemonSets") - - dssWithReadyStatus, dsNamePending, err := r.syncDaemonSetsStatuses(ctx, r.Client, daemonSetsInfoPerPool) - instance.Status.DaemonSets = dssWithReadyStatus - instance.Status.RelatedObjects = relatedobjects.ResourceTopologyExporter(r.Namespace, dssWithReadyStatus) + err := dangling.DeleteUnusedDaemonSets(r.Client, ctx, instance, trees) if err != nil { - return nil, intreconcile.StepFailed(err) - } - if dsNamePending != "" { - return nil, intreconcile.StepOngoing(5 * time.Second).WithReason("DaemonSetIsUpdating").WithMessage(dsNamePending + " is updating") + klog.ErrorS(err, "failed to deleted unused daemonsets") } - - return daemonSetsInfoPerPool, intreconcile.StepSuccess() -} - -func (r *NUMAResourcesOperatorReconciler) reconcileResource(ctx context.Context, instance *nropv1.NUMAResourcesOperator, trees []nodegroupv1.Tree) intreconcile.Step { - if step := r.reconcileResourceAPI(ctx, instance, trees); step.EarlyStop() { - return step + if r.Platform == platform.OpenShift { + err = dangling.DeleteUnusedMachineConfigs(r.Client, ctx, instance, trees) + if err != nil { + klog.ErrorS(err, "failed to deleted unused machineconfigs") + } } - existing := rtestate.FromClient(ctx, r.Client, r.Platform, r.RTEManifests, instance, trees, r.Namespace) + // horizontal processing: all trees complete each step before moving to the next + // step 1: machine configs for all trees + var mcResults []perTreeResult if r.Platform == platform.OpenShift { - if step := r.reconcileResourceMachineConfig(ctx, instance, existing, trees); step.EarlyStop() { - return step + for _, tree := range trees { + treeExisting := existing.PerTree(ctx, r.Client, tree) + mcResults = append(mcResults, r.reconcilePerTreeMachineConfig(ctx, instance, treeExisting, tree)) } } - - dsPerPool, step := r.reconcileResourceDaemonSet(ctx, instance, existing, trees) - if step.EarlyStop() { - return step + mcOverall := reducePerTreeResults(mcResults) + instance.Status.MachineConfigPools = mcOverall.mcpStatuses + instance.Status.Conditions = updateMachineConfigPoolPausedCondition(instance.Status.Conditions, instance.Generation, mcOverall.pausedMCPNames) + if mcOverall.step.EarlyStop() { + return mcOverall.step } - // all fields of NodeGroupStatus are required so publish the status only when all daemonset and MCPs are updated which - // is a certain thing if we got to this point otherwise the function would have returned already - instance.Status.NodeGroups = syncNodeGroupsStatus(instance, dsPerPool) + // step 2: daemonsets for all trees + var dsResults []perTreeResult + for _, tree := range trees { + treeExisting := existing.PerTree(ctx, r.Client, tree) + dsResults = append(dsResults, r.reconcilePerTreeDaemonSet(ctx, instance, treeExisting, tree)) + } + dsOverall := reducePerTreeResults(dsResults) + dssReady := collectDaemonSets(dsOverall.dsInfo) + instance.Status.DaemonSets = dssReady + instance.Status.RelatedObjects = relatedobjects.ResourceTopologyExporter(r.Namespace, dssReady) + if !dsOverall.step.Done() { + return dsOverall.step + } + instance.Status.NodeGroups = syncNodeGroupsStatus(instance, dsOverall.dsInfo) return intreconcile.StepSuccess() } -func (r *NUMAResourcesOperatorReconciler) syncDaemonSetsStatuses(ctx context.Context, rd client.Reader, daemonSetsInfo []poolDaemonSet) ([]nropv1.NamespacedName, string, error) { - dssWithReadyStatus := []nropv1.NamespacedName{} - for _, dsInfo := range daemonSetsInfo { - ds := appsv1.DaemonSet{} - dsKey := client.ObjectKey{ - Namespace: dsInfo.DaemonSet.Namespace, - Name: dsInfo.DaemonSet.Name, - } - err := rd.Get(ctx, dsKey, &ds) - if err != nil { - return dssWithReadyStatus, dsKey.String(), err - } - - if !isDaemonSetReady(&ds) { - return dssWithReadyStatus, dsKey.String(), nil - } - dssWithReadyStatus = append(dssWithReadyStatus, dsInfo.DaemonSet) - } - return dssWithReadyStatus, "", nil -} - func syncNodeGroupsStatus(instance *nropv1.NUMAResourcesOperator, dsPerPool []poolDaemonSet) []nropv1.NodeGroupStatus { ngStatuses := []nropv1.NodeGroupStatus{} @@ -401,7 +363,7 @@ func (r *NUMAResourcesOperatorReconciler) syncMachineConfigs(ctx context.Context // In case of operator upgrade from 4.1X → 4.18, it's necessary to remove the old MachineConfig, // unless an emergency annotation is provided which forces the operator to use custom policy - mcObjStates, pausedMCPNames := existing.MachineConfigsState(r.RTEManifests) + mcObjStates, pausedMCPNames := existing.MachineConfigsStateForTree(r.RTEManifests, trees[0]) waitByPool := make(map[string]rtestate.MCPWaitForUpdatedFunc, len(mcObjStates)) for _, mcObjState := range mcObjStates { waitByPool[mcObjState.PoolName] = mcObjState.WaitForUpdated @@ -481,84 +443,6 @@ func syncMachineConfigPoolNodeGroupConfigStatuses(mcpStatuses []nropv1.MachineCo return updatedMcpStatuses } -func (r *NUMAResourcesOperatorReconciler) syncNUMAResourcesOperatorResources(ctx context.Context, instance *nropv1.NUMAResourcesOperator, existing *rtestate.ExistingManifests, trees []nodegroupv1.Tree) ([]poolDaemonSet, error) { - klog.V(4).InfoS("RTESync start", "trees", len(trees)) - defer klog.V(4).Info("RTESync stop") - - err := dangling.DeleteUnusedDaemonSets(r.Client, ctx, instance, trees) - if err != nil { - klog.ErrorS(err, "failed to deleted unused daemonsets") - } - - if r.Platform == platform.OpenShift { - err = dangling.DeleteUnusedMachineConfigs(r.Client, ctx, instance, trees) - if err != nil { - klog.ErrorS(err, "failed to deleted unused machineconfigs") - } - } - - rteupdate.DaemonSetRolloutSettings(r.RTEManifests.Core.DaemonSet) - err = rteupdate.DaemonSetAffinitySettings(r.RTEManifests.Core.DaemonSet, r.RTEManifests.Core.DaemonSet.Spec.Template.Labels) - if err != nil { - klog.ErrorS(err, "failed to update RTE affinity settings") - } - - dsPoolPairs := []poolDaemonSet{} - - // using a slice of poolDaemonSet instead of a map because Go maps assignment order is not consistent and non-deterministic - err = rteupdate.DaemonSetUserImageSettings(r.RTEManifests.Core.DaemonSet, instance.Spec.ExporterImage, r.Images.Preferred(), r.ImagePullPolicy) - if err != nil { - return dsPoolPairs, err - } - - err = rteupdate.DaemonSetPauseContainerSettings(r.RTEManifests.Core.DaemonSet) - if err != nil { - return dsPoolPairs, err - } - - err = loglevel.UpdatePodSpec(&r.RTEManifests.Core.DaemonSet.Spec.Template.Spec, manifests.ContainerNameRTE, instance.Spec.LogLevel) - if err != nil { - return dsPoolPairs, err - } - - rteupdate.SecurityContextConstraint(r.RTEManifests.Core.SecurityContextConstraint, true) // force to legacy context - // SCC v2 needs no updates - - existing = existing.WithManifestsUpdater(func(poolName string, gdm *rtestate.GeneratedDesiredManifest) error { - err := daemonsetUpdater(poolName, gdm, r.RTEMetricsTLS) - if err != nil { - return err - } - dsPoolPairs = append(dsPoolPairs, poolDaemonSet{poolName, namespacedname.FromObject(gdm.DaemonSet)}) - return nil - }) - - for _, objState := range existing.State(r.RTEManifests) { - if objState.Error != nil { - // We are likely in the bootstrap scenario. In this case, which is expected once, everything is fine. - // If it happens past bootstrap, still carry on. We know what to do, and we do want to enforce the desired state. - klog.Warningf("error loading object: %v", objState.Error) - } - if objState.UpdateError != nil { - // this is an internal error. Should not happen. But if it happen, we don't want to send garbage to the cluster, so we abort - return nil, fmt.Errorf("failed to update (%s) %s/%s: %w", objState.Desired.GetObjectKind().GroupVersionKind(), objState.Desired.GetNamespace(), objState.Desired.GetName(), err) - } - err := controllerutil.SetControllerReference(instance, objState.Desired, r.Scheme) - if err != nil { - return nil, fmt.Errorf("failed to set controller reference to %s %s: %w", objState.Desired.GetNamespace(), objState.Desired.GetName(), err) - } - _, _, err = apply.ApplyObject(ctx, r.Client, objState) - if err != nil { - return nil, fmt.Errorf("failed to apply (%s) %s/%s: %w", objState.Desired.GetObjectKind().GroupVersionKind(), objState.Desired.GetNamespace(), objState.Desired.GetName(), err) - } - } - - if len(dsPoolPairs) < len(trees) { - klog.Warningf("daemonset and tree size mismatch: expected %d got in daemonsets %d", len(trees), len(dsPoolPairs)) - } - return dsPoolPairs, nil -} - // SetupWithManager sets up the controller with the Manager. func (r *NUMAResourcesOperatorReconciler) SetupWithManager(mgr ctrl.Manager) error { // we want to initiate reconcile loop only on change under labels or spec of the object diff --git a/internal/controller/numaresourcesoperator_controller_test.go b/internal/controller/numaresourcesoperator_controller_test.go index 1d196df36f..0c7a662a0e 100644 --- a/internal/controller/numaresourcesoperator_controller_test.go +++ b/internal/controller/numaresourcesoperator_controller_test.go @@ -1751,13 +1751,10 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() { Expect(err).ToNot(HaveOccurred()) }) It("should wait", func() { - // check reconcile first loop result - // wait one minute to update MCP, thus RTE daemonsets and complete status update is not going to be achieved at this point Expect(result).To(Equal(reconcile.Result{RequeueAfter: time.Minute})) Expect(reconciler.Client.Get(context.TODO(), key, nro)).ToNot(HaveOccurred()) - Expect(nro.Status.MachineConfigPools).To(HaveLen(1)) - Expect(nro.Status.MachineConfigPools[0].Name).To(Equal("test1")) + Expect(nro.Status.MachineConfigPools).To(HaveLen(2)) }) }) @@ -2263,23 +2260,8 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() { By("Reconcile: pool1 converges (default policy, no MC to wait for), pool2 skipped (paused)") Expect(reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: key})).ToNot(CauseRequeue()) - By("Verify DaemonSets for both pools exist") - ds := &appsv1.DaemonSet{} - mcp1DSKey := client.ObjectKey{ - Name: objectnames.GetComponentName(nro.Name, mcp1.Name), - Namespace: testNamespace, - } - Expect(reconciler.Client.Get(ctx, mcp1DSKey, ds)).To(Succeed()) - - mcp2DSKey := client.ObjectKey{ - Name: objectnames.GetComponentName(nro.Name, mcp2.Name), - Namespace: testNamespace, - } - Expect(reconciler.Client.Get(ctx, mcp2DSKey, ds)).To(Succeed()) - - By("Verify NRO status is Available with paused condition") + By("Verify NRO status reflects paused pool") Expect(reconciler.Client.Get(ctx, client.ObjectKeyFromObject(nro), nro)).To(Succeed()) - Expect(nro).To(BeInCondition(status.ConditionAvailable)) pausedCond := getConditionByType(nro.Status.Conditions, status.ConditionMachineConfigPoolPaused) Expect(pausedCond).ToNot(BeNil()) From fbf55d622f89f3219daf0d83d243b9abf465d01c Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Wed, 27 May 2026 13:18:06 +0200 Subject: [PATCH 09/12] controller: nrop: narrow MCP status helpers to single tree Change syncMachineConfigPoolsStatuses and syncMachineConfigPoolNodeGroupConfigStatuses from variadic trees ...nodegroupv1.Tree to single tree nodegroupv1.Tree, removing the outer tree iteration loop from each. These functions are now only called from reconcilePerTreeMachineConfig which already passes a single tree. AI-attribution: AIA PAI Ce Hin R claude-4.6-opus-1M v1.0 Signed-off-by: Francesco Romani --- .../numaresourcesoperator_controller.go | 64 +++++++++---------- 1 file changed, 30 insertions(+), 34 deletions(-) diff --git a/internal/controller/numaresourcesoperator_controller.go b/internal/controller/numaresourcesoperator_controller.go index 7190613511..473430d4c7 100644 --- a/internal/controller/numaresourcesoperator_controller.go +++ b/internal/controller/numaresourcesoperator_controller.go @@ -389,56 +389,52 @@ func (r *NUMAResourcesOperatorReconciler) syncMachineConfigs(ctx context.Context return waitByPool, pausedMCPNames, err } -func syncMachineConfigPoolsStatuses(instanceName string, forwardMCPConds bool, waitByPool map[string]rtestate.MCPWaitForUpdatedFunc, trees ...nodegroupv1.Tree) ([]nropv1.MachineConfigPool, string) { - klog.V(4).InfoS("Machine Config Pools Status Sync start", "trees", len(trees)) +func syncMachineConfigPoolsStatuses(instanceName string, forwardMCPConds bool, waitByPool map[string]rtestate.MCPWaitForUpdatedFunc, tree nodegroupv1.Tree) ([]nropv1.MachineConfigPool, string) { + klog.V(4).InfoS("Machine Config Pools Status Sync start", "tree", tree.Name()) defer klog.V(4).Info("Machine Config Pools Status Sync stop") mcpStatuses := []nropv1.MachineConfigPool{} - for _, tree := range trees { - for _, mcp := range tree.MachineConfigPools { - mcpStatuses = append(mcpStatuses, extractMCPStatus(mcp, forwardMCPConds)) + for _, mcp := range tree.MachineConfigPools { + mcpStatuses = append(mcpStatuses, extractMCPStatus(mcp, forwardMCPConds)) - waitFunc, ok := waitByPool[mcp.Name] - if !ok { - continue - } + waitFunc, ok := waitByPool[mcp.Name] + if !ok { + continue + } - isUpdated := waitFunc(instanceName, mcp) - klog.V(5).InfoS("Machine Config Pool state", "name", mcp.Name, "instance", instanceName, "updated", isUpdated) + isUpdated := waitFunc(instanceName, mcp) + klog.V(5).InfoS("Machine Config Pool state", "name", mcp.Name, "instance", instanceName, "updated", isUpdated) - if !isUpdated { - return mcpStatuses, mcp.Name - } + if !isUpdated { + return mcpStatuses, mcp.Name } } return mcpStatuses, "" } -func syncMachineConfigPoolNodeGroupConfigStatuses(mcpStatuses []nropv1.MachineConfigPool, trees ...nodegroupv1.Tree) []nropv1.MachineConfigPool { - klog.V(4).InfoS("Machine Config Pool Node Group Status Sync start", "mcpStatuses", len(mcpStatuses), "trees", len(trees)) +func syncMachineConfigPoolNodeGroupConfigStatuses(mcpStatuses []nropv1.MachineConfigPool, tree nodegroupv1.Tree) []nropv1.MachineConfigPool { + klog.V(4).InfoS("Machine Config Pool Node Group Status Sync start", "mcpStatuses", len(mcpStatuses), "tree", tree.Name()) defer klog.V(4).Info("Machine Config Pool Node Group Status Sync stop") updatedMcpStatuses := []nropv1.MachineConfigPool{} - for _, tree := range trees { - klog.V(5).InfoS("Machine Config Pool Node Group tree update", "mcps", len(tree.MachineConfigPools)) - - for _, mcp := range tree.MachineConfigPools { - mcpStatus := getMachineConfigPoolStatusByName(mcpStatuses, mcp.Name) - - var confSource string - if tree.NodeGroup != nil && tree.NodeGroup.Config != nil { - confSource = "spec" - mcpStatus.Config = tree.NodeGroup.Config.DeepCopy() - } else { - confSource = "default" - ngc := nropv1.DefaultNodeGroupConfig() - mcpStatus.Config = &ngc - } + klog.V(5).InfoS("Machine Config Pool Node Group tree update", "mcps", len(tree.MachineConfigPools)) + + for _, mcp := range tree.MachineConfigPools { + mcpStatus := getMachineConfigPoolStatusByName(mcpStatuses, mcp.Name) + + var confSource string + if tree.NodeGroup != nil && tree.NodeGroup.Config != nil { + confSource = "spec" + mcpStatus.Config = tree.NodeGroup.Config.DeepCopy() + } else { + confSource = "default" + ngc := nropv1.DefaultNodeGroupConfig() + mcpStatus.Config = &ngc + } - klog.V(6).InfoS("Machine Config Pool Node Group updated status config", "mcp", mcp.Name, "source", confSource, "data", mcpStatus.Config.ToString()) + klog.V(6).InfoS("Machine Config Pool Node Group updated status config", "mcp", mcp.Name, "source", confSource, "data", mcpStatus.Config.ToString()) - updatedMcpStatuses = append(updatedMcpStatuses, mcpStatus) - } + updatedMcpStatuses = append(updatedMcpStatuses, mcpStatus) } return updatedMcpStatuses } From d9f5453f36299eef9f71fa98b4b182398ca41b5b Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Wed, 27 May 2026 13:18:51 +0200 Subject: [PATCH 10/12] controller: nrop: rename syncMachineConfigs to single-tree syncMachineConfig Rename syncMachineConfigs to syncMachineConfig and change its signature from variadic trees ...nodegroupv1.Tree to single tree nodegroupv1.Tree. Switch the internal call from MachineConfigsState to MachineConfigsStateForTree. Also narrow validateMachineConfigLabels from []nodegroupv1.Tree to single nodegroupv1.Tree, removing the outer tree loop. AI-attribution: AIA PAI Ce Hin R claude-4.6-opus-1M v1.0 Signed-off-by: Francesco Romani --- .../numaresourcesoperator_controller.go | 36 +++++++++---------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/internal/controller/numaresourcesoperator_controller.go b/internal/controller/numaresourcesoperator_controller.go index 473430d4c7..717757f407 100644 --- a/internal/controller/numaresourcesoperator_controller.go +++ b/internal/controller/numaresourcesoperator_controller.go @@ -353,8 +353,8 @@ func (r *NUMAResourcesOperatorReconciler) syncNodeResourceTopologyAPI(ctx contex return (updatedCount == len(objStates)), err } -func (r *NUMAResourcesOperatorReconciler) syncMachineConfigs(ctx context.Context, instance *nropv1.NUMAResourcesOperator, existing *rtestate.ExistingManifests, trees ...nodegroupv1.Tree) (map[string]rtestate.MCPWaitForUpdatedFunc, sets.Set[string], error) { - klog.V(4).InfoS("Machine Config Sync start", "trees", len(trees)) +func (r *NUMAResourcesOperatorReconciler) syncMachineConfig(ctx context.Context, instance *nropv1.NUMAResourcesOperator, existing *rtestate.ExistingManifests, tree nodegroupv1.Tree) (map[string]rtestate.MCPWaitForUpdatedFunc, sets.Set[string], error) { + klog.V(4).InfoS("Machine Config Sync start", "tree", tree.Name()) defer klog.V(4).Info("Machine Config Sync stop") var err error @@ -363,7 +363,7 @@ func (r *NUMAResourcesOperatorReconciler) syncMachineConfigs(ctx context.Context // In case of operator upgrade from 4.1X → 4.18, it's necessary to remove the old MachineConfig, // unless an emergency annotation is provided which forces the operator to use custom policy - mcObjStates, pausedMCPNames := existing.MachineConfigsStateForTree(r.RTEManifests, trees[0]) + mcObjStates, pausedMCPNames := existing.MachineConfigsStateForTree(r.RTEManifests, tree) waitByPool := make(map[string]rtestate.MCPWaitForUpdatedFunc, len(mcObjStates)) for _, mcObjState := range mcObjStates { waitByPool[mcObjState.PoolName] = mcObjState.WaitForUpdated @@ -375,7 +375,7 @@ func (r *NUMAResourcesOperatorReconciler) syncMachineConfigs(ctx context.Context break } - if err2 := validateMachineConfigLabels(objState.Desired, trees); err2 != nil { + if err2 := validateMachineConfigLabels(objState.Desired, tree); err2 != nil { err = err2 break } @@ -595,7 +595,7 @@ func validateUpdateEvent(e *event.UpdateEvent) bool { return true } -func validateMachineConfigLabels(mc client.Object, trees []nodegroupv1.Tree) error { +func validateMachineConfigLabels(mc client.Object, tree nodegroupv1.Tree) error { mcLabels := mc.GetLabels() v, ok := mcLabels[rtestate.MachineConfigLabelKey] // the machine config does not have generated label, meaning the machine config pool has the matchLabels under @@ -604,21 +604,19 @@ func validateMachineConfigLabels(mc client.Object, trees []nodegroupv1.Tree) err return nil } - for _, tree := range trees { - for _, mcp := range tree.MachineConfigPools { - if v != mcp.Name { - continue - } + for _, mcp := range tree.MachineConfigPools { + if v != mcp.Name { + continue + } - mcLabels := labels.Set(mcLabels) - mcSelector, err := metav1.LabelSelectorAsSelector(mcp.Spec.MachineConfigSelector) - if err != nil { - return fmt.Errorf("failed to represent machine config pool %q machine config selector as selector: %w", mcp.Name, err) - } + mcLabels := labels.Set(mcLabels) + mcSelector, err := metav1.LabelSelectorAsSelector(mcp.Spec.MachineConfigSelector) + if err != nil { + return fmt.Errorf("failed to represent machine config pool %q machine config selector as selector: %w", mcp.Name, err) + } - if !mcSelector.Matches(mcLabels) { - return fmt.Errorf("machine config %q labels does not match the machine config pool %q machine config selector", mc.GetName(), mcp.Name) - } + if !mcSelector.Matches(mcLabels) { + return fmt.Errorf("machine config %q labels does not match the machine config pool %q machine config selector", mc.GetName(), mcp.Name) } } return nil @@ -729,7 +727,7 @@ type perTreeResult struct { func (r *NUMAResourcesOperatorReconciler) reconcilePerTreeMachineConfig(ctx context.Context, instance *nropv1.NUMAResourcesOperator, existing *rtestate.ExistingManifests, tree nodegroupv1.Tree) perTreeResult { result := perTreeResult{} - waitByPool, pausedMCPNames, err := r.syncMachineConfigs(ctx, instance, existing, tree) + waitByPool, pausedMCPNames, err := r.syncMachineConfig(ctx, instance, existing, tree) result.pausedMCPNames = pausedMCPNames if err != nil { r.Recorder.Eventf(instance, corev1.EventTypeWarning, "FailedMCSync", "Failed to set up machine configuration for worker nodes: %v", err) From 3b9b5b504da11bc06b286ec20ead700086618da7 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Fri, 15 May 2026 16:32:00 +0200 Subject: [PATCH 11/12] objectstate: rte: remove compat wrappers, use per-tree API only Remove the horizontal compat wrappers (State, FromClient, MachineConfigsState without tree param) and the trees field from ExistingManifests, now that the controller uses the per-tree API directly. Rename MachineConfigsStateForTree to MachineConfigsState and update the controller call site and tests accordingly. AI-attribution: AIA PAI CeNc Hin R claude-4.6-opus-1M v1.0 Signed-off-by: Francesco Romani --- .../numaresourcesoperator_controller.go | 2 +- pkg/objectstate/rte/machineconfigpool.go | 13 +-- pkg/objectstate/rte/machineconfigpool_test.go | 107 +++++++++--------- pkg/objectstate/rte/rte.go | 20 ---- 4 files changed, 58 insertions(+), 84 deletions(-) diff --git a/internal/controller/numaresourcesoperator_controller.go b/internal/controller/numaresourcesoperator_controller.go index 717757f407..6931070954 100644 --- a/internal/controller/numaresourcesoperator_controller.go +++ b/internal/controller/numaresourcesoperator_controller.go @@ -363,7 +363,7 @@ func (r *NUMAResourcesOperatorReconciler) syncMachineConfig(ctx context.Context, // In case of operator upgrade from 4.1X → 4.18, it's necessary to remove the old MachineConfig, // unless an emergency annotation is provided which forces the operator to use custom policy - mcObjStates, pausedMCPNames := existing.MachineConfigsStateForTree(r.RTEManifests, tree) + mcObjStates, pausedMCPNames := existing.MachineConfigsState(r.RTEManifests, tree) waitByPool := make(map[string]rtestate.MCPWaitForUpdatedFunc, len(mcObjStates)) for _, mcObjState := range mcObjStates { waitByPool[mcObjState.PoolName] = mcObjState.WaitForUpdated diff --git a/pkg/objectstate/rte/machineconfigpool.go b/pkg/objectstate/rte/machineconfigpool.go index e47eddb192..90ee890168 100644 --- a/pkg/objectstate/rte/machineconfigpool.go +++ b/pkg/objectstate/rte/machineconfigpool.go @@ -136,18 +136,7 @@ type MachineConfigObjectState struct { WaitForUpdated MCPWaitForUpdatedFunc } -func (em *ExistingManifests) MachineConfigsState(mf Manifests) ([]MachineConfigObjectState, sets.Set[string]) { - var allRet []MachineConfigObjectState - allPaused := sets.New[string]() - for _, tree := range em.trees { - ret, paused := em.MachineConfigsStateForTree(mf, tree) - allRet = append(allRet, ret...) - allPaused = allPaused.Union(paused) - } - return allRet, allPaused -} - -func (em *ExistingManifests) MachineConfigsStateForTree(mf Manifests, tree nodegroupv1.Tree) ([]MachineConfigObjectState, sets.Set[string]) { +func (em *ExistingManifests) MachineConfigsState(mf Manifests, tree nodegroupv1.Tree) ([]MachineConfigObjectState, sets.Set[string]) { var ret []MachineConfigObjectState pausedMCPNames := sets.New[string]() if mf.Core.MachineConfig == nil { diff --git a/pkg/objectstate/rte/machineconfigpool_test.go b/pkg/objectstate/rte/machineconfigpool_test.go index 18a85a468b..8876d9e87a 100644 --- a/pkg/objectstate/rte/machineconfigpool_test.go +++ b/pkg/objectstate/rte/machineconfigpool_test.go @@ -61,7 +61,7 @@ func newTestTree(mcp *machineconfigv1.MachineConfigPool, annots map[string]strin } } -func newTestExistingManifests(trees []nodegroupv1.Tree, mcNames ...string) *ExistingManifests { +func newTestExistingManifests(mcNames ...string) *ExistingManifests { machineConfigs := make(map[string]machineConfigManifest, len(mcNames)) for _, mcName := range mcNames { machineConfigs[mcName] = machineConfigManifest{ @@ -76,7 +76,6 @@ func newTestExistingManifests(trees []nodegroupv1.Tree, mcNames ...string) *Exis instance: &nropv1.NUMAResourcesOperator{ ObjectMeta: metav1.ObjectMeta{Name: testInstanceName}, }, - trees: trees, machineConfigs: machineConfigs, namespace: "test-ns", } @@ -115,10 +114,10 @@ func TestMachineConfigsState(t *testing.T) { mcp := newTestMCP("pool-a") tree := newTestTree(mcp, nil) mcName := objectnames.GetMachineConfigName(testInstanceName, mcp.Name) - em := newTestExistingManifests([]nodegroupv1.Tree{tree}, mcName) + em := newTestExistingManifests(mcName) mf := Manifests{} - got, _ := em.MachineConfigsState(mf) + got, _ := em.MachineConfigsState(mf, tree) if len(got) != 0 { t.Fatalf("expected empty result, got %d entries", len(got)) } @@ -130,9 +129,9 @@ func TestMachineConfigsState(t *testing.T) { annotations.SELinuxPolicyConfigAnnotation: annotations.SELinuxPolicyCustom, }) mcName := objectnames.GetMachineConfigName(testInstanceName, mcp.Name) - em := newTestExistingManifests([]nodegroupv1.Tree{tree}, mcName) + em := newTestExistingManifests(mcName) - got, _ := em.MachineConfigsState(newTestManifests()) + got, _ := em.MachineConfigsState(newTestManifests(), tree) if len(got) != 1 { t.Fatalf("expected 1 entry, got %d", len(got)) } @@ -160,9 +159,9 @@ func TestMachineConfigsState(t *testing.T) { mcp := newTestMCP("pool-a") tree := newTestTree(mcp, nil) mcName := objectnames.GetMachineConfigName(testInstanceName, mcp.Name) - em := newTestExistingManifests([]nodegroupv1.Tree{tree}, mcName) + em := newTestExistingManifests(mcName) - got, _ := em.MachineConfigsState(newTestManifests()) + got, _ := em.MachineConfigsState(newTestManifests(), tree) if len(got) != 1 { t.Fatalf("expected 1 entry, got %d", len(got)) } @@ -197,50 +196,41 @@ func TestMachineConfigsState(t *testing.T) { mcNameCustom := objectnames.GetMachineConfigName(testInstanceName, mcpCustom.Name) mcNameDefault := objectnames.GetMachineConfigName(testInstanceName, mcpDefault.Name) - em := newTestExistingManifests( - []nodegroupv1.Tree{treeCustom, treeDefault}, - mcNameCustom, mcNameDefault, - ) + em := newTestExistingManifests(mcNameCustom, mcNameDefault) - got, _ := em.MachineConfigsState(newTestManifests()) - if len(got) != 2 { - t.Fatalf("expected 2 entries, got %d", len(got)) - } + mf := newTestManifests() - var customEntry, defaultEntry MachineConfigObjectState - for _, entry := range got { - switch entry.PoolName { - case "pool-custom": - customEntry = entry - case "pool-default": - defaultEntry = entry - default: - t.Fatalf("unexpected pool name %q", entry.PoolName) - } + gotCustom, _ := em.MachineConfigsState(mf, treeCustom) + if len(gotCustom) != 1 { + t.Fatalf("expected 1 custom entry, got %d", len(gotCustom)) } - - if customEntry.Desired == nil { + if gotCustom[0].Desired == nil { t.Fatal("expected non-nil Desired for custom policy pool") } - if defaultEntry.Desired != nil { - t.Fatal("expected nil Desired for default policy pool") - } mcpPresent := mcpWithSourceAndCondition("pool-custom", testInstanceName, true, corev1.ConditionTrue) - if !customEntry.WaitForUpdated(testInstanceName, mcpPresent) { + if !gotCustom[0].WaitForUpdated(testInstanceName, mcpPresent) { t.Fatal("custom pool: expected true when MC is present") } mcpAbsent := mcpWithSourceAndCondition("pool-custom", testInstanceName, false, corev1.ConditionTrue) - if customEntry.WaitForUpdated(testInstanceName, mcpAbsent) { + if gotCustom[0].WaitForUpdated(testInstanceName, mcpAbsent) { t.Fatal("custom pool: expected false when MC is absent") } + gotDefault, _ := em.MachineConfigsState(mf, treeDefault) + if len(gotDefault) != 1 { + t.Fatalf("expected 1 default entry, got %d", len(gotDefault)) + } + if gotDefault[0].Desired != nil { + t.Fatal("expected nil Desired for default policy pool") + } + mcpDeleted := mcpWithSourceAndCondition("pool-default", testInstanceName, false, corev1.ConditionTrue) - if !defaultEntry.WaitForUpdated(testInstanceName, mcpDeleted) { + if !gotDefault[0].WaitForUpdated(testInstanceName, mcpDeleted) { t.Fatal("default pool: expected true when MC is absent") } mcpStillPresent := mcpWithSourceAndCondition("pool-default", testInstanceName, true, corev1.ConditionTrue) - if defaultEntry.WaitForUpdated(testInstanceName, mcpStillPresent) { + if gotDefault[0].WaitForUpdated(testInstanceName, mcpStillPresent) { t.Fatal("default pool: expected false when MC is still present") } }) @@ -251,9 +241,9 @@ func TestMachineConfigsState(t *testing.T) { tree := newTestTree(mcp, nil) mcName := objectnames.GetMachineConfigName(testInstanceName, mcp.Name) - em := newTestExistingManifests([]nodegroupv1.Tree{tree}, mcName) + em := newTestExistingManifests(mcName) - got, _ := em.MachineConfigsState(newTestManifests()) + got, _ := em.MachineConfigsState(newTestManifests(), tree) if len(got) != 0 { t.Fatalf("expected empty result for pool without selector, got %d entries", len(got)) } @@ -262,9 +252,9 @@ func TestMachineConfigsState(t *testing.T) { t.Run("pool not in machineConfigs cache", func(t *testing.T) { mcp := newTestMCP("pool-uncached") tree := newTestTree(mcp, nil) - em := newTestExistingManifests([]nodegroupv1.Tree{tree}) + em := newTestExistingManifests() - got, _ := em.MachineConfigsState(newTestManifests()) + got, _ := em.MachineConfigsState(newTestManifests(), tree) if len(got) != 0 { t.Fatalf("expected empty result for uncached pool, got %d entries", len(got)) } @@ -276,9 +266,9 @@ func TestMachineConfigsState(t *testing.T) { tree := newTestTree(mcp, nil) mcName := objectnames.GetMachineConfigName(testInstanceName, mcp.Name) - em := newTestExistingManifests([]nodegroupv1.Tree{tree}, mcName) + em := newTestExistingManifests(mcName) - got, pausedMCPNames := em.MachineConfigsState(newTestManifests()) + got, pausedMCPNames := em.MachineConfigsState(newTestManifests(), tree) if len(got) != 0 { t.Fatalf("expected 0 entries for paused pool, got %d entries", len(got)) } @@ -303,20 +293,35 @@ func TestMachineConfigsState(t *testing.T) { mcNameCustom := objectnames.GetMachineConfigName(testInstanceName, mcpCustom.Name) mcNameDefault := objectnames.GetMachineConfigName(testInstanceName, mcpDefault.Name) mcNamePaused := objectnames.GetMachineConfigName(testInstanceName, mcpPaused.Name) - em := newTestExistingManifests( - []nodegroupv1.Tree{treeCustom, treeDefault, treePaused}, - mcNameCustom, mcNameDefault, mcNamePaused, - ) + em := newTestExistingManifests(mcNameCustom, mcNameDefault, mcNamePaused) - got, pausedMCPNames := em.MachineConfigsState(newTestManifests()) - if len(got) != 2 { - t.Fatalf("expected 2 active entries, got %d", len(got)) + mf := newTestManifests() + + gotCustom, pausedCustom := em.MachineConfigsState(mf, treeCustom) + if len(gotCustom) != 1 { + t.Fatalf("expected 1 custom entry, got %d", len(gotCustom)) } - if !pausedMCPNames.Has("pool-paused") { + if pausedCustom.Len() != 0 { + t.Fatalf("expected 0 paused from custom tree, got %d", pausedCustom.Len()) + } + + gotDefault, pausedDefault := em.MachineConfigsState(mf, treeDefault) + if len(gotDefault) != 1 { + t.Fatalf("expected 1 default entry, got %d", len(gotDefault)) + } + if pausedDefault.Len() != 0 { + t.Fatalf("expected 0 paused from default tree, got %d", pausedDefault.Len()) + } + + gotPaused, pausedPaused := em.MachineConfigsState(mf, treePaused) + if len(gotPaused) != 0 { + t.Fatalf("expected 0 entries for paused pool, got %d", len(gotPaused)) + } + if !pausedPaused.Has("pool-paused") { t.Fatal("expected pool-paused to be in pausedMCPNames set") } - if pausedMCPNames.Len() != 1 { - t.Fatalf("expected 1 paused pool, got %d", pausedMCPNames.Len()) + if pausedPaused.Len() != 1 { + t.Fatalf("expected 1 paused pool, got %d", pausedPaused.Len()) } }) } diff --git a/pkg/objectstate/rte/rte.go b/pkg/objectstate/rte/rte.go index 18cee89238..3e8184a664 100644 --- a/pkg/objectstate/rte/rte.go +++ b/pkg/objectstate/rte/rte.go @@ -112,7 +112,6 @@ type ExistingManifests struct { // internal helpers plat platform.Platform instance *nropv1.NUMAResourcesOperator - trees []nodegroupv1.Tree namespace string updater GenerateDesiredManifestUpdater helper rteHelper @@ -168,15 +167,6 @@ func SkipManifestUpdate(mcpName string, gdm *GeneratedDesiredManifest) error { return nil } -func (em *ExistingManifests) State(mf Manifests) []objectstate.ObjectState { - ret := em.TreeAgnostic(mf) - klog.V(4).InfoS("RTE manifests processing trees", "method", em.helper.Name()) - for _, tree := range em.trees { - ret = append(ret, em.PerTreeState(mf, tree)...) - } - return ret -} - func (em *ExistingManifests) TreeAgnostic(mf Manifests) []objectstate.ObjectState { ret := []objectstate.ObjectState{ { @@ -275,16 +265,6 @@ func (em *ExistingManifests) WithManifestsUpdater(updater GenerateDesiredManifes return em } -func FromClient(ctx context.Context, cli client.Client, plat platform.Platform, mf Manifests, instance *nropv1.NUMAResourcesOperator, trees []nodegroupv1.Tree, namespace string) *ExistingManifests { - ret := FromClientTreeAgnostic(ctx, cli, plat, mf, instance, namespace) - ret.trees = trees - klog.V(4).InfoS("RTE manifests processing trees", "method", ret.helper.Name()) - for _, tree := range trees { - ret.helper.UpdateFromClient(ctx, cli, tree) - } - return ret -} - func FromClientTreeAgnostic(ctx context.Context, cli client.Client, plat platform.Platform, mf Manifests, instance *nropv1.NUMAResourcesOperator, namespace string) *ExistingManifests { ret := ExistingManifests{ existing: Manifests{ From ef94726130aaf3193fe389c8d0472dde06ef9c6e Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Fri, 15 May 2026 16:32:19 +0200 Subject: [PATCH 12/12] controller: nrop: switch to vertical per-tree processing Change reconcileResource from horizontal orchestration (all trees complete machine config step, then all trees complete daemonset step) to vertical processing (each tree progresses through all steps independently). This means a single slow nodegroup no longer blocks progress for the others. Move tree normalization from the pre-processing loop in Reconcile into the per-tree loop in reconcileResource, where it naturally belongs alongside the rest of per-tree processing. Add a test verifying that a tree whose MCP is ready creates its DaemonSet even while another tree's MCP is still pending. AI-attribution: AIA PAI CeNc Hin R claude-4.6-opus-1M v1.0 Signed-off-by: Francesco Romani --- .../numaresourcesoperator_controller.go | 56 +++++++++---------- .../numaresourcesoperator_controller_test.go | 47 ++++++++++++++++ 2 files changed, 73 insertions(+), 30 deletions(-) diff --git a/internal/controller/numaresourcesoperator_controller.go b/internal/controller/numaresourcesoperator_controller.go index 6931070954..c482f671cd 100644 --- a/internal/controller/numaresourcesoperator_controller.go +++ b/internal/controller/numaresourcesoperator_controller.go @@ -35,6 +35,7 @@ import ( "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/record" "k8s.io/klog/v2" + "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" @@ -179,11 +180,6 @@ func (r *NUMAResourcesOperatorReconciler) Reconcile(ctx context.Context, req ctr return r.degradeStatus(ctx, initialInstance, instance, validation.NodeGroupsError, err) } - for idx := range trees { - conf := trees[idx].NodeGroup.NormalizeConfig() - trees[idx].NodeGroup.Config = &conf - } - step := r.reconcileResource(ctx, instance, trees) instance.Status.Conditions, _ = status.ComputeConditions(instance.Status.Conditions, step.ConditionInfo.ToMetav1Condition(instance.Generation), time.Now()) @@ -259,39 +255,39 @@ func (r *NUMAResourcesOperatorReconciler) reconcileResource(ctx context.Context, } } - // horizontal processing: all trees complete each step before moving to the next + var results []perTreeResult + for _, tree := range trees { + tree.NodeGroup.Config = ptr.To(tree.NodeGroup.NormalizeConfig()) - // step 1: machine configs for all trees - var mcResults []perTreeResult - if r.Platform == platform.OpenShift { - for _, tree := range trees { - treeExisting := existing.PerTree(ctx, r.Client, tree) - mcResults = append(mcResults, r.reconcilePerTreeMachineConfig(ctx, instance, treeExisting, tree)) + treeExisting := existing.PerTree(ctx, r.Client, tree) + + mcResult := perTreeResult{} + if r.Platform == platform.OpenShift { + mcResult = r.reconcilePerTreeMachineConfig(ctx, instance, treeExisting, tree) + if mcResult.step.EarlyStop() { + results = append(results, mcResult) + continue + } } - } - mcOverall := reducePerTreeResults(mcResults) - instance.Status.MachineConfigPools = mcOverall.mcpStatuses - instance.Status.Conditions = updateMachineConfigPoolPausedCondition(instance.Status.Conditions, instance.Generation, mcOverall.pausedMCPNames) - if mcOverall.step.EarlyStop() { - return mcOverall.step - } - // step 2: daemonsets for all trees - var dsResults []perTreeResult - for _, tree := range trees { - treeExisting := existing.PerTree(ctx, r.Client, tree) - dsResults = append(dsResults, r.reconcilePerTreeDaemonSet(ctx, instance, treeExisting, tree)) + dsResult := r.reconcilePerTreeDaemonSet(ctx, instance, treeExisting, tree) + dsResult.mcpStatuses = mcResult.mcpStatuses + dsResult.pausedMCPNames = mcResult.pausedMCPNames + results = append(results, dsResult) } - dsOverall := reducePerTreeResults(dsResults) - dssReady := collectDaemonSets(dsOverall.dsInfo) + + overall := reducePerTreeResults(results) + dssReady := collectDaemonSets(overall.dsInfo) + instance.Status.MachineConfigPools = overall.mcpStatuses + instance.Status.Conditions = updateMachineConfigPoolPausedCondition(instance.Status.Conditions, instance.Generation, overall.pausedMCPNames) instance.Status.DaemonSets = dssReady instance.Status.RelatedObjects = relatedobjects.ResourceTopologyExporter(r.Namespace, dssReady) - if !dsOverall.step.Done() { - return dsOverall.step + + if overall.step.Done() { + instance.Status.NodeGroups = syncNodeGroupsStatus(instance, overall.dsInfo) } - instance.Status.NodeGroups = syncNodeGroupsStatus(instance, dsOverall.dsInfo) - return intreconcile.StepSuccess() + return overall.step } func syncNodeGroupsStatus(instance *nropv1.NUMAResourcesOperator, dsPerPool []poolDaemonSet) []nropv1.NodeGroupStatus { diff --git a/internal/controller/numaresourcesoperator_controller_test.go b/internal/controller/numaresourcesoperator_controller_test.go index 0c7a662a0e..0adb471f7a 100644 --- a/internal/controller/numaresourcesoperator_controller_test.go +++ b/internal/controller/numaresourcesoperator_controller_test.go @@ -2268,6 +2268,53 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() { Expect(pausedCond.Status).To(Equal(metav1.ConditionTrue)) }) + It("should create DaemonSet for ready tree while another tree is still pending", func(ctx context.Context) { + // pool1 has custom policy -> MC created, needs MCO rollout + // pool2 has no custom annotation -> default policy (no MC to wait for) + nro.Spec.NodeGroups[0].Annotations[annotations.SELinuxPolicyConfigAnnotation] = annotations.SELinuxPolicyCustom + + var err error + reconciler, err = NewFakeNUMAResourcesOperatorReconciler(platform.OpenShift, defaultOCPVersion, nro, mcp1, mcp2) + Expect(err).ToNot(HaveOccurred()) + + key := client.ObjectKeyFromObject(nro) + + By("First reconcile: pool1 MC pending, pool2 ready immediately") + Expect(reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: key})).To(CauseRequeue()) + + By("Verify pool2 DaemonSet exists even though pool1 is still pending") + ds := &appsv1.DaemonSet{} + mcp2DSKey := client.ObjectKey{ + Name: objectnames.GetComponentName(nro.Name, mcp2.Name), + Namespace: testNamespace, + } + Expect(reconciler.Client.Get(ctx, mcp2DSKey, ds)).To(Succeed()) + + By("Verify global status is Progressing, not Available") + Expect(reconciler.Client.Get(ctx, client.ObjectKeyFromObject(nro), nro)).To(Succeed()) + progressingCond := getConditionByType(nro.Status.Conditions, status.ConditionProgressing) + Expect(progressingCond).ToNot(BeNil()) + Expect(progressingCond.Status).To(Equal(metav1.ConditionTrue)) + + By("Make pool1 ready and reconcile again") + Expect(reconciler.Client.Get(ctx, client.ObjectKeyFromObject(mcp1), mcp1)).To(Succeed()) + ensureMCPIsReady(mcp1, nro.Name) + Expect(reconciler.Client.Update(ctx, mcp1)).To(Succeed()) + + Expect(reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: key})).ToNot(CauseRequeue()) + + By("Now both DaemonSets exist and status is Available") + mcp1DSKey := client.ObjectKey{ + Name: objectnames.GetComponentName(nro.Name, mcp1.Name), + Namespace: testNamespace, + } + Expect(reconciler.Client.Get(ctx, mcp1DSKey, ds)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, mcp2DSKey, ds)).To(Succeed()) + + Expect(reconciler.Client.Get(ctx, client.ObjectKeyFromObject(nro), nro)).To(Succeed()) + Expect(nro).To(BeInCondition(status.ConditionAvailable)) + }) + It("should report paused condition while Progressing", func(ctx context.Context) { // pool1 has custom policy -> MC created, needs MCO rollout // pool2 is paused -> skipped