diff --git a/bundle/manifests/numaresources-operator.clusterserviceversion.yaml b/bundle/manifests/numaresources-operator.clusterserviceversion.yaml index c49dd5f770..a00a0cfb45 100644 --- a/bundle/manifests/numaresources-operator.clusterserviceversion.yaml +++ b/bundle/manifests/numaresources-operator.clusterserviceversion.yaml @@ -66,7 +66,7 @@ metadata: } ] capabilities: Basic Install - createdAt: "2026-05-13T12:16:00Z" + createdAt: "2026-06-02T08:17:10Z" features.operators.openshift.io/cnf: "true" features.operators.openshift.io/cni: "false" features.operators.openshift.io/csi: "false" @@ -394,7 +394,9 @@ spec: resources: - nodes verbs: + - get - list + - patch - watch - apiGroups: - "" @@ -463,10 +465,10 @@ spec: - nodetopology.openshift.io resources: - numaresourcesoperators - - numaresourcesschedulers verbs: - get - list + - update - watch - apiGroups: - nodetopology.openshift.io @@ -484,6 +486,14 @@ spec: - get - patch - update + - apiGroups: + - nodetopology.openshift.io + resources: + - numaresourcesschedulers + verbs: + - get + - list + - watch - apiGroups: - rbac.authorization.k8s.io resources: diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index b18c4080c2..8cba6e6b9e 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -23,7 +23,9 @@ rules: resources: - nodes verbs: + - get - list + - patch - watch - apiGroups: - "" @@ -92,10 +94,10 @@ rules: - nodetopology.openshift.io resources: - numaresourcesoperators - - numaresourcesschedulers verbs: - get - list + - update - watch - apiGroups: - nodetopology.openshift.io @@ -113,6 +115,14 @@ rules: - get - patch - update +- apiGroups: + - nodetopology.openshift.io + resources: + - numaresourcesschedulers + verbs: + - get + - list + - watch - apiGroups: - rbac.authorization.k8s.io resources: diff --git a/internal/api/labels/labels.go b/internal/api/labels/labels.go new file mode 100644 index 0000000000..354cbad029 --- /dev/null +++ b/internal/api/labels/labels.go @@ -0,0 +1,29 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package labels + +const ( + // NodePrimaryPool is set on each node managed by NRO to indicate + // its primary pool (MCP on OpenShift, node-pool on HyperShift). + // Using this exclusive label as DaemonSet NodeSelector prevents duplicate + // pods when a node's labels match multiple pools. + NodePrimaryPool = "numa-operator.openshift.io/primary-pool" + + // NodeFinalizer is set on the NUMAResourcesOperator CR to ensure + // node labels are cleaned up when the CR is deleted. + NodeFinalizer = "numa-operator.openshift.io/node-labels" +) diff --git a/internal/controller/numaresourcesoperator_controller.go b/internal/controller/numaresourcesoperator_controller.go index ccf3e2c60b..b9c7559b32 100644 --- a/internal/controller/numaresourcesoperator_controller.go +++ b/internal/controller/numaresourcesoperator_controller.go @@ -57,7 +57,9 @@ import ( "github.com/openshift-kni/numaresources-operator/api/v1/helper/namespacedname" nodegroupv1 "github.com/openshift-kni/numaresources-operator/api/v1/helper/nodegroup" "github.com/openshift-kni/numaresources-operator/internal/api/annotations" + nrolabels "github.com/openshift-kni/numaresources-operator/internal/api/labels" "github.com/openshift-kni/numaresources-operator/internal/dangling" + "github.com/openshift-kni/numaresources-operator/internal/nodes" intreconcile "github.com/openshift-kni/numaresources-operator/internal/reconcile" "github.com/openshift-kni/numaresources-operator/internal/relatedobjects" "github.com/openshift-kni/numaresources-operator/pkg/apply" @@ -119,8 +121,8 @@ type NUMAResourcesOperatorReconciler struct { //+kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterrolebindings,verbs=* //+kubebuilder:rbac:groups="",resources=serviceaccounts,verbs=* //+kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch -//+kubebuilder:rbac:groups="",resources=nodes,verbs=list -//+kubebuilder:rbac:groups=nodetopology.openshift.io,resources=numaresourcesoperators,verbs=get;list;watch +//+kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;watch;patch +//+kubebuilder:rbac:groups=nodetopology.openshift.io,resources=numaresourcesoperators,verbs=get;list;watch;update //+kubebuilder:rbac:groups=nodetopology.openshift.io,resources=numaresourcesoperators/status,verbs=get;update;patch //+kubebuilder:rbac:groups=nodetopology.openshift.io,resources=numaresourcesoperators/finalizers,verbs=update @@ -146,6 +148,26 @@ func (r *NUMAResourcesOperatorReconciler) Reconcile(ctx context.Context, req ctr return ctrl.Result{}, err } + if !instance.DeletionTimestamp.IsZero() { + if controllerutil.ContainsFinalizer(instance, nrolabels.NodeFinalizer) { + if err := nodes.RemoveAllLabels(ctx, r.Client); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to clean up node labels: %w", err) + } + controllerutil.RemoveFinalizer(instance, nrolabels.NodeFinalizer) + if err := r.Update(ctx, instance); err != nil { + return ctrl.Result{}, err + } + } + return ctrl.Result{}, nil + } + + if !controllerutil.ContainsFinalizer(instance, nrolabels.NodeFinalizer) { + controllerutil.AddFinalizer(instance, nrolabels.NodeFinalizer) + if err := r.Update(ctx, instance); err != nil { + return ctrl.Result{}, err + } + } + initialInstance := instance.DeepCopy() if len(initialInstance.Status.Conditions) == 0 { instance.Status.Conditions = status.NewNUMAResourcesOperatorConditions() @@ -298,6 +320,10 @@ func (r *NUMAResourcesOperatorReconciler) reconcileResource(ctx context.Context, } } + if err := nodes.LabelForTrees(ctx, r.Client, r.Platform, trees); err != nil { + return intreconcile.StepFailed(fmt.Errorf("failed to label nodes for primary pools: %w", err)) + } + dsPerPool, step := r.reconcileResourceDaemonSet(ctx, instance, existing, trees) if step.EarlyStop() { return step @@ -618,12 +644,29 @@ func (r *NUMAResourcesOperatorReconciler) SetupWithManager(mgr ctrl.Manager) err return ok }) + nodePredicates := predicate.Funcs{ + UpdateFunc: func(e event.UpdateEvent) bool { + if !validateUpdateEvent(&e) { + return false + } + _, oldHas := e.ObjectOld.GetLabels()[nrolabels.NodePrimaryPool] + _, newHas := e.ObjectNew.GetLabels()[nrolabels.NodePrimaryPool] + if oldHas || newHas { + return true + } + return !apiequality.Semantic.DeepEqual(e.ObjectOld.GetLabels(), e.ObjectNew.GetLabels()) + }, + } + b := ctrl.NewControllerManagedBy(mgr).For(&nropv1.NUMAResourcesOperator{}) if r.Platform == platform.OpenShift { b.Watches( &machineconfigv1.MachineConfigPool{}, handler.EnqueueRequestsFromMapFunc(r.mcpToNUMAResourceOperator), builder.WithPredicates(mcpPredicates)). + Watches(&corev1.Node{}, + handler.EnqueueRequestsFromMapFunc(r.nodeToNUMAResourceOperator), + builder.WithPredicates(nodePredicates)). Owns(&securityv1.SecurityContextConstraints{}). Owns(&machineconfigv1.MachineConfig{}, builder.WithPredicates(p)) } @@ -676,6 +719,14 @@ func (r *NUMAResourcesOperatorReconciler) mcpToNUMAResourceOperator(ctx context. return requests } +func (r *NUMAResourcesOperatorReconciler) nodeToNUMAResourceOperator(ctx context.Context, nodeObj client.Object) []reconcile.Request { + return []reconcile.Request{{ + NamespacedName: client.ObjectKey{ + Name: objectnames.DefaultNUMAResourcesOperatorCrName, + }, + }} +} + func nodeGroupMatchesMCP(nodeGroup nropv1.NodeGroup, mcpName string, mcpLabels labels.Set) bool { if nodeGroup.PoolName != nil { return mcpName == *nodeGroup.PoolName diff --git a/internal/nodes/labels.go b/internal/nodes/labels.go new file mode 100644 index 0000000000..32fa71cbec --- /dev/null +++ b/internal/nodes/labels.go @@ -0,0 +1,194 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Copyright 2026 Red Hat, Inc. + */ + +package nodes + +import ( + "context" + "fmt" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/klog/v2" + + "sigs.k8s.io/controller-runtime/pkg/client" + + mcov1 "github.com/openshift/api/machineconfiguration/v1" + + "github.com/k8stopologyawareschedwg/deployer/pkg/deployer/platform" + + nodegroupv1 "github.com/openshift-kni/numaresources-operator/api/v1/helper/nodegroup" + nrolabels "github.com/openshift-kni/numaresources-operator/internal/api/labels" + mcpools "github.com/openshift-kni/numaresources-operator/pkg/machineconfigpools" +) + +// LabelForTrees labels each node with its primary pool name and removes +// stale labels from nodes that no longer belong to any NRO-managed pool. +// The primary pool is determined via MCO priority logic. +// On HyperShift this is a no-op because the nodePool label is already +// exclusive (each node belongs to exactly one node-pool). +func LabelForTrees(ctx context.Context, cli client.Client, plat platform.Platform, trees []nodegroupv1.Tree) error { + if plat != platform.OpenShift { + return nil + } + + allPools := &mcov1.MachineConfigPoolList{} + if err := cli.List(ctx, allPools); err != nil { + return fmt.Errorf("failed to list MachineConfigPools: %w", err) + } + + managedPools := managedPoolNames(trees) + + // collect all nodes that match any managed MCP selector + nodesByName := map[string]*corev1.Node{} + for _, tree := range trees { + for _, mcp := range tree.MachineConfigPools { + sel, err := metav1.LabelSelectorAsSelector(mcp.Spec.NodeSelector) + if err != nil { + klog.V(4).InfoS("Invalid MCP node selector, skipping", "pool", mcp.Name, "error", err) + continue + } + nodeList := &corev1.NodeList{} + if err := cli.List(ctx, nodeList, &client.ListOptions{LabelSelector: sel}); err != nil { + return fmt.Errorf("failed to list nodes for MCP %q: %w", mcp.Name, err) + } + for i := range nodeList.Items { + nodesByName[nodeList.Items[i].Name] = &nodeList.Items[i] + } + } + } + + // single pass: determine the best managed pool for each node and patch the label. + // Uses MCO priority (master > custom > worker) but falls back to the next + // matching pool when the primary pool is not in the NRO NodeGroups, so nodes + // retain RTE coverage even when their MCO primary pool is unmanaged. + desiredLabels := map[string]string{} // node name -> desired pool name + for nodeName, node := range nodesByName { + nodePools, err := mcpools.GetPoolsForNode(allPools.Items, node) + if err != nil { + klog.V(4).InfoS("Cannot determine pools for node, skipping", "node", nodeName, "error", err) + continue + } + poolName := firstManagedPool(nodePools, managedPools) + if poolName == "" { + continue + } + desiredLabels[nodeName] = poolName + } + + // apply labels to nodes that need them + for name, poolName := range desiredLabels { + node := nodesByName[name] + if current, ok := node.Labels[nrolabels.NodePrimaryPool]; ok && current == poolName { + continue + } + if err := patchLabel(ctx, cli, node, poolName); err != nil { + return fmt.Errorf("failed to label node %q with pool %q: %w", name, poolName, err) + } + klog.V(3).InfoS("Labeled node with primary pool", "node", name, "pool", poolName) + } + + return removeStaleLabels(ctx, cli, desiredLabels) +} + +// removeStaleLabels removes the NRO primary-pool label from nodes that +// should no longer have it (e.g. pool changed or node left scope). +func removeStaleLabels(ctx context.Context, cli client.Client, desiredLabels map[string]string) error { + labeledNodes := &corev1.NodeList{} + sel, _ := labels.Parse(nrolabels.NodePrimaryPool) + if err := cli.List(ctx, labeledNodes, &client.ListOptions{LabelSelector: sel}); err != nil { + return fmt.Errorf("failed to list nodes with NRO label: %w", err) + } + + for i := range labeledNodes.Items { + node := &labeledNodes.Items[i] + desiredPool, wantLabel := desiredLabels[node.Name] + currentPool := node.Labels[nrolabels.NodePrimaryPool] + + if wantLabel && currentPool == desiredPool { + continue + } + if err := removeLabel(ctx, cli, node); err != nil { + return fmt.Errorf("failed to remove stale label from node %q: %w", node.Name, err) + } + klog.V(3).InfoS("Removed stale primary pool label from node", "node", node.Name, "oldPool", currentPool) + } + return nil +} + +// RemoveAllLabels removes the NRO primary-pool label from all nodes. +// Used during finalizer cleanup when the NRO CR is deleted. +func RemoveAllLabels(ctx context.Context, cli client.Client) error { + labeledNodes := &corev1.NodeList{} + sel, _ := labels.Parse(nrolabels.NodePrimaryPool) + if err := cli.List(ctx, labeledNodes, &client.ListOptions{LabelSelector: sel}); err != nil { + return fmt.Errorf("failed to list nodes with NRO label: %w", err) + } + + for i := range labeledNodes.Items { + if err := removeLabel(ctx, cli, &labeledNodes.Items[i]); err != nil { + return fmt.Errorf("failed to remove label from node %q: %w", labeledNodes.Items[i].Name, err) + } + klog.V(3).InfoS("Removed primary pool label from node (finalizer cleanup)", "node", labeledNodes.Items[i].Name) + } + return nil +} + +func patchLabel(ctx context.Context, cli client.Client, node *corev1.Node, poolName string) error { + base := node.DeepCopy() + if node.Labels == nil { + node.Labels = make(map[string]string) + } + node.Labels[nrolabels.NodePrimaryPool] = poolName + return cli.Patch(ctx, node, client.MergeFrom(base)) +} + +func removeLabel(ctx context.Context, cli client.Client, node *corev1.Node) error { + if _, ok := node.Labels[nrolabels.NodePrimaryPool]; !ok { + return nil + } + base := node.DeepCopy() + delete(node.Labels, nrolabels.NodePrimaryPool) + return cli.Patch(ctx, node, client.MergeFrom(base)) +} + +// firstManagedPool returns the name of the first pool in the priority-ordered +// list that is managed by NRO. This ensures that when a node's MCO primary pool +// (e.g. a custom MCP) is not in the NodeGroups, the node falls back to a +// managed pool (e.g. worker) instead of losing RTE coverage. +func firstManagedPool(pools []*mcov1.MachineConfigPool, managed sets.Set[string]) string { + for _, p := range pools { + if managed.Has(p.Name) { + return p.Name + } + } + return "" +} + +func managedPoolNames(trees []nodegroupv1.Tree) sets.Set[string] { + names := sets.New[string]() + for _, tree := range trees { + for _, mcp := range tree.MachineConfigPools { + names.Insert(mcp.Name) + } + if tree.NodeGroup.PoolName != nil { + names.Insert(*tree.NodeGroup.PoolName) + } + } + return names +} diff --git a/internal/nodes/labels_test.go b/internal/nodes/labels_test.go new file mode 100644 index 0000000000..e7f687d4dc --- /dev/null +++ b/internal/nodes/labels_test.go @@ -0,0 +1,316 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Copyright 2026 Red Hat, Inc. + */ + +package nodes + +import ( + "context" + "testing" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes/scheme" + + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + mcov1 "github.com/openshift/api/machineconfiguration/v1" + + "github.com/k8stopologyawareschedwg/deployer/pkg/deployer/platform" + + nropv1 "github.com/openshift-kni/numaresources-operator/api/v1" + nodegroupv1 "github.com/openshift-kni/numaresources-operator/api/v1/helper/nodegroup" + nrolabels "github.com/openshift-kni/numaresources-operator/internal/api/labels" +) + +func init() { + _ = mcov1.Install(scheme.Scheme) + _ = nropv1.AddToScheme(scheme.Scheme) +} + +func newTestNode(name string, nodeLabels map[string]string) *corev1.Node { + return &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Labels: nodeLabels, + }, + } +} + +func newTestMCPWithSelector(name string, matchLabels map[string]string) *mcov1.MachineConfigPool { + return &mcov1.MachineConfigPool{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + Spec: mcov1.MachineConfigPoolSpec{ + NodeSelector: &metav1.LabelSelector{ + MatchLabels: matchLabels, + }, + }, + } +} + +func buildFakeClient(objs ...runtime.Object) client.Client { + return fake.NewClientBuilder().WithScheme(scheme.Scheme).WithRuntimeObjects(objs...).Build() +} + +func getLabel(ctx context.Context, cli client.Client, nodeName string) (string, bool) { + node := &corev1.Node{} + if err := cli.Get(ctx, client.ObjectKey{Name: nodeName}, node); err != nil { + return "", false + } + val, ok := node.Labels[nrolabels.NodePrimaryPool] + return val, ok +} + +func TestLabelOpenShift_PrimaryPoolSelection(t *testing.T) { + worker := newTestMCPWithSelector("worker", map[string]string{"node-role.kubernetes.io/worker": ""}) + workerB := newTestMCPWithSelector("workerB", map[string]string{"node-role.kubernetes.io/workerB": ""}) + + // node1 matches both MCPs, workerB should win (custom > worker) + node1 := newTestNode("node1", map[string]string{ + "node-role.kubernetes.io/worker": "", + "node-role.kubernetes.io/workerB": "", + }) + // node2 matches only worker + node2 := newTestNode("node2", map[string]string{ + "node-role.kubernetes.io/worker": "", + }) + + cli := buildFakeClient(worker, workerB, node1, node2) + ctx := context.Background() + + trees := []nodegroupv1.Tree{ + { + NodeGroup: &nropv1.NodeGroup{}, + MachineConfigPools: []*mcov1.MachineConfigPool{worker}, + }, + { + NodeGroup: &nropv1.NodeGroup{}, + MachineConfigPools: []*mcov1.MachineConfigPool{workerB}, + }, + } + + if err := LabelForTrees(ctx, cli, platform.OpenShift, trees); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + val, ok := getLabel(ctx, cli, "node1") + if !ok || val != "workerB" { + t.Fatalf("expected node1 to have label workerB, got %q (exists=%v)", val, ok) + } + + val, ok = getLabel(ctx, cli, "node2") + if !ok || val != "worker" { + t.Fatalf("expected node2 to have label worker, got %q (exists=%v)", val, ok) + } +} + +func TestLabelOpenShift_StaleLabelsRemoved(t *testing.T) { + worker := newTestMCPWithSelector("worker", map[string]string{"node-role.kubernetes.io/worker": ""}) + + // node1 was previously labeled but no longer matches any managed MCP + node1 := newTestNode("node1", map[string]string{ + nrolabels.NodePrimaryPool: "old-pool", + }) + // node2 matches the worker pool + node2 := newTestNode("node2", map[string]string{ + "node-role.kubernetes.io/worker": "", + }) + + cli := buildFakeClient(worker, node1, node2) + ctx := context.Background() + + trees := []nodegroupv1.Tree{ + { + NodeGroup: &nropv1.NodeGroup{}, + MachineConfigPools: []*mcov1.MachineConfigPool{worker}, + }, + } + + if err := LabelForTrees(ctx, cli, platform.OpenShift, trees); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if _, ok := getLabel(ctx, cli, "node1"); ok { + t.Fatal("expected stale label on node1 to be removed") + } + + val, ok := getLabel(ctx, cli, "node2") + if !ok || val != "worker" { + t.Fatalf("expected node2 to have label worker, got %q (exists=%v)", val, ok) + } +} + +func TestLabelOpenShift_UnmanagedPoolNotLabeled(t *testing.T) { + worker := newTestMCPWithSelector("worker", map[string]string{"node-role.kubernetes.io/worker": ""}) + infra := newTestMCPWithSelector("infra", map[string]string{"node-role.kubernetes.io/infra": ""}) + + // node matches infra MCP, but infra is not in the managed trees + node := newTestNode("node1", map[string]string{ + "node-role.kubernetes.io/infra": "", + }) + + cli := buildFakeClient(worker, infra, node) + ctx := context.Background() + + trees := []nodegroupv1.Tree{ + { + NodeGroup: &nropv1.NodeGroup{}, + MachineConfigPools: []*mcov1.MachineConfigPool{worker}, + }, + } + + if err := LabelForTrees(ctx, cli, platform.OpenShift, trees); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if _, ok := getLabel(ctx, cli, "node1"); ok { + t.Fatal("expected node matching unmanaged pool to not be labeled") + } +} + +func TestLabelOpenShift_FallbackToManagedPool(t *testing.T) { + worker := newTestMCPWithSelector("worker", map[string]string{"node-role.kubernetes.io/worker": ""}) + workerB := newTestMCPWithSelector("workerB", map[string]string{"node-role.kubernetes.io/workerB": ""}) + + // node matches both worker and workerB labels, MCO primary = workerB (custom > worker), + // but only worker is in NodeGroups -> should fall back to worker + node := newTestNode("node1", map[string]string{ + "node-role.kubernetes.io/worker": "", + "node-role.kubernetes.io/workerB": "", + }) + + cli := buildFakeClient(worker, workerB, node) + ctx := context.Background() + + trees := []nodegroupv1.Tree{ + { + NodeGroup: &nropv1.NodeGroup{}, + MachineConfigPools: []*mcov1.MachineConfigPool{worker}, + }, + } + + if err := LabelForTrees(ctx, cli, platform.OpenShift, trees); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + val, ok := getLabel(ctx, cli, "node1") + if !ok || val != "worker" { + t.Fatalf("expected node1 to fall back to managed pool worker, got %q (exists=%v)", val, ok) + } +} + +func TestLabelHyperShift_NoOp(t *testing.T) { + poolName := "my-nodepool" + node1 := newTestNode("node1", map[string]string{ + "hypershift.openshift.io/nodePool": poolName, + }) + + cli := buildFakeClient(node1) + ctx := context.Background() + + trees := []nodegroupv1.Tree{ + { + NodeGroup: &nropv1.NodeGroup{ + PoolName: &poolName, + }, + }, + } + + if err := LabelForTrees(ctx, cli, platform.HyperShift, trees); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // HyperShift is a no-op, so the NRO label should NOT be set + if _, ok := getLabel(ctx, cli, "node1"); ok { + t.Fatal("expected no NRO label on HyperShift node (no-op)") + } +} + +func TestRemoveAllLabels(t *testing.T) { + node1 := newTestNode("node1", map[string]string{ + nrolabels.NodePrimaryPool: "worker", + "other-label": "value", + }) + node2 := newTestNode("node2", map[string]string{ + nrolabels.NodePrimaryPool: "workerB", + }) + node3 := newTestNode("node3", map[string]string{ + "unrelated": "label", + }) + + cli := buildFakeClient(node1, node2, node3) + ctx := context.Background() + + if err := RemoveAllLabels(ctx, cli); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if _, ok := getLabel(ctx, cli, "node1"); ok { + t.Fatal("expected NRO label on node1 to be removed") + } + if _, ok := getLabel(ctx, cli, "node2"); ok { + t.Fatal("expected NRO label on node2 to be removed") + } + + // node1 should still have its other label + node := &corev1.Node{} + if err := cli.Get(ctx, client.ObjectKey{Name: "node1"}, node); err != nil { + t.Fatalf("unexpected error getting node1: %v", err) + } + if v, ok := node.Labels["other-label"]; !ok || v != "value" { + t.Fatal("expected other-label to be preserved on node1") + } + + // node3 should be untouched + if err := cli.Get(ctx, client.ObjectKey{Name: "node3"}, node); err != nil { + t.Fatalf("unexpected error getting node3: %v", err) + } + if _, ok := node.Labels[nrolabels.NodePrimaryPool]; ok { + t.Fatal("expected node3 to not have NRO label") + } +} + +func TestLabelOpenShift_Idempotent(t *testing.T) { + worker := newTestMCPWithSelector("worker", map[string]string{"node-role.kubernetes.io/worker": ""}) + node := newTestNode("node1", map[string]string{ + "node-role.kubernetes.io/worker": "", + }) + + cli := buildFakeClient(worker, node) + ctx := context.Background() + + trees := []nodegroupv1.Tree{ + { + NodeGroup: &nropv1.NodeGroup{}, + MachineConfigPools: []*mcov1.MachineConfigPool{worker}, + }, + } + + // run twice, should be idempotent + for i := 0; i < 2; i++ { + if err := LabelForTrees(ctx, cli, platform.OpenShift, trees); err != nil { + t.Fatalf("unexpected error on iteration %d: %v", i, err) + } + + val, ok := getLabel(ctx, cli, "node1") + if !ok || val != "worker" { + t.Fatalf("iteration %d: expected label worker, got %q (exists=%v)", i, val, ok) + } + } +} diff --git a/pkg/machineconfigpools/pools.go b/pkg/machineconfigpools/pools.go new file mode 100644 index 0000000000..d074b69bc1 --- /dev/null +++ b/pkg/machineconfigpools/pools.go @@ -0,0 +1,167 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Copyright 2026 Red Hat, Inc. + */ + +package machineconfigpools + +import ( + "fmt" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/klog/v2" + + mcov1 "github.com/openshift/api/machineconfiguration/v1" +) + +const ( + MachineConfigPoolMaster = "master" + MachineConfigPoolWorker = "worker" +) + +// ListPools categorizes the given pools into master, worker, and custom pools +// based on which pools' node selectors match the given node's labels. +// Adapted from MCO: https://github.com/openshift/machine-config-operator/blob/99cb8a46e6a31b2b72d6a8371c6cd4ee45393263/pkg/helpers/helpers.go#L155 +func ListPools(node *corev1.Node, pools []mcov1.MachineConfigPool) (master *mcov1.MachineConfigPool, worker *mcov1.MachineConfigPool, custom []*mcov1.MachineConfigPool, err error) { + for i := range pools { + p := &pools[i] + selector, err := metav1.LabelSelectorAsSelector(p.Spec.NodeSelector) + if err != nil { + return nil, nil, nil, fmt.Errorf("invalid label selector: %w", err) + } + + if selector.Empty() || !selector.Matches(labels.Set(node.Labels)) { + continue + } + + switch p.Name { + case MachineConfigPoolMaster: + master = p + case MachineConfigPoolWorker: + worker = p + default: + custom = append(custom, p) + } + } + + return master, worker, custom, nil +} + +// GetPoolsForNode returns the ordered list of pools a node belongs to, following MCO's +// priority logic: master > custom > worker. The first element is always the primary pool. +// Adapted from MCO: https://github.com/openshift/machine-config-operator/blob/99cb8a46e6a31b2b72d6a8371c6cd4ee45393263/pkg/helpers/helpers.go#L91 +func GetPoolsForNode(pools []mcov1.MachineConfigPool, node *corev1.Node) ([]*mcov1.MachineConfigPool, error) { + master, worker, custom, err := ListPools(node, pools) + if err != nil { + return nil, err + } + if master == nil && custom == nil && worker == nil { + return nil, nil + } + + switch { + case len(custom) > 1: + return nil, fmt.Errorf("node %s belongs to %d custom roles, cannot proceed with this Node", node.Name, len(custom)) + case len(custom) == 1: + pls := []*mcov1.MachineConfigPool{} + if master != nil { + klog.V(4).InfoS("Found master node matching custom pool selector, defaulting to master", "customPool", custom[0].Name, "node", node.Name) + pls = append(pls, master) + } else { + pls = append(pls, custom[0]) + } + if worker != nil { + pls = append(pls, worker) + } + return pls, nil + case master != nil: + // On compact clusters nodes have both master and worker roles. + // Include worker so the fallback logic can find a managed pool + // when master is not in the NRO NodeGroups. + pls := []*mcov1.MachineConfigPool{master} + if worker != nil { + pls = append(pls, worker) + } + return pls, nil + default: + return []*mcov1.MachineConfigPool{worker}, nil + } +} + +// GetPrimaryPoolForNode returns the primary MCP for a node, i.e. the first pool +// returned by GetPoolsForNode. +// Adapted from MCO: https://github.com/openshift/machine-config-operator/blob/99cb8a46e6a31b2b72d6a8371c6cd4ee45393263/pkg/helpers/helpers.go#L84 +func GetPrimaryPoolForNode(pools []mcov1.MachineConfigPool, node *corev1.Node) (*mcov1.MachineConfigPool, error) { + nodePools, err := GetPoolsForNode(pools, node) + if err != nil { + return nil, err + } + if len(nodePools) == 0 { + return nil, nil + } + return nodePools[0], nil +} + +// GetNodesForPool returns the subset of nodes for which the given pool is the primary pool. +// Adapted from MCO: https://github.com/openshift/machine-config-operator/blob/99cb8a46e6a31b2b72d6a8371c6cd4ee45393263/pkg/helpers/helpers.go#L28 +func GetNodesForPool(allPools []mcov1.MachineConfigPool, pool *mcov1.MachineConfigPool, nodes []corev1.Node) ([]*corev1.Node, error) { + selector, err := metav1.LabelSelectorAsSelector(pool.Spec.NodeSelector) + if err != nil { + return nil, fmt.Errorf("invalid label selector: %w", err) + } + + var result []*corev1.Node + for i := range nodes { + n := &nodes[i] + if !selector.Matches(labels.Set(n.Labels)) { + continue + } + primary, err := GetPrimaryPoolForNode(allPools, n) + if err != nil { + klog.V(4).InfoS("Cannot get pool for node, skipping", "node", n.Name, "error", err) + continue + } + if primary == nil || primary.Name != pool.Name { + continue + } + result = append(result, n) + } + return result, nil +} + +// HasNodesForPool returns true if the given pool is the primary pool for at least one of the provided nodes. +func HasNodesForPool(allPools []mcov1.MachineConfigPool, pool *mcov1.MachineConfigPool, nodes []corev1.Node) (bool, error) { + selector, err := metav1.LabelSelectorAsSelector(pool.Spec.NodeSelector) + if err != nil { + return false, fmt.Errorf("invalid label selector: %w", err) + } + + for i := range nodes { + n := &nodes[i] + if !selector.Matches(labels.Set(n.Labels)) { + continue + } + primary, err := GetPrimaryPoolForNode(allPools, n) + if err != nil { + klog.V(4).InfoS("Cannot get pool for node, skipping", "node", n.Name, "error", err) + continue + } + if primary != nil && primary.Name == pool.Name { + return true, nil + } + } + return false, nil +} diff --git a/pkg/machineconfigpools/pools_test.go b/pkg/machineconfigpools/pools_test.go new file mode 100644 index 0000000000..bde584318e --- /dev/null +++ b/pkg/machineconfigpools/pools_test.go @@ -0,0 +1,406 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Copyright 2026 Red Hat, Inc. + */ + +package machineconfigpools + +import ( + "testing" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + mcov1 "github.com/openshift/api/machineconfiguration/v1" +) + +func newMCP(name string, nodeSelector *metav1.LabelSelector) mcov1.MachineConfigPool { + return mcov1.MachineConfigPool{ + ObjectMeta: metav1.ObjectMeta{Name: name}, + Spec: mcov1.MachineConfigPoolSpec{ + NodeSelector: nodeSelector, + }, + } +} + +func newNode(name string, labels map[string]string) corev1.Node { + return corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Labels: labels, + }, + } +} + +func TestGetPrimaryPoolForNode_WorkerOnly(t *testing.T) { + pools := []mcov1.MachineConfigPool{ + newMCP("worker", &metav1.LabelSelector{ + MatchLabels: map[string]string{"node-role.kubernetes.io/worker": ""}, + }), + } + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + Labels: map[string]string{"node-role.kubernetes.io/worker": ""}, + }, + } + + primary, err := GetPrimaryPoolForNode(pools, node) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if primary == nil { + t.Fatal("expected primary pool, got nil") + } + if primary.Name != "worker" { + t.Fatalf("expected worker, got %s", primary.Name) + } +} + +func TestGetPrimaryPoolForNode_CustomPoolOverWorker(t *testing.T) { + pools := []mcov1.MachineConfigPool{ + newMCP("worker", &metav1.LabelSelector{ + MatchLabels: map[string]string{"node-role.kubernetes.io/worker": ""}, + }), + newMCP("infra", &metav1.LabelSelector{ + MatchLabels: map[string]string{"node-role.kubernetes.io/infra": ""}, + }), + } + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + Labels: map[string]string{ + "node-role.kubernetes.io/worker": "", + "node-role.kubernetes.io/infra": "", + }, + }, + } + + primary, err := GetPrimaryPoolForNode(pools, node) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if primary == nil { + t.Fatal("expected primary pool, got nil") + } + if primary.Name != "infra" { + t.Fatalf("expected infra, got %s", primary.Name) + } +} + +func TestGetPrimaryPoolForNode_MasterOverCustom(t *testing.T) { + pools := []mcov1.MachineConfigPool{ + newMCP("master", &metav1.LabelSelector{ + MatchLabels: map[string]string{"node-role.kubernetes.io/master": ""}, + }), + newMCP("infra", &metav1.LabelSelector{ + MatchLabels: map[string]string{"node-role.kubernetes.io/infra": ""}, + }), + } + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + Labels: map[string]string{ + "node-role.kubernetes.io/master": "", + "node-role.kubernetes.io/infra": "", + }, + }, + } + + primary, err := GetPrimaryPoolForNode(pools, node) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if primary == nil { + t.Fatal("expected primary pool, got nil") + } + if primary.Name != "master" { + t.Fatalf("expected master, got %s", primary.Name) + } +} + +func TestGetPrimaryPoolForNode_MultipleCustomPoolsError(t *testing.T) { + pools := []mcov1.MachineConfigPool{ + newMCP("worker", &metav1.LabelSelector{ + MatchLabels: map[string]string{"node-role.kubernetes.io/worker": ""}, + }), + newMCP("infra", &metav1.LabelSelector{ + MatchLabels: map[string]string{"node-role.kubernetes.io/infra": ""}, + }), + newMCP("storage", &metav1.LabelSelector{ + MatchLabels: map[string]string{"node-role.kubernetes.io/storage": ""}, + }), + } + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + Labels: map[string]string{ + "node-role.kubernetes.io/worker": "", + "node-role.kubernetes.io/infra": "", + "node-role.kubernetes.io/storage": "", + }, + }, + } + + _, err := GetPrimaryPoolForNode(pools, node) + if err == nil { + t.Fatal("expected error for multiple custom pools, got nil") + } +} + +func TestGetPrimaryPoolForNode_NoMatchingPool(t *testing.T) { + pools := []mcov1.MachineConfigPool{ + newMCP("worker", &metav1.LabelSelector{ + MatchLabels: map[string]string{"node-role.kubernetes.io/worker": ""}, + }), + } + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + Labels: map[string]string{"node-role.kubernetes.io/master": ""}, + }, + } + + primary, err := GetPrimaryPoolForNode(pools, node) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if primary != nil { + t.Fatalf("expected nil pool, got %s", primary.Name) + } +} + +func TestGetPoolsForNode_CustomWithWorker(t *testing.T) { + pools := []mcov1.MachineConfigPool{ + newMCP("worker", &metav1.LabelSelector{ + MatchLabels: map[string]string{"node-role.kubernetes.io/worker": ""}, + }), + newMCP("workerB", &metav1.LabelSelector{ + MatchLabels: map[string]string{"node-role.kubernetes.io/workerB": ""}, + }), + } + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + Labels: map[string]string{ + "node-role.kubernetes.io/worker": "", + "node-role.kubernetes.io/workerB": "", + }, + }, + } + + nodePools, err := GetPoolsForNode(pools, node) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(nodePools) != 2 { + t.Fatalf("expected 2 pools, got %d", len(nodePools)) + } + if nodePools[0].Name != "workerB" { + t.Fatalf("expected workerB as primary, got %s", nodePools[0].Name) + } + if nodePools[1].Name != "worker" { + t.Fatalf("expected worker as secondary, got %s", nodePools[1].Name) + } +} + +func TestHasNodesForPool_Primary(t *testing.T) { + pools := []mcov1.MachineConfigPool{ + newMCP("worker", &metav1.LabelSelector{ + MatchLabels: map[string]string{"node-role.kubernetes.io/worker": ""}, + }), + newMCP("workerB", &metav1.LabelSelector{ + MatchLabels: map[string]string{"node-role.kubernetes.io/workerB": ""}, + }), + } + nodes := []corev1.Node{ + newNode("node1", map[string]string{ + "node-role.kubernetes.io/worker": "", + "node-role.kubernetes.io/workerB": "", + }), + newNode("node2", map[string]string{ + "node-role.kubernetes.io/worker": "", + }), + } + + has, err := HasNodesForPool(pools, &pools[1], nodes) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !has { + t.Fatal("expected workerB to have nodes (node1), got false") + } + + has, err = HasNodesForPool(pools, &pools[0], nodes) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !has { + t.Fatal("expected worker to have nodes (node2), got false") + } +} + +func TestHasNodesForPool_NoNodesForWorkerWhenAllCustom(t *testing.T) { + pools := []mcov1.MachineConfigPool{ + newMCP("worker", &metav1.LabelSelector{ + MatchLabels: map[string]string{"node-role.kubernetes.io/worker": ""}, + }), + newMCP("workerB", &metav1.LabelSelector{ + MatchLabels: map[string]string{"node-role.kubernetes.io/workerB": ""}, + }), + } + nodes := []corev1.Node{ + newNode("node1", map[string]string{ + "node-role.kubernetes.io/worker": "", + "node-role.kubernetes.io/workerB": "", + }), + newNode("node2", map[string]string{ + "node-role.kubernetes.io/worker": "", + "node-role.kubernetes.io/workerB": "", + }), + } + + has, err := HasNodesForPool(pools, &pools[0], nodes) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if has { + t.Fatal("expected worker to have no primary nodes when all are in custom pool, got true") + } + + has, err = HasNodesForPool(pools, &pools[1], nodes) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !has { + t.Fatal("expected workerB to have nodes, got false") + } +} + +func TestGetNodesForPool(t *testing.T) { + pools := []mcov1.MachineConfigPool{ + newMCP("worker", &metav1.LabelSelector{ + MatchLabels: map[string]string{"node-role.kubernetes.io/worker": ""}, + }), + newMCP("workerB", &metav1.LabelSelector{ + MatchLabels: map[string]string{"node-role.kubernetes.io/workerB": ""}, + }), + } + nodes := []corev1.Node{ + newNode("node1", map[string]string{ + "node-role.kubernetes.io/worker": "", + "node-role.kubernetes.io/workerB": "", + }), + newNode("node2", map[string]string{ + "node-role.kubernetes.io/worker": "", + }), + newNode("node3", map[string]string{ + "node-role.kubernetes.io/worker": "", + "node-role.kubernetes.io/workerB": "", + }), + } + + customNodes, err := GetNodesForPool(pools, &pools[1], nodes) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(customNodes) != 2 { + t.Fatalf("expected 2 nodes for workerB, got %d", len(customNodes)) + } + + workerNodes, err := GetNodesForPool(pools, &pools[0], nodes) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(workerNodes) != 1 { + t.Fatalf("expected 1 node for worker, got %d", len(workerNodes)) + } + if workerNodes[0].Name != "node2" { + t.Fatalf("expected node2 for worker pool, got %s", workerNodes[0].Name) + } +} + +func TestHasNodesForPool_NoMatchingNodesReturnsFalse(t *testing.T) { + pools := []mcov1.MachineConfigPool{ + newMCP("worker", &metav1.LabelSelector{ + MatchLabels: map[string]string{"node-role.kubernetes.io/worker": ""}, + }), + newMCP("workerB", &metav1.LabelSelector{ + MatchLabels: map[string]string{"node-role.kubernetes.io/workerB": ""}, + }), + } + nodes := []corev1.Node{ + newNode("node1", map[string]string{ + "node-role.kubernetes.io/worker": "", + }), + } + + has, err := HasNodesForPool(pools, &pools[1], nodes) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if has { + t.Fatal("expected false when no nodes match the pool's selector, got true") + } +} + +func TestListPools_EmptySelector(t *testing.T) { + pools := []mcov1.MachineConfigPool{ + newMCP("empty", &metav1.LabelSelector{}), + } + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + Labels: map[string]string{"node-role.kubernetes.io/worker": ""}, + }, + } + + master, worker, custom, err := ListPools(node, pools) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if master != nil || worker != nil || len(custom) > 0 { + t.Fatal("expected no pools to match for empty selector") + } +} + +func TestGetPoolsForNode_CompactCluster(t *testing.T) { + pools := []mcov1.MachineConfigPool{ + newMCP("master", &metav1.LabelSelector{ + MatchLabels: map[string]string{"node-role.kubernetes.io/master": ""}, + }), + newMCP("worker", &metav1.LabelSelector{ + MatchLabels: map[string]string{"node-role.kubernetes.io/worker": ""}, + }), + } + + node := newNode("compact-node", map[string]string{ + "node-role.kubernetes.io/master": "", + "node-role.kubernetes.io/worker": "", + }) + + result, err := GetPoolsForNode(pools, &node) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(result) != 2 { + t.Fatalf("expected 2 pools (master, worker), got %d", len(result)) + } + if result[0].Name != "master" { + t.Fatalf("expected first pool to be master, got %s", result[0].Name) + } + if result[1].Name != "worker" { + t.Fatalf("expected second pool to be worker, got %s", result[1].Name) + } +} diff --git a/pkg/objectstate/rte/machineconfigpool.go b/pkg/objectstate/rte/machineconfigpool.go index 4994e9890c..94d7d84060 100644 --- a/pkg/objectstate/rte/machineconfigpool.go +++ b/pkg/objectstate/rte/machineconfigpool.go @@ -31,6 +31,7 @@ import ( nropv1 "github.com/openshift-kni/numaresources-operator/api/v1" nodegroupv1 "github.com/openshift-kni/numaresources-operator/api/v1/helper/nodegroup" "github.com/openshift-kni/numaresources-operator/internal/api/annotations" + labels "github.com/openshift-kni/numaresources-operator/internal/api/labels" "github.com/openshift-kni/numaresources-operator/pkg/objectnames" "github.com/openshift-kni/numaresources-operator/pkg/objectstate" "github.com/openshift-kni/numaresources-operator/pkg/objectstate/compare" @@ -85,12 +86,10 @@ func (obj machineConfigPoolFinder) FindState(mf Manifests, tree nodegroupv1.Tree desiredDaemonSet := mf.Core.DaemonSet.DeepCopy() desiredDaemonSet.Name = generatedName - var updateError error - if mcp.Spec.NodeSelector != nil { - desiredDaemonSet.Spec.Template.Spec.NodeSelector = mcp.Spec.NodeSelector.MatchLabels - } else { - updateError = fmt.Errorf("the machine config pool %q does not have node selector", mcp.Name) + desiredDaemonSet.Spec.Template.Spec.NodeSelector = map[string]string{ + labels.NodePrimaryPool: mcp.Name, } + var updateError error gdm := GeneratedDesiredManifest{ ClusterPlatform: obj.em.plat, diff --git a/test/e2e/install/install_test.go b/test/e2e/install/install_test.go index b3cf3cc3a7..1aed69a11f 100644 --- a/test/e2e/install/install_test.go +++ b/test/e2e/install/install_test.go @@ -37,11 +37,13 @@ import ( machineconfigv1 "github.com/openshift/api/machineconfiguration/v1" "github.com/k8stopologyawareschedwg/deployer/pkg/assets/selinux" + "github.com/k8stopologyawareschedwg/deployer/pkg/deployer/platform" "github.com/k8stopologyawareschedwg/deployer/pkg/manifests/rte" nrtv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2" nropv1 "github.com/openshift-kni/numaresources-operator/api/v1" inthelper "github.com/openshift-kni/numaresources-operator/internal/api/annotations/helper" + nrolabels "github.com/openshift-kni/numaresources-operator/internal/api/labels" nropmcp "github.com/openshift-kni/numaresources-operator/internal/machineconfigpools" "github.com/openshift-kni/numaresources-operator/internal/podlogs" nrowait "github.com/openshift-kni/numaresources-operator/internal/wait" @@ -151,6 +153,8 @@ var _ = Describe("[Install] continuousIntegration", Serial, func() { rteContainer, err := findContainerByName(*ds, containerNameRTE) Expect(err).ToNot(HaveOccurred()) Expect(rteContainer.SecurityContext.SELinuxOptions.Type).To(Equal(selinux.RTEContextType), "container %s is running with wrong selinux context", rteContainer.Name) + + expectNROPrimaryPoolLabelsPresent(ds) }) }) }) @@ -265,10 +269,14 @@ var _ = Describe("[Install] durability", Serial, func() { return err }).WithTimeout(5 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) + expectNROPrimaryPoolLabelsPresent(ds) + deleteNROPSync(e2eclient.Client, nroObj) deploy.WaitForMCPUpdatedAfterNRODeleted(nroObj) + expectNROPrimaryPoolLabelsAbsent() + By("checking there are no leftovers") // by taking the ns from the ds we're avoiding the need to figure out in advanced // at which ns we should look for the resources @@ -326,6 +334,7 @@ var _ = Describe("[Install] durability", Serial, func() { return ds.Spec.Template.Spec.Containers[0].Image == e2eimages.RTETestImageCI }).WithTimeout(5 * time.Minute).WithPolling(10 * time.Second).Should(BeTrue()) }) + It("should have the desired topology manager configuration under the NRT object", func() { rteConfigMap := &corev1.ConfigMap{} Eventually(func() bool { @@ -415,6 +424,45 @@ func getDaemonSetByOwnerReference(uid types.UID) (*appsv1.DaemonSet, error) { return nil, fmt.Errorf("failed to get daemonset with owner reference uid: %s", uid) } +func expectNROPrimaryPoolLabelsPresent(ds *appsv1.DaemonSet) { + GinkgoHelper() + if configuration.Plat != platform.OpenShift { + return + } + + By("checking that managed nodes have the NRO primary-pool label") + sel, err := labels.Parse(nrolabels.NodePrimaryPool) + Expect(err).ToNot(HaveOccurred()) + nodeList := &corev1.NodeList{} + err = e2eclient.Client.List(context.TODO(), nodeList, &client.ListOptions{LabelSelector: sel}) + Expect(err).ToNot(HaveOccurred()) + Expect(nodeList.Items).ToNot(BeEmpty(), "expected at least one node with the NRO primary-pool label") + for _, node := range nodeList.Items { + val := node.Labels[nrolabels.NodePrimaryPool] + Expect(val).ToNot(BeEmpty(), "node %s has the NRO label key but empty value", node.Name) + klog.InfoS("node has NRO primary-pool label", "node", node.Name, "pool", val) + } + + By("checking that the DaemonSet NodeSelector uses the NRO primary-pool label") + _, hasKey := ds.Spec.Template.Spec.NodeSelector[nrolabels.NodePrimaryPool] + Expect(hasKey).To(BeTrue(), "DaemonSet NodeSelector should contain %s", nrolabels.NodePrimaryPool) +} + +func expectNROPrimaryPoolLabelsAbsent() { + GinkgoHelper() + if configuration.Plat != platform.OpenShift { + return + } + + By("checking that the NRO primary-pool labels were removed from all nodes") + sel, err := labels.Parse(nrolabels.NodePrimaryPool) + Expect(err).ToNot(HaveOccurred()) + nodeList := &corev1.NodeList{} + err = e2eclient.Client.List(context.TODO(), nodeList, &client.ListOptions{LabelSelector: sel}) + Expect(err).ToNot(HaveOccurred()) + Expect(nodeList.Items).To(BeEmpty(), "expected no nodes with the NRO primary-pool label after deletion, but found %d", len(nodeList.Items)) +} + func logRTEPodsLogs(cli client.Client, k8sCli *kubernetes.Clientset, ctx context.Context, nroObj *nropv1.NUMAResourcesOperator, reason string) { dss, err := objects.GetDaemonSetsByNamespacedName(cli, ctx, nroObj.Status.DaemonSets...) if err != nil {