diff --git a/cmd/manager/main.go b/cmd/manager/main.go index d819749d4..897e66cac 100644 --- a/cmd/manager/main.go +++ b/cmd/manager/main.go @@ -166,8 +166,8 @@ func main() { cmd.FatalError(setupLogger, err, "unable to create controller", "name", controllers.NodeModulesConfigReconcilerName) } - if err = controllers.NewDevicePluginPodReconciler(client).SetupWithManager(mgr); err != nil { - cmd.FatalError(setupLogger, err, "unable to create controller", "name", controllers.DevicePluginPodReconcilerName) + if err = controllers.NewPodNodeLabelReconciler(client).SetupWithManager(mgr); err != nil { + cmd.FatalError(setupLogger, err, "unable to create controller", "name", controllers.PodNodeLabelReconcilerName) } if err = controllers.NewDRAReconciler(client, nodeAPI, scheme).SetupWithManager(mgr); err != nil { diff --git a/internal/controllers/device_plugin_pod_reconciler_test.go b/internal/controllers/device_plugin_pod_reconciler_test.go deleted file mode 100644 index 04988dc21..000000000 --- a/internal/controllers/device_plugin_pod_reconciler_test.go +++ /dev/null @@ -1,214 +0,0 @@ -package controllers - -import ( - "context" - "errors" - - mock_client "github.com/kubernetes-sigs/kernel-module-management/internal/client" - "github.com/kubernetes-sigs/kernel-module-management/internal/constants" - "github.com/kubernetes-sigs/kernel-module-management/internal/utils" - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" - "go.uber.org/mock/gomock" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -var _ = Describe("DevicePluginPodReconciler_Reconcile", func() { - const ( - moduleName = "module-name" - nodeName = "node-name" - podName = "pod-name" - podNamespace = "pod-namespace" - ) - - var ( - kubeClient *mock_client.MockClient - r *DevicePluginPodReconciler - ) - - BeforeEach(func() { - ctrl := gomock.NewController(GinkgoT()) - kubeClient = mock_client.NewMockClient(ctrl) - r = NewDevicePluginPodReconciler(kubeClient) - }) - - ctx := context.Background() - - It("should return an error if the pod is not labeled", func() { - _, err := r.Reconcile(ctx, &v1.Pod{}) - Expect(err).To(HaveOccurred()) - }) - - It("should return an error if we failed to get the list of pods", func() { - pod := &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{constants.ModuleNameLabel: moduleName}, - Name: podName, - }, - Spec: v1.PodSpec{NodeName: nodeName}, - } - - kubeClient.EXPECT().List(ctx, gomock.Any(), gomock.Any(), gomock.Any()).Return(errors.New("some error")) - - _, err := r.Reconcile(ctx, pod) - Expect(err).To(HaveOccurred()) - }) - - It("should unlabel the node when a Pod is not ready", func() { - var ( - labelSelector = client.MatchingLabels{constants.ModuleNameLabel: moduleName} - fieldSelector = client.MatchingFields{"spec.nodeName": nodeName} - ) - - gomock.InOrder( - kubeClient.EXPECT().List(ctx, gomock.Any(), labelSelector, fieldSelector).Return(nil), - kubeClient.EXPECT().Get(ctx, gomock.Any(), gomock.Any()).Do( - func(_ interface{}, _ interface{}, node *v1.Node, _ ...client.GetOption) { - node.SetLabels(map[string]string{utils.GetDevicePluginNodeLabel(podNamespace, moduleName): ""}) - }, - ), - kubeClient.EXPECT().Patch(ctx, gomock.Any(), gomock.Any()).Do( - func(_ interface{}, node *v1.Node, p client.Patch, _ ...client.GetOption) { - Expect(p.Type()).To(Equal(types.MergePatchType)) - Expect(p.Data(node)).To(Equal([]byte(`{"metadata":{"labels":null}}`))) - }, - ), - ) - - pod := &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{constants.ModuleNameLabel: moduleName}, - Name: podName, - Namespace: podNamespace, - }, - Spec: v1.PodSpec{NodeName: nodeName}, - } - - _, err := r.Reconcile(ctx, pod) - Expect(err).NotTo(HaveOccurred()) - }) - - It("should NOT unlabel the node when a Pod is not ready but there is a new running pod", func() { - var ( - labelSelector = client.MatchingLabels{constants.ModuleNameLabel: moduleName} - fieldSelector = client.MatchingFields{"spec.nodeName": nodeName} - ) - - kubeClient.EXPECT().List(ctx, gomock.Any(), labelSelector, fieldSelector).Do( - func(_ interface{}, modulePodsList *v1.PodList, _ ...client.ListOption) { - modulePodsList.Items = []v1.Pod{ - { - Status: v1.PodStatus{ - Conditions: []v1.PodCondition{ - { - Type: v1.PodReady, - Status: v1.ConditionTrue, - }, - }, - }, - }, - } - }, - ) - - pod := &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{constants.ModuleNameLabel: moduleName}, - Name: podName, - }, - Spec: v1.PodSpec{NodeName: nodeName}, - } - - _, err := r.Reconcile(ctx, pod) - Expect(err).NotTo(HaveOccurred()) - }) - - now := metav1.Now() - - patchRemoveFinalizerFunc := func(_ interface{}, pod *v1.Pod, p client.Patch, _ ...client.GetOption) { - Expect(p.Type()).To(Equal(types.MergePatchType)) - Expect(p.Data(pod)).To(Equal([]byte(`{"metadata":{"finalizers":null}}`))) - } - - It("should NOT label or unlabel when the Pod has no .spec.nodeName", func() { - kubeClient.EXPECT().Patch(ctx, gomock.AssignableToTypeOf(&v1.Pod{}), gomock.Any()).Do(patchRemoveFinalizerFunc) - - pod := &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - DeletionTimestamp: &now, - Finalizers: []string{constants.NodeLabelerFinalizer}, - Labels: map[string]string{constants.ModuleNameLabel: moduleName}, - Name: podName, - }, - } - - _, err := r.Reconcile(ctx, pod) - Expect(err).NotTo(HaveOccurred()) - }) - - It("should label the node when a Pod is ready", func() { - gomock.InOrder( - kubeClient.EXPECT().Get(ctx, gomock.Any(), gomock.Any()).Return(nil), - kubeClient.EXPECT().Patch(ctx, gomock.Any(), gomock.Any()).Do( - func(_ interface{}, node *v1.Node, p client.Patch, _ ...client.GetOption) { - Expect(p.Type()).To(Equal(types.MergePatchType)) - data, err := p.Data(node) - Expect(err).NotTo(HaveOccurred()) - Expect(data).To(ContainSubstring("labels")) - Expect(data).To(ContainSubstring(utils.GetDevicePluginNodeLabel(podNamespace, moduleName))) - }, - ), - ) - - pod := &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Finalizers: []string{constants.NodeLabelerFinalizer}, - Labels: map[string]string{constants.ModuleNameLabel: moduleName}, - Name: podName, - Namespace: podNamespace, - }, - Spec: v1.PodSpec{NodeName: nodeName}, - Status: v1.PodStatus{ - Conditions: []v1.PodCondition{ - { - Type: v1.PodReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - - _, err := r.Reconcile(ctx, pod) - Expect(err).NotTo(HaveOccurred()) - }) - - It("should remove the pod finalizer when the pod is being deleted", func() { - var ( - labelSelector = client.MatchingLabels{constants.ModuleNameLabel: moduleName} - fieldSelector = client.MatchingFields{"spec.nodeName": nodeName} - ) - - gomock.InOrder( - kubeClient.EXPECT().List(ctx, gomock.Any(), labelSelector, fieldSelector).Return(nil), - kubeClient.EXPECT().Get(ctx, gomock.Any(), gomock.Any()).Return(nil), - kubeClient.EXPECT().Patch(ctx, gomock.Any(), gomock.Any()).Return(nil), - kubeClient.EXPECT().Patch(ctx, gomock.Any(), gomock.Any()).Do(patchRemoveFinalizerFunc), - ) - - pod := &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - DeletionTimestamp: &now, - Finalizers: []string{constants.NodeLabelerFinalizer}, - Labels: map[string]string{constants.ModuleNameLabel: moduleName}, - Name: podName, - }, - Spec: v1.PodSpec{NodeName: nodeName}, - } - - _, err := r.Reconcile(ctx, pod) - Expect(err).NotTo(HaveOccurred()) - }) -}) diff --git a/internal/controllers/device_plugin_pod_reconciler.go b/internal/controllers/pod_node_label_reconciler.go similarity index 64% rename from internal/controllers/device_plugin_pod_reconciler.go rename to internal/controllers/pod_node_label_reconciler.go index 62893013a..dc9cb7f5e 100644 --- a/internal/controllers/device_plugin_pod_reconciler.go +++ b/internal/controllers/pod_node_label_reconciler.go @@ -12,23 +12,22 @@ import ( "k8s.io/kubectl/pkg/util/podutils" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/reconcile" - "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" ) -const DevicePluginPodReconcilerName = "DevicePluginPod" +const PodNodeLabelReconcilerName = "PodNodeLabel" -type DevicePluginPodReconciler struct { +type PodNodeLabelReconciler struct { client client.Client } -func NewDevicePluginPodReconciler(client client.Client) *DevicePluginPodReconciler { - return &DevicePluginPodReconciler{client: client} +func NewPodNodeLabelReconciler(client client.Client) *PodNodeLabelReconciler { + return &PodNodeLabelReconciler{client: client} } -func (dppr *DevicePluginPodReconciler) Reconcile(ctx context.Context, pod *v1.Pod) (ctrl.Result, error) { +func (r *PodNodeLabelReconciler) Reconcile(ctx context.Context, pod *v1.Pod) (ctrl.Result, error) { logger := ctrl.LoggerFrom(ctx) nodeName := pod.Spec.NodeName @@ -38,18 +37,26 @@ func (dppr *DevicePluginPodReconciler) Reconcile(ctx context.Context, pod *v1.Po return ctrl.Result{}, fmt.Errorf("pod %s/%s has no %q label", pod.Namespace, pod.Name, constants.ModuleNameLabel) } + isDRA := pod.Labels[constants.DaemonSetRole] == constants.DRARoleLabelValue + + roleLabelValue := constants.DevicePluginRoleLabelValue labelName := utils.GetDevicePluginNodeLabel(pod.Namespace, moduleName) + if isDRA { + roleLabelValue = constants.DRARoleLabelValue + labelName = utils.GetDRANodeLabel(pod.Namespace, moduleName) + } logger = logger.WithValues( "node name", nodeName, "module name", moduleName, "label name", labelName, + "is DRA", isDRA, ) // when Daemonset/ReplicaSet controller deletes pod, the pods state stays Ready, // but its deletion timestamp is set. We use deletion timestamp to delete the label, // and not wait a probable TerminationGracePeriod, since Pre-Stop hooks is run - // at the beginning. IsodReady condition should still be checked, in case pod state + // at the beginning. IsPodReady condition should still be checked, in case pod state // has changed not due to Daemonset termination, but due to internal state of Daemonset on // cluster if !podutils.IsPodReady(pod) || !pod.DeletionTimestamp.IsZero() { @@ -59,23 +66,28 @@ func (dppr *DevicePluginPodReconciler) Reconcile(ctx context.Context, pod *v1.Po if nodeName != "" { logger.Info("Unlabeling node") - // Make sure we don't already have a new running pod before unlabeling the node - labelSelector := client.MatchingLabels{constants.ModuleNameLabel: moduleName} + // Making sure there is no other pod of the same role already running + labelSelector := client.MatchingLabels{ + constants.ModuleNameLabel: moduleName, + constants.DaemonSetRole: roleLabelValue, + } fieldSelector := client.MatchingFields{"spec.nodeName": nodeName} - var modulePodsList v1.PodList - err := dppr.client.List(ctx, &modulePodsList, labelSelector, fieldSelector) - if err != nil { - return ctrl.Result{}, fmt.Errorf("failed to get list of all pods for module %s on node %s: %v", moduleName, nodeName, err) + + var podsList v1.PodList + if err := r.client.List(ctx, &podsList, labelSelector, fieldSelector); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to get list of pods for module %s on node %s: %v", moduleName, nodeName, err) } + var foundRunningPod bool - for _, p := range modulePodsList.Items { + for _, p := range podsList.Items { if podutils.IsPodReady(&p) && p.DeletionTimestamp.IsZero() { foundRunningPod = true break } } + if !foundRunningPod { - if err := dppr.deleteLabel(ctx, nodeName, labelName); err != nil { + if err := r.deleteLabel(ctx, nodeName, labelName); err != nil { return ctrl.Result{}, fmt.Errorf("could not unlabel node %s with label %s: %v", nodeName, labelName, err) } @@ -89,7 +101,7 @@ func (dppr *DevicePluginPodReconciler) Reconcile(ctx context.Context, pod *v1.Po // the specified Pod has already been deleted. By ignoring NotFound errors we ensure // that no additional, unnecessary reconciliation request will be queued (since a // reconciliation result with a non-nil error will be requeued). - if err := dppr.deleteFinalizer(ctx, pod); client.IgnoreNotFound(err) != nil { + if err := r.deleteFinalizer(ctx, pod); client.IgnoreNotFound(err) != nil { return ctrl.Result{}, fmt.Errorf("could not delete the pod finalizer: %v", err) } } @@ -99,15 +111,54 @@ func (dppr *DevicePluginPodReconciler) Reconcile(ctx context.Context, pod *v1.Po logger.Info("Labeling node") - if err := dppr.addLabel(ctx, nodeName, labelName); err != nil { + if err := r.addLabel(ctx, nodeName, labelName); err != nil { return ctrl.Result{}, fmt.Errorf("could not label node %s with %s: %v", nodeName, labelName, err) } return ctrl.Result{}, nil } -// SetupWithManager sets up the controller with the Manager. -func (dppr *DevicePluginPodReconciler) SetupWithManager(mgr ctrl.Manager) error { +func (r *PodNodeLabelReconciler) addLabel(ctx context.Context, nodeName string, labelName string) error { + node := v1.Node{} + + if err := r.client.Get(ctx, types.NamespacedName{Name: nodeName}, &node); err != nil { + return fmt.Errorf("could not get node %s: %v", nodeName, err) + } + + nodeCopy := node.DeepCopy() + + if node.Labels == nil { + node.Labels = make(map[string]string) + } + + node.Labels[labelName] = "" + + return r.client.Patch(ctx, &node, client.MergeFrom(nodeCopy)) +} + +func (r *PodNodeLabelReconciler) deleteLabel(ctx context.Context, nodeName string, labelName string) error { + node := v1.Node{} + + if err := r.client.Get(ctx, types.NamespacedName{Name: nodeName}, &node); err != nil { + return fmt.Errorf("could not get node %s: %v", nodeName, err) + } + + nodeCopy := node.DeepCopy() + + delete(node.Labels, labelName) + + return r.client.Patch(ctx, &node, client.MergeFrom(nodeCopy)) +} + +func (r *PodNodeLabelReconciler) deleteFinalizer(ctx context.Context, pod *v1.Pod) error { + podCopy := pod.DeepCopy() + + controllerutil.RemoveFinalizer(pod, constants.NodeLabelerFinalizer) + + return r.client.Patch(ctx, pod, client.MergeFrom(podCopy)) +} + +func (r *PodNodeLabelReconciler) SetupWithManager(mgr ctrl.Manager) error { if err := mgr.GetFieldIndexer().IndexField(context.Background(), &v1.Pod{}, "spec.nodeName", func(rawObj client.Object) []string { pod := rawObj.(*v1.Pod) @@ -129,7 +180,7 @@ func (dppr *DevicePluginPodReconciler) SetupWithManager(mgr ctrl.Manager) error p := predicate.And( predicate.Or( filter.PodReadinessChangedPredicate( - mgr.GetLogger().WithName("pod-readiness-changed"), + mgr.GetLogger().WithName("pod-node-label-readiness-changed"), ), filter.DeletingPredicate(), ), @@ -139,50 +190,10 @@ func (dppr *DevicePluginPodReconciler) SetupWithManager(mgr ctrl.Manager) error return ctrl. NewControllerManagedBy(mgr). - Named(DevicePluginPodReconcilerName). + Named(PodNodeLabelReconcilerName). For(&v1.Pod{}). WithEventFilter(p). Complete( - reconcile.AsReconciler[*v1.Pod](dppr.client, dppr), + reconcile.AsReconciler[*v1.Pod](r.client, r), ) } - -func (dppr *DevicePluginPodReconciler) addLabel(ctx context.Context, nodeName string, labelName string) error { - node := v1.Node{} - - if err := dppr.client.Get(ctx, types.NamespacedName{Name: nodeName}, &node); err != nil { - return fmt.Errorf("could not get node %s: %v", nodeName, err) - } - - nodeCopy := node.DeepCopy() - - if node.Labels == nil { - node.Labels = make(map[string]string) - } - - node.Labels[labelName] = "" - - return dppr.client.Patch(ctx, &node, client.MergeFrom(nodeCopy)) -} - -func (dppr *DevicePluginPodReconciler) deleteFinalizer(ctx context.Context, pod *v1.Pod) error { - podCopy := pod.DeepCopy() - - controllerutil.RemoveFinalizer(pod, constants.NodeLabelerFinalizer) - - return dppr.client.Patch(ctx, pod, client.MergeFrom(podCopy)) -} - -func (dppr *DevicePluginPodReconciler) deleteLabel(ctx context.Context, nodeName string, labelName string) error { - node := v1.Node{} - - if err := dppr.client.Get(ctx, types.NamespacedName{Name: nodeName}, &node); err != nil { - return fmt.Errorf("could not get node %s: %v", nodeName, err) - } - - nodeCopy := node.DeepCopy() - - delete(node.Labels, labelName) - - return dppr.client.Patch(ctx, &node, client.MergeFrom(nodeCopy)) -} diff --git a/internal/controllers/pod_node_label_reconciler_test.go b/internal/controllers/pod_node_label_reconciler_test.go new file mode 100644 index 000000000..217565cc8 --- /dev/null +++ b/internal/controllers/pod_node_label_reconciler_test.go @@ -0,0 +1,447 @@ +package controllers + +import ( + "context" + "errors" + + mock_client "github.com/kubernetes-sigs/kernel-module-management/internal/client" + "github.com/kubernetes-sigs/kernel-module-management/internal/constants" + "github.com/kubernetes-sigs/kernel-module-management/internal/utils" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "go.uber.org/mock/gomock" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +var _ = Describe("PodNodeLabelReconciler_Reconcile", func() { + const ( + moduleName = "module-name" + nodeName = "node-name" + podName = "pod-name" + podNamespace = "pod-namespace" + ) + + var ( + kubeClient *mock_client.MockClient + r *PodNodeLabelReconciler + ) + + BeforeEach(func() { + ctrl := gomock.NewController(GinkgoT()) + kubeClient = mock_client.NewMockClient(ctrl) + r = NewPodNodeLabelReconciler(kubeClient) + }) + + ctx := context.Background() + + Context("device-plugin pods", func() { + It("should return an error if the pod is not labeled", func() { + _, err := r.Reconcile(ctx, &v1.Pod{}) + Expect(err).To(HaveOccurred()) + }) + + It("should return an error if we failed to get the list of pods", func() { + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{constants.ModuleNameLabel: moduleName}, + Name: podName, + }, + Spec: v1.PodSpec{NodeName: nodeName}, + } + + kubeClient.EXPECT().List(ctx, gomock.Any(), gomock.Any(), gomock.Any()).Return(errors.New("some error")) + + _, err := r.Reconcile(ctx, pod) + Expect(err).To(HaveOccurred()) + }) + + It("should unlabel the node when a Pod is not ready", func() { + var ( + labelSelector = client.MatchingLabels{ + constants.ModuleNameLabel: moduleName, + constants.DaemonSetRole: constants.DevicePluginRoleLabelValue, + } + fieldSelector = client.MatchingFields{"spec.nodeName": nodeName} + ) + + gomock.InOrder( + kubeClient.EXPECT().List(ctx, gomock.Any(), labelSelector, fieldSelector).Return(nil), + kubeClient.EXPECT().Get(ctx, gomock.Any(), gomock.Any()).Do( + func(_ interface{}, _ interface{}, node *v1.Node, _ ...client.GetOption) { + node.SetLabels(map[string]string{utils.GetDevicePluginNodeLabel(podNamespace, moduleName): ""}) + }, + ), + kubeClient.EXPECT().Patch(ctx, gomock.Any(), gomock.Any()).Do( + func(_ interface{}, node *v1.Node, p client.Patch, _ ...client.GetOption) { + Expect(p.Type()).To(Equal(types.MergePatchType)) + Expect(p.Data(node)).To(Equal([]byte(`{"metadata":{"labels":null}}`))) + }, + ), + ) + + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{constants.ModuleNameLabel: moduleName}, + Name: podName, + Namespace: podNamespace, + }, + Spec: v1.PodSpec{NodeName: nodeName}, + } + + _, err := r.Reconcile(ctx, pod) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should NOT unlabel the node when a Pod is not ready but there is a new running pod", func() { + var ( + labelSelector = client.MatchingLabels{ + constants.ModuleNameLabel: moduleName, + constants.DaemonSetRole: constants.DevicePluginRoleLabelValue, + } + fieldSelector = client.MatchingFields{"spec.nodeName": nodeName} + ) + + kubeClient.EXPECT().List(ctx, gomock.Any(), labelSelector, fieldSelector).Do( + func(_ interface{}, modulePodsList *v1.PodList, _ ...client.ListOption) { + modulePodsList.Items = []v1.Pod{ + { + Status: v1.PodStatus{ + Conditions: []v1.PodCondition{ + { + Type: v1.PodReady, + Status: v1.ConditionTrue, + }, + }, + }, + }, + } + }, + ) + + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{constants.ModuleNameLabel: moduleName}, + Name: podName, + }, + Spec: v1.PodSpec{NodeName: nodeName}, + } + + _, err := r.Reconcile(ctx, pod) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should NOT label or unlabel when the Pod has no .spec.nodeName", func() { + now := metav1.Now() + + kubeClient.EXPECT().Patch(ctx, gomock.AssignableToTypeOf(&v1.Pod{}), gomock.Any()).Do( + func(_ interface{}, pod *v1.Pod, p client.Patch, _ ...client.GetOption) { + Expect(p.Type()).To(Equal(types.MergePatchType)) + Expect(p.Data(pod)).To(Equal([]byte(`{"metadata":{"finalizers":null}}`))) + }, + ) + + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + DeletionTimestamp: &now, + Finalizers: []string{constants.NodeLabelerFinalizer}, + Labels: map[string]string{constants.ModuleNameLabel: moduleName}, + Name: podName, + }, + } + + _, err := r.Reconcile(ctx, pod) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should label the node when a Pod is ready", func() { + gomock.InOrder( + kubeClient.EXPECT().Get(ctx, gomock.Any(), gomock.Any()).Return(nil), + kubeClient.EXPECT().Patch(ctx, gomock.Any(), gomock.Any()).Do( + func(_ interface{}, node *v1.Node, p client.Patch, _ ...client.GetOption) { + Expect(p.Type()).To(Equal(types.MergePatchType)) + data, err := p.Data(node) + Expect(err).NotTo(HaveOccurred()) + Expect(data).To(ContainSubstring("labels")) + Expect(data).To(ContainSubstring(utils.GetDevicePluginNodeLabel(podNamespace, moduleName))) + }, + ), + ) + + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Finalizers: []string{constants.NodeLabelerFinalizer}, + Labels: map[string]string{constants.ModuleNameLabel: moduleName}, + Name: podName, + Namespace: podNamespace, + }, + Spec: v1.PodSpec{NodeName: nodeName}, + Status: v1.PodStatus{ + Conditions: []v1.PodCondition{ + { + Type: v1.PodReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + + _, err := r.Reconcile(ctx, pod) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should remove the pod finalizer when the pod is being deleted", func() { + now := metav1.Now() + var ( + labelSelector = client.MatchingLabels{ + constants.ModuleNameLabel: moduleName, + constants.DaemonSetRole: constants.DevicePluginRoleLabelValue, + } + fieldSelector = client.MatchingFields{"spec.nodeName": nodeName} + ) + + patchRemoveFinalizerFunc := func(_ interface{}, pod *v1.Pod, p client.Patch, _ ...client.GetOption) { + Expect(p.Type()).To(Equal(types.MergePatchType)) + Expect(p.Data(pod)).To(Equal([]byte(`{"metadata":{"finalizers":null}}`))) + } + + gomock.InOrder( + kubeClient.EXPECT().List(ctx, gomock.Any(), labelSelector, fieldSelector).Return(nil), + kubeClient.EXPECT().Get(ctx, gomock.Any(), gomock.Any()).Return(nil), + kubeClient.EXPECT().Patch(ctx, gomock.Any(), gomock.Any()).Return(nil), + kubeClient.EXPECT().Patch(ctx, gomock.Any(), gomock.Any()).Do(patchRemoveFinalizerFunc), + ) + + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + DeletionTimestamp: &now, + Finalizers: []string{constants.NodeLabelerFinalizer}, + Labels: map[string]string{constants.ModuleNameLabel: moduleName}, + Name: podName, + }, + Spec: v1.PodSpec{NodeName: nodeName}, + } + + _, err := r.Reconcile(ctx, pod) + Expect(err).NotTo(HaveOccurred()) + }) + }) + + Context("DRA pods", func() { + It("should return an error if the pod is not labeled", func() { + _, err := r.Reconcile(ctx, &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + constants.DaemonSetRole: constants.DRARoleLabelValue, + }, + }, + }) + Expect(err).To(HaveOccurred()) + }) + + It("should return an error if we failed to get the list of DRA pods", func() { + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + constants.ModuleNameLabel: moduleName, + constants.DaemonSetRole: constants.DRARoleLabelValue, + }, + Name: podName, + }, + Spec: v1.PodSpec{NodeName: nodeName}, + } + + kubeClient.EXPECT().List(ctx, gomock.Any(), gomock.Any(), gomock.Any()).Return(errors.New("some error")) + + _, err := r.Reconcile(ctx, pod) + Expect(err).To(HaveOccurred()) + }) + + It("should unlabel the node when a DRA Pod is not ready", func() { + var ( + labelSelector = client.MatchingLabels{ + constants.ModuleNameLabel: moduleName, + constants.DaemonSetRole: constants.DRARoleLabelValue, + } + fieldSelector = client.MatchingFields{"spec.nodeName": nodeName} + ) + + gomock.InOrder( + kubeClient.EXPECT().List(ctx, gomock.Any(), labelSelector, fieldSelector).Return(nil), + kubeClient.EXPECT().Get(ctx, gomock.Any(), gomock.Any()).Do( + func(_ interface{}, _ interface{}, node *v1.Node, _ ...client.GetOption) { + node.SetLabels(map[string]string{utils.GetDRANodeLabel(podNamespace, moduleName): ""}) + }, + ), + kubeClient.EXPECT().Patch(ctx, gomock.Any(), gomock.Any()).Do( + func(_ interface{}, node *v1.Node, p client.Patch, _ ...client.GetOption) { + Expect(p.Type()).To(Equal(types.MergePatchType)) + Expect(p.Data(node)).To(Equal([]byte(`{"metadata":{"labels":null}}`))) + }, + ), + ) + + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + constants.ModuleNameLabel: moduleName, + constants.DaemonSetRole: constants.DRARoleLabelValue, + }, + Name: podName, + Namespace: podNamespace, + }, + Spec: v1.PodSpec{NodeName: nodeName}, + } + + _, err := r.Reconcile(ctx, pod) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should NOT unlabel the node when a DRA Pod is not ready but there is a new running DRA pod", func() { + var ( + labelSelector = client.MatchingLabels{ + constants.ModuleNameLabel: moduleName, + constants.DaemonSetRole: constants.DRARoleLabelValue, + } + fieldSelector = client.MatchingFields{"spec.nodeName": nodeName} + ) + + kubeClient.EXPECT().List(ctx, gomock.Any(), labelSelector, fieldSelector).Do( + func(_ interface{}, draPodsList *v1.PodList, _ ...client.ListOption) { + draPodsList.Items = []v1.Pod{ + { + Status: v1.PodStatus{ + Conditions: []v1.PodCondition{ + { + Type: v1.PodReady, + Status: v1.ConditionTrue, + }, + }, + }, + }, + } + }, + ) + + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + constants.ModuleNameLabel: moduleName, + constants.DaemonSetRole: constants.DRARoleLabelValue, + }, + Name: podName, + }, + Spec: v1.PodSpec{NodeName: nodeName}, + } + + _, err := r.Reconcile(ctx, pod) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should label the node with dra-ready when a DRA Pod is ready", func() { + gomock.InOrder( + kubeClient.EXPECT().Get(ctx, gomock.Any(), gomock.Any()).Return(nil), + kubeClient.EXPECT().Patch(ctx, gomock.Any(), gomock.Any()).Do( + func(_ interface{}, node *v1.Node, p client.Patch, _ ...client.GetOption) { + Expect(p.Type()).To(Equal(types.MergePatchType)) + data, err := p.Data(node) + Expect(err).NotTo(HaveOccurred()) + Expect(data).To(ContainSubstring("labels")) + Expect(data).To(ContainSubstring(utils.GetDRANodeLabel(podNamespace, moduleName))) + }, + ), + ) + + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Finalizers: []string{constants.NodeLabelerFinalizer}, + Labels: map[string]string{ + constants.ModuleNameLabel: moduleName, + constants.DaemonSetRole: constants.DRARoleLabelValue, + }, + Name: podName, + Namespace: podNamespace, + }, + Spec: v1.PodSpec{NodeName: nodeName}, + Status: v1.PodStatus{ + Conditions: []v1.PodCondition{ + { + Type: v1.PodReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + + _, err := r.Reconcile(ctx, pod) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should NOT label or unlabel when the DRA Pod has no .spec.nodeName", func() { + now := metav1.Now() + + kubeClient.EXPECT().Patch(ctx, gomock.AssignableToTypeOf(&v1.Pod{}), gomock.Any()).Do( + func(_ interface{}, pod *v1.Pod, p client.Patch, _ ...client.GetOption) { + Expect(p.Type()).To(Equal(types.MergePatchType)) + Expect(p.Data(pod)).To(Equal([]byte(`{"metadata":{"finalizers":null}}`))) + }, + ) + + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + DeletionTimestamp: &now, + Finalizers: []string{constants.NodeLabelerFinalizer}, + Labels: map[string]string{ + constants.ModuleNameLabel: moduleName, + constants.DaemonSetRole: constants.DRARoleLabelValue, + }, + Name: podName, + }, + } + + _, err := r.Reconcile(ctx, pod) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should remove the pod finalizer when the DRA pod is being deleted", func() { + now := metav1.Now() + var ( + labelSelector = client.MatchingLabels{ + constants.ModuleNameLabel: moduleName, + constants.DaemonSetRole: constants.DRARoleLabelValue, + } + fieldSelector = client.MatchingFields{"spec.nodeName": nodeName} + ) + + patchRemoveFinalizerFunc := func(_ interface{}, pod *v1.Pod, p client.Patch, _ ...client.GetOption) { + Expect(p.Type()).To(Equal(types.MergePatchType)) + Expect(p.Data(pod)).To(Equal([]byte(`{"metadata":{"finalizers":null}}`))) + } + + gomock.InOrder( + kubeClient.EXPECT().List(ctx, gomock.Any(), labelSelector, fieldSelector).Return(nil), + kubeClient.EXPECT().Get(ctx, gomock.Any(), gomock.Any()).Return(nil), + kubeClient.EXPECT().Patch(ctx, gomock.Any(), gomock.Any()).Return(nil), + kubeClient.EXPECT().Patch(ctx, gomock.Any(), gomock.Any()).Do(patchRemoveFinalizerFunc), + ) + + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + DeletionTimestamp: &now, + Finalizers: []string{constants.NodeLabelerFinalizer}, + Labels: map[string]string{ + constants.ModuleNameLabel: moduleName, + constants.DaemonSetRole: constants.DRARoleLabelValue, + }, + Name: podName, + }, + Spec: v1.PodSpec{NodeName: nodeName}, + } + + _, err := r.Reconcile(ctx, pod) + Expect(err).NotTo(HaveOccurred()) + }) + }) +}) diff --git a/internal/utils/kmmlabels.go b/internal/utils/kmmlabels.go index eeecffbe7..1318384d7 100644 --- a/internal/utils/kmmlabels.go +++ b/internal/utils/kmmlabels.go @@ -86,6 +86,10 @@ func GetDevicePluginNodeLabel(namespace, moduleName string) string { return fmt.Sprintf("kmm.node.kubernetes.io/%s.%s.device-plugin-ready", namespace, moduleName) } +func GetDRANodeLabel(namespace, moduleName string) string { + return fmt.Sprintf("kmm.node.kubernetes.io/%s.%s.dra-ready", namespace, moduleName) +} + func GetDevicePluginTargetNodeLabel(namespace, moduleName string) string { return fmt.Sprintf("kmm.node.kubernetes.io/%s.%s.device-plugin-target", namespace, moduleName) } diff --git a/internal/utils/kmmlabels_test.go b/internal/utils/kmmlabels_test.go index acf20d07a..50cf4d907 100644 --- a/internal/utils/kmmlabels_test.go +++ b/internal/utils/kmmlabels_test.go @@ -26,6 +26,13 @@ var _ = Describe("GetDevicePluginVersionLabelName", func() { }) }) +var _ = Describe("GetDRANodeLabel", func() { + It("should work as expected", func() { + res := GetDRANodeLabel("some-namespace", "some-name") + Expect(res).To(Equal("kmm.node.kubernetes.io/some-namespace.some-name.dra-ready")) + }) +}) + var _ = Describe("GetNamespaceNameFromVersionLabel", func() { DescribeTable("should return correct name and namespace", func(versionLabel, expectedNamespace, expectedName string, expectsErr bool) { diff --git a/pkg/labels/labels.go b/pkg/labels/labels.go index 201b234b2..f348cd8bf 100644 --- a/pkg/labels/labels.go +++ b/pkg/labels/labels.go @@ -12,6 +12,10 @@ func GetDevicePluginNodeLabel(namespace, moduleName string) string { return utils.GetDevicePluginNodeLabel(namespace, moduleName) } +func GetDRANodeLabel(namespace, moduleName string) string { + return utils.GetDRANodeLabel(namespace, moduleName) +} + func GetModuleVersionLabelName(namespace, name string) string { return utils.GetModuleVersionLabelName(namespace, name) } diff --git a/pkg/labels/labels_test.go b/pkg/labels/labels_test.go index f6c1f8ce6..17f50b212 100644 --- a/pkg/labels/labels_test.go +++ b/pkg/labels/labels_test.go @@ -16,6 +16,11 @@ var _ = Describe("GetModuleReadyAndDevicePluginReadyLabels", func() { Expect(res).To(Equal("kmm.node.kubernetes.io/some-namespace.some-module.device-plugin-ready")) }) + It("dra ready label", func() { + res := GetDRANodeLabel("some-namespace", "some-module") + Expect(res).To(Equal("kmm.node.kubernetes.io/some-namespace.some-module.dra-ready")) + }) + It("module version label", func() { res := GetModuleVersionLabelName("some-namespace", "some-module") Expect(res).To(Equal("kmm.node.kubernetes.io/version-module.some-namespace.some-module"))