diff --git a/config/rbac/scheduler/role.yaml b/config/rbac/scheduler/role.yaml
index 18372c7..7592947 100644
--- a/config/rbac/scheduler/role.yaml
+++ b/config/rbac/scheduler/role.yaml
@@ -78,6 +78,7 @@ rules:
 - apiGroups:
   - apps
   resources:
+  - deployments
   - replicasets
   - statefulsets
   verbs:
diff --git a/helm/slurm-bridge/files/scheduler_rbac_rules.yaml b/helm/slurm-bridge/files/scheduler_rbac_rules.yaml
index 2feb46e..1d9e4b6 100644
--- a/helm/slurm-bridge/files/scheduler_rbac_rules.yaml
+++ b/helm/slurm-bridge/files/scheduler_rbac_rules.yaml
@@ -73,6 +73,7 @@ rules:
 - apiGroups:
   - apps
   resources:
+  - deployments
   - replicasets
   - statefulsets
   verbs:
diff --git a/internal/scheduler/plugins/slurmbridge/scheduler_test.go b/internal/scheduler/plugins/slurmbridge/scheduler_test.go
new file mode 100644
index 0000000..2e2386c
--- /dev/null
+++ b/internal/scheduler/plugins/slurmbridge/scheduler_test.go
@@ -0,0 +1,86 @@
+// SPDX-FileCopyrightText: Copyright (C) SchedMD LLC.
+// SPDX-License-Identifier: Apache-2.0
+
+package slurmbridge
+
+import (
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/utils/ptr"
+
+	"github.com/SlinkyProject/slurm-bridge/internal/utils/slurmjobir"
+)
+
+var _ = Describe("Scheduler RBAC", func() { // relies on k8sClient, schedulerClient and ctx set up by BeforeSuite in suite_test.go
+	Context("Translating a Deployment-owned pod under real RBAC enforcement", func() {
+		It("walks the owner chain Pod -> ReplicaSet -> Deployment without RBAC errors", func() {
+			ns := corev1.NamespaceDefault
+			labels := map[string]string{"app": "rbac-int-test"} // shared by both selectors, the pod template and the pod itself
+
+			deployment := &appsv1.Deployment{
+				ObjectMeta: metav1.ObjectMeta{Name: "rbac-int-deploy", Namespace: ns},
+				Spec: appsv1.DeploymentSpec{
+					Replicas: ptr.To(int32(1)),
+					Selector: &metav1.LabelSelector{MatchLabels: labels},
+					Template: corev1.PodTemplateSpec{
+						ObjectMeta: metav1.ObjectMeta{Labels: labels},
+						Spec: corev1.PodSpec{
+							Containers: []corev1.Container{{Name: "sleep", Image: "busybox"}},
+						},
+					},
+				},
+			}
+			Expect(k8sClient.Create(ctx, deployment)).To(Succeed()) // fixtures are created with k8sClient, not the impersonated client
+
+			rs := &appsv1.ReplicaSet{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "rbac-int-rs",
+					Namespace: ns,
+					OwnerReferences: []metav1.OwnerReference{{
+						APIVersion: "apps/v1",
+						Kind:       "Deployment",
+						Name:       deployment.Name,
+						UID:        deployment.UID, // read after the Deployment Create above
+						Controller: ptr.To(true),
+					}},
+				},
+				Spec: appsv1.ReplicaSetSpec{
+					Replicas: ptr.To(int32(1)),
+					Selector: &metav1.LabelSelector{MatchLabels: labels},
+					Template: deployment.Spec.Template, // reuse the Deployment's pod template
+				},
+			}
+			Expect(k8sClient.Create(ctx, rs)).To(Succeed())
+
+			pod := &corev1.Pod{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "rbac-int-pod",
+					Namespace: ns,
+					Labels:    labels,
+					OwnerReferences: []metav1.OwnerReference{{
+						APIVersion: "apps/v1",
+						Kind:       "ReplicaSet",
+						Name:       rs.Name,
+						UID:        rs.UID, // read after the ReplicaSet Create above
+						Controller: ptr.To(true),
+					}},
+				},
+				Spec: corev1.PodSpec{
+					Containers: []corev1.Container{{Name: "sleep", Image: "busybox"}},
+				},
+			}
+			Expect(k8sClient.Create(ctx, pod)).To(Succeed())
+
+			ir, err := slurmjobir.TranslateToSlurmJobIR(schedulerClient, ctx, pod) // called with the client that impersonates the scheduler ServiceAccount
+			Expect(err).NotTo(HaveOccurred(),
+				"scheduler ServiceAccount must be able to read every owner-Kind reachable from a Pod")
+			Expect(ir).NotTo(BeNil())
+			Expect(ir.RootPOM.Kind).To(Equal("Deployment")) // the root must be the Deployment, not the intermediate ReplicaSet
+			Expect(ir.RootPOM.Name).To(Equal(deployment.Name))
+		})
+	})
+})
diff --git a/internal/scheduler/plugins/slurmbridge/slurmbridge.go b/internal/scheduler/plugins/slurmbridge/slurmbridge.go
index f9ed9bf..b3d8c1c 100644
--- a/internal/scheduler/plugins/slurmbridge/slurmbridge.go
+++ b/internal/scheduler/plugins/slurmbridge/slurmbridge.go
@@ -71,6 +71,7 @@ func init() {
 // +kubebuilder:rbac:groups="",resources=persistentvolumes,verbs=get;list;update;watch
 // +kubebuilder:rbac:groups="",resources=replicationcontrollers,verbs=get;list;watch
 // +kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch
+// +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch
 // +kubebuilder:rbac:groups=apps,resources=replicasets,verbs=get;list;watch
 // +kubebuilder:rbac:groups=apps,resources=statefulsets,verbs=get;list;watch
 // +kubebuilder:rbac:groups=events.k8s.io,resources=events,verbs=create;patch;update
diff --git a/internal/scheduler/plugins/slurmbridge/suite_test.go b/internal/scheduler/plugins/slurmbridge/suite_test.go
new file mode 100644
index 0000000..080b252
--- /dev/null
+++ b/internal/scheduler/plugins/slurmbridge/suite_test.go
@@ -0,0 +1,108 @@
+// SPDX-FileCopyrightText: Copyright (C) SchedMD LLC.
+// SPDX-License-Identifier: Apache-2.0
+
+package slurmbridge
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"testing"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+
+	corev1 "k8s.io/api/core/v1"
+	rbacv1 "k8s.io/api/rbac/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes/scheme"
+	"k8s.io/client-go/rest"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/envtest"
+	logf "sigs.k8s.io/controller-runtime/pkg/log"
+	"sigs.k8s.io/controller-runtime/pkg/log/zap"
+	"sigs.k8s.io/yaml"
+
+	"github.com/SlinkyProject/slurm-bridge/internal/utils/testutils"
+)
+
+// This suite uses Ginkgo (a BDD-style Go testing framework) on top of an envtest
+// environment. Refer to https://onsi.github.io/ginkgo/ to learn more about Ginkgo.
+
+const schedulerServiceAccount = "slurm-bridge-scheduler" // ServiceAccount name created in BeforeSuite and impersonated by schedulerClient
+
+var (
+	cfg             *rest.Config         // REST config returned by testEnv.Start
+	k8sClient       client.Client        // built from cfg; creates the RBAC objects and test fixtures
+	schedulerClient client.Client        // built from a copy of cfg that impersonates the scheduler ServiceAccount
+	testEnv         *envtest.Environment // started in BeforeSuite, stopped in AfterSuite
+	ctx             context.Context      // suite-wide context created in BeforeSuite
+	cancel          context.CancelFunc   // cancels ctx; called in AfterSuite
+)
+
+func TestSlurmBridgeIntegration(t *testing.T) { // go test entry point: registers Ginkgo's Fail as the Gomega fail handler, then runs the specs
+	RegisterFailHandler(Fail)
+	RunSpecs(t, "SlurmBridge Integration Suite")
+}
+
+var _ = BeforeSuite(func() { // starts envtest, installs the scheduler RBAC objects, and builds both clients
+	logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true)))
+	ctx, cancel = context.WithCancel(context.TODO())
+
+	repoRoot := filepath.Join("..", "..", "..", "..") // four levels up from internal/scheduler/plugins/slurmbridge
+
+	By("bootstrapping test environment")
+	testEnv = &envtest.Environment{
+		ErrorIfCRDPathMissing: false,
+		BinaryAssetsDirectory: testutils.GetEnvTestBinary(repoRoot),
+	}
+	var err error
+	cfg, err = testEnv.Start()
+	Expect(err).NotTo(HaveOccurred())
+	Expect(cfg).NotTo(BeNil())
+
+	k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme})
+	Expect(err).NotTo(HaveOccurred())
+	Expect(k8sClient).NotTo(BeNil())
+
+	By("applying generated scheduler ClusterRole")
+	roleBytes, err := os.ReadFile(filepath.Join(repoRoot, "config", "rbac", "scheduler", "role.yaml"))
+	Expect(err).NotTo(HaveOccurred(), "run `make manifests` to generate config/rbac/scheduler/role.yaml")
+	role := &rbacv1.ClusterRole{}
+	Expect(yaml.Unmarshal(roleBytes, role)).To(Succeed())
+	role.ResourceVersion = "" // make sure no resourceVersion from the manifest is sent on Create
+	Expect(k8sClient.Create(ctx, role)).To(Succeed())
+
+	By("creating scheduler ServiceAccount and ClusterRoleBinding")
+	sa := &corev1.ServiceAccount{
+		ObjectMeta: metav1.ObjectMeta{Name: schedulerServiceAccount, Namespace: corev1.NamespaceDefault},
+	}
+	Expect(k8sClient.Create(ctx, sa)).To(Succeed())
+	crb := &rbacv1.ClusterRoleBinding{
+		ObjectMeta: metav1.ObjectMeta{Name: "slurm-bridge-scheduler-test"},
+		RoleRef: rbacv1.RoleRef{
+			APIGroup: rbacv1.GroupName,
+			Kind:     "ClusterRole",
+			Name:     role.Name, // whatever name the role.yaml manifest declares
+		},
+		Subjects: []rbacv1.Subject{{
+			Kind: rbacv1.ServiceAccountKind, Name: schedulerServiceAccount, Namespace: corev1.NamespaceDefault,
+		}},
+	}
+	Expect(k8sClient.Create(ctx, crb)).To(Succeed())
+
+	By("building impersonated client for scheduler ServiceAccount")
+	impCfg := rest.CopyConfig(cfg) // copy so the impersonation setting does not leak into cfg / k8sClient
+	impCfg.Impersonate = rest.ImpersonationConfig{
+		UserName: "system:serviceaccount:" + corev1.NamespaceDefault + ":" + schedulerServiceAccount, // system:serviceaccount:<namespace>:<name>
+	}
+	schedulerClient, err = client.New(impCfg, client.Options{Scheme: scheme.Scheme})
+	Expect(err).NotTo(HaveOccurred())
+	Expect(schedulerClient).NotTo(BeNil())
+})
+
+var _ = AfterSuite(func() { // cancels the suite context, then stops the envtest environment
+	By("tearing down the test environment")
+	cancel()
+	Expect(testEnv.Stop()).To(Succeed())
+})