Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ Slurm is a full featured HPC workload manager. To highlight a few features:

## Limitations

- Exclusive, whole node allocations are made for each pod.
- Exclusive, whole-node allocations are made for each pod when using group workloads (PodGroups, LeaderWorkerSet).
- Only supports the following DRA drivers:
- [DRA Driver CPU][dra-driver-cpu] for CPUs.
- [DRA Example Driver][dra-example-driver] for GPUs.
Expand Down
13 changes: 8 additions & 5 deletions docs/scheduler.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,14 @@ see the [annotations.go] source.

| Annotation | Description | Example |
| -------------------------------------- | --------------------------------- | ------------ |
| slurmjob.slinky.slurm.net/gres | Overrides the default gres. | "GPU:V100:2" |
| slurmjob.slinky.slurm.net/job-name | Sets the job name. | "research" |
| slurmjob.slinky.slurm.net/max-nodes | Sets the maximum number of nodes. | "3" |
| slurmjob.slinky.slurm.net/mem-per-node | Sets the amount of memory. | "8Gi" |
| slurmjob.slinky.slurm.net/partition | Overrides the default partition. | "debug" |
| Annotation | Description | Example |
| -------------------------------------- | -------------------------------------------------------------------- | ------------ |
| slurmjob.slinky.slurm.net/gres | Overrides the default gres. | "GPU:V100:2" |
| slurmjob.slinky.slurm.net/job-name | Sets the job name. | "research" |
| slurmjob.slinky.slurm.net/max-nodes | Sets the maximum number of nodes. | "3" |
| slurmjob.slinky.slurm.net/mem-per-node | Sets the amount of memory. | "8Gi" |
| slurmjob.slinky.slurm.net/partition | Overrides the default partition. | "debug" |
| slurmjob.slinky.slurm.net/shared | Sets the shared policy. Allowed: "none", "user". Only supported on single-pod workloads. | "user" |

An example of the annotations in use:

Expand Down
34 changes: 34 additions & 0 deletions internal/admission/admission.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
lwsv1 "sigs.k8s.io/lws/api/leaderworkerset/v1"
sched "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1"
)

type PodAdmission struct {
Expand Down Expand Up @@ -88,6 +90,9 @@ func (r *PodAdmission) ValidateCreate(ctx context.Context, pod *corev1.Pod) (adm
if pod.Spec.ResourceClaims != nil {
return nil, fmt.Errorf("can't schedule a pod with a resourceclaim, use the annotation %s to request devices instead", wellknown.AnnotationGres)
}
if err := validateSharedAnnotation(pod); err != nil {
return nil, err
}
return nil, nil
}

Expand All @@ -113,6 +118,16 @@ func (r *PodAdmission) ValidateUpdate(ctx context.Context, oldPod *corev1.Pod, n
return nil, fmt.Errorf("can't update a running pod's external node annotation")
}
}
// Once the Slurm external job is running, the shared annotation should not be modified.
if newPod.Labels[wellknown.LabelExternalJobId] != "" &&
newPod.Annotations[wellknown.AnnotationExternalJobNode] != "" {
if oldPod.Annotations[wellknown.AnnotationShared] != newPod.Annotations[wellknown.AnnotationShared] {
return nil, fmt.Errorf("can't change shared annotation when the Slurm external job is already running")
}
}
if err := validateSharedAnnotation(newPod); err != nil {
return nil, err
}
return nil, nil
}

Expand All @@ -121,6 +136,25 @@ func (r *PodAdmission) ValidateDelete(ctx context.Context, pod *corev1.Pod) (adm
return nil, nil
}

// validateSharedAnnotation checks the shared annotation on pod, if one is set.
//
// A pod without the annotation is always accepted. Otherwise the annotation
// value is checked with wellknown.ValidateSharedValue, and the pod is rejected
// when it carries a non-empty PodGroup label or a non-empty LeaderWorkerSet
// group hash label.
func validateSharedAnnotation(pod *corev1.Pod) error {
	shared, present := pod.Annotations[wellknown.AnnotationShared]
	if !present {
		// Nothing to validate when the annotation key is absent.
		return nil
	}

	// The value check runs before the group-workload checks, so an invalid
	// value is reported first even on a group pod.
	if err := wellknown.ValidateSharedValue(shared); err != nil {
		return err
	}

	switch {
	case pod.Labels[sched.PodGroupLabel] != "":
		return fmt.Errorf("shared annotation is not allowed on PodGroup pods")
	case pod.Labels[lwsv1.GroupUniqueHashLabelKey] != "":
		return fmt.Errorf("shared annotation is not allowed on LeaderWorkerSet pods")
	default:
		return nil
	}
}

func (r *PodAdmission) isManagedNamespace(ctx context.Context, namespace string) (bool, error) {
if r.ManagedNamespaceSelector != nil {
selector, err := metav1.LabelSelectorAsSelector(r.ManagedNamespaceSelector)
Expand Down
255 changes: 255 additions & 0 deletions internal/admission/admission_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
lwsv1 "sigs.k8s.io/lws/api/leaderworkerset/v1"
sched "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1"
)

const (
Expand Down Expand Up @@ -434,6 +436,88 @@ func TestPodAdmission_ValidateCreate(t *testing.T) {
want: nil,
wantErr: false,
},
{
name: "PodWithSharedUser",
fields: fields{
ManagedNamespaces: []string{namespace},
},
args: args{
ctx: context.TODO(),
pod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace,
Annotations: map[string]string{
wellknown.AnnotationShared: "user",
},
},
},
},
want: nil,
wantErr: false,
},
{
name: "PodWithSharedInvalid",
fields: fields{
ManagedNamespaces: []string{namespace},
},
args: args{
ctx: context.TODO(),
pod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace,
Annotations: map[string]string{
wellknown.AnnotationShared: "invalid",
},
},
},
},
want: nil,
wantErr: true,
},
{
name: "PodWithSharedAndPodGroupLabel",
fields: fields{
ManagedNamespaces: []string{namespace},
},
args: args{
ctx: context.TODO(),
pod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace,
Annotations: map[string]string{
wellknown.AnnotationShared: "user",
},
Labels: map[string]string{
sched.PodGroupLabel: "pg",
},
},
},
},
want: nil,
wantErr: true,
},
{
name: "PodWithSharedAndLWSLabel",
fields: fields{
ManagedNamespaces: []string{namespace},
},
args: args{
ctx: context.TODO(),
pod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace,
Annotations: map[string]string{
wellknown.AnnotationShared: "user",
},
Labels: map[string]string{
lwsv1.GroupUniqueHashLabelKey: "lws",
},
},
},
},
want: nil,
wantErr: true,
},
{
name: "PodWithSchedulerNameInUnmanagedNamespace",
fields: fields{
Expand Down Expand Up @@ -669,6 +753,177 @@ func TestPodAdmission_ValidateUpdate(t *testing.T) {
want: nil,
wantErr: true,
},
{
name: "UpdatePodWithSharedUser",
fields: fields{
ManagedNamespaces: []string{namespace},
},
args: args{
ctx: context.TODO(),
oldPod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace,
},
},
newPod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace,
Annotations: map[string]string{
wellknown.AnnotationShared: "user",
},
},
},
},
want: nil,
wantErr: false,
},
{
name: "UpdatePodWithSharedInvalid",
fields: fields{
ManagedNamespaces: []string{namespace},
},
args: args{
ctx: context.TODO(),
oldPod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace,
},
},
newPod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace,
Annotations: map[string]string{
wellknown.AnnotationShared: "invalid",
},
},
},
},
want: nil,
wantErr: true,
},
{
name: "UpdatePodWithSharedAndPodGroupLabel",
fields: fields{
ManagedNamespaces: []string{namespace},
},
args: args{
ctx: context.TODO(),
oldPod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace,
},
},
newPod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace,
Annotations: map[string]string{
wellknown.AnnotationShared: "user",
},
Labels: map[string]string{
sched.PodGroupLabel: "pg",
},
},
},
},
want: nil,
wantErr: true,
},
{
name: "UpdatePodWithSharedAndLWSLabel",
fields: fields{
ManagedNamespaces: []string{namespace},
},
args: args{
ctx: context.TODO(),
oldPod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace,
},
},
newPod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace,
Annotations: map[string]string{
wellknown.AnnotationShared: "user",
},
Labels: map[string]string{
lwsv1.GroupUniqueHashLabelKey: "lws",
},
},
},
},
want: nil,
wantErr: true,
},
{
name: "AddSharedAnnotationWhenPlaceholderJobRunning",
fields: fields{
ManagedNamespaces: []string{namespace},
},
args: args{
ctx: context.TODO(),
oldPod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace,
Labels: map[string]string{
wellknown.LabelExternalJobId: "1",
},
Annotations: map[string]string{
wellknown.AnnotationExternalJobNode: "node1",
},
},
},
newPod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace,
Labels: map[string]string{
wellknown.LabelExternalJobId: "1",
},
Annotations: map[string]string{
wellknown.AnnotationExternalJobNode: "node1",
wellknown.AnnotationShared: "user",
},
},
},
},
want: nil,
wantErr: true,
},
{
name: "ChangeSharedAnnotationValueWhenPlaceholderJobRunning",
fields: fields{
ManagedNamespaces: []string{namespace},
},
args: args{
ctx: context.TODO(),
oldPod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace,
Labels: map[string]string{
wellknown.LabelExternalJobId: "1",
},
Annotations: map[string]string{
wellknown.AnnotationExternalJobNode: "node1",
wellknown.AnnotationShared: "user",
},
},
},
newPod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace,
Labels: map[string]string{
wellknown.LabelExternalJobId: "1",
},
Annotations: map[string]string{
wellknown.AnnotationExternalJobNode: "node1",
wellknown.AnnotationShared: "none",
},
},
},
},
want: nil,
wantErr: true,
},
{
name: "RunningPodWithSchedulerNameCantChangeJobIDInUnmanagedNamespace",
fields: fields{
Expand Down
Loading