Skip to content

Commit f4f4ac4

Browse files
feat: add optional SSSD support to controller
This enables user validation for operations such as creating Slurm reservations for specific users, without exposing SSH access to the controller pod.
1 parent 2e6fbce commit f4f4ac4

8 files changed

Lines changed: 156 additions & 36 deletions

File tree

api/v1beta1/controller_keys.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,3 +98,20 @@ func (o *Controller) ConfigKey() types.NamespacedName {
9898
Namespace: o.Namespace,
9999
}
100100
}
101+
102+
func (o *Controller) SssdSecretKey() types.NamespacedName {
103+
return types.NamespacedName{
104+
Name: o.Spec.SssdConfRef.Name,
105+
Namespace: o.Namespace,
106+
}
107+
}
108+
109+
func (o *Controller) SssdSecretRef() *corev1.SecretKeySelector {
110+
key := o.SssdSecretKey()
111+
return &corev1.SecretKeySelector{
112+
LocalObjectReference: corev1.LocalObjectReference{
113+
Name: key.Name,
114+
},
115+
Key: o.Spec.SssdConfRef.Key,
116+
}
117+
}

api/v1beta1/controller_types.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,10 @@ type ControllerSpec struct {
115115
// Metrics defines the metric collection configuration.
116116
// +optional
117117
Metrics Metrics `json:"metrics,omitzero"`
118+
119+
// SssdConfRef is a reference to a secret containing the `sssd.conf`.
120+
// +optional
121+
SssdConfRef corev1.SecretKeySelector `json:"sssdConfRef,omitzero"`
118122
}
119123

120124
type ControllerPersistence struct {

api/v1beta1/zz_generated.deepcopy.go

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/slinky.slurm.net_controllers.yaml

Lines changed: 24 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

helm/slurm-operator-crds/templates/slinky.slurm.net_controllers.yaml

Lines changed: 24 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

helm/slurm/templates/controller/controller-cr.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,4 +100,9 @@ spec:
100100
metrics:
101101
{{- toYaml . | nindent 4 }}
102102
{{- end }}{{- /* with .Values.controller.metrics */}}
103+
{{- if or .Values.sssd.conf .Values.sssd.secretRef }}
104+
sssdConfRef:
105+
name: {{ include "slurm.sssdConf.name" . }}
106+
key: {{ include "slurm.sssdConf.key" . }}
107+
{{- end }}{{- /* if or .Values.sssd.conf .Values.sssd.secretRef */}}
103108
{{- end }}{{- /* if .Values.controller.external */}}

internal/builder/controller_app.go

Lines changed: 75 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,24 @@
44
package builder
55

66
import (
7+
"context"
78
_ "embed"
89
"fmt"
910
"path"
1011

1112
appsv1 "k8s.io/api/apps/v1"
1213
corev1 "k8s.io/api/core/v1"
14+
apierrors "k8s.io/apimachinery/pkg/api/errors"
1315
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1416
"k8s.io/apimachinery/pkg/util/intstr"
17+
"k8s.io/klog/v2"
1518
"k8s.io/utils/ptr"
1619
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
1720

1821
slinkyv1beta1 "github.com/SlinkyProject/slurm-operator/api/v1beta1"
1922
"github.com/SlinkyProject/slurm-operator/internal/builder/labels"
2023
"github.com/SlinkyProject/slurm-operator/internal/builder/metadata"
24+
"github.com/SlinkyProject/slurm-operator/internal/utils/crypto"
2125
)
2226

2327
const (
@@ -106,6 +110,7 @@ func (b *Builder) BuildController(controller *slinkyv1beta1.Controller) (*appsv1
106110
}
107111

108112
func (b *Builder) controllerPodTemplate(controller *slinkyv1beta1.Controller) (corev1.PodTemplateSpec, error) {
113+
ctx := context.TODO()
109114
key := controller.Key()
110115

111116
size := len(controller.Spec.ConfigFileRefs) + len(controller.Spec.PrologScriptRefs) + len(controller.Spec.EpilogScriptRefs) + len(controller.Spec.PrologSlurmctldScriptRefs) + len(controller.Spec.EpilogSlurmctldScriptRefs)
@@ -126,14 +131,29 @@ func (b *Builder) controllerPodTemplate(controller *slinkyv1beta1.Controller) (c
126131
extraConfigMapNames = append(extraConfigMapNames, ref.Name)
127132
}
128133

134+
// Build annotations with SSSD hash if configured
135+
annotations := map[string]string{
136+
annotationDefaultContainer: labels.ControllerApp,
137+
}
138+
if controller.Spec.SssdConfRef.Name != "" {
139+
sssdSecret := &corev1.Secret{}
140+
sssdSecretKey := controller.SssdSecretKey()
141+
if err := b.client.Get(ctx, sssdSecretKey, sssdSecret); err != nil {
142+
if !apierrors.IsNotFound(err) {
143+
return corev1.PodTemplateSpec{}, fmt.Errorf("failed to get object (%s): %w", klog.KObj(sssdSecret), err)
144+
}
145+
}
146+
sssdConfRefKey := controller.SssdSecretRef().Key
147+
sssdConfHash := crypto.CheckSum([]byte(sssdSecret.StringData[sssdConfRefKey]))
148+
annotations[annotationSssdConfHash] = sssdConfHash
149+
}
150+
129151
objectMeta := metadata.NewBuilder(key).
130152
WithAnnotations(controller.Annotations).
131153
WithLabels(controller.Labels).
132154
WithMetadata(controller.Spec.Template.Metadata).
133155
WithLabels(labels.NewBuilder().WithControllerLabels(controller).Build()).
134-
WithAnnotations(map[string]string{
135-
annotationDefaultContainer: labels.ControllerApp,
136-
}).
156+
WithAnnotations(annotations).
137157
Build()
138158

139159
spec := controller.Spec
@@ -147,19 +167,16 @@ func (b *Builder) controllerPodTemplate(controller *slinkyv1beta1.Controller) (c
147167
},
148168
base: corev1.PodSpec{
149169
AutomountServiceAccountToken: ptr.To(false),
170+
SecurityContext: &corev1.PodSecurityContext{
171+
FSGroup: ptr.To[int64](401),
172+
},
150173
Containers: []corev1.Container{
151-
b.slurmctldContainer(spec.Slurmctld.Container, controller.ClusterName()),
174+
b.slurmctldContainer(spec.Slurmctld.Container, controller),
152175
},
153176
InitContainers: []corev1.Container{
154177
b.reconfigureContainer(spec.Reconfigure),
155178
b.logfileContainer(spec.LogFile, slurmctldLogFilePath),
156179
},
157-
SecurityContext: &corev1.PodSecurityContext{
158-
RunAsNonRoot: ptr.To(true),
159-
RunAsUser: ptr.To(slurmUserUid),
160-
RunAsGroup: ptr.To(slurmUserGid),
161-
FSGroup: ptr.To(slurmUserGid),
162-
},
163180
Volumes: controllerVolumes(controller, extraConfigMapNames),
164181
},
165182
merge: template.PodSpec,
@@ -174,7 +191,7 @@ func controllerVolumes(controller *slinkyv1beta1.Controller, extra []string) []c
174191
Name: slurmEtcVolume,
175192
VolumeSource: corev1.VolumeSource{
176193
Projected: &corev1.ProjectedVolumeSource{
177-
DefaultMode: ptr.To[int32](0o610),
194+
DefaultMode: ptr.To[int32](0o640),
178195
Sources: []corev1.VolumeProjection{
179196
{
180197
ConfigMap: &corev1.ConfigMapProjection{
@@ -189,7 +206,7 @@ func controllerVolumes(controller *slinkyv1beta1.Controller, extra []string) []c
189206
Name: controller.AuthSlurmRef().Name,
190207
},
191208
Items: []corev1.KeyToPath{
192-
{Key: controller.AuthSlurmRef().Key, Path: slurmKeyFile},
209+
{Key: controller.AuthSlurmRef().Key, Path: slurmKeyFile, Mode: ptr.To[int32](0o600)},
193210
},
194211
},
195212
},
@@ -199,7 +216,7 @@ func controllerVolumes(controller *slinkyv1beta1.Controller, extra []string) []c
199216
Name: controller.AuthJwtHs256Ref().Name,
200217
},
201218
Items: []corev1.KeyToPath{
202-
{Key: controller.AuthJwtHs256Ref().Key, Path: JwtHs256KeyFile},
219+
{Key: controller.AuthJwtHs256Ref().Key, Path: JwtHs256KeyFile, Mode: ptr.To[int32](0o600)},
203220
},
204221
},
205222
},
@@ -226,14 +243,57 @@ func controllerVolumes(controller *slinkyv1beta1.Controller, extra []string) []c
226243
}
227244
out[0].Projected.Sources = append(out[0].Projected.Sources, volumeProjection)
228245
}
246+
// Add SSSD volume if configured (optional)
247+
if controller.Spec.SssdConfRef.Name != "" {
248+
sssdVolume := corev1.Volume{
249+
Name: sssdConfVolume,
250+
VolumeSource: corev1.VolumeSource{
251+
Projected: &corev1.ProjectedVolumeSource{
252+
DefaultMode: ptr.To[int32](0o600),
253+
Sources: []corev1.VolumeProjection{
254+
{
255+
Secret: &corev1.SecretProjection{
256+
LocalObjectReference: corev1.LocalObjectReference{
257+
Name: controller.SssdSecretRef().Name,
258+
},
259+
Items: []corev1.KeyToPath{
260+
{Key: controller.SssdSecretRef().Key, Path: sssdConfFile, Mode: ptr.To[int32](0o600)},
261+
},
262+
},
263+
},
264+
},
265+
},
266+
},
267+
}
268+
out = append(out, sssdVolume)
269+
}
229270
return out
230271
}
231272

232273
func clusterSpoolDir(clustername string) string {
233274
return path.Join(slurmctldSpoolDir, clustername)
234275
}
235276

236-
func (b *Builder) slurmctldContainer(merge corev1.Container, clusterName string) corev1.Container {
277+
func (b *Builder) slurmctldContainer(merge corev1.Container, controller *slinkyv1beta1.Controller) corev1.Container {
278+
clusterName := controller.ClusterName()
279+
volumeMounts := []corev1.VolumeMount{
280+
{Name: slurmEtcVolume, MountPath: slurmEtcDir, ReadOnly: true},
281+
{Name: slurmPidFileVolume, MountPath: slurmPidFileDir},
282+
{Name: slurmctldStateSaveVolume, MountPath: clusterSpoolDir(clusterName)},
283+
{Name: slurmAuthSocketVolume, MountPath: slurmctldAuthSocketDir},
284+
{Name: slurmLogFileVolume, MountPath: slurmLogFileDir},
285+
}
286+
// Add SSSD mount if configured (optional)
287+
// Mount to staging dir (not /etc/sssd/) so entrypoint can copy with correct permissions
288+
if controller.Spec.SssdConfRef.Name != "" {
289+
volumeMounts = append(volumeMounts, corev1.VolumeMount{
290+
Name: sssdConfVolume,
291+
MountPath: "/run/sssd-mounted/sssd.conf",
292+
SubPath: sssdConfFile,
293+
ReadOnly: true,
294+
})
295+
}
296+
237297
opts := ContainerOpts{
238298
base: corev1.Container{
239299
Name: labels.ControllerApp,
@@ -272,18 +332,7 @@ func (b *Builder) slurmctldContainer(merge corev1.Container, clusterName string)
272332
FailureThreshold: 6,
273333
PeriodSeconds: 10,
274334
},
275-
SecurityContext: &corev1.SecurityContext{
276-
RunAsNonRoot: ptr.To(true),
277-
RunAsUser: ptr.To(slurmUserUid),
278-
RunAsGroup: ptr.To(slurmUserGid),
279-
},
280-
VolumeMounts: []corev1.VolumeMount{
281-
{Name: slurmEtcVolume, MountPath: slurmEtcDir, ReadOnly: true},
282-
{Name: slurmPidFileVolume, MountPath: slurmPidFileDir},
283-
{Name: slurmctldStateSaveVolume, MountPath: clusterSpoolDir(clusterName)},
284-
{Name: slurmAuthSocketVolume, MountPath: slurmctldAuthSocketDir},
285-
{Name: slurmLogFileVolume, MountPath: slurmLogFileDir},
286-
},
335+
VolumeMounts: volumeMounts,
287336
},
288337
merge: merge,
289338
}

internal/builder/controller_app_test.go

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -96,17 +96,13 @@ func TestBuilder_BuildController(t *testing.T) {
9696
t.Errorf("Template.Labels = %v , Selector.MatchLabels = %v",
9797
got.Spec.Template.Labels, got.Spec.Selector.MatchLabels)
9898

99-
case ptr.Deref(got.Spec.Template.Spec.Containers[0].SecurityContext.RunAsNonRoot, false) != true:
100-
t.Errorf("got.Spec.Template.Spec.Containers[0].SecurityContext.RunAsNonRoot = %v , want = %v",
101-
got.Spec.Template.Spec.Containers[0].SecurityContext.RunAsNonRoot, true)
99+
case got.Spec.Template.Spec.Containers[0].SecurityContext != nil:
100+
t.Errorf("got.Spec.Template.Spec.Containers[0].SecurityContext = %v , want = nil",
101+
got.Spec.Template.Spec.Containers[0].SecurityContext)
102102

103-
case ptr.Deref(got.Spec.Template.Spec.Containers[0].SecurityContext.RunAsUser, 0) != slurmUserUid:
104-
t.Errorf("got.Spec.Template.Spec.Containers[0].SecurityContext.RunAsUser = %v , want = %v",
105-
got.Spec.Template.Spec.Containers[0].SecurityContext.RunAsUser, slurmUserUid)
106-
107-
case ptr.Deref(got.Spec.Template.Spec.Containers[0].SecurityContext.RunAsGroup, 0) != slurmUserGid:
108-
t.Errorf("got.Spec.Template.Spec.Containers[0].SecurityContext.RunAsGroup = %v , want = %v",
109-
got.Spec.Template.Spec.Containers[0].SecurityContext.RunAsGroup, slurmUserGid)
103+
case ptr.Deref(got.Spec.Template.Spec.SecurityContext.FSGroup, 0) != slurmUserGid:
104+
t.Errorf("got.Spec.Template.Spec.SecurityContext.FSGroup = %v , want = %v",
105+
got.Spec.Template.Spec.SecurityContext.FSGroup, slurmUserGid)
110106

111107
case got.Spec.Template.Spec.Containers[0].Name != labels.ControllerApp:
112108
t.Errorf("Template.Spec.Containers[0].Name = %v , want = %v",

0 commit comments

Comments
 (0)