Skip to content

Commit d3f56fc

Browse files
authored
feat: add support for arm (#8)
* feat: add support for arm Signed-off-by: vsoch <vsoch@users.noreply.github.com> * tweak module to use 1.24 Signed-off-by: vsoch <vsoch@users.noreply.github.com> * build: update for arm Signed-off-by: vsoch <vsoch@users.noreply.github.com> * bug: remove reliance on proxy Signed-off-by: vsoch <vsoch@users.noreply.github.com> * munge: ensure use consistent template Signed-off-by: vsoch <vsoch@users.noreply.github.com> * feat: allow to specify nodespec Signed-off-by: vsoch <vsoch@users.noreply.github.com> * make size simpler - just is number of worker nodes Signed-off-by: vsoch <vsoch@users.noreply.github.com> * ci: do not build arm on pr Signed-off-by: vsoch <vsoch@users.noreply.github.com> --------- Signed-off-by: vsoch <vsoch@users.noreply.github.com> Co-authored-by: vsoch <vsoch@users.noreply.github.com>
1 parent e7323a9 commit d3f56fc

22 files changed

Lines changed: 1071 additions & 385 deletions

File tree

.github/workflows/build-deploy.yaml

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,42 @@
11
name: build slurm-operator
22

33
on:
4-
pull_request: []
4+
pull_request: {}
55
push:
66
branches:
77
- main
88
workflow_dispatch:
99

1010
jobs:
11+
# Publishes the arm64 operator image by running `make arm-deploy` under a
# dedicated buildx builder. The job-level `if` skips the whole job on pull
# requests; the per-step guards below are kept as a second line of defense.
build-arm:
12+
if: (github.event_name != 'pull_request')
13+
runs-on: ubuntu-latest
14+
name: make and build arm
15+
steps:
16+
- name: Checkout Repository
17+
# Same checkout major version as the `build` job in this workflow.
uses: actions/checkout@v4
18+
- uses: actions/setup-go@v3
19+
with:
20+
go-version: ^1.24
21+
- name: GHCR Login
22+
if: (github.event_name != 'pull_request')
23+
uses: docker/login-action@v2
24+
with:
25+
registry: ghcr.io
26+
username: ${{ github.actor }}
27+
password: ${{ secrets.GITHUB_TOKEN }}
28+
29+
- name: Add custom buildx ARM builder
30+
if: (github.event_name != 'pull_request')
31+
run: |
32+
docker buildx create --name armbuilder
33+
docker buildx use armbuilder
34+
docker buildx inspect --bootstrap
35+
36+
- name: Deploy Container
37+
if: (github.event_name != 'pull_request')
38+
run: make arm-deploy
39+
1140
build:
1241
runs-on: ubuntu-latest
1342
strategy:
@@ -21,7 +50,7 @@ jobs:
2150
uses: actions/checkout@v4
2251
- uses: actions/setup-go@v3
2352
with:
24-
go-version: ^1.23
53+
go-version: ^1.24
2554
- name: GHCR Login
2655
if: (github.event_name != 'pull_request')
2756
uses: docker/login-action@v2

.github/workflows/main.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name: test slurm-operator
22

33
on:
4-
pull_request: []
4+
pull_request: {}
55

66
jobs:
77
formatting:
@@ -27,7 +27,7 @@ jobs:
2727
- name: Setup Go
2828
uses: actions/setup-go@v3
2929
with:
30-
go-version: ^1.23
30+
go-version: ^1.24
3131
- name: fmt check
3232
run: make fmt
3333

@@ -60,7 +60,7 @@ jobs:
6060
- name: Setup Go
6161
uses: actions/setup-go@v3
6262
with:
63-
go-version: ^1.23
63+
go-version: ^1.24
6464

6565
- name: Start minikube
6666
uses: medyagh/setup-minikube@697f2b7aaed5f70bf2a94ee21a4ec3dde7b12f92 # v0.0.9

.github/workflows/slurm-containers.yaml

Lines changed: 68 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,74 @@
11
name: build slurm-containers
22

33
on:
4-
pull_request: []
4+
pull_request: {}
55
push:
66
branches:
77
- main
88
workflow_dispatch:
99

1010
jobs:
11+
# Builds the arm64 Ubuntu slurm image from docker/Dockerfile.ubuntu with buildx
# and pushes it as ghcr.io/converged-computing/slurm:ubuntu-arm.
# The job-level `if` skips the job on pull requests so no runner is spent on
# checkout/setup when every publishing step would be skipped anyway.
build-arm-ubuntu:
if: (github.event_name != 'pull_request')
12+
env:
13+
container: ghcr.io/converged-computing/slurm
14+
runs-on: ubuntu-latest
15+
name: build arm slurm ubuntu
16+
steps:
17+
- name: Checkout Repository
18+
uses: actions/checkout@v4
19+
- uses: actions/setup-go@v3
20+
with:
21+
go-version: ^1.24
22+
- name: GHCR Login
23+
if: (github.event_name != 'pull_request')
24+
uses: docker/login-action@v2
25+
with:
26+
registry: ghcr.io
27+
username: ${{ github.actor }}
28+
password: ${{ secrets.GITHUB_TOKEN }}
29+
30+
- name: Add custom buildx ARM builder
31+
if: (github.event_name != 'pull_request')
32+
run: |
33+
docker buildx create --name armbuilder
34+
docker buildx use armbuilder
35+
docker buildx inspect --bootstrap
36+
37+
- name: Build and Deploy Container
38+
if: (github.event_name != 'pull_request')
39+
run: docker buildx build -f docker/Dockerfile.ubuntu --build-arg ARCH=arm64 --platform linux/arm64 --push -t ${{ env.container }}:ubuntu-arm ./docker
40+
41+
# Builds the arm64 slurm image from docker/Dockerfile with buildx and pushes it
# as ghcr.io/converged-computing/slurm:arm.
# The job-level `if` skips the job on pull requests so no runner is spent on
# checkout/setup when every publishing step would be skipped anyway.
build-arm:
if: (github.event_name != 'pull_request')
42+
env:
43+
container: ghcr.io/converged-computing/slurm
44+
runs-on: ubuntu-latest
45+
name: make and build arm
46+
steps:
47+
- name: Checkout Repository
48+
uses: actions/checkout@v4
49+
- uses: actions/setup-go@v3
50+
with:
51+
go-version: ^1.24
52+
- name: GHCR Login
53+
if: (github.event_name != 'pull_request')
54+
uses: docker/login-action@v2
55+
with:
56+
registry: ghcr.io
57+
username: ${{ github.actor }}
58+
password: ${{ secrets.GITHUB_TOKEN }}
59+
60+
- name: Add custom buildx ARM builder
61+
if: (github.event_name != 'pull_request')
62+
run: |
63+
docker buildx create --name armbuilder
64+
docker buildx use armbuilder
65+
docker buildx inspect --bootstrap
66+
67+
- name: Build and Deploy Container
68+
if: (github.event_name != 'pull_request')
69+
run: docker buildx build -f docker/Dockerfile --build-arg ARCH=arm64 --platform linux/arm64 --push -t ${{ env.container }}:arm ./docker
70+
71+
1172
build:
1273
env:
1374
container: ghcr.io/converged-computing/slurm
@@ -27,6 +88,11 @@ jobs:
2788
- name: Build Container
2889
run: docker build -f docker/Dockerfile -t ${{ env.container }} ./docker
2990

91+
- name: Build Ubuntu Container
92+
run: docker build -f docker/Dockerfile.ubuntu -t ${{ env.container }}:ubuntu ./docker
93+
3094
- name: Deploy Container
3195
if: (github.event_name != 'pull_request')
32-
run: docker push ${{ env.container }}
96+
run: |
97+
docker push ${{ env.container }}
98+
docker push ${{ env.container }}:ubuntu

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Build the manager binary
2-
FROM golang:1.23 as builder
2+
FROM golang:1.24 AS builder
33
ARG TARGETOS
44
ARG TARGETARCH
55

Makefile

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ endif
4848

4949
# Image URL to use all building/pushing image targets
5050
IMG ?= ghcr.io/converged-computing/slurm-operator:latest
51+
ARMIMG ?= ghcr.io/converged-computing/slurm-operator:arm
5152

5253
# Testing image (for development mostly)
5354
DEVIMG ?= ghcr.io/converged-computing/slurm-operator:test
@@ -126,6 +127,17 @@ run: manifests generate fmt vet ## Run a controller from your host.
126127
docker-build: test ## Build docker image with the manager.
127128
docker build -t ${IMG} .
128129

130+
# Local (non-pushing) arm64 build of the manager image, tagged ${ARMIMG}.
# ARCH must be passed with --build-arg; a bare ARCH=arm64 is read by
# `docker buildx build` as an extra positional argument and the build aborts.
.PHONY: arm-build
131+
arm-build: test ## Build the linux/arm64 docker image with the manager.
132+
docker buildx build --build-arg ARCH=arm64 --platform linux/arm64 -t ${ARMIMG} .
133+
134+
# Build the linux/arm64 manager image with buildx and push it as ${ARMIMG}
# (--push), then point the controller image in config/manager at ${ARMIMG}
# and render config/default into examples/dist/slurm-operator-arm.yaml.
# The last two recipe lines are the same commands as the build-config-arm target.
.PHONY: arm-deploy
135+
arm-deploy: manifests kustomize
136+
docker buildx build --platform linux/arm64 --build-arg ARCH=arm64 --push -t ${ARMIMG} .
137+
cd config/manager && $(KUSTOMIZE) edit set image controller=${ARMIMG}
138+
$(KUSTOMIZE) build config/default > examples/dist/slurm-operator-arm.yaml
139+
140+
129141
.PHONY: docker-push
130142
docker-push: ## Push docker image with the manager.
131143
docker push ${IMG}
@@ -206,6 +218,12 @@ test-deploy: manifests kustomize
206218
$(KUSTOMIZE) build config/default > examples/dist/slurm-operator-dev.yaml
207219
sed -i 's/ imagePullPolicy: IfNotPresent/ imagePullPolicy: Always/' examples/dist/slurm-operator-dev.yaml
208220

221+
222+
# Regenerates the ARM install manifest only: sets the controller image in
# config/manager to ${ARMIMG} and renders config/default to
# examples/dist/slurm-operator-arm.yaml. It does not build or push an image
# and does not touch a cluster.
.PHONY: build-config-arm
223+
build-config-arm: manifests kustomize ## Generate examples/dist/slurm-operator-arm.yaml using the ARM controller image.
224+
cd config/manager && $(KUSTOMIZE) edit set image controller=${ARMIMG}
225+
$(KUSTOMIZE) build config/default > examples/dist/slurm-operator-arm.yaml
226+
209227
.PHONY: test-deploy-recreate
210228
test-deploy-recreate: test-deploy
211229
kubectl delete -f ./examples/dist/slurm-operator-dev.yaml || echo "Already deleted"
@@ -217,7 +235,7 @@ list:
217235

218236
## Tool Versions
219237
KUSTOMIZE_VERSION ?= v3.8.7
220-
CONTROLLER_TOOLS_VERSION ?= v0.14.0
238+
CONTROLLER_TOOLS_VERSION ?= v0.19.0
221239

222240
KUSTOMIZE_INSTALL_SCRIPT ?= "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh"
223241
.PHONY: kustomize

api/v1alpha1/slurm_types.go

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ type SlurmSpec struct {
6363
// +optional
6464
SlurmVersion string `json:"slurmVersion,omitempty"`
6565

66-
// Size of the slurm (1 server + (N-1) nodes)
66+
// Size is number of worker nodes
6767
Size int32 `json:"size"`
6868

6969
// Interactive mode keeps the cluster running
@@ -168,6 +168,11 @@ type Node struct {
168168
// +optional
169169
WorkingDir string `json:"workingDir,omitempty"`
170170

171+
// Node specification. Leave empty for testing cluster
172+
// This does not include hostlist (generated automatically)
173+
// +optional
174+
Nodespec string `json:"nodespec,omitempty"`
175+
171176
// PullAlways will always pull the container
172177
// +optional
173178
PullAlways bool `json:"pullAlways"`
@@ -225,8 +230,8 @@ func (s *Slurm) SelectorName() string {
225230

226231
// Validate the slurm
227232
func (s *Slurm) Validate() bool {
228-
if s.WorkerNodes() < 1 {
229-
fmt.Printf("😥️ Slurm cluster must have at least one worker node, Size >= 2.\n")
233+
if s.Spec.Size < 1 {
234+
fmt.Printf("😥️ Slurm cluster must have 1 or more worker nodes.\n")
230235
return false
231236
}
232237
// Ensure we have the default image set
@@ -238,6 +243,10 @@ func (s *Slurm) Validate() bool {
238243
s.Spec.ClusterName = "linux"
239244
}
240245

246+
// Default node spec
247+
if s.Spec.Node.Nodespec == "" {
248+
s.Spec.Node.Nodespec = "RealMemory=1000 CPUs=1 State=UNKNOWN"
249+
}
241250
// Along with a username and password
242251
if s.Spec.Database.DatabaseName == "" {
243252
s.Spec.Database.DatabaseName = "slurm_acct_db"
@@ -249,12 +258,6 @@ func (s *Slurm) Validate() bool {
249258
return true
250259
}
251260

252-
// WorkerNodes returns the number of worker nodes
253-
// At this point we've already validated size is >= 1
254-
func (s *Slurm) WorkerNodes() int32 {
255-
return s.Spec.Size - 1
256-
}
257-
258261
// WorkerNode returns the worker node (if defined) or falls back to the server
259262
func (s *Slurm) WorkerNode() Node {
260263

config/crd/bases/flux-framework.org_slurms.yaml

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
33
kind: CustomResourceDefinition
44
metadata:
55
annotations:
6-
controller-gen.kubebuilder.io/version: v0.14.0
6+
controller-gen.kubebuilder.io/version: v0.19.0
77
name: slurms.flux-framework.org
88
spec:
99
group: flux-framework.org
@@ -65,6 +65,11 @@ spec:
6565
default: ghcr.io/converged-computing/slurm
6666
description: Image to use for slurm
6767
type: string
68+
nodespec:
69+
description: |-
70+
Node specification. Leave empty for testing cluster
71+
This does not include hostlist (generated automatically)
72+
type: string
6873
ports:
6974
description: |-
7075
Ports to be exposed to other containers in the cluster
@@ -188,6 +193,11 @@ spec:
188193
default: ghcr.io/converged-computing/slurm
189194
description: Image to use for slurm
190195
type: string
196+
nodespec:
197+
description: |-
198+
Node specification. Leave empty for testing cluster
199+
This does not include hostlist (generated automatically)
200+
type: string
191201
ports:
192202
description: |-
193203
Ports to be exposed to other containers in the cluster
@@ -234,7 +244,7 @@ spec:
234244
description: Resources include limits and requests
235245
type: object
236246
size:
237-
description: Size of the slurm (1 server + (N-1) nodes)
247+
description: Size is number of worker nodes
238248
format: int32
239249
type: integer
240250
slurmVersion:
@@ -266,6 +276,11 @@ spec:
266276
default: ghcr.io/converged-computing/slurm
267277
description: Image to use for slurm
268278
type: string
279+
nodespec:
280+
description: |-
281+
Node specification. Leave empty for testing cluster
282+
This does not include hostlist (generated automatically)
283+
type: string
269284
ports:
270285
description: |-
271286
Ports to be exposed to other containers in the cluster

config/default/kustomization.yaml

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ namePrefix: slurm-operator-
1212
#commonLabels:
1313
# someName: someValue
1414

15-
bases:
15+
resources:
1616
- ../crd
1717
- ../rbac
1818
- ../manager
@@ -28,9 +28,8 @@ patchesStrategicMerge:
2828
# Protect the /metrics endpoint by putting it behind auth.
2929
# If you want your controller-manager to expose the /metrics
3030
# endpoint w/o any authn/z, please comment the following line.
31-
- manager_auth_proxy_patch.yaml
32-
33-
31+
# NOTE: the auth proxy patch below is disabled because its image is deprecated (3/2026).
32+
# - manager_auth_proxy_patch.yaml
3433

3534
# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in
3635
# crd/kustomization.yaml

0 commit comments

Comments
 (0)