Skip to content

Commit 291e497

Browse files
committed
nixos: use nvidia device plugin for k3s gpu
1 parent 1ae061d commit 291e497

1 file changed

Lines changed: 72 additions & 44 deletions

File tree

nixos/k3s-single-node.nix

Lines changed: 72 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -14,56 +14,74 @@ with lib; let
1414
];
1515
};
1616

17-
# Generic CDI Plugin DaemonSet for GPU resource allocation
18-
generic-cdi-plugin-manifest = pkgs.writeText "generic-cdi-plugin.yaml" ''
17+
nvidia-device-plugin-version = "v0.19.1";
18+
19+
nvidia-device-plugin-manifest = pkgs.writeText "nvidia-device-plugin.yaml" ''
20+
apiVersion: node.k8s.io/v1
21+
handler: nvidia
22+
kind: RuntimeClass
23+
metadata:
24+
name: nvidia
25+
labels:
26+
app.kubernetes.io/component: gpu-operator
27+
---
1928
apiVersion: apps/v1
2029
kind: DaemonSet
2130
metadata:
22-
name: generic-cdi-plugin
31+
name: nvidia-device-plugin-daemonset
2332
namespace: kube-system
2433
labels:
25-
app: generic-cdi-plugin
34+
app.kubernetes.io/name: nvidia-device-plugin
2635
spec:
2736
selector:
2837
matchLabels:
29-
app: generic-cdi-plugin
38+
app.kubernetes.io/name: nvidia-device-plugin
39+
updateStrategy:
40+
type: RollingUpdate
3041
template:
3142
metadata:
3243
labels:
33-
app: generic-cdi-plugin
44+
app.kubernetes.io/name: nvidia-device-plugin
3445
spec:
46+
runtimeClassName: nvidia
47+
priorityClassName: system-node-critical
3548
nodeSelector:
36-
nixos-nvidia-cdi: "enabled"
49+
nvidia.com/gpu.present: "true"
3750
tolerations:
3851
- key: nvidia.com/gpu
3952
operator: Exists
4053
effect: NoSchedule
4154
containers:
42-
- name: generic-cdi-plugin
43-
image: ghcr.io/olfillasodikno/generic-cdi-plugin:main
44-
imagePullPolicy: Always
45-
args:
46-
- "/var/run/cdi/nvidia-container-toolkit.json"
55+
- name: nvidia-device-plugin-ctr
56+
image: nvcr.io/nvidia/k8s-device-plugin:${nvidia-device-plugin-version}
57+
imagePullPolicy: IfNotPresent
58+
command: ["nvidia-device-plugin"]
59+
env:
60+
- name: DEVICE_ID_STRATEGY
61+
value: uuid
62+
- name: NVIDIA_VISIBLE_DEVICES
63+
value: all
64+
- name: NVIDIA_DRIVER_CAPABILITIES
65+
value: compute,utility
4766
securityContext:
48-
privileged: true
67+
allowPrivilegeEscalation: false
68+
capabilities:
69+
drop: ["ALL"]
4970
volumeMounts:
50-
- name: device-plugin
71+
- name: kubelet-device-plugins-dir
5172
mountPath: /var/lib/kubelet/device-plugins
52-
- name: pod-resources
53-
mountPath: /var/lib/kubelet/pod-resources
5473
- name: cdi-specs
5574
mountPath: /var/run/cdi
5675
readOnly: true
5776
volumes:
58-
- name: device-plugin
77+
- name: kubelet-device-plugins-dir
5978
hostPath:
6079
path: /var/lib/kubelet/device-plugins
61-
- name: pod-resources
62-
hostPath:
63-
path: /var/lib/kubelet/pod-resources
80+
type: Directory
6481
- name: cdi-specs
6582
hostPath:
6683
path: /var/run/cdi
84+
type: DirectoryOrCreate
6785
'';
6886

6987
# Test pod to verify GPU access
@@ -75,13 +93,14 @@ with lib; let
7593
namespace: default
7694
spec:
7795
restartPolicy: Never
96+
runtimeClassName: nvidia
7897
containers:
7998
- name: cuda-test
8099
image: nvidia/cuda:12.6.3-base-ubuntu24.04
81100
command: ["nvidia-smi"]
82101
resources:
83102
limits:
84-
nvidia.com/gpu-all: 1
103+
nvidia.com/gpu: 1
85104
'';
86105
in {
87106
options = {
@@ -97,7 +116,11 @@ in {
97116

98117
config = mkIf cfg.enable {
99118
# NVIDIA container toolkit for CDI spec generation
100-
hardware.nvidia-container-toolkit.enable = true;
119+
hardware.nvidia-container-toolkit = {
120+
enable = true;
121+
device-name-strategy = "uuid";
122+
mount-nvidia-executables = true;
123+
};
101124

102125
# Ensure CDI generator has access to nvidia libs
103126
systemd.services.nvidia-container-toolkit-cdi-generator = {
@@ -124,12 +147,15 @@ in {
124147
'';
125148
};
126149

127-
extraFlags = [
128-
"--node-label=nixos-nvidia-cdi=enabled"
129-
"--tls-san=${config.networking.hostName}"
130-
"--tls-san=${config.networking.hostName}.local"
131-
"--tls-san=localhost"
132-
] ++ cfg.extraFlags;
150+
extraFlags =
151+
[
152+
"--node-label=nixos-nvidia-cdi=enabled"
153+
"--node-label=nvidia.com/gpu.present=true"
154+
"--tls-san=${config.networking.hostName}"
155+
"--tls-san=${config.networking.hostName}.local"
156+
"--tls-san=localhost"
157+
]
158+
++ cfg.extraFlags;
133159

134160
# Containerd config with CDI support
135161
# k3s 1.31+ with containerd 2.0 has CDI enabled by default
@@ -138,10 +164,13 @@ in {
138164
{{ template "base" . }}
139165
140166
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
167+
privileged_without_host_devices = false
168+
runtime_engine = ""
169+
runtime_root = ""
141170
runtime_type = "io.containerd.runc.v2"
142171
143172
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
144-
BinaryName = "/run/current-system/sw/bin/nvidia-container-runtime.cdi"
173+
BinaryName = "${lib.getOutput "tools" config.hardware.nvidia-container-toolkit.package}/bin/nvidia-container-runtime.cdi"
145174
'';
146175

147176
gracefulNodeShutdown.enable = true;
@@ -160,13 +189,13 @@ in {
160189
export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
161190
'';
162191

163-
# Create systemd service to deploy the generic-cdi-plugin after k3s is ready
192+
# Create systemd service to deploy the NVIDIA device plugin after k3s is ready
164193
systemd.services.k3s-gpu-plugin-deploy = {
165-
description = "Deploy generic-cdi-plugin to k3s";
166-
after = [ "k3s.service" ];
167-
wants = [ "k3s.service" ];
168-
wantedBy = [ "multi-user.target" ];
169-
path = [ pkgs.kubectl pkgs.coreutils ];
194+
description = "Deploy NVIDIA device plugin to k3s";
195+
after = ["k3s.service"];
196+
wants = ["k3s.service"];
197+
wantedBy = ["multi-user.target"];
198+
path = [pkgs.kubectl pkgs.coreutils];
170199
serviceConfig = {
171200
Type = "oneshot";
172201
RemainAfterExit = true;
@@ -183,23 +212,22 @@ in {
183212
sleep 5
184213
done
185214
186-
# Check if plugin already exists
187215
if kubectl get daemonset -n kube-system generic-cdi-plugin &>/dev/null; then
188-
echo "generic-cdi-plugin already deployed, updating..."
189-
kubectl apply -f ${generic-cdi-plugin-manifest}
190-
else
191-
echo "Deploying generic-cdi-plugin..."
192-
kubectl apply -f ${generic-cdi-plugin-manifest}
216+
echo "Removing old generic-cdi-plugin deployment..."
217+
kubectl delete daemonset -n kube-system generic-cdi-plugin --ignore-not-found=true
193218
fi
194219
195-
echo "Waiting for generic-cdi-plugin to be ready..."
196-
kubectl rollout status daemonset/generic-cdi-plugin -n kube-system --timeout=120s || true
220+
echo "Deploying NVIDIA device plugin..."
221+
kubectl apply -f ${nvidia-device-plugin-manifest}
222+
223+
echo "Waiting for NVIDIA device plugin to be ready..."
224+
kubectl rollout status daemonset/nvidia-device-plugin-daemonset -n kube-system --timeout=120s || true
197225
'';
198226
};
199227
};
200228

201229
# Store test manifests in /etc for easy access
202230
environment.etc."k3s/gpu-test-pod.yaml".source = gpu-test-pod;
203-
environment.etc."k3s/generic-cdi-plugin.yaml".source = generic-cdi-plugin-manifest;
231+
environment.etc."k3s/nvidia-device-plugin.yaml".source = nvidia-device-plugin-manifest;
204232
};
205233
}

0 commit comments

Comments
 (0)