Skip to content

Commit fc6bc86

Browse files
authored
nvidia-container-toolkit: reintroduce nvidia runtime wrappers (#421088)
2 parents 835a795 + 117bbae commit fc6bc86

5 files changed

Lines changed: 190 additions & 179 deletions

File tree

nixos/modules/services/hardware/nvidia-container-toolkit/default.nix

Lines changed: 173 additions & 120 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,6 @@
3939
};
4040
in
4141
{
42-
4342
hardware.nvidia-container-toolkit = {
4443
enable = lib.mkOption {
4544
default = false;
@@ -129,137 +128,191 @@
129128
'';
130129
};
131130
};
132-
133131
};
134132

135-
config = lib.mkIf config.hardware.nvidia-container-toolkit.enable {
136-
assertions = [
137-
{
138-
assertion =
139-
config.hardware.nvidia.datacenter.enable
140-
|| lib.elem "nvidia" config.services.xserver.videoDrivers
141-
|| config.hardware.nvidia-container-toolkit.suppressNvidiaDriverAssertion;
142-
message = ''`nvidia-container-toolkit` requires nvidia drivers: set `hardware.nvidia.datacenter.enable`, add "nvidia" to `services.xserver.videoDrivers`, or set `hardware.nvidia-container-toolkit.suppressNvidiaDriverAssertion` if the driver is provided by another NixOS module (e.g. from NixOS-WSL)'';
143-
}
144-
{
145-
assertion =
146-
((builtins.length config.hardware.nvidia-container-toolkit.csv-files) > 0)
147-
-> config.hardware.nvidia-container-toolkit.discovery-mode == "csv";
148-
message = ''When CSV files are provided, `config.hardware.nvidia-container-toolkit.discovery-mode` has to be set to `csv`.'';
149-
}
150-
];
133+
config = lib.mkMerge [
134+
(lib.mkIf config.virtualisation.docker.enableNvidia {
135+
environment.etc."nvidia-container-runtime/config.toml".text = ''
136+
disable-require = true
137+
supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video"
138+
[nvidia-container-cli]
139+
environment = []
140+
ldconfig = "@${lib.getExe' pkgs.glibc "ldconfig"}"
141+
load-kmods = true
142+
no-cgroups = false
143+
path = "${lib.getExe' pkgs.libnvidia-container "nvidia-container-cli"}"
144+
[nvidia-container-runtime]
145+
mode = "auto"
146+
runtimes = ["docker-runc", "runc", "crun"]
147+
[nvidia-container-runtime-hook]
148+
path = "${lib.getOutput "tools" config.hardware.nvidia-container-toolkit.package}/bin/nvidia-container-runtime-hook"
149+
skip-mode-detection = false
150+
[nvidia-ctk]
151+
path = "${lib.getExe' config.hardware.nvidia-container-toolkit.package "nvidia-ctk"}"
152+
'';
153+
154+
virtualisation.docker = {
155+
daemon.settings = {
156+
default-runtime = "nvidia";
157+
runtimes.nvidia = {
158+
path = "${lib.getOutput "tools" config.hardware.nvidia-container-toolkit.package}/bin/nvidia-container-runtime";
159+
args = [ ];
160+
};
161+
};
151162

152-
virtualisation.docker = {
153-
daemon.settings = lib.mkIf (lib.versionAtLeast config.virtualisation.docker.package.version "25") {
154-
features.cdi = true;
163+
extraPackages = [
164+
(lib.getOutput "tools" config.hardware.nvidia-container-toolkit.package)
165+
];
155166
};
167+
})
168+
(lib.mkIf config.hardware.nvidia-container-toolkit.enable {
169+
assertions = [
170+
{
171+
assertion =
172+
config.hardware.nvidia.datacenter.enable
173+
|| lib.elem "nvidia" config.services.xserver.videoDrivers
174+
|| config.hardware.nvidia-container-toolkit.suppressNvidiaDriverAssertion;
175+
message = ''`nvidia-container-toolkit` requires nvidia drivers: set `hardware.nvidia.datacenter.enable`, add "nvidia" to `services.xserver.videoDrivers`, or set `hardware.nvidia-container-toolkit.suppressNvidiaDriverAssertion` if the driver is provided by another NixOS module (e.g. from NixOS-WSL)'';
176+
}
177+
{
178+
assertion =
179+
((builtins.length config.hardware.nvidia-container-toolkit.csv-files) > 0)
180+
-> config.hardware.nvidia-container-toolkit.discovery-mode == "csv";
181+
message = ''When CSV files are provided, `config.hardware.nvidia-container-toolkit.discovery-mode` has to be set to `csv`.'';
182+
}
183+
];
156184

157-
rootless.daemon.settings =
158-
lib.mkIf
159-
(
160-
config.virtualisation.docker.rootless.enable
161-
&& (lib.versionAtLeast config.virtualisation.docker.package.version "25")
162-
)
163-
{
164-
features.cdi = true;
185+
warnings = lib.mkMerge [
186+
(lib.mkIf config.virtualisation.podman.enableNvidia [
187+
"Setting virtualisation.podman.enableNvidia has no effect and will be removed soon."
188+
])
189+
];
190+
191+
virtualisation = {
192+
containers.containersConf.settings = {
193+
engine = {
194+
cdi_spec_dirs = [
195+
"/etc/cdi"
196+
"/var/run/cdi"
197+
];
165198
};
166-
};
199+
};
200+
docker =
201+
let
202+
dockerVersion = config.virtualisation.docker.package.version;
203+
in
204+
{
205+
daemon.settings = lib.mkIf (lib.versionAtLeast dockerVersion "25") {
206+
features.cdi = true;
207+
};
167208

168-
hardware = {
169-
graphics.enable = lib.mkIf (!config.hardware.nvidia.datacenter.enable) true;
209+
rootless = {
210+
daemon.settings = lib.mkIf (lib.versionAtLeast dockerVersion "25") {
211+
features.cdi = true;
212+
};
170213

171-
nvidia-container-toolkit.mounts =
172-
let
173-
nvidia-driver = config.hardware.nvidia.package;
174-
in
175-
(lib.mkMerge [
176-
[
177-
{
178-
hostPath = pkgs.addDriverRunpath.driverLink;
179-
containerPath = pkgs.addDriverRunpath.driverLink;
180-
}
181-
{
182-
hostPath = "${lib.getLib nvidia-driver}/etc";
183-
containerPath = "${lib.getLib nvidia-driver}/etc";
184-
}
185-
{
186-
hostPath = "${lib.getLib nvidia-driver}/share";
187-
containerPath = "${lib.getLib nvidia-driver}/share";
188-
}
189-
{
190-
hostPath = "${lib.getLib pkgs.glibc}/lib";
191-
containerPath = "${lib.getLib pkgs.glibc}/lib";
192-
}
193-
{
194-
hostPath = "${lib.getLib pkgs.glibc}/lib64";
195-
containerPath = "${lib.getLib pkgs.glibc}/lib64";
196-
}
197-
]
198-
(lib.mkIf config.hardware.nvidia-container-toolkit.mount-nvidia-executables [
199-
{
200-
hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-control";
201-
containerPath = "/usr/bin/nvidia-cuda-mps-control";
202-
}
203-
{
204-
hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-server";
205-
containerPath = "/usr/bin/nvidia-cuda-mps-server";
206-
}
207-
{
208-
hostPath = lib.getExe' nvidia-driver "nvidia-debugdump";
209-
containerPath = "/usr/bin/nvidia-debugdump";
210-
}
211-
{
212-
hostPath = lib.getExe' nvidia-driver "nvidia-powerd";
213-
containerPath = "/usr/bin/nvidia-powerd";
214-
}
215-
{
216-
hostPath = lib.getExe' nvidia-driver "nvidia-smi";
217-
containerPath = "/usr/bin/nvidia-smi";
218-
}
219-
])
220-
# nvidia-docker 1.0 uses /usr/local/nvidia/lib{,64}
221-
# e.g.
222-
# - https://gitlab.com/nvidia/container-images/cuda/-/blob/e3ff10eab3a1424fe394899df0e0f8ca5a410f0f/dist/12.3.1/ubi9/base/Dockerfile#L44
223-
# - https://github.com/NVIDIA/nvidia-docker/blob/01d2c9436620d7dde4672e414698afe6da4a282f/src/nvidia/volumes.go#L104-L173
224-
(lib.mkIf config.hardware.nvidia-container-toolkit.mount-nvidia-docker-1-directories [
225-
{
226-
hostPath = "${lib.getLib nvidia-driver}/lib";
227-
containerPath = "/usr/local/nvidia/lib";
228-
}
229-
{
230-
hostPath = "${lib.getLib nvidia-driver}/lib";
231-
containerPath = "/usr/local/nvidia/lib64";
232-
}
233-
])
234-
]);
235-
};
214+
extraPackages = [
215+
(lib.getOutput "tools" config.hardware.nvidia-container-toolkit.package)
216+
];
217+
};
218+
};
219+
};
236220

237-
systemd.services.nvidia-container-toolkit-cdi-generator = {
238-
description = "Container Device Interface (CDI) for Nvidia generator";
239-
wantedBy = [ "multi-user.target" ];
240-
after = [ "systemd-udev-settle.service" ];
241-
serviceConfig = {
242-
RuntimeDirectory = "cdi";
243-
RemainAfterExit = true;
244-
ExecStart =
221+
hardware = {
222+
graphics.enable = lib.mkIf (!config.hardware.nvidia.datacenter.enable) true;
223+
224+
nvidia-container-toolkit.mounts =
245225
let
246-
script = pkgs.callPackage ./cdi-generate.nix {
247-
inherit (config.hardware.nvidia-container-toolkit)
248-
csv-files
249-
device-name-strategy
250-
discovery-mode
251-
mounts
252-
extraArgs
253-
;
254-
nvidia-container-toolkit = config.hardware.nvidia-container-toolkit.package;
255-
nvidia-driver = config.hardware.nvidia.package;
256-
};
226+
nvidia-driver = config.hardware.nvidia.package;
257227
in
258-
lib.getExe script;
259-
Type = "oneshot";
228+
(lib.mkMerge [
229+
[
230+
{
231+
hostPath = pkgs.addDriverRunpath.driverLink;
232+
containerPath = pkgs.addDriverRunpath.driverLink;
233+
}
234+
{
235+
hostPath = "${lib.getLib nvidia-driver}/etc";
236+
containerPath = "${lib.getLib nvidia-driver}/etc";
237+
}
238+
{
239+
hostPath = "${lib.getLib nvidia-driver}/share";
240+
containerPath = "${lib.getLib nvidia-driver}/share";
241+
}
242+
{
243+
hostPath = "${lib.getLib pkgs.glibc}/lib";
244+
containerPath = "${lib.getLib pkgs.glibc}/lib";
245+
}
246+
{
247+
hostPath = "${lib.getLib pkgs.glibc}/lib64";
248+
containerPath = "${lib.getLib pkgs.glibc}/lib64";
249+
}
250+
]
251+
(lib.mkIf config.hardware.nvidia-container-toolkit.mount-nvidia-executables [
252+
{
253+
hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-control";
254+
containerPath = "/usr/bin/nvidia-cuda-mps-control";
255+
}
256+
{
257+
hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-server";
258+
containerPath = "/usr/bin/nvidia-cuda-mps-server";
259+
}
260+
{
261+
hostPath = lib.getExe' nvidia-driver "nvidia-debugdump";
262+
containerPath = "/usr/bin/nvidia-debugdump";
263+
}
264+
{
265+
hostPath = lib.getExe' nvidia-driver "nvidia-powerd";
266+
containerPath = "/usr/bin/nvidia-powerd";
267+
}
268+
{
269+
hostPath = lib.getExe' nvidia-driver "nvidia-smi";
270+
containerPath = "/usr/bin/nvidia-smi";
271+
}
272+
])
273+
# nvidia-docker 1.0 uses /usr/local/nvidia/lib{,64}
274+
# e.g.
275+
# - https://gitlab.com/nvidia/container-images/cuda/-/blob/e3ff10eab3a1424fe394899df0e0f8ca5a410f0f/dist/12.3.1/ubi9/base/Dockerfile#L44
276+
# - https://github.com/NVIDIA/nvidia-docker/blob/01d2c9436620d7dde4672e414698afe6da4a282f/src/nvidia/volumes.go#L104-L173
277+
(lib.mkIf config.hardware.nvidia-container-toolkit.mount-nvidia-docker-1-directories [
278+
{
279+
hostPath = "${lib.getLib nvidia-driver}/lib";
280+
containerPath = "/usr/local/nvidia/lib";
281+
}
282+
{
283+
hostPath = "${lib.getLib nvidia-driver}/lib";
284+
containerPath = "/usr/local/nvidia/lib64";
285+
}
286+
])
287+
]);
260288
};
261-
};
262289

263-
};
290+
systemd.services.nvidia-container-toolkit-cdi-generator = {
291+
description = "Container Device Interface (CDI) for Nvidia generator";
292+
wantedBy = [ "multi-user.target" ];
293+
after = [ "systemd-udev-settle.service" ];
294+
serviceConfig = {
295+
RuntimeDirectory = "cdi";
296+
RemainAfterExit = true;
297+
ExecStart =
298+
let
299+
script = pkgs.callPackage ./cdi-generate.nix {
300+
inherit (config.hardware.nvidia-container-toolkit)
301+
csv-files
302+
device-name-strategy
303+
discovery-mode
304+
mounts
305+
extraArgs
306+
;
307+
nvidia-container-toolkit = config.hardware.nvidia-container-toolkit.package;
308+
nvidia-driver = config.hardware.nvidia.package;
309+
};
310+
in
311+
lib.getExe script;
312+
Type = "oneshot";
313+
};
314+
};
315+
})
316+
];
264317

265318
}

nixos/modules/virtualisation/docker-rootless.nix

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,14 @@ in
5050
};
5151

5252
package = lib.mkPackageOption pkgs "docker" { };
53+
54+
extraPackages = lib.mkOption {
55+
type = lib.types.listOf lib.types.package;
56+
default = [ ];
57+
description = ''
58+
Extra packages to add to PATH for the docker daemon process.
59+
'';
60+
};
5361
};
5462

5563
###### implementation
@@ -68,7 +76,7 @@ in
6876
wantedBy = [ "default.target" ];
6977
description = "Docker Application Container Engine (Rootless)";
7078
# needs newuidmap from pkgs.shadow
71-
path = [ "/run/wrappers" ];
79+
path = [ "/run/wrappers" ] ++ cfg.extraPackages;
7280
environment = proxy_env;
7381
unitConfig = {
7482
# docker-rootless doesn't support running as root.

nixos/modules/virtualisation/docker.nix

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -88,7 +88,7 @@ in
8888
description = ''
8989
**Deprecated**, please use hardware.nvidia-container-toolkit.enable instead.
9090
91-
Enable nvidia-docker wrapper, supporting NVIDIA GPUs inside docker containers.
91+
Enable Nvidia GPU support inside docker containers.
9292
'';
9393
};
9494

@@ -246,7 +246,7 @@ in
246246
"net.ipv4.conf.all.forwarding" = mkOverride 98 true;
247247
"net.ipv4.conf.default.forwarding" = mkOverride 98 true;
248248
};
249-
environment.systemPackages = [ cfg.package ] ++ optional cfg.enableNvidia pkgs.nvidia-docker;
249+
environment.systemPackages = [ cfg.package ];
250250
users.groups.docker.gid = config.ids.gids.docker;
251251
systemd.packages = [ cfg.package ];
252252

@@ -287,10 +287,7 @@ in
287287
};
288288

289289
path =
290-
[ pkgs.kmod ]
291-
++ optional (cfg.storageDriver == "zfs") pkgs.zfs
292-
++ optional cfg.enableNvidia pkgs.nvidia-docker
293-
++ cfg.extraPackages;
290+
[ pkgs.kmod ] ++ optional (cfg.storageDriver == "zfs") config.boot.zfs.package ++ cfg.extraPackages;
294291
};
295292

296293
systemd.sockets.docker = {

0 commit comments

Comments
 (0)