# Patch summary (free-text preamble placed before the first "diff --git" header).
#
# nixos/modules/services/hardware/nvidia-container-toolkit/default.nix
#   - `config` becomes a `lib.mkMerge` of two `lib.mkIf` parts.
#   - When `virtualisation.docker.enableNvidia` is set: writes
#     /etc/nvidia-container-runtime/config.toml, registers an "nvidia" runtime as
#     Docker's default runtime, and adds the toolkit's "tools" output to
#     `virtualisation.docker.extraPackages`.
#   - When `hardware.nvidia-container-toolkit.enable` is set: keeps the existing
#     assertions, mounts and CDI generator service; adds a warning for
#     `virtualisation.podman.enableNvidia`, CDI spec dirs for containers.conf, and
#     the "tools" output in `virtualisation.docker.rootless.extraPackages`.
#   - Review fix: the rootless `features.cdi` gate compares the version of
#     `virtualisation.docker.rootless.package` (the rootless module declares its own
#     `package` option) instead of the rootful daemon's `dockerVersion`.
# nixos/modules/virtualisation/docker-rootless.nix
#   - New `extraPackages` option, appended to the rootless service `path`.
# nixos/modules/virtualisation/docker.nix
#   - Drops `pkgs.nvidia-docker` from systemPackages and the service path; the zfs
#     storage driver now uses `config.boot.zfs.package`.
# pkgs/by-name/nv/nvidia-container-toolkit/{package,packages}.nix
#   - Removes `configTemplate`/`configTemplatePath`, the generated config.toml and the
#     wrapProgram step from postInstall, and the `dockerConfig` attrset.
diff --git a/nixos/modules/services/hardware/nvidia-container-toolkit/default.nix b/nixos/modules/services/hardware/nvidia-container-toolkit/default.nix index 3e1648017a7c1..35a32b9ddcdb2 100644 --- a/nixos/modules/services/hardware/nvidia-container-toolkit/default.nix +++ b/nixos/modules/services/hardware/nvidia-container-toolkit/default.nix @@ -39,7 +39,6 @@ }; in { - hardware.nvidia-container-toolkit = { enable = lib.mkOption { default = false; @@ -129,137 +128,191 @@ ''; }; }; - }; - config = lib.mkIf config.hardware.nvidia-container-toolkit.enable { - assertions = [ - { - assertion = - config.hardware.nvidia.datacenter.enable - || lib.elem "nvidia" config.services.xserver.videoDrivers - || config.hardware.nvidia-container-toolkit.suppressNvidiaDriverAssertion; - message = ''`nvidia-container-toolkit` requires nvidia drivers: set `hardware.nvidia.datacenter.enable`, add "nvidia" to `services.xserver.videoDrivers`, or set `hardware.nvidia-container-toolkit.suppressNvidiaDriverAssertion` if the driver is provided by another NixOS module (e.g. 
from NixOS-WSL)''; - } - { - assertion = - ((builtins.length config.hardware.nvidia-container-toolkit.csv-files) > 0) - -> config.hardware.nvidia-container-toolkit.discovery-mode == "csv"; - message = ''When CSV files are provided, `config.hardware.nvidia-container-toolkit.discovery-mode` has to be set to `csv`.''; - } - ]; + config = lib.mkMerge [ + (lib.mkIf config.virtualisation.docker.enableNvidia { + environment.etc."nvidia-container-runtime/config.toml".text = '' + disable-require = true + supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video" + [nvidia-container-cli] + environment = [] + ldconfig = "@${lib.getExe' pkgs.glibc "ldconfig"}" + load-kmods = true + no-cgroups = false + path = "${lib.getExe' pkgs.libnvidia-container "nvidia-container-cli"}" + [nvidia-container-runtime] + mode = "auto" + runtimes = ["docker-runc", "runc", "crun"] + [nvidia-container-runtime-hook] + path = "${lib.getOutput "tools" config.hardware.nvidia-container-toolkit.package}/bin/nvidia-container-runtime-hook" + skip-mode-detection = false + [nvidia-ctk] + path = "${lib.getExe' config.hardware.nvidia-container-toolkit.package "nvidia-ctk"}" + ''; + + virtualisation.docker = { + daemon.settings = { + default-runtime = "nvidia"; + runtimes.nvidia = { + path = "${lib.getOutput "tools" config.hardware.nvidia-container-toolkit.package}/bin/nvidia-container-runtime"; + args = [ ]; + }; + }; - virtualisation.docker = { - daemon.settings = lib.mkIf (lib.versionAtLeast config.virtualisation.docker.package.version "25") { - features.cdi = true; + extraPackages = [ + (lib.getOutput "tools" config.hardware.nvidia-container-toolkit.package) + ]; }; + }) + (lib.mkIf config.hardware.nvidia-container-toolkit.enable { + assertions = [ + { + assertion = + config.hardware.nvidia.datacenter.enable + || lib.elem "nvidia" config.services.xserver.videoDrivers + || config.hardware.nvidia-container-toolkit.suppressNvidiaDriverAssertion; + message = 
''`nvidia-container-toolkit` requires nvidia drivers: set `hardware.nvidia.datacenter.enable`, add "nvidia" to `services.xserver.videoDrivers`, or set `hardware.nvidia-container-toolkit.suppressNvidiaDriverAssertion` if the driver is provided by another NixOS module (e.g. from NixOS-WSL)''; + } + { + assertion = + ((builtins.length config.hardware.nvidia-container-toolkit.csv-files) > 0) + -> config.hardware.nvidia-container-toolkit.discovery-mode == "csv"; + message = ''When CSV files are provided, `config.hardware.nvidia-container-toolkit.discovery-mode` has to be set to `csv`.''; + } + ]; - rootless.daemon.settings = - lib.mkIf - ( - config.virtualisation.docker.rootless.enable - && (lib.versionAtLeast config.virtualisation.docker.package.version "25") - ) - { - features.cdi = true; + warnings = lib.mkMerge [ + (lib.mkIf config.virtualisation.podman.enableNvidia [ + "Setting virtualisation.podman.enableNvidia has no effect and will be removed soon." + ]) + ]; + + virtualisation = { + containers.containersConf.settings = { + engine = { + cdi_spec_dirs = [ + "/etc/cdi" + "/var/run/cdi" + ]; }; - }; + }; + docker = + let + dockerVersion = config.virtualisation.docker.package.version; + in + { + daemon.settings = lib.mkIf (lib.versionAtLeast dockerVersion "25") { + features.cdi = true; + }; - hardware = { - graphics.enable = lib.mkIf (!config.hardware.nvidia.datacenter.enable) true; + rootless = { + daemon.settings = lib.mkIf (lib.versionAtLeast config.virtualisation.docker.rootless.package.version "25") { + features.cdi = true; + }; - nvidia-container-toolkit.mounts = - let - nvidia-driver = config.hardware.nvidia.package; - in - (lib.mkMerge [ - [ - { - hostPath = pkgs.addDriverRunpath.driverLink; - containerPath = pkgs.addDriverRunpath.driverLink; - } - { - hostPath = "${lib.getLib nvidia-driver}/etc"; - containerPath = "${lib.getLib nvidia-driver}/etc"; - } - { - hostPath = "${lib.getLib nvidia-driver}/share"; - containerPath = "${lib.getLib nvidia-driver}/share"; - } - { - 
hostPath = "${lib.getLib 
pkgs.glibc}/lib"; - containerPath = "${lib.getLib pkgs.glibc}/lib"; - } - { - hostPath = "${lib.getLib pkgs.glibc}/lib64"; - containerPath = "${lib.getLib pkgs.glibc}/lib64"; - } - ] - (lib.mkIf config.hardware.nvidia-container-toolkit.mount-nvidia-executables [ - { - hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-control"; - containerPath = "/usr/bin/nvidia-cuda-mps-control"; - } - { - hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-server"; - containerPath = "/usr/bin/nvidia-cuda-mps-server"; - } - { - hostPath = lib.getExe' nvidia-driver "nvidia-debugdump"; - containerPath = "/usr/bin/nvidia-debugdump"; - } - { - hostPath = lib.getExe' nvidia-driver "nvidia-powerd"; - containerPath = "/usr/bin/nvidia-powerd"; - } - { - hostPath = lib.getExe' nvidia-driver "nvidia-smi"; - containerPath = "/usr/bin/nvidia-smi"; - } - ]) - # nvidia-docker 1.0 uses /usr/local/nvidia/lib{,64} - # e.g. - # - https://gitlab.com/nvidia/container-images/cuda/-/blob/e3ff10eab3a1424fe394899df0e0f8ca5a410f0f/dist/12.3.1/ubi9/base/Dockerfile#L44 - # - https://github.com/NVIDIA/nvidia-docker/blob/01d2c9436620d7dde4672e414698afe6da4a282f/src/nvidia/volumes.go#L104-L173 - (lib.mkIf config.hardware.nvidia-container-toolkit.mount-nvidia-docker-1-directories [ - { - hostPath = "${lib.getLib nvidia-driver}/lib"; - containerPath = "/usr/local/nvidia/lib"; - } - { - hostPath = "${lib.getLib nvidia-driver}/lib"; - containerPath = "/usr/local/nvidia/lib64"; - } - ]) - ]); - }; + extraPackages = [ + (lib.getOutput "tools" config.hardware.nvidia-container-toolkit.package) + ]; + }; + }; + }; - systemd.services.nvidia-container-toolkit-cdi-generator = { - description = "Container Device Interface (CDI) for Nvidia generator"; - wantedBy = [ "multi-user.target" ]; - after = [ "systemd-udev-settle.service" ]; - serviceConfig = { - RuntimeDirectory = "cdi"; - RemainAfterExit = true; - ExecStart = + hardware = { + graphics.enable = lib.mkIf (!config.hardware.nvidia.datacenter.enable) true; + + 
nvidia-container-toolkit.mounts = let - script = pkgs.callPackage ./cdi-generate.nix { - inherit (config.hardware.nvidia-container-toolkit) - csv-files - device-name-strategy - discovery-mode - mounts - extraArgs - ; - nvidia-container-toolkit = config.hardware.nvidia-container-toolkit.package; - nvidia-driver = config.hardware.nvidia.package; - }; + nvidia-driver = config.hardware.nvidia.package; in - lib.getExe script; - Type = "oneshot"; + (lib.mkMerge [ + [ + { + hostPath = pkgs.addDriverRunpath.driverLink; + containerPath = pkgs.addDriverRunpath.driverLink; + } + { + hostPath = "${lib.getLib nvidia-driver}/etc"; + containerPath = "${lib.getLib nvidia-driver}/etc"; + } + { + hostPath = "${lib.getLib nvidia-driver}/share"; + containerPath = "${lib.getLib nvidia-driver}/share"; + } + { + hostPath = "${lib.getLib pkgs.glibc}/lib"; + containerPath = "${lib.getLib pkgs.glibc}/lib"; + } + { + hostPath = "${lib.getLib pkgs.glibc}/lib64"; + containerPath = "${lib.getLib pkgs.glibc}/lib64"; + } + ] + (lib.mkIf config.hardware.nvidia-container-toolkit.mount-nvidia-executables [ + { + hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-control"; + containerPath = "/usr/bin/nvidia-cuda-mps-control"; + } + { + hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-server"; + containerPath = "/usr/bin/nvidia-cuda-mps-server"; + } + { + hostPath = lib.getExe' nvidia-driver "nvidia-debugdump"; + containerPath = "/usr/bin/nvidia-debugdump"; + } + { + hostPath = lib.getExe' nvidia-driver "nvidia-powerd"; + containerPath = "/usr/bin/nvidia-powerd"; + } + { + hostPath = lib.getExe' nvidia-driver "nvidia-smi"; + containerPath = "/usr/bin/nvidia-smi"; + } + ]) + # nvidia-docker 1.0 uses /usr/local/nvidia/lib{,64} + # e.g. 
+ # - https://gitlab.com/nvidia/container-images/cuda/-/blob/e3ff10eab3a1424fe394899df0e0f8ca5a410f0f/dist/12.3.1/ubi9/base/Dockerfile#L44 + # - https://github.com/NVIDIA/nvidia-docker/blob/01d2c9436620d7dde4672e414698afe6da4a282f/src/nvidia/volumes.go#L104-L173 + (lib.mkIf config.hardware.nvidia-container-toolkit.mount-nvidia-docker-1-directories [ + { + hostPath = "${lib.getLib nvidia-driver}/lib"; + containerPath = "/usr/local/nvidia/lib"; + } + { + hostPath = "${lib.getLib nvidia-driver}/lib"; + containerPath = "/usr/local/nvidia/lib64"; + } + ]) + ]); }; - }; - }; + systemd.services.nvidia-container-toolkit-cdi-generator = { + description = "Container Device Interface (CDI) for Nvidia generator"; + wantedBy = [ "multi-user.target" ]; + after = [ "systemd-udev-settle.service" ]; + serviceConfig = { + RuntimeDirectory = "cdi"; + RemainAfterExit = true; + ExecStart = + let + script = pkgs.callPackage ./cdi-generate.nix { + inherit (config.hardware.nvidia-container-toolkit) + csv-files + device-name-strategy + discovery-mode + mounts + extraArgs + ; + nvidia-container-toolkit = config.hardware.nvidia-container-toolkit.package; + nvidia-driver = config.hardware.nvidia.package; + }; + in + lib.getExe script; + Type = "oneshot"; + }; + }; + }) + ]; } diff --git a/nixos/modules/virtualisation/docker-rootless.nix b/nixos/modules/virtualisation/docker-rootless.nix index 08cf48ef22fbd..6a13e4b916ee5 100644 --- a/nixos/modules/virtualisation/docker-rootless.nix +++ b/nixos/modules/virtualisation/docker-rootless.nix @@ -50,6 +50,14 @@ in }; package = lib.mkPackageOption pkgs "docker" { }; + + extraPackages = lib.mkOption { + type = lib.types.listOf lib.types.package; + default = [ ]; + description = '' + Extra packages to add to PATH for the docker daemon process. 
+ ''; + }; }; ###### implementation @@ -68,7 +76,7 @@ in wantedBy = [ "default.target" ]; description = "Docker Application Container Engine (Rootless)"; # needs newuidmap from pkgs.shadow - path = [ "/run/wrappers" ]; + path = [ "/run/wrappers" ] ++ cfg.extraPackages; environment = proxy_env; unitConfig = { # docker-rootless doesn't support running as root. diff --git a/nixos/modules/virtualisation/docker.nix b/nixos/modules/virtualisation/docker.nix index e67ec76c4dea2..6f5b67e129805 100644 --- a/nixos/modules/virtualisation/docker.nix +++ b/nixos/modules/virtualisation/docker.nix @@ -88,7 +88,7 @@ in description = '' **Deprecated**, please use hardware.nvidia-container-toolkit.enable instead. - Enable nvidia-docker wrapper, supporting NVIDIA GPUs inside docker containers. + Enable Nvidia GPU support inside docker containers. ''; }; @@ -246,7 +246,7 @@ in "net.ipv4.conf.all.forwarding" = mkOverride 98 true; "net.ipv4.conf.default.forwarding" = mkOverride 98 true; }; - environment.systemPackages = [ cfg.package ] ++ optional cfg.enableNvidia pkgs.nvidia-docker; + environment.systemPackages = [ cfg.package ]; users.groups.docker.gid = config.ids.gids.docker; systemd.packages = [ cfg.package ]; @@ -287,10 +287,7 @@ in }; path = - [ pkgs.kmod ] - ++ optional (cfg.storageDriver == "zfs") pkgs.zfs - ++ optional cfg.enableNvidia pkgs.nvidia-docker - ++ cfg.extraPackages; + [ pkgs.kmod ] ++ optional (cfg.storageDriver == "zfs") config.boot.zfs.package ++ cfg.extraPackages; }; systemd.sockets.docker = { diff --git a/pkgs/by-name/nv/nvidia-container-toolkit/package.nix b/pkgs/by-name/nv/nvidia-container-toolkit/package.nix index eeff4411e5f0f..4fd312f553bf7 100644 --- a/pkgs/by-name/nv/nvidia-container-toolkit/package.nix +++ b/pkgs/by-name/nv/nvidia-container-toolkit/package.nix @@ -5,23 +5,11 @@ makeWrapper, buildGoModule, formats, - configTemplate ? null, - configTemplatePath ? 
null, libnvidia-container, autoAddDriverRunpath, }: -assert configTemplate != null -> (lib.isAttrs configTemplate && configTemplatePath == null); -assert - configTemplatePath != null -> (lib.isStringLike configTemplatePath && configTemplate == null); - let - configToml = - if configTemplatePath != null then - configTemplatePath - else - (formats.toml { }).generate "config.toml" configTemplate; - # From https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/03cbf9c6cd26c75afef8a2dd68e0306aace80401/Makefile#L54 cliVersionPackage = "github.com/NVIDIA/nvidia-container-toolkit/internal/info"; in @@ -97,24 +85,10 @@ buildGoModule (finalAttrs: { in [ "-skip=^${builtins.concatStringsSep "$|^" skippedTests}$" ]; - postInstall = - '' - mkdir -p $tools/bin - mv $out/bin/{nvidia-cdi-hook,nvidia-container-runtime,nvidia-container-runtime.cdi,nvidia-container-runtime-hook,nvidia-container-runtime.legacy} $tools/bin - - for bin in nvidia-container-runtime-hook nvidia-container-runtime; do - wrapProgram $tools/bin/$bin \ - --prefix PATH : ${libnvidia-container}/bin:$out/bin - done - '' - + lib.optionalString (configTemplate != null || configTemplatePath != null) '' - mkdir -p $out/etc/nvidia-container-runtime - - cp ${configToml} $out/etc/nvidia-container-runtime/config.toml - - substituteInPlace $out/etc/nvidia-container-runtime/config.toml \ - --subst-var-by glibcbin ${lib.getBin glibc} - ''; + postInstall = '' + mkdir -p $tools/bin + mv $out/bin/{nvidia-cdi-hook,nvidia-container-runtime,nvidia-container-runtime.cdi,nvidia-container-runtime-hook,nvidia-container-runtime.legacy} $tools/bin + ''; meta = { homepage = "https://gitlab.com/nvidia/container-toolkit/container-toolkit"; diff --git a/pkgs/by-name/nv/nvidia-container-toolkit/packages.nix b/pkgs/by-name/nv/nvidia-container-toolkit/packages.nix index 5e46502367ee0..50c6004a7cc43 100644 --- a/pkgs/by-name/nv/nvidia-container-toolkit/packages.nix +++ b/pkgs/by-name/nv/nvidia-container-toolkit/packages.nix @@ 
-6,28 +6,7 @@ # Note this scope isn't recursed into, at the time of writing. lib.makeScope newScope (self: { - - # The config is only exposed as an attrset so that the user may reach the - # default values, for inspectability purposes. - dockerConfig = { - disable-require = false; - #swarm-resource = "DOCKER_RESOURCE_GPU" - - nvidia-container-cli = { - #root = "/run/nvidia/driver"; - #path = "/usr/bin/nvidia-container-cli"; - environment = [ ]; - #debug = "/var/log/nvidia-container-runtime-hook.log"; - ldcache = "/tmp/ld.so.cache"; - load-kmods = true; - #no-cgroups = false; - #user = "root:video"; - ldconfig = "@@glibcbin@/bin/ldconfig"; - }; - }; - nvidia-container-toolkit-docker = self.callPackage ./package.nix { - configTemplate = self.dockerConfig; - }; + nvidia-container-toolkit-docker = self.callPackage ./package.nix { }; nvidia-docker = symlinkJoin { name = "nvidia-docker";