9 files changed, 146 insertions, 30 deletions
diff --git a/nixos/doc/manual/release-notes/rl-2405.section.md b/nixos/doc/manual/release-notes/rl-2405.section.md
index 70ee02183f4f..38e5504bb18d 100644
--- a/nixos/doc/manual/release-notes/rl-2405.section.md
+++ b/nixos/doc/manual/release-notes/rl-2405.section.md
@@ -25,6 +25,10 @@ In addition to numerous new and upgraded packages, this release has the following
 - A new option `systemd.sysusers.enable` was added. If enabled, users and groups are created with systemd-sysusers instead of with a custom perl script.
 
+- A new option `virtualisation.containers.cdi` was added. It contains `static` and `dynamic` attributes (corresponding to `/etc/cdi` and `/run/cdi` respectively) to configure the Container Device Interface (CDI).
+
+- The `virtualisation.docker.enableNvidia` and `virtualisation.podman.enableNvidia` options are deprecated; use `virtualisation.containers.cdi.dynamic.nvidia.enable` instead. This option exposes GPUs to containers via the `--device` CLI option, which is supported by Docker 25, Podman 3.2.0 and Singularity 4. Any container runtime that supports the CDI specification can take advantage of this feature.
+
 - A new option `system.etc.overlay.enable` was added. If enabled, `/etc` is mounted via an overlayfs instead of being created by a custom perl script.
diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix
index deb7b382e3d1..29f9bdf5438c 100644
--- a/nixos/modules/module-list.nix
+++ b/nixos/modules/module-list.nix
@@ -546,6 +546,7 @@
   ./services/hardware/kanata.nix
   ./services/hardware/lcd.nix
   ./services/hardware/lirc.nix
+  ./services/hardware/nvidia-container-toolkit-cdi-generator
   ./services/hardware/nvidia-optimus.nix
   ./services/hardware/openrgb.nix
   ./services/hardware/pcscd.nix
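The two release-note entries above amount to a one-line opt-in. A minimal sketch, assuming an NVIDIA driver configured via `hardware.nvidia` and a container stack that sets `virtualisation.containers.enable` (Podman does this automatically); note in containers.nix below that the generator wiring sits under `lib.mkIf cfg.enable`:

  { ... }:
  {
    virtualisation.podman.enable = true;
    # Runs the nvidia-container-toolkit CDI generator at boot (the new
    # module added below) and writes its spec under /run/cdi.
    virtualisation.containers.cdi.dynamic.nvidia.enable = true;
  }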
diff --git a/nixos/modules/services/hardware/nvidia-container-toolkit-cdi-generator/cdi-generate.nix b/nixos/modules/services/hardware/nvidia-container-toolkit-cdi-generator/cdi-generate.nix
new file mode 100644
index 000000000000..a90d234f65c0
--- /dev/null
+++ b/nixos/modules/services/hardware/nvidia-container-toolkit-cdi-generator/cdi-generate.nix
@@ -0,0 +1,39 @@
+{ config, lib, pkgs }: let
+  mountOptions = { options = ["ro" "nosuid" "nodev" "bind"]; };
+  mounts = [
+    { hostPath = "${lib.getBin config.hardware.nvidia.package}/bin/nvidia-cuda-mps-control";
+      containerPath = "/usr/bin/nvidia-cuda-mps-control"; }
+    { hostPath = "${lib.getBin config.hardware.nvidia.package}/bin/nvidia-cuda-mps-server";
+      containerPath = "/usr/bin/nvidia-cuda-mps-server"; }
+    { hostPath = "${lib.getBin config.hardware.nvidia.package}/bin/nvidia-debugdump";
+      containerPath = "/usr/bin/nvidia-debugdump"; }
+    { hostPath = "${lib.getBin config.hardware.nvidia.package}/bin/nvidia-powerd";
+      containerPath = "/usr/bin/nvidia-powerd"; }
+    { hostPath = "${lib.getBin config.hardware.nvidia.package}/bin/nvidia-smi";
+      containerPath = "/usr/bin/nvidia-smi"; }
+    { hostPath = "${pkgs.nvidia-container-toolkit}/bin/nvidia-ctk";
+      containerPath = "/usr/bin/nvidia-ctk"; }
+    { hostPath = "${pkgs.glibc}/lib";
+      containerPath = "${pkgs.glibc}/lib"; }
+    { hostPath = "${pkgs.glibc}/lib64";
+      containerPath = "${pkgs.glibc}/lib64"; }
+  ];
+  jqAddMountExpression = ".containerEdits.mounts[.containerEdits.mounts | length] |= . +";
+  mountsToJq = lib.concatMap
+    (mount:
+      ["${pkgs.jq}/bin/jq '${jqAddMountExpression} ${builtins.toJSON (mount // mountOptions)}'"])
+    mounts;
+in ''
+#! ${pkgs.runtimeShell}
+
+function cdiGenerate {
+  ${pkgs.nvidia-container-toolkit}/bin/nvidia-ctk cdi generate \
+    --format json \
+    --ldconfig-path ${pkgs.glibc.bin}/bin/ldconfig \
+    --library-search-path ${config.hardware.nvidia.package}/lib \
+    --nvidia-ctk-path ${pkgs.nvidia-container-toolkit}/bin/nvidia-ctk
+}
+
+cdiGenerate | \
+  ${lib.concatStringsSep " | " mountsToJq} > $RUNTIME_DIRECTORY/nvidia-container-toolkit.json
+''
diff --git a/nixos/modules/services/hardware/nvidia-container-toolkit-cdi-generator/default.nix b/nixos/modules/services/hardware/nvidia-container-toolkit-cdi-generator/default.nix
new file mode 100644
index 000000000000..3c96e9c41be5
--- /dev/null
+++ b/nixos/modules/services/hardware/nvidia-container-toolkit-cdi-generator/default.nix
@@ -0,0 +1,38 @@
+{ config, lib, pkgs, ... }:
+
+{
+
+  options = {
+
+    hardware.nvidia-container-toolkit-cdi-generator.enable = lib.mkOption {
+      default = false;
+      internal = true;
+      visible = false;
+      type = lib.types.bool;
+      description = lib.mdDoc ''
+        Enable dynamic CDI configuration for NVidia devices by running
+        nvidia-container-toolkit on boot.
+      '';
+    };
+
+  };
+
+  config = {
+
+    systemd.services.nvidia-container-toolkit-cdi-generator = lib.mkIf config.hardware.nvidia-container-toolkit-cdi-generator.enable {
+      description = "Container Device Interface (CDI) for Nvidia generator";
+      wantedBy = [ "multi-user.target" ];
+      after = [ "systemd-udev-settle.service" ];
+      serviceConfig = {
+        RuntimeDirectory = "cdi";
+        RemainAfterExit = true;
+        ExecStart = let
+          script = (pkgs.writeScriptBin "nvidia-cdi-generator"
+            (import ./cdi-generate.nix { inherit config lib pkgs; })); in (lib.getExe script);
+        Type = "oneshot";
+      };
+    };
+
+  };
+
+}
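The generator resolves every driver binary and library through `config.hardware.nvidia.package`, so the emitted CDI spec always matches the driver generation the host actually runs. A hypothetical sketch pinning a specific driver channel alongside the generator (`nvidiaPackages.production` is one of the channels nixpkgs exposes; treat the exact attribute path as an assumption about your kernel package set):

  { config, ... }:
  {
    # The CDI generator reads binaries and libraries from this package,
    # so pinning it changes what the generated spec mounts into containers.
    hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production;
    virtualisation.containers.cdi.dynamic.nvidia.enable = true;
  }

Because the unit sets `RuntimeDirectory = "cdi"`, the resulting `nvidia-container-toolkit.json` lands in `/run/cdi`, which is the `dynamic` side of the new `virtualisation.containers.cdi` option.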
diff --git a/nixos/modules/virtualisation/containers.nix b/nixos/modules/virtualisation/containers.nix
index a205890b6843..b3d81078eb34 100644
--- a/nixos/modules/virtualisation/containers.nix
+++ b/nixos/modules/virtualisation/containers.nix
@@ -28,29 +28,39 @@ in
       description = lib.mdDoc "Enable the OCI seccomp BPF hook";
     };
 
-    cdi = mkOption {
-      type = types.attrs;
-      default = { };
-      description = lib.mdDoc ''
-        Declarative CDI specification. Each key of the attribute set
-        will be mapped to a file in /etc/cdi. It is required for every
-        key to be provided in JSON format.
-      '';
-      example = {
-        some-vendor = builtins.fromJSON ''
-          {
-            "cdiVersion": "0.5.0",
-            "kind": "some-vendor.com/foo",
-            "devices": [],
-            "containerEdits": []
-          }
-        '';
-
-        some-other-vendor = {
-          cdiVersion = "0.5.0";
-          kind = "some-other-vendor.com/bar";
-          devices = [];
-          containerEdits = [];
+    cdi = {
+      dynamic.nvidia.enable = mkOption {
+        type = types.bool;
+        default = false;
+        description = lib.mdDoc ''
+          Enable dynamic CDI configuration for NVidia devices by running nvidia-container-toolkit on boot.
+        '';
+      };
+
+      static = mkOption {
+        type = types.attrs;
+        default = { };
+        description = lib.mdDoc ''
+          Declarative CDI specification. Each key of the attribute set
+          will be mapped to a file in /etc/cdi. It is required for every
+          key to be provided in JSON format.
+        '';
+        example = {
+          some-vendor = builtins.fromJSON ''
+            {
+              "cdiVersion": "0.5.0",
+              "kind": "some-vendor.com/foo",
+              "devices": [],
+              "containerEdits": []
+            }
+          '';
+
+          some-other-vendor = {
+            cdiVersion = "0.5.0";
+            kind = "some-other-vendor.com/bar";
+            devices = [];
+            containerEdits = [];
+          };
         };
       };
     };
@@ -140,6 +150,8 @@ in
 
   config = lib.mkIf cfg.enable {
 
+    hardware.nvidia-container-toolkit-cdi-generator.enable = lib.mkIf cfg.cdi.dynamic.nvidia.enable true;
+
     virtualisation.containers.containersConf.cniPlugins = [ pkgs.cni-plugins ];
 
     virtualisation.containers.containersConf.settings = {
@@ -152,11 +164,11 @@ in
     };
 
     environment.etc = let
-      cdiConfigurationFiles = (lib.attrsets.mapAttrs'
-        (name: value: lib.attrsets.nameValuePair "cdi/${name}.json" {
-          text = builtins.toJSON value;
-        })
-        cfg.cdi);
+      cdiStaticConfigurationFiles = (lib.attrsets.mapAttrs'
+        (name: value: lib.attrsets.nameValuePair "cdi/${name}.json" {
+          text = builtins.toJSON value;
+        })
+        cfg.cdi.static);
     in {
       "containers/containers.conf".source = toml.generate "containers.conf" cfg.containersConf.settings;
@@ -171,7 +183,7 @@ in
       "containers/policy.json".source =
         if cfg.policy != { } then pkgs.writeText "policy.json" (builtins.toJSON cfg.policy)
         else "${pkgs.skopeo.policy}/default-policy.json";
-    } // cdiConfigurationFiles;
+    } // cdiStaticConfigurationFiles;
 
   };
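For devices whose vendors ship a ready-made CDI specification, the `static` attribute set writes files directly into `/etc/cdi`. A sketch mirroring the option's own example (the vendor name, kind, and empty device list are placeholders):

  {
    virtualisation.containers.cdi.static = {
      some-vendor = {
        cdiVersion = "0.5.0";
        kind = "some-vendor.com/foo";
        devices = [ ];
        containerEdits = [ ];
      };
    };
  }

Each attribute name becomes `/etc/cdi/<name>.json`, serialized with `builtins.toJSON` by the `cdiStaticConfigurationFiles` helper above.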
diff --git a/nixos/modules/virtualisation/docker.nix b/nixos/modules/virtualisation/docker.nix
index d4d34d13a94d..cceb186e0b36 100644
--- a/nixos/modules/virtualisation/docker.nix
+++ b/nixos/modules/virtualisation/docker.nix
@@ -72,6 +72,8 @@ in
       type = types.bool;
       default = false;
       description = lib.mdDoc ''
+        **Deprecated**, please use virtualisation.containers.cdi.dynamic.nvidia.enable instead.
+
         Enable nvidia-docker wrapper, supporting NVIDIA GPUs inside docker containers.
       '';
     };
@@ -185,6 +187,16 @@ in
     users.groups.docker.gid = config.ids.gids.docker;
     systemd.packages = [ cfg.package ];
 
+    # Docker 25.0.0 supports CDI by default
+    # (https://docs.docker.com/engine/release-notes/25.0/#new). Encourage
+    # moving to CDI as opposed to having deprecated runtime
+    # wrappers.
+    warnings = lib.optionals (cfg.enableNvidia && (lib.strings.versionAtLeast cfg.package.version "25")) [
+      ''
+        You have set virtualisation.docker.enableNvidia. This option is deprecated, please set virtualisation.containers.cdi.dynamic.nvidia.enable instead.
+      ''
+    ];
+
     systemd.services.docker = {
       wantedBy = optional cfg.enableOnBoot "multi-user.target";
       after = [ "network.target" "docker.socket" ];
diff --git a/nixos/modules/virtualisation/podman/default.nix b/nixos/modules/virtualisation/podman/default.nix
index 47382f9beab0..7411ebc2a311 100644
--- a/nixos/modules/virtualisation/podman/default.nix
+++ b/nixos/modules/virtualisation/podman/default.nix
@@ -82,6 +82,8 @@ in
       type = types.bool;
       default = false;
       description = lib.mdDoc ''
+        **Deprecated**, please use virtualisation.containers.cdi.dynamic.nvidia.enable instead.
+
        Enable use of NVidia GPUs from within podman containers.
       '';
     };
@@ -166,6 +168,12 @@ in
       inherit (networkConfig) dns_enabled network_interface;
     in lib.mkIf cfg.enable {
 
+      warnings = lib.optionals cfg.enableNvidia [
+        ''
+          You have set virtualisation.podman.enableNvidia. This option is deprecated, please set virtualisation.containers.cdi.dynamic.nvidia.enable instead.
+        ''
+      ];
+
       environment.systemPackages = [ cfg.package ]
         ++ lib.optional cfg.dockerCompat dockerCompat;
diff --git a/pkgs/applications/virtualization/nvidia-container-toolkit/default.nix b/pkgs/applications/virtualization/nvidia-container-toolkit/default.nix
index 451ddb4906bc..a584be35e7b2 100644
--- a/pkgs/applications/virtualization/nvidia-container-toolkit/default.nix
+++ b/pkgs/applications/virtualization/nvidia-container-toolkit/default.nix
@@ -6,8 +6,8 @@
 , linkFarm
 , writeShellScript
 , formats
-, containerRuntimePath
-, configTemplate
+, containerRuntimePath ? null
+, configTemplate ? null
 , configTemplatePath ? null
 , libnvidia-container
 , cudaPackages
@@ -91,7 +91,7 @@ buildGoModule rec {
     makeWrapper
   ];
 
-  preConfigure = ''
+  preConfigure = lib.optionalString (containerRuntimePath != null) ''
     # Ensure the runc symlink isn't broken:
     if ! readlink --quiet --canonicalize-existing "${isolatedContainerRuntimePath}/runc" ; then
       echo "${isolatedContainerRuntimePath}/runc: broken symlink" >&2
@@ -109,7 +109,7 @@ buildGoModule rec {
   in [ "-skip" "${builtins.concatStringsSep "|" skippedTests}" ];
 
-  postInstall = ''
+  postInstall = lib.optionalString (containerRuntimePath != null) ''
    mkdir -p $out/etc/nvidia-container-runtime
 
    # nvidia-container-runtime invokes docker-runc or runc if that isn't
diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix
index 2adaad5ac8e2..6c4c2a20b25a 100644
--- a/pkgs/top-level/all-packages.nix
+++ b/pkgs/top-level/all-packages.nix
@@ -24067,6 +24067,8 @@ with pkgs;
   nv-codec-headers-11 = callPackage ../development/libraries/nv-codec-headers/11_x.nix { };
   nv-codec-headers-12 = callPackage ../development/libraries/nv-codec-headers/12_x.nix { };
 
+  nvidia-container-toolkit = callPackage ../applications/virtualization/nvidia-container-toolkit { };
+  nvidiaCtkPackages = callPackage ../applications/virtualization/nvidia-container-toolkit/packages.nix { };
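Migrating away from the deprecated flags is mechanical; a sketch, assuming Docker 25 or another CDI-capable runtime per the release notes:

  {
    # Before: deprecated, and warned about on Docker >= 25 as shown above.
    # virtualisation.docker.enableNvidia = true;
    # virtualisation.podman.enableNvidia = true;

    # After: one switch shared by every CDI-aware runtime.
    virtualisation.containers.cdi.dynamic.nvidia.enable = true;
  }

Containers then request GPUs through the `--device` flag mentioned in the release notes; with the generated NVIDIA spec the device names typically look like `nvidia.com/gpu=all`, though the exact kind is determined by nvidia-ctk, so treat that name as an assumption.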