-rw-r--r--  nixos/doc/manual/release-notes/rl-2405.section.md                                      |  4
-rw-r--r--  nixos/modules/module-list.nix                                                          |  1
-rw-r--r--  nixos/modules/services/hardware/nvidia-container-toolkit-cdi-generator/cdi-generate.nix | 39
-rw-r--r--  nixos/modules/services/hardware/nvidia-container-toolkit-cdi-generator/default.nix     | 38
-rw-r--r--  nixos/modules/virtualisation/containers.nix                                            | 64
-rw-r--r--  nixos/modules/virtualisation/docker.nix                                                | 12
-rw-r--r--  nixos/modules/virtualisation/podman/default.nix                                        |  8
-rw-r--r--  pkgs/applications/virtualization/nvidia-container-toolkit/default.nix                  |  8
-rw-r--r--  pkgs/top-level/all-packages.nix                                                        |  2
9 files changed, 146 insertions, 30 deletions
diff --git a/nixos/doc/manual/release-notes/rl-2405.section.md b/nixos/doc/manual/release-notes/rl-2405.section.md
index 70ee02183f4f..38e5504bb18d 100644
--- a/nixos/doc/manual/release-notes/rl-2405.section.md
+++ b/nixos/doc/manual/release-notes/rl-2405.section.md
@@ -25,6 +25,10 @@ In addition to numerous new and upgraded packages, this release has the followin
 - A new option `systemd.sysusers.enable` was added. If enabled, users and
   groups are created with systemd-sysusers instead of with a custom perl script.
 
+- A new option `virtualisation.containers.cdi` was added. It contains the `static` and `dynamic` attributes (corresponding to `/etc/cdi` and `/run/cdi`, respectively) for configuring the Container Device Interface (CDI).
+
+- The `virtualisation.docker.enableNvidia` and `virtualisation.podman.enableNvidia` options are deprecated; use `virtualisation.containers.cdi.dynamic.nvidia.enable` instead. This option exposes GPUs in containers through the `--device` CLI option. It is supported by Docker 25, Podman 3.2.0, and Singularity 4; any container runtime that supports the CDI specification can take advantage of this feature.
+
 - A new option `system.etc.overlay.enable` was added. If enabled, `/etc` is
   mounted via an overlayfs instead of being created by a custom perl script.
 
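To make the new `virtualisation.containers.cdi` options described above concrete, here is a minimal configuration sketch; the `my-vendor` key and its spec contents are placeholders, not part of this change:

  {
    # Dynamic CDI spec for local NVIDIA GPUs, generated at boot into /run/cdi:
    virtualisation.containers.cdi.dynamic.nvidia.enable = true;

    # Static CDI spec, written verbatim to /etc/cdi/my-vendor.json
    # ("my-vendor" and the fields below are hypothetical placeholders):
    virtualisation.containers.cdi.static.my-vendor = {
      cdiVersion = "0.5.0";
      kind = "my-vendor.com/device";
      devices = [];
      containerEdits = [];
    };
  }

A CDI-aware runtime can then attach a device with the `--device` option mentioned in the release note, e.g. `--device nvidia.com/gpu=all` for the generated NVIDIA spec (`nvidia.com/gpu` being the device kind that `nvidia-ctk` conventionally emits).
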
diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix
index deb7b382e3d1..29f9bdf5438c 100644
--- a/nixos/modules/module-list.nix
+++ b/nixos/modules/module-list.nix
@@ -546,6 +546,7 @@
   ./services/hardware/kanata.nix
   ./services/hardware/lcd.nix
   ./services/hardware/lirc.nix
+  ./services/hardware/nvidia-container-toolkit-cdi-generator
   ./services/hardware/nvidia-optimus.nix
   ./services/hardware/openrgb.nix
   ./services/hardware/pcscd.nix
diff --git a/nixos/modules/services/hardware/nvidia-container-toolkit-cdi-generator/cdi-generate.nix b/nixos/modules/services/hardware/nvidia-container-toolkit-cdi-generator/cdi-generate.nix
new file mode 100644
index 000000000000..a90d234f65c0
--- /dev/null
+++ b/nixos/modules/services/hardware/nvidia-container-toolkit-cdi-generator/cdi-generate.nix
@@ -0,0 +1,39 @@
+{ config, lib, pkgs }: let
+  mountOptions = { options = ["ro" "nosuid" "nodev" "bind"]; };
+  mounts = [
+    { hostPath = "${lib.getBin config.hardware.nvidia.package}/bin/nvidia-cuda-mps-control";
+      containerPath = "/usr/bin/nvidia-cuda-mps-control"; }
+    { hostPath = "${lib.getBin config.hardware.nvidia.package}/bin/nvidia-cuda-mps-server";
+      containerPath = "/usr/bin/nvidia-cuda-mps-server"; }
+    { hostPath = "${lib.getBin config.hardware.nvidia.package}/bin/nvidia-debugdump";
+      containerPath = "/usr/bin/nvidia-debugdump"; }
+    { hostPath = "${lib.getBin config.hardware.nvidia.package}/bin/nvidia-powerd";
+      containerPath = "/usr/bin/nvidia-powerd"; }
+    { hostPath = "${lib.getBin config.hardware.nvidia.package}/bin/nvidia-smi";
+      containerPath = "/usr/bin/nvidia-smi"; }
+    { hostPath = "${pkgs.nvidia-container-toolkit}/bin/nvidia-ctk";
+      containerPath = "/usr/bin/nvidia-ctk"; }
+    { hostPath = "${pkgs.glibc}/lib";
+      containerPath = "${pkgs.glibc}/lib"; }
+    { hostPath = "${pkgs.glibc}/lib64";
+      containerPath = "${pkgs.glibc}/lib64"; }
+  ];
+  jqAddMountExpression = ".containerEdits.mounts[.containerEdits.mounts | length] |= . +";
+  mountsToJq = lib.concatMap
+    (mount:
+      ["${pkgs.jq}/bin/jq '${jqAddMountExpression} ${builtins.toJSON (mount // mountOptions)}'"])
+    mounts;
+in ''
+#! ${pkgs.runtimeShell}
+
+function cdiGenerate {
+  ${pkgs.nvidia-container-toolkit}/bin/nvidia-ctk cdi generate \
+    --format json \
+    --ldconfig-path ${pkgs.glibc.bin}/bin/ldconfig \
+    --library-search-path ${config.hardware.nvidia.package}/lib \
+    --nvidia-ctk-path ${pkgs.nvidia-container-toolkit}/bin/nvidia-ctk
+}
+
+cdiGenerate | \
+  ${lib.concatStringsSep " | " mountsToJq} > "$RUNTIME_DIRECTORY/nvidia-container-toolkit.json"
+''
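
A note on the generated script: `jqAddMountExpression` assigns to the index one past the current end of `.containerEdits.mounts`, so each `jq` stage in the pipeline appends exactly one mount to the spec produced by `nvidia-ctk cdi generate`. The object a stage appends is the mount attrset merged with the shared read-only bind options; a minimal standalone sketch of that merge (the host path is a placeholder):

  let
    mountOptions = { options = [ "ro" "nosuid" "nodev" "bind" ]; };
    mount = {
      hostPath = "/nix/store/placeholder-nvidia/bin/nvidia-smi";  # placeholder
      containerPath = "/usr/bin/nvidia-smi";
    };
  in
    # The JSON object one jq stage appends to .containerEdits.mounts:
    builtins.toJSON (mount // mountOptions)

Because `mountOptions` is merged into every entry, all of these mounts end up read-only inside the container.
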
diff --git a/nixos/modules/services/hardware/nvidia-container-toolkit-cdi-generator/default.nix b/nixos/modules/services/hardware/nvidia-container-toolkit-cdi-generator/default.nix
new file mode 100644
index 000000000000..3c96e9c41be5
--- /dev/null
+++ b/nixos/modules/services/hardware/nvidia-container-toolkit-cdi-generator/default.nix
@@ -0,0 +1,38 @@
+{ config, lib, pkgs, ... }:
+
+{
+
+  options = {
+
+    hardware.nvidia-container-toolkit-cdi-generator.enable = lib.mkOption {
+      default = false;
+      internal = true;
+      visible = false;
+      type = lib.types.bool;
+      description = lib.mdDoc ''
+        Enable dynamic CDI configuration for NVIDIA devices by running
+        nvidia-container-toolkit on boot.
+      '';
+    };
+
+  };
+
+  config = {
+
+    systemd.services.nvidia-container-toolkit-cdi-generator = lib.mkIf config.hardware.nvidia-container-toolkit-cdi-generator.enable {
+      description = "Container Device Interface (CDI) for Nvidia generator";
+      wantedBy = [ "multi-user.target" ];
+      after = [ "systemd-udev-settle.service" ];
+      serviceConfig = {
+        RuntimeDirectory = "cdi";
+        RemainAfterExit = true;
+        ExecStart = let
+          script = (pkgs.writeScriptBin "nvidia-cdi-generator"
+            (import ./cdi-generate.nix { inherit config lib pkgs; })); in (lib.getExe script);
+        Type = "oneshot";
+      };
+    };
+
+  };
+
+}
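
Two mechanisms in this unit are easy to miss. `RuntimeDirectory = "cdi"` makes systemd create `/run/cdi` and export `RUNTIME_DIRECTORY=/run/cdi` to the service, which is where the script from cdi-generate.nix writes `nvidia-container-toolkit.json`. And `ExecStart` goes through `lib.getExe`, which resolves the `writeScriptBin` derivation to its single executable; a minimal sketch of that resolution (the script body is hypothetical):

  let
    # writeScriptBin installs the script as $out/bin/<name>;
    # lib.getExe then resolves the derivation to that executable's path.
    script = pkgs.writeScriptBin "nvidia-cdi-generator" ''
      #! ${pkgs.runtimeShell}
      echo "generate the CDI spec here"
    '';
  in
    lib.getExe script
    # => "/nix/store/<hash>-nvidia-cdi-generator/bin/nvidia-cdi-generator"
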
diff --git a/nixos/modules/virtualisation/containers.nix b/nixos/modules/virtualisation/containers.nix
index a205890b6843..b3d81078eb34 100644
--- a/nixos/modules/virtualisation/containers.nix
+++ b/nixos/modules/virtualisation/containers.nix
@@ -28,29 +28,39 @@ in
       description = lib.mdDoc "Enable the OCI seccomp BPF hook";
     };
 
-    cdi = mkOption {
-      type = types.attrs;
-      default = { };
-      description = lib.mdDoc ''
-        Declarative CDI specification. Each key of the attribute set
-        will be mapped to a file in /etc/cdi. It is required for every
-        key to be provided in JSON format.
-      '';
-      example = {
-        some-vendor = builtins.fromJSON ''
-            {
-              "cdiVersion": "0.5.0",
-              "kind": "some-vendor.com/foo",
-              "devices": [],
-              "containerEdits": []
-            }
-          '';
-
-        some-other-vendor = {
-          cdiVersion = "0.5.0";
-          kind = "some-other-vendor.com/bar";
-          devices = [];
-          containerEdits = [];
+    cdi = {
+      dynamic.nvidia.enable = mkOption {
+        type = types.bool;
+        default = false;
+        description = lib.mdDoc ''
+          Enable dynamic CDI configuration for NVIDIA devices by running nvidia-container-toolkit on boot.
+        '';
+      };
+
+      static = mkOption {
+        type = types.attrs;
+        default = { };
+        description = lib.mdDoc ''
+          Declarative CDI specification. Each key of the attribute set
+          will be mapped to a file in /etc/cdi. It is required for every
+          key to be provided in JSON format.
+        '';
+        example = {
+          some-vendor = builtins.fromJSON ''
+              {
+                "cdiVersion": "0.5.0",
+                "kind": "some-vendor.com/foo",
+                "devices": [],
+                "containerEdits": []
+              }
+            '';
+
+          some-other-vendor = {
+            cdiVersion = "0.5.0";
+            kind = "some-other-vendor.com/bar";
+            devices = [];
+            containerEdits = [];
+          };
         };
       };
     };
@@ -140,6 +150,8 @@ in
 
   config = lib.mkIf cfg.enable {
 
+    hardware.nvidia-container-toolkit-cdi-generator.enable = lib.mkIf cfg.cdi.dynamic.nvidia.enable true;
+
     virtualisation.containers.containersConf.cniPlugins = [ pkgs.cni-plugins ];
 
     virtualisation.containers.containersConf.settings = {
@@ -152,11 +164,11 @@ in
     };
 
     environment.etc = let
-      cdiConfigurationFiles = (lib.attrsets.mapAttrs'
+      cdiStaticConfigurationFiles = (lib.attrsets.mapAttrs'
         (name: value:
           lib.attrsets.nameValuePair "cdi/${name}.json"
             { text = builtins.toJSON value; })
-        cfg.cdi);
+        cfg.cdi.static);
     in {
       "containers/containers.conf".source =
         toml.generate "containers.conf" cfg.containersConf.settings;
@@ -171,7 +183,7 @@ in
       "containers/policy.json".source =
         if cfg.policy != { } then pkgs.writeText "policy.json" (builtins.toJSON cfg.policy)
         else "${pkgs.skopeo.policy}/default-policy.json";
-    } // cdiConfigurationFiles;
+    } // cdiStaticConfigurationFiles;
 
   };
 
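
The renamed `cdiStaticConfigurationFiles` binding is what maps `virtualisation.containers.cdi.static` onto `/etc/cdi`: each attribute becomes one JSON file named after its key. A minimal sketch of the transformation, reusing the `some-vendor` example from the option documentation:

  lib.attrsets.mapAttrs'
    (name: value:
      lib.attrsets.nameValuePair "cdi/${name}.json"
        { text = builtins.toJSON value; })
    { some-vendor = {
        cdiVersion = "0.5.0";
        kind = "some-vendor.com/foo";
        devices = [];
        containerEdits = [];
      };
    }
  # => { "cdi/some-vendor.json" = { text = "{\"cdiVersion\":\"0.5.0\",...}"; }; }

`environment.etc` then installs the `cdi/some-vendor.json` entry as `/etc/cdi/some-vendor.json`.
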
diff --git a/nixos/modules/virtualisation/docker.nix b/nixos/modules/virtualisation/docker.nix
index d4d34d13a94d..cceb186e0b36 100644
--- a/nixos/modules/virtualisation/docker.nix
+++ b/nixos/modules/virtualisation/docker.nix
@@ -72,6 +72,8 @@ in
         type = types.bool;
         default = false;
         description = lib.mdDoc ''
+          **Deprecated**; use virtualisation.containers.cdi.dynamic.nvidia.enable instead.
+
           Enable nvidia-docker wrapper, supporting NVIDIA GPUs inside docker containers.
         '';
       };
@@ -185,6 +187,16 @@ in
       users.groups.docker.gid = config.ids.gids.docker;
       systemd.packages = [ cfg.package ];
 
+      # Docker 25.0.0 supports CDI by default
+      # (https://docs.docker.com/engine/release-notes/25.0/#new). Encourage
+      # moving to CDI rather than relying on the deprecated
+      # runtime wrappers.
+      warnings = lib.optionals (cfg.enableNvidia && (lib.strings.versionAtLeast cfg.package.version "25")) [
+        ''
          You have set virtualisation.docker.enableNvidia. This option is deprecated; set virtualisation.containers.cdi.dynamic.nvidia.enable instead.
+        ''
+      ];
+
       systemd.services.docker = {
         wantedBy = optional cfg.enableOnBoot "multi-user.target";
         after = [ "network.target" "docker.socket" ];
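
The deprecation warning above is gated on the Docker version because CDI is only enabled by default from Docker 25 onwards; on older packages the deprecated wrapper remains the only path. `lib.strings.versionAtLeast` does the comparison on Nix-style version strings, for example (version numbers are illustrative):

  lib.strings.versionAtLeast "25.0.3" "25"  # => true  (warning is emitted)
  lib.strings.versionAtLeast "24.0.9" "25"  # => false (no warning on older Docker)
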
diff --git a/nixos/modules/virtualisation/podman/default.nix b/nixos/modules/virtualisation/podman/default.nix
index 47382f9beab0..7411ebc2a311 100644
--- a/nixos/modules/virtualisation/podman/default.nix
+++ b/nixos/modules/virtualisation/podman/default.nix
@@ -82,6 +82,8 @@ in
       type = types.bool;
       default = false;
       description = lib.mdDoc ''
+        **Deprecated**; use virtualisation.containers.cdi.dynamic.nvidia.enable instead.
+
         Enable use of NVidia GPUs from within podman containers.
       '';
     };
@@ -166,6 +168,12 @@ in
       inherit (networkConfig) dns_enabled network_interface;
     in
     lib.mkIf cfg.enable {
+      warnings = lib.optionals cfg.enableNvidia [
+        ''
          You have set virtualisation.podman.enableNvidia. This option is deprecated; set virtualisation.containers.cdi.dynamic.nvidia.enable instead.
+        ''
+      ];
+
       environment.systemPackages = [ cfg.package ]
         ++ lib.optional cfg.dockerCompat dockerCompat;
 
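
Unlike the Docker case, the Podman warning is unconditional: per the release note, Podman has supported CDI since 3.2.0, so no version gate is needed. The migration is the same one-line swap; a minimal sketch:

  {
    virtualisation.podman.enable = true;
    # Replaces the deprecated virtualisation.podman.enableNvidia option:
    virtualisation.containers.cdi.dynamic.nvidia.enable = true;
  }
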
diff --git a/pkgs/applications/virtualization/nvidia-container-toolkit/default.nix b/pkgs/applications/virtualization/nvidia-container-toolkit/default.nix
index 451ddb4906bc..a584be35e7b2 100644
--- a/pkgs/applications/virtualization/nvidia-container-toolkit/default.nix
+++ b/pkgs/applications/virtualization/nvidia-container-toolkit/default.nix
@@ -6,8 +6,8 @@
 , linkFarm
 , writeShellScript
 , formats
-, containerRuntimePath
-, configTemplate
+, containerRuntimePath ? null
+, configTemplate ? null
 , configTemplatePath ? null
 , libnvidia-container
 , cudaPackages
@@ -91,7 +91,7 @@ buildGoModule rec {
     makeWrapper
   ];
 
-  preConfigure = ''
+  preConfigure = lib.optionalString (containerRuntimePath != null) ''
     # Ensure the runc symlink isn't broken:
     if ! readlink --quiet --canonicalize-existing "${isolatedContainerRuntimePath}/runc" ; then
       echo "${isolatedContainerRuntimePath}/runc: broken symlink" >&2
@@ -109,7 +109,7 @@ buildGoModule rec {
     in
     [ "-skip" "${builtins.concatStringsSep "|" skippedTests}" ];
 
-  postInstall = ''
+  postInstall = lib.optionalString (containerRuntimePath != null) ''
     mkdir -p $out/etc/nvidia-container-runtime
 
     # nvidia-container-runtime invokes docker-runc or runc if that isn't
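
Defaulting `containerRuntimePath` and `configTemplate` to `null`, and guarding `preConfigure` and `postInstall` on them, lets the bare `nvidia-container-toolkit` attribute (registered in all-packages.nix below) build without being wrapped around a specific container runtime; the CDI generator only needs the `nvidia-ctk` binary. Runtime-wrapped variants can still be produced by overriding, sketched here under the assumption that the arguments are passed through `.override` (both values are hypothetical):

  nvidia-container-toolkit.override {
    containerRuntimePath = "${pkgs.runc}/bin/runc";  # hypothetical runtime
    configTemplate = ./config.toml;                  # hypothetical template file
  }
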
diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix
index 2adaad5ac8e2..6c4c2a20b25a 100644
--- a/pkgs/top-level/all-packages.nix
+++ b/pkgs/top-level/all-packages.nix
@@ -24067,6 +24067,8 @@ with pkgs;
   nv-codec-headers-11 = callPackage ../development/libraries/nv-codec-headers/11_x.nix { };
   nv-codec-headers-12 = callPackage ../development/libraries/nv-codec-headers/12_x.nix { };
 
+  nvidia-container-toolkit = callPackage ../applications/virtualization/nvidia-container-toolkit { };
+
   nvidiaCtkPackages =
     callPackage ../applications/virtualization/nvidia-container-toolkit/packages.nix
       { };