summary refs log tree commit diff
path: root/nixos/modules/services/computing/slurm/slurm.nix
diff options
context:
space:
mode:
Diffstat (limited to 'nixos/modules/services/computing/slurm/slurm.nix')
-rw-r--r--nixos/modules/services/computing/slurm/slurm.nix113
1 files changed, 102 insertions, 11 deletions
diff --git a/nixos/modules/services/computing/slurm/slurm.nix b/nixos/modules/services/computing/slurm/slurm.nix
index 09174ed39f5e..cd481212db2d 100644
--- a/nixos/modules/services/computing/slurm/slurm.nix
+++ b/nixos/modules/services/computing/slurm/slurm.nix
@@ -6,13 +6,18 @@ let
 
   cfg = config.services.slurm;
   # configuration file can be generated by http://slurm.schedmd.com/configurator.html
+
+  defaultUser = "slurm";
+
   configFile = pkgs.writeTextDir "slurm.conf"
     ''
       ClusterName=${cfg.clusterName}
+      StateSaveLocation=${cfg.stateSaveLocation}
+      SlurmUser=${cfg.user}
       ${optionalString (cfg.controlMachine != null) ''controlMachine=${cfg.controlMachine}''}
       ${optionalString (cfg.controlAddr != null) ''controlAddr=${cfg.controlAddr}''}
-      ${optionalString (cfg.nodeName != null) ''nodeName=${cfg.nodeName}''}
-      ${optionalString (cfg.partitionName != null) ''partitionName=${cfg.partitionName}''}
+      ${toString (map (x: "NodeName=${x}\n") cfg.nodeName)}
+      ${toString (map (x: "PartitionName=${x}\n") cfg.partitionName)}
       PlugStackConfig=${plugStackConfig}
       ProctrackType=${cfg.procTrackType}
       ${cfg.extraConfig}
@@ -24,12 +29,19 @@ let
       ${cfg.extraPlugstackConfig}
     '';
 
-
   cgroupConfig = pkgs.writeTextDir "cgroup.conf"
    ''
      ${cfg.extraCgroupConfig}
    '';
 
+  slurmdbdConf = pkgs.writeTextDir "slurmdbd.conf"
+   ''
+     DbdHost=${cfg.dbdserver.dbdHost}
+     SlurmUser=${cfg.user}
+     StorageType=accounting_storage/mysql
+     ${cfg.dbdserver.extraConfig}
+   '';
+
   # slurm expects some additional config files to be
   # in the same directory as slurm.conf
   etcSlurm = pkgs.symlinkJoin {
@@ -43,6 +55,8 @@ in
 
   ###### interface
 
+  meta.maintainers = [ maintainers.markuskowa ];
+
   options = {
 
     services.slurm = {
@@ -60,6 +74,27 @@ in
         };
       };
 
+      dbdserver = {
+        enable = mkEnableOption "SlurmDBD service";
+
+        dbdHost = mkOption {
+          type = types.str;
+          default = config.networking.hostName;
+          description = ''
+            Hostname of the machine where <literal>slurmdbd</literal>
+            is running (i.e. name returned by <literal>hostname -s</literal>).
+          '';
+        };
+
+        extraConfig = mkOption {
+          type = types.lines;
+          default = "";
+          description = ''
+            Extra configuration for <literal>slurmdbd.conf</literal>
+          '';
+        };
+      };
+
       client = {
         enable = mkEnableOption "slurm client daemon";
       };
@@ -116,9 +151,9 @@ in
       };
 
       nodeName = mkOption {
-        type = types.nullOr types.str;
-        default = null;
-        example = "linux[1-32] CPUs=1 State=UNKNOWN";
+        type = types.listOf types.str;
+        default = [];
+        example = literalExample ''[ "linux[1-32] CPUs=1 State=UNKNOWN" ];'';
         description = ''
           Name that SLURM uses to refer to a node (or base partition for BlueGene
           systems). Typically this would be the string that "/bin/hostname -s"
@@ -127,9 +162,9 @@ in
       };
 
       partitionName = mkOption {
-        type = types.nullOr types.str;
-        default = null;
-        example = "debug Nodes=linux[1-32] Default=YES MaxTime=INFINITE State=UP";
+        type = types.listOf types.str;
+        default = [];
+        example = literalExample ''[ "debug Nodes=linux[1-32] Default=YES MaxTime=INFINITE State=UP" ];'';
         description = ''
           Name by which the partition may be referenced. Note that now you have
           to write the partition's parameters after the name.
@@ -150,7 +185,7 @@ in
       };
 
       procTrackType = mkOption {
-        type = types.string;
+        type = types.str;
         default = "proctrack/linuxproc";
         description = ''
           Plugin to be used for process tracking on a job step basis.
@@ -159,6 +194,25 @@ in
         '';
       };
 
+      stateSaveLocation = mkOption {
+        type = types.str;
+        default = "/var/spool/slurmctld";
+        description = ''
+          Directory into which the Slurm controller, slurmctld, saves its state.
+        '';
+      };
+
+      user = mkOption {
+        type = types.str;
+        default = defaultUser;
+        description = ''
+          Set this option when you want to run the slurmctld daemon
+          as something else than the default slurm user "slurm".
+          Note that the UID of this user needs to be the same
+          on all nodes.
+        '';
+      };
+
       extraConfig = mkOption {
         default = "";
         type = types.lines;
@@ -184,6 +238,8 @@ in
           used when <literal>procTrackType=proctrack/cgroup</literal>.
         '';
       };
+
+
     };
 
   };
@@ -220,12 +276,24 @@ in
         '';
       };
 
-  in mkIf (cfg.enableStools || cfg.client.enable || cfg.server.enable) {
+  in mkIf ( cfg.enableStools ||
+            cfg.client.enable ||
+            cfg.server.enable ||
+            cfg.dbdserver.enable ) {
 
     environment.systemPackages = [ wrappedSlurm ];
 
     services.munge.enable = mkDefault true;
 
+    # use a static uid as default to ensure it is the same on all nodes
+    users.users.slurm = mkIf (cfg.user == defaultUser) {
+      name = defaultUser;
+      group = "slurm";
+      uid = config.ids.uids.slurm;
+    };
+
+    users.groups.slurm.gid = config.ids.uids.slurm;
+
     systemd.services.slurmd = mkIf (cfg.client.enable) {
       path = with pkgs; [ wrappedSlurm coreutils ]
         ++ lib.optional cfg.enableSrunX11 slurm-spank-x11;
@@ -261,6 +329,29 @@ in
         PIDFile = "/run/slurmctld.pid";
         ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
       };
+
+      preStart = ''
+        mkdir -p ${cfg.stateSaveLocation}
+        chown -R ${cfg.user}:slurm ${cfg.stateSaveLocation}
+      '';
+    };
+
+    systemd.services.slurmdbd = mkIf (cfg.dbdserver.enable) {
+      path = with pkgs; [ wrappedSlurm munge coreutils ];
+
+      wantedBy = [ "multi-user.target" ];
+      after = [ "network.target" "munged.service" "mysql.service" ];
+      requires = [ "munged.service" "mysql.service" ];
+
+      # slurm strips the last component off the path
+      environment.SLURM_CONF = "${slurmdbdConf}/slurm.conf";
+
+      serviceConfig = {
+        Type = "forking";
+        ExecStart = "${cfg.package}/bin/slurmdbd";
+        PIDFile = "/run/slurmdbd.pid";
+        ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
+      };
     };
 
   };