diff options
Diffstat (limited to 'nixos/modules/services/computing/slurm/slurm.nix')
-rw-r--r-- | nixos/modules/services/computing/slurm/slurm.nix | 113 |
1 files changed, 102 insertions, 11 deletions
diff --git a/nixos/modules/services/computing/slurm/slurm.nix b/nixos/modules/services/computing/slurm/slurm.nix index 09174ed39f5e..cd481212db2d 100644 --- a/nixos/modules/services/computing/slurm/slurm.nix +++ b/nixos/modules/services/computing/slurm/slurm.nix @@ -6,13 +6,18 @@ let cfg = config.services.slurm; # configuration file can be generated by http://slurm.schedmd.com/configurator.html + + defaultUser = "slurm"; + configFile = pkgs.writeTextDir "slurm.conf" '' ClusterName=${cfg.clusterName} + StateSaveLocation=${cfg.stateSaveLocation} + SlurmUser=${cfg.user} ${optionalString (cfg.controlMachine != null) ''controlMachine=${cfg.controlMachine}''} ${optionalString (cfg.controlAddr != null) ''controlAddr=${cfg.controlAddr}''} - ${optionalString (cfg.nodeName != null) ''nodeName=${cfg.nodeName}''} - ${optionalString (cfg.partitionName != null) ''partitionName=${cfg.partitionName}''} + ${toString (map (x: "NodeName=${x}\n") cfg.nodeName)} + ${toString (map (x: "PartitionName=${x}\n") cfg.partitionName)} PlugStackConfig=${plugStackConfig} ProctrackType=${cfg.procTrackType} ${cfg.extraConfig} @@ -24,12 +29,19 @@ let ${cfg.extraPlugstackConfig} ''; - cgroupConfig = pkgs.writeTextDir "cgroup.conf" '' ${cfg.extraCgroupConfig} ''; + slurmdbdConf = pkgs.writeTextDir "slurmdbd.conf" + '' + DbdHost=${cfg.dbdserver.dbdHost} + SlurmUser=${cfg.user} + StorageType=accounting_storage/mysql + ${cfg.dbdserver.extraConfig} + ''; + # slurm expects some additional config files to be # in the same directory as slurm.conf etcSlurm = pkgs.symlinkJoin { @@ -43,6 +55,8 @@ in ###### interface + meta.maintainers = [ maintainers.markuskowa ]; + options = { services.slurm = { @@ -60,6 +74,27 @@ in }; }; + dbdserver = { + enable = mkEnableOption "SlurmDBD service"; + + dbdHost = mkOption { + type = types.str; + default = config.networking.hostName; + description = '' + Hostname of the machine where <literal>slurmdbd</literal> + is running (i.e. name returned by <literal>hostname -s</literal>). + ''; + }; + + extraConfig = mkOption { + type = types.lines; + default = ""; + description = '' + Extra configuration for <literal>slurmdbd.conf</literal> + ''; + }; + }; + client = { enable = mkEnableOption "slurm client daemon"; }; @@ -116,9 +151,9 @@ in }; nodeName = mkOption { - type = types.nullOr types.str; - default = null; - example = "linux[1-32] CPUs=1 State=UNKNOWN"; + type = types.listOf types.str; + default = []; + example = literalExample ''[ "linux[1-32] CPUs=1 State=UNKNOWN" ];''; description = '' Name that SLURM uses to refer to a node (or base partition for BlueGene systems). Typically this would be the string that "/bin/hostname -s" @@ -127,9 +162,9 @@ in }; partitionName = mkOption { - type = types.nullOr types.str; - default = null; - example = "debug Nodes=linux[1-32] Default=YES MaxTime=INFINITE State=UP"; + type = types.listOf types.str; + default = []; + example = literalExample ''[ "debug Nodes=linux[1-32] Default=YES MaxTime=INFINITE State=UP" ];''; description = '' Name by which the partition may be referenced. Note that now you have to write the partition's parameters after the name. @@ -150,7 +185,7 @@ in }; procTrackType = mkOption { - type = types.string; + type = types.str; default = "proctrack/linuxproc"; description = '' Plugin to be used for process tracking on a job step basis. @@ -159,6 +194,25 @@ in ''; }; + stateSaveLocation = mkOption { + type = types.str; + default = "/var/spool/slurmctld"; + description = '' + Directory into which the Slurm controller, slurmctld, saves its state. + ''; + }; + + user = mkOption { + type = types.str; + default = defaultUser; + description = '' + Set this option when you want to run the slurmctld daemon + as something else than the default slurm user "slurm". + Note that the UID of this user needs to be the same + on all nodes. + ''; + }; + extraConfig = mkOption { default = ""; type = types.lines; @@ -184,6 +238,8 @@ in used when <literal>procTrackType=proctrack/cgroup</literal>. ''; }; + + }; }; @@ -220,12 +276,24 @@ in ''; }; - in mkIf (cfg.enableStools || cfg.client.enable || cfg.server.enable) { + in mkIf ( cfg.enableStools || + cfg.client.enable || + cfg.server.enable || + cfg.dbdserver.enable ) { environment.systemPackages = [ wrappedSlurm ]; services.munge.enable = mkDefault true; + # use a static uid as default to ensure it is the same on all nodes + users.users.slurm = mkIf (cfg.user == defaultUser) { + name = defaultUser; + group = "slurm"; + uid = config.ids.uids.slurm; + }; + + users.groups.slurm.gid = config.ids.uids.slurm; + systemd.services.slurmd = mkIf (cfg.client.enable) { path = with pkgs; [ wrappedSlurm coreutils ] ++ lib.optional cfg.enableSrunX11 slurm-spank-x11; @@ -261,6 +329,29 @@ in PIDFile = "/run/slurmctld.pid"; ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID"; }; + + preStart = '' + mkdir -p ${cfg.stateSaveLocation} + chown -R ${cfg.user}:slurm ${cfg.stateSaveLocation} + ''; + }; + + systemd.services.slurmdbd = mkIf (cfg.dbdserver.enable) { + path = with pkgs; [ wrappedSlurm munge coreutils ]; + + wantedBy = [ "multi-user.target" ]; + after = [ "network.target" "munged.service" "mysql.service" ]; + requires = [ "munged.service" "mysql.service" ]; + + # slurm strips the last component off the path + environment.SLURM_CONF = "${slurmdbdConf}/slurm.conf"; + + serviceConfig = { + Type = "forking"; + ExecStart = "${cfg.package}/bin/slurmdbd"; + PIDFile = "/run/slurmdbd.pid"; + ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID"; + }; }; }; |