author    | markuskowa <markus.kowalewski@gmail.com> | 2019-11-12 21:49:29 +0100
committer | GitHub <noreply@github.com>              | 2019-11-12 21:49:29 +0100
commit    | 6928cb22e943db64c6fef55d950449b8be9906f3 (patch)
tree      | f19d0ddbb6d9b172b9bf8c6607a146367d657b1b /nixos
parent    | 331f45f7bb5a90ce2b5516a9ae34da566f54e69c (diff)
parent    | 9b28dbd36a4cc4761dfde72a97081c19467745f7 (diff)
Merge pull request #73179 from markuskowa/fix-slurm
nixos/slurm: fix test and X11 options
Diffstat (limited to 'nixos')
-rw-r--r-- | nixos/modules/services/computing/slurm/slurm.nix | 51
-rw-r--r-- | nixos/tests/slurm.nix                            | 87
2 files changed, 89 insertions, 49 deletions
diff --git a/nixos/modules/services/computing/slurm/slurm.nix b/nixos/modules/services/computing/slurm/slurm.nix
index d1a1383e45b0..c70d999ca96d 100644
--- a/nixos/modules/services/computing/slurm/slurm.nix
+++ b/nixos/modules/services/computing/slurm/slurm.nix
@@ -18,7 +18,7 @@ let
     ${optionalString (cfg.controlAddr != null) ''controlAddr=${cfg.controlAddr}''}
     ${toString (map (x: "NodeName=${x}\n") cfg.nodeName)}
     ${toString (map (x: "PartitionName=${x}\n") cfg.partitionName)}
-    PlugStackConfig=${plugStackConfig}
+    PlugStackConfig=${plugStackConfig}/plugstack.conf
     ProctrackType=${cfg.procTrackType}
     ${cfg.extraConfig}
   '';
@@ -39,6 +39,8 @@ let
     DbdHost=${cfg.dbdserver.dbdHost}
     SlurmUser=${cfg.user}
     StorageType=accounting_storage/mysql
+    StorageUser=${cfg.dbdserver.storageUser}
+    ${optionalString (cfg.dbdserver.storagePass != null) "StoragePass=${cfg.dbdserver.storagePass}"}
     ${cfg.dbdserver.extraConfig}
   '';
@@ -48,7 +50,6 @@ let
     name = "etc-slurm";
     paths = [ configFile cgroupConfig plugStackConfig ] ++ cfg.extraConfigPaths;
   };
-
 in
 {
@@ -86,6 +87,37 @@ in
         '';
       };
+      storageUser = mkOption {
+        type = types.str;
+        default = cfg.user;
+        description = ''
+          Database user name.
+        '';
+      };
+
+      storagePass = mkOption {
+        type = types.nullOr types.str;
+        default = null;
+        description = ''
+          Database password. Note that this password will be publicly
+          readable in the nix store. Use <option>configFile</option>
+          to store the config file and password outside the nix store.
+        '';
+      };
+
+      configFile = mkOption {
+        type = types.nullOr types.str;
+        default = null;
+        description = ''
+          Path to <literal>slurmdbd.conf</literal>. The password for the database connection
+          is stored in the config file. Use this option to specify a path
+          outside the nix store. If this option is unset a configuration file
+          will be generated. See also:
+          <citerefentry><refentrytitle>slurmdbd.conf</refentrytitle>
+          <manvolnum>8</manvolnum></citerefentry>.
+        '';
+      };
+
       extraConfig = mkOption {
         type = types.lines;
         default = "";
@@ -112,7 +144,7 @@ in
     package = mkOption {
       type = types.package;
-      default = pkgs.slurm;
+      default = pkgs.slurm.override { enableX11 = ! cfg.enableSrunX11; };
       defaultText = "pkgs.slurm";
       example = literalExample "pkgs.slurm-full";
       description = ''
@@ -178,9 +210,14 @@ in
         If enabled srun will accept the option "--x11" to allow for X11 forwarding
         from within an interactive session or a batch job. This activates the
         slurm-spank-x11 module. Note that this option also enables
-        'services.openssh.forwardX11' on the client.
+        <option>services.openssh.forwardX11</option> on the client.
         This option requires slurm to be compiled without native X11 support.
+        The default behavior is to re-compile the slurm package with native X11
+        support disabled if this option is set to true.
+
+        To use the native X11 support add <literal>PrologFlags=X11</literal> in <option>extraConfig</option>.
+        Note that this method will only work with RSA SSH host keys.
       '';
     };
@@ -356,7 +393,11 @@ in
       requires = [ "munged.service" "mysql.service" ];
       # slurm strips the last component off the path
-      environment.SLURM_CONF = "${slurmdbdConf}/slurm.conf";
+      environment.SLURM_CONF =
+        if (cfg.dbdserver.configFile == null) then
+          "${slurmdbdConf}/slurm.conf"
+        else
+          cfg.dbdserver.configFile;
       serviceConfig = {
         Type = "forking";
diff --git a/nixos/tests/slurm.nix b/nixos/tests/slurm.nix
index 4c2cd3c3d264..17527378cf0a 100644
--- a/nixos/tests/slurm.nix
+++ b/nixos/tests/slurm.nix
@@ -1,4 +1,4 @@
-import ./make-test.nix ({ lib, ... }:
+import ./make-test-python.nix ({ lib, ... }:
 let
     mungekey = "mungeverryweakkeybuteasytointegratoinatest";
@@ -54,10 +54,15 @@ in {
       networking.firewall.enable = false;
       services.slurm.dbdserver = {
         enable = true;
+        storagePass = "password123";
       };
       services.mysql = {
         enable = true;
-        package = pkgs.mysql;
+        package = pkgs.mariadb;
+        initialScript = pkgs.writeText "mysql-init.sql" ''
+          CREATE USER 'slurm'@'localhost' IDENTIFIED BY 'password123';
+          GRANT ALL PRIVILEGES ON slurm_acct_db.* TO 'slurm'@'localhost';
+        '';
         ensureDatabases = [ "slurm_acct_db" ];
         ensureUsers = [{
           ensurePermissions = { "slurm_acct_db.*" = "ALL PRIVILEGES"; };
@@ -80,63 +85,57 @@ in {
   testScript = ''
-    startAll;
+    start_all()
     # Set up authentification across the cluster
-    foreach my $node (($submit,$control,$dbd,$node1,$node2,$node3))
-    {
-      $node->waitForUnit("default.target");
+    for node in [submit, control, dbd, node1, node2, node3]:
-      $node->succeed("mkdir /etc/munge");
-      $node->succeed("echo '${mungekey}' > /etc/munge/munge.key");
-      $node->succeed("chmod 0400 /etc/munge/munge.key");
-      $node->succeed("chown munge:munge /etc/munge/munge.key");
-      $node->succeed("systemctl restart munged");
+        node.wait_for_unit("default.target")
+
+        node.succeed("mkdir /etc/munge")
+        node.succeed(
+            "echo '${mungekey}' > /etc/munge/munge.key"
+        )
+        node.succeed("chmod 0400 /etc/munge/munge.key")
+        node.succeed("chown munge:munge /etc/munge/munge.key")
+        node.succeed("systemctl restart munged")
+
+        node.wait_for_unit("munged")
-      $node->waitForUnit("munged");
-    };
     # Restart the services since they have probably failed due to the munge init
     # failure
-    subtest "can_start_slurmdbd", sub {
-      $dbd->succeed("systemctl restart slurmdbd");
-      $dbd->waitForUnit("slurmdbd.service");
-      $dbd->waitForOpenPort(6819);
-    };
+    with subtest("can_start_slurmdbd"):
+        dbd.succeed("systemctl restart slurmdbd")
+        dbd.wait_for_unit("slurmdbd.service")
+        dbd.wait_for_open_port(6819)
     # there needs to be an entry for the current
     # cluster in the database before slurmctld is restarted
-    subtest "add_account", sub {
-      $control->succeed("sacctmgr -i add cluster default");
-      # check for cluster entry
-      $control->succeed("sacctmgr list cluster | awk '{ print \$1 }' | grep default");
-    };
+    with subtest("add_account"):
+        control.succeed("sacctmgr -i add cluster default")
+        # check for cluster entry
+        control.succeed("sacctmgr list cluster | awk '{ print $1 }' | grep default")
-    subtest "can_start_slurmctld", sub {
-      $control->succeed("systemctl restart slurmctld");
-      $control->waitForUnit("slurmctld.service");
-    };
+    with subtest("can_start_slurmctld"):
+        control.succeed("systemctl restart slurmctld")
+        control.wait_for_unit("slurmctld.service")
-    subtest "can_start_slurmd", sub {
-      foreach my $node (($node1,$node2,$node3))
-      {
-        $node->succeed("systemctl restart slurmd.service");
-        $node->waitForUnit("slurmd");
-      }
-    };
+    with subtest("can_start_slurmd"):
+        for node in [node1, node2, node3]:
+            node.succeed("systemctl restart slurmd.service")
+            node.wait_for_unit("slurmd")
     # Test that the cluster works and can distribute jobs;
-    subtest "run_distributed_command", sub {
-      # Run `hostname` on 3 nodes of the partition (so on all the 3 nodes).
-      # The output must contain the 3 different names
-      $submit->succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq");
-    };
+    with subtest("run_distributed_command"):
+        # Run `hostname` on 3 nodes of the partition (so on all the 3 nodes).
+        # The output must contain the 3 different names
+        submit.succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq")
-    subtest "check_slurm_dbd", sub {
-      # find the srun job from above in the database
-      sleep 5;
-      $control->succeed("sacct | grep hostname");
-    };
+    with subtest("check_slurm_dbd"):
+        # find the srun job from above in the database
+        control.succeed("sleep 5")
+        control.succeed("sacct | grep hostname")
   '';
 })
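For reference, a minimal sketch (not part of the commit) of how the new `dbdserver` options added here could be used in a NixOS configuration; the `/run/keys/slurmdbd.conf` path is only an illustrative placeholder for a file kept outside the Nix store:

```nix
{ config, pkgs, ... }:
{
  services.slurm.dbdserver = {
    enable = true;

    # Variant A: let the module generate slurmdbd.conf. The password then
    # ends up world-readable in the Nix store, as the option description warns.
    storageUser = "slurm";
    storagePass = "password123";

    # Variant B (instead of storagePass): point to a hand-written slurmdbd.conf
    # kept outside the Nix store, e.g. deployed via a secrets mechanism.
    # configFile = "/run/keys/slurmdbd.conf";  # placeholder path
  };
}
```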
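Similarly, a hedged sketch of the two X11 paths described by the updated `enableSrunX11` documentation, assuming an otherwise configured slurm cluster:

```nix
{
  # Variant 1: SPANK-based X11 forwarding. With this commit the module
  # automatically overrides the slurm package with enableX11 = false.
  services.slurm.enableSrunX11 = true;

  # Variant 2: native slurm X11 support (works only with RSA SSH host keys).
  # Keep enableSrunX11 = false and set the flag in extraConfig instead:
  # services.slurm.extraConfig = ''
  #   PrologFlags=X11
  # '';
}
```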