author: markuskowa <markus.kowalewski@gmail.com> 2019-11-12 21:49:29 +0100
committer: GitHub <noreply@github.com> 2019-11-12 21:49:29 +0100
commit: 6928cb22e943db64c6fef55d950449b8be9906f3 (patch)
tree: f19d0ddbb6d9b172b9bf8c6607a146367d657b1b /nixos
parent: 331f45f7bb5a90ce2b5516a9ae34da566f54e69c (diff)
parent: 9b28dbd36a4cc4761dfde72a97081c19467745f7 (diff)
Merge pull request #73179 from markuskowa/fix-slurm
nixos/slurm: fix test and X11 options
Diffstat (limited to 'nixos')
-rw-r--r-- nixos/modules/services/computing/slurm/slurm.nix | 51
-rw-r--r-- nixos/tests/slurm.nix                            | 87
2 files changed, 89 insertions(+), 49 deletions(-)
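
The module diff below wires a database user and password into the generated
slurmdbd.conf and allows the whole file to be kept outside the nix store. As a
minimal sketch of the new options in use (the user name and password are
placeholders mirroring the test further down, not recommended values):

    services.slurm.dbdserver = {
      enable = true;
      # defaults to services.slurm.user; spelled out here for clarity
      storageUser = "slurm";
      # caution: this value becomes world-readable in the nix store,
      # see the option description in the diff below
      storagePass = "password123";
    };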
diff --git a/nixos/modules/services/computing/slurm/slurm.nix b/nixos/modules/services/computing/slurm/slurm.nix
index d1a1383e45b0..c70d999ca96d 100644
--- a/nixos/modules/services/computing/slurm/slurm.nix
+++ b/nixos/modules/services/computing/slurm/slurm.nix
@@ -18,7 +18,7 @@ let
       ${optionalString (cfg.controlAddr != null) ''controlAddr=${cfg.controlAddr}''}
       ${toString (map (x: "NodeName=${x}\n") cfg.nodeName)}
       ${toString (map (x: "PartitionName=${x}\n") cfg.partitionName)}
-      PlugStackConfig=${plugStackConfig}
+      PlugStackConfig=${plugStackConfig}/plugstack.conf
       ProctrackType=${cfg.procTrackType}
       ${cfg.extraConfig}
     '';
@@ -39,6 +39,8 @@ let
      DbdHost=${cfg.dbdserver.dbdHost}
      SlurmUser=${cfg.user}
      StorageType=accounting_storage/mysql
+     StorageUser=${cfg.dbdserver.storageUser}
+     ${optionalString (cfg.dbdserver.storagePass != null) "StoragePass=${cfg.dbdserver.storagePass}"}
      ${cfg.dbdserver.extraConfig}
    '';
 
@@ -48,7 +50,6 @@ let
     name = "etc-slurm";
     paths = [ configFile cgroupConfig plugStackConfig ] ++ cfg.extraConfigPaths;
   };
-
 in
 
 {
@@ -86,6 +87,37 @@ in
           '';
         };
 
+        storageUser = mkOption {
+          type = types.str;
+          default = cfg.user;
+          description = ''
+            Database user name.
+          '';
+        };
+
+        storagePass = mkOption {
+          type = types.nullOr types.str;
+          default = null;
+          description = ''
+            Database password. Note that this password will be publicly
+            readable in the nix store. Use <option>configFile</option>
+            to store the config file and password outside the nix store.
+          '';
+        };
+
+        configFile = mkOption {
+          type = types.nullOr types.str;
+          default = null;
+          description = ''
+            Path to <literal>slurmdbd.conf</literal>. The password for the database connection
+            is stored in the config file. Use this option to specify a path
+            outside the nix store. If this option is unset, a configuration file
+            will be generated. See also:
+            <citerefentry><refentrytitle>slurmdbd.conf</refentrytitle>
+            <manvolnum>8</manvolnum></citerefentry>.
+          '';
+        };
+
         extraConfig = mkOption {
           type = types.lines;
           default = "";
@@ -112,7 +144,7 @@ in
 
       package = mkOption {
         type = types.package;
-        default = pkgs.slurm;
+        default = pkgs.slurm.override { enableX11 = ! cfg.enableSrunX11; };
         defaultText = "pkgs.slurm";
         example = literalExample "pkgs.slurm-full";
         description = ''
@@ -178,9 +210,14 @@ in
          If enabled, srun will accept the option "--x11" to allow for X11 forwarding
           from within an interactive session or a batch job. This activates the
           slurm-spank-x11 module. Note that this option also enables
-          'services.openssh.forwardX11' on the client.
+          <option>services.openssh.forwardX11</option> on the client.
 
           This option requires slurm to be compiled without native X11 support.
+          The default behavior is to re-compile the slurm package with native X11
+          support disabled if this option is set to true.
+
+          To use the native X11 support, add <literal>PrologFlags=X11</literal> to <option>extraConfig</option>.
+          Note that this method will only work with RSA SSH host keys.
         '';
       };
 
@@ -356,7 +393,11 @@ in
       requires = [ "munged.service" "mysql.service" ];
 
       # slurm strips the last component off the path
-      environment.SLURM_CONF = "${slurmdbdConf}/slurm.conf";
+      environment.SLURM_CONF =
+        if (cfg.dbdserver.configFile == null) then
+          "${slurmdbdConf}/slurm.conf"
+        else
+          cfg.dbdserver.configFile;
 
       serviceConfig = {
         Type = "forking";
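
The X11 changes above support two setups. A sketch of both, using only the
option names from this diff (pick one; they are mutually exclusive, since
enableSrunX11 requires a slurm built without native X11 support):

    # spank-based forwarding: srun gains "--x11", and the default
    # package is rebuilt with enableX11 = false
    services.slurm.enableSrunX11 = true;

    # or: keep native X11 support and enable it via the prolog flag
    # (per the description above, this only works with RSA host keys)
    services.slurm.extraConfig = ''
      PrologFlags=X11
    '';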
diff --git a/nixos/tests/slurm.nix b/nixos/tests/slurm.nix
index 4c2cd3c3d264..17527378cf0a 100644
--- a/nixos/tests/slurm.nix
+++ b/nixos/tests/slurm.nix
@@ -1,4 +1,4 @@
-import ./make-test.nix ({ lib, ... }:
+import ./make-test-python.nix ({ lib, ... }:
 let
     mungekey = "mungeverryweakkeybuteasytointegratoinatest";
 
@@ -54,10 +54,15 @@ in {
         networking.firewall.enable = false;
         services.slurm.dbdserver = {
           enable = true;
+          storagePass = "password123";
         };
         services.mysql = {
           enable = true;
-          package = pkgs.mysql;
+          package = pkgs.mariadb;
+          initialScript = pkgs.writeText "mysql-init.sql" ''
+            CREATE USER 'slurm'@'localhost' IDENTIFIED BY 'password123';
+            GRANT ALL PRIVILEGES ON slurm_acct_db.* TO 'slurm'@'localhost';
+          '';
           ensureDatabases = [ "slurm_acct_db" ];
           ensureUsers = [{
             ensurePermissions = { "slurm_acct_db.*" = "ALL PRIVILEGES"; };
@@ -80,63 +85,57 @@ in {
 
   testScript =
   ''
-  startAll;
+  start_all()
 
  # Set up authentication across the cluster
-  foreach my $node (($submit,$control,$dbd,$node1,$node2,$node3))
-  {
-    $node->waitForUnit("default.target");
+  for node in [submit, control, dbd, node1, node2, node3]:
 
-    $node->succeed("mkdir /etc/munge");
-    $node->succeed("echo '${mungekey}' > /etc/munge/munge.key");
-    $node->succeed("chmod 0400 /etc/munge/munge.key");
-    $node->succeed("chown munge:munge /etc/munge/munge.key");
-    $node->succeed("systemctl restart munged");
+      node.wait_for_unit("default.target")
+
+      node.succeed("mkdir /etc/munge")
+      node.succeed(
+          "echo '${mungekey}' > /etc/munge/munge.key"
+      )
+      node.succeed("chmod 0400 /etc/munge/munge.key")
+      node.succeed("chown munge:munge /etc/munge/munge.key")
+      node.succeed("systemctl restart munged")
+
+      node.wait_for_unit("munged")
 
-    $node->waitForUnit("munged");
-  };
 
   # Restart the services since they have probably failed due to the munge init
   # failure
-  subtest "can_start_slurmdbd", sub {
-    $dbd->succeed("systemctl restart slurmdbd");
-    $dbd->waitForUnit("slurmdbd.service");
-    $dbd->waitForOpenPort(6819);
-  };
+  with subtest("can_start_slurmdbd"):
+      dbd.succeed("systemctl restart slurmdbd")
+      dbd.wait_for_unit("slurmdbd.service")
+      dbd.wait_for_open_port(6819)
 
   # there needs to be an entry for the current
   # cluster in the database before slurmctld is restarted
-  subtest "add_account", sub {
-    $control->succeed("sacctmgr -i add cluster default");
-    # check for cluster entry
-    $control->succeed("sacctmgr list cluster | awk '{ print \$1 }' | grep default");
-  };
+  with subtest("add_account"):
+      control.succeed("sacctmgr -i add cluster default")
+      # check for cluster entry
+      control.succeed("sacctmgr list cluster | awk '{ print $1 }' | grep default")
 
-  subtest "can_start_slurmctld", sub {
-    $control->succeed("systemctl restart slurmctld");
-    $control->waitForUnit("slurmctld.service");
-  };
+  with subtest("can_start_slurmctld"):
+      control.succeed("systemctl restart slurmctld")
+      control.wait_for_unit("slurmctld.service")
 
-  subtest "can_start_slurmd", sub {
-    foreach my $node (($node1,$node2,$node3))
-    {
-      $node->succeed("systemctl restart slurmd.service");
-      $node->waitForUnit("slurmd");
-    }
-  };
+  with subtest("can_start_slurmd"):
+      for node in [node1, node2, node3]:
+          node.succeed("systemctl restart slurmd.service")
+          node.wait_for_unit("slurmd")
 
  # Test that the cluster works and can distribute jobs
 
-  subtest "run_distributed_command", sub {
-    # Run `hostname` on 3 nodes of the partition (so on all the 3 nodes).
-    # The output must contain the 3 different names
-    $submit->succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq");
-  };
+  with subtest("run_distributed_command"):
+      # Run `hostname` on 3 nodes of the partition (so on all the 3 nodes).
+      # The output must contain the 3 different names
+      submit.succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq")
 
-  subtest "check_slurm_dbd", sub {
-    # find the srun job from above in the database
-    sleep 5;
-    $control->succeed("sacct | grep hostname");
-  };
+      with subtest("check_slurm_dbd"):
+          # find the srun job from above in the database
+          control.succeed("sleep 5")
+          control.succeed("sacct | grep hostname")
   '';
 })
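
To keep the password out of the store entirely, the new configFile option can
point slurmdbd at a file managed outside of nix; a sketch (the path is an
assumption, the module does not create the file for you):

    services.slurm.dbdserver = {
      enable = true;
      # full path to a manually deployed slurmdbd.conf; it replaces the
      # generated file, so it must carry StoragePass and friends itself
      configFile = "/etc/slurm/slurmdbd.conf";
    };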