author     xeji <36407913+xeji@users.noreply.github.com>  2018-10-31 00:16:11 +0100
committer  GitHub <noreply@github.com>                    2018-10-31 00:16:11 +0100
commit     6efd811062849402f1f44952fd5d7cd283d72baf (patch)
tree       c207bdf63000dc1385399a32efbc220137af58c0 /nixos/tests
parent     933874ef08e4a110e29cb50752b5e7bfed6925f5 (diff)
parent     b388beeca3a3a2dd79ba2f2737deb294d4d4748b (diff)
Merge pull request #49348 from markuskowa/mod-slurm-upgrade
nixos/slurm: add slurmdbd, run daemons as user
Diffstat (limited to 'nixos/tests')
-rw-r--r--  nixos/tests/slurm.nix | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 50 insertions(+), 10 deletions(-)
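The test change below exercises the new slurmdbd accounting support added by this PR. As a rough sketch of what enabling it on a NixOS machine might look like (mirroring the test's dbd node: the slurm_acct_db database name and the slurm user come straight from the diff, everything else is deployment-specific), assuming the module options shown in the test:

    { pkgs, ... }:
    {
      # accounting daemon introduced by this PR; stores job records in MySQL
      services.slurm.dbdserver.enable = true;

      # database backing slurmdbd, set up the same way as the test's dbd node
      services.mysql = {
        enable = true;
        package = pkgs.mysql;
        ensureDatabases = [ "slurm_acct_db" ];
        ensureUsers = [{
          name = "slurm";
          ensurePermissions = { "slurm_acct_db.*" = "ALL PRIVILEGES"; };
        }];
      };
    }

The control and compute nodes then point slurm at this host through extraConfig (AccountingStorageHost / AccountingStorageType), exactly as the shared slurmconfig in the diff does.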
diff --git a/nixos/tests/slurm.nix b/nixos/tests/slurm.nix
index 60f44c3c8459..7f9c266cbff6 100644
--- a/nixos/tests/slurm.nix
+++ b/nixos/tests/slurm.nix
@@ -1,22 +1,27 @@
-import ./make-test.nix ({ ... }:
-let mungekey = "mungeverryweakkeybuteasytointegratoinatest";
+import ./make-test.nix ({ lib, ... }:
+let
+    mungekey = "mungeverryweakkeybuteasytointegratoinatest";
+
     slurmconfig = {
       controlMachine = "control";
-      nodeName = ''
-        control
-        NodeName=node[1-3] CPUs=1 State=UNKNOWN
+      nodeName = [ "node[1-3] CPUs=1 State=UNKNOWN" ];
+      partitionName = [ "debug Nodes=node[1-3] Default=YES MaxTime=INFINITE State=UP" ];
+      extraConfig = ''
+        AccountingStorageHost=dbd
+        AccountingStorageType=accounting_storage/slurmdbd
       '';
-      partitionName = "debug Nodes=node[1-3] Default=YES MaxTime=INFINITE State=UP";
     };
 in {
   name = "slurm";
 
+  meta.maintainers = [ lib.maintainers.markuskowa ];
+
   nodes =
     let
     computeNode =
       { ...}:
       {
-        # TODO slrumd port and slurmctld port should be configurations and
+        # TODO slurmd port and slurmctld port should be configurations and
         # automatically allowed by the  firewall.
         networking.firewall.enable = false;
         services.slurm = {
@@ -43,6 +48,24 @@ in {
         } // slurmconfig;
       };
 
+    dbd =
+      { pkgs, ... } :
+      {
+        networking.firewall.enable = false;
+        services.slurm.dbdserver = {
+          enable = true;
+        };
+        services.mysql = {
+          enable = true;
+          package = pkgs.mysql;
+          ensureDatabases = [ "slurm_acct_db" ];
+          ensureUsers = [{
+            ensurePermissions = { "slurm_acct_db.*" = "ALL PRIVILEGES"; };
+            name = "slurm";
+          }];
+        };
+      };
+
     node1 = computeNode;
     node2 = computeNode;
     node3 = computeNode;
@@ -54,7 +77,7 @@ in {
   startAll;
 
   # Set up authentification across the cluster
-  foreach my $node (($submit,$control,$node1,$node2,$node3))
+  foreach my $node (($submit,$control,$dbd,$node1,$node2,$node3))
   {
     $node->waitForUnit("default.target");
 
@@ -63,10 +86,22 @@ in {
     $node->succeed("chmod 0400 /etc/munge/munge.key");
     $node->succeed("chown munge:munge /etc/munge/munge.key");
     $node->succeed("systemctl restart munged");
-  }
+
+    $node->waitForUnit("munged");
+  };
 
   # Restart the services since they have probably failed due to the munge init
   # failure
+  subtest "can_start_slurmdbd", sub {
+    $dbd->succeed("systemctl restart slurmdbd");
+    $dbd->waitForUnit("slurmdbd.service");
+  };
+
+  # there needs to be an entry for the current
+  # cluster in the database before slurmctld is restarted
+  subtest "add_account", sub {
+    $control->succeed("sacctmgr -i add cluster default");
+  };
 
   subtest "can_start_slurmctld", sub {
     $control->succeed("systemctl restart slurmctld");
@@ -81,12 +116,17 @@ in {
     }
   };
 
-  # Test that the cluster work and can distribute jobs;
+  # Test that the cluster works and can distribute jobs;
 
   subtest "run_distributed_command", sub {
     # Run `hostname` on 3 nodes of the partition (so on all the 3 nodes).
     # The output must contain the 3 different names
     $submit->succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq");
   };
+
+  subtest "check_slurm_dbd", sub {
+    # find the srun job from above in the database
+    $submit->succeed("sacct | grep hostname");
+  };
   '';
 })
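For reference, tests in this tree can typically be built standalone with nix-build nixos/tests/slurm.nix from a nixpkgs checkout; the new check_slurm_dbd subtest then verifies that the srun job from run_distributed_command shows up in the accounting database via sacct.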