summary refs log tree commit diff
path: root/nixos/tests/slurm.nix
diff options
context:
space:
mode:
Diffstat (limited to 'nixos/tests/slurm.nix')
-rw-r--r--nixos/tests/slurm.nix60
1 files changed, 50 insertions, 10 deletions
diff --git a/nixos/tests/slurm.nix b/nixos/tests/slurm.nix
index 60f44c3c8459..7f9c266cbff6 100644
--- a/nixos/tests/slurm.nix
+++ b/nixos/tests/slurm.nix
@@ -1,22 +1,27 @@
-import ./make-test.nix ({ ... }:
-let mungekey = "mungeverryweakkeybuteasytointegratoinatest";
+import ./make-test.nix ({ lib, ... }:
+let
+    mungekey = "mungeverryweakkeybuteasytointegratoinatest";
+
     slurmconfig = {
       controlMachine = "control";
-      nodeName = ''
-        control
-        NodeName=node[1-3] CPUs=1 State=UNKNOWN
+      nodeName = [ "node[1-3] CPUs=1 State=UNKNOWN" ];
+      partitionName = [ "debug Nodes=node[1-3] Default=YES MaxTime=INFINITE State=UP" ];
+      extraConfig = ''
+        AccountingStorageHost=dbd
+        AccountingStorageType=accounting_storage/slurmdbd
       '';
-      partitionName = "debug Nodes=node[1-3] Default=YES MaxTime=INFINITE State=UP";
     };
 in {
   name = "slurm";
 
+  meta.maintainers = [ lib.maintainers.markuskowa ];
+
   nodes =
     let
     computeNode =
       { ...}:
       {
-        # TODO slrumd port and slurmctld port should be configurations and
+        # TODO slurmd port and slurmctld port should be configurations and
         # automatically allowed by the  firewall.
         networking.firewall.enable = false;
         services.slurm = {
@@ -43,6 +48,24 @@ in {
         } // slurmconfig;
       };
 
+    dbd =
+      { pkgs, ... } :
+      {
+        networking.firewall.enable = false;
+        services.slurm.dbdserver = {
+          enable = true;
+        };
+        services.mysql = {
+          enable = true;
+          package = pkgs.mysql;
+          ensureDatabases = [ "slurm_acct_db" ];
+          ensureUsers = [{
+            ensurePermissions = { "slurm_acct_db.*" = "ALL PRIVILEGES"; };
+            name = "slurm";
+          }];
+        };
+      };
+
     node1 = computeNode;
     node2 = computeNode;
     node3 = computeNode;
@@ -54,7 +77,7 @@ in {
   startAll;
 
   # Set up authentification across the cluster
-  foreach my $node (($submit,$control,$node1,$node2,$node3))
+  foreach my $node (($submit,$control,$dbd,$node1,$node2,$node3))
   {
     $node->waitForUnit("default.target");
 
@@ -63,10 +86,22 @@ in {
     $node->succeed("chmod 0400 /etc/munge/munge.key");
     $node->succeed("chown munge:munge /etc/munge/munge.key");
     $node->succeed("systemctl restart munged");
-  }
+
+    $node->waitForUnit("munged");
+  };
 
   # Restart the services since they have probably failed due to the munge init
   # failure
+  subtest "can_start_slurmdbd", sub {
+    $dbd->succeed("systemctl restart slurmdbd");
+    $dbd->waitForUnit("slurmdbd.service");
+  };
+
+  # there needs to be an entry for the current
+  # cluster in the database before slurmctld is restarted
+  subtest "add_account", sub {
+    $control->succeed("sacctmgr -i add cluster default");
+  };
 
   subtest "can_start_slurmctld", sub {
     $control->succeed("systemctl restart slurmctld");
@@ -81,12 +116,17 @@ in {
     }
   };
 
-  # Test that the cluster work and can distribute jobs;
+  # Test that the cluster works and can distribute jobs;
 
   subtest "run_distributed_command", sub {
     # Run `hostname` on 3 nodes of the partition (so on all the 3 nodes).
     # The output must contain the 3 different names
     $submit->succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq");
   };
+
+  subtest "check_slurm_dbd", sub {
+    # find the srun job from above in the database
+    $submit->succeed("sacct | grep hostname");
+  };
   '';
 })