about summary refs log tree commit diff
path: root/nixos/tests/slurm.nix
diff options
context:
space:
mode:
Diffstat (limited to 'nixos/tests/slurm.nix')
-rw-r--r--nixos/tests/slurm.nix92
1 files changed, 92 insertions, 0 deletions
diff --git a/nixos/tests/slurm.nix b/nixos/tests/slurm.nix
new file mode 100644
index 000000000000..ec67ea092874
--- /dev/null
+++ b/nixos/tests/slurm.nix
@@ -0,0 +1,92 @@
+import ./make-test.nix ({ pkgs, ... }:
+let mungekey = "mungeverryweakkeybuteasytointegratoinatest";
+    slurmconfig = {
+      controlMachine = "control";
+      nodeName = ''
+        control
+        NodeName=node[1-3] CPUs=1 State=UNKNOWN
+      '';
+      partitionName = "debug Nodes=node[1-3] Default=YES MaxTime=INFINITE State=UP";
+    };
+in {
+  name = "slurm";
+
+  nodes =
+    let
+    computeNode =
+      { config, pkgs, ...}:
+      {
+        # TODO: the slurmd and slurmctld ports should be configurable and
+        # automatically allowed by the firewall.
+        networking.firewall.enable = false;
+        services.slurm = {
+          client.enable = true;
+        } // slurmconfig;
+      };
+    in {
+
+    control =
+      { config, pkgs, ...}:
+      {
+        networking.firewall.enable = false;
+        services.slurm = {
+          server.enable = true;
+        } // slurmconfig;
+      };
+
+    submit =
+      { config, pkgs, ...}:
+      {
+        networking.firewall.enable = false;
+        services.slurm = {
+          enableStools = true;
+        } // slurmconfig;
+      };
+
+    node1 = computeNode;
+    node2 = computeNode;
+    node3 = computeNode;
+  };
+
+
+  testScript =
+  ''
+  startAll;
+
+  # Set up authentication across the cluster
+  foreach my $node (($submit,$control,$node1,$node2,$node3))
+  {
+    $node->waitForUnit("default.target");
+
+    $node->succeed("mkdir /etc/munge");
+    $node->succeed("echo '${mungekey}' > /etc/munge/munge.key");
+    $node->succeed("chmod 0400 /etc/munge/munge.key");
+    $node->succeed("chown munge:munge /etc/munge/munge.key");
+    $node->succeed("systemctl restart munged");
+  }
+
+  # Restart the services since they have probably failed due to the munge init
+  # failure
+
+  subtest "can_start_slurmctld", sub {
+    $control->succeed("systemctl restart slurmctld");
+    $control->waitForUnit("slurmctld.service");
+  };
+
+  subtest "can_start_slurmd", sub {
+    foreach my $node (($node1,$node2,$node3))
+    {
+      $node->succeed("systemctl restart slurmd.service");
+      $node->waitForUnit("slurmd");
+    }
+  };
+
+  # Test that the cluster works and can distribute jobs.
+
+  subtest "run_distributed_command", sub {
+    # Run `hostname` on 3 nodes of the partition (so on all the 3 nodes).
+    # The output must contain the 3 different names
+    $submit->succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq");
+  };
+  '';
+})