diff options
Diffstat (limited to 'nixos/tests/slurm.nix')
-rw-r--r-- | nixos/tests/slurm.nix | 60 |
1 files changed, 50 insertions, 10 deletions
diff --git a/nixos/tests/slurm.nix b/nixos/tests/slurm.nix index 60f44c3c8459..7f9c266cbff6 100644 --- a/nixos/tests/slurm.nix +++ b/nixos/tests/slurm.nix @@ -1,22 +1,27 @@ -import ./make-test.nix ({ ... }: -let mungekey = "mungeverryweakkeybuteasytointegratoinatest"; +import ./make-test.nix ({ lib, ... }: +let + mungekey = "mungeverryweakkeybuteasytointegratoinatest"; + slurmconfig = { controlMachine = "control"; - nodeName = '' - control - NodeName=node[1-3] CPUs=1 State=UNKNOWN + nodeName = [ "node[1-3] CPUs=1 State=UNKNOWN" ]; + partitionName = [ "debug Nodes=node[1-3] Default=YES MaxTime=INFINITE State=UP" ]; + extraConfig = '' + AccountingStorageHost=dbd + AccountingStorageType=accounting_storage/slurmdbd ''; - partitionName = "debug Nodes=node[1-3] Default=YES MaxTime=INFINITE State=UP"; }; in { name = "slurm"; + meta.maintainers = [ lib.maintainers.markuskowa ]; + nodes = let computeNode = { ...}: { - # TODO slrumd port and slurmctld port should be configurations and + # TODO slurmd port and slurmctld port should be configurations and # automatically allowed by the firewall. networking.firewall.enable = false; services.slurm = { @@ -43,6 +48,24 @@ in { } // slurmconfig; }; + dbd = + { pkgs, ... } : + { + networking.firewall.enable = false; + services.slurm.dbdserver = { + enable = true; + }; + services.mysql = { + enable = true; + package = pkgs.mysql; + ensureDatabases = [ "slurm_acct_db" ]; + ensureUsers = [{ + ensurePermissions = { "slurm_acct_db.*" = "ALL PRIVILEGES"; }; + name = "slurm"; + }]; + }; + }; + node1 = computeNode; node2 = computeNode; node3 = computeNode; @@ -54,7 +77,7 @@ in { startAll; # Set up authentification across the cluster - foreach my $node (($submit,$control,$node1,$node2,$node3)) + foreach my $node (($submit,$control,$dbd,$node1,$node2,$node3)) { $node->waitForUnit("default.target"); @@ -63,10 +86,22 @@ in { $node->succeed("chmod 0400 /etc/munge/munge.key"); $node->succeed("chown munge:munge /etc/munge/munge.key"); $node->succeed("systemctl restart munged"); - } + + $node->waitForUnit("munged"); + }; # Restart the services since they have probably failed due to the munge init # failure + subtest "can_start_slurmdbd", sub { + $dbd->succeed("systemctl restart slurmdbd"); + $dbd->waitForUnit("slurmdbd.service"); + }; + + # there needs to be an entry for the current + # cluster in the database before slurmctld is restarted + subtest "add_account", sub { + $control->succeed("sacctmgr -i add cluster default"); + }; subtest "can_start_slurmctld", sub { $control->succeed("systemctl restart slurmctld"); @@ -81,12 +116,17 @@ in { } }; - # Test that the cluster work and can distribute jobs; + # Test that the cluster works and can distribute jobs; subtest "run_distributed_command", sub { # Run `hostname` on 3 nodes of the partition (so on all the 3 nodes). # The output must contain the 3 different names $submit->succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq"); }; + + subtest "check_slurm_dbd", sub { + # find the srun job from above in the database + $submit->succeed("sacct | grep hostname"); + }; ''; }) |