Provide tests for SIG/HPC slurm packages
This MR provides a very basic test suite for the Slurm packages built by the HPC SIG. It checks the following: - Necessary packages for a single-node Slurm instance install successfully from the SIG/HPC repository - A job can be scheduled and executed to completion - A job can be scheduled and then cancelled ``` ./fifloader.py --clean --load templates.fif.json openqa-cli api -X POST isos ISO=Rocky-8.8-x86_64-dvd.iso ARCH=x86_64 DISTRI=rocky FLAVOR=dvd-iso VERSION=8.8 CURRREL=8 BUILD=-$(date +%Y%m%d).0-slurm-8.8 TEST=slurm22,slurm23 openqa-cli api -X POST isos ISO=Rocky-9.2-x86_64-dvd.iso ARCH=x86_64 DISTRI=rocky FLAVOR=dvd-iso VERSION=9.2 CURRREL=9 BUILD=-$(date +%Y%m%d).0-slurm-9.2 TEST=slurm22,slurm23 ``` - [x] My code follows the style guidelines of this project - [x] I have performed a self-review of my own code - [x] I have commented my code, particularly in hard-to-understand areas - [ ] I have made corresponding changes to the documentation - [x] My changes generate no new warnings - [x] Any dependent changes have been merged and published in downstream modules
This commit is contained in:
parent
f4c33a4f21
commit
8becb62887
@ -1418,6 +1418,36 @@
|
||||
"WORKER_CLASS": "tap"
|
||||
}
|
||||
},
|
||||
"slurm22": {
|
||||
"profiles": {
|
||||
"rocky-dvd-iso-aarch64-*-aarch64": 10,
|
||||
"rocky-dvd-iso-x86_64-*-64bit": 10
|
||||
},
|
||||
"settings": {
|
||||
"BOOTFROM": "c",
|
||||
"HDD_1": "disk_%FLAVOR%_%MACHINE%.qcow2",
|
||||
"POSTINSTALL": "slurm",
|
||||
"START_AFTER_TEST": "%DEPLOY_UPLOAD_TEST%",
|
||||
"ROOT_PASSWORD": "weakpassword",
|
||||
"USER_LOGIN": "false",
|
||||
"SLURM_VERSION": "22"
|
||||
}
|
||||
},
|
||||
"slurm23": {
|
||||
"profiles": {
|
||||
"rocky-dvd-iso-aarch64-*-aarch64": 10,
|
||||
"rocky-dvd-iso-x86_64-*-64bit": 10
|
||||
},
|
||||
"settings": {
|
||||
"BOOTFROM": "c",
|
||||
"HDD_1": "disk_%FLAVOR%_%MACHINE%.qcow2",
|
||||
"POSTINSTALL": "slurm",
|
||||
"START_AFTER_TEST": "%DEPLOY_UPLOAD_TEST%",
|
||||
"ROOT_PASSWORD": "weakpassword",
|
||||
"USER_LOGIN": "false",
|
||||
"SLURM_VERSION": "23"
|
||||
}
|
||||
},
|
||||
"support_server": {
|
||||
"profiles": {
|
||||
"rocky-dvd-iso-aarch64-*-aarch64": 10,
|
||||
|
65
tests/slurm.pm
Normal file
65
tests/slurm.pm
Normal file
@ -0,0 +1,65 @@
|
||||
use base "installedtest";
|
||||
use strict;
|
||||
use testapi;
|
||||
use utils;
|
||||
|
||||
# Install and start everything needed for a single-node Slurm instance:
# the HPC release package, munge (with a freshly generated key), and the
# Slurm daemons for the major version named by the SLURM_VERSION variable.
# Takes no arguments; every step dies on failure via assert_script_run.
sub slurm_setup {
    # install HPC repository
    assert_script_run "dnf -y install rocky-release-hpc", 120;

    # Set up munge
    assert_script_run "dnf -y install munge", 120;
    assert_script_run "dd if=/dev/urandom bs=1 count=1024 >/etc/munge/munge.key";
    assert_script_run "chmod 400 /etc/munge/munge.key";
    # use the OWNER:GROUP form; the dot separator is a legacy spelling
    assert_script_run "chown munge:munge /etc/munge/munge.key";
    assert_script_run "systemctl enable --now munge.service";

    # install slurm
    if (get_var("CURRREL") eq '8') {
        assert_script_run "dnf config-manager --set-enabled powertools";
    }
    # Fail early with a clear message when SLURM_VERSION is unset, instead
    # of asking dnf for nonsensical package names such as "slurm-slurmd".
    my $version = get_required_var("SLURM_VERSION");
    my @packages = map { "slurm${version}-$_" } qw(slurmdbd slurmrestd slurmctld slurmd);
    # several packages are installed at once, so allow more time than the
    # single-package installs above
    assert_script_run "dnf install -y " . join(' ', @packages), 300;

    # Since this is a single node system, we don't have to modify the conf files. We will for larger multi-node tests.
    # start services
    assert_script_run "systemctl enable --now slurmctld slurmdbd slurmrestd slurmd";
}
|
||||
|
||||
# Test entry point: set up a single-node Slurm instance, then verify that
# a batch job can run to completion and that a second job can be cancelled.
sub run {
    my $self = shift;

    # do all the install stuff
    slurm_setup();

    # if everything is configured right, sinfo should show the following output
    # $ sinfo
    # PARTITION AVAIL TIMELIMIT NODES STATE NODELIST
    # debug* up infinite 1 idle localhost
    validate_script_output "sinfo", sub { m/debug.*localhost/ };

    # write a boring job script
    assert_script_run "echo '#!/bin/bash\n#SBATCH --job-name=antarctica_time\nsleep 120\nTZ=NZST date' > job.sh";

    ## schedule a job and run it to completion
    assert_script_run "sbatch job.sh";
    validate_script_output "squeue", sub { m/antar/ };
    sleep 121;
    # after 121 seconds, job should have completed and no longer exist in the queue
    validate_script_output "squeue", sub { $_ !~ m/antar/ };

    ## cancel a job
    # Ask sbatch for machine-readable output so the real job ID can be
    # cancelled, rather than assuming this is the second job ever submitted.
    my $sbatch_output = script_output "sbatch --parsable job.sh";
    # --parsable prints the job ID first (optionally followed by ";cluster")
    my ($job_id) = $sbatch_output =~ m/^(\d+)/m;
    die "could not parse a job ID from sbatch output: '$sbatch_output'" unless defined $job_id;
    validate_script_output "squeue", sub { m/antar/ };
    assert_script_run "scancel $job_id";
    # job should no longer be in the queue
    validate_script_output "squeue", sub { $_ !~ m/antar/ };
}
|
||||
|
||||
# Report this module's test flags as a hash reference.
# Only "fatal" is set (to 1); presumably the openQA harness reads it to
# decide whether a failure here aborts the job -- confirm against the
# harness documentation.
sub test_flags {
    my %flags = (fatal => 1);
    return \%flags;
}
|
||||
|
||||
1;
|
||||
|
||||
# vim: set sw=4 et:
|
Loading…
Reference in New Issue
Block a user