8becb62887
This MR provides a very, very basic test suite for the Slurm packages built by the HPC SIG. It checks the following: - Necessary packages for a single-node Slurm instance install successfully from the SIG/HPC repository - A job can be scheduled and executed to completion - A job can be scheduled and then cancelled ``` ./fifloader.py --clean --load templates.fif.json openqa-cli api -X POST isos ISO=Rocky-8.8-x86_64-dvd.iso ARCH=x86_64 DISTRI=rocky FLAVOR=dvd-iso VERSION=8.8 CURRREL=8 BUILD=-$(date +%Y%d%m).0-slurm-8.8 TEST=slurm22,slurm23 openqa-cli api -X POST isos ISO=Rocky-9.2-x86_64-dvd.iso ARCH=x86_64 DISTRI=rocky FLAVOR=dvd-iso VERSION=9.2 CURRREL=9 BUILD=-$(date +%Y%d%m).0-slurm-9.2 TEST=slurm22,slurm23 ``` - [x] My code follows the style guidelines of this project - [x] I have performed a self-review of my own code - [x] I have commented my code, particularly in hard-to-understand areas - [ ] I have made corresponding changes to the documentation - [x] My changes generate no new warnings - [x] Any dependent changes have been merged and published in downstream modules
66 lines
2.1 KiB
Perl
66 lines
2.1 KiB
Perl
use base "installedtest";
|
|
use strict;
|
|
use testapi;
|
|
use utils;
|
|
|
|
# Prepare a single-node Slurm instance: enable the SIG/HPC repository,
# configure and start munge, then install and start the Slurm daemons.
# Reads the SLURM_VERSION and CURRREL openQA variables; dies (via
# assert_script_run) if any setup command fails.
sub slurm_setup {
    # install HPC repository
    my $version = get_var("SLURM_VERSION");
    assert_script_run "dnf -y install rocky-release-hpc", 120;

    # Set up munge: Slurm requires a shared munge key for node
    # authentication; on a single node a random local key is enough.
    assert_script_run "dnf -y install munge", 120;
    assert_script_run "dd if=/dev/urandom bs=1 count=1024 >/etc/munge/munge.key";
    assert_script_run "chmod 400 /etc/munge/munge.key";
    # use the ':' owner:group separator — the '.' form is deprecated in
    # GNU coreutils and ambiguous if a user named "munge.munge" exists
    assert_script_run "chown munge:munge /etc/munge/munge.key";
    assert_script_run "systemctl enable --now munge.service";

    # install slurm; on Rocky 8 some build dependencies live in the
    # powertools repository, which is disabled by default
    if (get_var("CURRREL") eq '8') {
        assert_script_run "dnf config-manager --set-enabled powertools";
    }
    # give the multi-package install the same generous timeout as the
    # other dnf transactions above
    assert_script_run "dnf install -y slurm$version-slurmdbd slurm$version-slurmrestd slurm$version-slurmctld slurm$version-slurmd", 180;

    # Since this is a single node system, we don't have to modify the conf files. We will for larger multi-node tests.
    # start services
    assert_script_run "systemctl enable --now slurmctld slurmdbd slurmrestd slurmd";
}
|
|
|
|
# Main test body: install a single-node Slurm instance, then verify
# that a batch job can (a) run to completion and (b) be cancelled.
sub run {
    my $self = shift;

    # do all the install stuff
    slurm_setup();

    # if everything is configured right, sinfo should show the following output
    # $ sinfo
    # PARTITION AVAIL TIMELIMIT NODES STATE NODELIST
    # debug* up infinite 1 idle localhost
    validate_script_output "sinfo", sub { m/debug.*localhost/ };

    # write a boring job script (Perl expands \n, so the heredoc-like
    # multi-line script is echoed into job.sh)
    assert_script_run "echo '#!/bin/bash\n#SBATCH --job-name=antarctica_time\nsleep 120\nTZ=NZST date' > job.sh";

    ## schedule a job and run it to completion
    assert_script_run "sbatch job.sh";
    validate_script_output "squeue", sub { m/antar/ };
    sleep 121;
    # after 121 seconds, job should have completed and no longer exist in the queue
    validate_script_output "squeue", sub { $_ !~ m/antar/ };

    ## cancel a job
    # capture the job ID from sbatch's "Submitted batch job N" output
    # instead of assuming it is job 2 — the counter is not guaranteed
    # to be at any particular value, and scancel of a nonexistent ID
    # would make the queue check below pass for the wrong reason
    my $sbatch_out = script_output "sbatch job.sh";
    my ($job_id) = $sbatch_out =~ m/Submitted batch job (\d+)/;
    die "could not parse job ID from sbatch output: $sbatch_out" unless defined $job_id;
    validate_script_output "squeue", sub { m/antar/ };
    assert_script_run "scancel $job_id";
    # job should no longer be in the queue
    validate_script_output "squeue", sub { $_ !~ m/antar/ };
}
|
|
|
|
# openQA test flags for this module.
# fatal => 1: a failure here aborts the whole job, since nothing
# useful can run without a working Slurm setup.
sub test_flags {
    my %flags = (fatal => 1);
    return \%flags;
}
|
|
|
|
1;
|
|
|
|
# vim: set sw=4 et:
|