os-autoinst-distri-rocky/tests/slurm.pm
Al Bowles 8becb62887
Provide tests for SIG/HPC slurm packages
This MR provides a very, very basic test suite for the Slurm packages
built by the HPC SIG. It checks the following:

- Necessary packages for a single-node Slurm instance install
  successfully from the SIG/HPC repository
- A job can be scheduled and executed to completion
- A job can be scheduled and then cancelled

```
./fifloader.py --clean --load templates.fif.json
openqa-cli api -X POST isos ISO=Rocky-8.8-x86_64-dvd.iso ARCH=x86_64 DISTRI=rocky FLAVOR=dvd-iso VERSION=8.8 CURRREL=8 BUILD=-$(date +%Y%d%m).0-slurm-8.8 TEST=slurm22,slurm23
openqa-cli api -X POST isos ISO=Rocky-9.2-x86_64-dvd.iso ARCH=x86_64 DISTRI=rocky FLAVOR=dvd-iso VERSION=9.2 CURRREL=9 BUILD=-$(date +%Y%d%m).0-slurm-9.2 TEST=slurm22,slurm23

```

- [x] My code follows the style guidelines of this project
- [x] I have performed a self-review of my own code
- [x] I have commented my code, particularly in hard-to-understand areas
- [ ] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [x] Any dependent changes have been merged and published in downstream modules
2023-07-25 15:04:07 -05:00

66 lines
2.1 KiB
Perl

use base "installedtest";
use strict;
use testapi;
use utils;
# Install and start a single-node Slurm instance from the SIG/HPC repository.
#
# Reads two test variables:
#   SLURM_VERSION - suffix inserted into the package names (slurm<version>-slurmd, ...)
#   CURRREL       - when it equals '8', the powertools repository is enabled first
#
# Takes no arguments. Every step goes through assert_script_run, so the first
# failing command fails the test.
sub slurm_setup {
    my $version = get_var("SLURM_VERSION");

    # install HPC repository
    assert_script_run "dnf -y install rocky-release-hpc", 120;

    # Set up munge: generate a random key, make it readable only by its owner
    # and hand it to the munge user before starting the service.
    assert_script_run "dnf -y install munge", 120;
    assert_script_run "dd if=/dev/urandom bs=1 count=1024 >/etc/munge/munge.key";
    assert_script_run "chmod 400 /etc/munge/munge.key";
    # ':' is the documented owner/group separator; the '.' form is deprecated.
    assert_script_run "chown munge:munge /etc/munge/munge.key";
    assert_script_run "systemctl enable --now munge.service";

    # install slurm
    if (get_var("CURRREL") eq '8') {
        assert_script_run "dnf config-manager --set-enabled powertools";
    }
    # This is the largest dnf transaction in the setup, so give it an explicit
    # timeout like the other dnf calls instead of relying on the default.
    assert_script_run "dnf install -y slurm$version-slurmdbd slurm$version-slurmrestd slurm$version-slurmctld slurm$version-slurmd", 300;

    # Since this is a single node system, we don't have to modify the conf files. We will for larger multi-node tests.
    # start services
    assert_script_run "systemctl enable --now slurmctld slurmdbd slurmrestd slurmd";
}
# Test entry point: set up Slurm, then check that a batch job can be
# (1) scheduled and run to completion and (2) scheduled and cancelled.
#
# The job is identified by its job name throughout, so the test does not
# depend on which numeric job ids the scheduler hands out.
sub run {
    my $self = shift;
    my $job_name = 'antarctica_time';
    # Shell loop that returns once squeue lists no job with our name.
    # Used with an assert_script_run timeout so a stuck job fails the test
    # instead of hanging it.
    my $wait_until_gone = "while squeue -h -n $job_name | grep -q .; do sleep 5; done";

    # do all the install stuff
    slurm_setup();

    # if everything is configured right, sinfo should show the following output
    # $ sinfo
    # PARTITION AVAIL TIMELIMIT NODES STATE NODELIST
    # debug* up infinite 1 idle localhost
    validate_script_output "sinfo", sub { m/debug.*localhost/ };

    # write a boring job script
    assert_script_run "echo '#!/bin/bash\n#SBATCH --job-name=${job_name}\nsleep 120\nTZ=NZST date' > job.sh";

    ## schedule a job and run it to completion
    assert_script_run "sbatch job.sh";
    validate_script_output "squeue", sub { m/antar/ };
    # The job sleeps for 120 seconds; poll until it has left the queue rather
    # than assuming it started the moment it was submitted.
    assert_script_run $wait_until_gone, 300;
    validate_script_output "squeue", sub { $_ !~ m/antar/ };

    ## cancel a job
    assert_script_run "sbatch job.sh";
    validate_script_output "squeue", sub { m/antar/ };
    # Cancel by name instead of a hard-coded job id.
    assert_script_run "scancel --name=$job_name";
    # Give the cancelled job a moment to be cleaned up before checking.
    assert_script_run $wait_until_gone, 60;
    # job should no longer be in the queue
    validate_script_output "squeue", sub { $_ !~ m/antar/ };
}
# Flags for this test module: returns a hash reference with 'fatal' set to 1.
sub test_flags {
    my %flags = (fatal => 1);
    return \%flags;
}
1;
# vim: set sw=4 et: