From 8becb62887e7d266ed9c8550b4a13a9c184a6f64 Mon Sep 17 00:00:00 2001 From: Al Bowles Date: Tue, 25 Jul 2023 15:04:07 -0500 Subject: [PATCH] Provide tests for SIG/HPC slurm packages This MR provides a very, very basic test suite for the Slurm packages built by the HPC SIG. It checks the following: - Necessary packages for a single-node Slurm instance install successfully from the SIG/HPC repository - A job can be scheduled and executed to completion - A job can be scheduled and then cancelled ``` ./fifloader.py --clean --load templates.fif.json openqa-cli api -X POST isos ISO=Rocky-8.8-x86_64-dvd.iso ARCH=x86_64 DISTRI=rocky FLAVOR=dvd-iso VERSION=8.8 CURRREL=8 BUILD=-$(date +%Y%m%d).0-slurm-8.8 TEST=slurm22,slurm23 openqa-cli api -X POST isos ISO=Rocky-9.2-x86_64-dvd.iso ARCH=x86_64 DISTRI=rocky FLAVOR=dvd-iso VERSION=9.2 CURRREL=9 BUILD=-$(date +%Y%m%d).0-slurm-9.2 TEST=slurm22,slurm23 ``` - [x] My code follows the style guidelines of this project - [x] I have performed a self-review of my own code - [x] I have commented my code, particularly in hard-to-understand areas - [ ] I have made corresponding changes to the documentation - [x] My changes generate no new warnings - [x] Any dependent changes have been merged and published in downstream modules --- templates.fif.json | 40 ++++++++++++++++++++++++---- tests/slurm.pm | 65 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+), 5 deletions(-) create mode 100644 tests/slurm.pm diff --git a/templates.fif.json b/templates.fif.json index eb916ba9..7980f693 100644 --- a/templates.fif.json +++ b/templates.fif.json @@ -82,7 +82,7 @@ }, "version": "*" }, - "rocky-boot-iso-aarch64-*": { + "rocky-boot-iso-aarch64-*": { "arch": "aarch64", "distri": "rocky", "flavor": "boot-iso", @@ -210,7 +210,7 @@ "machine": "uefi", "product": "rocky-GenericCloud_LVM-qcow2-qcow2-x86_64-*" }, - "rocky-boot-iso-aarch64-*-aarch64": { + "rocky-boot-iso-aarch64-*-aarch64": { "machine": "aarch64", "product":
"rocky-boot-iso-aarch64-*" }, @@ -422,7 +422,7 @@ "POSTINSTALL": "autocloud" } }, - "install_resize_lvm": { + "install_resize_lvm": { "profiles": { "rocky-dvd-iso-aarch64-*-aarch64": 40, "rocky-dvd-iso-x86_64-*-64bit": 40 @@ -926,8 +926,8 @@ }, "install_scsi_updates_img": { "profiles": { - "rocky-universal-aarch64-*-aarch64": 20, - "rocky-universal-x86_64-*-64bit": 20 + "rocky-universal-aarch64-*-aarch64": 20, + "rocky-universal-x86_64-*-64bit": 20 }, "settings": { "CDMODEL": "scsi-cd", @@ -1418,6 +1418,36 @@ "WORKER_CLASS": "tap" } }, + "slurm22": { + "profiles": { + "rocky-dvd-iso-aarch64-*-aarch64": 10, + "rocky-dvd-iso-x86_64-*-64bit": 10 + }, + "settings": { + "BOOTFROM": "c", + "HDD_1": "disk_%FLAVOR%_%MACHINE%.qcow2", + "POSTINSTALL": "slurm", + "START_AFTER_TEST": "%DEPLOY_UPLOAD_TEST%", + "ROOT_PASSWORD": "weakpassword", + "USER_LOGIN": "false", + "SLURM_VERSION": "22" + } + }, + "slurm23": { + "profiles": { + "rocky-dvd-iso-aarch64-*-aarch64": 10, + "rocky-dvd-iso-x86_64-*-64bit": 10 + }, + "settings": { + "BOOTFROM": "c", + "HDD_1": "disk_%FLAVOR%_%MACHINE%.qcow2", + "POSTINSTALL": "slurm", + "START_AFTER_TEST": "%DEPLOY_UPLOAD_TEST%", + "ROOT_PASSWORD": "weakpassword", + "USER_LOGIN": "false", + "SLURM_VERSION": "23" + } + }, "support_server": { "profiles": { "rocky-dvd-iso-aarch64-*-aarch64": 10,
diff --git a/tests/slurm.pm b/tests/slurm.pm
new file mode 100644
index 00000000..3dfceca0
--- /dev/null
+++ b/tests/slurm.pm
@@ -0,0 +1,65 @@
+use base "installedtest";
+use strict;
+use testapi;
+use utils;
+
+# Install munge and the Slurm daemons selected by SLURM_VERSION, then start them.
+sub slurm_setup {
+    my $version = get_required_var("SLURM_VERSION");    # fail early if unset
+    assert_script_run "dnf -y install rocky-release-hpc", 120;    # HPC repository
+
+    # Set up munge
+    assert_script_run "dnf -y install munge", 120;
+    assert_script_run "dd if=/dev/urandom bs=1 count=1024 >/etc/munge/munge.key";
+    assert_script_run "chmod 400 /etc/munge/munge.key";
+    assert_script_run "chown munge:munge /etc/munge/munge.key";
+    assert_script_run "systemctl enable --now munge.service";
+
+    # install slurm (powertools is enabled first, on release 8 only)
+    if ((get_var("CURRREL") // '') eq '8') {
+        assert_script_run "dnf config-manager --set-enabled powertools";
+    }
+    assert_script_run "dnf install -y slurm$version-slurmdbd slurm$version-slurmrestd slurm$version-slurmctld slurm$version-slurmd", 300;
+
+    # Single-node system, so the conf files stay unmodified; just start services.
+    assert_script_run "systemctl enable --now slurmctld slurmdbd slurmrestd slurmd";
+}
+
+# Wait up to $timeout seconds for every antarctica_time job to leave the queue.
+sub wait_for_job_gone {
+    my ($timeout) = @_;
+    my $poll = "while squeue -h -n antarctica_time | grep -q .; do sleep 5; done";
+    assert_script_run "timeout $timeout bash -c '$poll'", $timeout + 10;
+}
+
+# Entry point: set Slurm up, run one job to completion, then cancel another.
+sub run {
+    slurm_setup();
+
+    # if everything is configured right, sinfo shows the debug partition on localhost
+    validate_script_output "sinfo", sub { m/debug.*localhost/ };
+
+    # write a boring job script
+    assert_script_run "echo '#!/bin/bash\n#SBATCH --job-name=antarctica_time\nsleep 120\nTZ=NZST date' > job.sh";
+
+    ## schedule a job and poll (instead of a fixed sleep) until it has completed
+    assert_script_run "sbatch job.sh";
+    validate_script_output "squeue", sub { m/antar/ };
+    wait_for_job_gone(300);
+    validate_script_output "squeue", sub { $_ !~ m/antar/ };
+
+    ## cancel a job by name: its id depends on how many jobs ran before it
+    assert_script_run "sbatch job.sh";
+    validate_script_output "squeue", sub { m/antar/ };
+    assert_script_run "scancel --name=antarctica_time";
+    wait_for_job_gone(60);
+    validate_script_output "squeue", sub { $_ !~ m/antar/ };
+}
+
+sub test_flags {
+    return {fatal => 1};
+}
+
+1;
+
+# vim: set sw=4 et: