From 20bb6a02551d1608e8e1b7a5a51cbc8aeac2284a Mon Sep 17 00:00:00 2001 From: Andreas Florath Date: Wed, 20 Jul 2016 23:34:50 +0200 Subject: [PATCH] Add option to be able to run_functests.sh in parallel Running the functional tests is time consuming. This patch adds the option `-j ` to the tests/run_functests.sh: when given the test run in parallel up the jobs. When using this, be sure to have enough resources (CPUs, RAM and HD space) on the host. In addition there was the need to change two things: o Global /tmp/dib-test-should-fail was move to temporary build directory of each execution. o Because the logs might now interleave, each log line has now a prefix of the name of the testcase. [In my environment running functests sequential takes 15+ minutes, running them parallel takes less than 6 minutes.] Change-Id: Id9ea5131f0026c292ca6453ba2c80fe12c47f808 Signed-off-by: Andreas Florath --- doc/source/developer/developing_elements.rst | 5 + tests/run_functests.sh | 129 +++++++++++++++++-- 2 files changed, 126 insertions(+), 8 deletions(-) diff --git a/doc/source/developer/developing_elements.rst b/doc/source/developer/developing_elements.rst index 0cee5946..aa536440 100644 --- a/doc/source/developer/developing_elements.rst +++ b/doc/source/developer/developing_elements.rst @@ -385,6 +385,11 @@ line to run it. If it should not be run as part of the default CI run, you can submit a change with it added to ``DEFAULT_SKIP_TESTS`` in that file. +Running the functional tests is time consuming. Multiple parallel +jobs can be started by specifying ``-j ``. Each of the +jobs uses a lot resources (CPU, disk space, RAM) - therefore the job +count must carefully be chosen. + python """""" diff --git a/tests/run_functests.sh b/tests/run_functests.sh index f43c1e2f..4c6801ee 100755 --- a/tests/run_functests.sh +++ b/tests/run_functests.sh @@ -23,21 +23,65 @@ DEFAULT_SKIP_TESTS=( debian-minimal/testing-build-succeeds ) +function log_with_prefix { + local pr=$1 + + while read a; do + echo $(date +"%Y%m%d-%H%M%S.%N") "[$pr] $a" + done +} + +# Log job control messages +function log_jc { + local msg="$1" + printf "[JOB-CONTROL] %s %s\n" "$(date)" "${msg}" +} + +function job_cnt { + running_jobs=$(jobs -p) + echo ${running_jobs} | wc -w +} + +# This is needed, because the better 'wait -n' is +# available since bash 4.3 only. +function wait_minus_n { + if [ "${BASH_VERSINFO[0]}" -gt 4 \ + -o "${BASH_VERSINFO[0]}" = 4 \ + -a "${BASH_VERSINFO[1]}" -ge 3 ]; then + # Good way: wait on any job + wait -n + return $? + else + # Not that good way: wait on one specific job + # (others may be finished in the mean time) + local wait_for_pid=$(jobs -p | head -1) + wait ${wait_for_pid} + return $? + fi +} + # run_disk_element_test # Run a disk-image-build .tar build of ELEMENT including any elements # specified by TEST_ELEMENT function run_disk_element_test() { local test_element=$1 local element=$2 + local dont_use_tmp=$3 + local use_tmp_flag="" local dest_dir=$(mktemp -d) - trap "rm -rf $dest_dir /tmp/dib-test-should-fail" EXIT + trap "rm -rf $dest_dir" EXIT + + if [ "${dont_use_tmp}" = "yes" ]; then + use_tmp_flag="--no-tmpfs" + fi if break="after-error" break_outside_target=1 \ - break_cmd="cp \$TMP_MOUNT_PATH/tmp/dib-test-should-fail /tmp/ 2>&1 > /dev/null || true" \ + break_cmd="cp -v \$TMP_MOUNT_PATH/tmp/dib-test-should-fail ${dest_dir} || true" \ DIB_SHOW_IMAGE_USAGE=1 \ ELEMENTS_PATH=$DIB_ELEMENTS:$DIB_ELEMENTS/$element/test-elements \ - $DIB_CMD -x -t tar,qcow2 -o $dest_dir/image -n $element $test_element; then + $DIB_CMD -x -t tar,qcow2 ${use_tmp_flag} -o $dest_dir/image -n $element $test_element 2>&1 \ + | log_with_prefix "${element}/${test_element}"; then if ! [ -f "$dest_dir/image.qcow2" ]; then echo "Error: qcow2 build failed for element: $element, test-element: $test_element." @@ -58,7 +102,7 @@ function run_disk_element_test() { fi fi else - if [ -f "/tmp/dib-test-should-fail" ]; then + if [ -f "${dest_dir}/dib-test-should-fail" ]; then echo "PASS: Element $element, test-element: $test_element" else echo "Error: Build failed for element: $element, test-element: $test_element." @@ -79,7 +123,8 @@ function run_ramdisk_element_test() { local dest_dir=$(mktemp -d) if ELEMENTS_PATH=$DIB_ELEMENTS/$element/test-elements \ - $DIB_CMD -x -o $dest_dir/image $element $test_element; then + $DIB_CMD -x -o $dest_dir/image $element $test_element \ + | log_with_prefix "${element}/${test_element}"; then # TODO(dtantsur): test also kernel presence once we sort out its naming # problem (vmlinuz vs kernel) if ! [ -f "$dest_dir/image.initramfs" ]; then @@ -109,12 +154,15 @@ for e in $DIB_ELEMENTS/*/test-elements/*; do TESTS+=("$element/$test_element") done -while getopts ":hl" opt; do +JOB_MAX_CNT=1 + +while getopts ":hlpj:" opt; do case $opt in h) echo "run_functests.sh [-h] [-l] ..." echo " -h : show this help" echo " -l : list available tests" + echo " -p : run all tests in parallel" echo " : functional test to run" echo " Special test 'all' will run all tests" exit 0 @@ -128,6 +176,10 @@ while getopts ":hl" opt; do echo exit 0 ;; + j) + JOB_MAX_CNT=${OPTARG} + echo "Running parallel - using [${JOB_MAX_CNT}] jobs" + ;; \?) echo "Invalid option: -$OPTARG" exit 1 @@ -136,6 +188,15 @@ while getopts ":hl" opt; do done shift $((OPTIND-1)) +DONT_USE_TMP="no" +if [ "${JOB_MAX_CNT}" -gt 1 ]; then + # switch off using tmp dir for image building + # (The mem check using the tmp dir is currently done + # based on the available memory - and not on the free. + # See #1618124 for more details) + DONT_USE_TMP="yes" +fi + # cull the list of tests to run into TESTS_TO_RUN TESTS_TO_RUN=() title="" @@ -171,7 +232,36 @@ for test in "${TESTS_TO_RUN[@]}"; do done echo "------" +function wait_and_exit_on_failure { + local pid=$1 + + wait ${pid} + result=$? + + if [ "${result}" -ne 0 ]; then + exit ${result} + fi + return 0 +} + +EXIT_CODE=0 for test in "${TESTS_TO_RUN[@]}"; do + running_jobs_cnt=$(job_cnt) + log_jc "Number of running jobs [${running_jobs_cnt}] max jobs [${JOB_MAX_CNT}]" + if [ "${running_jobs_cnt}" -ge "${JOB_MAX_CNT}" ]; then + log_jc "Waiting for job to finish" + wait_minus_n + result=$? + + if [ "${result}" -ne 0 ]; then + EXIT_CODE=1 + # If a job fails, do not start any new ones. + break + fi + fi + + log_jc "Starting new job" + # from above; each array value is element/test_element. split it # back up element=${test%/*} @@ -186,7 +276,30 @@ for test in "${TESTS_TO_RUN[@]}"; do fi echo "Running $test ($element_type)" - run_${element_type}_element_test $test_element $element + run_${element_type}_element_test $test_element $element ${DONT_USE_TMP} & done -echo "Tests passed!" +# Wait for the rest of the jobs +while true; do + running_jobs_cnt=$(job_cnt) + log_jc "Number of running jobs left [${running_jobs_cnt}]" + + if [ "${running_jobs_cnt}" -eq 0 ]; then + break; + fi + + wait_minus_n + result=$? + + if [ "${result}" -ne 0 ]; then + EXIT_CODE=1 + fi +done + +if [ "${EXIT_CODE}" -eq 0 ]; then + echo "Tests passed!" + exit 0 +else + echo "At least one test failed" + exit 1 +fi