tools/testing/selftests/rcutorture/bin/kvm-remote.sh - linux - Git at Google

 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0+
 #
 # Run a series of tests on remote systems under KVM.
 #
 # Usage: kvm-remote.sh "systems" [ <kvm.sh args> ]
 #	 kvm-remote.sh "systems" /path/to/old/run [ <kvm-again.sh args> ]
 #
 # Copyright (C) 2021 Facebook, Inc.
 #
 # Authors: Paul E. McKenney <paulmck@kernel.org>

 # Remember how we were invoked so that it can be echoed and logged later.
 scriptname="$0"
 args="$*"

 # The paths used below are relative to the top of the kernel source tree.
 if [ ! -d tools/testing/selftests/rcutorture/bin ]; then
 	echo $scriptname must be run from top-level directory of kernel source tree.
 	exit 1
 fi

 # Put the rcutorture bin directory first on PATH, then source functions.sh.
 export RCUTORTURE="$(pwd)/tools/testing/selftests/rcutorture"
 export PATH="${RCUTORTURE}/bin:$PATH"
 . functions.sh

 starttime="$(get_starttime)"

 # The first argument is the list of remote systems; the remaining
 # arguments are left in "$@" for the code that follows.
 systems="$1"
 if [ -z "$systems" ]; then
 	echo $scriptname: Empty list of systems will go nowhere good, giving up.
 	exit 1
 fi
 shift

 # Pathnames:
 # T:	  /tmp/kvm-remote.sh.NNNNNN where "NNNNNN" is set by mktemp
 # resdir: /tmp/kvm-remote.sh.NNNNNN/res
 # rundir: /tmp/kvm-remote.sh.NNNNNN/res/$ds ("-remote" suffix)
 # oldrun: `pwd`/tools/testing/.../res/$otherds
 #
 # Pathname segments:
 # TD:	  kvm-remote.sh.NNNNNN
 # ds:	  yyyy.mm.dd-hh.mm.ss-remote

 # Scratch directory, removed when this script exits.
 T="$(mktemp -d ${TMPDIR-/tmp}/kvm-remote.sh.XXXXXX)"
 trap 'rm -rf $T' EXIT
 TD="$(basename "$T")"

 # The datestamp, with its "-remote" suffix, names the run directory
 # under $T/res.
 ds="$(date +%Y.%m.%d-%H.%M.%S)-remote"
 resdir="$T/res"
 rundir="$resdir/$ds"
 echo Results directory: $rundir
 echo $scriptname $args
 # Choose between a fresh build and re-use of an old run: a first remaining
 # argument that starts with "--" is treated as the start of kvm.sh arguments,
 # anything else as the pathname of an old run.  Either way, $oldrun ends up
 # naming the local directory that holds remote-log, and kvm-again.sh is
 # invoked with --dryrun --remote to populate $rundir.
 if echo $1 | grep -q '^--'
 then
 	# Fresh build.  Create a datestamp unless the caller supplied one.
 	datestamp="`echo "$@" | awk -v ds="$ds" '{
 		for (i = 1; i < NF; i++) {
 			if ($i == "--datestamp") {
 				ds = "";
 				break;
 			}
 		}
 		if (ds != "")
 			print "--datestamp " ds;
 	}'`"
 	kvm.sh --remote "$@" $datestamp --buildonly > $T/kvm.sh.out 2>&1
 	ret=$?
 	if test "$ret" -ne 0
 	then
 		# Report the saved status: by now $? holds the result of "test".
 		echo $scriptname: kvm.sh failed exit code $ret
 		cat $T/kvm.sh.out
 		exit 2
 	fi
 	# The build-side results directory is taken from the kvm.sh output.
 	oldrun="`grep -m 1 "^Results directory: " $T/kvm.sh.out | awk '{ print $3 }'`"
 	touch "$oldrun/remote-log"
 	echo $scriptname $args >> "$oldrun/remote-log"
 	echo | tee -a "$oldrun/remote-log"
 	echo " ----" kvm.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
 	cat $T/kvm.sh.out | tee -a "$oldrun/remote-log"
 	# We are going to run this, so remove the buildonly files.
 	rm -f "$oldrun"/*/buildonly
 	kvm-again.sh "$oldrun" --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
 	ret=$?
 	if test "$ret" -ne 0
 	then
 		echo $scriptname: kvm-again.sh failed exit code $ret | tee -a "$oldrun/remote-log"
 		cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
 		exit 2
 	fi
 else
 	# Re-use old run.  Relative pathnames are taken relative to `pwd`.
 	oldrun="$1"
 	if ! echo $oldrun | grep -q '^/'
 	then
 		oldrun="`pwd`/$oldrun"
 	fi
 	shift
 	touch "$oldrun/remote-log"
 	echo $scriptname $args >> "$oldrun/remote-log"
 	kvm-again.sh "$oldrun" "$@" --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
 	ret=$?
 	if test "$ret" -ne 0
 	then
 		echo $scriptname: kvm-again.sh failed exit code $ret | tee -a "$oldrun/remote-log"
 		cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
 		exit 2
 	fi
 	# From here on, log to a copy of the new run directory under res/.
 	cp -a "$rundir" "$RCUTORTURE/res/"
 	oldrun="$RCUTORTURE/res/$ds"
 fi
 # Log a header for the kvm-again.sh output.  The output itself is only
 # copied to stdout.
 echo | tee -a "$oldrun/remote-log"
 echo " ----" kvm-again.sh output: "($(date))" | tee -a "$oldrun/remote-log"
 cat $T/kvm-again.sh.out
 echo | tee -a "$oldrun/remote-log"
 echo Remote run directory: $rundir | tee -a "$oldrun/remote-log"
 echo Local build-side run directory: $oldrun | tee -a "$oldrun/remote-log"

 # Create the kvm-remote-N.sh scripts in the bin directory, one for each
 # line of the scenarios file.  N is field 1 with its first period removed;
 # the remaining fields become the kvm-test-1-run-batch.sh arguments.
 awk -v dest="$T/bin" -v rundir="$rundir" '
 {
 	batch = $1;
 	sub(/\./, "", batch);
 	script = dest "/kvm-remote-" batch ".sh";
 	batchargs = "";
 	for (f = 2; f <= NF; f++)
 		batchargs = batchargs " " $f;
 	print "kvm-remote-noreap.sh " rundir " &" > script;
 	print "kvm-test-1-run-batch.sh" batchargs >> script;
 	print "sync" >> script;
 	print "rm " rundir "/remote.run" >> script;
 }' < "$rundir"/scenarios
 chmod +x $T/bin/kvm-remote-*.sh
 # Bundle bin and res, with paths relative to the parent of $T, following
 # symbolic links (-h).
 ( cd "$(dirname $T)"; tar -chzf $T/binres.tgz "$TD/bin" "$TD/res" )

 # Check first to avoid the need for cleanup for system-name typos.
 # Each system must answer a non-interactive ssh; its online CPU count
 # is logged.
 for i in $systems; do
 	ncpus="$(ssh -o BatchMode=yes $i getconf _NPROCESSORS_ONLN 2> /dev/null)"
 	ret=$?
 	if [ "$ret" -ne 0 ]; then
 		echo System $i unreachable, giving up. | tee -a "$oldrun/remote-log"
 		exit 4
 	fi
 	echo $i: $ncpus CPUs " " $(date) | tee -a "$oldrun/remote-log"
 done

 # Download and expand the tarball on all systems.

 # Copy $T/binres.tgz to the specified system and unpack it under /tmp.
 # A failed transfer is retried after a 60-second wait.  If a retry fails
 # when more than five earlier retries have already failed, the script
 # gives up with exit code 10.
 #
 # Usage: downloadtarball system
 downloadtarball () {
 	local ret
 	local tries=0

 	echo Downloading tarball to $1 `date` | tee -a "$oldrun/remote-log"
 	cat $T/binres.tgz | ssh -o BatchMode=yes $1 "cd /tmp; tar -xzf -"
 	ret=$?
 	while test "$ret" -ne 0
 	do
 		echo Unable to download $T/binres.tgz to system $1, waiting and then retrying.  $tries prior retries. | tee -a "$oldrun/remote-log"
 		sleep 60
 		cat $T/binres.tgz | ssh -o BatchMode=yes $1 "cd /tmp; tar -xzf -"
 		ret=$?
 		# Numeric comparison: a bare ">" here would be a redirection
 		# to a file named "5" and the test would always succeed.
 		if test "$ret" -ne 0 && test "$tries" -gt 5
 		then
 			echo Unable to download $T/binres.tgz to system $1, giving up. | tee -a "$oldrun/remote-log"
 			exit 10
 		fi
 		tries=$((tries+1))
 	done
 }

 echo Build-products tarball: `du -h $T/binres.tgz` | tee -a "$oldrun/remote-log"
 for i in $systems
 do
 	downloadtarball "$i"
 done

 # Check for the presence of a file on the specified system.
 #
 # Usage: checkremotefile system pathname
 #
 # Returns 0 if "test -f pathname" succeeds on the system and 1 if it
 # reports the file missing.  An ssh exit code of 255 is logged and the
 # check is retried after a wait, so this currently just waits forever
 # if a machine disappears.  Any other exit code is logged and returned
 # to the caller without a retry.  Log messages go to stdout and to
 # $oldrun/remote-log.
 checkremotefile () {
 	local status
 	local sleeptime=60

 	while true
 	do
 		ssh -o BatchMode=yes $1 "test -f \"$2\""
 		status=$?
 		case "$status" in
 		0)
 			return 0
 			;;
 		1)
 			echo " ---" File \"$2\" not found: ssh $1 test -f \"$2\" | tee -a "$oldrun/remote-log"
 			return 1
 			;;
 		255)
 			echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. $(date) | tee -a "$oldrun/remote-log"
 			;;
 		*)
 			echo " ---" Exit code $status: ssh $1 test -f \"$2\", retry after $sleeptime seconds. $(date) | tee -a "$oldrun/remote-log"
 			return $status
 			;;
 		esac
 		# Only the ssh-failure case reaches this point.
 		sleep $sleeptime
 	done
 }

 # Start batches on idle remote $systems.
 #
 # Usage: startbatches curbatch nbatches
 #
 # Batches are numbered starting at 1.  The number of the next batch to
 # start is written to stdout, so be careful to redirect all debug output
 # to FD 2 (stderr).  A system is skipped while checkremotefile finds its
 # remote.run file.  If ssh fails to start a batch, the script exits with
 # code 11.
 startbatches () {
 	local nextbatch="$1"
 	local lastbatch="$2"
 	local sshret

 	# Each pass through the following loop examines one system.
 	for i in $systems
 	do
 		# Every batch has already been started.
 		if [ "$nextbatch" -gt "$lastbatch" ]; then
 			echo $((lastbatch + 1))
 			return 0
 		fi
 		# System still running last test, skip.
 		checkremotefile "$i" "$resdir/$ds/remote.run" 1>&2 && continue
 		# Mark the system busy and start the batch in the background.
 		ssh -o BatchMode=yes "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$nextbatch.sh > kvm-remote-$nextbatch.sh.out 2>&1 &" 1>&2
 		sshret=$?
 		if [ "$sshret" -ne 0 ]; then
 			echo ssh $i failed: exitcode $sshret 1>&2
 			exit 11
 		fi
 		echo " ----" System $i Batch $(head -n $nextbatch < "$rundir"/scenarios | tail -1) $(date) 1>&2
 		nextbatch=$((nextbatch + 1))
 	done
 	echo $nextbatch
 }

 # Launch all the scenarios.  There is one batch per line of the scenarios
 # file.  Each pass hands startbatches the next batch number and reads the
 # updated number back from $T/curbatch.
 nbatches="$(wc -l "$rundir"/scenarios | awk '{ print $1 }')"
 curbatch=1
 while [ "$curbatch" -le "$nbatches" ]; do
 	startbatches $curbatch $nbatches > $T/curbatch 2> $T/startbatches.stderr
 	curbatch="$(cat $T/curbatch)"
 	# Relay anything startbatches wrote to stderr, and log it.
 	if [ -s "$T/startbatches.stderr" ]; then
 		tee -a "$oldrun/remote-log" < "$T/startbatches.stderr"
 	fi
 	# Batches remain, so wait before examining the systems again.
 	if [ "$curbatch" -le "$nbatches" ]; then
 		sleep 30
 	fi
 done
 echo All batches started. $(date) | tee -a "$oldrun/remote-log"

 # Wait for all remaining scenarios to complete and collect results.
 # A system is finished once checkremotefile no longer finds its remote.run
 # file.  Its output files are then streamed back as a tarball and unpacked
 # in $oldrun, after which the remote copy of $T is removed.
 for i in $systems; do
 	echo " ---" Waiting for $i $(date) | tee -a "$oldrun/remote-log"
 	while checkremotefile "$i" "$resdir/$ds/remote.run"; do
 		sleep 30
 	done
 	echo " ---" Collecting results from $i $(date) | tee -a "$oldrun/remote-log"
 	remotecmd="cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1"
 	( cd "$oldrun"; ssh -o BatchMode=yes $i "$remotecmd" | tar -xzf - )
 done

 # The exit status of kvm-end-run-stats.sh is passed through a file because
 # the command runs on the left-hand side of a pipeline.
 ( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log"
 exit "$(cat $T/exitcode)"
	#!/bin/bash
	# SPDX-License-Identifier: GPL-2.0+
	#
	# Run a series of tests on remote systems under KVM.
	#
	# Usage: kvm-remote.sh "systems" [ <kvm.sh args> ]
	# kvm-remote.sh "systems" /path/to/old/run [ <kvm-again.sh args> ]
	#
	# Copyright (C) 2021 Facebook, Inc.
	#
	# Authors: Paul E. McKenney <paulmck@kernel.org>

	# Remember how we were invoked so that it can be echoed and logged later.
	scriptname="$0"
	args="$*"

	# The paths used below are relative to the top of the kernel source tree.
	if [ ! -d tools/testing/selftests/rcutorture/bin ]; then
		echo $scriptname must be run from top-level directory of kernel source tree.
		exit 1
	fi

	# Put the rcutorture bin directory first on PATH, then source functions.sh.
	export RCUTORTURE="$(pwd)/tools/testing/selftests/rcutorture"
	export PATH="${RCUTORTURE}/bin:$PATH"
	. functions.sh

	starttime="$(get_starttime)"

	# The first argument is the list of remote systems; the remaining
	# arguments are left in "$@" for the code that follows.
	systems="$1"
	if [ -z "$systems" ]; then
		echo $scriptname: Empty list of systems will go nowhere good, giving up.
		exit 1
	fi
	shift

	# Pathnames:
	# T: /tmp/kvm-remote.sh.NNNNNN where "NNNNNN" is set by mktemp
	# resdir: /tmp/kvm-remote.sh.NNNNNN/res
	# rundir: /tmp/kvm-remote.sh.NNNNNN/res/$ds ("-remote" suffix)
	# oldrun: `pwd`/tools/testing/.../res/$otherds
	#
	# Pathname segments:
	# TD: kvm-remote.sh.NNNNNN
	# ds: yyyy.mm.dd-hh.mm.ss-remote

	# Scratch directory, removed when this script exits.
	T="$(mktemp -d ${TMPDIR-/tmp}/kvm-remote.sh.XXXXXX)"
	trap 'rm -rf $T' EXIT
	TD="$(basename "$T")"

	# The datestamp, with its "-remote" suffix, names the run directory
	# under $T/res.
	ds="$(date +%Y.%m.%d-%H.%M.%S)-remote"
	resdir="$T/res"
	rundir="$resdir/$ds"
	echo Results directory: $rundir
	echo $scriptname $args
	# Choose between a fresh build and re-use of an old run: a first remaining
	# argument that starts with "--" is treated as the start of kvm.sh arguments,
	# anything else as the pathname of an old run.  Either way, $oldrun ends up
	# naming the local directory that holds remote-log, and kvm-again.sh is
	# invoked with --dryrun --remote to populate $rundir.
	if echo $1 | grep -q '^--'
	then
		# Fresh build.  Create a datestamp unless the caller supplied one.
		datestamp="`echo "$@" | awk -v ds="$ds" '{
			for (i = 1; i < NF; i++) {
				if ($i == "--datestamp") {
					ds = "";
					break;
				}
			}
			if (ds != "")
				print "--datestamp " ds;
		}'`"
		kvm.sh --remote "$@" $datestamp --buildonly > $T/kvm.sh.out 2>&1
		ret=$?
		if test "$ret" -ne 0
		then
			# Report the saved status: by now $? holds the result of "test".
			echo $scriptname: kvm.sh failed exit code $ret
			cat $T/kvm.sh.out
			exit 2
		fi
		# The build-side results directory is taken from the kvm.sh output.
		oldrun="`grep -m 1 "^Results directory: " $T/kvm.sh.out | awk '{ print $3 }'`"
		touch "$oldrun/remote-log"
		echo $scriptname $args >> "$oldrun/remote-log"
		echo | tee -a "$oldrun/remote-log"
		echo " ----" kvm.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
		cat $T/kvm.sh.out | tee -a "$oldrun/remote-log"
		# We are going to run this, so remove the buildonly files.
		rm -f "$oldrun"/*/buildonly
		kvm-again.sh "$oldrun" --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
		ret=$?
		if test "$ret" -ne 0
		then
			echo $scriptname: kvm-again.sh failed exit code $ret | tee -a "$oldrun/remote-log"
			cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
			exit 2
		fi
	else
		# Re-use old run.  Relative pathnames are taken relative to `pwd`.
		oldrun="$1"
		if ! echo $oldrun | grep -q '^/'
		then
			oldrun="`pwd`/$oldrun"
		fi
		shift
		touch "$oldrun/remote-log"
		echo $scriptname $args >> "$oldrun/remote-log"
		kvm-again.sh "$oldrun" "$@" --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
		ret=$?
		if test "$ret" -ne 0
		then
			echo $scriptname: kvm-again.sh failed exit code $ret | tee -a "$oldrun/remote-log"
			cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
			exit 2
		fi
		# From here on, log to a copy of the new run directory under res/.
		cp -a "$rundir" "$RCUTORTURE/res/"
		oldrun="$RCUTORTURE/res/$ds"
	fi
	# Log a header for the kvm-again.sh output.  The output itself is only
	# copied to stdout.
	echo | tee -a "$oldrun/remote-log"
	echo " ----" kvm-again.sh output: "($(date))" | tee -a "$oldrun/remote-log"
	cat $T/kvm-again.sh.out
	echo | tee -a "$oldrun/remote-log"
	echo Remote run directory: $rundir | tee -a "$oldrun/remote-log"
	echo Local build-side run directory: $oldrun | tee -a "$oldrun/remote-log"

	# Create the kvm-remote-N.sh scripts in the bin directory, one for each
	# line of the scenarios file.  N is field 1 with its first period removed;
	# the remaining fields become the kvm-test-1-run-batch.sh arguments.
	awk -v dest="$T/bin" -v rundir="$rundir" '
	{
		batch = $1;
		sub(/\./, "", batch);
		script = dest "/kvm-remote-" batch ".sh";
		batchargs = "";
		for (f = 2; f <= NF; f++)
			batchargs = batchargs " " $f;
		print "kvm-remote-noreap.sh " rundir " &" > script;
		print "kvm-test-1-run-batch.sh" batchargs >> script;
		print "sync" >> script;
		print "rm " rundir "/remote.run" >> script;
	}' < "$rundir"/scenarios
	chmod +x $T/bin/kvm-remote-*.sh
	# Bundle bin and res, with paths relative to the parent of $T, following
	# symbolic links (-h).
	( cd "$(dirname $T)"; tar -chzf $T/binres.tgz "$TD/bin" "$TD/res" )

	# Check first to avoid the need for cleanup for system-name typos.
	# Each system must answer a non-interactive ssh; its online CPU count
	# is logged.
	for i in $systems; do
		ncpus="$(ssh -o BatchMode=yes $i getconf _NPROCESSORS_ONLN 2> /dev/null)"
		ret=$?
		if [ "$ret" -ne 0 ]; then
			echo System $i unreachable, giving up. | tee -a "$oldrun/remote-log"
			exit 4
		fi
		echo $i: $ncpus CPUs " " $(date) | tee -a "$oldrun/remote-log"
	done

	# Download and expand the tarball on all systems.

	# Copy $T/binres.tgz to the specified system and unpack it under /tmp.
	# A failed transfer is retried after a 60-second wait.  If a retry fails
	# when more than five earlier retries have already failed, the script
	# gives up with exit code 10.
	#
	# Usage: downloadtarball system
	downloadtarball () {
		local ret
		local tries=0

		echo Downloading tarball to $1 `date` | tee -a "$oldrun/remote-log"
		cat $T/binres.tgz | ssh -o BatchMode=yes $1 "cd /tmp; tar -xzf -"
		ret=$?
		while test "$ret" -ne 0
		do
			echo Unable to download $T/binres.tgz to system $1, waiting and then retrying.  $tries prior retries. | tee -a "$oldrun/remote-log"
			sleep 60
			cat $T/binres.tgz | ssh -o BatchMode=yes $1 "cd /tmp; tar -xzf -"
			ret=$?
			# Numeric comparison: a bare ">" here would be a redirection
			# to a file named "5" and the test would always succeed.
			if test "$ret" -ne 0 && test "$tries" -gt 5
			then
				echo Unable to download $T/binres.tgz to system $1, giving up. | tee -a "$oldrun/remote-log"
				exit 10
			fi
			tries=$((tries+1))
		done
	}

	echo Build-products tarball: `du -h $T/binres.tgz` | tee -a "$oldrun/remote-log"
	for i in $systems
	do
		downloadtarball "$i"
	done

	# Check for the presence of a file on the specified system.
	#
	# Usage: checkremotefile system pathname
	#
	# Returns 0 if "test -f pathname" succeeds on the system and 1 if it
	# reports the file missing.  An ssh exit code of 255 is logged and the
	# check is retried after a wait, so this currently just waits forever
	# if a machine disappears.  Any other exit code is logged and returned
	# to the caller without a retry.  Log messages go to stdout and to
	# $oldrun/remote-log.
	checkremotefile () {
		local status
		local sleeptime=60

		while true
		do
			ssh -o BatchMode=yes $1 "test -f \"$2\""
			status=$?
			case "$status" in
			0)
				return 0
				;;
			1)
				echo " ---" File \"$2\" not found: ssh $1 test -f \"$2\" | tee -a "$oldrun/remote-log"
				return 1
				;;
			255)
				echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. $(date) | tee -a "$oldrun/remote-log"
				;;
			*)
				echo " ---" Exit code $status: ssh $1 test -f \"$2\", retry after $sleeptime seconds. $(date) | tee -a "$oldrun/remote-log"
				return $status
				;;
			esac
			# Only the ssh-failure case reaches this point.
			sleep $sleeptime
		done
	}

	# Start batches on idle remote $systems.
	#
	# Usage: startbatches curbatch nbatches
	#
	# Batches are numbered starting at 1.  The number of the next batch to
	# start is written to stdout, so be careful to redirect all debug output
	# to FD 2 (stderr).  A system is skipped while checkremotefile finds its
	# remote.run file.  If ssh fails to start a batch, the script exits with
	# code 11.
	startbatches () {
		local nextbatch="$1"
		local lastbatch="$2"
		local sshret

		# Each pass through the following loop examines one system.
		for i in $systems
		do
			# Every batch has already been started.
			if [ "$nextbatch" -gt "$lastbatch" ]; then
				echo $((lastbatch + 1))
				return 0
			fi
			# System still running last test, skip.
			checkremotefile "$i" "$resdir/$ds/remote.run" 1>&2 && continue
			# Mark the system busy and start the batch in the background.
			ssh -o BatchMode=yes "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$nextbatch.sh > kvm-remote-$nextbatch.sh.out 2>&1 &" 1>&2
			sshret=$?
			if [ "$sshret" -ne 0 ]; then
				echo ssh $i failed: exitcode $sshret 1>&2
				exit 11
			fi
			echo " ----" System $i Batch $(head -n $nextbatch < "$rundir"/scenarios | tail -1) $(date) 1>&2
			nextbatch=$((nextbatch + 1))
		done
		echo $nextbatch
	}

	# Launch all the scenarios.  There is one batch per line of the scenarios
	# file.  Each pass hands startbatches the next batch number and reads the
	# updated number back from $T/curbatch.
	nbatches="$(wc -l "$rundir"/scenarios | awk '{ print $1 }')"
	curbatch=1
	while [ "$curbatch" -le "$nbatches" ]; do
		startbatches $curbatch $nbatches > $T/curbatch 2> $T/startbatches.stderr
		curbatch="$(cat $T/curbatch)"
		# Relay anything startbatches wrote to stderr, and log it.
		if [ -s "$T/startbatches.stderr" ]; then
			tee -a "$oldrun/remote-log" < "$T/startbatches.stderr"
		fi
		# Batches remain, so wait before examining the systems again.
		if [ "$curbatch" -le "$nbatches" ]; then
			sleep 30
		fi
	done
	echo All batches started. $(date) | tee -a "$oldrun/remote-log"

	# Wait for all remaining scenarios to complete and collect results.
	# A system is finished once checkremotefile no longer finds its remote.run
	# file.  Its output files are then streamed back as a tarball and unpacked
	# in $oldrun, after which the remote copy of $T is removed.
	for i in $systems; do
		echo " ---" Waiting for $i $(date) | tee -a "$oldrun/remote-log"
		while checkremotefile "$i" "$resdir/$ds/remote.run"; do
			sleep 30
		done
		echo " ---" Collecting results from $i $(date) | tee -a "$oldrun/remote-log"
		remotecmd="cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1"
		( cd "$oldrun"; ssh -o BatchMode=yes $i "$remotecmd" | tar -xzf - )
	done

	# The exit status of kvm-end-run-stats.sh is passed through a file because
	# the command runs on the left-hand side of a pipeline.
	( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log"
	exit "$(cat $T/exitcode)"