Add option to invoke QEMU with a PVM template

Add -T command line option to aarch64/run_qemu.sh which invokes QEMU
with a given file as a PVM template.

It will automatically determine the address at which the file will be
placed in memory. This is based on the fixed QEMU memory layout:
    kernel | initrd | dtb | template

The script first runs QEMU with '-machine dumpdtb' to dump the generated
DTB, adds a node specifying the address and size of the template to it,
and then runs QEMU for real with the patched DTB passed via '-dtb'.

Bug: 171309087
Test: run_qemu.sh with a script:
      hexdump -C /proc/device-tree/pkvm_template@*/reg
Change-Id: I82dd50f407dc45f9dc615c702e25841b607bbb61
diff --git a/aarch64/run_qemu.sh b/aarch64/run_qemu.sh
index 5fb1a37..f4f2edb 100755
--- a/aarch64/run_qemu.sh
+++ b/aarch64/run_qemu.sh
@@ -20,6 +20,7 @@
 default_var ROM_DIR	"${PREBUILTS_QEMU_ROM_DIR}"
 default_var KERNEL	"${LINUX_OUT_IMAGE}"
 default_var ROOTFS	"${PREBUILTS_KUT_ROOTFS}"
+default_var TEMPLATE	""
 default_var CPU		"max"
 default_var SMP		2
 default_var RAM		512
@@ -28,11 +29,15 @@
 default_var VERBOSE	0
 default_var TIMEOUT	""
 
+KiB=1024	# bytes in one KiB; base unit for the two constants below
+MiB=$((1024 * KiB))	# bytes in one MiB
+GiB=$((1024 * MiB))	# bytes in one GiB
+
 function usage() {
 	cat <<EOF
 
 Usage: $0 [-h] [-v]
-       [-e QEMU] [-L ROM_DIR] [-k KERNEL] [-r ROOTFS] [-R DRIVE]
+       [-e QEMU] [-L ROM_DIR] [-k KERNEL] [-r ROOTFS] [-R DRIVE] [-T TEMPLATE]
        [-c CPU] [-s NUM_CPUS] [-m MEM] [-g GIC] [-G]
        [-t TIMEOUT]
 
@@ -43,6 +48,7 @@
     -k    kernel image
     -r    root filesystem image
     -R    additional drive image(s) to mount as read-only
+    -T    VM template image
     -c    CPU model (defaults to "${DEFAULT_CPU}")
     -s    number of CPU cores (defaults to ${DEFAULT_SMP})
     -m    amount of memory in MB (defaults to ${DEFAULT_RAM})
@@ -52,17 +58,56 @@
 EOF
 }
 
+function file_size() {	# Print the size in bytes of the file named by $1.
+	stat --format=%s "$1"	# %s is the total size in bytes (GNU stat format syntax)
+}
+
+function align_up_pow2() {	# Print $1 rounded up to a multiple of $2; $2 must be a power of two.
+	local val="$1"
+	local align="$2"	# may itself be an arithmetic expression; $(( )) evaluates the variable's value
+	echo $(( (val + (align - 1)) & (~(align - 1)) ))	# add align-1, then clear the low bits
+}
+
+function hex() {	# Print the integer $1 as 0x-prefixed lowercase hexadecimal.
+	printf '0x%x\n' "$1"	# quoted so the argument is never word-split or glob-expanded
+}
+
+function template_overlay() {
+	# Write to $2 a copy of DTB $1 plus a pkvm_template node; $3/$4 fill the low reg cells.
+	local in="$1" out="$2" addr="$3" size="$4"
+	local tmp rc=0
+	tmp="$(mktemp)" || return	# assigned apart from 'local' so a mktemp failure is not masked
+
+	# Convert input DTB back to source.
+	dtc -I dtb -O dts -o "${tmp}" "${in}"
+
+	# Append an overlay describing the template.
+	cat <<EOF >> "${tmp}"
+&{/} {
+	pkvm_template@${addr} {
+		compatible = "pkvm,arm64";
+		reg = <0x00 ${addr} 0x00 ${size}>;
+	};
+};
+EOF
+	# Compile back to DTB, then delete the scratch source; dtc's status is returned.
+	dtc -I dts -O dtb -o "${out}" "${tmp}" || rc=$?
+	rm -f -- "${tmp}"
+	return "${rc}"
+}
+
 CMD=()
 APPEND=()
 EXTRA_RO_MOUNTS=()
 
-while getopts ":e:L:k:r:R:c:s:m:g:t:vGh" OPT; do
+while getopts ":e:L:k:r:R:T:c:s:m:g:t:vGh" OPT; do
 	case "${OPT}" in
 	e)	QEMU="${OPTARG}"		;;
 	L)	ROM_DIR="${OPTARG}"		;;
 	k)	KERNEL="${OPTARG}"		;;
 	r)	ROOTFS="${OPTARG}"		;;
 	R)	EXTRA_RO_MOUNTS+=("${OPTARG}")	;;
+	T)	TEMPLATE="${OPTARG}"		;;
 	c)	CPU="${OPTARG}"			;;
 	s)	SMP="${OPTARG}"			;;
 	m)	RAM="${OPTARG}"			;;
@@ -127,6 +172,32 @@
 
 CMD+=(-append "${APPEND[*]}")
 
+if [ -n "${TEMPLATE}" ]; then	# only when a template image was given (-T / TEMPLATE)
+	# Dump the DTB that QEMU generates for this command line.
+	QEMU_DTB="$(mktemp)"
+	"${CMD[@]}" -machine dumpdtb="${QEMU_DTB}" > /dev/null
+
+	# Compile the overlaid DTB with placeholder values to determine its size.
+	TMP_DTB="$(mktemp)"
+	template_overlay "${QEMU_DTB}" "${TMP_DTB}" 0xffffffff 0xffffffff	# as wide as the real values: the address is part of the node name
+	TMP_DTB_SIZE=$(file_size "${TMP_DTB}")
+
+	# From QEMU's hw/arm/boot.c: RAM starts at 1GiB PA and the kernel is
+	# placed there; 128MiB (or RAM/2 if RAM<256MiB) is left for it to
+	# decompress, followed by the ramdisk and then the 2MiB-aligned DTB.
+	# NOTE: the sum below uses a fixed 128MiB gap and adds no ramdisk size.
+	# We place the template at the following page (assume 64KiB page size).
+	TEMPLATE_ADDR=$(( 1*GiB + 128*MiB + TMP_DTB_SIZE ))
+	TEMPLATE_ADDR=$(hex "$(align_up_pow2 "${TEMPLATE_ADDR}" "$((64 * KiB))")")
+	TEMPLATE_SIZE=$(hex "$(file_size "${TEMPLATE}")")
+
+	QEMU_PATCHED_DTB="$(mktemp)"
+	template_overlay "${QEMU_DTB}" "${QEMU_PATCHED_DTB}" "${TEMPLATE_ADDR}" "${TEMPLATE_SIZE}"
+
+	CMD+=(-dtb "${QEMU_PATCHED_DTB}")
+	CMD+=(-device "loader,file=${TEMPLATE},addr=${TEMPLATE_ADDR},force-raw=true")
+fi
+
 if [ "${VERBOSE}" -eq 1 ]; then
 	set -x
 fi