#!/usr/bin/env bash

# Copyright 2020 The Android KVM Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Pull in the shared helpers and path variables. common.inc is not visible
# from this file; presumably it provides default_var, DTC and the
# PREBUILTS_* / LINUX_OUT_IMAGE values used below -- TODO confirm.
source "$(dirname "${BASH_SOURCE[0]}")/../common.inc"

# Settings of this script and their default values. Most of them can be
# overridden by the command-line options parsed further down.
# NOTE(review): default_var presumably leaves a value that is already set
# untouched and records DEFAULT_<NAME> (usage() prints DEFAULT_CPU,
# DEFAULT_SMP, DEFAULT_RAM and DEFAULT_GIC) -- confirm against common.inc.
default_var QEMU		"${PREBUILTS_QEMU_BIN}"
default_var ROM_DIR		"${PREBUILTS_QEMU_ROM_DIR}"
default_var KERNEL		"${LINUX_OUT_IMAGE}"
default_var ROOTFS		"${PREBUILTS_KUT_ROOTFS}"
default_var FIRMWARE		""
# An empty CPU means "use the CPU model that belongs to the selected MODE".
default_var CPU			""
default_var SMP			2
default_var RAM			512
default_var GIC			3
default_var GDB			0
default_var MODE		"pkvm"
default_var MODE_CPU		""
default_var VERBOSE		0
default_var KEEP_TEMP		0
# An empty TIMEOUT means QEMU is not wrapped in `timeout`.
default_var TIMEOUT		""

# QEMU CPUs to use for VHE and nVHE runs
CPU_VHE="max"
CPU_NVHE="cortex-a53"

# Byte-size multipliers used by the address arithmetic later in the script.
KiB=1024
MiB=$((1024 * KiB))
GiB=$((1024 * MiB))

# Print the command-line help text to stdout.
#
# Globals:   DEFAULT_CPU, DEFAULT_SMP, DEFAULT_RAM, DEFAULT_GIC (read)
# Arguments: none
# Outputs:   the help text; callers redirect it to stderr on errors
#
# The option letters listed here must match the getopts string used by the
# option parser: the KVM mode is selected with -M, while -m sets the memory.
function usage() {
	cat <<EOF

Usage: $0 [-h] [-v] [-K]
       [-e QEMU] [-L ROM_DIR] [-k KERNEL] [-r ROOTFS] [-R DRIVE] [-F FIRMWARE]
       [-c CPU] [-s NUM_CPUS] [-m MEM] [-g GIC] [-M KVM_MODE] [-G]
       [-t TIMEOUT] [-a APPEND]

    -h    output this help text
    -v    print invoked command
    -e    QEMU emulator binary
    -L    directory where QEMU should look for BIOS image
    -k    kernel image
    -r    root filesystem image
    -R    additional drive image(s) to mount as read-only
    -F    VM firmware image
    -c    CPU model (defaults to "${DEFAULT_CPU}")
    -s    number of CPU cores (defaults to ${DEFAULT_SMP})
    -m    amount of memory in MB (defaults to ${DEFAULT_RAM})
    -g    version of GIC (defaults to ${DEFAULT_GIC})
    -M    KVM mode (defaults to 'pkvm', other: 'vhe', 'nvhe')
    -a    append to kernel command line
    -G    enable debugging of emulated system with GDB
    -t    kill QEMU after given number of seconds
    -K    keep temp files
EOF
}

# Temporary files created by this script; cleanup() deletes them.
#
# Note: older versions of Bash treat '${array[@]}' of an empty array as
# undefined. Elsewhere use '${array[@]+"${array[@]}"}' to expand arrays that
# may be empty; here the element count is checked first instead.
TMP_FILES=()

# Delete every file recorded in TMP_FILES. Doing nothing for an empty list
# is equivalent to running `rm -f` without operands.
function cleanup() {
	if [ "${#TMP_FILES[@]}" -eq 0 ]; then
		return 0
	fi
	rm -f "${TMP_FILES[@]}"
}

# Print the size of a file in bytes.
#
# Arguments: $1 - path of the file to inspect
# Outputs:   the size as a decimal number on stdout
# Returns:   the exit status of stat
function file_size() {
	local path="$1"
	stat -c '%s' "${path}"
}

# Round a value up to the next multiple of an alignment.
#
# Arguments: $1 - value to align
#            $2 - alignment; the mask arithmetic is only correct when it
#                 is a power of two
# Outputs:   the aligned value as a decimal number on stdout
#
# Both arguments are used inside arithmetic expansion, so they may also be
# arithmetic expressions.
function align_up_pow2() {
	local value="$1"
	local alignment="$2"
	# Low bits that must be cleared in an aligned value.
	local mask=$(( alignment - 1 ))
	echo $(( (value + mask) & ~mask ))
}

# Print a number in hexadecimal with a leading "0x".
#
# Arguments: $1 - number to convert
# Outputs:   the hexadecimal representation followed by a newline
#
# The argument is quoted so that it always reaches printf as a single word,
# regardless of IFS or of glob characters in the value.
function hex() {
	printf '0x%x\n' "$1"
}

# On a real system, the firmware, not the kernel, would live in a separate
# memory slot, described in the DT as "pkvm_guest_firmware" and
# compatible = "linux,pkvm-guest-firmware-memory". Here, as a temporary
# solution for testing on QEMU, we put the kernel in such memory slot.
#
# Produce a copy of a DTB that contains an extra node describing where the
# kernel payload is located.
#
# Globals:   DTC (read) - device tree compiler command; it is expanded
#            unquoted, so it may carry additional arguments
# Arguments: $1 - input DTB
#            $2 - output DTB to write
#            $3 - scratch file that receives the intermediate DTS
#            $4 - payload address (used in the node name and in reg)
#            $5 - payload size
# Returns:   non-zero as soon as one of the steps fails, so that a stale
#            scratch file is never compiled into the output DTB
function kernel_payload_overlay() {
	local in="$1"
	local out="$2"
	local tmp="$3"
	local addr="$4"
	local size="$5"

	# Convert input DTB back to source.
	${DTC} -I dtb -O dts -o "${tmp}" "${in}" || return

	# Append an overlay describing the kernel payload.
	cat <<EOF >> "${tmp}" || return
&{/} {
	pkvm_kernel_payload@${addr} {
		compatible = "pkvm,arm64";
		#address-cells = <2>;
		#size-cells = <1>;
		reg = <0x00 ${addr} ${size}>;
	};
};
EOF

	# Compile back to DTB.
	${DTC} -I dts -O dtb -o "${out}" "${tmp}"
}

# Lists that are filled while parsing the options and assembling the command:
#   CMD             - the command line to execute
#   APPEND          - fragments of the kernel command line (-a)
#   EXTRA_RO_MOUNTS - additional read-only drive images (-R)
CMD=()
APPEND=()
EXTRA_RO_MOUNTS=()

# The leading ':' of the option string selects silent error reporting:
# unknown options are reported as '?' and missing arguments as ':', with the
# offending option letter in OPTARG.
while getopts ":e:L:k:r:R:F:c:s:m:g:t:M:a:vGKh" OPT; do
	case "${OPT}" in
	h)
		usage
		exit 0
		;;
	v)
		VERBOSE=1
		;;
	G)
		GDB=1
		;;
	K)
		KEEP_TEMP=1
		;;
	e)
		QEMU="${OPTARG}"
		;;
	L)
		ROM_DIR="${OPTARG}"
		;;
	k)
		KERNEL="${OPTARG}"
		;;
	r)
		ROOTFS="${OPTARG}"
		;;
	F)
		FIRMWARE="${OPTARG}"
		;;
	c)
		CPU="${OPTARG}"
		;;
	s)
		SMP="${OPTARG}"
		;;
	m)
		RAM="${OPTARG}"
		;;
	g)
		GIC="${OPTARG}"
		;;
	t)
		TIMEOUT="${OPTARG}"
		;;
	M)
		MODE="${OPTARG}"
		;;
	R)
		# May be given several times.
		EXTRA_RO_MOUNTS+=("${OPTARG}")
		;;
	a)
		# May be given several times.
		APPEND+=("${OPTARG}")
		;;
	\?)
		echo "Invalid option: -${OPTARG}" >&2
		usage >&2
		exit 1
		;;
	:)
		echo "Invalid option: -${OPTARG} requires an argument" >&2
		usage >&2
		exit 1
		;;
	esac
done

# Everything that is left after the options is an error.
shift "$((OPTIND - 1))"
if [[ $# -gt 0 ]]; then
	echo "Unrecognized options: $@" >&2
	usage >&2
	exit 1
fi

# Delete the temporary files on exit, unless the user asked to keep them.
if [ "${KEEP_TEMP}" -ne 1 ]; then trap cleanup EXIT; fi

# With a timeout, QEMU runs as a child of `timeout`.
if [[ -n "${TIMEOUT}" ]]; then
	CMD+=(timeout -k 1s --foreground "${TIMEOUT}")
fi

# When a firmware image is given, it is passed to QEMU in place of the kernel.
CMD_KERNEL="${FIRMWARE:-${KERNEL}}"

# Translate the KVM mode into its default CPU model and kernel parameters.
case "${MODE}" in
vhe)
	MODE_CPU="${CPU_VHE}"
	;;
nvhe)
	MODE_CPU="${CPU_NVHE}"
	;;
pkvm)
	MODE_CPU="${CPU_NVHE}"
	APPEND+=(kvm-arm.mode=protected)
	;;
*)
	echo "Unknown KVM mode: ${MODE}" >&2
	exit 1
	;;
esac

# A CPU model chosen by the user wins over the default of the mode.
CPU="${CPU:-${MODE_CPU}}"

# Assemble the QEMU invocation. Every value is quoted so that it ends up in
# exactly one argument, whatever characters it contains.
CMD+=(
	"${QEMU}"
	-M virt
	-machine virtualization=true
	-machine "virt,gic-version=${GIC}"
	-cpu "${CPU}"
	-smp "${SMP}"
	-m "${RAM}"
	-L "${ROM_DIR}"
	-kernel "${CMD_KERNEL}"
	-drive "file=${ROOTFS},readonly,if=virtio,format=raw"
	-object rng-random,filename=/dev/urandom,id=rng0
	-device virtio-rng-pci,rng=rng0
	-nographic -nodefaults -serial stdio
)

# The root filesystem is the first virtio drive.
APPEND+=(rootwait root=/dev/vda)

# Note: Due to a bug in older versions of Bash, use '${array[@]+"${array[@]}"}'
# to expand potentially empty arrays. '${array[@]}' is treated as undefined.
for MOUNT in ${EXTRA_RO_MOUNTS[@]+"${EXTRA_RO_MOUNTS[@]}"}; do
	CMD+=(-drive "file=${MOUNT},readonly,if=virtio,format=raw")
done

# GDB debugging: pass QEMU's -S and -s options and disable KASLR in the guest
# kernel.
if [ "${GDB}" -eq 1 ]; then
	CMD+=(-S -s)
	APPEND+=(nokaslr)
fi

# The kernel command line is passed as a single argument, with the fragments
# joined by the first character of IFS.
CMD+=(-append "${APPEND[*]}")

# With a firmware image, `-kernel` is the firmware. The actual kernel Image is
# loaded separately and its location is announced through a patched DTB.
if [ -n "${FIRMWARE}" ]; then
	# Mark each file for deletion on EXIT as soon as it exists, so that
	# nothing is left behind if the script stops between the mktemp calls.
	QEMU_DTB="$(mktemp)"
	TMP_FILES+=("${QEMU_DTB}")
	QEMU_PATCHED_DTB="$(mktemp)"
	TMP_FILES+=("${QEMU_PATCHED_DTB}")
	TMP_DTS="$(mktemp)"
	TMP_FILES+=("${TMP_DTS}")

	# Dump the QEMU DTB.
	"${CMD[@]}" -machine dumpdtb="${QEMU_DTB}" > /dev/null

	# From QEMU's hw/arm/boot.c:
	# RAM always starts at 1GiB PA offset. The kernel is placed there.
	# 128MiB (or RAM/2 if RAM<256MiB) is left to the kernel to decompress.
	# This is followed by the ramdisk and then 2MiB-aligned DTB.
	# NOTE(review): the fixed 128MiB below does not cover the RAM<256MiB
	# case -- confirm whether such small RAM sizes need to be supported.
	KERNEL_PAYLOAD_BASE=$(( 1 * GiB + 128 * MiB ))

	# Compile the overlayed DTB with placeholder values to determine its
	# size. The address is part of the node name, so the placeholder must
	# have as many digits as the final address; otherwise the measured
	# size would be smaller than the size of the final DTB.
	kernel_payload_overlay "${QEMU_DTB}" "${QEMU_PATCHED_DTB}" "${TMP_DTS}" \
			 "$(hex "${KERNEL_PAYLOAD_BASE}")" 0x0
	TMP_DTB_SIZE="$(file_size "${QEMU_PATCHED_DTB}")"

	# In our case, for testing, `-kernel` is the firmware, and we place the
	# actual kernel Image at the following page (assume 64KiB page size).
	# The alignment is computed here instead of being passed as the word
	# 64*KiB, which the shell could expand as a file name pattern.
	KERNEL_PAYLOAD_ADDR=$(( KERNEL_PAYLOAD_BASE + TMP_DTB_SIZE ))
	KERNEL_PAYLOAD_ADDR="$(hex "$(align_up_pow2 "${KERNEL_PAYLOAD_ADDR}" "$(( 64 * KiB ))")")"
	KERNEL_SIZE="$(hex "$(file_size "${KERNEL}")")"

	kernel_payload_overlay "${QEMU_DTB}" "${QEMU_PATCHED_DTB}" "${TMP_DTS}" \
			 "${KERNEL_PAYLOAD_ADDR}" "${KERNEL_SIZE}"

	CMD+=(-dtb "${QEMU_PATCHED_DTB}")
	CMD+=(-device "loader,file=${KERNEL},addr=${KERNEL_PAYLOAD_ADDR},force-raw=true")
fi

# In verbose mode, trace the commands that follow; this prints the invoked
# command line.
if [ "${VERBOSE}" -eq 1 ]; then set -x; fi

# Run QEMU as a child process and finish with its exit code. `exec` is not
# used here because the EXIT handler still has to delete TMP_FILES.
set +e
"${CMD[@]}"
exit $?
