#!/usr/bin/env bash

# Copyright 2020 The Android KVM Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Load the shared definitions from common.inc, located one directory above
# this script. The path is derived from the script's own location
# (BASH_SOURCE), not from the caller's working directory.
source "$(dirname "${BASH_SOURCE[0]}")/../common.inc"

# Tunables of this script and their fallback values. Most of them can also be
# set from the command line (see the getopts loop further down).
# NOTE(review): default_var is not defined in this file; presumably it comes
# from common.inc, keeps a value that is already set and records the fallback
# as DEFAULT_<NAME> (usage() prints DEFAULT_CPU, DEFAULT_SMP, DEFAULT_RAM and
# DEFAULT_GIC) -- confirm against common.inc.
# Binaries and images handed to QEMU (-e, -L, -k, -r, -F).
default_var QEMU		"${PREBUILTS_QEMU_BIN}"
default_var ROM_DIR		"${PREBUILTS_QEMU_ROM_DIR}"
default_var KERNEL		"${LINUX_OUT_IMAGE}"
default_var ROOTFS		"${PREBUILTS_KUT_ROOTFS}"
default_var PVMFW		""
# Emulated machine (-c, -s, -m, -g). An empty CPU is replaced later by the
# model matching the selected KVM mode.
default_var CPU			""
default_var SMP			2
default_var RAM			512
default_var GIC			3
# Run-time behaviour (-G, -M, -v, -K, -t). MODE_CPU is filled in by the MODE
# dispatch below.
default_var GDB			0
default_var MODE		"pkvm"
default_var MODE_CPU		""
default_var VERBOSE		0
default_var KEEP_TEMP		0
default_var TIMEOUT		""

# CPU models passed to QEMU when the user does not pick one with -c:
# one for VHE runs, one for nVHE (and pKVM) runs.
CPU_VHE="max"
CPU_NVHE="cortex-a53"

# Binary size units, expressed in bytes.
KiB=$(( 1 << 10 ))
MiB=$(( KiB << 10 ))
GiB=$(( MiB << 10 ))

# Alignment granule: the largest PAGE_SIZE a kernel may have been built with.
MAX_SUPPORTED_PAGE_SIZE=$(( KiB * 64 ))
# From QEMU's hw/arm/boot.c:
# RAM always starts at the 1GiB physical address, and the kernel is loaded
# right there.
KERNEL_ADDR=${GiB}

# Print the command-line help text to stdout.
# Callers redirect the output to stderr when reporting a usage error.
# Reads DEFAULT_CPU, DEFAULT_SMP, DEFAULT_RAM and DEFAULT_GIC for the
# "defaults to" hints.
function usage() {
	cat <<EOF

Usage: $0 [-h] [-v] [-K]
       [-e QEMU] [-L ROM_DIR] [-k KERNEL] [-r ROOTFS] [-R DRIVE]
       [-D TAG:PATH] [-F PVMFW]
       [-c CPU] [-s NUM_CPUS] [-m MEM] [-g GIC] [-M KVM_MODE] [-G]
       [-t TIMEOUT] [-a APPEND]

    -h    output this help text
    -v    print invoked command
    -e    QEMU emulator binary
    -L    directory where QEMU should look for BIOS image
    -k    kernel image
    -r    root filesystem image
    -R    additional drive image(s) to mount as read-only
    -D    'tag:path' pair to mount as read-only using 9p
    -F    pvmfw image
    -c    CPU model (defaults to "${DEFAULT_CPU}")
    -s    number of CPU cores (defaults to ${DEFAULT_SMP})
    -m    amount of memory in MB (defaults to ${DEFAULT_RAM})
    -g    version of GIC (defaults to ${DEFAULT_GIC})
    -M    KVM mode (defaults to 'pkvm', other: 'vhe', 'nvhe')
    -a    append to kernel command line
    -G    enable debugging of emulated system with GDB
    -t    kill QEMU after given number of seconds
    -K    keep temp files
EOF
}

# Temporary files created by this script; cleanup() removes them.
TMP_FILES=()

# Delete every file registered in TMP_FILES.
# Older versions of Bash treat the expansion of an empty array as an unset
# variable, so the array is only expanded once it is known to hold something.
function cleanup() {
	if [ "${#TMP_FILES[@]}" -eq 0 ]; then
		return 0
	fi
	rm -f "${TMP_FILES[@]}"
}

# Print the size in bytes of the file at path $1.
# Symbolic links are followed (-L): the callers need the size of the image
# itself, whereas plain stat would report the length of the link.
function file_size() {
	stat -L --format=%s -- "$1"
}

# Round $1 up to the next multiple of $2 and print the result in decimal.
# $2 must be a power of two: the rounding is done by masking the low bits.
function align_up_pow2() {
	local -r value="$1" boundary="$2"
	local -r mask=$(( boundary - 1 ))

	echo $(( (value + mask) & ~mask ))
}

# Round $1 up to a multiple of MAX_SUPPORTED_PAGE_SIZE and print the result.
# The helper is invoked directly rather than through 'echo $(...)', so its
# output is not word-split and its exit status reaches the caller.
function align_page_size() {
	align_up_pow2 "$1" "${MAX_SUPPORTED_PAGE_SIZE}"
}

# Print the number $1 in lower-case hexadecimal, followed by a newline.
# $2 holds the printf flags placed between '%' and 'x'. When it is omitted
# the '#' flag is used, which adds the "0x" prefix (for non-zero values);
# pass an empty string to get bare digits.
# $1 is quoted so that it is never word-split or expanded as a glob.
function hex() {
	local flags="${2-#}"

	printf "%${flags}x\n" "$1"
}

# Print the smaller of the two integers $1 and $2 (in decimal).
function min() {
	local -r lhs="$1" rhs="$2"

	if (( lhs < rhs )); then
		echo $(( lhs ))
	else
		echo $(( rhs ))
	fi
}

# Print a devicetree source fragment that adds a no-map reserved-memory node
# for the pvmfw image under the root node.
#   $1  physical address of the region
#   $2  size of the region in bytes
# Both values are converted with hex(); the node's unit address is the same
# address printed without the "0x" prefix.
function pvmfw_overlay() {
	local base=$(hex "$1")
	local length=$(hex "$2")
	local unit=$(hex "${base}" '')

	cat <<EOF
&{/} {
	reserved-memory {
		#address-cells = <2>;
		#size-cells = <2>;
		ranges;

		pvmfw: pkvm_guest_firmware@${unit} {
			compatible = "linux,pkvm-guest-firmware-memory";
			no-map;
			reg = <0x00 ${base} 0x00 ${length}>;
		};
	};
};
EOF
}

# Apply a devicetree source overlay to a compiled devicetree blob.
#   $1  path of the overlay, in source (dts) form
#   $2  path of the base blob (dtb)
# The base blob is decompiled with ${DTC}, the overlay text is appended to the
# resulting source, and the combination is compiled again. The new blob is
# written to stdout.
function inject_overlay() {
	local overlay_dts="$1"
	local base_dtb="$2"

	${DTC} -I dts -O dtb <(
		${DTC} -I dtb -O dts "${base_dtb}"
		cat "${overlay_dts}"
	)
}

# State accumulated while processing the command line:
#   CMD              words of the command to run (optional wrapper, QEMU and
#                    its options)
#   APPEND           words of the kernel command line
#   EXTRA_RO_MOUNTS  drive images given with -R
#   EXTRA_9P_MOUNTS  'tag:path' pairs given with -D
CMD=()
APPEND=()
EXTRA_RO_MOUNTS=()
EXTRA_9P_MOUNTS=()

# The leading ':' of the option string puts getopts in silent mode: an unknown
# option is reported as '?' and a missing argument as ':', with the offending
# option letter in OPTARG in both cases.
while getopts ":e:L:k:r:R:D:F:c:s:m:g:t:M:a:vGKh" OPT; do
	case "${OPT}" in
	# Switches.
	v) VERBOSE=1 ;;
	G) GDB=1 ;;
	K) KEEP_TEMP=1 ;;
	# Single-valued settings: the last occurrence wins.
	e) QEMU="${OPTARG}" ;;
	L) ROM_DIR="${OPTARG}" ;;
	k) KERNEL="${OPTARG}" ;;
	r) ROOTFS="${OPTARG}" ;;
	F) PVMFW="${OPTARG}" ;;
	c) CPU="${OPTARG}" ;;
	s) SMP="${OPTARG}" ;;
	m) RAM="${OPTARG}" ;;
	g) GIC="${OPTARG}" ;;
	t) TIMEOUT="${OPTARG}" ;;
	M) MODE="${OPTARG}" ;;
	# Repeatable options: every occurrence is recorded.
	R) EXTRA_RO_MOUNTS+=("${OPTARG}") ;;
	D) EXTRA_9P_MOUNTS+=("${OPTARG}") ;;
	a) APPEND+=("${OPTARG}") ;;
	h)
		usage
		exit 0
		;;
	\? | :)
		if [ "${OPT}" = ":" ]; then
			echo "Invalid option: -${OPTARG} requires an argument" 1>&2
		else
			echo "Invalid option: -${OPTARG}" 1>&2
		fi
		usage 1>&2
		exit 1
		;;
	esac
done

# Drop the parsed options; the script takes no positional arguments, so
# anything left over is an error.
shift $(( OPTIND - 1 ))
if [ "$#" -gt 0 ]; then
	echo "Unrecognized options: $@" 1>&2
	usage 1>&2
	exit 1
fi

# Remove the temporary files when the script exits, unless -K asked to keep
# them.
if [ "${KEEP_TEMP}" -ne 1 ]; then
	trap cleanup EXIT
fi

# With -t, run QEMU under timeout(1) so that it is stopped after TIMEOUT.
if [ -n "${TIMEOUT}" ]; then
	CMD+=(timeout -k 1s --foreground "${TIMEOUT}")
fi

# Translate the KVM mode into the CPU model to emulate and, for pKVM, into the
# kernel parameter that enables protected mode.
case "${MODE}" in
	vhe)	MODE_CPU="${CPU_VHE}";;
	nvhe)	MODE_CPU="${CPU_NVHE}";;
	pkvm)	APPEND+=(kvm-arm.mode=protected)
		MODE_CPU="${CPU_NVHE}"
		;;
	*)	echo "Unknown KVM mode: ${MODE}" 1>&2
		exit 1
		;;
esac

# If not set by the user, use the default CPU model for the given mode.
# Two separate tests joined by '&&' replace the obsolescent '-a' operator,
# whose parsing is ambiguous when an operand looks like a test operator.
if [ -z "${CPU}" ] && [ -n "${MODE_CPU}" ]; then
	CPU="${MODE_CPU}"
fi

# Base QEMU invocation: a 'virt' machine with virtualization enabled and the
# requested GIC version, CPU model, core count and memory size; the kernel
# image; the read-only root filesystem on virtio; a virtio RNG fed from
# /dev/urandom; no graphics and the serial port on stdio.
CMD+=("${QEMU}")
CMD+=(-M virt)
CMD+=(-machine virtualization=true -machine "virt,gic-version=${GIC}")
CMD+=(-cpu "${CPU}")
CMD+=(-smp "${SMP}")
CMD+=(-m "${RAM}")
CMD+=(-L "${ROM_DIR}")
CMD+=(-kernel "${KERNEL}")
CMD+=(-drive "file=${ROOTFS},readonly=on,if=virtio,format=raw")
CMD+=(-object rng-random,filename=/dev/urandom,id=rng0)
CMD+=(-device virtio-rng-pci,rng=rng0)
CMD+=(-nographic -nodefaults -serial stdio)

# The root filesystem is the first virtio drive.
APPEND+=(rootwait root=/dev/vda)

# Note: Due to a bug in older versions of Bash, use '${array[@]+"${array[@]}"}'
# to expand potentially empty arrays. '${array[@]}' is treated as undefined.
for MOUNT in ${EXTRA_RO_MOUNTS[@]+"${EXTRA_RO_MOUNTS[@]}"}; do
	CMD+=(-drive "file=${MOUNT},readonly=on,if=virtio,format=raw")
done

# Each -D argument is a 'tag:path' pair. The tag is everything before the
# first ':' and the path everything after it, so the path may itself contain
# colons. The split uses parameter expansion instead of 'echo | cut', which
# avoids two pipelines per mount. A spec without ':' yields the whole string
# for both parts, as before.
for MOUNT in ${EXTRA_9P_MOUNTS[@]+"${EXTRA_9P_MOUNTS[@]}"}; do
	TAG="${MOUNT%%:*}"
	DIR="${MOUNT#*:}"
	CMD+=(-virtfs)
	CMD+=("local,readonly=on,security_model=mapped-xattr,path=${DIR},mount_tag=${TAG}")
done

# -G: start QEMU stopped (-S) with its gdbstub listening (-s), and disable
# KASLR in the guest kernel.
if [ "${GDB}" -eq 1 ]; then
	CMD+=(-S -s)
	APPEND+=(nokaslr)
fi

# The kernel command line is passed as a single word.
CMD+=(-append "${APPEND[*]}")

# With -F, reserve a memory region for the pvmfw image in the devicetree and
# have QEMU load the image there.
if [ -n "${PVMFW}" ]; then
	# Register each temporary file for deletion on EXIT as soon as it exists,
	# so that a failure of the second mktemp cannot leak the first file.
	QEMU_DTB="$(mktemp)"
	TMP_FILES+=("${QEMU_DTB}")
	DTB="$(mktemp)"
	TMP_FILES+=("${DTB}")

	# Dump the QEMU DTB.
	"${CMD[@]}" -machine dumpdtb="${QEMU_DTB}" > /dev/null

	# Compile the overlayed DTB with placeholder values to determine its size.
	# The address ends up in the node name ("...@<hex address>"), so the
	# placeholder must have as many hex digits as the final address, or the
	# measured blob comes out smaller than the real one. KERNEL_ADDR qualifies:
	# the final address lies a little above it, below the next hex digit.
	# The size placeholder is stored in fixed-width cells and does not matter.
	inject_overlay <(pvmfw_overlay "${KERNEL_ADDR}" 0x0) "${QEMU_DTB}" >"${DTB}"

	# 128MiB (or RAM/2 if RAM<256MiB) is left to the kernel to decompress.
	KERNEL_PAD_SIZE=$(min $(( RAM * MiB / 2 )) $(( 128 * MiB )) )
	# This is followed by the ramdisk and then 2MiB-aligned DTB.
	KERNEL_PAD_SIZE=$(align_up_pow2 "${KERNEL_PAD_SIZE}" $(( 2 * MiB )))
	# We place the pvmfw at the following page.
	PVMFW_ADDR=$(( KERNEL_ADDR + KERNEL_PAD_SIZE + $(file_size "${DTB}") ))
	PVMFW_ADDR=$(hex "$(align_page_size "${PVMFW_ADDR}")")
	PVMFW_SIZE=$(file_size "${PVMFW}")
	# pKVM enforces a page-aligned size for the memory slot
	MEMSLOT_SIZE=$(align_page_size "${PVMFW_SIZE}")

	# Build the final DTB with the real address and size.
	inject_overlay <(pvmfw_overlay "${PVMFW_ADDR}" "${MEMSLOT_SIZE}") \
		"${QEMU_DTB}" >"${DTB}"

	CMD+=(-dtb "${DTB}")
	CMD+=(-device "loader,file=${PVMFW},addr=${PVMFW_ADDR},force-raw=true")
fi

# -v: turn on command tracing so that the QEMU invocation below is printed
# before it runs.
if [ "${VERBOSE}" -eq 1 ]; then
	set -x
fi

# Run QEMU as a child process and exit with its status. `exec` is avoided on
# purpose: the shell has to outlive QEMU for the EXIT handler to delete
# TMP_FILES. Errexit is switched off so that a non-zero status from QEMU
# reaches the `exit` below.
set +e
"${CMD[@]}"
exit $?
