Merge branch 'arm/queue' into 'master'

arm/arm64: LPA2 support and fpu/sve s/r test

See merge request kvm-unit-tests/kvm-unit-tests!61
diff --git a/arm/Makefile.arm64 b/arm/Makefile.arm64
index 960880f..3b9034e 100644
--- a/arm/Makefile.arm64
+++ b/arm/Makefile.arm64
@@ -10,9 +10,17 @@
 arch_LDFLAGS += -z notext
 CFLAGS += -mstrict-align
 
+sve_flag := $(call cc-option, -march=armv8.5-a+sve, "")
+ifneq ($(strip $(sve_flag)),)
+# Don't pass the option to the compiler, we don't
+# want the compiler to generate SVE instructions.
+CFLAGS += -DCC_HAS_SVE
+endif
+
 mno_outline_atomics := $(call cc-option, -mno-outline-atomics, "")
 CFLAGS += $(mno_outline_atomics)
 CFLAGS += -DCONFIG_RELOC
+CFLAGS += -mgeneral-regs-only
 
 define arch_elf_check =
 	$(if $(shell ! $(READELF) -rW $(1) >&/dev/null && echo "nok"),
@@ -48,6 +56,7 @@
 tests += $(TEST_DIR)/micro-bench.$(exe)
 tests += $(TEST_DIR)/cache.$(exe)
 tests += $(TEST_DIR)/debug.$(exe)
+tests += $(TEST_DIR)/fpu.$(exe)
 
 include $(SRCDIR)/$(TEST_DIR)/Makefile.common
 
diff --git a/arm/cstart.S b/arm/cstart.S
index 3dd71ed..29961c3 100644
--- a/arm/cstart.S
+++ b/arm/cstart.S
@@ -96,26 +96,59 @@
 .text
 
 /*
- * psci_invoke_hvc / psci_invoke_smc
+ * arm_smccc_hvc / arm_smccc_smc
  *
  * Inputs:
  *   r0 -- function_id
  *   r1 -- arg0
  *   r2 -- arg1
  *   r3 -- arg2
+ *   [sp] - arg3
+ *   [sp + #4] - arg4
+ *   [sp + #8] - arg5
+ *   [sp + #12] - arg6
+ *   [sp + #16] - arg7
+ *   [sp + #20] - arg8
+ *   [sp + #24] - arg9
+ *   [sp + #28] - arg10
+ *   [sp + #32] - result (as a pointer to a struct smccc_result)
  *
  * Outputs:
  *   r0 -- return code
+ *
+ * If result pointer is not NULL:
+ *   result.r0 -- return code
+ *   result.r1 -- r1
+ *   result.r2 -- r2
+ *   result.r3 -- r3
+ *   result.r4 -- r4
+ *   result.r5 -- r5
+ *   result.r6 -- r6
+ *   result.r7 -- r7
+ *   result.r8 -- r8
+ *   result.r9 -- r9
  */
-.globl psci_invoke_hvc
-psci_invoke_hvc:
-	hvc	#0
+.macro do_smccc_call instr
+	mov	r12, sp
+	push	{r4-r11}
+	ldm	r12, {r4-r11}
+	\instr	#0
+	ldr	r10, [sp, #64]
+	cmp	r10, #0
+	beq	1f
+	stm	r10, {r0-r9}
+1:
+	pop	{r4-r11}
 	mov	pc, lr
+.endm
 
-.globl psci_invoke_smc
-psci_invoke_smc:
-	smc	#0
-	mov	pc, lr
+.globl arm_smccc_hvc
+arm_smccc_hvc:
+	do_smccc_call hvc
+
+.globl arm_smccc_smc
+arm_smccc_smc:
+	do_smccc_call smc
 
 enable_vfp:
 	/* Enable full access to CP10 and CP11: */
diff --git a/arm/cstart64.S b/arm/cstart64.S
index bc2be45..b480a55 100644
--- a/arm/cstart64.S
+++ b/arm/cstart64.S
@@ -12,6 +12,7 @@
 #include <asm/ptrace.h>
 #include <asm/page.h>
 #include <asm/pgtable-hwdef.h>
+#include <asm/processor.h>
 #include <asm/thread_info.h>
 #include <asm/sysreg.h>
 
@@ -88,8 +89,9 @@
 	adrp    x4, stackptr
 	add     sp, x4, :lo12:stackptr
 
-	/* enable FP/ASIMD */
-	mov	x4, #(3 << 20)
+	/* enable FP/ASIMD and SVE */
+	mov	x4, (3 << 20)
+	orr	x4, x4, (3 << 16)
 	msr	cpacr_el1, x4
 
 	/* set up exception handling */
@@ -116,26 +118,65 @@
 .text
 
 /*
- * psci_invoke_hvc / psci_invoke_smc
+ * arm_smccc_hvc / arm_smccc_smc
  *
  * Inputs:
  *   w0 -- function_id
  *   x1 -- arg0
  *   x2 -- arg1
  *   x3 -- arg2
+ *   x4 -- arg3
+ *   x5 -- arg4
+ *   x6 -- arg5
+ *   x7 -- arg6
+ *   sp -- { arg7, arg8, arg9, arg10, result }
  *
  * Outputs:
  *   x0 -- return code
+ *
+ * If result pointer is not NULL:
+ *   result.r0 -- return code
+ *   result.r1 -- x1
+ *   result.r2 -- x2
+ *   result.r3 -- x3
+ *   result.r4 -- x4
+ *   result.r5 -- x5
+ *   result.r6 -- x6
+ *   result.r7 -- x7
+ *   result.r8 -- x8
+ *   result.r9 -- x9
  */
-.globl psci_invoke_hvc
-psci_invoke_hvc:
-	hvc	#0
+.macro do_smccc_call instr
+	/* Save x8-x11 on stack */
+	stp	x9, x8,	  [sp, #-16]!
+	stp	x11, x10, [sp, #-16]!
+	/* Load arg7 - arg10 from the stack */
+	ldp	x8, x9,   [sp, #32]
+	ldp	x10, x11, [sp, #48]
+	\instr	#0
+	/* Get the result address */
+	ldr	x10, [sp, #64]
+	cmp	x10, xzr
+	b.eq	1f
+	stp	x0, x1, [x10, #0]
+	stp	x2, x3, [x10, #16]
+	stp	x4, x5, [x10, #32]
+	stp	x6, x7, [x10, #48]
+	stp	x8, x9, [x10, #64]
+1:
+	/* Restore x8-x11 from stack */
+	ldp	x11, x10, [sp], #16
+	ldp	x9, x8,   [sp], #16
 	ret
+.endm
 
-.globl psci_invoke_smc
-psci_invoke_smc:
-	smc	#0
-	ret
+.globl arm_smccc_hvc
+arm_smccc_hvc:
+	do_smccc_call hvc
+
+.globl arm_smccc_smc
+arm_smccc_smc:
+	do_smccc_call smc
 
 get_mmu_off:
 	adrp	x0, auxinfo
@@ -145,8 +186,9 @@
 
 .globl secondary_entry
 secondary_entry:
-	/* Enable FP/ASIMD */
+	/* enable FP/ASIMD and SVE */
 	mov	x0, #(3 << 20)
+	orr	x0, x0, #(3 << 16)
 	msr	cpacr_el1, x0
 
 	/* set up exception handling */
diff --git a/arm/fpu.c b/arm/fpu.c
new file mode 100644
index 0000000..39413fc
--- /dev/null
+++ b/arm/fpu.c
@@ -0,0 +1,422 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Arm Limited.
+ * All rights reserved.
+ */
+
+#include <libcflat.h>
+#include <asm/smp.h>
+#include <stdlib.h>
+
+#define CPU0_ID			0
+#define CPU1_ID			(CPU0_ID + 1)
+#define CPUS_MAX		(CPU1_ID + 1)
+#define FPU_QREG_MAX	32
+#define FPU_RESULT_PASS	(-1U)
+
+/*
+ * Write 8 bytes of random data in random. Returns true on success, false on
+ * failure.
+ */
+static inline bool arch_collect_entropy(uint64_t *random)
+{
+	unsigned long ret;
+
+	asm volatile(
+	"	mrs  %[ptr], " xstr(RNDR) "\n"
+	"	cset %[ret], ne\n" /* RNDR sets NZCV to 0b0100 on failure */
+	:
+	  [ret] "=r" (ret),
+	  [ptr] "=r" (*random)
+	:
+	: "cc"
+	);
+
+	return ret == 1;
+}
+
+#define fpu_reg_read(val)				\
+({							\
+	uint64_t *__val = (val);			\
+	asm volatile("stp q0, q1, [%0], #32\n\t"	\
+		     "stp q2, q3, [%0], #32\n\t"	\
+		     "stp q4, q5, [%0], #32\n\t"	\
+		     "stp q6, q7, [%0], #32\n\t"	\
+		     "stp q8, q9, [%0], #32\n\t"	\
+		     "stp q10, q11, [%0], #32\n\t"	\
+		     "stp q12, q13, [%0], #32\n\t"	\
+		     "stp q14, q15, [%0], #32\n\t"	\
+		     "stp q16, q17, [%0], #32\n\t"	\
+		     "stp q18, q19, [%0], #32\n\t"	\
+		     "stp q20, q21, [%0], #32\n\t"	\
+		     "stp q22, q23, [%0], #32\n\t"	\
+		     "stp q24, q25, [%0], #32\n\t"	\
+		     "stp q26, q27, [%0], #32\n\t"	\
+		     "stp q28, q29, [%0], #32\n\t"	\
+		     "stp q30, q31, [%0], #32\n\t"	\
+		     : "=r" (__val)			\
+		     :					\
+		     : "q0", "q1", "q2", "q3",		\
+			"q4", "q5", "q6", "q7",		\
+			"q8", "q9", "q10", "q11",	\
+			"q12", "q13", "q14",		\
+			"q15", "q16", "q17",		\
+			"q18", "q19", "q20",		\
+			"q21", "q22", "q23",		\
+			"q24", "q25", "q26",		\
+			"q27", "q28", "q29",		\
+			"q30", "q31", "memory");	\
+})
+
+#define fpu_reg_write(val)				\
+do {							\
+	uint64_t *__val = (val);			\
+	asm volatile("ldp q0, q1, [%0], #32\n\t"	\
+		     "ldp q2, q3, [%0], #32\n\t"	\
+		     "ldp q4, q5, [%0], #32\n\t"	\
+		     "ldp q6, q7, [%0], #32\n\t"	\
+		     "ldp q8, q9, [%0], #32\n\t"	\
+		     "ldp q10, q11, [%0], #32\n\t"	\
+		     "ldp q12, q13, [%0], #32\n\t"	\
+		     "ldp q14, q15, [%0], #32\n\t"	\
+		     "ldp q16, q17, [%0], #32\n\t"	\
+		     "ldp q18, q19, [%0], #32\n\t"	\
+		     "ldp q20, q21, [%0], #32\n\t"	\
+		     "ldp q22, q23, [%0], #32\n\t"	\
+		     "ldp q24, q25, [%0], #32\n\t"	\
+		     "ldp q26, q27, [%0], #32\n\t"	\
+		     "ldp q28, q29, [%0], #32\n\t"	\
+		     "ldp q30, q31, [%0], #32\n\t"	\
+		     :					\
+		     : "r" (__val)			\
+		     : "q0", "q1", "q2", "q3",		\
+			"q4", "q5", "q6", "q7",		\
+			"q8", "q9", "q10", "q11",	\
+			"q12", "q13", "q14",		\
+			"q15", "q16", "q17",		\
+			"q18", "q19", "q20",		\
+			"q21", "q22", "q23",		\
+			"q24", "q25", "q26",		\
+			"q27", "q28", "q29",		\
+			"q30", "q31", "memory");	\
+} while (0)
+
+#ifdef CC_HAS_SVE
+#define sve_reg_read(val)				\
+({							\
+	uint64_t *__val = (val);			\
+	asm volatile(".arch_extension sve\n"		\
+		     "str z0, [%0, #0, MUL VL]\n"	\
+		     "str z1, [%0, #1, MUL VL]\n"	\
+		     "str z2, [%0, #2, MUL VL]\n"	\
+		     "str z3, [%0, #3, MUL VL]\n"	\
+		     "str z4, [%0, #4, MUL VL]\n"	\
+		     "str z5, [%0, #5, MUL VL]\n"	\
+		     "str z6, [%0, #6, MUL VL]\n"	\
+		     "str z7, [%0, #7, MUL VL]\n"	\
+		     "str z8, [%0, #8, MUL VL]\n"	\
+		     "str z9, [%0, #9, MUL VL]\n"	\
+		     "str z10, [%0, #10, MUL VL]\n"	\
+		     "str z11, [%0, #11, MUL VL]\n"	\
+		     "str z12, [%0, #12, MUL VL]\n"	\
+		     "str z13, [%0, #13, MUL VL]\n"	\
+		     "str z14, [%0, #14, MUL VL]\n"	\
+		     "str z15, [%0, #15, MUL VL]\n"	\
+		     "str z16, [%0, #16, MUL VL]\n"	\
+		     "str z17, [%0, #17, MUL VL]\n"	\
+		     "str z18, [%0, #18, MUL VL]\n"	\
+		     "str z19, [%0, #19, MUL VL]\n"	\
+		     "str z20, [%0, #20, MUL VL]\n"	\
+		     "str z21, [%0, #21, MUL VL]\n"	\
+		     "str z22, [%0, #22, MUL VL]\n"	\
+		     "str z23, [%0, #23, MUL VL]\n"	\
+		     "str z24, [%0, #24, MUL VL]\n"	\
+		     "str z25, [%0, #25, MUL VL]\n"	\
+		     "str z26, [%0, #26, MUL VL]\n"	\
+		     "str z27, [%0, #27, MUL VL]\n"	\
+		     "str z28, [%0, #28, MUL VL]\n"	\
+		     "str z29, [%0, #29, MUL VL]\n"	\
+		     "str z30, [%0, #30, MUL VL]\n"	\
+		     "str z31, [%0, #31, MUL VL]\n"	\
+		     : "=r" (__val)			\
+		     :					\
+		     : "z0", "z1", "z2", "z3",		\
+			"z4", "z5", "z6", "z7",		\
+			"z8", "z9", "z10", "z11",	\
+			"z12", "z13", "z14",		\
+			"z15", "z16", "z17",		\
+			"z18", "z19", "z20",		\
+			"z21", "z22", "z23",		\
+			"z24", "z25", "z26",		\
+			"z27", "z28", "z29",		\
+			"z30", "z31", "memory");	\
+})
+
+#define sve_reg_write(val)				\
+({							\
+	uint64_t *__val = (val);			\
+	asm volatile(".arch_extension sve\n"		\
+		     "ldr z0, [%0, #0, MUL VL]\n"	\
+		     "ldr z1, [%0, #1, MUL VL]\n"	\
+		     "ldr z2, [%0, #2, MUL VL]\n"	\
+		     "ldr z3, [%0, #3, MUL VL]\n"	\
+		     "ldr z4, [%0, #4, MUL VL]\n"	\
+		     "ldr z5, [%0, #5, MUL VL]\n"	\
+		     "ldr z6, [%0, #6, MUL VL]\n"	\
+		     "ldr z7, [%0, #7, MUL VL]\n"	\
+		     "ldr z8, [%0, #8, MUL VL]\n"	\
+		     "ldr z9, [%0, #9, MUL VL]\n"	\
+		     "ldr z10, [%0, #10, MUL VL]\n"	\
+		     "ldr z11, [%0, #11, MUL VL]\n"	\
+		     "ldr z12, [%0, #12, MUL VL]\n"	\
+		     "ldr z13, [%0, #13, MUL VL]\n"	\
+		     "ldr z14, [%0, #14, MUL VL]\n"	\
+		     "ldr z15, [%0, #15, MUL VL]\n"	\
+		     "ldr z16, [%0, #16, MUL VL]\n"	\
+		     "ldr z17, [%0, #17, MUL VL]\n"	\
+		     "ldr z18, [%0, #18, MUL VL]\n"	\
+		     "ldr z19, [%0, #19, MUL VL]\n"	\
+		     "ldr z20, [%0, #20, MUL VL]\n"	\
+		     "ldr z21, [%0, #21, MUL VL]\n"	\
+		     "ldr z22, [%0, #22, MUL VL]\n"	\
+		     "ldr z23, [%0, #23, MUL VL]\n"	\
+		     "ldr z24, [%0, #24, MUL VL]\n"	\
+		     "ldr z25, [%0, #25, MUL VL]\n"	\
+		     "ldr z26, [%0, #26, MUL VL]\n"	\
+		     "ldr z27, [%0, #27, MUL VL]\n"	\
+		     "ldr z28, [%0, #28, MUL VL]\n"	\
+		     "ldr z29, [%0, #29, MUL VL]\n"	\
+		     "ldr z30, [%0, #30, MUL VL]\n"	\
+		     "ldr z31, [%0, #31, MUL VL]\n"	\
+		     :					\
+		     : "r" (__val)			\
+		     : "z0", "z1", "z2", "z3",		\
+			"z4", "z5", "z6", "z7",		\
+			"z8", "z9", "z10", "z11",	\
+			"z12", "z13", "z14",		\
+			"z15", "z16", "z17",		\
+			"z18", "z19", "z20",		\
+			"z21", "z22", "z23",		\
+			"z24", "z25", "z26",		\
+			"z27", "z28", "z29",		\
+			"z30", "z31", "memory");	\
+})
+#else
+#define sve_reg_read(val)	report_abort("SVE: not supported")
+#define sve_reg_write(val)	report_abort("SVE: not supported")
+#endif
+
+static void nr_cpu_check(int nr)
+{
+	if (nr_cpus < nr)
+		report_abort("At least %d cpus required", nr);
+}
+
+/**
+ * @brief check if the FPU/SIMD/SVE register contents are the same as
+ * the input data provided.
+ */
+static uint32_t __fpuregs_testall(uint64_t *indata, int sve)
+{
+	/* 128b aligned array to read data into */
+	uint64_t outdata[FPU_QREG_MAX * 2]
+			 __attribute__((aligned(sizeof(__uint128_t)))) = {
+			[0 ... ((FPU_QREG_MAX * 2) - 1)] = 0 };
+	uint8_t regcnt	= 0;
+	uint32_t result	= 0;
+
+	if (indata == NULL)
+		report_abort("invalid data pointer received");
+
+	/* Read data from FPU/SVE registers */
+	if (sve)
+		sve_reg_read(outdata);
+	else
+		fpu_reg_read(outdata);
+
+	/* Check is the data is the same */
+	for (regcnt = 0; regcnt < (FPU_QREG_MAX * 2); regcnt += 2) {
+		if ((outdata[regcnt] != indata[regcnt]) ||
+			(outdata[regcnt + 1] != indata[regcnt + 1])) {
+			report_info(
+			"%s save/restore failed for reg: %c%u expected: %lx_%lx received: %lx_%lx\n",
+			sve ? "SVE" : "FPU/SIMD",
+			sve ? 'z' : 'q',
+			regcnt / 2,
+			indata[regcnt + 1], indata[regcnt],
+			outdata[regcnt + 1], outdata[regcnt]);
+		} else {
+			/* populate a bitmask indicating which
+			 * registers passed/failed
+			 */
+			result |= (1 << (regcnt / 2));
+		}
+	}
+
+	return result;
+}
+
+/**
+ * @brief writes randomly sampled data into the FPU/SIMD registers.
+ */
+static void __fpuregs_writeall_random(uint64_t **indata, int sve)
+{
+	/* allocate 128b aligned memory */
+	*indata = memalign(sizeof(__uint128_t), sizeof(uint64_t) * FPU_QREG_MAX);
+
+	if (system_supports_rndr()) {
+		/* Populate memory with random data */
+		for (unsigned int i = 0; i < (FPU_QREG_MAX * 2); i++)
+			while (!arch_collect_entropy(&(*indata)[i])) {}
+	} else {
+		/* Populate memory with data from the counter register */
+		for (unsigned int i = 0; i < (FPU_QREG_MAX * 2); i++)
+			(*indata)[i] = get_cntvct();
+	}
+
+	/* Write data into FPU registers */
+	if (sve)
+		sve_reg_write(*indata);
+	else
+		fpu_reg_write(*indata);
+}
+
+static void fpuregs_writeall_run(void *data)
+{
+	uint64_t **indata	= (uint64_t **)data;
+
+	__fpuregs_writeall_random(indata, 0);
+}
+
+static void sveregs_writeall_run(void *data)
+{
+	uint64_t **indata	= (uint64_t **)data;
+
+	__fpuregs_writeall_random(indata, 1);
+}
+
+static void fpuregs_testall_run(void *data)
+{
+	uint64_t *indata	= (uint64_t *)data;
+	uint32_t result		= 0;
+
+	result = __fpuregs_testall(indata, 0);
+	report((result == FPU_RESULT_PASS),
+	       "FPU/SIMD register save/restore mask: 0x%x", result);
+}
+
+static void sveregs_testall_run(void *data)
+{
+	uint64_t *indata	= (uint64_t *)data;
+	uint32_t result		= 0;
+
+	result = __fpuregs_testall(indata, 1);
+	report((result == FPU_RESULT_PASS),
+	       "SVE register save/restore mask: 0x%x", result);
+}
+
+/**
+ * @brief This test uses two CPUs to test FPU/SIMD save/restore
+ * @details CPU1 writes random data into FPU/SIMD registers,
+ * CPU0 corrupts/overwrites the data and finally CPU1 checks
+ * if the data remains unchanged in its context.
+ */
+static void fpuregs_context_switch_cpu1(int sve)
+{
+	int target		= CPU1_ID;
+	uint64_t *indata_remote	= NULL;
+	uint64_t *indata_local	= NULL;
+
+	/* write data from CPU1 */
+	on_cpu(target, sve ? sveregs_writeall_run
+	                   : fpuregs_writeall_run,
+	       &indata_remote);
+
+	/* Overwrite from CPU0 */
+	__fpuregs_writeall_random(&indata_local, sve);
+
+	/* Check data consistency */
+	on_cpu(target, sve ? sveregs_testall_run
+	                   : fpuregs_testall_run,
+	       indata_remote);
+
+	free(indata_remote);
+	free(indata_local);
+}
+
+/**
+ * @brief This test uses two CPUs to test FPU/SIMD save/restore
+ * @details CPU0 writes random data into FPU/SIMD registers,
+ * CPU1 corrupts/overwrites the data and finally CPU0 checks if
+ * the data remains unchanged in its context.
+ */
+static void fpuregs_context_switch_cpu0(int sve)
+{
+	int target		= CPU1_ID;
+	uint64_t *indata_local	= NULL;
+	uint64_t *indata_remote	= NULL;
+	uint32_t result		= 0;
+
+	/* write data from CPU0 */
+	__fpuregs_writeall_random(&indata_local, sve);
+
+	/* Overwrite from CPU1 */
+	on_cpu(target, sve ? sveregs_writeall_run
+	                   : fpuregs_writeall_run,
+	       &indata_remote);
+
+	/* Check data consistency */
+	result = __fpuregs_testall(indata_local, sve);
+	report((result == FPU_RESULT_PASS),
+	       "%s register save/restore mask: 0x%x", sve ? "SVE" : "FPU/SIMD", result);
+
+	free(indata_remote);
+	free(indata_local);
+}
+
+/**
+ * Checks if during context switch, FPU/SIMD registers
+ * are saved/restored.
+ */
+static void fpuregs_context_switch(void)
+{
+	fpuregs_context_switch_cpu0(0);
+	fpuregs_context_switch_cpu1(0);
+}
+
+/**
+ * Checks if during context switch, SVE registers
+ * are saved/restored.
+ */
+static void sveregs_context_switch(void)
+{
+	unsigned long zcr = read_sysreg(ZCR_EL1);
+
+	// Set the SVE vector length to 128-bits
+	write_sysreg(zcr & ~ZCR_EL1_LEN, ZCR_EL1);
+
+	fpuregs_context_switch_cpu0(1);
+	fpuregs_context_switch_cpu1(1);
+}
+
+static bool should_run_sve_tests(void)
+{
+#ifdef CC_HAS_SVE
+	if (system_supports_sve())
+		return true;
+#endif
+	return false;
+}
+
+int main(int argc, char **argv)
+{
+	report_prefix_pushf("fpu");
+
+	nr_cpu_check(CPUS_MAX);
+	fpuregs_context_switch();
+
+	if (should_run_sve_tests())
+		sveregs_context_switch();
+
+	return report_summary();
+}
diff --git a/arm/selftest.c b/arm/selftest.c
index 007d230..1553ed8 100644
--- a/arm/selftest.c
+++ b/arm/selftest.c
@@ -406,7 +406,7 @@
 	int ver = psci_invoke(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0);
 	report_info("PSCI version: %d.%d", PSCI_VERSION_MAJOR(ver),
 					  PSCI_VERSION_MINOR(ver));
-	report_info("PSCI method: %s", psci_invoke == psci_invoke_hvc ?
+	report_info("PSCI method: %s", psci_invoke_fn == arm_smccc_hvc ?
 				       "hvc" : "smc");
 }
 
diff --git a/arm/unittests.cfg b/arm/unittests.cfg
index 54cedea..2bdad67 100644
--- a/arm/unittests.cfg
+++ b/arm/unittests.cfg
@@ -263,3 +263,11 @@
 arch = arm64
 extra_params = -append 'ss-migration'
 groups = debug migration
+
+# FPU/SIMD test
+[fpu-context]
+file = fpu.flat
+smp = 2
+groups = nodefault
+accel = kvm
+arch = arm64
diff --git a/lib/arm/asm/arm-smccc.h b/lib/arm/asm/arm-smccc.h
new file mode 100644
index 0000000..5d85b01
--- /dev/null
+++ b/lib/arm/asm/arm-smccc.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022 Arm Limited.
+ * All rights reserved.
+ */
+#ifndef _ASMARM_ARM_SMCCC_H_
+#define _ASMARM_ARM_SMCCC_H_
+
+struct smccc_result {
+	unsigned long r0;
+	unsigned long r1;
+	unsigned long r2;
+	unsigned long r3;
+	unsigned long r4;
+	unsigned long r5;
+	unsigned long r6;
+	unsigned long r7;
+	unsigned long r8;
+	unsigned long r9;
+};
+
+typedef int (*smccc_invoke_fn)(unsigned int function_id, unsigned long arg0,
+			       unsigned long arg1, unsigned long arg2,
+			       unsigned long arg3, unsigned long arg4,
+			       unsigned long arg5, unsigned long arg6,
+			       unsigned long arg7, unsigned long arg8,
+			       unsigned long arg9, unsigned long arg10,
+			       struct smccc_result *result);
+extern int arm_smccc_hvc(unsigned int function_id, unsigned long arg0,
+			 unsigned long arg1, unsigned long arg2,
+			 unsigned long arg3, unsigned long arg4,
+			 unsigned long arg5, unsigned long arg6,
+			 unsigned long arg7, unsigned long arg8,
+			 unsigned long arg9, unsigned long arg10,
+			 struct smccc_result *result);
+extern int arm_smccc_smc(unsigned int function_id, unsigned long arg0,
+			 unsigned long arg1, unsigned long arg2,
+			 unsigned long arg3, unsigned long arg4,
+			 unsigned long arg5, unsigned long arg6,
+			 unsigned long arg7, unsigned long arg8,
+			 unsigned long arg9, unsigned long arg10,
+			 struct smccc_result *result);
+
+#endif /* _ASMARM_ARM_SMCCC_H_ */
diff --git a/lib/arm/asm/pgtable.h b/lib/arm/asm/pgtable.h
index d7c7390..aa98d9a 100644
--- a/lib/arm/asm/pgtable.h
+++ b/lib/arm/asm/pgtable.h
@@ -13,7 +13,9 @@
  *
  * This work is licensed under the terms of the GNU GPL, version 2.
  */
+#include <alloc.h>
 #include <alloc_page.h>
+#include <asm/setup.h>
 
 /*
  * We can convert va <=> pa page table addresses with simple casts
diff --git a/lib/arm/asm/psci.h b/lib/arm/asm/psci.h
index cf03449..6a39962 100644
--- a/lib/arm/asm/psci.h
+++ b/lib/arm/asm/psci.h
@@ -3,13 +3,12 @@
 #include <libcflat.h>
 #include <linux/psci.h>
 
-typedef int (*psci_invoke_fn)(unsigned int function_id, unsigned long arg0,
-			      unsigned long arg1, unsigned long arg2);
-extern psci_invoke_fn psci_invoke;
-extern int psci_invoke_hvc(unsigned int function_id, unsigned long arg0,
-			   unsigned long arg1, unsigned long arg2);
-extern int psci_invoke_smc(unsigned int function_id, unsigned long arg0,
-			   unsigned long arg1, unsigned long arg2);
+#include <asm/arm-smccc.h>
+
+extern smccc_invoke_fn psci_invoke_fn;
+
+extern int psci_invoke(unsigned int function_id, unsigned long arg0,
+		       unsigned long arg1, unsigned long arg2);
 extern void psci_set_conduit(void);
 extern int psci_cpu_on(unsigned long cpuid, unsigned long entry_point);
 extern void psci_system_reset(void);
diff --git a/lib/arm/psci.c b/lib/arm/psci.c
index bddb078..e0614e0 100644
--- a/lib/arm/psci.c
+++ b/lib/arm/psci.c
@@ -13,13 +13,24 @@
 #include <asm/smp.h>
 
 static int psci_invoke_none(unsigned int function_id, unsigned long arg0,
-			    unsigned long arg1, unsigned long arg2)
+			    unsigned long arg1, unsigned long arg2,
+			    unsigned long arg3, unsigned long arg4,
+			    unsigned long arg5, unsigned long arg6,
+			    unsigned long arg7, unsigned long arg8,
+			    unsigned long arg9, unsigned long arg10,
+			    struct smccc_result *result)
 {
 	printf("No PSCI method configured! Can't invoke...\n");
 	return PSCI_RET_NOT_PRESENT;
 }
 
-psci_invoke_fn psci_invoke = psci_invoke_none;
+smccc_invoke_fn psci_invoke_fn = psci_invoke_none;
+
+int psci_invoke(unsigned int function_id, unsigned long arg0,
+		unsigned long arg1, unsigned long arg2)
+{
+	return psci_invoke_fn(function_id, arg0, arg1, arg2, 0, 0, 0, 0, 0, 0, 0, 0, NULL);
+}
 
 int psci_cpu_on(unsigned long cpuid, unsigned long entry_point)
 {
@@ -69,9 +80,9 @@
 	assert(method != NULL && len == 4);
 
 	if (strcmp(method->data, "hvc") == 0)
-		psci_invoke = psci_invoke_hvc;
+		psci_invoke_fn = arm_smccc_hvc;
 	else if (strcmp(method->data, "smc") == 0)
-		psci_invoke = psci_invoke_smc;
+		psci_invoke_fn = arm_smccc_smc;
 	else
 		assert_msg(false, "Unknown PSCI conduit: %s", method->data);
 }
@@ -89,9 +100,9 @@
 		   "PSCI is not supported in this platform");
 
 	if (fadt->arm_boot_flags & ACPI_FADT_PSCI_USE_HVC)
-		psci_invoke = psci_invoke_hvc;
+		psci_invoke_fn = arm_smccc_hvc;
 	else
-		psci_invoke = psci_invoke_smc;
+		psci_invoke_fn = arm_smccc_smc;
 }
 
 #else
diff --git a/lib/arm/setup.c b/lib/arm/setup.c
index 2f649af..aaa0da9 100644
--- a/lib/arm/setup.c
+++ b/lib/arm/setup.c
@@ -35,6 +35,7 @@
 #define NR_MEM_REGIONS		(MAX_DT_MEM_REGIONS + NR_EXTRA_MEM_REGIONS)
 
 extern unsigned long _text, _etext, _data, _edata;
+extern unsigned long stacktop;
 
 char *initrd;
 u32 initrd_size;
@@ -196,6 +197,14 @@
 	u32 fdt_size;
 	int ret;
 
+#ifndef CONFIG_EFI
+	/*
+	 * Ensure that the FDT was not overlapping with the uninitialised
+	 * data that was overwritten.
+	 */
+	assert((unsigned long)fdt > (unsigned long)&stacktop);
+#endif
+
 	fdt_size = fdt_totalsize(fdt);
 	ret = fdt_move(fdt, *freemem, fdt_size);
 	assert(ret == 0);
diff --git a/lib/arm64/asm/arm-smccc.h b/lib/arm64/asm/arm-smccc.h
new file mode 100644
index 0000000..ab64948
--- /dev/null
+++ b/lib/arm64/asm/arm-smccc.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022 Arm Limited.
+ * All rights reserved.
+ */
+#include "../../arm/asm/arm-smccc.h"
diff --git a/lib/arm64/asm/esr.h b/lib/arm64/asm/esr.h
index 8c35163..335343c 100644
--- a/lib/arm64/asm/esr.h
+++ b/lib/arm64/asm/esr.h
@@ -26,6 +26,7 @@
 #define ESR_EL1_EC_SVC32	(0x11)
 #define ESR_EL1_EC_SVC64	(0x15)
 #define ESR_EL1_EC_SYS64	(0x18)
+#define ESR_EL1_EC_SVE		(0x19)
 #define ESR_EL1_EC_IABT_EL0	(0x20)
 #define ESR_EL1_EC_IABT_EL1	(0x21)
 #define ESR_EL1_EC_PC_ALIGN	(0x22)
diff --git a/lib/arm64/asm/processor.h b/lib/arm64/asm/processor.h
index 1c73ba3..b28d41f 100644
--- a/lib/arm64/asm/processor.h
+++ b/lib/arm64/asm/processor.h
@@ -110,31 +110,67 @@
 #define ID_AA64MMFR0_TGRAN64_SHIFT	24
 #define ID_AA64MMFR0_TGRAN16_SHIFT	20
 
-#define ID_AA64MMFR0_TGRAN4_SUPPORTED	0x0
-#define ID_AA64MMFR0_TGRAN64_SUPPORTED	0x0
-#define ID_AA64MMFR0_TGRAN16_SUPPORTED	0x1
+#define ID_AA64MMFR0_TGRAN4_SUPPORTED(r)			\
+({								\
+	u64 __v = ((r) >> ID_AA64MMFR0_TGRAN4_SHIFT) & 0xf;	\
+	(__v) == 0 || (__v) == 1;				\
+})
+
+#define ID_AA64MMFR0_TGRAN64_SUPPORTED(r)			\
+({								\
+	u64 __v = ((r) >> ID_AA64MMFR0_TGRAN64_SHIFT) & 0xf;	\
+	(__v) == 0;						\
+})
+
+#define ID_AA64MMFR0_TGRAN16_SUPPORTED(r)			\
+({								\
+	u64 __v = ((r) >> ID_AA64MMFR0_TGRAN16_SHIFT) & 0xf;	\
+	(__v) == 1 || (__v) == 2;				\
+})
 
 static inline bool system_supports_granule(size_t granule)
 {
-	u32 shift;
-	u32 val;
-	u64 mmfr0;
+	u64 mmfr0 = get_id_aa64mmfr0_el1();
 
-	if (granule == SZ_4K) {
-		shift = ID_AA64MMFR0_TGRAN4_SHIFT;
-		val = ID_AA64MMFR0_TGRAN4_SUPPORTED;
-	} else if (granule == SZ_16K) {
-		shift = ID_AA64MMFR0_TGRAN16_SHIFT;
-		val = ID_AA64MMFR0_TGRAN16_SUPPORTED;
-	} else {
-		assert(granule == SZ_64K);
-		shift = ID_AA64MMFR0_TGRAN64_SHIFT;
-		val = ID_AA64MMFR0_TGRAN64_SUPPORTED;
-	}
+	if (granule == SZ_4K)
+		return ID_AA64MMFR0_TGRAN4_SUPPORTED(mmfr0);
 
-	mmfr0 = get_id_aa64mmfr0_el1();
+	if (granule == SZ_16K)
+		return ID_AA64MMFR0_TGRAN16_SUPPORTED(mmfr0);
 
-	return ((mmfr0 >> shift) & 0xf) == val;
+	assert(granule == SZ_64K);
+	return ID_AA64MMFR0_TGRAN64_SUPPORTED(mmfr0);
+}
+
+static inline unsigned long get_id_aa64pfr0_el1(void)
+{
+	return read_sysreg(id_aa64pfr0_el1);
+}
+
+#define ID_AA64PFR0_EL1_SVE_SHIFT	32
+
+static inline bool system_supports_sve(void)
+{
+	return ((get_id_aa64pfr0_el1() >> ID_AA64PFR0_EL1_SVE_SHIFT) & 0xf) != 0;
+}
+
+static inline int sve_vl(void)
+{
+	int vl;
+
+	asm volatile(".arch_extension sve\n"
+		     "rdvl %0, #8"
+		     : "=r" (vl));
+
+	return vl;
+}
+
+
+static inline bool system_supports_rndr(void)
+{
+	u64 id_aa64isar0_el1 = read_sysreg(ID_AA64ISAR0_EL1);
+
+	return ((id_aa64isar0_el1 >> ID_AA64ISAR0_EL1_RNDR_SHIFT) & 0xf) != 0;
 }
 
 #endif /* !__ASSEMBLY__ */
diff --git a/lib/arm64/asm/sysreg.h b/lib/arm64/asm/sysreg.h
index 6cae8b8..f214a4f 100644
--- a/lib/arm64/asm/sysreg.h
+++ b/lib/arm64/asm/sysreg.h
@@ -73,6 +73,8 @@
 );
 #endif /* __ASSEMBLY__ */
 
+#define ID_AA64ISAR0_EL1_RNDR_SHIFT	60
+
 #define ICC_PMR_EL1			sys_reg(3, 0, 4, 6, 0)
 #define ICC_SGI1R_EL1			sys_reg(3, 0, 12, 11, 5)
 #define ICC_IAR1_EL1			sys_reg(3, 0, 12, 12, 0)
@@ -102,4 +104,9 @@
 			 SCTLR_EL1_TSCXT | SCTLR_EL1_EIS | SCTLR_EL1_SPAN | \
 			 SCTLR_EL1_NTLSMD | SCTLR_EL1_LSMAOE)
 
+#define ZCR_EL1		S3_0_C1_C2_0
+#define ZCR_EL1_LEN	GENMASK(3, 0)
+
+#define RNDR		S3_3_C2_C4_0
+
 #endif /* _ASMARM64_SYSREG_H_ */
diff --git a/lib/arm64/processor.c b/lib/arm64/processor.c
index 06fd7cf..eb93fd7 100644
--- a/lib/arm64/processor.c
+++ b/lib/arm64/processor.c
@@ -43,6 +43,7 @@
 	[ESR_EL1_EC_SVC32]		= "SVC32",
 	[ESR_EL1_EC_SVC64]		= "SVC64",
 	[ESR_EL1_EC_SYS64]		= "SYS64",
+	[ESR_EL1_EC_SVE]		= "SVE",
 	[ESR_EL1_EC_IABT_EL0]		= "IABT_EL0",
 	[ESR_EL1_EC_IABT_EL1]		= "IABT_EL1",
 	[ESR_EL1_EC_PC_ALIGN]		= "PC_ALIGN",