Merge tag 'v5.11-rc3' into pkvm
Linux 5.11-rc3
Signed-off-by: Will Deacon <willdeacon@google.com>
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 9a55580..a9b4f16 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -600,6 +600,7 @@
void check_local_cpu_capabilities(void);
u64 read_sanitised_ftr_reg(u32 id);
+int copy_ftr_reg(u32 id, struct arm64_ftr_reg *dst);
static inline bool cpu_supports_mixed_endian_el0(void)
{
diff --git a/arch/arm64/include/asm/hyp_image.h b/arch/arm64/include/asm/hyp_image.h
index daa1a1d..8b807b6 100644
--- a/arch/arm64/include/asm/hyp_image.h
+++ b/arch/arm64/include/asm/hyp_image.h
@@ -7,11 +7,15 @@
#ifndef __ARM64_HYP_IMAGE_H__
#define __ARM64_HYP_IMAGE_H__
+#ifndef __KVM_NVHE_HYPERVISOR__
/*
* KVM nVHE code has its own symbol namespace prefixed with __kvm_nvhe_,
* to separate it from the kernel proper.
*/
#define kvm_nvhe_sym(sym) __kvm_nvhe_##sym
+#else
+#define kvm_nvhe_sym(sym) sym
+#endif
#ifdef LINKER_SCRIPT
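
A minimal illustration of the two expansions above; the symbol name is hypothetical:

  /* Hypothetical symbol, for illustration only. */
  #include <asm/hyp_image.h>

  extern unsigned long kvm_nvhe_sym(nr_cpus);
  /* Kernel proper: resolves to __kvm_nvhe_nr_cpus.             */
  /* nVHE hyp objects (__KVM_NVHE_HYPERVISOR__ defined): stays  */
  /* nr_cpus, since the __kvm_nvhe_ prefix is already applied   */
  /* to every hyp symbol at build time.                         */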
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 8a33d83..80c31f2 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -57,6 +57,10 @@
#define __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2 12
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs 13
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs 14
+#define __KVM_HOST_SMCCC_FUNC___kvm_hyp_protect 15
+#define __KVM_HOST_SMCCC_FUNC___hyp_create_mappings 16
+#define __KVM_HOST_SMCCC_FUNC___hyp_create_private_mapping 17
+#define __KVM_HOST_SMCCC_FUNC___hyp_cpu_set_vector 18
#ifndef __ASSEMBLY__
@@ -154,6 +158,9 @@
unsigned long tpidr_el2;
unsigned long stack_hyp_va;
phys_addr_t pgd_pa;
+ unsigned long hcr_el2;
+ unsigned long vttbr;
+ unsigned long vtcr;
};
/* Translate a kernel address @ptr into its equivalent linear mapping */
diff --git a/arch/arm64/include/asm/kvm_cpufeature.h b/arch/arm64/include/asm/kvm_cpufeature.h
new file mode 100644
index 0000000..74043a1
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_cpufeature.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 - Google LLC
+ * Author: Quentin Perret <qperret@google.com>
+ */
+
+#include <asm/cpufeature.h>
+
+#ifndef KVM_HYP_CPU_FTR_REG
+#if defined(__KVM_NVHE_HYPERVISOR__)
+#define KVM_HYP_CPU_FTR_REG(id, name) extern struct arm64_ftr_reg name;
+#else
+#define KVM_HYP_CPU_FTR_REG(id, name) DECLARE_KVM_NVHE_SYM(name);
+#endif
+#endif
+
+KVM_HYP_CPU_FTR_REG(SYS_CTR_EL0, arm64_ftr_reg_ctrel0)
+KVM_HYP_CPU_FTR_REG(SYS_ID_AA64MMFR0_EL1, arm64_ftr_reg_id_aa64mmfr0_el1)
+KVM_HYP_CPU_FTR_REG(SYS_ID_AA64MMFR1_EL1, arm64_ftr_reg_id_aa64mmfr1_el1)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 8fcfab0..9d59beb 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -95,7 +95,7 @@
/* The last vcpu id that ran on each physical CPU */
int __percpu *last_vcpu_ran;
- struct kvm *kvm;
+ struct kvm_arch *arch;
};
struct kvm_arch_memory_slot {
@@ -592,6 +592,7 @@
void kvm_arm_halt_guest(struct kvm *kvm);
void kvm_arm_resume_guest(struct kvm *kvm);
+#ifndef __KVM_NVHE_HYPERVISOR__
#define kvm_call_hyp_nvhe(f, ...) \
({ \
struct arm_smccc_res res; \
@@ -631,6 +632,11 @@
\
ret; \
})
+#else /* __KVM_NVHE_HYPERVISOR__ */
+#define kvm_call_hyp(f, ...) f(__VA_ARGS__)
+#define kvm_call_hyp_ret(f, ...) f(__VA_ARGS__)
+#define kvm_call_hyp_nvhe(f, ...) f(__VA_ARGS__)
+#endif /* __KVM_NVHE_HYPERVISOR__ */
void force_vm_exit(const cpumask_t *mask);
void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
@@ -771,4 +777,12 @@
#define kvm_vcpu_has_pmu(vcpu) \
(test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))
+#ifdef CONFIG_KVM
+extern phys_addr_t hyp_mem_base;
+extern phys_addr_t hyp_mem_size;
+void __init kvm_hyp_reserve(void);
+#else
+static inline void kvm_hyp_reserve(void) { }
+#endif
+
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index c045082..682add8 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -100,4 +100,12 @@
void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par);
#endif
+#ifdef __KVM_NVHE_HYPERVISOR__
+void __kvm_init_switch_pgd(phys_addr_t phys, unsigned long size,
+ phys_addr_t pgd, void *sp, void *cont_fn);
+int __kvm_hyp_protect(phys_addr_t phys, unsigned long size,
+ unsigned long nr_cpus, unsigned long *per_cpu_base);
+void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt);
+#endif
+
#endif /* __ARM64_KVM_HYP_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index e52d82a..8d37d6d 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -195,6 +195,14 @@
phys_addr_t kvm_get_idmap_vector(void);
int kvm_mmu_init(void);
+static inline void *__kvm_vector_slot2addr(void *base,
+ enum arm64_hyp_spectre_vector slot)
+{
+ int idx = slot - (slot != HYP_VECTOR_DIRECT);
+
+ return base + (idx * SZ_2K);
+}
+
struct kvm;
#define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l))
@@ -272,6 +280,54 @@
return ret;
}
+static inline u64 kvm_get_parange(u64 mmfr0)
+{
+ u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_PARANGE_SHIFT);
+ if (parange > ID_AA64MMFR0_PARANGE_MAX)
+ parange = ID_AA64MMFR0_PARANGE_MAX;
+
+ return parange;
+}
+
+/*
+ * The VTCR value is common across all the physical CPUs on the system.
+ * We use system-wide sanitised values to fill in the different fields,
+ * except for Hardware Management of Access Flags: the HA flag is set
+ * unconditionally on all CPUs, as it is safe to run with or without
+ * the feature, and the bit is RES0 on CPUs that don't support it.
+ */
+static inline u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
+{
+ u64 vtcr = VTCR_EL2_FLAGS;
+ u8 lvls;
+
+ vtcr |= kvm_get_parange(mmfr0) << VTCR_EL2_PS_SHIFT;
+ vtcr |= VTCR_EL2_T0SZ(phys_shift);
+ /*
+ * Use a minimum 2 level page table to prevent splitting
+ * host PMD huge pages at stage2.
+ */
+ lvls = stage2_pgtable_levels(phys_shift);
+ if (lvls < 2)
+ lvls = 2;
+ vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);
+
+ /*
+ * Enable the Hardware Access Flag management, unconditionally
+ * on all CPUs. The feature is RES0 on CPUs without the support
+ * and must be ignored by those CPUs.
+ */
+ vtcr |= VTCR_EL2_HA;
+
+ /* Set the vmid bits */
+ vtcr |= (get_vmid_bits(mmfr1) == 16) ?
+ VTCR_EL2_VS_16BIT :
+ VTCR_EL2_VS_8BIT;
+
+ return vtcr;
+}
+
#define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr)
static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
@@ -289,9 +345,9 @@
* Must be called from hyp code running at EL2 with an updated VTTBR
* and interrupts disabled.
*/
-static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu)
+static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, unsigned long vtcr)
{
- write_sysreg(kern_hyp_va(mmu->kvm)->arch.vtcr, vtcr_el2);
+ write_sysreg(vtcr, vtcr_el2);
write_sysreg(kvm_get_vttbr(mmu), vttbr_el2);
/*
@@ -302,5 +358,14 @@
asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
}
+static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu)
+{
+ __load_stage2(mmu, kern_hyp_va(mmu->arch)->vtcr);
+}
+
+static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu)
+{
+ return container_of(mmu->arch, struct kvm, arch);
+}
#endif /* __ASSEMBLY__ */
#endif /* __ARM64_KVM_MMU_H__ */
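
As a usage sketch for kvm_get_vtcr() above, mirroring what stage-2 setup code does: read the sanitised ID registers and pick an IPA size. The function wrapper and the 40-bit IPA size are assumptions for the example, not part of this patch:

  static u64 host_vtcr_example(u32 ipa_shift /* assumed, e.g. 40 */)
  {
          u64 mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
          u64 mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);

          return kvm_get_vtcr(mmfr0, mmfr1, ipa_shift);
  }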
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 52ab38d..d846bc3 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -14,16 +14,40 @@
typedef u64 kvm_pte_t;
/**
+ * struct kvm_pgtable_mm_ops - Memory management callbacks.
+ * @zalloc_page: Allocate a zeroed memory page.
+ * @zalloc_pages_exact: Allocate an exact number of zeroed memory pages.
+ * @free_pages_exact: Free an exact number of memory pages.
+ * @get_page: Increment the refcount on a page.
+ * @put_page: Decrement the refcount on a page.
+ * @page_count: Returns the refcount of a page.
+ * @phys_to_virt: Convert a physical address into a virtual address.
+ * @virt_to_phys: Convert a virtual address into a physical address.
+ */
+struct kvm_pgtable_mm_ops {
+ void* (*zalloc_page)(void *arg);
+ void* (*zalloc_pages_exact)(size_t size);
+ void (*free_pages_exact)(void *addr, size_t size);
+ void (*get_page)(void *addr);
+ void (*put_page)(void *addr);
+ int (*page_count)(void *addr);
+ void* (*phys_to_virt)(phys_addr_t phys);
+ phys_addr_t (*virt_to_phys)(void *addr);
+};
+
+/**
* struct kvm_pgtable - KVM page-table.
* @ia_bits: Maximum input address size, in bits.
* @start_level: Level at which the page-table walk starts.
* @pgd: Pointer to the first top-level entry of the page-table.
+ * @mm_ops: Memory management callbacks.
* @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
*/
struct kvm_pgtable {
u32 ia_bits;
u32 start_level;
kvm_pte_t *pgd;
+ struct kvm_pgtable_mm_ops *mm_ops;
/* Stage-2 only */
struct kvm_s2_mmu *mmu;
@@ -86,10 +110,12 @@
* kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table.
* @pgt: Uninitialised page-table structure to initialise.
* @va_bits: Maximum virtual address bits.
+ * @mm_ops: Memory management callbacks.
*
* Return: 0 on success, negative error code on failure.
*/
-int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits);
+int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
+ struct kvm_pgtable_mm_ops *mm_ops);
/**
* kvm_pgtable_hyp_destroy() - Destroy an unused hypervisor stage-1 page-table.
@@ -125,11 +151,14 @@
/**
* kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
* @pgt: Uninitialised page-table structure to initialise.
- * @kvm: KVM structure representing the guest virtual machine.
+ * @arch: Arch-specific KVM structure representing the guest virtual
+ * machine.
+ * @mm_ops: Memory management callbacks.
*
* Return: 0 on success, negative error code on failure.
*/
-int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm);
+int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
+ struct kvm_pgtable_mm_ops *mm_ops);
/**
* kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
@@ -147,8 +176,8 @@
* @size: Size of the mapping.
* @phys: Physical address of the memory to map.
* @prot: Permissions and attributes for the mapping.
- * @mc: Cache of pre-allocated GFP_PGTABLE_USER memory from which to
- * allocate page-table pages.
+ * @mc: Cache of pre-allocated memory from which to allocate page-table
+ * pages.
*
* The offset of @addr within a page is ignored, @size is rounded-up to
* the next page boundary and @phys is rounded-down to the previous page
@@ -165,7 +194,7 @@
*/
int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
u64 phys, enum kvm_pgtable_prot prot,
- struct kvm_mmu_memory_cache *mc);
+ void *mc);
/**
* kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 page-table.
diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
index 8ff5793..f58cf49 100644
--- a/arch/arm64/include/asm/sections.h
+++ b/arch/arm64/include/asm/sections.h
@@ -12,6 +12,7 @@
extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
extern char __hyp_text_start[], __hyp_text_end[];
extern char __hyp_data_ro_after_init_start[], __hyp_data_ro_after_init_end[];
+extern char __hyp_bss_start[], __hyp_bss_end[];
extern char __idmap_text_start[], __idmap_text_end[];
extern char __initdata_begin[], __initdata_end[];
extern char __inittext_begin[], __inittext_end[];
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index f42fd9e..1f1a1d7 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -117,6 +117,9 @@
DEFINE(NVHE_INIT_TPIDR_EL2, offsetof(struct kvm_nvhe_init_params, tpidr_el2));
DEFINE(NVHE_INIT_STACK_HYP_VA, offsetof(struct kvm_nvhe_init_params, stack_hyp_va));
DEFINE(NVHE_INIT_PGD_PA, offsetof(struct kvm_nvhe_init_params, pgd_pa));
+ DEFINE(NVHE_INIT_HCR_EL2, offsetof(struct kvm_nvhe_init_params, hcr_el2));
+ DEFINE(NVHE_INIT_VTTBR, offsetof(struct kvm_nvhe_init_params, vttbr));
+ DEFINE(NVHE_INIT_VTCR, offsetof(struct kvm_nvhe_init_params, vtcr));
#endif
#ifdef CONFIG_CPU_PM
DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp));
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index e99edde..6bfe3d4 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1114,6 +1114,18 @@
}
EXPORT_SYMBOL_GPL(read_sanitised_ftr_reg);
+int copy_ftr_reg(u32 id, struct arm64_ftr_reg *dst)
+{
+ struct arm64_ftr_reg *regp = get_arm64_ftr_reg(id);
+
+ if (!regp)
+ return -EINVAL;
+
+ memcpy(dst, regp, sizeof(*regp));
+
+ return 0;
+}
+
#define read_sysreg_case(r) \
case r: return read_sysreg_s(r)
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index f676243..ac28e70 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -102,6 +102,45 @@
/* Array containing bases of nVHE per-CPU memory regions. */
KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base);
+/* Position-independent library routines */
+__kvm_nvhe_clear_page = __kvm_nvhe___pi_clear_page;
+__kvm_nvhe_copy_page = __kvm_nvhe___pi_copy_page;
+__kvm_nvhe_memcpy = __kvm_nvhe___pi_memcpy;
+__kvm_nvhe_memset = __kvm_nvhe___pi_memset;
+
+#ifdef CONFIG_KASAN
+__kvm_nvhe___memcpy = __kvm_nvhe___pi_memcpy;
+__kvm_nvhe___memset = __kvm_nvhe___pi_memset;
+#endif
+
+__kvm_nvhe___flush_dcache_area = __kvm_nvhe___pi___flush_dcache_area;
+
+/* Hypervisor VA size */
+KVM_NVHE_ALIAS(hyp_va_bits);
+
+/* Kernel memory sections */
+KVM_NVHE_ALIAS(__start_rodata);
+KVM_NVHE_ALIAS(__end_rodata);
+KVM_NVHE_ALIAS(__bss_start);
+KVM_NVHE_ALIAS(__bss_stop);
+
+/* Hyp memory sections */
+KVM_NVHE_ALIAS(__hyp_idmap_text_start);
+KVM_NVHE_ALIAS(__hyp_idmap_text_end);
+KVM_NVHE_ALIAS(__hyp_text_start);
+KVM_NVHE_ALIAS(__hyp_text_end);
+KVM_NVHE_ALIAS(__hyp_data_ro_after_init_start);
+KVM_NVHE_ALIAS(__hyp_data_ro_after_init_end);
+KVM_NVHE_ALIAS(__hyp_bss_start);
+KVM_NVHE_ALIAS(__hyp_bss_end);
+
+/* pKVM static key */
+KVM_NVHE_ALIAS(kvm_protected_mode_initialized);
+
+#ifdef CONFIG_KVM_ARM_HYP_DEBUG_UART
+KVM_NVHE_ALIAS(kvm_hyp_debug_uart_set_basep);
+#endif
+
#endif /* CONFIG_KVM */
#endif /* __ARM64_KERNEL_IMAGE_VARS_H */
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 4c0b0c8..7003c41 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -8,6 +8,13 @@
#define RO_EXCEPTION_TABLE_ALIGN 8
#define RUNTIME_DISCARD_EXIT
+#define BSS_FIRST_SECTIONS \
+ . = ALIGN(PAGE_SIZE); \
+ __hyp_bss_start = .; \
+ *(.hyp.bss) \
+ . = ALIGN(PAGE_SIZE); \
+ __hyp_bss_end = .;
+
#include <asm-generic/vmlinux.lds.h>
#include <asm/cache.h>
#include <asm/hyp_image.h>
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 3964acf..0be5f86 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -49,6 +49,14 @@
source "virt/kvm/Kconfig"
+config KVM_ARM_HYP_DEBUG_UART
+ bool "Hack up some basic UART functionality at EL2"
+
+config KVM_ARM_HYP_DEBUG_UART_ADDR
+ hex "Physical address of the PL011 for EL2 to use"
+ depends on KVM_ARM_HYP_DEBUG_UART
+ default 0x09000000
+
endif # KVM
endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 04c44853..198bb91 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -34,6 +34,7 @@
#include <asm/virt.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
+#include <asm/kvm_cpufeature.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_emulate.h>
#include <asm/sections.h>
@@ -1345,16 +1346,9 @@
/* A lookup table holding the hypervisor VA for each vector slot */
static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS];
-static int __kvm_vector_slot2idx(enum arm64_hyp_spectre_vector slot)
-{
- return slot - (slot != HYP_VECTOR_DIRECT);
-}
-
static void kvm_init_vector_slot(void *base, enum arm64_hyp_spectre_vector slot)
{
- int idx = __kvm_vector_slot2idx(slot);
-
- hyp_spectre_vector_selector[slot] = base + (idx * SZ_2K);
+ hyp_spectre_vector_selector[slot] = __kvm_vector_slot2addr(base, slot);
}
static int kvm_init_vector_slots(void)
@@ -1383,21 +1377,17 @@
return 0;
}
-static void cpu_init_hyp_mode(void)
+static void cpu_prepare_hyp_mode(int cpu)
{
- struct kvm_nvhe_init_params *params = this_cpu_ptr_nvhe_sym(kvm_init_params);
- struct arm_smccc_res res;
+ struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
unsigned long tcr;
- /* Switch from the HYP stub to our own HYP init vector */
- __hyp_set_vectors(kvm_get_idmap_vector());
-
/*
* Calculate the raw per-cpu offset without a translation from the
* kernel's mapping to the linear mapping, and store it in tpidr_el2
* so that we can use adr_l to access per-cpu variables in EL2.
*/
- params->tpidr_el2 = (unsigned long)this_cpu_ptr_nvhe_sym(__per_cpu_start) -
+ params->tpidr_el2 = (unsigned long)per_cpu_ptr_nvhe_sym(__per_cpu_start, cpu) -
(unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start));
params->mair_el2 = read_sysreg(mair_el1);
@@ -1421,14 +1411,28 @@
tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET;
params->tcr_el2 = tcr;
- params->stack_hyp_va = kern_hyp_va(__this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE);
+ params->stack_hyp_va = kern_hyp_va(per_cpu(kvm_arm_hyp_stack_page, cpu) + PAGE_SIZE);
params->pgd_pa = kvm_mmu_get_httbr();
+ if (is_protected_kvm_enabled())
+ params->hcr_el2 = HCR_HOST_NVHE_PROTECTED_FLAGS;
+ else
+ params->hcr_el2 = HCR_HOST_NVHE_FLAGS;
+ params->vttbr = params->vtcr = 0;
/*
* Flush the init params from the data cache because the struct will
* be read while the MMU is off.
*/
kvm_flush_dcache_to_poc(params, sizeof(*params));
+}
+
+static void kvm_set_hyp_vector(void)
+{
+ struct kvm_nvhe_init_params *params;
+ struct arm_smccc_res res;
+
+ /* Switch from the HYP stub to our own HYP init vector */
+ __hyp_set_vectors(kvm_get_idmap_vector());
/*
* Call initialization code, and switch to the full blown HYP code.
@@ -1437,8 +1441,14 @@
* cpus_have_const_cap() wrapper.
*/
BUG_ON(!system_capabilities_finalized());
+ params = this_cpu_ptr_nvhe_sym(kvm_init_params);
arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init), virt_to_phys(params), &res);
WARN_ON(res.a0 != SMCCC_RET_SUCCESS);
+}
+
+static void cpu_init_hyp_mode(void)
+{
+ kvm_set_hyp_vector();
/*
* Disabling SSBD on a non-VHE system requires us to enable SSBS
@@ -1481,7 +1491,10 @@
struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data);
void *vector = hyp_spectre_vector_selector[data->slot];
- *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)vector;
+ if (!is_protected_kvm_enabled())
+ *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)vector;
+ else
+ kvm_call_hyp_nvhe(__hyp_cpu_set_vector, data->slot);
}
static void cpu_hyp_reinit(void)
@@ -1489,13 +1502,14 @@
kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt);
cpu_hyp_reset();
- cpu_set_hyp_vector();
if (is_kernel_in_hyp_mode())
kvm_timer_init_vhe();
else
cpu_init_hyp_mode();
+ cpu_set_hyp_vector();
+
kvm_arm_init_debug();
if (vgic_present)
@@ -1691,13 +1705,82 @@
}
}
+#undef KVM_HYP_CPU_FTR_REG
+#define KVM_HYP_CPU_FTR_REG(id, name) \
+ { .sys_id = id, .dst = (struct arm64_ftr_reg *)&kvm_nvhe_sym(name) },
+static const struct __ftr_reg_copy_entry {
+ u32 sys_id;
+ struct arm64_ftr_reg *dst;
+} hyp_ftr_regs[] = {
+ #include <asm/kvm_cpufeature.h>
+};
+
+static int copy_cpu_ftr_regs(void)
+{
+ int i, ret;
+
+ for (i = 0; i < ARRAY_SIZE(hyp_ftr_regs); i++) {
+ ret = copy_ftr_reg(hyp_ftr_regs[i].sys_id, hyp_ftr_regs[i].dst);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int kvm_hyp_enable_protection(void)
+{
+ void *per_cpu_base = kvm_ksym_ref(kvm_arm_hyp_percpu_base);
+ int ret, cpu;
+ void *addr;
+
+ if (!is_protected_kvm_enabled())
+ return 0;
+
+ if (!hyp_mem_base)
+ return -ENOMEM;
+
+ addr = phys_to_virt(hyp_mem_base);
+ ret = create_hyp_mappings(addr, addr + hyp_mem_size - 1, PAGE_HYP);
+ if (ret)
+ return ret;
+
+ preempt_disable();
+ kvm_set_hyp_vector();
+ ret = kvm_call_hyp_nvhe(__kvm_hyp_protect, hyp_mem_base, hyp_mem_size,
+ num_possible_cpus(), kern_hyp_va(per_cpu_base));
+ preempt_enable();
+ if (ret)
+ return ret;
+
+ free_hyp_pgds();
+ for_each_possible_cpu(cpu)
+ free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
+
+ return 0;
+}
+
/**
* Inits Hyp-mode on all online CPUs
*/
static int init_hyp_mode(void)
{
int cpu;
- int err = 0;
+ int err = -ENOMEM;
+
+ /*
+ * The protected Hyp-mode cannot be initialized if the memory pool
+ * allocation has failed.
+ */
+ if (is_protected_kvm_enabled() && !hyp_mem_base)
+ return err;
+
+ /*
+ * Copy the required CPU feature registers into their EL2 counterparts
+ */
+ err = copy_cpu_ftr_regs();
+ if (err)
+ return err;
/*
* Allocate Hyp PGD and setup Hyp identity mapping
@@ -1764,7 +1847,18 @@
goto out_err;
}
+ /*
+ * .hyp.bss is placed at the beginning of the .bss section, so map that
+ * part RW, and the rest RO as the hyp shouldn't be touching it.
+ */
err = create_hyp_mappings(kvm_ksym_ref(__bss_start),
+ kvm_ksym_ref(__hyp_bss_end), PAGE_HYP);
+ if (err) {
+ kvm_err("Cannot map hyp bss section: %d\n", err);
+ goto out_err;
+ }
+
+ err = create_hyp_mappings(kvm_ksym_ref(__hyp_bss_end),
kvm_ksym_ref(__bss_stop), PAGE_HYP_RO);
if (err) {
kvm_err("Cannot map bss section\n");
@@ -1807,6 +1901,18 @@
goto out_err;
}
+ /*
+ * Prepare the CPU initialization parameters
+ */
+ for_each_possible_cpu(cpu)
+ cpu_prepare_hyp_mode(cpu);
+
+ err = kvm_hyp_enable_protection();
+ if (err) {
+ kvm_err("Failed to enable hyp memory protection: %d\n", err);
+ goto out_err;
+ }
+
return 0;
out_err:
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index 687598e..b726332 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -10,4 +10,4 @@
-DDISABLE_BRANCH_PROFILING \
$(DISABLE_STACKLEAK_PLUGIN)
-obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o
+obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o reserved_mem.o
diff --git a/arch/arm64/kvm/hyp/debug-pl011.h b/arch/arm64/kvm/hyp/debug-pl011.h
new file mode 100644
index 0000000..c76108a
--- /dev/null
+++ b/arch/arm64/kvm/hyp/debug-pl011.h
@@ -0,0 +1,227 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Stand-alone header for basic debug output on the PL011 UART. To use it,
+ * ensure that CONFIG_KVM_ARM_HYP_DEBUG_UART is enabled and that
+ * CONFIG_KVM_ARM_HYP_DEBUG_UART_ADDR is the physical address of the PL011
+ * UART that you want to use. Then just include this header and try not to
+ * vomit at the state of the macros and functions it provides.
+ *
+ * The C functions only work when the MMU is enabled, but the assembly macros
+ * should work pretty much everywhere.
+ *
+ * It's slow and racy, but you'll be fine. Patches unwelcome.
+ */
+
+#ifndef __ARM64_KVM_HYP_DEBUG_PL011_H__
+#define __ARM64_KVM_HYP_DEBUG_PL011_H__
+
+#ifdef CONFIG_KVM_ARM_HYP_DEBUG_UART
+
+#define HYP_PL011_BASE_PHYS CONFIG_KVM_ARM_HYP_DEBUG_UART_ADDR
+#define HYP_PL011_UARTFR 0x18
+
+#define HYP_PL011_UARTFR_BUSY 3
+#define HYP_PL011_UARTFR_FULL 5
+
+#ifdef __ASSEMBLY__
+
+.macro hyp_pl011_base, tmp
+ mrs \tmp, sctlr_el2
+ tbz \tmp, #0, 9990f
+ isb
+
+alternative_cb kvm_hyp_debug_uart_set_basep
+ movz \tmp, #0
+ movk \tmp, #0, lsl #16
+ movk \tmp, #0, lsl #32
+ movk \tmp, #0, lsl #48
+alternative_cb_end
+
+ kern_hyp_va \tmp
+ ldr \tmp, [\tmp]
+ b 9991f
+9990: mov \tmp, HYP_PL011_BASE_PHYS
+9991:
+.endm
+
+/*
+ * 'c' is a W register containing the character to transmit. Preserved.
+ * 'tmpnr' is the number of another scratch register. Clobbered.
+ */
+.macro hyp_putc, c, tmpnr
+9992: hyp_pl011_base x\tmpnr
+ ldr w\tmpnr, [x\tmpnr, HYP_PL011_UARTFR]
+ tbnz w\tmpnr, HYP_PL011_UARTFR_FULL, 9992b
+ hyp_pl011_base x\tmpnr
+ str \c, [x\tmpnr]
+9992: hyp_pl011_base x\tmpnr
+ ldr w\tmpnr, [x\tmpnr, HYP_PL011_UARTFR]
+ tbnz w\tmpnr, HYP_PL011_UARTFR_BUSY, 9992b
+.endm
+
+/*
+ * 's' is an X register containing the address of the string to print.
+ * 'tmpnr1' and 'tmpnr2' are numbers of other scratch registers.
+ * All three registers clobbered.
+ *
+ * The string must be mapped, so it's best to use a combination of '.ascii'
+ * and PC-relative addressing (i.e. an ADR instruction).
+ */
+.macro hyp_puts, s, tmpnr1, tmpnr2
+9993: ldrb w\tmpnr1, [\s]
+ cbz w\tmpnr1, 9993f
+ hyp_putc w\tmpnr1, \tmpnr2
+ add \s, \s, #1
+ b 9993b
+9993: mov w\tmpnr1, '\n'
+ hyp_putc w\tmpnr1, \tmpnr2
+.endm
+
+.macro __hyp_putx4, xnr, tmpnr
+ bic x\xnr, x\xnr, #0xfffffff0
+ sub w\tmpnr, w\xnr, #10
+ tbnz w\tmpnr, #31, 9994f
+ add x\xnr, x\xnr, #0x27
+9994: add x\xnr, x\xnr, #0x30
+ hyp_putc w\xnr, \tmpnr
+.endm
+
+/*
+ * 'x' is an X register containing a value to be printed in hex. Preserved.
+ * 'tmpnr1' and 'tmpnr2' are numbers of other scratch registers. Clobbered.
+ */
+.macro hyp_putx64, x, tmpnr1, tmpnr2
+ mov w\tmpnr1, '0'
+ hyp_putc w\tmpnr1, \tmpnr2
+ mov w\tmpnr1, 'x'
+ hyp_putc w\tmpnr1, \tmpnr2
+ movz x\tmpnr1, #15, lsl #32
+9995: bfxil x\tmpnr1, \x, #60, #4
+ ror \x, \x, #60
+ __hyp_putx4 \tmpnr1, \tmpnr2
+ ror x\tmpnr1, x\tmpnr1, #32
+ cbz w\tmpnr1, 9995f
+ sub x\tmpnr1, x\tmpnr1, #1
+ ror x\tmpnr1, x\tmpnr1, #32
+ b 9995b
+9995: mov w\tmpnr1, '\n'
+ hyp_putc w\tmpnr1, \tmpnr2
+.endm
+
+#else
+
+static inline void *__hyp_pl011_base(void)
+{
+ unsigned long ioaddr;
+
+ asm volatile(ALTERNATIVE_CB(
+ "movz %0, #0\n"
+ "movk %0, #0, lsl #16\n"
+ "movk %0, #0, lsl #32\n"
+ "movk %0, #0, lsl #48",
+ kvm_hyp_debug_uart_set_basep)
+ : "=r" (ioaddr));
+
+ return *((void **)kern_hyp_va(ioaddr));
+}
+
+static inline unsigned int __hyp_readw(void *ioaddr)
+{
+ unsigned int val;
+ asm volatile("ldr %w0, [%1]" : "=r" (val) : "r" (ioaddr));
+ return val;
+}
+
+static inline void __hyp_writew(unsigned int val, void *ioaddr)
+{
+ asm volatile("str %w0, [%1]" : : "r" (val), "r" (ioaddr));
+}
+
+static inline void hyp_putc(char c)
+{
+ unsigned int val;
+ void *base = __hyp_pl011_base();
+
+ do {
+ val = __hyp_readw(base + HYP_PL011_UARTFR);
+ } while (val & (1U << HYP_PL011_UARTFR_FULL));
+
+ __hyp_writew(c, base);
+
+ do {
+ val = __hyp_readw(base + HYP_PL011_UARTFR);
+ } while (val & (1U << HYP_PL011_UARTFR_BUSY));
+}
+
+/*
+ * Caller needs to ensure string is mapped. If it lives in .rodata, you should
+ * be good as long as we're using PC-relative addressing (probably true).
+ */
+static inline void hyp_puts(char *s)
+{
+ while (*s)
+ hyp_putc(*s++);
+ hyp_putc('\n');
+}
+
+static inline void __hyp_putx4(unsigned int x)
+{
+ x &= 0xf;
+ if (x <= 9)
+ x += '0';
+ else
+ x += ('a' - 0xa);
+ hyp_putc(x);
+}
+
+static inline void __hyp_putx4n(unsigned long x, int n)
+{
+ int i = n >> 2;
+
+ hyp_putc('0');
+ hyp_putc('x');
+
+ while (i--)
+ __hyp_putx4(x >> (4 * i));
+
+ hyp_putc('\n');
+}
+
+static inline void hyp_putx32(unsigned int x)
+{
+ __hyp_putx4n(x, 32);
+}
+
+static inline void hyp_putx64(unsigned long x)
+{
+ __hyp_putx4n(x, 64);
+}
+
+#endif
+
+#else
+
+#warning "Please don't include debug-pl011.h if you're not debugging"
+
+#ifdef __ASSEMBLY__
+
+.macro hyp_putc, c, tmpnr
+.endm
+
+.macro hyp_puts, s, tmpnr1, tmpnr2
+.endm
+
+.macro hyp_putx64, x, tmpnr1, tmpnr2
+.endm
+
+#else
+
+static inline void hyp_putc(char c) { }
+static inline void hyp_puts(char *s) { }
+static inline void hyp_putx32(unsigned int x) { }
+static inline void hyp_putx64(unsigned long x) { }
+
+#endif
+
+#endif /* CONFIG_KVM_ARM_HYP_DEBUG_UART */
+#endif /* __ARM64_KVM_HYP_DEBUG_PL011_H__ */
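
A minimal usage sketch for the C helpers above, from nVHE hyp code with the MMU on. The include path and the call site are assumptions:

  #include "debug-pl011.h"   /* assumed path, relative to arch/arm64/kvm/hyp/ */

  static void dump_fault(unsigned long esr)
  {
          hyp_puts("host mem abort");   /* prints the string + trailing '\n' */
          hyp_putx64(esr);              /* prints "0x..." + trailing '\n' */
  }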
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 8447357..e900525 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -157,19 +157,9 @@
return true;
}
-static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
+static inline bool __get_fault_info(u64 esr, u64 *far, u64 *hpfar)
{
- u8 ec;
- u64 esr;
- u64 hpfar, far;
-
- esr = vcpu->arch.fault.esr_el2;
- ec = ESR_ELx_EC(esr);
-
- if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
- return true;
-
- far = read_sysreg_el2(SYS_FAR);
+ *far = read_sysreg_el2(SYS_FAR);
/*
* The HPFAR can be invalid if the stage 2 fault did not
@@ -185,12 +175,30 @@
if (!(esr & ESR_ELx_S1PTW) &&
(cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
(esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
- if (!__translate_far_to_hpfar(far, &hpfar))
+ if (!__translate_far_to_hpfar(*far, hpfar))
return false;
} else {
- hpfar = read_sysreg(hpfar_el2);
+ *hpfar = read_sysreg(hpfar_el2);
}
+ return true;
+}
+
+static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
+{
+ u8 ec;
+ u64 esr;
+ u64 hpfar, far;
+
+ esr = vcpu->arch.fault.esr_el2;
+ ec = ESR_ELx_EC(esr);
+
+ if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
+ return true;
+
+ if (!__get_fault_info(esr, &far, &hpfar))
+ return false;
+
vcpu->arch.fault.far_el2 = far;
vcpu->arch.fault.hpfar_el2 = hpfar;
return true;
diff --git a/arch/arm64/kvm/hyp/include/nvhe/early_alloc.h b/arch/arm64/kvm/hyp/include/nvhe/early_alloc.h
new file mode 100644
index 0000000..68ce2bf
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/nvhe/early_alloc.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __KVM_HYP_EARLY_ALLOC_H
+#define __KVM_HYP_EARLY_ALLOC_H
+
+#include <asm/kvm_pgtable.h>
+
+void hyp_early_alloc_init(void *virt, unsigned long size);
+unsigned long hyp_early_alloc_nr_pages(void);
+void *hyp_early_alloc_page(void *arg);
+void *hyp_early_alloc_contig(unsigned int nr_pages);
+
+extern struct kvm_pgtable_mm_ops hyp_early_alloc_mm_ops;
+
+#endif /* __KVM_HYP_EARLY_ALLOC_H */
diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
new file mode 100644
index 0000000..95587fa
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __KVM_HYP_GFP_H
+#define __KVM_HYP_GFP_H
+
+#include <linux/list.h>
+
+#include <nvhe/memory.h>
+#include <nvhe/spinlock.h>
+
+#define HYP_MAX_ORDER 11U
+#define HYP_NO_ORDER UINT_MAX
+
+struct hyp_pool {
+ hyp_spinlock_t lock;
+ struct list_head free_area[HYP_MAX_ORDER + 1];
+ phys_addr_t range_start;
+ phys_addr_t range_end;
+};
+
+/* GFP flags */
+#define HYP_GFP_NONE 0
+#define HYP_GFP_ZERO 1
+
+/* Allocation */
+void *hyp_alloc_pages(struct hyp_pool *pool, gfp_t mask, unsigned int order);
+void hyp_get_page(void *addr);
+void hyp_put_page(void *addr);
+
+/* Used pages cannot be freed */
+int hyp_pool_init(struct hyp_pool *pool, phys_addr_t phys,
+ unsigned int nr_pages, unsigned int used_pages);
+#endif /* __KVM_HYP_GFP_H */
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
new file mode 100644
index 0000000..a22ef11
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Google LLC
+ * Author: Quentin Perret <qperret@google.com>
+ */
+
+#ifndef __KVM_NVHE_MEM_PROTECT__
+#define __KVM_NVHE_MEM_PROTECT__
+#include <linux/kvm_host.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_pgtable.h>
+#include <asm/virt.h>
+#include <nvhe/spinlock.h>
+
+struct host_kvm {
+ struct kvm_arch arch;
+ struct kvm_pgtable pgt;
+ struct kvm_pgtable_mm_ops mm_ops;
+ hyp_spinlock_t lock;
+};
+extern struct host_kvm host_kvm;
+
+int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool);
+void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
+
+static __always_inline void __load_host_stage2(void)
+{
+ if (static_branch_likely(&kvm_protected_mode_initialized))
+ __load_stage2(&host_kvm.arch.mmu, host_kvm.arch.vtcr);
+ else
+ write_sysreg(0, vttbr_el2);
+}
+#endif /* __KVM_NVHE_MEM_PROTECT__ */
diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h
new file mode 100644
index 0000000..c8af6fe
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __KVM_HYP_MEMORY_H
+#define __KVM_HYP_MEMORY_H
+
+#include <asm/page.h>
+
+#include <linux/types.h>
+
+#define HYP_MEMBLOCK_REGIONS 128
+struct hyp_memblock_region {
+ phys_addr_t start;
+ phys_addr_t end;
+};
+
+struct hyp_pool;
+struct hyp_page {
+ unsigned int refcount;
+ unsigned int order;
+ struct hyp_pool *pool;
+ struct list_head node;
+};
+
+extern s64 hyp_physvirt_offset;
+extern u64 __hyp_vmemmap;
+#define hyp_vmemmap ((struct hyp_page *)__hyp_vmemmap)
+
+#define __hyp_pa(virt) ((phys_addr_t)(virt) + hyp_physvirt_offset)
+#define __hyp_va(virt) ((void *)((phys_addr_t)(virt) - hyp_physvirt_offset))
+
+static inline void *hyp_phys_to_virt(phys_addr_t phys)
+{
+ return __hyp_va(phys);
+}
+
+static inline phys_addr_t hyp_virt_to_phys(void *addr)
+{
+ return __hyp_pa(addr);
+}
+
+#define hyp_phys_to_pfn(phys) ((phys) >> PAGE_SHIFT)
+#define hyp_phys_to_page(phys) (&hyp_vmemmap[hyp_phys_to_pfn(phys)])
+#define hyp_virt_to_page(virt) hyp_phys_to_page(__hyp_pa(virt))
+
+#define hyp_page_to_phys(page) ((phys_addr_t)((page) - hyp_vmemmap) << PAGE_SHIFT)
+#define hyp_page_to_virt(page) __hyp_va(hyp_page_to_phys(page))
+#define hyp_page_to_pool(page) (((struct hyp_page *)page)->pool)
+
+static inline int hyp_page_count(void *addr)
+{
+ struct hyp_page *p = hyp_virt_to_page(addr);
+
+ return p->refcount;
+}
+
+#endif /* __KVM_HYP_MEMORY_H */
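
A worked round trip through the helpers above, with an illustrative physical address, assuming hyp_physvirt_offset and the hyp vmemmap have been initialised:

  static void addr_round_trip(void)
  {
          phys_addr_t phys = 0x40000000;                /* hypothetical hyp page */
          void *va = hyp_phys_to_virt(phys);            /* phys - hyp_physvirt_offset */
          struct hyp_page *pg = hyp_phys_to_page(phys); /* &hyp_vmemmap[phys >> PAGE_SHIFT] */

          /* Both round trips recover the original address: */
          /*   hyp_virt_to_phys(va) == phys                 */
          /*   hyp_page_to_phys(pg) == phys                 */
  }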
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h
new file mode 100644
index 0000000..b79be25
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __KVM_HYP_MM_H
+#define __KVM_HYP_MM_H
+
+#include <asm/kvm_pgtable.h>
+#include <asm/spectre.h>
+#include <linux/types.h>
+
+#include <nvhe/memory.h>
+#include <nvhe/spinlock.h>
+
+extern struct hyp_memblock_region kvm_nvhe_sym(hyp_memory)[];
+extern int kvm_nvhe_sym(hyp_memblock_nr);
+extern struct kvm_pgtable hyp_pgtable;
+extern hyp_spinlock_t __hyp_pgd_lock;
+extern struct hyp_pool hpool;
+extern u64 __io_map_base;
+extern u32 hyp_va_bits;
+
+int hyp_create_idmap(void);
+int hyp_map_vectors(void);
+int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back);
+int hyp_cpu_set_vector(enum arm64_hyp_spectre_vector slot);
+int hyp_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
+int __hyp_create_mappings(unsigned long start, unsigned long size,
+ unsigned long phys, unsigned long prot);
+unsigned long __hyp_create_private_mapping(phys_addr_t phys, size_t size,
+ unsigned long prot);
+
+static inline void hyp_vmemmap_range(phys_addr_t phys, unsigned long size,
+ unsigned long *start, unsigned long *end)
+{
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+ struct hyp_page *p = hyp_phys_to_page(phys);
+
+ *start = (unsigned long)p;
+ *end = *start + nr_pages * sizeof(struct hyp_page);
+ *start = ALIGN_DOWN(*start, PAGE_SIZE);
+ *end = ALIGN(*end, PAGE_SIZE);
+}
+
+static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
+{
+ unsigned long total = 0, i;
+
+ /* Provision the worst case scenario with 4 levels of page-table */
+ for (i = 0; i < 4; i++) {
+ nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE);
+ total += nr_pages;
+ }
+
+ return total;
+}
+
+static inline unsigned long __hyp_pgtable_total_size(void)
+{
+ struct hyp_memblock_region *reg;
+ unsigned long nr_pages, res = 0;
+ int i;
+
+ for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
+ reg = &kvm_nvhe_sym(hyp_memory)[i];
+ nr_pages = (reg->end - reg->start) >> PAGE_SHIFT;
+ nr_pages = __hyp_pgtable_max_pages(nr_pages);
+ res += nr_pages << PAGE_SHIFT;
+ }
+
+ return res;
+}
+
+static inline unsigned long hyp_s1_pgtable_size(void)
+{
+ unsigned long res, nr_pages;
+
+ if (kvm_nvhe_sym(hyp_memblock_nr) <= 0)
+ return 0;
+
+ res = __hyp_pgtable_total_size();
+
+ /* Allow 1 GiB for private mappings */
+ nr_pages = (1 << 30) >> PAGE_SHIFT;
+ nr_pages = __hyp_pgtable_max_pages(nr_pages);
+ res += nr_pages << PAGE_SHIFT;
+
+ return res;
+}
+
+static inline unsigned long host_s2_mem_pgtable_size(void)
+{
+ unsigned long max_pgd_sz = 16 << PAGE_SHIFT;
+
+ if (kvm_nvhe_sym(hyp_memblock_nr) <= 0)
+ return 0;
+
+ return __hyp_pgtable_total_size() + max_pgd_sz;
+}
+
+static inline unsigned long host_s2_dev_pgtable_size(void)
+{
+ if (kvm_nvhe_sym(hyp_memblock_nr) <= 0)
+ return 0;
+
+ /* Allow 1 GiB for private mappings */
+ return __hyp_pgtable_max_pages((1 << 30) >> PAGE_SHIFT) << PAGE_SHIFT;
+}
+
+#endif /* __KVM_HYP_MM_H */
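
A worked example of the provisioning math above, assuming 4 KiB pages (so PTRS_PER_PTE == 512): mapping a 1 GiB region covers 262144 pages, so __hyp_pgtable_max_pages() reserves ceil(262144 / 512) = 512 leaf-level tables, then 1 + 1 + 1 tables for the three upper levels, i.e. 515 pages (just over 2 MiB) of worst-case page-table memory. hyp_s1_pgtable_size() then adds the same 515-page provision again for the extra 1 GiB of private mappings.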
diff --git a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
new file mode 100644
index 0000000..7584c39
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * A stand-alone ticket spinlock implementation for use by the non-VHE
+ * KVM hypervisor code running at EL2.
+ *
+ * Copyright (C) 2020 Google LLC
+ * Author: Will Deacon <will@kernel.org>
+ *
+ * Heavily based on the implementation removed by c11090474d70 which was:
+ * Copyright (C) 2012 ARM Ltd.
+ */
+
+#ifndef __ARM64_KVM_NVHE_SPINLOCK_H__
+#define __ARM64_KVM_NVHE_SPINLOCK_H__
+
+#include <asm/alternative.h>
+#include <asm/lse.h>
+
+typedef union hyp_spinlock {
+ u32 __val;
+ struct {
+#ifdef __AARCH64EB__
+ u16 next, owner;
+#else
+ u16 owner, next;
+#endif
+ };
+} hyp_spinlock_t;
+
+#define hyp_spin_lock_init(l) \
+do { \
+ *(l) = (hyp_spinlock_t){ .__val = 0 }; \
+} while (0)
+
+static inline void hyp_spin_lock(hyp_spinlock_t *lock)
+{
+ u32 tmp;
+ hyp_spinlock_t lockval, newval;
+
+ asm volatile(
+ /* Atomically increment the next ticket. */
+ ARM64_LSE_ATOMIC_INSN(
+ /* LL/SC */
+" prfm pstl1strm, %3\n"
+"1: ldaxr %w0, %3\n"
+" add %w1, %w0, #(1 << 16)\n"
+" stxr %w2, %w1, %3\n"
+" cbnz %w2, 1b\n",
+ /* LSE atomics */
+" mov %w2, #(1 << 16)\n"
+" ldadda %w2, %w0, %3\n"
+ __nops(3))
+
+ /* Did we get the lock? */
+" eor %w1, %w0, %w0, ror #16\n"
+" cbz %w1, 3f\n"
+ /*
+ * No: spin on the owner. Send a local event to avoid missing an
+ * unlock before the exclusive load.
+ */
+" sevl\n"
+"2: wfe\n"
+" ldaxrh %w2, %4\n"
+" eor %w1, %w2, %w0, lsr #16\n"
+" cbnz %w1, 2b\n"
+ /* We got the lock. Critical section starts here. */
+"3:"
+ : "=&r" (lockval), "=&r" (newval), "=&r" (tmp), "+Q" (*lock)
+ : "Q" (lock->owner)
+ : "memory");
+}
+
+static inline void hyp_spin_unlock(hyp_spinlock_t *lock)
+{
+ u64 tmp;
+
+ asm volatile(
+ ARM64_LSE_ATOMIC_INSN(
+ /* LL/SC */
+ " ldrh %w1, %0\n"
+ " add %w1, %w1, #1\n"
+ " stlrh %w1, %0",
+ /* LSE atomics */
+ " mov %w1, #1\n"
+ " staddlh %w1, %0\n"
+ __nops(1))
+ : "=Q" (lock->owner), "=&r" (tmp)
+ :
+ : "memory");
+}
+
+#endif /* __ARM64_KVM_NVHE_SPINLOCK_H__ */
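
A minimal usage sketch for the ticket lock above; the guarded counter is hypothetical:

  #include <nvhe/spinlock.h>

  static hyp_spinlock_t counter_lock;   /* zero-initialised == unlocked */
  static unsigned long counter;

  static void bump_counter(void)
  {
          hyp_spin_lock(&counter_lock);
          counter++;                     /* critical section */
          hyp_spin_unlock(&counter_lock);
  }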
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 1f1e351..c3e2f98 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -6,10 +6,15 @@
asflags-y := -D__KVM_NVHE_HYPERVISOR__
ccflags-y := -D__KVM_NVHE_HYPERVISOR__
+lib-objs := clear_page.o copy_page.o memcpy.o memset.o
+lib-objs := $(addprefix ../../../lib/, $(lib-objs))
+
obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
- hyp-main.o hyp-smp.o psci-relay.o
+ hyp-main.o hyp-smp.o psci-relay.o early_alloc.o stub.o page_alloc.o \
+ cache.o cpufeature.o setup.o mm.o mem_protect.o
obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
- ../fpsimd.o ../hyp-entry.o ../exception.o
+ ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
+obj-y += $(lib-objs)
##
## Build rules for compiling nVHE hyp code
diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S
new file mode 100644
index 0000000..36cef69
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/cache.S
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Code copied from arch/arm64/mm/cache.S.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/alternative.h>
+
+SYM_FUNC_START_PI(__flush_dcache_area)
+ dcache_by_line_op civac, sy, x0, x1, x2, x3
+ ret
+SYM_FUNC_END_PI(__flush_dcache_area)
diff --git a/arch/arm64/kvm/hyp/nvhe/cpufeature.c b/arch/arm64/kvm/hyp/nvhe/cpufeature.c
new file mode 100644
index 0000000..a887508
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/cpufeature.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 - Google LLC
+ * Author: Quentin Perret <qperret@google.com>
+ */
+
+#define KVM_HYP_CPU_FTR_REG(id, name) struct arm64_ftr_reg name;
+#include <asm/kvm_cpufeature.h>
diff --git a/arch/arm64/kvm/hyp/nvhe/early_alloc.c b/arch/arm64/kvm/hyp/nvhe/early_alloc.c
new file mode 100644
index 0000000..441c674
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/early_alloc.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Google LLC
+ * Author: Quentin Perret <qperret@google.com>
+ */
+
+#include <asm/kvm_pgtable.h>
+
+#include <nvhe/memory.h>
+
+struct kvm_pgtable_mm_ops hyp_early_alloc_mm_ops;
+__ro_after_init s64 hyp_physvirt_offset;
+
+static unsigned long base;
+static unsigned long end;
+static unsigned long cur;
+
+unsigned long hyp_early_alloc_nr_pages(void)
+{
+ return (cur - base) >> PAGE_SHIFT;
+}
+
+extern void clear_page(void *to);
+
+void *hyp_early_alloc_contig(unsigned int nr_pages)
+{
+ unsigned long ret = cur, i, p;
+
+ if (!nr_pages)
+ return NULL;
+
+ cur += nr_pages << PAGE_SHIFT;
+ if (cur > end) {
+ cur = ret;
+ return NULL;
+ }
+
+ for (i = 0; i < nr_pages; i++) {
+ p = ret + (i << PAGE_SHIFT);
+ clear_page((void *)(p));
+ }
+
+ return (void *)ret;
+}
+
+void *hyp_early_alloc_page(void *arg)
+{
+ return hyp_early_alloc_contig(1);
+}
+
+void hyp_early_alloc_init(void *virt, unsigned long size)
+{
+ base = (unsigned long)virt;
+ end = base + size;
+ cur = base;
+
+ hyp_early_alloc_mm_ops.zalloc_page = hyp_early_alloc_page;
+ hyp_early_alloc_mm_ops.phys_to_virt = hyp_phys_to_virt;
+ hyp_early_alloc_mm_ops.virt_to_phys = hyp_virt_to_phys;
+}
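
A sketch of how the bump allocator above can bootstrap the hyp stage-1 page-table; the function name and the pool arguments are assumptions (the real values come from the reserved memory handed over by the host):

  static int early_setup_example(void *pool_va, unsigned long pool_size, u32 va_bits)
  {
          hyp_early_alloc_init(pool_va, pool_size);
          /* Page-table pages now come from hyp_early_alloc_page(). */
          return kvm_pgtable_hyp_init(&hyp_pgtable, va_bits,
                                      &hyp_early_alloc_mm_ops);
  }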
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
index 31b060a..be66d46 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
@@ -102,6 +102,15 @@
ldr x1, [x0, #NVHE_INIT_MAIR_EL2]
msr mair_el2, x1
+ ldr x1, [x0, #NVHE_INIT_HCR_EL2]
+ msr hcr_el2, x1
+
+ ldr x1, [x0, #NVHE_INIT_VTTBR]
+ msr vttbr_el2, x1
+
+ ldr x1, [x0, #NVHE_INIT_VTCR]
+ msr vtcr_el2, x1
+
ldr x1, [x0, #NVHE_INIT_PGD_PA]
phys_to_ttbr x2, x1
alternative_if ARM64_HAS_CNP
@@ -251,4 +260,34 @@
SYM_CODE_END(__kvm_handle_stub_hvc)
+SYM_FUNC_START(__kvm_init_switch_pgd)
+ /* Turn the MMU off */
+ pre_disable_mmu_workaround
+ mrs x2, sctlr_el2
+ bic x3, x2, #SCTLR_ELx_M
+ msr sctlr_el2, x3
+ isb
+
+ tlbi alle2
+
+ /* Install the new pgtables */
+ ldr x3, [x0, #NVHE_INIT_PGD_PA]
+ phys_to_ttbr x4, x3
+alternative_if ARM64_HAS_CNP
+ orr x4, x4, #TTBR_CNP_BIT
+alternative_else_nop_endif
+ msr ttbr0_el2, x4
+
+ /* Set the new stack pointer */
+ ldr x0, [x0, #NVHE_INIT_STACK_HYP_VA]
+ mov sp, x0
+
+ /* And turn the MMU back on! */
+ dsb nsh
+ isb
+ msr sctlr_el2, x2
+ isb
+ ret x1
+SYM_FUNC_END(__kvm_init_switch_pgd)
+
.popsection
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index a906f9e..1d827ad2 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -6,12 +6,15 @@
#include <hyp/switch.h>
+#include <asm/pgtable-types.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_host.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
+#include <nvhe/mem_protect.h>
+#include <nvhe/mm.h>
#include <nvhe/trap_handler.h>
DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
@@ -106,6 +109,43 @@
__vgic_v3_restore_aprs(kern_hyp_va(cpu_if));
}
+static void handle___kvm_hyp_protect(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(phys_addr_t, phys, host_ctxt, 1);
+ DECLARE_REG(unsigned long, size, host_ctxt, 2);
+ DECLARE_REG(unsigned long, nr_cpus, host_ctxt, 3);
+ DECLARE_REG(unsigned long *, per_cpu_base, host_ctxt, 4);
+
+ cpu_reg(host_ctxt, 1) = __kvm_hyp_protect(phys, size, nr_cpus,
+ per_cpu_base);
+}
+
+static void handle___hyp_cpu_set_vector(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(enum arm64_hyp_spectre_vector, slot, host_ctxt, 1);
+
+ cpu_reg(host_ctxt, 1) = hyp_cpu_set_vector(slot);
+}
+
+static void handle___hyp_create_mappings(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(unsigned long, start, host_ctxt, 1);
+ DECLARE_REG(unsigned long, size, host_ctxt, 2);
+ DECLARE_REG(unsigned long, phys, host_ctxt, 3);
+ DECLARE_REG(unsigned long, prot, host_ctxt, 4);
+
+ cpu_reg(host_ctxt, 1) = __hyp_create_mappings(start, size, phys, prot);
+}
+
+static void handle___hyp_create_private_mapping(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(phys_addr_t, phys, host_ctxt, 1);
+ DECLARE_REG(size_t, size, host_ctxt, 2);
+ DECLARE_REG(unsigned long, prot, host_ctxt, 3);
+
+ cpu_reg(host_ctxt, 1) = __hyp_create_private_mapping(phys, size, prot);
+}
+
typedef void (*hcall_t)(struct kvm_cpu_context *);
#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = kimg_fn_ptr(handle_##x)
@@ -125,6 +165,10 @@
HANDLE_FUNC(__kvm_get_mdcr_el2),
HANDLE_FUNC(__vgic_v3_save_aprs),
HANDLE_FUNC(__vgic_v3_restore_aprs),
+ HANDLE_FUNC(__kvm_hyp_protect),
+ HANDLE_FUNC(__hyp_cpu_set_vector),
+ HANDLE_FUNC(__hyp_create_mappings),
+ HANDLE_FUNC(__hyp_create_private_mapping),
};
static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
@@ -180,6 +224,11 @@
case ESR_ELx_EC_SMC64:
handle_host_smc(host_ctxt);
break;
+ case ESR_ELx_EC_IABT_LOW:
+ fallthrough;
+ case ESR_ELx_EC_DABT_LOW:
+ handle_host_mem_abort(host_ctxt);
+ break;
default:
hyp_panic();
}
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
index 1206d0d..3a22900 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
@@ -22,4 +22,5 @@
PERCPU_INPUT(L1_CACHE_BYTES)
}
HYP_SECTION(.data..ro_after_init)
+ HYP_SECTION(.bss)
}
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
new file mode 100644
index 0000000..0cd3eb1
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Google LLC
+ * Author: Quentin Perret <qperret@google.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_cpufeature.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+#include <asm/kvm_pgtable.h>
+#include <asm/stage2_pgtable.h>
+
+#include <hyp/switch.h>
+
+#include <nvhe/gfp.h>
+#include <nvhe/memory.h>
+#include <nvhe/mem_protect.h>
+#include <nvhe/mm.h>
+
+extern unsigned long hyp_nr_cpus;
+struct host_kvm host_kvm;
+
+struct hyp_pool host_s2_mem;
+struct hyp_pool host_s2_dev;
+
+static void *host_s2_zalloc_pages_exact(size_t size)
+{
+ return hyp_alloc_pages(&host_s2_mem, HYP_GFP_ZERO, get_order(size));
+}
+
+static void *host_s2_zalloc_page(void *pool)
+{
+ return hyp_alloc_pages(pool, HYP_GFP_ZERO, 0);
+}
+
+static int prepare_s2_pools(void *mem_pgt_pool, void *dev_pgt_pool)
+{
+ unsigned long nr_pages;
+ int ret;
+
+ nr_pages = host_s2_mem_pgtable_size() >> PAGE_SHIFT;
+ ret = hyp_pool_init(&host_s2_mem, __hyp_pa(mem_pgt_pool), nr_pages, 0);
+ if (ret)
+ return ret;
+
+ nr_pages = host_s2_dev_pgtable_size() >> PAGE_SHIFT;
+ ret = hyp_pool_init(&host_s2_dev, __hyp_pa(dev_pgt_pool), nr_pages, 0);
+ if (ret)
+ return ret;
+
+ host_kvm.mm_ops.zalloc_pages_exact = host_s2_zalloc_pages_exact;
+ host_kvm.mm_ops.zalloc_page = host_s2_zalloc_page;
+ host_kvm.mm_ops.phys_to_virt = hyp_phys_to_virt;
+ host_kvm.mm_ops.virt_to_phys = hyp_virt_to_phys;
+ host_kvm.mm_ops.page_count = hyp_page_count;
+ host_kvm.mm_ops.get_page = hyp_get_page;
+ host_kvm.mm_ops.put_page = hyp_put_page;
+
+ return 0;
+}
+
+static void prepare_host_vtcr(void)
+{
+ u32 parange, phys_shift;
+ u64 mmfr0, mmfr1;
+
+ mmfr0 = arm64_ftr_reg_id_aa64mmfr0_el1.sys_val;
+ mmfr1 = arm64_ftr_reg_id_aa64mmfr1_el1.sys_val;
+
+ /* The host stage 2 is id-mapped, so use parange for T0SZ */
+ parange = kvm_get_parange(mmfr0);
+ phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange);
+
+ host_kvm.arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
+}
+
+int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool)
+{
+ struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu;
+ struct kvm_nvhe_init_params *params;
+ int ret, i;
+
+ prepare_host_vtcr();
+ hyp_spin_lock_init(&host_kvm.lock);
+
+ ret = prepare_s2_pools(mem_pgt_pool, dev_pgt_pool);
+ if (ret)
+ return ret;
+
+ ret = kvm_pgtable_stage2_init(&host_kvm.pgt, &host_kvm.arch,
+ &host_kvm.mm_ops);
+ if (ret)
+ return ret;
+
+ mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd);
+ mmu->arch = &host_kvm.arch;
+ mmu->pgt = &host_kvm.pgt;
+ mmu->vmid.vmid_gen = 0;
+ mmu->vmid.vmid = 0;
+
+ for (i = 0; i < hyp_nr_cpus; i++) {
+ params = per_cpu_ptr(&kvm_init_params, i);
+ params->vttbr = kvm_get_vttbr(mmu);
+ params->vtcr = host_kvm.arch.vtcr;
+ params->hcr_el2 |= HCR_VM;
+ __flush_dcache_area(params, sizeof(*params));
+ }
+
+ write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2);
+ __load_stage2(&host_kvm.arch.mmu, host_kvm.arch.vtcr);
+
+ return 0;
+}
+
+static void host_stage2_unmap_dev_all(void)
+{
+ struct kvm_pgtable *pgt = &host_kvm.pgt;
+ struct hyp_memblock_region *reg;
+ u64 addr = 0;
+ int i;
+
+ /* Unmap all non-memory regions to recycle the pages */
+ for (i = 0; i < hyp_memblock_nr; i++, addr = reg->end) {
+ reg = &hyp_memory[i];
+ kvm_pgtable_stage2_unmap(pgt, addr, reg->start - addr);
+ }
+ kvm_pgtable_stage2_unmap(pgt, addr, ULONG_MAX);
+}
+
+static bool ipa_is_memory(u64 ipa)
+{
+ int cur, left = 0, right = hyp_memblock_nr;
+ struct hyp_memblock_region *reg;
+
+ /* The list of memblock regions is sorted, binary search it */
+ while (left < right) {
+ cur = (left + right) >> 1;
+ reg = &hyp_memory[cur];
+ if (ipa < reg->start)
+ right = cur;
+ else if (ipa >= reg->end)
+ left = cur + 1;
+ else
+ return true;
+ }
+
+ return false;
+}
+
+static int __host_stage2_map(u64 ipa, u64 size, enum kvm_pgtable_prot prot,
+ struct hyp_pool *p)
+{
+ return kvm_pgtable_stage2_map(&host_kvm.pgt, ipa, size, ipa, prot, p);
+}
+
+static int host_stage2_map(u64 ipa, u64 size, enum kvm_pgtable_prot prot)
+{
+ int ret, is_memory = ipa_is_memory(ipa);
+ struct hyp_pool *pool;
+
+ pool = is_memory ? &host_s2_mem : &host_s2_dev;
+
+ hyp_spin_lock(&host_kvm.lock);
+ ret = __host_stage2_map(ipa, size, prot, pool);
+ if (ret == -ENOMEM && !is_memory) {
+ host_stage2_unmap_dev_all();
+ ret = __host_stage2_map(ipa, size, prot, pool);
+ }
+ hyp_spin_unlock(&host_kvm.lock);
+
+ return ret;
+}
+
+void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
+{
+ enum kvm_pgtable_prot prot;
+ u64 far, hpfar, esr, ipa;
+ int ret;
+
+ esr = read_sysreg_el2(SYS_ESR);
+ if (!__get_fault_info(esr, &far, &hpfar))
+ hyp_panic();
+
+ prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W | KVM_PGTABLE_PROT_X;
+ ipa = (hpfar & HPFAR_MASK) << 8;
+ ret = host_stage2_map(ipa, PAGE_SIZE, prot);
+ if (ret)
+ hyp_panic();
+}
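
To make the IPA reconstruction in handle_host_mem_abort() concrete: HPFAR_EL2 reports the faulting IPA's bits [51:12] in its FAR field at register bits [43:4], so masking and shifting left by 8 rebuilds a page-aligned IPA. A worked example with an illustrative fault address:

  u64 hpfar = (0x80000000ULL >> 12) << 4;   /* == 0x800000, as reported */
  u64 ipa   = (hpfar & HPFAR_MASK) << 8;    /* == 0x80000000, page-aligned */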
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
new file mode 100644
index 0000000..cad5dae
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Google LLC
+ * Author: Quentin Perret <qperret@google.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+#include <asm/kvm_pgtable.h>
+#include <asm/spectre.h>
+
+#include <nvhe/early_alloc.h>
+#include <nvhe/gfp.h>
+#include <nvhe/memory.h>
+#include <nvhe/mm.h>
+#include <nvhe/spinlock.h>
+
+struct kvm_pgtable hyp_pgtable;
+
+hyp_spinlock_t __hyp_pgd_lock;
+u64 __io_map_base;
+
+struct hyp_memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS];
+int hyp_memblock_nr;
+
+int __hyp_create_mappings(unsigned long start, unsigned long size,
+ unsigned long phys, unsigned long prot)
+{
+ int err;
+
+ hyp_spin_lock(&__hyp_pgd_lock);
+ err = kvm_pgtable_hyp_map(&hyp_pgtable, start, size, phys, prot);
+ hyp_spin_unlock(&__hyp_pgd_lock);
+
+ return err;
+}
+
+unsigned long __hyp_create_private_mapping(phys_addr_t phys, size_t size,
+ unsigned long prot)
+{
+ unsigned long addr;
+ int ret;
+
+ hyp_spin_lock(&__hyp_pgd_lock);
+
+ size = PAGE_ALIGN(size + offset_in_page(phys));
+ addr = __io_map_base;
+ __io_map_base += size;
+
+ /* Are we overflowing on the vmemmap? */
+ if (__io_map_base > __hyp_vmemmap) {
+ __io_map_base -= size;
+ addr = 0;
+ goto out;
+ }
+
+ ret = kvm_pgtable_hyp_map(&hyp_pgtable, addr, size, phys, prot);
+ if (ret) {
+ addr = 0;
+ goto out;
+ }
+
+ addr = addr + offset_in_page(phys);
+out:
+ hyp_spin_unlock(&__hyp_pgd_lock);
+
+ return addr;
+}
+
+int hyp_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
+{
+ unsigned long start = (unsigned long)from;
+ unsigned long end = (unsigned long)to;
+ unsigned long virt_addr;
+ phys_addr_t phys;
+
+ start = start & PAGE_MASK;
+ end = PAGE_ALIGN(end);
+
+ for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) {
+ int err;
+
+ phys = hyp_virt_to_phys((void *)virt_addr);
+ err = __hyp_create_mappings(virt_addr, PAGE_SIZE, phys, prot);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back)
+{
+ unsigned long start, end;
+
+ hyp_vmemmap_range(phys, size, &start, &end);
+
+ return __hyp_create_mappings(start, end - start, back, PAGE_HYP);
+}
+
+static void *__hyp_bp_vect_base;
+int hyp_cpu_set_vector(enum arm64_hyp_spectre_vector slot)
+{
+ void *vector;
+
+ switch (slot) {
+ case HYP_VECTOR_DIRECT: {
+ vector = hyp_symbol_addr(__kvm_hyp_vector);
+ break;
+ }
+ case HYP_VECTOR_SPECTRE_DIRECT: {
+ vector = hyp_symbol_addr(__bp_harden_hyp_vecs);
+ break;
+ }
+ case HYP_VECTOR_INDIRECT:
+ case HYP_VECTOR_SPECTRE_INDIRECT: {
+ vector = (void *)__hyp_bp_vect_base;
+ break;
+ }
+ default:
+ return -EINVAL;
+ }
+
+ vector = __kvm_vector_slot2addr(vector, slot);
+ *this_cpu_ptr(&kvm_hyp_vector) = (unsigned long)vector;
+
+ return 0;
+}
+
+int hyp_map_vectors(void)
+{
+ unsigned long bp_base;
+
+ if (!cpus_have_const_cap(ARM64_SPECTRE_V3A))
+ return 0;
+
+ bp_base = (unsigned long)hyp_symbol_addr(__bp_harden_hyp_vecs);
+ bp_base = __hyp_pa(bp_base);
+ bp_base = __hyp_create_private_mapping(bp_base, __BP_HARDEN_HYP_VECS_SZ,
+ PAGE_HYP_EXEC);
+ if (!bp_base)
+ return -1;
+
+ __hyp_bp_vect_base = (void *)bp_base;
+
+ return 0;
+}
+
+int hyp_create_idmap(void)
+{
+ unsigned long start, end;
+
+ start = (unsigned long)hyp_symbol_addr(__hyp_idmap_text_start);
+ start = hyp_virt_to_phys((void *)start);
+ start = ALIGN_DOWN(start, PAGE_SIZE);
+
+ end = (unsigned long)hyp_symbol_addr(__hyp_idmap_text_end);
+ end = hyp_virt_to_phys((void *)end);
+ end = ALIGN(end, PAGE_SIZE);
+
+ /*
+ * One half of the VA space is reserved to linearly map portions of
+ * memory -- see va_layout.c for more details. The other half of the VA
+ * space contains the trampoline page, and needs some care. Split that
+ * second half in two and find the quarter of VA space not conflicting
+ * with the idmap to place the IOs and the vmemmap. IOs use the lower
+ * half of the quarter and the vmemmap the upper half.
+ */
+ __io_map_base = start & BIT(hyp_va_bits - 2);
+ __io_map_base ^= BIT(hyp_va_bits - 2);
+ __hyp_vmemmap = __io_map_base | BIT(hyp_va_bits - 3);
+
+ return __hyp_create_mappings(start, end - start, start, PAGE_HYP_EXEC);
+}
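
To make the layout comment above concrete, assume hyp_va_bits == 48 and an idmap page whose address has bit 46 clear: 'start & BIT(46)' is 0, and the XOR flips it to BIT(46), so private (IO) mappings grow upwards from 0x4000_0000_0000 while the vmemmap sits at 0x6000_0000_0000 (BIT(46) | BIT(45)), both in the quarter of the VA space that cannot conflict with the idmap.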
diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
new file mode 100644
index 0000000..6de6515
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Google LLC
+ * Author: Quentin Perret <qperret@google.com>
+ */
+
+#include <asm/kvm_hyp.h>
+#include <nvhe/gfp.h>
+
+u64 __hyp_vmemmap;
+
+/*
+ * Example buddy-tree for a 4-page physically contiguous pool:
+ *
+ * o : Page 3
+ * /
+ * o-o : Page 2
+ * /
+ * / o : Page 1
+ * / /
+ * o---o-o : Page 0
+ * Order 2 1 0
+ *
+ * Example of requests on this pool:
+ * __find_buddy(pool, page 0, order 0) => page 1
+ * __find_buddy(pool, page 0, order 1) => page 2
+ * __find_buddy(pool, page 1, order 0) => page 0
+ * __find_buddy(pool, page 2, order 0) => page 3
+ */
+static struct hyp_page *__find_buddy(struct hyp_pool *pool, struct hyp_page *p,
+ unsigned int order)
+{
+ phys_addr_t addr = hyp_page_to_phys(p);
+
+ addr ^= (PAGE_SIZE << order);
+ if (addr < pool->range_start || addr >= pool->range_end)
+ return NULL;
+
+ return hyp_phys_to_page(addr);
+}
+
+static void __hyp_attach_page(struct hyp_pool *pool,
+ struct hyp_page *p)
+{
+ unsigned int order = p->order;
+ struct hyp_page *buddy;
+
+ p->order = HYP_NO_ORDER;
+ for (; order < HYP_MAX_ORDER; order++) {
+ /* Nothing to do if the buddy isn't in a free-list */
+ buddy = __find_buddy(pool, p, order);
+ if (!buddy || list_empty(&buddy->node) || buddy->order != order)
+ break;
+
+ /* Otherwise, coalesce the buddies and go one level up */
+ list_del_init(&buddy->node);
+ buddy->order = HYP_NO_ORDER;
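+ /* Keep the lower-addressed page as the head of the merged block */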
+ p = (p < buddy) ? p : buddy;
+ }
+
+ p->order = order;
+ list_add_tail(&p->node, &pool->free_area[order]);
+}
+
+void hyp_put_page(void *addr)
+{
+ struct hyp_page *p = hyp_virt_to_page(addr);
+ struct hyp_pool *pool = hyp_page_to_pool(p);
+
+ hyp_spin_lock(&pool->lock);
+ if (!p->refcount)
+ hyp_panic();
+ p->refcount--;
+ if (!p->refcount)
+ __hyp_attach_page(pool, p);
+ hyp_spin_unlock(&pool->lock);
+}
+
+void hyp_get_page(void *addr)
+{
+ struct hyp_page *p = hyp_virt_to_page(addr);
+ struct hyp_pool *pool = hyp_page_to_pool(p);
+
+ hyp_spin_lock(&pool->lock);
+ p->refcount++;
+ hyp_spin_unlock(&pool->lock);
+}
+
+/* Extract a page from the buddy tree, at a specific order */
+static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
+ struct hyp_page *p,
+ unsigned int order)
+{
+ struct hyp_page *buddy;
+
+ if (p->order == HYP_NO_ORDER || p->order < order)
+ return NULL;
+
+ list_del_init(&p->node);
+
+ /* Split the page in two until reaching the requested order */
+ while (p->order > order) {
+ p->order--;
+ buddy = __find_buddy(pool, p, p->order);
+ buddy->order = p->order;
+ list_add_tail(&buddy->node, &pool->free_area[buddy->order]);
+ }
+
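+ /* The caller gets the page with a single reference held */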
+ p->refcount = 1;
+
+ return p;
+}
+
+static void clear_hyp_page(struct hyp_page *p)
+{
+ unsigned long i;
+
+ for (i = 0; i < (1 << p->order); i++)
+ clear_page(hyp_page_to_virt(p) + (i << PAGE_SHIFT));
+}
+
+static void *__hyp_alloc_pages(struct hyp_pool *pool, gfp_t mask,
+ unsigned int order)
+{
+ unsigned int i = order;
+ struct hyp_page *p;
+
+ /* Look for a high-enough-order page */
+ while (i <= HYP_MAX_ORDER && list_empty(&pool->free_area[i]))
+ i++;
+ if (i > HYP_MAX_ORDER)
+ return NULL;
+
+ /* Extract it from the tree at the right order */
+ p = list_first_entry(&pool->free_area[i], struct hyp_page, node);
+ p = __hyp_extract_page(pool, p, order);
+
+ if (mask & HYP_GFP_ZERO)
+ clear_hyp_page(p);
+
+ return p;
+}
+
+void *hyp_alloc_pages(struct hyp_pool *pool, gfp_t mask, unsigned int order)
+{
+ struct hyp_page *p;
+
+ hyp_spin_lock(&pool->lock);
+ p = __hyp_alloc_pages(pool, mask, order);
+ hyp_spin_unlock(&pool->lock);
+
+ return p ? hyp_page_to_virt(p) : NULL;
+}
+
+/* hyp_vmemmap must be backed beforehand */
+int hyp_pool_init(struct hyp_pool *pool, phys_addr_t phys,
+ unsigned int nr_pages, unsigned int used_pages)
+{
+ struct hyp_page *p;
+ int i;
+
+ if (phys % PAGE_SIZE)
+ return -EINVAL;
+
+ hyp_spin_lock_init(&pool->lock);
+ for (i = 0; i <= HYP_MAX_ORDER; i++)
+ INIT_LIST_HEAD(&pool->free_area[i]);
+ pool->range_start = phys;
+ pool->range_end = phys + (nr_pages << PAGE_SHIFT);
+
+ /* Init the vmemmap portion */
+ p = hyp_phys_to_page(phys);
+ memset(p, 0, sizeof(*p) * nr_pages);
+ for (i = 0; i < nr_pages; i++, p++) {
+ p->pool = pool;
+ INIT_LIST_HEAD(&p->node);
+ }
+
+ /* Attach the unused pages to the buddy tree */
+ p = hyp_phys_to_page(phys + (used_pages << PAGE_SHIFT));
+ for (i = used_pages; i < nr_pages; i++, p++)
+ __hyp_attach_page(pool, p);
+
+ return 0;
+}
diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
index e394784..bdd8054 100644
--- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c
+++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
@@ -11,6 +11,7 @@
#include <linux/kvm_host.h>
#include <uapi/linux/psci.h>
+#include <nvhe/memory.h>
#include <nvhe/trap_handler.h>
void kvm_hyp_cpu_entry(unsigned long r0);
@@ -20,9 +21,6 @@
/* Config options set by the host. */
struct kvm_host_psci_config __ro_after_init kvm_host_psci_config;
-s64 __ro_after_init hyp_physvirt_offset;
-
-#define __hyp_pa(x) ((phys_addr_t)((x)) + hyp_physvirt_offset)
#define INVALID_CPU_ID UINT_MAX
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
new file mode 100644
index 0000000..a7f4b54
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Google LLC
+ * Author: Quentin Perret <qperret@google.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+#include <asm/kvm_pgtable.h>
+
+#include <nvhe/early_alloc.h>
+#include <nvhe/gfp.h>
+#include <nvhe/memory.h>
+#include <nvhe/mem_protect.h>
+#include <nvhe/mm.h>
+
+struct hyp_pool hpool;
+struct kvm_pgtable_mm_ops hyp_pgtable_mm_ops;
+unsigned long hyp_nr_cpus;
+
+#define hyp_percpu_size ((unsigned long)__per_cpu_end - \
+ (unsigned long)__per_cpu_start)
+
+#ifdef CONFIG_KVM_ARM_HYP_DEBUG_UART
+unsigned long arm64_kvm_hyp_debug_uart_addr;
+static int create_hyp_debug_uart_mapping(void)
+{
+ phys_addr_t base = CONFIG_KVM_ARM_HYP_DEBUG_UART_ADDR;
+ unsigned long haddr;
+
+ haddr = __hyp_create_private_mapping(base, PAGE_SIZE, PAGE_HYP_DEVICE);
+ if (!haddr)
+ return -1;
+
+ arm64_kvm_hyp_debug_uart_addr = haddr;
+
+ return 0;
+}
+#else
+static int create_hyp_debug_uart_mapping(void) { return 0; }
+#endif
+
+static void *stacks_base;
+static void *vmemmap_base;
+static void *hyp_pgt_base;
+static void *host_s2_mem_pgt_base;
+static void *host_s2_dev_pgt_base;
+
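+/*
+ * Carve the donated memory up-front using the early allocator: one stack
+ * page per CPU, the vmemmap backing pages, and the hyp stage-1 and host
+ * stage-2 (memory and device) page-table pools.
+ */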
+static int divide_memory_pool(void *virt, unsigned long size)
+{
+ unsigned long vstart, vend, nr_pages;
+
+ hyp_early_alloc_init(virt, size);
+
+ stacks_base = hyp_early_alloc_contig(hyp_nr_cpus);
+ if (!stacks_base)
+ return -ENOMEM;
+
+ hyp_vmemmap_range(__hyp_pa(virt), size, &vstart, &vend);
+ nr_pages = (vend - vstart) >> PAGE_SHIFT;
+ vmemmap_base = hyp_early_alloc_contig(nr_pages);
+ if (!vmemmap_base)
+ return -ENOMEM;
+
+ nr_pages = hyp_s1_pgtable_size() >> PAGE_SHIFT;
+ hyp_pgt_base = hyp_early_alloc_contig(nr_pages);
+ if (!hyp_pgt_base)
+ return -ENOMEM;
+
+ nr_pages = host_s2_mem_pgtable_size() >> PAGE_SHIFT;
+ host_s2_mem_pgt_base = hyp_early_alloc_contig(nr_pages);
+ if (!host_s2_mem_pgt_base)
+ return -ENOMEM;
+
+ nr_pages = host_s2_dev_pgtable_size() >> PAGE_SHIFT;
+ host_s2_dev_pgt_base = hyp_early_alloc_contig(nr_pages);
+ if (!host_s2_dev_pgt_base)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
+ unsigned long *per_cpu_base)
+{
+ void *start, *end, *virt = hyp_phys_to_virt(phys);
+ int ret, i;
+
+ /* Recreate the hyp page-table using the early page allocator */
+ hyp_early_alloc_init(hyp_pgt_base, hyp_s1_pgtable_size());
+ ret = kvm_pgtable_hyp_init(&hyp_pgtable, hyp_va_bits,
+ &hyp_early_alloc_mm_ops);
+ if (ret)
+ return ret;
+
+ ret = hyp_create_idmap();
+ if (ret)
+ return ret;
+
+ ret = hyp_map_vectors();
+ if (ret)
+ return ret;
+
+ ret = hyp_back_vmemmap(phys, size, hyp_virt_to_phys(vmemmap_base));
+ if (ret)
+ return ret;
+
+ ret = hyp_create_mappings(hyp_symbol_addr(__hyp_text_start),
+ hyp_symbol_addr(__hyp_text_end),
+ PAGE_HYP_EXEC);
+ if (ret)
+ return ret;
+
+ ret = hyp_create_mappings(hyp_symbol_addr(__start_rodata),
+ hyp_symbol_addr(__end_rodata), PAGE_HYP_RO);
+ if (ret)
+ return ret;
+
+ ret = hyp_create_mappings(hyp_symbol_addr(__hyp_data_ro_after_init_start),
+ hyp_symbol_addr(__hyp_data_ro_after_init_end),
+ PAGE_HYP_RO);
+ if (ret)
+ return ret;
+
+ ret = hyp_create_mappings(hyp_symbol_addr(__bss_start),
+ hyp_symbol_addr(__hyp_bss_end), PAGE_HYP);
+ if (ret)
+ return ret;
+
+ ret = hyp_create_mappings(hyp_symbol_addr(__hyp_bss_end),
+ hyp_symbol_addr(__bss_stop), PAGE_HYP_RO);
+ if (ret)
+ return ret;
+
+ ret = hyp_create_mappings(virt, virt + size - 1, PAGE_HYP);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < hyp_nr_cpus; i++) {
+ start = (void *)kern_hyp_va(per_cpu_base[i]);
+ end = start + PAGE_ALIGN(hyp_percpu_size);
+ ret = hyp_create_mappings(start, end, PAGE_HYP);
+ if (ret)
+ return ret;
+ }
+
+ ret = create_hyp_debug_uart_mapping();
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static void update_nvhe_init_params(void)
+{
+ struct kvm_nvhe_init_params *params;
+ unsigned long i, stack;
+
+ for (i = 0; i < hyp_nr_cpus; i++) {
+ stack = (unsigned long)stacks_base + (i << PAGE_SHIFT);
+ params = per_cpu_ptr(&kvm_init_params, i);
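+ /* The hyp stack grows downwards, so point the SP at the end of the page */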
+ params->stack_hyp_va = stack + PAGE_SIZE;
+ params->pgd_pa = __hyp_pa(hyp_pgtable.pgd);
+ __flush_dcache_area(params, sizeof(*params));
+ }
+}
+
+static void *hyp_zalloc_hyp_page(void *arg)
+{
+ return hyp_alloc_pages(&hpool, HYP_GFP_ZERO, 0);
+}
+
+void __noreturn __kvm_hyp_protect_finalise(void)
+{
+ struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
+ struct kvm_cpu_context *host_ctxt = &host_data->host_ctxt;
+ unsigned long nr_pages, used_pages;
+ int ret;
+
+ /* Now that the vmemmap is backed, install the full-fledged allocator */
+ nr_pages = hyp_s1_pgtable_size() >> PAGE_SHIFT;
+ used_pages = hyp_early_alloc_nr_pages();
+ ret = hyp_pool_init(&hpool, __hyp_pa(hyp_pgt_base), nr_pages, used_pages);
+ if (ret)
+ goto out;
+
+ /* Wrap the host with a stage 2 */
+ ret = kvm_host_prepare_stage2(host_s2_mem_pgt_base, host_s2_dev_pgt_base);
+ if (ret)
+ goto out;
+
+ hyp_pgtable_mm_ops.zalloc_page = hyp_zalloc_hyp_page;
+ hyp_pgtable_mm_ops.phys_to_virt = hyp_phys_to_virt;
+ hyp_pgtable_mm_ops.virt_to_phys = hyp_virt_to_phys;
+ hyp_pgtable_mm_ops.get_page = hyp_get_page;
+ hyp_pgtable_mm_ops.put_page = hyp_put_page;
+ hyp_pgtable.mm_ops = &hyp_pgtable_mm_ops;
+
+out:
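+ /* Report back to the host: x0 carries the SMCCC status, x1 the error code */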
+ host_ctxt->regs.regs[0] = SMCCC_RET_SUCCESS;
+ host_ctxt->regs.regs[1] = ret;
+
+ __host_enter(host_ctxt);
+}
+
+int __kvm_hyp_protect(phys_addr_t phys, unsigned long size,
+ unsigned long nr_cpus, unsigned long *per_cpu_base)
+{
+ struct kvm_nvhe_init_params *params;
+ void *virt = hyp_phys_to_virt(phys);
+ void (*fn)(phys_addr_t params_pa, void *finalize_fn_va);
+ int ret;
+
+ if (phys % PAGE_SIZE || size % PAGE_SIZE || (u64)virt % PAGE_SIZE)
+ return -EINVAL;
+
+ hyp_spin_lock_init(&__hyp_pgd_lock);
+ hyp_nr_cpus = nr_cpus;
+
+ ret = divide_memory_pool(virt, size);
+ if (ret)
+ return ret;
+
+ ret = recreate_hyp_mappings(phys, size, per_cpu_base);
+ if (ret)
+ return ret;
+
+ update_nvhe_init_params();
+
+ /* Jump into the idmap page to switch to the new page-tables */
+ params = this_cpu_ptr(&kvm_init_params);
+ fn = (typeof(fn))__hyp_pa(hyp_symbol_addr(__kvm_init_switch_pgd));
+ fn(__hyp_pa(params), hyp_symbol_addr(__kvm_hyp_protect_finalise));
+
+ unreachable();
+}
diff --git a/arch/arm64/kvm/hyp/nvhe/stub.c b/arch/arm64/kvm/hyp/nvhe/stub.c
new file mode 100644
index 0000000..c0aa6bb
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/stub.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Stubs for out-of-line function calls caused by re-using kernel
+ * infrastructure at EL2.
+ *
+ * Copyright (C) 2020 - Google LLC
+ */
+
+#include <linux/list.h>
+
+#ifdef CONFIG_DEBUG_LIST
+bool __list_add_valid(struct list_head *new, struct list_head *prev,
+ struct list_head *next)
+{
+ return true;
+}
+
+bool __list_del_entry_valid(struct list_head *entry)
+{
+ return true;
+}
+#endif
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index f3d0e9e..31bc1a8 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -28,6 +28,8 @@
#include <asm/processor.h>
#include <asm/thread_info.h>
+#include <nvhe/mem_protect.h>
+
/* Non-VHE specific context */
DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
@@ -97,19 +99,11 @@
mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
write_sysreg(mdcr_el2, mdcr_el2);
- if (is_protected_kvm_enabled())
- write_sysreg(HCR_HOST_NVHE_PROTECTED_FLAGS, hcr_el2);
- else
- write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2);
+ write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2);
write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
write_sysreg(__kvm_hyp_host_vector, vbar_el2);
}
-static void __load_host_stage2(void)
-{
- write_sysreg(0, vttbr_el2);
-}
-
/* Save VGICv3 state on non-VHE systems */
static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
{
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index fbde89a..255a23a 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -8,6 +8,8 @@
#include <asm/kvm_mmu.h>
#include <asm/tlbflush.h>
+#include <nvhe/mem_protect.h>
+
struct tlb_inv_context {
u64 tcr;
};
@@ -43,7 +45,7 @@
static void __tlb_switch_to_host(struct tlb_inv_context *cxt)
{
- write_sysreg(0, vttbr_el2);
+ __load_host_stage2();
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
/* Ensure write of the host VMID */
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index bdf8e55..9feec72 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -148,9 +148,9 @@
return pte;
}
-static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte)
+static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte, struct kvm_pgtable_mm_ops *mm_ops)
{
- return __va(kvm_pte_to_phys(pte));
+ return mm_ops->phys_to_virt(kvm_pte_to_phys(pte));
}
static void kvm_set_invalid_pte(kvm_pte_t *ptep)
@@ -159,9 +159,10 @@
WRITE_ONCE(*ptep, pte & ~KVM_PTE_VALID);
}
-static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp)
+static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp,
+ struct kvm_pgtable_mm_ops *mm_ops)
{
- kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(__pa(childp));
+ kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(mm_ops->virt_to_phys(childp));
pte |= FIELD_PREP(KVM_PTE_TYPE, KVM_PTE_TYPE_TABLE);
pte |= KVM_PTE_VALID;
@@ -229,7 +230,7 @@
goto out;
}
- childp = kvm_pte_follow(pte);
+ childp = kvm_pte_follow(pte, data->pgt->mm_ops);
ret = __kvm_pgtable_walk(data, childp, level + 1);
if (ret)
goto out;
@@ -304,8 +305,9 @@
}
struct hyp_map_data {
- u64 phys;
- kvm_pte_t attr;
+ u64 phys;
+ kvm_pte_t attr;
+ struct kvm_pgtable_mm_ops *mm_ops;
};
static int hyp_map_set_prot_attr(enum kvm_pgtable_prot prot,
@@ -355,6 +357,8 @@
enum kvm_pgtable_walk_flags flag, void * const arg)
{
kvm_pte_t *childp;
+ struct hyp_map_data *data = arg;
+ struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
if (hyp_map_walker_try_leaf(addr, end, level, ptep, arg))
return 0;
@@ -362,11 +366,11 @@
if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
return -EINVAL;
- childp = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL);
+ childp = (kvm_pte_t *)mm_ops->zalloc_page(NULL);
if (!childp)
return -ENOMEM;
- kvm_set_table_pte(ptep, childp);
+ kvm_set_table_pte(ptep, childp, mm_ops);
return 0;
}
@@ -376,6 +380,7 @@
int ret;
struct hyp_map_data map_data = {
.phys = ALIGN_DOWN(phys, PAGE_SIZE),
+ .mm_ops = pgt->mm_ops,
};
struct kvm_pgtable_walker walker = {
.cb = hyp_map_walker,
@@ -393,16 +398,18 @@
return ret;
}
-int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits)
+int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
+ struct kvm_pgtable_mm_ops *mm_ops)
{
u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits);
- pgt->pgd = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL);
+ pgt->pgd = (kvm_pte_t *)mm_ops->zalloc_page(NULL);
if (!pgt->pgd)
return -ENOMEM;
pgt->ia_bits = va_bits;
pgt->start_level = KVM_PGTABLE_MAX_LEVELS - levels;
+ pgt->mm_ops = mm_ops;
pgt->mmu = NULL;
return 0;
}
@@ -410,7 +417,9 @@
static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
enum kvm_pgtable_walk_flags flag, void * const arg)
{
- free_page((unsigned long)kvm_pte_follow(*ptep));
+ struct kvm_pgtable_mm_ops *mm_ops = arg;
+
+ mm_ops->put_page((void *)kvm_pte_follow(*ptep, mm_ops));
return 0;
}
@@ -419,10 +428,11 @@
struct kvm_pgtable_walker walker = {
.cb = hyp_free_walker,
.flags = KVM_PGTABLE_WALK_TABLE_POST,
+ .arg = pgt->mm_ops,
};
WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
- free_page((unsigned long)pgt->pgd);
+ pgt->mm_ops->put_page(pgt->pgd);
pgt->pgd = NULL;
}
@@ -433,7 +443,9 @@
kvm_pte_t *anchor;
struct kvm_s2_mmu *mmu;
- struct kvm_mmu_memory_cache *memcache;
+ void *memcache;
+
+ struct kvm_pgtable_mm_ops *mm_ops;
};
static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot,
@@ -465,6 +477,7 @@
kvm_pte_t *ptep,
struct stage2_map_data *data)
{
+ struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
u64 granule = kvm_granule_size(level), phys = data->phys;
if (!kvm_block_mapping_supported(addr, end, phys, level))
@@ -477,7 +490,7 @@
* valid PTE update.
*/
if (kvm_pte_valid(*ptep))
- put_page(virt_to_page(ptep));
+ mm_ops->put_page(ptep);
if (kvm_set_valid_leaf_pte(ptep, phys, data->attr, level))
goto out;
@@ -516,12 +529,12 @@
static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
struct stage2_map_data *data)
{
+ struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
kvm_pte_t *childp, pte = *ptep;
- struct page *page = virt_to_page(ptep);
if (data->anchor) {
if (kvm_pte_valid(pte))
- put_page(page);
+ mm_ops->put_page(ptep);
return 0;
}
@@ -535,7 +548,7 @@
if (!data->memcache)
return -ENOMEM;
- childp = kvm_mmu_memory_cache_alloc(data->memcache);
+ childp = mm_ops->zalloc_page(data->memcache);
if (!childp)
return -ENOMEM;
@@ -547,13 +560,13 @@
if (kvm_pte_valid(pte)) {
kvm_set_invalid_pte(ptep);
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level);
- put_page(page);
+ mm_ops->put_page(ptep);
}
- kvm_set_table_pte(ptep, childp);
+ kvm_set_table_pte(ptep, childp, mm_ops);
out_get_page:
- get_page(page);
+ mm_ops->get_page(ptep);
return 0;
}
@@ -561,13 +574,14 @@
kvm_pte_t *ptep,
struct stage2_map_data *data)
{
+ struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
int ret = 0;
if (!data->anchor)
return 0;
- free_page((unsigned long)kvm_pte_follow(*ptep));
- put_page(virt_to_page(ptep));
+ mm_ops->put_page(kvm_pte_follow(*ptep, mm_ops));
+ mm_ops->put_page(ptep);
if (data->anchor == ptep) {
data->anchor = NULL;
@@ -615,13 +629,14 @@
int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
u64 phys, enum kvm_pgtable_prot prot,
- struct kvm_mmu_memory_cache *mc)
+ void *mc)
{
int ret;
struct stage2_map_data map_data = {
.phys = ALIGN_DOWN(phys, PAGE_SIZE),
.mmu = pgt->mmu,
.memcache = mc,
+ .mm_ops = pgt->mm_ops,
};
struct kvm_pgtable_walker walker = {
.cb = stage2_map_walker,
@@ -658,7 +673,9 @@
enum kvm_pgtable_walk_flags flag,
void * const arg)
{
- struct kvm_s2_mmu *mmu = arg;
+ struct kvm_pgtable *pgt = arg;
+ struct kvm_s2_mmu *mmu = pgt->mmu;
+ struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
kvm_pte_t pte = *ptep, *childp = NULL;
bool need_flush = false;
@@ -666,9 +683,9 @@
return 0;
if (kvm_pte_table(pte, level)) {
- childp = kvm_pte_follow(pte);
+ childp = kvm_pte_follow(pte, mm_ops);
- if (page_count(virt_to_page(childp)) != 1)
+ if (mm_ops->page_count(childp) != 1)
return 0;
} else if (stage2_pte_cacheable(pte)) {
need_flush = true;
@@ -681,15 +698,15 @@
*/
kvm_set_invalid_pte(ptep);
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level);
- put_page(virt_to_page(ptep));
+ mm_ops->put_page(ptep);
if (need_flush) {
- stage2_flush_dcache(kvm_pte_follow(pte),
+ stage2_flush_dcache(kvm_pte_follow(pte, mm_ops),
kvm_granule_size(level));
}
if (childp)
- free_page((unsigned long)childp);
+ mm_ops->put_page(childp);
return 0;
}
@@ -698,7 +715,7 @@
{
struct kvm_pgtable_walker walker = {
.cb = stage2_unmap_walker,
- .arg = pgt->mmu,
+ .arg = pgt,
.flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
};
@@ -830,12 +847,13 @@
enum kvm_pgtable_walk_flags flag,
void * const arg)
{
+ struct kvm_pgtable_mm_ops *mm_ops = arg;
kvm_pte_t pte = *ptep;
if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pte))
return 0;
- stage2_flush_dcache(kvm_pte_follow(pte), kvm_granule_size(level));
+ stage2_flush_dcache(kvm_pte_follow(pte, mm_ops), kvm_granule_size(level));
return 0;
}
@@ -844,6 +862,7 @@
struct kvm_pgtable_walker walker = {
.cb = stage2_flush_walker,
.flags = KVM_PGTABLE_WALK_LEAF,
+ .arg = pgt->mm_ops,
};
if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
@@ -852,22 +871,24 @@
return kvm_pgtable_walk(pgt, addr, size, &walker);
}
-int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm)
+int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
+ struct kvm_pgtable_mm_ops *mm_ops)
{
size_t pgd_sz;
- u64 vtcr = kvm->arch.vtcr;
+ u64 vtcr = arch->vtcr;
u32 ia_bits = VTCR_EL2_IPA(vtcr);
u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
- pgt->pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ pgt->pgd = mm_ops->zalloc_pages_exact(pgd_sz);
if (!pgt->pgd)
return -ENOMEM;
pgt->ia_bits = ia_bits;
pgt->start_level = start_level;
- pgt->mmu = &kvm->arch.mmu;
+ pgt->mm_ops = mm_ops;
+ pgt->mmu = &arch->mmu;
/* Ensure zeroed PGD pages are visible to the hardware walker */
dsb(ishst);
@@ -878,15 +899,16 @@
enum kvm_pgtable_walk_flags flag,
void * const arg)
{
+ struct kvm_pgtable_mm_ops *mm_ops = arg;
kvm_pte_t pte = *ptep;
if (!kvm_pte_valid(pte))
return 0;
- put_page(virt_to_page(ptep));
+ mm_ops->put_page(ptep);
if (kvm_pte_table(pte, level))
- free_page((unsigned long)kvm_pte_follow(pte));
+ mm_ops->put_page(kvm_pte_follow(pte, mm_ops));
return 0;
}
@@ -898,10 +920,11 @@
.cb = stage2_free_walker,
.flags = KVM_PGTABLE_WALK_LEAF |
KVM_PGTABLE_WALK_TABLE_POST,
+ .arg = pgt->mm_ops,
};
WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
- free_pages_exact(pgt->pgd, pgd_sz);
+ pgt->mm_ops->free_pages_exact(pgt->pgd, pgd_sz);
pgt->pgd = NULL;
}
diff --git a/arch/arm64/kvm/hyp/reserved_mem.c b/arch/arm64/kvm/hyp/reserved_mem.c
new file mode 100644
index 0000000..72811c5
--- /dev/null
+++ b/arch/arm64/kvm/hyp/reserved_mem.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 - Google LLC
+ * Author: Quentin Perret <qperret@google.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/memblock.h>
+#include <linux/sort.h>
+
+#include <asm/kvm_host.h>
+
+#include <nvhe/memory.h>
+#include <nvhe/mm.h>
+
+phys_addr_t hyp_mem_base;
+phys_addr_t hyp_mem_size;
+
+void __init early_init_dt_add_memory_hyp(u64 base, u64 size)
+{
+ struct hyp_memblock_region *reg;
+
+ if (kvm_nvhe_sym(hyp_memblock_nr) >= HYP_MEMBLOCK_REGIONS)
+ kvm_nvhe_sym(hyp_memblock_nr) = -1;
+
+ if (kvm_nvhe_sym(hyp_memblock_nr) < 0)
+ return;
+
+ reg = kvm_nvhe_sym(hyp_memory);
+ reg[kvm_nvhe_sym(hyp_memblock_nr)].start = base;
+ reg[kvm_nvhe_sym(hyp_memblock_nr)].end = base + size;
+ kvm_nvhe_sym(hyp_memblock_nr)++;
+}
+
+static int cmp_hyp_memblock(const void *p1, const void *p2)
+{
+ const struct hyp_memblock_region *r1 = p1;
+ const struct hyp_memblock_region *r2 = p2;
+
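+ /* Three-way comparison on the region's start address: -1, 0 or 1 */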
+ return r1->start < r2->start ? -1 : (r1->start > r2->start);
+}
+
+static void __init sort_memblock_regions(void)
+{
+ sort(kvm_nvhe_sym(hyp_memory),
+ kvm_nvhe_sym(hyp_memblock_nr),
+ sizeof(struct hyp_memblock_region),
+ cmp_hyp_memblock,
+ NULL);
+}
+
+void __init kvm_hyp_reserve(void)
+{
+ u64 nr_pages, prev;
+
+ if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
+ return;
+
+ if (kvm_get_mode() != KVM_MODE_PROTECTED)
+ return;
+
+ if (kvm_nvhe_sym(hyp_memblock_nr) <= 0) {
+ kvm_err("Failed to register hyp memblocks\n");
+ return;
+ }
+
+ sort_memblock_regions();
+
+ hyp_mem_size += NR_CPUS << PAGE_SHIFT;
+ hyp_mem_size += hyp_s1_pgtable_size();
+ hyp_mem_size += host_s2_mem_pgtable_size();
+ hyp_mem_size += host_s2_dev_pgtable_size();
+
+ /*
+ * The hyp_vmemmap needs to be backed by pages, but these pages
+ * themselves need to be present in the vmemmap, so compute the number
+ * of pages needed by looking for a fixed point.
+ */
+ nr_pages = 0;
+ do {
+ prev = nr_pages;
+ nr_pages = (hyp_mem_size >> PAGE_SHIFT) + prev;
+ nr_pages = DIV_ROUND_UP(nr_pages * sizeof(struct hyp_page), PAGE_SIZE);
+ nr_pages += __hyp_pgtable_max_pages(nr_pages);
+ } while (nr_pages != prev);
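+ /*
+ * Rough illustration (not exact figures): with 4 KiB pages and a
+ * struct hyp_page of a few tens of bytes, each round only adds on the
+ * order of a hundredth of the previous total, so the sequence
+ * converges after a handful of iterations.
+ */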
+ hyp_mem_size += nr_pages << PAGE_SHIFT;
+
+ hyp_mem_base = memblock_find_in_range(0, memblock_end_of_DRAM(),
+ hyp_mem_size, SZ_2M);
+ if (!hyp_mem_base) {
+ kvm_err("Failed to reserve hyp memory\n");
+ return;
+ }
+ memblock_reserve(hyp_mem_base, hyp_mem_size);
+
+ kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
+ hyp_mem_base);
+}
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 7d2257c..62f3fd7 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -88,6 +88,48 @@
return !pfn_valid(pfn);
}
+static void *stage2_memcache_alloc_page(void *arg)
+{
+ struct kvm_mmu_memory_cache *mc = arg;
+ kvm_pte_t *ptep = NULL;
+
+ /* Allocated with __GFP_ZERO, so no need to zero again */
+ if (mc && mc->nobjs)
+ ptep = mc->objects[--mc->nobjs];
+
+ return ptep;
+}
+
+static void *kvm_host_zalloc_pages_exact(size_t size)
+{
+ return alloc_pages_exact(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+}
+
+static void kvm_host_get_page(void *addr)
+{
+ get_page(virt_to_page(addr));
+}
+
+static void kvm_host_put_page(void *addr)
+{
+ put_page(virt_to_page(addr));
+}
+
+static int kvm_host_page_count(void *addr)
+{
+ return page_count(virt_to_page(addr));
+}
+
+static phys_addr_t kvm_host_pa(void *addr)
+{
+ return __pa(addr);
+}
+
+static void *kvm_host_va(phys_addr_t phys)
+{
+ return __va(phys);
+}
+
/*
* Unmapping vs dcache management:
*
@@ -127,7 +169,7 @@
static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size,
bool may_block)
{
- struct kvm *kvm = mmu->kvm;
+ struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
phys_addr_t end = start + size;
assert_spin_locked(&kvm->mmu_lock);
@@ -183,15 +225,39 @@
if (hyp_pgtable) {
kvm_pgtable_hyp_destroy(hyp_pgtable);
kfree(hyp_pgtable);
+ hyp_pgtable = NULL;
}
mutex_unlock(&kvm_hyp_pgd_mutex);
}
+static bool kvm_host_owns_hyp_mappings(void)
+{
+ if (static_branch_likely(&kvm_protected_mode_initialized))
+ return false;
+
+ /*
+ * This can happen at boot time when __create_hyp_mappings() is called
+ * after the hyp protection has been enabled, but the static key has
+ * not been flipped yet.
+ */
+ if (!hyp_pgtable && is_protected_kvm_enabled())
+ return false;
+
+ BUG_ON(!hyp_pgtable);
+
+ return true;
+}
+
static int __create_hyp_mappings(unsigned long start, unsigned long size,
unsigned long phys, enum kvm_pgtable_prot prot)
{
int err;
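+ /* Once the hyp owns its stage-1, mapping requests must go through a hypercall */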
+ if (!kvm_host_owns_hyp_mappings()) {
+ return kvm_call_hyp_nvhe(__hyp_create_mappings,
+ start, size, phys, prot);
+ }
+
mutex_lock(&kvm_hyp_pgd_mutex);
err = kvm_pgtable_hyp_map(hyp_pgtable, start, size, phys, prot);
mutex_unlock(&kvm_hyp_pgd_mutex);
@@ -253,6 +319,16 @@
unsigned long base;
int ret = 0;
+ if (!kvm_host_owns_hyp_mappings()) {
+ base = kvm_call_hyp_nvhe(__hyp_create_private_mapping,
+ phys_addr, size, prot);
+ if (!base)
+ return -ENOMEM;
+ *haddr = base;
+
+ return 0;
+ }
+
mutex_lock(&kvm_hyp_pgd_mutex);
/*
@@ -290,6 +366,44 @@
return ret;
}
+#ifdef CONFIG_KVM_ARM_HYP_DEBUG_UART
+extern unsigned long __kvm_nvhe_arm64_kvm_hyp_debug_uart_addr;
+
+void __init kvm_hyp_debug_uart_set_basep(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr,
+ int nr_inst)
+{
+ int i;
+ u64 addr = (u64)kvm_ksym_ref(&__kvm_nvhe_arm64_kvm_hyp_debug_uart_addr);
+
+ BUG_ON(nr_inst != 4);
+
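+ /* Patch the movz/movk sequence: each instruction gets 16 bits of the address */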
+ for (i = 0; i < 4; ++i) {
+ u32 insn = le32_to_cpu(origptr[i]);
+
+ insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16,
+ insn,
+ addr & 0xffff);
+ BUG_ON(insn == AARCH64_BREAK_FAULT);
+ updptr[i] = cpu_to_le32(insn);
+ addr >>= 16;
+ }
+}
+
+static int create_hyp_debug_uart_mapping(void)
+{
+ if (is_kernel_in_hyp_mode())
+ return -EBUSY;
+
+ return __create_hyp_private_mapping(CONFIG_KVM_ARM_HYP_DEBUG_UART_ADDR,
+ PAGE_SIZE,
+ &__kvm_nvhe_arm64_kvm_hyp_debug_uart_addr,
+ PAGE_HYP_DEVICE);
+}
+#else
+static int create_hyp_debug_uart_mapping(void) { return 0; }
+#endif
+
/**
* create_hyp_io_mappings - Map IO into both kernel and HYP
* @phys_addr: The physical start address which gets mapped
@@ -351,6 +465,17 @@
return 0;
}
+static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
+ .zalloc_page = stage2_memcache_alloc_page,
+ .zalloc_pages_exact = kvm_host_zalloc_pages_exact,
+ .free_pages_exact = free_pages_exact,
+ .get_page = kvm_host_get_page,
+ .put_page = kvm_host_put_page,
+ .page_count = kvm_host_page_count,
+ .phys_to_virt = kvm_host_va,
+ .virt_to_phys = kvm_host_pa,
+};
+
/**
* kvm_init_stage2_mmu - Initialise an S2 MMU structure
* @kvm: The pointer to the KVM structure
@@ -374,7 +499,7 @@
if (!pgt)
return -ENOMEM;
- err = kvm_pgtable_stage2_init(pgt, kvm);
+ err = kvm_pgtable_stage2_init(pgt, &kvm->arch, &kvm_s2_mm_ops);
if (err)
goto out_free_pgtable;
@@ -387,7 +512,7 @@
for_each_possible_cpu(cpu)
*per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1;
- mmu->kvm = kvm;
+ mmu->arch = &kvm->arch;
mmu->pgt = pgt;
mmu->pgd_phys = __pa(pgt->pgd);
mmu->vmid.vmid_gen = 0;
@@ -469,7 +594,7 @@
void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
{
- struct kvm *kvm = mmu->kvm;
+ struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
struct kvm_pgtable *pgt = NULL;
spin_lock(&kvm->mmu_lock);
@@ -538,7 +663,7 @@
*/
static void stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end)
{
- struct kvm *kvm = mmu->kvm;
+ struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
stage2_apply_range_resched(kvm, addr, end, kvm_pgtable_stage2_wrprotect);
}
@@ -1205,10 +1330,23 @@
return err;
}
+static void *kvm_hyp_zalloc_page(void *arg)
+{
+ return (void *)get_zeroed_page(GFP_KERNEL);
+}
+
+static struct kvm_pgtable_mm_ops kvm_hyp_mm_ops = {
+ .zalloc_page = kvm_hyp_zalloc_page,
+ .get_page = kvm_host_get_page,
+ .put_page = kvm_host_put_page,
+ .phys_to_virt = kvm_host_va,
+ .virt_to_phys = kvm_host_pa,
+};
+
+u32 hyp_va_bits;
int kvm_mmu_init(void)
{
int err;
- u32 hyp_va_bits;
hyp_idmap_start = __pa_symbol(__hyp_idmap_text_start);
hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE);
@@ -1248,7 +1386,7 @@
goto out;
}
- err = kvm_pgtable_hyp_init(hyp_pgtable, hyp_va_bits);
+ err = kvm_pgtable_hyp_init(hyp_pgtable, hyp_va_bits, &kvm_hyp_mm_ops);
if (err)
goto out_free_pgtable;
@@ -1257,6 +1395,7 @@
goto out_destroy_pgtable;
io_map_base = hyp_idmap_start;
+ WARN_ON(create_hyp_debug_uart_mapping());
return 0;
out_destroy_pgtable:
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 47f3f03..6aae118 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -332,19 +332,10 @@
return 0;
}
-/*
- * Configure the VTCR_EL2 for this VM. The VTCR value is common
- * across all the physical CPUs on the system. We use system wide
- * sanitised values to fill in different fields, except for Hardware
- * Management of Access Flags. HA Flag is set unconditionally on
- * all CPUs, as it is safe to run with or without the feature and
- * the bit is RES0 on CPUs that don't support it.
- */
int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
{
- u64 vtcr = VTCR_EL2_FLAGS, mmfr0;
- u32 parange, phys_shift;
- u8 lvls;
+ u64 mmfr0, mmfr1;
+ u32 phys_shift;
if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
return -EINVAL;
@@ -359,33 +350,8 @@
}
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
- parange = cpuid_feature_extract_unsigned_field(mmfr0,
- ID_AA64MMFR0_PARANGE_SHIFT);
- if (parange > ID_AA64MMFR0_PARANGE_MAX)
- parange = ID_AA64MMFR0_PARANGE_MAX;
- vtcr |= parange << VTCR_EL2_PS_SHIFT;
+ mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
+ kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
- vtcr |= VTCR_EL2_T0SZ(phys_shift);
- /*
- * Use a minimum 2 level page table to prevent splitting
- * host PMD huge pages at stage2.
- */
- lvls = stage2_pgtable_levels(phys_shift);
- if (lvls < 2)
- lvls = 2;
- vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);
-
- /*
- * Enable the Hardware Access Flag management, unconditionally
- * on all CPUs. The features is RES0 on CPUs without the support
- * and must be ignored by the CPUs.
- */
- vtcr |= VTCR_EL2_HA;
-
- /* Set the vmid bits */
- vtcr |= (kvm_get_vmid_bits() == 16) ?
- VTCR_EL2_VS_16BIT :
- VTCR_EL2_VS_8BIT;
- kvm->arch.vtcr = vtcr;
return 0;
}
diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S
index 073acbf..b84b179 100644
--- a/arch/arm64/lib/clear_page.S
+++ b/arch/arm64/lib/clear_page.S
@@ -14,7 +14,7 @@
* Parameters:
* x0 - dest
*/
-SYM_FUNC_START(clear_page)
+SYM_FUNC_START_PI(clear_page)
mrs x1, dczid_el0
and w1, w1, #0xf
mov x2, #4
@@ -25,5 +25,5 @@
tst x0, #(PAGE_SIZE - 1)
b.ne 1b
ret
-SYM_FUNC_END(clear_page)
+SYM_FUNC_END_PI(clear_page)
EXPORT_SYMBOL(clear_page)
diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index e7a79396..29144f4 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -17,7 +17,7 @@
* x0 - dest
* x1 - src
*/
-SYM_FUNC_START(copy_page)
+SYM_FUNC_START_PI(copy_page)
alternative_if ARM64_HAS_NO_HW_PREFETCH
// Prefetch three cache lines ahead.
prfm pldl1strm, [x1, #128]
@@ -75,5 +75,5 @@
stnp x16, x17, [x0, #112 - 256]
ret
-SYM_FUNC_END(copy_page)
+SYM_FUNC_END_PI(copy_page)
EXPORT_SYMBOL(copy_page)
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 7deddf5..eff4763 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -35,6 +35,7 @@
#include <asm/fixmap.h>
#include <asm/kasan.h>
#include <asm/kernel-pgtable.h>
+#include <asm/kvm_host.h>
#include <asm/memory.h>
#include <asm/numa.h>
#include <asm/sections.h>
@@ -431,6 +432,8 @@
dma_pernuma_cma_reserve();
+ kvm_hyp_reserve();
+
/*
* sparse_init() tries to allocate memory from memblock, so must be
* done after the fixed reservations
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index feb0f2d..4b65e7f 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -1096,6 +1096,10 @@
#define MAX_MEMBLOCK_ADDR ((phys_addr_t)~0)
#endif
+void __init __weak early_init_dt_add_memory_hyp(u64 base, u64 size)
+{
+}
+
void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size)
{
const u64 phys_offset = MIN_MEMBLOCK_ADDR;
@@ -1136,6 +1140,7 @@
base = phys_offset;
}
memblock_add(base, size);
+ early_init_dt_add_memory_hyp(base, size);
}
int __init __weak early_init_dt_mark_hotplug_memory_arch(u64 base, u64 size)