// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2020 - Google LLC
* Author: Quentin Perret <qperret@google.com>
* Author: Fuad Tabba <tabba@google.com>
*/
#ifndef __ARM64_KVM_PKVM_H__
#define __ARM64_KVM_PKVM_H__
#include <linux/arm_ffa.h>
#include <linux/memblock.h>
#include <linux/scatterlist.h>
#include <asm/kvm_pgtable.h>
#include <asm/sysreg.h>
/* Maximum number of VMs that can co-exist under pKVM. */
#define KVM_MAX_PVMS 255
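/* Maximum number of host memory regions that can be recorded for the hypervisor. */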
#define HYP_MEMBLOCK_REGIONS 128
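/*
 * Host-side interface for setting up and tearing down a VM's hypervisor (EL2)
 * counterpart under pKVM.
 */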
int pkvm_init_host_vm(struct kvm *kvm, unsigned long type);
int pkvm_create_hyp_vm(struct kvm *kvm);
void pkvm_destroy_hyp_vm(struct kvm *kvm);
bool pkvm_is_hyp_created(struct kvm *kvm);
/*
* Definitions for features to be allowed or restricted for guest virtual
* machines, depending on the mode KVM is running in and on the type of guest
* that is running.
*
* Each field in the masks represents the highest supported *unsigned* value for
* the feature, if supported by the system.
*
* If a feature field is not present in the corresponding mask, then it is not
* supported.
*
* The approach taken for protected VMs is to allow features that are:
* - Needed by common Linux distributions (e.g., floating point)
* - Trivial to support, e.g., supporting the feature does not introduce or
* require tracking of additional state in KVM
* - Cannot be trapped, so the guest cannot be prevented from using them anyway
*/
/*
* Allow for protected VMs:
* - Floating-point and Advanced SIMD
* - GICv3(+) system register interface
* - Data Independent Timing
* - Spectre/Meltdown Mitigation
*
* Restrict to the following *unsigned* features for protected VMs:
* - AArch64 guests only (no support for AArch32 guests):
* AArch32 adds complexity in trap handling, emulation, condition codes,
* etc...
* - RAS (v1)
* Supported by KVM
*/
#define PVM_ID_AA64PFR0_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP) | \
ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD) | \
ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC) | \
ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_DIT) | \
ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) | \
ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3) | \
SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL0, IMP) | \
SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL1, IMP) | \
SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL2, IMP) | \
SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL3, IMP) | \
SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, RAS, IMP) \
)
/*
* Allow for protected VMs:
* - Branch Target Identification
* - Speculative Store Bypassing
*/
#define PVM_ID_AA64PFR1_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_BT) | \
ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SSBS) \
)
#define PVM_ID_AA64PFR2_ALLOW 0ULL
/*
* Allow for protected VMs:
* - Mixed-endian
* - Distinction between Secure and Non-secure Memory
* - Mixed-endian at EL0 only
* - Non-context synchronizing exception entry and exit
*
* Restrict to the following *unsigned* features for protected VMs:
* - 40-bit IPA
* - 16-bit ASID
*/
#define PVM_ID_AA64MMFR0_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGEND) | \
ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_SNSMEM) | \
ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGENDEL0) | \
ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_EXS) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_PARANGE), ID_AA64MMFR0_EL1_PARANGE_40) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_ASIDBITS), ID_AA64MMFR0_EL1_ASIDBITS_16) \
)
/*
* Allow for protected VMs:
* - Hardware translation table updates to Access flag and Dirty state
* - Number of VMID bits from CPU
* - Hierarchical Permission Disables
* - Privileged Access Never
* - SError interrupt exceptions from speculative reads
* - Enhanced Translation Synchronization
* - Control for cache maintenance permission
*/
#define PVM_ID_AA64MMFR1_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_VMIDBits) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HPDS) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_PAN) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_SpecSEI) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_CMOW) \
)
/*
* Allow for protected VMs:
* - Common not Private translations
* - User Access Override
* - IESB bit in the SCTLR_ELx registers
* - Unaligned single-copy atomicity and atomic functions
* - ESR_ELx.EC value on an exception by read access to feature ID space
* - TTL field in address operations.
* - Break-before-make sequences when changing translation block size
* - E0PDx mechanism
*/
#define PVM_ID_AA64MMFR2_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_CnP) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_UAO) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IESB) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_AT) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IDS) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_TTL) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_BBM) | \
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_E0PD) \
)
#define PVM_ID_AA64MMFR3_ALLOW (0ULL)
/*
* No support for Scalable Vectors for protected VMs:
* Requires additional support from KVM, e.g., context-switching and
* trapping at EL2
*/
#define PVM_ID_AA64ZFR0_ALLOW (0ULL)
/*
* No support for debug, including breakpoints and watchpoints, for protected
* VMs:
* The Arm architecture mandates support for at least the Armv8 debug
* architecture, which would include at least 2 hardware breakpoints and
* watchpoints. Providing that support to protected guests adds
* considerable state and complexity. Therefore, the reserved value of 0 is
* used for debug-related fields.
*/
#define PVM_ID_AA64DFR0_ALLOW (0ULL)
#define PVM_ID_AA64DFR1_ALLOW (0ULL)
/*
* No support for implementation defined features.
*/
#define PVM_ID_AA64AFR0_ALLOW (0ULL)
#define PVM_ID_AA64AFR1_ALLOW (0ULL)
/*
* No restrictions on instructions implemented in AArch64.
*/
#define PVM_ID_AA64ISAR0_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_AES) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA1) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA2) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_CRC32) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RDM) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA3) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM3) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM4) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_DP) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_FHM) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TS) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TLB) | \
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RNDR) \
)
/* Restrict pointer authentication to the basic version. */
#define PVM_ID_AA64ISAR1_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DPB) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_JSCVT) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FCMA) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_LRCPC) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FRINTTS) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SB) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SPECRES) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_BF16) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DGH) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_I8MM) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA), ID_AA64ISAR1_EL1_APA_PAuth) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API), ID_AA64ISAR1_EL1_API_PAuth) \
)
#define PVM_ID_AA64ISAR2_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_ATS1A) | \
ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3) | \
ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_MOPS) | \
FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3), ID_AA64ISAR2_EL1_APA3_PAuth) \
)
/*
* Returns the maximum number of breakpoints supported for protected VMs.
*/
static inline int pkvm_get_max_brps(void)
{
int num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs),
PVM_ID_AA64DFR0_ALLOW);
/*
* If breakpoints are supported, the maximum number is 1 + the field.
* Otherwise, return 0, which is not compliant with the architecture,
* but is reserved and is used here to indicate no debug support.
*/
return num ? num + 1 : 0;
}
/*
* Returns the maximum number of watchpoints supported for protected VMs.
*/
static inline int pkvm_get_max_wrps(void)
{
int num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs),
PVM_ID_AA64DFR0_ALLOW);
return num ? num + 1 : 0;
}
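/* Host memory regions recorded for the nVHE hypervisor at initialisation. */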
extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);
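/*
 * Page-aligned size, in bytes, of the hypervisor vmemmap slice covering a
 * single memory region, given the size of one per-page metadata entry.
 */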
static inline unsigned long
hyp_vmemmap_memblock_size(struct memblock_region *reg, size_t vmemmap_entry_size)
{
unsigned long nr_pages = reg->size >> PAGE_SHIFT;
unsigned long start, end;
start = (reg->base >> PAGE_SHIFT) * vmemmap_entry_size;
end = start + nr_pages * vmemmap_entry_size;
start = ALIGN_DOWN(start, PAGE_SIZE);
end = ALIGN(end, PAGE_SIZE);
return end - start;
}
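/*
 * Number of pages needed for the hypervisor's vmemmap, i.e., per-page metadata
 * entries of @vmemmap_entry_size bytes covering every hyp memory region.
 */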
static inline unsigned long hyp_vmemmap_pages(size_t vmemmap_entry_size)
{
unsigned long res = 0, i;
for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
res += hyp_vmemmap_memblock_size(&kvm_nvhe_sym(hyp_memory)[i],
vmemmap_entry_size);
}
return res >> PAGE_SHIFT;
}
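/* Number of pages needed for the hypervisor's table of KVM_MAX_PVMS VM pointers. */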
static inline unsigned long hyp_vm_table_pages(void)
{
return PAGE_ALIGN(KVM_MAX_PVMS * sizeof(void *)) >> PAGE_SHIFT;
}
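/*
 * Worst-case number of page-table pages needed to map @nr_pages pages at page
 * granularity, accumulated over all translation levels.
 *
 * For example, assuming 4KiB pages (PTRS_PER_PTE == 512) and four levels,
 * mapping 1GiB (262144 pages) needs at most 512 + 1 + 1 + 1 = 515 table pages.
 */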
static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
{
unsigned long total = 0;
int i;
/* Provision the worst case scenario */
for (i = KVM_PGTABLE_FIRST_LEVEL; i <= KVM_PGTABLE_LAST_LEVEL; i++) {
nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE);
total += nr_pages;
}
return total;
}
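/*
 * Worst-case number of page-table pages needed to map every hyp memory region
 * at page granularity.
 */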
static inline unsigned long __hyp_pgtable_total_pages(void)
{
unsigned long res = 0, i;
/* Cover all of memory with page-granularity */
for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
struct memblock_region *reg = &kvm_nvhe_sym(hyp_memory)[i];
res += __hyp_pgtable_max_pages(reg->size >> PAGE_SHIFT);
}
return res;
}
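/*
 * Number of pages to reserve for the hypervisor's stage-1 page-table, sized to
 * map all of memory at page granularity plus 1GiB of private mappings.
 */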
static inline unsigned long hyp_s1_pgtable_pages(void)
{
unsigned long res;
res = __hyp_pgtable_total_pages();
/* Allow 1 GiB for private mappings */
res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
return res;
}
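/*
 * Number of pages to reserve for the host's stage-2 page-table, sized to map
 * all of memory at page granularity plus 1GiB of MMIO mappings.
 */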
static inline unsigned long host_s2_pgtable_pages(void)
{
unsigned long res;
/*
* Include an extra 16 pages to safely upper-bound the worst case of
* concatenated pgds.
*/
res = __hyp_pgtable_total_pages() + 16;
/* Allow 1 GiB for MMIO mappings */
res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
return res;
}
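/* Number of pages used for each of the hypervisor's FF-A RX/TX mailboxes. */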
#define KVM_FFA_MBOX_NR_PAGES 1
static inline unsigned long hyp_ffa_proxy_pages(void)
{
size_t desc_max;
/*
* The hypervisor FFA proxy needs enough memory to buffer a fragmented
* descriptor returned from EL3 in response to a RETRIEVE_REQ call.
*/
desc_max = sizeof(struct ffa_mem_region) +
sizeof(struct ffa_mem_region_attributes) +
sizeof(struct ffa_composite_mem_region) +
SG_MAX_SEGMENTS * sizeof(struct ffa_mem_region_addr_range);
/* Plus a page each for the hypervisor's RX and TX mailboxes. */
return (2 * KVM_FFA_MBOX_NR_PAGES) + DIV_ROUND_UP(desc_max, PAGE_SIZE);
}
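/*
 * Size of the buffer needed to save the host's SVE register state at its
 * maximum vector length, or 0 if the system does not support SVE.
 */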
static inline size_t pkvm_host_sve_state_size(void)
{
if (!system_supports_sve())
return 0;
return size_add(sizeof(struct cpu_sve_state),
SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl)));
}
#endif /* __ARM64_KVM_PKVM_H__ */