blob: f7f6b08dd371047ad1e6be993b2af408d01f1611 [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2020 - Google LLC
* Author: Quentin Perret <qperret@google.com>
*/
#include <linux/io.h>
#include <linux/kvm_host.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/of_fdt.h>
#include <linux/of_reserved_mem.h>
#include <linux/sort.h>
#include <asm/kvm_pkvm.h>
#include "hyp_constants.h"
static struct reserved_mem *pkvm_firmware_mem;
static phys_addr_t *pvmfw_base = &kvm_nvhe_sym(pvmfw_base);
static phys_addr_t *pvmfw_size = &kvm_nvhe_sym(pvmfw_size);
static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);
phys_addr_t hyp_mem_base;
phys_addr_t hyp_mem_size;
static int cmp_hyp_memblock(const void *p1, const void *p2)
{
const struct memblock_region *r1 = p1;
const struct memblock_region *r2 = p2;
return r1->base < r2->base ? -1 : (r1->base > r2->base);
}
static void __init sort_memblock_regions(void)
{
sort(hyp_memory,
*hyp_memblock_nr_ptr,
sizeof(struct memblock_region),
cmp_hyp_memblock,
NULL);
}
static int __init register_memblock_regions(void)
{
struct memblock_region *reg;
for_each_mem_region(reg) {
if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
return -ENOMEM;
hyp_memory[*hyp_memblock_nr_ptr] = *reg;
(*hyp_memblock_nr_ptr)++;
}
sort_memblock_regions();
return 0;
}
void __init kvm_hyp_reserve(void)
{
u64 hyp_mem_pages = 0;
int ret;
if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
return;
if (kvm_get_mode() != KVM_MODE_PROTECTED)
return;
ret = register_memblock_regions();
if (ret) {
*hyp_memblock_nr_ptr = 0;
kvm_err("Failed to register hyp memblocks: %d\n", ret);
return;
}
hyp_mem_pages += hyp_s1_pgtable_pages();
hyp_mem_pages += host_s2_pgtable_pages();
hyp_mem_pages += hyp_shadow_table_pages(KVM_SHADOW_VM_SIZE);
hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
hyp_mem_pages += hyp_ffa_proxy_pages();
/*
* Try to allocate a PMD-aligned region to reduce TLB pressure once
* this is unmapped from the host stage-2, and fallback to PAGE_SIZE.
*/
hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
hyp_mem_base = memblock_find_in_range(0, memblock_end_of_DRAM(),
ALIGN(hyp_mem_size, PMD_SIZE),
PMD_SIZE);
if (!hyp_mem_base)
hyp_mem_base = memblock_find_in_range(0, memblock_end_of_DRAM(),
hyp_mem_size, PAGE_SIZE);
else
hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);
if (!hyp_mem_base) {
kvm_err("Failed to reserve hyp memory\n");
return;
}
memblock_reserve(hyp_mem_base, hyp_mem_size);
kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
hyp_mem_base);
}
/*
* Allocates and donates memory for EL2 shadow structs.
*
* Allocates space for the shadow state, which includes the shadow vm as well as
* the shadow vcpu states.
*
* Stores an opaque handler in the kvm struct for future reference.
*
* Return 0 on success, negative error code on failure.
*/
static int __create_el2_shadow(struct kvm *kvm)
{
struct kvm_vcpu *vcpu, **vcpu_array;
size_t pgd_sz, shadow_sz;
void *pgd, *shadow_addr;
unsigned long idx;
int shadow_handle;
int ret;
if (kvm->created_vcpus < 1)
return -EINVAL;
pgd_sz = kvm_pgtable_stage2_pgd_size(kvm->arch.vtcr);
/*
* The PGD pages will be reclaimed using a hyp_memcache which implies
* page granularity. So, use alloc_pages_exact() to get individual
* refcounts.
*/
pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
if (!pgd)
return -ENOMEM;
/* Allocate memory to donate to hyp for the kvm and vcpu state. */
shadow_sz = PAGE_ALIGN(KVM_SHADOW_VM_SIZE +
SHADOW_VCPU_STATE_SIZE * kvm->created_vcpus);
shadow_addr = alloc_pages_exact(shadow_sz, GFP_KERNEL_ACCOUNT);
if (!shadow_addr) {
ret = -ENOMEM;
goto free_pgd;
}
/* Stash the vcpu pointers into the PGD */
BUILD_BUG_ON(KVM_MAX_VCPUS > (PAGE_SIZE / sizeof(u64)));
vcpu_array = pgd;
kvm_for_each_vcpu(idx, vcpu, kvm)
vcpu_array[idx] = vcpu;
/* Donate the shadow memory to hyp and let hyp initialize it. */
ret = kvm_call_hyp_nvhe(__pkvm_init_shadow, kvm, shadow_addr, shadow_sz,
pgd);
if (ret < 0)
goto free_shadow;
shadow_handle = ret;
/* Store the shadow handle given by hyp for future call reference. */
kvm->arch.pkvm.shadow_handle = shadow_handle;
return 0;
free_shadow:
free_pages_exact(shadow_addr, shadow_sz);
free_pgd:
free_pages_exact(pgd, pgd_sz);
return ret;
}
int create_el2_shadow(struct kvm *kvm)
{
int ret = 0;
mutex_lock(&kvm->arch.pkvm.shadow_lock);
if (!kvm->arch.pkvm.shadow_handle)
ret = __create_el2_shadow(kvm);
mutex_unlock(&kvm->arch.pkvm.shadow_lock);
return ret;
}
void kvm_shadow_destroy(struct kvm *kvm)
{
struct kvm_pinned_page *ppage, *tmp;
struct mm_struct *mm = current->mm;
struct list_head *ppages;
if (kvm->arch.pkvm.shadow_handle)
WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_shadow,
kvm->arch.pkvm.shadow_handle));
free_hyp_memcache(&kvm->arch.pkvm.teardown_mc);
ppages = &kvm->arch.pkvm.pinned_pages;
list_for_each_entry_safe(ppage, tmp, ppages, link) {
WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_reclaim_page,
page_to_pfn(ppage->page)));
cond_resched();
account_locked_vm(mm, 1, false);
unpin_user_pages_dirty_lock(&ppage->page, 1, true);
list_del(&ppage->link);
kfree(ppage);
}
}
static int __init pkvm_firmware_rmem_err(struct reserved_mem *rmem,
const char *reason)
{
phys_addr_t end = rmem->base + rmem->size;
kvm_err("Ignoring pkvm guest firmware memory reservation [%pa - %pa]: %s\n",
&rmem->base, &end, reason);
return -EINVAL;
}
static int __init pkvm_firmware_rmem_init(struct reserved_mem *rmem)
{
unsigned long node = rmem->fdt_node;
if (pkvm_firmware_mem)
return pkvm_firmware_rmem_err(rmem, "duplicate reservation");
if (!of_get_flat_dt_prop(node, "no-map", NULL))
return pkvm_firmware_rmem_err(rmem, "missing \"no-map\" property");
if (of_get_flat_dt_prop(node, "reusable", NULL))
return pkvm_firmware_rmem_err(rmem, "\"reusable\" property unsupported");
if (!PAGE_ALIGNED(rmem->base))
return pkvm_firmware_rmem_err(rmem, "base is not page-aligned");
if (!PAGE_ALIGNED(rmem->size))
return pkvm_firmware_rmem_err(rmem, "size is not page-aligned");
*pvmfw_size = rmem->size;
*pvmfw_base = rmem->base;
pkvm_firmware_mem = rmem;
return 0;
}
RESERVEDMEM_OF_DECLARE(pkvm_firmware, "linux,pkvm-guest-firmware-memory",
pkvm_firmware_rmem_init);
static int __init pkvm_firmware_rmem_clear(void)
{
void *addr;
phys_addr_t size;
if (likely(!pkvm_firmware_mem) || is_protected_kvm_enabled())
return 0;
kvm_info("Clearing unused pKVM firmware memory\n");
size = pkvm_firmware_mem->size;
addr = memremap(pkvm_firmware_mem->base, size, MEMREMAP_WB);
if (!addr)
return -EINVAL;
memset(addr, 0, size);
dcache_clean_poc((unsigned long)addr, (unsigned long)addr + size);
memunmap(addr);
return 0;
}
device_initcall_sync(pkvm_firmware_rmem_clear);
static int pkvm_vm_ioctl_set_fw_ipa(struct kvm *kvm, u64 ipa)
{
int ret = 0;
if (!pkvm_firmware_mem)
return -EINVAL;
mutex_lock(&kvm->arch.pkvm.shadow_lock);
if (kvm->arch.pkvm.shadow_handle) {
ret = -EBUSY;
goto out_unlock;
}
kvm->arch.pkvm.pvmfw_load_addr = ipa;
out_unlock:
mutex_unlock(&kvm->arch.pkvm.shadow_lock);
return ret;
}
static int pkvm_vm_ioctl_info(struct kvm *kvm,
struct kvm_protected_vm_info __user *info)
{
struct kvm_protected_vm_info kinfo = {
.firmware_size = pkvm_firmware_mem ?
pkvm_firmware_mem->size :
0,
};
return copy_to_user(info, &kinfo, sizeof(kinfo)) ? -EFAULT : 0;
}
int kvm_arm_vm_ioctl_pkvm(struct kvm *kvm, struct kvm_enable_cap *cap)
{
if (cap->args[1] || cap->args[2] || cap->args[3])
return -EINVAL;
switch (cap->flags) {
case KVM_CAP_ARM_PROTECTED_VM_FLAGS_SET_FW_IPA:
return pkvm_vm_ioctl_set_fw_ipa(kvm, cap->args[0]);
case KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO:
return pkvm_vm_ioctl_info(kvm, (void __force __user *)cap->args[0]);
default:
return -EINVAL;
}
return 0;
}
int kvm_init_pvm(struct kvm *kvm, unsigned long type)
{
mutex_init(&kvm->arch.pkvm.shadow_lock);
kvm->arch.pkvm.pvmfw_load_addr = PVMFW_INVALID_LOAD_ADDR;
if (!(type & KVM_VM_TYPE_ARM_PROTECTED))
return 0;
if (!is_protected_kvm_enabled())
return -EINVAL;
kvm->arch.pkvm.enabled = true;
return 0;
}