blob: f689a9de6da8af4656deec126e6d857a3bb33fab [file] [log] [blame] [edit]
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2020 - Google LLC
* Author: Quentin Perret <qperret@google.com>
*/
#include <linux/init.h>
#include <linux/io.h>
#include <linux/kmemleak.h>
#include <linux/kvm_host.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/of_fdt.h>
#include <linux/of_reserved_mem.h>
#include <linux/of_address.h>
#include <linux/sort.h>
#include <linux/debugfs.h>
#include <asm/kvm_host.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pkvm.h>
#include <asm/kvm_pkvm_module.h>
#include <asm/setup.h>
#include <kvm/device.h>
#include "hyp_constants.h"
/* Device-tree "compatible" string matched when looking up assignable devices. */
#define PKVM_DEVICE_ASSIGN_COMPAT "pkvm,device-assignment"
/* Static key enabled by pkvm_drop_host_privileges(). */
DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
/* Reserved-memory region accepted by pkvm_firmware_rmem_init(), NULL if none. */
static struct reserved_mem *pkvm_firmware_mem;
/* Host-side aliases of symbols that live in the nVHE hypervisor object. */
static phys_addr_t *pvmfw_base = &kvm_nvhe_sym(pvmfw_base);
static phys_addr_t *pvmfw_size = &kvm_nvhe_sym(pvmfw_size);
static struct pkvm_moveable_reg *moveable_regs = kvm_nvhe_sym(pkvm_moveable_regs);
static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);
/* Base and size of the memory carved out by kvm_hyp_reserve(). */
phys_addr_t hyp_mem_base;
phys_addr_t hyp_mem_size;
/* Assignable device table and its length, filled in by pkvm_init_devices(). */
extern struct pkvm_device *kvm_nvhe_sym(registered_devices);
extern u32 kvm_nvhe_sym(registered_devices_nr);
/* sort() comparator: orders memblock regions by ascending base address. */
static int cmp_hyp_memblock(const void *p1, const void *p2)
{
	const struct memblock_region *lhs = p1;
	const struct memblock_region *rhs = p2;

	if (lhs->base < rhs->base)
		return -1;
	if (lhs->base > rhs->base)
		return 1;
	return 0;
}
/* Sort the hypervisor's copy of the memblock regions by base address. */
static void __init sort_memblock_regions(void)
{
	unsigned int nr_regions = *hyp_memblock_nr_ptr;

	sort(hyp_memory, nr_regions, sizeof(*hyp_memory), cmp_hyp_memblock, NULL);
}
/*
 * Copy every memblock memory region into the hypervisor's hyp_memory[]
 * array, then sort the array.
 *
 * Return: 0 on success, -ENOMEM once HYP_MEMBLOCK_REGIONS entries are in use
 * and another region remains (the array is left partially filled and
 * unsorted in that case).
 */
static int __init register_memblock_regions(void)
{
	struct memblock_region *reg;
	unsigned int slot;

	for_each_mem_region(reg) {
		slot = *hyp_memblock_nr_ptr;
		if (slot >= HYP_MEMBLOCK_REGIONS)
			return -ENOMEM;

		hyp_memory[slot] = *reg;
		*hyp_memblock_nr_ptr = slot + 1;
	}

	sort_memblock_regions();

	return 0;
}
/*
 * sort() comparator for moveable regions: ascending start address first.
 *
 * Moveable regions may overlap, so when two regions start at the same
 * address the larger one sorts first; this allows a simpler walk from e.g.
 * host_stage2_unmap_unmoveable_regs().
 */
static int cmp_moveable_reg(const void *p1, const void *p2)
{
	const struct pkvm_moveable_reg *a = p1;
	const struct pkvm_moveable_reg *b = p2;

	if (a->start != b->start)
		return a->start < b->start ? -1 : 1;

	if (a->size != b->size)
		return a->size > b->size ? -1 : 1;

	return 0;
}
/* Sort the registered moveable regions with cmp_moveable_reg(). */
static void __init sort_moveable_regs(void)
{
	unsigned int nr_regs = kvm_nvhe_sym(pkvm_moveable_regs_nr);

	sort(moveable_regs, nr_regs, sizeof(*moveable_regs), cmp_moveable_reg, NULL);
}
/*
 * Append every address range of device-tree node @np to the moveable region
 * table, tagged with @type.
 *
 * The published region count is only updated once all ranges of the node
 * were accepted. Note that the capacity check uses ">=", so this helper
 * never fills the very last slot of the table.
 *
 * Return: 0 on success, -ENOMEM if the table is too small, -EINVAL if a
 * range is not page-aligned, or the error from of_address_to_resource().
 */
static int __init register_moveable_fdt_resource(struct device_node *np,
						 enum pkvm_moveable_reg_type type)
{
	unsigned int next = kvm_nvhe_sym(pkvm_moveable_regs_nr);
	unsigned int nr_addrs = 0, n;
	struct resource res;
	int ret;

	/* Count the address entries exposed by the node. */
	while (of_get_address(np, nr_addrs, NULL, NULL))
		nr_addrs++;

	if (next + nr_addrs >= PKVM_NR_MOVEABLE_REGS)
		return -ENOMEM;

	for (n = 0; n < nr_addrs; n++) {
		u64 start, size;

		ret = of_address_to_resource(np, n, &res);
		if (ret)
			return ret;

		start = res.start;
		size = resource_size(&res);
		if (!PAGE_ALIGNED(start) || !PAGE_ALIGNED(size))
			return -EINVAL;

		moveable_regs[next].start = start;
		moveable_regs[next].size = size;
		moveable_regs[next].type = type;
		next++;
	}

	kvm_nvhe_sym(pkvm_moveable_regs_nr) = next;

	return 0;
}
/*
 * Build the hypervisor's table of moveable regions from:
 *  - every memblock memory region (PKVM_MREG_MEMORY),
 *  - the ranges of every "pkvm,protected-region" node
 *    (PKVM_MREG_PROTECTED_RANGE),
 *  - the ranges of every device referenced by the "devices" property of a
 *    PKVM_DEVICE_ASSIGN_COMPAT node (PKVM_MREG_ASSIGN_MMIO),
 * and sort the result.
 *
 * Return: 0 on success, -ENOMEM if the table overflows, or the error
 * reported by register_moveable_fdt_resource().
 */
static int __init register_moveable_regions(void)
{
	struct memblock_region *reg;
	struct device_node *np;
	int i = 0, ret = 0;

	for_each_mem_region(reg) {
		if (i >= PKVM_NR_MOVEABLE_REGS)
			return -ENOMEM;
		moveable_regs[i].start = reg->base;
		moveable_regs[i].size = reg->size;
		moveable_regs[i].type = PKVM_MREG_MEMORY;
		i++;
	}
	kvm_nvhe_sym(pkvm_moveable_regs_nr) = i;

	for_each_compatible_node(np, NULL, "pkvm,protected-region") {
		ret = register_moveable_fdt_resource(np, PKVM_MREG_PROTECTED_RANGE);
		if (ret) {
			/* Leaving the iterator early: drop its reference. */
			of_node_put(np);
			return ret;
		}
	}

	for_each_compatible_node(np, NULL, PKVM_DEVICE_ASSIGN_COMPAT) {
		struct of_phandle_args args;
		/* The phandle index is relative to each node's own list. */
		int idx = 0;

		while (!of_parse_phandle_with_fixed_args(np, "devices", 1, idx, &args)) {
			idx++;
			ret = register_moveable_fdt_resource(args.np, PKVM_MREG_ASSIGN_MMIO);
			/* The parse helper returned args.np with a reference held. */
			of_node_put(args.np);
			if (ret) {
				of_node_put(np);
				return ret;
			}
		}
	}

	sort_moveable_regs();

	return ret;
}
/*
 * Early-boot reservation of the memory handed to the protected-mode
 * hypervisor.
 *
 * Does nothing unless hyp mode is available, the kernel is not itself
 * running in hyp mode and KVM runs in protected mode. Registers the memblock
 * and moveable regions with the hypervisor, sums up the pages the
 * hypervisor needs and allocates them from memblock, publishing the result
 * in hyp_mem_base / hyp_mem_size.
 */
void __init kvm_hyp_reserve(void)
{
	u64 nr_pages = 0;
	phys_addr_t pmd_aligned_size;
	int err;

	if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
		return;

	if (kvm_get_mode() != KVM_MODE_PROTECTED)
		return;

	err = register_memblock_regions();
	if (err) {
		*hyp_memblock_nr_ptr = 0;
		kvm_err("Failed to register hyp memblocks: %d\n", err);
		return;
	}

	err = register_moveable_regions();
	if (err) {
		*hyp_memblock_nr_ptr = 0;
		kvm_err("Failed to register pkvm moveable regions: %d\n", err);
		return;
	}

	nr_pages += hyp_s1_pgtable_pages();
	nr_pages += host_s2_pgtable_pages();
	nr_pages += hyp_vm_table_pages();
	nr_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
	nr_pages += hyp_ffa_proxy_pages();
	nr_pages += hyp_host_fp_pages(num_possible_cpus());

	/*
	 * Try to allocate a PMD-aligned region to reduce TLB pressure once
	 * this is unmapped from the host stage-2, and fallback to PAGE_SIZE.
	 */
	hyp_mem_size = nr_pages << PAGE_SHIFT;
	pmd_aligned_size = ALIGN(hyp_mem_size, PMD_SIZE);

	hyp_mem_base = memblock_phys_alloc(pmd_aligned_size, PMD_SIZE);
	if (hyp_mem_base) {
		hyp_mem_size = pmd_aligned_size;
	} else {
		hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
		if (!hyp_mem_base) {
			kvm_err("Failed to reserve hyp memory\n");
			return;
		}
	}

	kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
		 hyp_mem_base);
}
/*
 * Create the hypervisor-side state for one vCPU.
 *
 * Allocates one page for the vCPU's hyp request buffer, shares the first
 * struct kvm_hyp_req of it with the hypervisor and asks the hypervisor to
 * initialise the vCPU.
 *
 * On failure the request page is unshared (if it had been shared) before it
 * is freed, and host_vcpu->arch.hyp_reqs is left NULL so that
 * pkvm_destroy_hyp_vm() skips this vCPU.
 *
 * Return: 0 on success, -EINVAL if @idx does not match the vCPU index,
 * -ENOMEM on allocation failure, or the error from sharing/initialisation.
 */
static int __pkvm_create_hyp_vcpu(struct kvm *host_kvm, struct kvm_vcpu *host_vcpu, unsigned long idx)
{
	pkvm_handle_t handle = host_kvm->arch.pkvm.handle;
	struct kvm_hyp_req *hyp_reqs;
	int ret;

	/* Indexing of the vcpus to be sequential starting at 0. */
	if (WARN_ON(host_vcpu->vcpu_idx != idx))
		return -EINVAL;

	hyp_reqs = (struct kvm_hyp_req *)__get_free_page(GFP_KERNEL_ACCOUNT);
	if (!hyp_reqs)
		return -ENOMEM;

	ret = kvm_share_hyp(hyp_reqs, hyp_reqs + 1);
	if (ret)
		goto err_free;

	host_vcpu->arch.hyp_reqs = hyp_reqs;

	ret = refill_hyp_alloc(kvm_call_hyp_nvhe(__pkvm_init_vcpu,
						 handle, host_vcpu), 2);
	if (!ret)
		return 0;

	/* Never hand a page back to the allocator while it is still shared. */
	kvm_unshare_hyp(hyp_reqs, hyp_reqs + 1);
err_free:
	free_page((unsigned long)hyp_reqs);
	host_vcpu->arch.hyp_reqs = NULL;

	return ret;
}
/*
 * Host-side fixup once the hypervisor vCPU exists: for protected VMs the
 * host's sve_state pointer is cleared.
 */
static void __pkvm_vcpu_hyp_created(struct kvm_vcpu *vcpu)
{
	if (!kvm_vm_is_protected(vcpu->kvm))
		return;

	vcpu->arch.sve_state = NULL;
}
/*
 * Allocates and donates memory for hypervisor VM structs at EL2.
 *
 * Allocates the stage-2 PGD, lets the hypervisor initialise the VM and then
 * creates the hypervisor state of every vCPU.
 *
 * Stores an opaque handle in the kvm struct for future reference.
 *
 * If VM initialisation fails, the PGD is freed and its accounting in
 * stat.protected_hyp_mem is undone. If a vCPU fails, the whole hyp VM is
 * torn down through pkvm_destroy_hyp_vm().
 *
 * Return 0 on success, negative error code on failure.
 */
static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	struct kvm_vcpu *host_vcpu;
	pkvm_handle_t handle;
	unsigned long idx;
	size_t pgd_sz;
	void *pgd;
	int ret;

	if (host_kvm->created_vcpus < 1)
		return -EINVAL;

	pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.vtcr);

	/*
	 * The PGD pages will be reclaimed using a hyp_memcache which implies
	 * page granularity. So, use alloc_pages_exact() to get individual
	 * refcounts.
	 */
	pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
	if (!pgd)
		return -ENOMEM;
	atomic64_add(pgd_sz, &host_kvm->stat.protected_hyp_mem);

	/* Donate the VM memory to hyp and let hyp initialize it. */
	ret = refill_hyp_alloc(kvm_call_hyp_nvhe(__pkvm_init_vm,
						 host_kvm, pgd), 4);
	if (ret < 0)
		goto free_pgd;

	/* A non-negative return value is the handle of the new hyp VM. */
	handle = ret;
	host_kvm->arch.pkvm.handle = handle;

	/* Donate memory for the vcpus at hyp and initialize it. */
	kvm_for_each_vcpu(idx, host_vcpu, host_kvm) {
		ret = __pkvm_create_hyp_vcpu(host_kvm, host_vcpu, idx);
		if (ret)
			goto destroy_vm;
		__pkvm_vcpu_hyp_created(host_vcpu);
	}

	kvm_account_pgtable_pages(pgd, pgd_sz >> PAGE_SHIFT);

	return 0;

destroy_vm:
	pkvm_destroy_hyp_vm(host_kvm);
	return ret;
free_pgd:
	free_pages_exact(pgd, pgd_sz);
	/* The PGD never reached the hypervisor: undo its accounting too. */
	atomic64_sub(pgd_sz, &host_kvm->stat.protected_hyp_mem);
	return ret;
}
/*
 * Create the hypervisor VM for @host_kvm unless it already has a handle.
 * Serialised by host_kvm->lock.
 *
 * Return: 0 if the VM exists or was created, negative error code otherwise.
 */
int pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	int ret;

	mutex_lock(&host_kvm->lock);
	if (host_kvm->arch.pkvm.handle)
		ret = 0;
	else
		ret = __pkvm_create_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->lock);

	return ret;
}
/*
 * Tear down the hypervisor VM backing @host_kvm and release host resources.
 *
 * When a handle exists: start the teardown at EL2 (retrying until it
 * succeeds), reclaim and unpin every pinned guest page, reclaim every MMIO
 * page, then finalise the teardown. In all cases the handle is cleared, the
 * teardown memcaches are freed and each vCPU's hyp request page is unshared
 * and freed.
 *
 * Each vCPU's hyp_reqs pointer is reset after the page is freed, so calling
 * this function again for the same VM (it is also invoked from the
 * __pkvm_create_hyp_vm() failure path) cannot unshare or free the page
 * twice.
 */
void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	struct kvm_pinned_page *ppage;
	struct mm_struct *mm = current->mm;
	struct kvm_vcpu *host_vcpu;
	struct rb_node *node;
	unsigned long idx;
	int ret;
	struct kvm_mmio_page *mmio_page, *temp;

	if (!host_kvm->arch.pkvm.handle)
		goto out_free;

	for (;;) {
		ret = kvm_call_hyp_nvhe(__pkvm_start_teardown_vm,
					host_kvm->arch.pkvm.handle);
		if (!ret)
			break;

		WARN_ON(ret);
		pr_warn("start teardown returned: %d\n", ret);
		cond_resched();
	}

	node = rb_first(&host_kvm->arch.pkvm.pinned_pages);
	while (node) {
		ppage = rb_entry(node, struct kvm_pinned_page, node);
		WARN_ON(kvm_call_hyp_nvhe(__pkvm_reclaim_dying_guest_page,
					  host_kvm->arch.pkvm.handle,
					  page_to_pfn(ppage->page),
					  ppage->ipa));
		cond_resched();

		account_locked_vm(mm, 1, false);
		unpin_user_pages_dirty_lock(&ppage->page, 1, true);
		/* Fetch the successor before the current node is erased. */
		node = rb_next(node);
		rb_erase(&ppage->node, &host_kvm->arch.pkvm.pinned_pages);
		kfree(ppage);
	}

	/* Reclaim MMIO. */
	list_for_each_entry_safe(mmio_page, temp, &host_kvm->arch.pkvm.mmio_pages, list) {
		WARN_ON(kvm_call_hyp_nvhe(__pkvm_reclaim_dying_guest_page,
					  host_kvm->arch.pkvm.handle,
					  mmio_page->pfn,
					  mmio_page->ipa));
		list_del(&mmio_page->list);
		kfree(mmio_page);
	}

	WARN_ON(kvm_call_hyp_nvhe(__pkvm_finalize_teardown_vm, host_kvm->arch.pkvm.handle));

out_free:
	host_kvm->arch.pkvm.handle = 0;
	free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc, 0);
	free_hyp_memcache(&host_kvm->arch.pkvm.teardown_stage2_mc,
			  HYP_MEMCACHE_ACCOUNT_STAGE2);

	kvm_for_each_vcpu(idx, host_vcpu, host_kvm) {
		struct kvm_hyp_req *hyp_reqs = host_vcpu->arch.hyp_reqs;

		if (!hyp_reqs)
			continue;

		kvm_unshare_hyp(hyp_reqs, hyp_reqs + 1);
		/* Do not leave a dangling pointer behind for a later call. */
		host_vcpu->arch.hyp_reqs = NULL;
		free_page((unsigned long)hyp_reqs);
	}
}
/*
 * Host-side initialisation of a new VM.
 *
 * Always initialises host_kvm->lock. When @type requests a protected VM,
 * additionally marks the VM as pKVM-enabled with no firmware load address.
 *
 * Return: 0 on success, -EINVAL if a protected VM is requested while
 * protected KVM is not enabled.
 */
int pkvm_init_host_vm(struct kvm *host_kvm, unsigned long type)
{
	mutex_init(&host_kvm->lock);

	if (type & KVM_VM_TYPE_ARM_PROTECTED) {
		if (!is_protected_kvm_enabled())
			return -EINVAL;

		host_kvm->arch.pkvm.pvmfw_load_addr = PVMFW_INVALID_LOAD_ADDR;
		host_kvm->arch.pkvm.enabled = true;
	}

	return 0;
}
/*
 * Fill @dev from the device-tree node referenced by @args.
 *
 * @args: phandle arguments; args->np is the device node and args->args[0]
 *        is stored as the device's group id.
 * @dev:  entry of the registered device table to populate.
 *
 * Records every (page-aligned) address range of the node as a resource and
 * every "iommus" entry as an (iommu id, endpoint) pair.
 *
 * Return: 0 on success, -E2BIG if the node has more than
 * PKVM_DEVICE_MAX_RESOURCE ranges, -EINVAL for an unaligned range or an
 * unsupported IOMMU binding, or the error from of_address_to_resource().
 */
static int pkvm_register_device(struct of_phandle_args *args,
				struct pkvm_device *dev)
{
	struct device_node *np = args->np;
	struct of_phandle_args iommu_spec;
	u32 group_id = args->args[0];
	struct resource res;
	u64 base, size, iommu_id;
	unsigned int count, j = 0;
	int ret;

	/* Parse regs */
	while (of_get_address(np, j++, NULL, NULL))
		;
	count = j - 1;

	if (count > PKVM_DEVICE_MAX_RESOURCE)
		return -E2BIG;

	for (j = 0; j < count; ++j) {
		ret = of_address_to_resource(np, j, &res);
		if (ret)
			return ret;

		base = res.start;
		size = resource_size(&res);
		if (!PAGE_ALIGNED(base) || !PAGE_ALIGNED(size))
			return -EINVAL;

		dev->resource[j].base = base;
		dev->resource[j].size = size;
	}
	dev->nr_resources = count;

	/*
	 * Parse iommus
	 *
	 * NOTE(review): nothing here bounds j against the capacity of
	 * dev->iommus[]; confirm against the struct pkvm_device definition.
	 */
	j = 0;
	while (!of_parse_phandle_with_args(np, "iommus",
					   "#iommu-cells",
					   j, &iommu_spec)) {
		if (iommu_spec.args_count != 1) {
			kvm_err("[Devices] Unsupported binding for %s, expected <&iommu id>",
				np->full_name);
			/* Drop the reference taken by the phandle lookup. */
			of_node_put(iommu_spec.np);
			return -EINVAL;
		}

		iommu_id = kvm_get_iommu_id_by_of(iommu_spec.np);
		dev->iommus[j].id = iommu_id;
		dev->iommus[j].endpoint = iommu_spec.args[0];
		of_node_put(iommu_spec.np);
		j++;
	}
	dev->nr_iommus = j;

	dev->ctxt = NULL;
	dev->group_id = group_id;

	return 0;
}
static int pkvm_init_devices(void)
{
struct device_node *np;
int idx = 0, ret = 0;
size_t dev_sz;
for_each_compatible_node(np, NULL, PKVM_DEVICE_ASSIGN_COMPAT) {
struct of_phandle_args args;
while (!of_parse_phandle_with_fixed_args(np, "devices", 1, idx, &args)) {
idx++;
kvm_nvhe_sym(registered_devices_nr)++;
}
}
kvm_info("Found %d assignable devices", kvm_nvhe_sym(registered_devices_nr));
if (!kvm_nvhe_sym(registered_devices_nr))
return 0;
dev_sz = PAGE_ALIGN(size_mul(sizeof(struct pkvm_device),
kvm_nvhe_sym(registered_devices_nr)));
kvm_nvhe_sym(registered_devices) = alloc_pages_exact(dev_sz, GFP_KERNEL_ACCOUNT);
if (!kvm_nvhe_sym(registered_devices))
return -ENOMEM;
idx = 0;
for_each_compatible_node(np, NULL, PKVM_DEVICE_ASSIGN_COMPAT) {
struct of_phandle_args args;
while (!of_parse_phandle_with_fixed_args(np, "devices", 1, idx, &args)) {
ret = pkvm_register_device(&args, &kvm_nvhe_sym(registered_devices)[idx]);
if (ret)
return ret;
idx++;
}
}
return ret;
}
static void dump_pkvm_devices(void)
{
int i, j;
for (i = 0 ; i < kvm_nvhe_sym(registered_devices_nr) ; ++i) {
kvm_info("Device[%d]:", i);
for (j = 0 ; j < kvm_nvhe_sym(registered_devices)[i].nr_resources ; ++j) {
kvm_info("Resource[%d] 0x%llx - 0x%llx", j,
kvm_nvhe_sym(registered_devices)[i].resource[j].base,
kvm_nvhe_sym(registered_devices)[i].resource[j].size);
}
for (j = 0 ; j < kvm_nvhe_sym(registered_devices)[i].nr_iommus ; ++j) {
kvm_info("IOMMU[%d] %lld - %lld", j,
kvm_nvhe_sym(registered_devices)[i].iommus[j].id,
kvm_nvhe_sym(registered_devices)[i].iommus[j].endpoint);
}
}
}
/*
 * Per-CPU callback: issue the __pkvm_prot_finalize hypercall and, if it
 * fails, warn and record -EINVAL in the int pointed to by @arg.
 */
static void __init _kvm_host_prot_finalize(void *arg)
{
	int *err = arg;

	if (!WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
		return;

	WRITE_ONCE(*err, -EINVAL);
}
/*
 * Run _kvm_host_prot_finalize() on every CPU, waiting for completion.
 *
 * Return: 0 if every CPU succeeded, -EINVAL if any of them failed.
 */
static int __init pkvm_drop_host_privileges(void)
{
	int err = 0;

	/*
	 * Flip the static key upfront as that may no longer be possible
	 * once the host stage 2 is installed.
	 */
	static_branch_enable(&kvm_protected_mode_initialized);

	on_each_cpu(_kvm_host_prot_finalize, &err, 1);

	return err;
}
/*
 * Initcall completing the pKVM setup: loads the early pKVM modules,
 * initialises the KVM IOMMU driver and the assignable devices, hides the
 * hypervisor memory from kmemleak and finally runs
 * pkvm_drop_host_privileges().
 *
 * Returns 0 when protected KVM is not enabled or KVM/arm is not initialised,
 * otherwise 0 on success or the error of the step that failed.
 */
static int __init finalize_pkvm(void)
{
int ret;
if (!is_protected_kvm_enabled() || !is_kvm_arm_initialised())
return 0;
/*
* Modules can play an essential part in the pKVM protection. All of
* them must properly load to enable protected VMs.
*/
ret = pkvm_load_early_modules();
/*
 * NOTE(review): a module load failure is only logged; ret is overwritten
 * by the next call and initialisation carries on. Confirm this is intended
 * given the comment above.
 */
if (ret)
pr_err("Failed to load modules %d\n", ret);
ret = kvm_iommu_init_driver();
if (ret) {
pr_err("Failed to init kvm IOMMU driver: %d\n", ret);
return ret;
}
/*
 * NOTE(review): unlike the pkvm_drop_host_privileges() failure path below,
 * a failure here returns without calling kvm_iommu_remove_driver().
 */
ret = pkvm_init_devices();
if (ret) {
pr_err("Failed to init kvm devices %d\n", ret);
return ret;
}
dump_pkvm_devices();
/* A failure of the hypervisor-side device init is logged but not fatal. */
ret = kvm_call_hyp_nvhe(__pkvm_devices_init);
if (ret)
pr_warn("Assignable devices failed to initialize in the hypervisor %d", ret);
/*
* Exclude HYP sections from kmemleak so that they don't get peeked
* at, which would end badly once inaccessible.
*/
kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);
ret = pkvm_drop_host_privileges();
if (ret) {
pr_err("Failed to finalize Hyp protection: %d\n", ret);
kvm_iommu_remove_driver();
}
return ret;
}
device_initcall_sync(finalize_pkvm);
/*
 * rb_find() comparator for the pinned pages tree. @key carries the IPA to
 * look up, cast to a pointer; nodes are ordered by their ipa field.
 */
static int rb_ppage_cmp(const void *key, const struct rb_node *node)
{
	struct kvm_pinned_page *ppage = container_of(node, struct kvm_pinned_page, node);
	phys_addr_t wanted = (phys_addr_t)key;

	if (wanted < ppage->ipa)
		return -1;

	return wanted > ppage->ipa;
}
/*
 * Drop the host's bookkeeping for the guest page pinned at @ipa.
 *
 * The page is looked up in and removed from the pinned pages tree under the
 * mmu_lock write lock; afterwards the locked-VM accounting is reduced by one
 * page, the page is unpinned (marked dirty) and its tracking node is freed.
 * Warns and returns if no page is tracked at @ipa.
 */
void pkvm_host_reclaim_page(struct kvm *host_kvm, phys_addr_t ipa)
{
	struct rb_root *pinned = &host_kvm->arch.pkvm.pinned_pages;
	struct mm_struct *mm = current->mm;
	struct kvm_pinned_page *ppage;
	struct rb_node *found;

	write_lock(&host_kvm->mmu_lock);
	found = rb_find((void *)ipa, pinned, rb_ppage_cmp);
	if (found)
		rb_erase(found, pinned);
	write_unlock(&host_kvm->mmu_lock);

	if (WARN_ON(!found))
		return;

	ppage = container_of(found, struct kvm_pinned_page, node);
	account_locked_vm(mm, 1, false);
	unpin_user_pages_dirty_lock(&ppage->page, 1, true);
	kfree(ppage);
}
/*
 * Log why the guest firmware reservation @rmem is being ignored.
 *
 * Return: always -EINVAL, so callers can return the result directly.
 */
static int __init pkvm_firmware_rmem_err(struct reserved_mem *rmem,
					 const char *reason)
{
	phys_addr_t limit = rmem->base + rmem->size;

	kvm_err("Ignoring pkvm guest firmware memory reservation [%pa - %pa]: %s\n",
		&rmem->base, &limit, reason);

	return -EINVAL;
}
/*
 * Reserved-memory init hook for "linux,pkvm-guest-firmware-memory".
 *
 * Accepts a single reservation that has the "no-map" property, lacks the
 * "reusable" property and whose base and size are page-aligned. On
 * acceptance the region is published to the hypervisor (pvmfw_base /
 * pvmfw_size) and remembered in pkvm_firmware_mem.
 *
 * Return: 0 on success, -EINVAL (after logging the reason) otherwise.
 */
static int __init pkvm_firmware_rmem_init(struct reserved_mem *rmem)
{
	unsigned long node = rmem->fdt_node;
	const char *reason = NULL;

	if (pkvm_firmware_mem)
		reason = "duplicate reservation";
	else if (!of_get_flat_dt_prop(node, "no-map", NULL))
		reason = "missing \"no-map\" property";
	else if (of_get_flat_dt_prop(node, "reusable", NULL))
		reason = "\"reusable\" property unsupported";
	else if (!PAGE_ALIGNED(rmem->base))
		reason = "base is not page-aligned";
	else if (!PAGE_ALIGNED(rmem->size))
		reason = "size is not page-aligned";

	if (reason)
		return pkvm_firmware_rmem_err(rmem, reason);

	*pvmfw_size = rmem->size;
	*pvmfw_base = rmem->base;
	pkvm_firmware_mem = rmem;

	return 0;
}
RESERVEDMEM_OF_DECLARE(pkvm_firmware, "linux,pkvm-guest-firmware-memory",
		       pkvm_firmware_rmem_init);
/*
 * Initcall wiping the guest firmware reservation when it will not be used,
 * i.e. when a reservation exists but protected KVM is not enabled. The
 * region is zeroed and cleaned to the point of coherency.
 *
 * Return: 0 on success or when there is nothing to do, -EINVAL if the
 * region cannot be mapped.
 */
static int __init pkvm_firmware_rmem_clear(void)
{
	phys_addr_t len;
	void *va;

	if (likely(!pkvm_firmware_mem))
		return 0;

	if (is_protected_kvm_enabled())
		return 0;

	kvm_info("Clearing unused pKVM firmware memory\n");

	len = pkvm_firmware_mem->size;
	va = memremap(pkvm_firmware_mem->base, len, MEMREMAP_WB);
	if (!va)
		return -EINVAL;

	memset(va, 0, len);
	dcache_clean_poc((unsigned long)va, (unsigned long)va + len);
	memunmap(va);

	return 0;
}
device_initcall_sync(pkvm_firmware_rmem_clear);
/*
 * Record @ipa as the firmware load address of @kvm.
 *
 * Return: 0 on success, -EINVAL if no firmware memory was reserved, -EBUSY
 * if the hypervisor VM has already been created.
 */
static int pkvm_vm_ioctl_set_fw_ipa(struct kvm *kvm, u64 ipa)
{
	int ret = 0;

	if (!pkvm_firmware_mem)
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->arch.pkvm.handle)
		ret = -EBUSY;
	else
		kvm->arch.pkvm.pvmfw_load_addr = ipa;
	mutex_unlock(&kvm->lock);

	return ret;
}
/*
 * Copy the protected VM information to user space; firmware_size is the
 * size of the reserved firmware region, or 0 when there is none.
 *
 * Return: 0 on success, -EFAULT if the copy to @info fails.
 */
static int pkvm_vm_ioctl_info(struct kvm *kvm,
			      struct kvm_protected_vm_info __user *info)
{
	struct kvm_protected_vm_info kinfo = {
		.firmware_size = 0,
	};

	if (pkvm_firmware_mem)
		kinfo.firmware_size = pkvm_firmware_mem->size;

	if (copy_to_user(info, &kinfo, sizeof(kinfo)))
		return -EFAULT;

	return 0;
}
/*
 * Dispatch the protected VM capability sub-commands selected by cap->flags.
 * cap->args[0] carries the sub-command argument; args[1..3] must be zero.
 *
 * Return: the sub-command's result, or -EINVAL if the VM is not protected,
 * a reserved argument is set, or the sub-command is unknown.
 */
int pkvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int i;

	if (!kvm_vm_is_protected(kvm))
		return -EINVAL;

	for (i = 1; i <= 3; i++) {
		if (cap->args[i])
			return -EINVAL;
	}

	if (cap->flags == KVM_CAP_ARM_PROTECTED_VM_FLAGS_SET_FW_IPA)
		return pkvm_vm_ioctl_set_fw_ipa(kvm, cap->args[0]);

	if (cap->flags == KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO)
		return pkvm_vm_ioctl_info(kvm, (void __force __user *)cap->args[0]);

	return -EINVAL;
}
int __pkvm_topup_hyp_alloc_mgt(unsigned long id, unsigned long nr_pages,
unsigned long sz_alloc)
{
struct kvm_hyp_memcache mc = {
.head = 0,
.nr_pages = 0,
};
int ret;
ret = topup_hyp_memcache(&mc, nr_pages, 0, get_order(sz_alloc));
if (ret)
return ret;
ret = kvm_call_hyp_nvhe(__pkvm_hyp_alloc_mgt_refill, id,
mc.head, mc.nr_pages);
if (ret)
free_hyp_memcache(&mc, 0);
return ret;
}
EXPORT_SYMBOL_GPL(__pkvm_topup_hyp_alloc_mgt);
/*
 * Top up the hypervisor heap allocator (HYP_ALLOC_MGT_HEAP_ID) with
 * @nr_pages single pages.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int __pkvm_topup_hyp_alloc(unsigned long nr_pages)
{
	const unsigned long alloc_size = PAGE_SIZE;

	return __pkvm_topup_hyp_alloc_mgt(HYP_ALLOC_MGT_HEAP_ID, nr_pages, alloc_size);
}
EXPORT_SYMBOL_GPL(__pkvm_topup_hyp_alloc);
unsigned long __pkvm_reclaim_hyp_alloc_mgt(unsigned long nr_pages)
{
unsigned long ratelimit, last_reclaim, reclaimed = 0;
struct kvm_hyp_memcache mc;
struct arm_smccc_res res;
do {
/* Arbitrary upper bound to limit the time spent at EL2 */
ratelimit = min(nr_pages, 256UL);
arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__pkvm_hyp_alloc_mgt_reclaim),
ratelimit, &res);
if (WARN_ON(res.a0 != SMCCC_RET_SUCCESS))
break;
mc.head = res.a2;
last_reclaim = mc.nr_pages = res.a3;
free_hyp_memcache(&mc, 0);
reclaimed += last_reclaim;
if (last_reclaim > nr_pages)
break;
nr_pages -= last_reclaim;
} while (last_reclaim && nr_pages);
return reclaimed;
}
#include <linux/debugfs.h>
/*
 * debugfs "hyp_reclaim" write handler: parse a decimal page count and ask
 * the hypervisor allocator to give that many pages back, then free them.
 *
 * Return: @count on success, -EINVAL if the input cannot be parsed, -EIO if
 * the reclaim hypercall fails (in which case its other result registers are
 * not interpreted).
 */
static ssize_t hyp_reclaim_debugfs_write(struct file *file, const char __user *buf, size_t count, loff_t *off)
{
	struct kvm_hyp_memcache mc = {
		.head		= 0,
		.nr_pages	= 0,
	};
	struct arm_smccc_res res;
	int target;

	if (kstrtoint_from_user(buf, count, 10, &target))
		return -EINVAL;

	arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__pkvm_hyp_alloc_mgt_reclaim), target, &res);
	/* a2/a3 only describe reclaimed pages when the call succeeded. */
	if (WARN_ON(res.a0 != SMCCC_RET_SUCCESS))
		return -EIO;

	mc.head = res.a2;
	mc.nr_pages = res.a3;
	printk("%lu page(s) reclaimed\n", mc.nr_pages);
	free_hyp_memcache(&mc, 0);

	return count;
}
static const struct file_operations hyp_reclaim_debugfs_fops = {
	.write = hyp_reclaim_debugfs_write,
};
/*
 * debugfs "hyp_alloc" write handler: parse a decimal value and pass it to
 * the __pkvm_hyp_alloc hypercall. Whenever the hypervisor reports -ENOMEM,
 * its heap allocator is topped up with one page and the call is retried.
 *
 * The top-up goes through __pkvm_topup_hyp_alloc() so that the refill
 * hypercall receives the allocator id like every other caller, and so that
 * the page is freed again if the hypervisor rejects it.
 *
 * Return: @count on success, negative error code otherwise.
 */
static ssize_t hyp_alloc_debugfs_write(struct file *file, const char __user *buf, size_t count, loff_t *off)
{
	u64 value;
	int ret;

	ret = kstrtoull_from_user(buf, count, 10, &value);
	if (ret)
		return ret;

	for (;;) {
		ret = kvm_call_hyp_nvhe(__pkvm_hyp_alloc, value);
		if (ret != -ENOMEM)
			break;

		ret = __pkvm_topup_hyp_alloc(1);
		if (ret)
			return ret;
	}

	if (ret)
		return ret;

	return count;
}
static const struct file_operations hyp_alloc_debugfs_fops = {
	.write = hyp_alloc_debugfs_write,
};
/*
 * debugfs "hyp_free" write handler: parse a hexadecimal value and pass it
 * to the __pkvm_hyp_free hypercall.
 *
 * Return: @count on success, negative error code otherwise.
 */
static ssize_t hyp_free_debugfs_write(struct file *file, const char __user *buf, size_t count, loff_t *off)
{
	u64 addr;
	int err;

	err = kstrtoull_from_user(buf, count, 16, &addr);
	if (!err)
		err = kvm_call_hyp_nvhe(__pkvm_hyp_free, addr);

	return err ? err : count;
}
static const struct file_operations hyp_free_debugfs_fops = {
	.write = hyp_free_debugfs_write,
};
/*
 * One record of the buffer filled by the __pkvm_dump_hyp_allocator
 * hypercall. The dump code in this file treats a record whose addr is 0 as
 * the end of the list.
 */
struct hyp_allocator_chunk_dump {
/* Chunk address; 0 marks the end of the dump. */
unsigned long addr;
/* Start of the allocation; presumably follows a chunk header at addr - TODO confirm. */
unsigned long alloc_start;
size_t alloc_size;
size_t unmapped_size;
size_t mapped_size;
u32 hash;
};
/* Drawing parameters only used by the compiled-out graphical dump. */
#define BYTES_TO_LINES PAGE_SIZE
#define LINE_WIDTH 33
#if 0
/*
 * Compiled out: alternative dump that draws the allocator layout as ASCII
 * boxes. The plain-text variant in the #else branch is the one built.
 */
/*
 * Draw one region box of @size bytes (one text line per BYTES_TO_LINES
 * bytes, at least one line) with @name centred on the middle line, followed
 * by a separator line annotated with @va. @end_chunk selects the "=" chunk
 * separator instead of the "-" one. Nothing is printed when @size is 0.
 */
static void __dump_region(struct seq_file *m, const char *name, size_t size, unsigned long va,
bool end_chunk)
{
int i, j, nr_lines = size / BYTES_TO_LINES;
/* TODO: Check for non decreasing va */
if (!size)
return;
if (!nr_lines)
nr_lines = 1;
for (i = 0; i < nr_lines; i++) {
if (i == nr_lines / 2) {
int name_len = strlen(name);
int start = (LINE_WIDTH - 2 - name_len) / 2;
seq_putc(m, '|');
for (j = 0; j < start; j++)
seq_putc(m, ' ');
seq_puts(m, name);
for (j = 0; j < (name_len % 2 ? start - 1 : start); j++)
seq_putc(m, ' ');
seq_puts(m, "|\n");
} else
seq_puts(m, "| |\n");
}
if (end_chunk)
seq_printf(m, "+==============================+ 0x%08lx\n", va);
else
seq_printf(m, "+------------------------------+ 0x%08lx\n", va);
}
/*
 * seq_file show callback: print the reclaimable page count, then walk the
 * dumped chunks from the last one back to the first, drawing the unmapped,
 * mapped, allocated and header regions of each.
 */
static int dump_hyp_allocator_show(struct seq_file *m, void *v)
{
struct hyp_allocator_chunk_dump *first_chunk, *chunk;
void *page = m->private;
seq_printf(m, "Reclaimable: %ld pages\n",
kvm_call_hyp_nvhe(__pkvm_hyp_alloc_reclaimable));
/* Decode the page */
first_chunk = chunk = (struct hyp_allocator_chunk_dump *)page;
if (!chunk->addr)
return 0;
/* Advance to the last record before the addr == 0 terminator. */
while ((chunk + 1)->addr)
chunk++;
seq_printf(m, "+==============================+ 0x%08lx\n", chunk->addr + chunk->mapped_size + chunk->unmapped_size);
while ((unsigned long)chunk >= (unsigned long)first_chunk) {
size_t header_size = chunk->alloc_start - chunk->addr;
size_t mapped_display_size = chunk->mapped_size - header_size - chunk->alloc_size;
__dump_region(m, "unmapped", chunk->unmapped_size,
chunk->addr + chunk->mapped_size, false);
__dump_region(m, "mapped", mapped_display_size,
chunk->alloc_start + chunk->alloc_size, false);
__dump_region(m, "alloc", chunk->alloc_size,
chunk->alloc_start, false);
__dump_region(m, "chunk header", header_size, chunk->addr, true);
chunk--;
}
return 0;
}
#else
static int dump_hyp_allocator_show(struct seq_file *m, void *v)
{
struct hyp_allocator_chunk_dump *first_chunk, *chunk;
void *page = m->private;
first_chunk = chunk = (struct hyp_allocator_chunk_dump *)page;
if (!chunk->addr)
return 0;
while (chunk->addr) {
seq_printf(m, "0x%lx: alloc=%zu mapped=%zu unmapped=%zu hash=%x\n",
chunk->addr, chunk->alloc_size, chunk->mapped_size,
chunk->unmapped_size, chunk->hash);
chunk++;
}
return 0;
}
#endif
/*
 * debugfs "dump_hyp_allocator" open handler: allocate a zeroed page, let
 * the hypervisor fill it through __pkvm_dump_hyp_allocator and attach it to
 * a single-record seq_file.
 *
 * The page is freed here on every failure path, otherwise by
 * dump_hyp_allocator_release().
 *
 * Return: 0 on success, -ENOMEM if the page cannot be allocated, or the
 * error from the hypercall or single_open().
 */
static int dump_hyp_allocator_open(struct inode *inode, struct file *file)
{
	void *page;
	int ret;

	/*
	 * get_zeroed_page() returns 0 on failure, whereas calling
	 * page_address() on a failed alloc_page() is not NULL-safe. Zeroing
	 * also guarantees the addr == 0 terminator the show callback looks
	 * for.
	 */
	page = (void *)get_zeroed_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;

	ret = kvm_call_hyp_nvhe(__pkvm_dump_hyp_allocator, page);
	if (ret)
		goto err_free;

	ret = single_open(file, dump_hyp_allocator_show, page);
	if (ret)
		goto err_free;

	return 0;

err_free:
	free_page((unsigned long)page);
	return ret;
}
/*
 * debugfs "dump_hyp_allocator" release handler: free the dump page and the
 * seq_file state.
 *
 * The file was opened with single_open(), so it must be closed with
 * single_release(); seq_release() alone would leak the seq_operations that
 * single_open() allocated.
 */
static int dump_hyp_allocator_release(struct inode *inode, struct file *file)
{
	struct seq_file *m = file->private_data;
	void *page = m->private;

	free_page((unsigned long)page);

	return single_release(inode, file);
}
static const struct file_operations dump_hyp_allocator_debugfs_fops = {
	.open		= dump_hyp_allocator_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= dump_hyp_allocator_release,
};
/*
 * Create the hypervisor allocator debugfs files at the debugfs root:
 * three owner-writable control files and one owner-readable dump file.
 */
static int __init hyp_allocator_debugfs_init(void)
{
	const struct {
		const char *name;
		umode_t mode;
		const struct file_operations *fops;
	} files[] = {
		{ "hyp_alloc",		S_IWUSR, &hyp_alloc_debugfs_fops },
		{ "hyp_free",		S_IWUSR, &hyp_free_debugfs_fops },
		{ "hyp_reclaim",	S_IWUSR, &hyp_reclaim_debugfs_fops },
		{ "dump_hyp_allocator",	S_IRUSR, &dump_hyp_allocator_debugfs_fops },
	};
	size_t n;

	for (n = 0; n < ARRAY_SIZE(files); n++)
		debugfs_create_file(files[n].name, files[n].mode, NULL, NULL,
				    files[n].fops);

	return 0;
}
late_initcall(hyp_allocator_debugfs_init);
/*
 * debugfs "pkvm_selftest_event" write handler: any write issues the
 * __pkvm_selftest_event hypercall; the written data is ignored and reported
 * as fully consumed.
 */
static ssize_t event_write(struct file *f, const char __user *buf,
			   size_t size, loff_t *pos)
{
	kvm_call_hyp_nvhe(__pkvm_selftest_event);

	return size;
}
/* Write-only file: no read handler is provided. */
static const struct file_operations event_fops = {
	.write	= event_write,
	.llseek	= default_llseek,
};
/* Create the owner-writable "pkvm_selftest_event" file at the debugfs root. */
static int __init pkvm_selftest_init(void)
{
	const umode_t write_only = 0200;

	debugfs_create_file("pkvm_selftest_event", write_only, NULL, NULL,
			    &event_fops);

	return 0;
}
device_initcall(pkvm_selftest_init);
#ifdef CONFIG_MODULES
/* Comma-separated module list given with kvm-arm.protected_modules=<list>. */
static char early_pkvm_modules[COMMAND_LINE_SIZE] __initdata;

/*
 * Deprecated path: "kvm-arm.protected_modules" given without a module list.
 * Warns about the deprecation and sets the hypervisor's
 * __pkvm_modules_enabled flag.
 */
static int __init pkvm_enable_module_late_loading(void)
{
	WARN(1, "Loading pKVM modules with kvm-arm.protected_modules is deprecated\n"
	     "Use kvm-arm.protected_modules=<module1>,<module2>");

	kvm_nvhe_sym(__pkvm_modules_enabled) = true;

	return 0;
}

/*
 * early_param handler for "kvm-arm.protected_modules".
 *
 * @arg is NULL when the parameter is given without a value; that is the
 * deprecated form handled by pkvm_enable_module_late_loading(). Otherwise
 * @arg is the module list, saved for pkvm_load_early_modules(). Only the
 * deprecated form triggers the warning, and a NULL @arg is never passed to
 * strscpy().
 */
static int __init early_pkvm_modules_cfg(char *arg)
{
	if (!arg)
		return pkvm_enable_module_late_loading();

	strscpy(early_pkvm_modules, arg, COMMAND_LINE_SIZE);

	return 0;
}
early_param("kvm-arm.protected_modules", early_pkvm_modules_cfg);
/*
 * Usermode-helper cleanup callback: free the duplicated module name stored
 * in argv[3], then the argv array itself.
 */
static void free_modprobe_argv(struct subprocess_info *info)
{
	char **argv = info->argv;

	kfree(argv[3]);
	kfree(argv);
}
/*
 * Run "modprobe -q -- <module_name>" through the usermode helper and wait
 * for it to finish.
 *
 * Heavily inspired by request_module(). The latest couldn't be reused though as
 * the feature can be disabled depending on umh configuration. Here some
 * security is enforced by making sure this can be called only when pKVM is
 * enabled, not yet completely initialized.
 *
 * Return: the helper's result, -EACCES when called outside the allowed
 * window, -ENOMEM on allocation failure.
 */
static int __init pkvm_request_early_module(char *module_name)
{
	static char *envp[] = {
		"HOME=/",
		"TERM=linux",
		"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
		NULL
	};
	char *modprobe_path = CONFIG_MODPROBE_PATH;
	struct subprocess_info *info;
	char *name_copy;
	char **argv;

	if (!is_protected_kvm_enabled())
		return -EACCES;

	if (static_branch_likely(&kvm_protected_mode_initialized))
		return -EACCES;

	argv = kmalloc(sizeof(char *[5]), GFP_KERNEL);
	if (!argv)
		return -ENOMEM;

	name_copy = kstrdup(module_name, GFP_KERNEL);
	if (!name_copy)
		goto err_free_argv;

	argv[0] = modprobe_path;
	argv[1] = "-q";
	argv[2] = "--";
	argv[3] = name_copy;
	argv[4] = NULL;

	/* On success, argv and name_copy are released by free_modprobe_argv(). */
	info = call_usermodehelper_setup(modprobe_path, argv, envp, GFP_KERNEL,
					 NULL, free_modprobe_argv, NULL);
	if (!info)
		goto err_free_name;

	/* Even with CONFIG_STATIC_USERMODEHELPER we really want this path */
	info->path = modprobe_path;

	return call_usermodehelper_exec(info, UMH_WAIT_PROC | UMH_KILLABLE);

err_free_name:
	kfree(name_copy);
err_free_argv:
	kfree(argv);

	return -ENOMEM;
}
/*
 * Load every module named in the comma-separated early_pkvm_modules list,
 * skipping empty entries and stopping at the first failure.
 *
 * strsep() replaces each separator with a NUL byte; the comma is written
 * back once the entry has been handled (but not when bailing out on error).
 *
 * Return: 0 on success, or the error of the first module that failed.
 */
int __init pkvm_load_early_modules(void)
{
	char *cursor = early_pkvm_modules;
	char *name;
	int err;

	while ((name = strsep(&cursor, ",")) != NULL) {
		if (*name) {
			err = pkvm_request_early_module(name);
			if (err) {
				pr_err("Failed to load pkvm module %s: %d\n",
				       name, err);
				return err;
			}
		}

		if (cursor)
			*(cursor - 1) = ',';
	}

	return 0;
}
/* Pairs one EL2 module section with the protection it is mapped with at hyp. */
struct pkvm_mod_sec_mapping {
/* Section to map; its start/end delimit the kernel-side range. */
struct pkvm_module_section *sec;
/* Protection passed to the __pkvm_map_module_page hypercall. */
enum kvm_pgtable_prot prot;
};
/*
 * Unmap @size bytes of module memory from the hypervisor, one page at a
 * time. @kern_va is the kernel (vmalloc) address of the range and @hyp_va
 * the hypervisor address it was mapped at.
 */
static void pkvm_unmap_module_pages(void *kern_va, void *hyp_va, size_t size)
{
	size_t done = 0;

	while (done < size) {
		u64 pfn = vmalloc_to_pfn(kern_va + done);

		kvm_call_hyp_nvhe(__pkvm_unmap_module_page, pfn,
				  hyp_va + done);
		done += PAGE_SIZE;
	}
}
/*
 * Unmap the first @nr_secs sections of @secs_map from the hypervisor. Each
 * section sits at the same offset from @hyp_va_base as it does from the
 * start of the first section in kernel memory.
 */
static void pkvm_unmap_module_sections(struct pkvm_mod_sec_mapping *secs_map, void *hyp_va_base, int nr_secs)
{
	int n;

	for (n = 0; n < nr_secs; n++) {
		void *sec_start = secs_map[n].sec->start;
		size_t sec_size = secs_map[n].sec->end - sec_start;
		size_t sec_off = sec_start - secs_map[0].sec->start;

		pkvm_unmap_module_pages(sec_start, hyp_va_base + sec_off, sec_size);
	}
}
/*
 * Map one module section into the hypervisor at @hyp_va, page by page, with
 * the section's protection. If a page fails, the pages of this section that
 * were already mapped are unmapped again.
 *
 * Return: 0 on success, or the error from the mapping hypercall.
 */
static int pkvm_map_module_section(struct pkvm_mod_sec_mapping *sec_map, void *hyp_va)
{
	void *kern_start = sec_map->sec->start;
	size_t sec_size = sec_map->sec->end - sec_map->sec->start;
	size_t done;
	int err;

	for (done = 0; done < sec_size; done += PAGE_SIZE) {
		u64 pfn = vmalloc_to_pfn(sec_map->sec->start + done);

		err = kvm_call_hyp_nvhe(__pkvm_map_module_page, pfn,
					hyp_va + done, sec_map->prot);
		if (!err)
			continue;

		pkvm_unmap_module_pages(kern_start, hyp_va, done);
		return err;
	}

	return 0;
}
/*
 * Map @nr_secs module sections into the hypervisor starting at
 * @hyp_va_base, preserving their offsets relative to the first section. On
 * failure, the sections mapped before the failing one are unmapped.
 *
 * Return: 0 on success, or the error from pkvm_map_module_section().
 */
static int pkvm_map_module_sections(struct pkvm_mod_sec_mapping *secs_map, void *hyp_va_base, int nr_secs)
{
	int n, err;

	for (n = 0; n < nr_secs; n++) {
		size_t sec_off = secs_map[n].sec->start - secs_map[0].sec->start;

		err = pkvm_map_module_section(&secs_map[n], hyp_va_base + sec_off);
		if (!err)
			continue;

		pkvm_unmap_module_sections(secs_map, hyp_va_base, n);
		return err;
	}

	return 0;
}
/* sort() comparator: orders section mappings by ascending section start. */
static int __pkvm_cmp_mod_sec(const void *p1, const void *p2)
{
	struct pkvm_mod_sec_mapping const *lhs = p1;
	struct pkvm_mod_sec_mapping const *rhs = p2;

	if (lhs->sec->start < rhs->sec->start)
		return -1;

	return lhs->sec->start > rhs->sec->start;
}
/*
 * Load the EL2 part of kernel module @this into the hypervisor.
 *
 * Checks that every EL2 section starts on a page boundary, takes a module
 * reference, allocates hypervisor VA space covering all sections, applies
 * the hyp relocations, maps the sections and calls the module's EL2 init
 * function. The module reference is kept on success and dropped on failure.
 *
 * @token: optional; receives the hypervisor VA of the module, usable as an
 *         identifier in later calls.
 *
 * Return: 0 on success, -EOPNOTSUPP if protected KVM is not enabled,
 * -EINVAL for misaligned sections, -ENODEV if the module is going away,
 * -ENOMEM if no hypervisor VA space is available, or the error from
 * mapping or initialising the module.
 */
int __pkvm_load_el2_module(struct module *this, unsigned long *token)
{
	struct pkvm_el2_module *mod = &this->arch.hyp;
	struct pkvm_mod_sec_mapping secs_map[] = {
		{ &mod->text, KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X },
		{ &mod->bss, KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W },
		{ &mod->rodata, KVM_PGTABLE_PROT_R },
		{ &mod->data, KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W },
	};
	void *start, *end, *hyp_va;
	kvm_nvhe_reloc_t *endrel;
	size_t offset, size;
	int ret, i;

	if (!is_protected_kvm_enabled())
		return -EOPNOTSUPP;

	for (i = 0; i < ARRAY_SIZE(secs_map); i++) {
		if (PAGE_ALIGNED(secs_map[i].sec->start))
			continue;

		kvm_err("EL2 sections are not page-aligned\n");
		return -EINVAL;
	}

	if (!try_module_get(this)) {
		kvm_err("Kernel module has been unloaded\n");
		return -ENODEV;
	}

	/* Order the sections by address to find the span they cover. */
	sort(secs_map, ARRAY_SIZE(secs_map), sizeof(secs_map[0]), __pkvm_cmp_mod_sec, NULL);
	start = secs_map[0].sec->start;
	end = secs_map[ARRAY_SIZE(secs_map) - 1].sec->end;
	size = end - start;

	hyp_va = (void *)kvm_call_hyp_nvhe(__pkvm_alloc_module_va, size >> PAGE_SHIFT);
	if (!hyp_va) {
		kvm_err("Failed to allocate hypervisor VA space for EL2 module\n");
		ret = -ENOMEM;
		goto err_put;
	}

	/*
	 * The token can be used for other calls related to this module.
	 * Conveniently the only information needed is this addr so let's use it
	 * as an identifier.
	 */
	if (token)
		*token = (unsigned long)hyp_va;

	endrel = (void *)mod->relocs + mod->nr_relocs * sizeof(*endrel);
	kvm_apply_hyp_module_relocations(start, hyp_va, mod->relocs, endrel);

	ret = pkvm_map_module_sections(secs_map, hyp_va, ARRAY_SIZE(secs_map));
	if (ret) {
		kvm_err("Failed to map EL2 module page: %d\n", ret);
		goto err_put;
	}

	offset = (size_t)((void *)mod->init - start);
	ret = kvm_call_hyp_nvhe(__pkvm_init_module, hyp_va + offset);
	if (ret) {
		kvm_err("Failed to init EL2 module: %d\n", ret);
		pkvm_unmap_module_sections(secs_map, hyp_va, ARRAY_SIZE(secs_map));
		goto err_put;
	}

	return 0;

err_put:
	module_put(this);
	return ret;
}
EXPORT_SYMBOL_GPL(__pkvm_load_el2_module);
/*
 * Register the function at hypervisor address @hfn_hyp_va through the
 * __pkvm_register_hcall hypercall.
 *
 * Return: the value returned by the hypercall.
 */
int __pkvm_register_el2_call(unsigned long hfn_hyp_va)
{
	int ret = kvm_call_hyp_nvhe(__pkvm_register_hcall, hfn_hyp_va);

	return ret;
}
EXPORT_SYMBOL_GPL(__pkvm_register_el2_call);
#endif /* CONFIG_MODULES */