blob: 0f30563b3d1066d0b40b1981f966bf5a82d1758e [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2022 Google LLC
* Author: David Brazdil <dbrazdil@google.com>
*/
#include <linux/kvm_host.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pkvm.h>
#include <hyp/adjust_pc.h>
#include <nvhe/iommu.h>
#include <nvhe/mm.h>
enum {
IOMMU_DRIVER_NOT_READY = 0,
IOMMU_DRIVER_INITIALIZING,
IOMMU_DRIVER_READY,
};
struct pkvm_iommu_driver {
const struct pkvm_iommu_ops *ops;
atomic_t state;
};
static struct pkvm_iommu_driver iommu_drivers[PKVM_IOMMU_NR_DRIVERS];
/* IOMMU device list. Must only be accessed with host_kvm.lock held. */
static LIST_HEAD(iommu_list);
static bool iommu_finalized;
static DEFINE_HYP_SPINLOCK(iommu_registration_lock);
static void *iommu_mem_pool;
static size_t iommu_mem_remaining;
static void assert_host_component_locked(void)
{
hyp_assert_lock_held(&host_kvm.lock);
}
static void host_lock_component(void)
{
hyp_spin_lock(&host_kvm.lock);
}
static void host_unlock_component(void)
{
hyp_spin_unlock(&host_kvm.lock);
}
/*
* Find IOMMU driver by its ID. The input ID is treated as unstrusted
* and is properly validated.
*/
static inline struct pkvm_iommu_driver *get_driver(enum pkvm_iommu_driver_id id)
{
size_t index = (size_t)id;
if (index >= ARRAY_SIZE(iommu_drivers))
return NULL;
return &iommu_drivers[index];
}
static const struct pkvm_iommu_ops *get_driver_ops(enum pkvm_iommu_driver_id id)
{
switch (id) {
case PKVM_IOMMU_DRIVER_S2MPU:
return IS_ENABLED(CONFIG_KVM_S2MPU) ? &pkvm_s2mpu_ops : NULL;
case PKVM_IOMMU_DRIVER_SYSMMU_SYNC:
return IS_ENABLED(CONFIG_KVM_S2MPU) ? &pkvm_sysmmu_sync_ops : NULL;
default:
return NULL;
}
}
static inline bool driver_acquire_init(struct pkvm_iommu_driver *drv)
{
return atomic_cmpxchg_acquire(&drv->state, IOMMU_DRIVER_NOT_READY,
IOMMU_DRIVER_INITIALIZING)
== IOMMU_DRIVER_NOT_READY;
}
static inline void driver_release_init(struct pkvm_iommu_driver *drv,
bool success)
{
atomic_set_release(&drv->state, success ? IOMMU_DRIVER_READY
: IOMMU_DRIVER_NOT_READY);
}
static inline bool is_driver_ready(struct pkvm_iommu_driver *drv)
{
return atomic_read(&drv->state) == IOMMU_DRIVER_READY;
}
static size_t __iommu_alloc_size(struct pkvm_iommu_driver *drv)
{
return ALIGN(sizeof(struct pkvm_iommu) + drv->ops->data_size,
sizeof(unsigned long));
}
/* Global memory pool for allocating IOMMU list entry structs. */
static inline struct pkvm_iommu *alloc_iommu(struct pkvm_iommu_driver *drv,
void *mem, size_t mem_size)
{
size_t size = __iommu_alloc_size(drv);
void *ptr;
assert_host_component_locked();
/*
* If new memory is being provided, replace the existing pool with it.
* Any remaining memory in the pool is discarded.
*/
if (mem && mem_size) {
iommu_mem_pool = mem;
iommu_mem_remaining = mem_size;
}
if (size > iommu_mem_remaining)
return NULL;
ptr = iommu_mem_pool;
iommu_mem_pool += size;
iommu_mem_remaining -= size;
return ptr;
}
static inline void free_iommu(struct pkvm_iommu_driver *drv, struct pkvm_iommu *ptr)
{
size_t size = __iommu_alloc_size(drv);
assert_host_component_locked();
if (!ptr)
return;
/* Only allow freeing the last allocated buffer. */
if ((void*)ptr + size != iommu_mem_pool)
return;
iommu_mem_pool -= size;
iommu_mem_remaining += size;
}
static bool is_overlap(phys_addr_t r1_start, size_t r1_size,
phys_addr_t r2_start, size_t r2_size)
{
phys_addr_t r1_end = r1_start + r1_size;
phys_addr_t r2_end = r2_start + r2_size;
return (r1_start < r2_end) && (r2_start < r1_end);
}
static bool is_mmio_range(phys_addr_t base, size_t size)
{
struct memblock_region *reg;
phys_addr_t limit = BIT(host_kvm.pgt.ia_bits);
size_t i;
/* Check against limits of host IPA space. */
if ((base >= limit) || !size || (size > limit - base))
return false;
for (i = 0; i < hyp_memblock_nr; i++) {
reg = &hyp_memory[i];
if (is_overlap(base, size, reg->base, reg->size))
return false;
}
return true;
}
static int __snapshot_host_stage2(u64 start, u64 end, u32 level,
kvm_pte_t *ptep,
enum kvm_pgtable_walk_flags flags,
void * const arg)
{
struct pkvm_iommu_driver * const drv = arg;
enum kvm_pgtable_prot prot;
kvm_pte_t pte = *ptep;
/*
* Valid stage-2 entries are created lazily, invalid ones eagerly.
* Note: In the future we may need to check if [start,end) is MMIO.
*/
prot = (!pte || kvm_pte_valid(pte)) ? PKVM_HOST_MEM_PROT : 0;
drv->ops->host_stage2_idmap_prepare(start, end, prot);
return 0;
}
static int snapshot_host_stage2(struct pkvm_iommu_driver * const drv)
{
struct kvm_pgtable_walker walker = {
.cb = __snapshot_host_stage2,
.arg = drv,
.flags = KVM_PGTABLE_WALK_LEAF,
};
struct kvm_pgtable *pgt = &host_kvm.pgt;
if (!drv->ops->host_stage2_idmap_prepare)
return 0;
return kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker);
}
static bool validate_against_existing_iommus(struct pkvm_iommu *dev)
{
struct pkvm_iommu *other;
assert_host_component_locked();
list_for_each_entry(other, &iommu_list, list) {
/* Device ID must be unique. */
if (dev->id == other->id)
return false;
/* MMIO regions must not overlap. */
if (is_overlap(dev->pa, dev->size, other->pa, other->size))
return false;
}
return true;
}
static struct pkvm_iommu *find_iommu_by_id(unsigned long id)
{
struct pkvm_iommu *dev;
assert_host_component_locked();
list_for_each_entry(dev, &iommu_list, list) {
if (dev->id == id)
return dev;
}
return NULL;
}
/*
* Initialize EL2 IOMMU driver.
*
* This is a common hypercall for driver initialization. Driver-specific
* arguments are passed in a shared memory buffer. The driver is expected to
* initialize it's page-table bookkeeping.
*/
int __pkvm_iommu_driver_init(enum pkvm_iommu_driver_id id, void *data, size_t size)
{
struct pkvm_iommu_driver *drv;
const struct pkvm_iommu_ops *ops;
int ret = 0;
data = kern_hyp_va(data);
/* New driver initialization not allowed after __pkvm_iommu_finalize(). */
hyp_spin_lock(&iommu_registration_lock);
if (iommu_finalized) {
ret = -EPERM;
goto out_unlock;
}
drv = get_driver(id);
ops = get_driver_ops(id);
if (!drv || !ops) {
ret = -EINVAL;
goto out_unlock;
}
if (!driver_acquire_init(drv)) {
ret = -EBUSY;
goto out_unlock;
}
drv->ops = ops;
/* This can change stage-2 mappings. */
if (ops->init) {
ret = hyp_pin_shared_mem(data, data + size);
if (!ret) {
ret = ops->init(data, size);
hyp_unpin_shared_mem(data, data + size);
}
if (ret)
goto out_release;
}
/*
* Walk host stage-2 and pass current mappings to the driver. Start
* accepting host stage-2 updates as soon as the host lock is released.
*/
host_lock_component();
ret = snapshot_host_stage2(drv);
if (!ret)
driver_release_init(drv, /*success=*/true);
host_unlock_component();
out_release:
if (ret)
driver_release_init(drv, /*success=*/false);
out_unlock:
hyp_spin_unlock(&iommu_registration_lock);
return ret;
}
int __pkvm_iommu_register(unsigned long dev_id,
enum pkvm_iommu_driver_id drv_id,
phys_addr_t dev_pa, size_t dev_size,
unsigned long parent_id,
void *kern_mem_va, size_t mem_size)
{
struct pkvm_iommu *dev = NULL;
struct pkvm_iommu_driver *drv;
void *mem_va = NULL;
int ret = 0;
/* New device registration not allowed after __pkvm_iommu_finalize(). */
hyp_spin_lock(&iommu_registration_lock);
if (iommu_finalized) {
ret = -EPERM;
goto out_unlock;
}
drv = get_driver(drv_id);
if (!drv || !is_driver_ready(drv)) {
ret = -ENOENT;
goto out_unlock;
}
if (!PAGE_ALIGNED(dev_pa) || !PAGE_ALIGNED(dev_size)) {
ret = -EINVAL;
goto out_unlock;
}
if (!is_mmio_range(dev_pa, dev_size)) {
ret = -EINVAL;
goto out_unlock;
}
/*
* Accept memory donation if the host is providing new memory.
* Note: We do not return the memory even if there is an error later.
*/
if (kern_mem_va && mem_size) {
mem_va = kern_hyp_va(kern_mem_va);
if (!PAGE_ALIGNED(mem_va) || !PAGE_ALIGNED(mem_size)) {
ret = -EINVAL;
goto out_unlock;
}
ret = __pkvm_host_donate_hyp(hyp_virt_to_pfn(mem_va),
mem_size >> PAGE_SHIFT);
if (ret)
goto out_unlock;
}
host_lock_component();
/* Allocate memory for the new device entry. */
dev = alloc_iommu(drv, mem_va, mem_size);
if (!dev) {
ret = -ENOMEM;
goto out_free;
}
/* Populate the new device entry. */
*dev = (struct pkvm_iommu){
.children = LIST_HEAD_INIT(dev->children),
.id = dev_id,
.ops = drv->ops,
.pa = dev_pa,
.size = dev_size,
};
if (!validate_against_existing_iommus(dev)) {
ret = -EBUSY;
goto out_free;
}
if (parent_id) {
dev->parent = find_iommu_by_id(parent_id);
if (!dev->parent) {
ret = -EINVAL;
goto out_free;
}
if (dev->parent->ops->validate_child) {
ret = dev->parent->ops->validate_child(dev->parent, dev);
if (ret)
goto out_free;
}
}
if (dev->ops->validate) {
ret = dev->ops->validate(dev);
if (ret)
goto out_free;
}
/*
* Unmap the device's MMIO range from host stage-2. If registration
* is successful, future attempts to re-map will be blocked by
* pkvm_iommu_host_stage2_adjust_range.
*/
ret = host_stage2_unmap_dev_locked(dev_pa, dev_size);
if (ret)
goto out_free;
/* Create EL2 mapping for the device. Do it last as it is irreversible. */
dev->va = (void *)__pkvm_create_private_mapping(dev_pa, dev_size,
PAGE_HYP_DEVICE);
if (IS_ERR(dev->va)) {
ret = PTR_ERR(dev->va);
goto out_free;
}
/* Register device and prevent host from mapping the MMIO range. */
list_add_tail(&dev->list, &iommu_list);
if (dev->parent)
list_add_tail(&dev->siblings, &dev->parent->children);
out_free:
if (ret)
free_iommu(drv, dev);
host_unlock_component();
out_unlock:
hyp_spin_unlock(&iommu_registration_lock);
return ret;
}
int __pkvm_iommu_finalize(void)
{
int ret = 0;
hyp_spin_lock(&iommu_registration_lock);
if (!iommu_finalized)
iommu_finalized = true;
else
ret = -EPERM;
hyp_spin_unlock(&iommu_registration_lock);
return ret;
}
int __pkvm_iommu_pm_notify(unsigned long dev_id, enum pkvm_iommu_pm_event event)
{
struct pkvm_iommu *dev;
int ret;
host_lock_component();
dev = find_iommu_by_id(dev_id);
if (dev) {
if (event == PKVM_IOMMU_PM_SUSPEND) {
ret = dev->ops->suspend ? dev->ops->suspend(dev) : 0;
if (!ret)
dev->powered = false;
} else if (event == PKVM_IOMMU_PM_RESUME) {
ret = dev->ops->resume ? dev->ops->resume(dev) : 0;
if (!ret)
dev->powered = true;
} else {
ret = -EINVAL;
}
} else {
ret = -ENODEV;
}
host_unlock_component();
return ret;
}
/*
* Check host memory access against IOMMUs' MMIO regions.
* Returns -EPERM if the address is within the bounds of a registered device.
* Otherwise returns zero and adjusts boundaries of the new mapping to avoid
* MMIO regions of registered IOMMUs.
*/
int pkvm_iommu_host_stage2_adjust_range(phys_addr_t addr, phys_addr_t *start,
phys_addr_t *end)
{
struct pkvm_iommu *dev;
phys_addr_t new_start = *start;
phys_addr_t new_end = *end;
phys_addr_t dev_start, dev_end;
assert_host_component_locked();
list_for_each_entry(dev, &iommu_list, list) {
dev_start = dev->pa;
dev_end = dev_start + dev->size;
if (addr < dev_start)
new_end = min(new_end, dev_start);
else if (addr >= dev_end)
new_start = max(new_start, dev_end);
else
return -EPERM;
}
*start = new_start;
*end = new_end;
return 0;
}
bool pkvm_iommu_host_dabt_handler(struct kvm_cpu_context *host_ctxt, u32 esr,
phys_addr_t pa)
{
struct pkvm_iommu *dev;
assert_host_component_locked();
list_for_each_entry(dev, &iommu_list, list) {
if (pa < dev->pa || pa >= dev->pa + dev->size)
continue;
/* No 'powered' check - the host assumes it is powered. */
if (!dev->ops->host_dabt_handler ||
!dev->ops->host_dabt_handler(dev, host_ctxt, esr, pa - dev->pa))
return false;
kvm_skip_host_instr();
return true;
}
return false;
}
void pkvm_iommu_host_stage2_idmap(phys_addr_t start, phys_addr_t end,
enum kvm_pgtable_prot prot)
{
struct pkvm_iommu_driver *drv;
struct pkvm_iommu *dev;
size_t i;
assert_host_component_locked();
for (i = 0; i < ARRAY_SIZE(iommu_drivers); i++) {
drv = get_driver(i);
if (drv && is_driver_ready(drv) && drv->ops->host_stage2_idmap_prepare)
drv->ops->host_stage2_idmap_prepare(start, end, prot);
}
list_for_each_entry(dev, &iommu_list, list) {
if (dev->powered && dev->ops->host_stage2_idmap_apply)
dev->ops->host_stage2_idmap_apply(dev, start, end);
}
}