// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2022 Google LLC
 * Author: David Brazdil <dbrazdil@google.com>
 */

#include <linux/kvm_host.h>

#include <asm/kvm_asm.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pkvm.h>

#include <hyp/adjust_pc.h>
#include <nvhe/iommu.h>
#include <nvhe/mm.h>

enum {
	IOMMU_DRIVER_NOT_READY = 0,
	IOMMU_DRIVER_INITIALIZING,
	IOMMU_DRIVER_READY,
};

struct pkvm_iommu_driver {
	const struct pkvm_iommu_ops *ops;
	atomic_t state;
};

static struct pkvm_iommu_driver iommu_drivers[PKVM_IOMMU_NR_DRIVERS];

/* IOMMU device list. Must only be accessed with host_kvm.lock held. */
static LIST_HEAD(iommu_list);

static bool iommu_finalized;
static DEFINE_HYP_SPINLOCK(iommu_registration_lock);

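/*
 * Memory donated for device entries is managed as a simple bump allocator:
 * alloc_iommu() carves allocations from the front of the current pool and
 * free_iommu() can only undo the most recent allocation.
 */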
static void *iommu_mem_pool;
static size_t iommu_mem_remaining;

static void assert_host_component_locked(void)
{
	hyp_assert_lock_held(&host_kvm.lock);
}

static void host_lock_component(void)
{
	hyp_spin_lock(&host_kvm.lock);
}

static void host_unlock_component(void)
{
	hyp_spin_unlock(&host_kvm.lock);
}

/*
 * Find an IOMMU driver by its ID. The input ID is treated as untrusted
 * and is properly validated.
 */
static inline struct pkvm_iommu_driver *get_driver(enum pkvm_iommu_driver_id id)
{
	size_t index = (size_t)id;

	if (index >= ARRAY_SIZE(iommu_drivers))
		return NULL;

	return &iommu_drivers[index];
}

static const struct pkvm_iommu_ops *get_driver_ops(enum pkvm_iommu_driver_id id)
{
	switch (id) {
	case PKVM_IOMMU_DRIVER_S2MPU:
		return IS_ENABLED(CONFIG_KVM_S2MPU) ? &pkvm_s2mpu_ops : NULL;
	case PKVM_IOMMU_DRIVER_SYSMMU_SYNC:
		return IS_ENABLED(CONFIG_KVM_S2MPU) ? &pkvm_sysmmu_sync_ops : NULL;
	default:
		return NULL;
	}
}

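/*
 * Try to claim a driver slot for initialization by atomically moving it from
 * NOT_READY to INITIALIZING. Returns false if another CPU is already
 * initializing the driver or has already initialized it.
 */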
static inline bool driver_acquire_init(struct pkvm_iommu_driver *drv)
{
	return atomic_cmpxchg_acquire(&drv->state, IOMMU_DRIVER_NOT_READY,
				      IOMMU_DRIVER_INITIALIZING)
			== IOMMU_DRIVER_NOT_READY;
}

static inline void driver_release_init(struct pkvm_iommu_driver *drv,
				       bool success)
{
	atomic_set_release(&drv->state, success ? IOMMU_DRIVER_READY
						: IOMMU_DRIVER_NOT_READY);
}

static inline bool is_driver_ready(struct pkvm_iommu_driver *drv)
{
	return atomic_read(&drv->state) == IOMMU_DRIVER_READY;
}

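/*
 * Size of a device entry, including the driver-private data area, aligned
 * to sizeof(unsigned long).
 */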
static size_t __iommu_alloc_size(struct pkvm_iommu_driver *drv)
{
	return ALIGN(sizeof(struct pkvm_iommu) + drv->ops->data_size,
		     sizeof(unsigned long));
}

/* Global memory pool for allocating IOMMU list entry structs. */
static inline struct pkvm_iommu *alloc_iommu(struct pkvm_iommu_driver *drv,
					     void *mem, size_t mem_size)
{
	size_t size = __iommu_alloc_size(drv);
	void *ptr;

	assert_host_component_locked();

	/*
	 * If new memory is being provided, replace the existing pool with it.
	 * Any remaining memory in the pool is discarded.
	 */
	if (mem && mem_size) {
		iommu_mem_pool = mem;
		iommu_mem_remaining = mem_size;
	}

	if (size > iommu_mem_remaining)
		return NULL;

	ptr = iommu_mem_pool;
	iommu_mem_pool += size;
	iommu_mem_remaining -= size;
	return ptr;
}

static inline void free_iommu(struct pkvm_iommu_driver *drv, struct pkvm_iommu *ptr)
{
	size_t size = __iommu_alloc_size(drv);

	assert_host_component_locked();

	if (!ptr)
		return;

	/* Only allow freeing the last allocated buffer. */
	if ((void *)ptr + size != iommu_mem_pool)
		return;

	iommu_mem_pool -= size;
	iommu_mem_remaining += size;
}

static bool is_overlap(phys_addr_t r1_start, size_t r1_size,
		       phys_addr_t r2_start, size_t r2_size)
{
	phys_addr_t r1_end = r1_start + r1_size;
	phys_addr_t r2_end = r2_start + r2_size;

	return (r1_start < r2_end) && (r2_start < r1_end);
}

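/*
 * Check that [base, base + size) is a plausible MMIO region: it must fit
 * within the host IPA space and must not overlap any memory region known
 * to the hypervisor.
 */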
static bool is_mmio_range(phys_addr_t base, size_t size)
{
	struct memblock_region *reg;
	phys_addr_t limit = BIT(host_kvm.pgt.ia_bits);
	size_t i;

	/* Check against limits of host IPA space. */
	if ((base >= limit) || !size || (size > limit - base))
		return false;

	for (i = 0; i < hyp_memblock_nr; i++) {
		reg = &hyp_memory[i];
		if (is_overlap(base, size, reg->base, reg->size))
			return false;
	}
	return true;
}

static int __snapshot_host_stage2(u64 start, u64 pa_max, u32 level,
				  kvm_pte_t *ptep,
				  enum kvm_pgtable_walk_flags flags,
				  void * const arg)
{
	struct pkvm_iommu_driver * const drv = arg;
	u64 end = start + kvm_granule_size(level);
	kvm_pte_t pte = *ptep;

	/*
	 * Valid stage-2 entries are created lazily, invalid ones eagerly.
	 * Note: In the future we may need to check if [start,end) is MMIO.
	 * Note: Drivers initialize their PTs to all memory owned by the host,
	 * so we only call the driver on regions where that is not the case.
	 */
	if (pte && !kvm_pte_valid(pte))
		drv->ops->host_stage2_idmap_prepare(start, end, /*prot*/ 0);
	return 0;
}

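/*
 * Walk the entire host stage-2 page-table and replay its current state into
 * the driver's page-table bookkeeping via host_stage2_idmap_prepare().
 */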
static int snapshot_host_stage2(struct pkvm_iommu_driver * const drv)
{
	struct kvm_pgtable_walker walker = {
		.cb = __snapshot_host_stage2,
		.arg = drv,
		.flags = KVM_PGTABLE_WALK_LEAF,
	};
	struct kvm_pgtable *pgt = &host_kvm.pgt;

	if (!drv->ops->host_stage2_idmap_prepare)
		return 0;

	return kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker);
}

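/*
 * A new device is only accepted if its ID is unique and its MMIO region does
 * not overlap that of any already-registered device.
 */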
static bool validate_against_existing_iommus(struct pkvm_iommu *dev)
{
	struct pkvm_iommu *other;

	assert_host_component_locked();

	list_for_each_entry(other, &iommu_list, list) {
		/* Device ID must be unique. */
		if (dev->id == other->id)
			return false;

		/* MMIO regions must not overlap. */
		if (is_overlap(dev->pa, dev->size, other->pa, other->size))
			return false;
	}
	return true;
}

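/* Look up a registered device by its host-assigned ID. Returns NULL if not found. */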
static struct pkvm_iommu *find_iommu_by_id(unsigned long id)
{
	struct pkvm_iommu *dev;

	assert_host_component_locked();

	list_for_each_entry(dev, &iommu_list, list) {
		if (dev->id == id)
			return dev;
	}
	return NULL;
}

/*
 * Initialize an EL2 IOMMU driver.
 *
 * This is a common hypercall for driver initialization. Driver-specific
 * arguments are passed in a shared memory buffer. The driver is expected to
 * initialize its page-table bookkeeping.
 */
int __pkvm_iommu_driver_init(enum pkvm_iommu_driver_id id, void *data, size_t size)
{
	struct pkvm_iommu_driver *drv;
	const struct pkvm_iommu_ops *ops;
	int ret = 0;

	data = kern_hyp_va(data);

	/* New driver initialization not allowed after __pkvm_iommu_finalize(). */
	hyp_spin_lock(&iommu_registration_lock);
	if (iommu_finalized) {
		ret = -EPERM;
		goto out_unlock;
	}

	drv = get_driver(id);
	ops = get_driver_ops(id);
	if (!drv || !ops) {
		ret = -EINVAL;
		goto out_unlock;
	}

	if (!driver_acquire_init(drv)) {
		ret = -EBUSY;
		goto out_unlock;
	}

	drv->ops = ops;

	/* This can change stage-2 mappings. */
	if (ops->init) {
		ret = hyp_pin_shared_mem(data, data + size);
		if (!ret) {
			ret = ops->init(data, size);
			hyp_unpin_shared_mem(data, data + size);
		}
		if (ret)
			goto out_release;
	}

	/*
	 * Walk host stage-2 and pass current mappings to the driver. Start
	 * accepting host stage-2 updates as soon as the host lock is released.
	 */
	host_lock_component();
	ret = snapshot_host_stage2(drv);
	if (!ret)
		driver_release_init(drv, /*success=*/true);
	host_unlock_component();

out_release:
	if (ret)
		driver_release_init(drv, /*success=*/false);

out_unlock:
	hyp_spin_unlock(&iommu_registration_lock);
	return ret;
}

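/*
 * Register a device with an already-initialized IOMMU driver.
 *
 * The host may donate memory (kern_mem_va/mem_size) to refill the pool used
 * for device entries. On success, the device's MMIO range is unmapped from
 * the host stage-2 and mapped into the hypervisor's private address space.
 */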
int __pkvm_iommu_register(unsigned long dev_id,
			  enum pkvm_iommu_driver_id drv_id,
			  phys_addr_t dev_pa, size_t dev_size,
			  unsigned long parent_id,
			  void *kern_mem_va, size_t mem_size)
{
	struct pkvm_iommu *dev = NULL;
	struct pkvm_iommu_driver *drv;
	void *mem_va = NULL;
	int ret = 0;

	/* New device registration not allowed after __pkvm_iommu_finalize(). */
	hyp_spin_lock(&iommu_registration_lock);
	if (iommu_finalized) {
		ret = -EPERM;
		goto out_unlock;
	}

	drv = get_driver(drv_id);
	if (!drv || !is_driver_ready(drv)) {
		ret = -ENOENT;
		goto out_unlock;
	}

	if (!PAGE_ALIGNED(dev_pa) || !PAGE_ALIGNED(dev_size)) {
		ret = -EINVAL;
		goto out_unlock;
	}

	if (!is_mmio_range(dev_pa, dev_size)) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/*
	 * Accept memory donation if the host is providing new memory.
	 * Note: We do not return the memory even if there is an error later.
	 */
	if (kern_mem_va && mem_size) {
		mem_va = kern_hyp_va(kern_mem_va);

		if (!PAGE_ALIGNED(mem_va) || !PAGE_ALIGNED(mem_size)) {
			ret = -EINVAL;
			goto out_unlock;
		}

		ret = __pkvm_host_donate_hyp(hyp_virt_to_pfn(mem_va),
					     mem_size >> PAGE_SHIFT);
		if (ret)
			goto out_unlock;
	}

	host_lock_component();

	/* Allocate memory for the new device entry. */
	dev = alloc_iommu(drv, mem_va, mem_size);
	if (!dev) {
		ret = -ENOMEM;
		goto out_free;
	}

	/* Populate the new device entry. */
	*dev = (struct pkvm_iommu){
		.children = LIST_HEAD_INIT(dev->children),
		.id = dev_id,
		.ops = drv->ops,
		.pa = dev_pa,
		.size = dev_size,
	};

	if (!validate_against_existing_iommus(dev)) {
		ret = -EBUSY;
		goto out_free;
	}

	if (parent_id) {
		dev->parent = find_iommu_by_id(parent_id);
		if (!dev->parent) {
			ret = -EINVAL;
			goto out_free;
		}

		if (dev->parent->ops->validate_child) {
			ret = dev->parent->ops->validate_child(dev->parent, dev);
			if (ret)
				goto out_free;
		}
	}

	if (dev->ops->validate) {
		ret = dev->ops->validate(dev);
		if (ret)
			goto out_free;
	}

	/*
	 * Unmap the device's MMIO range from host stage-2. If registration
	 * is successful, future attempts to re-map will be blocked by
	 * pkvm_iommu_host_stage2_adjust_range().
	 */
	ret = host_stage2_unmap_dev_locked(dev_pa, dev_size);
	if (ret)
		goto out_free;

	/* Create EL2 mapping for the device. Do it last as it is irreversible. */
	dev->va = (void *)__pkvm_create_private_mapping(dev_pa, dev_size,
							PAGE_HYP_DEVICE);
	if (IS_ERR(dev->va)) {
		ret = PTR_ERR(dev->va);
		goto out_free;
	}

	/* Register device and prevent host from mapping the MMIO range. */
	list_add_tail(&dev->list, &iommu_list);
	if (dev->parent)
		list_add_tail(&dev->siblings, &dev->parent->children);

out_free:
	if (ret)
		free_iommu(drv, dev);
	host_unlock_component();

out_unlock:
	hyp_spin_unlock(&iommu_registration_lock);
	return ret;
}

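/*
 * Mark the IOMMU configuration as final. The expected host call sequence is
 * __pkvm_iommu_driver_init() for each driver, __pkvm_iommu_register() for
 * each device, then __pkvm_iommu_finalize(); after that, no further drivers
 * or devices can be added.
 */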
int __pkvm_iommu_finalize(void)
{
	int ret = 0;

	hyp_spin_lock(&iommu_registration_lock);
	if (!iommu_finalized)
		iommu_finalized = true;
	else
		ret = -EPERM;
	hyp_spin_unlock(&iommu_registration_lock);
	return ret;
}

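/*
 * Forward a host power-management event (suspend/resume) to the device's
 * driver and track the device's power state for later stage-2 updates.
 */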
int __pkvm_iommu_pm_notify(unsigned long dev_id, enum pkvm_iommu_pm_event event)
{
	struct pkvm_iommu *dev;
	int ret;

	host_lock_component();
	dev = find_iommu_by_id(dev_id);
	if (dev) {
		if (event == PKVM_IOMMU_PM_SUSPEND) {
			ret = dev->ops->suspend ? dev->ops->suspend(dev) : 0;
			if (!ret)
				dev->powered = false;
		} else if (event == PKVM_IOMMU_PM_RESUME) {
			ret = dev->ops->resume ? dev->ops->resume(dev) : 0;
			if (!ret)
				dev->powered = true;
		} else {
			ret = -EINVAL;
		}
	} else {
		ret = -ENODEV;
	}
	host_unlock_component();
	return ret;
}

/*
 * Check host memory access against IOMMUs' MMIO regions.
 * Returns -EPERM if the address is within the bounds of a registered device.
 * Otherwise returns zero and adjusts boundaries of the new mapping to avoid
 * MMIO regions of registered IOMMUs.
 */
int pkvm_iommu_host_stage2_adjust_range(phys_addr_t addr, phys_addr_t *start,
					phys_addr_t *end)
{
	struct pkvm_iommu *dev;
	phys_addr_t new_start = *start;
	phys_addr_t new_end = *end;
	phys_addr_t dev_start, dev_end;

	assert_host_component_locked();

	list_for_each_entry(dev, &iommu_list, list) {
		dev_start = dev->pa;
		dev_end = dev_start + dev->size;

		if (addr < dev_start)
			new_end = min(new_end, dev_start);
		else if (addr >= dev_end)
			new_start = max(new_start, dev_end);
		else
			return -EPERM;
	}

	*start = new_start;
	*end = new_end;
	return 0;
}

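/*
 * Attempt to handle a host data abort that targets a registered device's MMIO
 * range. Returns true if a driver handled the access, in which case the
 * faulting host instruction is skipped.
 */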
bool pkvm_iommu_host_dabt_handler(struct kvm_cpu_context *host_ctxt, u32 esr,
				  phys_addr_t pa)
{
	struct pkvm_iommu *dev;

	assert_host_component_locked();

	list_for_each_entry(dev, &iommu_list, list) {
		if (pa < dev->pa || pa >= dev->pa + dev->size)
			continue;

		/* No 'powered' check - the host assumes it is powered. */
		if (!dev->ops->host_dabt_handler ||
		    !dev->ops->host_dabt_handler(dev, host_ctxt, esr, pa - dev->pa))
			return false;

		kvm_skip_host_instr();
		return true;
	}
	return false;
}

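/*
 * Propagate a host stage-2 identity-mapping change to the IOMMUs: every ready
 * driver first updates its bookkeeping via host_stage2_idmap_prepare(), then
 * every powered device applies the change via host_stage2_idmap_apply().
 */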
void pkvm_iommu_host_stage2_idmap(phys_addr_t start, phys_addr_t end,
				  enum kvm_pgtable_prot prot)
{
	struct pkvm_iommu_driver *drv;
	struct pkvm_iommu *dev;
	size_t i;

	assert_host_component_locked();

	for (i = 0; i < ARRAY_SIZE(iommu_drivers); i++) {
		drv = get_driver(i);
		if (drv && is_driver_ready(drv) && drv->ops->host_stage2_idmap_prepare)
			drv->ops->host_stage2_idmap_prepare(start, end, prot);
	}

	list_for_each_entry(dev, &iommu_list, list) {
		if (dev->powered && dev->ops->host_stage2_idmap_apply)
			dev->ops->host_stage2_idmap_apply(dev, start, end);
	}
}