| // SPDX-License-Identifier: GPL-2.0 |
| /* |
| * IOMMU operations for pKVM |
| * |
| * Copyright (C) 2022 Linaro Ltd. |
| */ |
| |
| #include <asm/kvm_hyp.h> |
| |
| #include <hyp/adjust_pc.h> |
| |
| #include <kvm/iommu.h> |
| |
| #include <nvhe/alloc_mgt.h> |
| #include <nvhe/iommu.h> |
| #include <nvhe/mem_protect.h> |
| #include <nvhe/mm.h> |
| #include <nvhe/pviommu-host.h> |
| |
| enum { |
| IOMMU_DRIVER_NOT_READY = 0, |
| IOMMU_DRIVER_INITIALIZING, |
| IOMMU_DRIVER_READY, |
| }; |
| static atomic_t kvm_iommu_initialized; |
| |
| void **kvm_hyp_iommu_domains; |
| static struct hyp_pool iommu_idmap_pool; |
| static struct hyp_pool iommu_host_pool; |
| static int snapshot_host_stage2(void); |
| |
| /* |
| * This lock protect domain operations, that can't be done using the atomic refcount |
| * It is used for alloc/free domains, so it shouldn't have a lot of overhead as |
| * these are rare operations, while map/unmap are left lockless. |
| */ |
| static DEFINE_HYP_SPINLOCK(iommu_domains_lock); |
| |
/* The hypervisor is non-preemptible, so cur_context can be per-CPU. */
| DEFINE_PER_CPU(struct pkvm_hyp_vcpu *, __cur_context); |
| #define cur_context (*this_cpu_ptr(&__cur_context)) |
| |
| DECLARE_PER_CPU(struct kvm_hyp_req, host_hyp_reqs); |
| |
| static void host_lock_component(void) |
| { |
| hyp_spin_lock(&host_mmu.lock); |
| } |
| |
| static void host_unlock_component(void) |
| { |
| hyp_spin_unlock(&host_mmu.lock); |
| } |
| |
struct pkvm_hyp_vcpu *__get_ctxt(void)
| { |
| return this_cpu_ptr(&kvm_host_data)->host_ctxt.__hyp_running_vcpu; |
| } |
| |
| void hyp_iommu_lock(struct kvm_hyp_iommu *iommu) |
| { |
| hyp_spin_lock(&iommu->iommu_lock); |
| } |
| |
| void hyp_iommu_unlock(struct kvm_hyp_iommu *iommu) |
| { |
| hyp_spin_unlock(&iommu->iommu_lock); |
| } |
| |
| void hyp_assert_iommu_lock_held(struct kvm_hyp_iommu *iommu) |
| { |
| hyp_assert_lock_held(&iommu->iommu_lock); |
| } |
| |
| void hyp_domains_lock(void) |
| { |
| hyp_spin_lock(&iommu_domains_lock); |
| } |
| |
| void hyp_domains_unlock(void) |
| { |
| hyp_spin_unlock(&iommu_domains_lock); |
| } |
| |
| |
| static inline bool kvm_iommu_acquire_init(void) |
| { |
| return atomic_cmpxchg_acquire(&kvm_iommu_initialized, IOMMU_DRIVER_NOT_READY, |
| IOMMU_DRIVER_INITIALIZING) == IOMMU_DRIVER_NOT_READY; |
| } |
| |
| static inline void kvm_iommu_release_init(void) |
| { |
| atomic_set_release(&kvm_iommu_initialized, IOMMU_DRIVER_READY); |
| } |
| |
| static inline bool kvm_iommu_is_ready(void) |
| { |
| return atomic_read(&kvm_iommu_initialized) == IOMMU_DRIVER_READY; |
| } |
| |
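/*
 * Allocate pages for the IOMMU driver. In a vCPU context the guest's
 * allocator is used, otherwise pages come from the host-donated pool. When
 * @fill_req is set and the allocation fails, a KVM_HYP_REQ_MEM request is
 * filled so the host or guest can top up the allocator and retry.
 */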
| void *kvm_iommu_donate_pages(u8 order, bool fill_req) |
| { |
| void *p; |
| struct pkvm_hyp_vcpu *ctxt = __get_ctxt(); |
| struct kvm_hyp_req *req = this_cpu_ptr(&host_hyp_reqs); |
| |
	/* For vCPU contexts, only use the guest's allocator. */
| if (ctxt) { |
| p = guest_alloc_contig_pages(ctxt, order); |
| if (!p && fill_req) { |
| req = pkvm_hyp_req_reserve(ctxt, KVM_HYP_REQ_MEM); |
| goto ret_fill_req; |
| } |
| return p; |
| } |
| p = hyp_alloc_pages(&iommu_host_pool, order); |
| if (p) |
| return p; |
| |
| ret_fill_req: |
| if (fill_req) { |
| req->type = KVM_HYP_REQ_MEM; |
| req->mem.dest = REQ_MEM_IOMMU; |
| req->mem.sz_alloc = (1 << order) * PAGE_SIZE; |
| req->mem.nr_pages = 1; |
| } |
| return NULL; |
| } |
| |
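/*
 * Page-table pages for the identity-mapped domain come from the dedicated
 * idmap pool; all other domains use kvm_iommu_donate_pages().
 */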
| void *kvm_iommu_donate_pgtable_pages(struct io_pgtable *iop, u8 order, bool fill_req) |
| { |
| if (iop && ((struct kvm_iommu_tlb_cookie *)iop->cookie)->domain_id == |
| KVM_IOMMU_IDMAPPED_DOMAIN) { |
| return hyp_alloc_pages(&iommu_idmap_pool, order); |
| } else { |
| return kvm_iommu_donate_pages(order, fill_req); |
| } |
| } |
| |
| void kvm_iommu_reclaim_pages(void *p, u8 order) |
| { |
| struct pkvm_hyp_vcpu *ctxt = __get_ctxt(); |
| |
	/* The guest vCPU may not be loaded if we are in a teardown context. */
| if (!ctxt) |
| ctxt = cur_context; |
| |
| if (ctxt) |
| guest_free_contig_pages(ctxt, p, order); |
| else |
| hyp_put_page(&iommu_host_pool, p); |
| } |
| |
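/*
 * Refill the host IOMMU pool from a host memcache. The page order of each
 * entry is encoded in the low bits of the memcache head.
 */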
| int kvm_iommu_refill(struct kvm_hyp_memcache *host_mc) |
| { |
| void *p; |
| unsigned long order; |
| |
| while (host_mc->nr_pages) { |
| order = host_mc->head & (PAGE_SIZE - 1); |
| p = pkvm_admit_host_page(host_mc, order); |
| hyp_virt_to_page(p)->order = order; |
| hyp_set_page_refcounted(hyp_virt_to_page(p)); |
| hyp_put_page(&iommu_host_pool, p); |
| } |
| |
| return 0; |
| } |
| |
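/*
 * Reclaim up to @target order-0 pages from the idmap pool: each page is
 * pushed onto @host_mc and donated back to the host.
 */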
| void kvm_iommu_reclaim(struct kvm_hyp_memcache *host_mc, int target) |
| { |
| void *p; |
| |
| while (target--) { |
| p = hyp_alloc_pages(&iommu_idmap_pool, 0); |
| if (!p) |
| return; |
| push_hyp_memcache(host_mc, p, hyp_virt_to_phys, 0); |
| WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(p), 1)); |
| } |
| } |
| |
| int kvm_iommu_reclaimable(void) |
| { |
| return hyp_pool_free_pages(&iommu_host_pool); |
| } |
| |
| struct hyp_mgt_allocator_ops kvm_iommu_allocator_ops = { |
| .refill = kvm_iommu_refill, |
| .reclaim = kvm_iommu_reclaim, |
| .reclaimable = kvm_iommu_reclaimable, |
| }; |
| |
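/*
 * Return the domain structure for @domain_id, allocating the leaf page of the
 * two-level domains array on first use. Returns NULL if the ID is out of
 * range or the leaf allocation fails.
 */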
| static struct kvm_hyp_iommu_domain * |
| handle_to_domain(pkvm_handle_t domain_id) |
| { |
| int idx; |
| struct kvm_hyp_iommu_domain *domains; |
| |
| if (domain_id >= KVM_IOMMU_MAX_DOMAINS) |
| return NULL; |
| domain_id = array_index_nospec(domain_id, KVM_IOMMU_MAX_DOMAINS); |
| |
| idx = domain_id >> KVM_IOMMU_DOMAIN_ID_SPLIT; |
| domains = (struct kvm_hyp_iommu_domain *)READ_ONCE(kvm_hyp_iommu_domains[idx]); |
| if (!domains) { |
| domains = kvm_iommu_donate_pages(0, true); |
| if (!domains) |
| return NULL; |
| |
| /* |
| * handle_to_domain() does not have to be called under a lock, |
| * but even though we allocate a leaf in all cases, it's only |
| * really a valid thing to do under alloc_domain(), which uses a |
| * lock. Races are therefore a host bug and we don't need to be |
| * delicate about it. |
| */ |
| if (WARN_ON(cmpxchg64_relaxed(&kvm_hyp_iommu_domains[idx], 0, |
| (void *)domains) != 0)) |
| return NULL; |
| } |
| |
| return &domains[domain_id & KVM_IOMMU_DOMAIN_ID_LEAF_MASK]; |
| } |
| |
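/*
 * Take an extra reference on an allocated domain. The allocation path holds
 * the initial reference, so the count must already be non-zero here.
 */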
| static int domain_get(struct kvm_hyp_iommu_domain *domain) |
| { |
| int old = atomic_fetch_inc_acquire(&domain->refs); |
| |
| if (WARN_ON(!old)) |
| return -EINVAL; |
| else if (old < 0 || old + 1 < 0) |
| return -EOVERFLOW; |
| return 0; |
| } |
| |
| static void domain_put(struct kvm_hyp_iommu_domain *domain) |
| { |
| BUG_ON(!atomic_dec_return_release(&domain->refs)); |
| } |
| |
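/*
 * Check that the current context (host or guest) is allowed to operate on
 * @domain: guest domains are tagged with the owning VM, while host domains
 * have domain->vm == NULL.
 */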
| static int access_allowed(struct kvm_hyp_iommu_domain *domain) |
| { |
| struct pkvm_hyp_vcpu *ctxt = __get_ctxt(); |
| |
	/* The guest vCPU may not be loaded if we are in a teardown context. */
| if (!ctxt) |
| ctxt = cur_context; |
| |
| if (!ctxt && domain->vm) |
| return -EPERM; |
| if (ctxt && (domain->vm != pkvm_hyp_vcpu_to_hyp_vm(ctxt))) |
| return -EPERM; |
| return 0; |
| } |
| |
| static int kvm_iommu_alloc_domain_nolock(pkvm_handle_t domain_id, unsigned long pgd_hva, |
| unsigned long pgd_size, u32 type) |
| { |
| struct kvm_hyp_iommu_domain *domain; |
| int ret = -EINVAL; |
| struct pkvm_hyp_vcpu *ctxt = __get_ctxt(); |
| struct pkvm_hyp_vm *vm; |
| |
| if (!kvm_iommu_ops) |
| return -ENODEV; |
| |
| domain = handle_to_domain(domain_id); |
| if (!domain) |
| return -EINVAL; |
| |
| if (atomic_read(&domain->refs)) |
| return -EINVAL; |
| |
| ret = kvm_iommu_ops->alloc_domain(domain, domain_id, pgd_hva, pgd_size, type); |
| if (ret) |
| return ret; |
| atomic_set_release(&domain->refs, 1); |
| if (ctxt) { |
| vm = pkvm_hyp_vcpu_to_hyp_vm(ctxt); |
| domain->vm = vm; |
| } |
| |
| return ret; |
| } |
| |
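/*
 * Host hypercall: allocate a domain in the lower half of the domain ID space
 * along with its page-table root.
 */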
| int kvm_iommu_alloc_domain(pkvm_handle_t domain_id, u32 type) |
| { |
| int ret; |
| unsigned long pgd_hva, pgd_size; |
| |
| /* Host only has access to the lower half of the domain IDs. */ |
| if (domain_id >= (KVM_IOMMU_MAX_DOMAINS >> 1)) |
| return -EINVAL; |
| |
| pgd_size = kvm_iommu_ops->pgd_size(type); |
| /* |
	 * Guest memory is already donated as it comes from the memcache,
	 * while host memory passed through the HVC needs to be donated.
| */ |
| pgd_hva = (unsigned long)kvm_iommu_donate_pages(get_order(pgd_size), true); |
| if (!pgd_hva) |
| return -ENOMEM; |
| |
| hyp_spin_lock(&iommu_domains_lock); |
| ret = kvm_iommu_alloc_domain_nolock(domain_id, pgd_hva, pgd_size, type); |
| if (ret) |
| pkvm_unmap_donated_memory((void *)pgd_hva, pgd_size); |
| |
| hyp_spin_unlock(&iommu_domains_lock); |
| return ret; |
| } |
| |
| /* |
 * The second half of the domain ID space is shared between all guests, so
 * this is a (dummy) allocator for guest domain IDs.
| */ |
| int kvm_iommu_alloc_guest_domain(pkvm_handle_t *ret_domain) |
| { |
| pkvm_handle_t domain_id = KVM_IOMMU_MAX_DOMAINS >> 1; |
| struct kvm_hyp_iommu_domain *domain; |
| int ret = -EINVAL; |
| unsigned long pgd_hva, pgd_size; |
| |
| pgd_size = kvm_iommu_ops->pgd_size(DOMAIN_ANY_TYPE); |
| |
| if (!ret_domain) |
| return -EINVAL; |
| hyp_spin_lock(&iommu_domains_lock); |
| /* |
	 * Not optimal, but works for guests as this operation is rare:
	 * guests don't allocate many domains.
| */ |
| for ( ; domain_id < KVM_IOMMU_MAX_DOMAINS; ++domain_id) { |
| domain = handle_to_domain(domain_id); |
| if (!domain) { |
| ret = -ENOMEM; |
| goto out_unlock; |
| } |
| /* A free domain we can use... hopefully */ |
| if (atomic_read(&domain->refs) == 0) |
| break; |
| } |
| |
| if (domain_id == KVM_IOMMU_MAX_DOMAINS) { |
| ret = -EBUSY; |
| goto out_unlock; |
| } |
| |
| pgd_hva = (u64)kvm_iommu_donate_pages(get_order(pgd_size), true); |
| if (!pgd_hva) { |
| ret = -ENOMEM; |
| goto out_unlock; |
| } |
| |
| ret = kvm_iommu_alloc_domain_nolock(domain_id, pgd_hva, pgd_size, DOMAIN_ANY_TYPE); |
| if (ret) |
| kvm_iommu_reclaim_pages((void *)pgd_hva, get_order(pgd_size)); |
| |
| *ret_domain = domain_id; |
| out_unlock: |
| hyp_spin_unlock(&iommu_domains_lock); |
| return ret; |
| } |
| |
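/*
 * Free a domain that has no devices attached (refcount back to its initial
 * value of 1). The identity-mapped domain can never be freed.
 */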
| int kvm_iommu_free_domain(pkvm_handle_t domain_id) |
| { |
| struct kvm_hyp_iommu_domain *domain; |
| int ret = -EINVAL; |
| |
| if (!kvm_iommu_ops) |
| return -ENODEV; |
| |
| if (domain_id == KVM_IOMMU_IDMAPPED_DOMAIN) |
| return -EINVAL; |
| |
| domain = handle_to_domain(domain_id); |
| if (!domain) |
| return -EINVAL; |
| |
| hyp_spin_lock(&iommu_domains_lock); |
| ret = access_allowed(domain); |
| if (ret) |
| goto out_unlock; |
| |
	if (WARN_ON(atomic_cmpxchg_release(&domain->refs, 1, 0) != 1)) {
		ret = -EINVAL;
		goto out_unlock;
	}
| |
| ret = kvm_iommu_ops->free_domain(domain, domain_id); |
| memset(domain, 0, sizeof(*domain)); |
| |
| out_unlock: |
| hyp_spin_unlock(&iommu_domains_lock); |
| return ret; |
| } |
| |
| /* |
| * A guest is dying before freeing its domains, free them for it. |
| */ |
| int kvm_iommu_free_guest_domains(struct pkvm_hyp_vm *hyp_vm) |
| { |
| pkvm_handle_t domain_id = (KVM_IOMMU_MAX_DOMAINS >> 1); |
| struct kvm_hyp_iommu_domain *domain; |
| |
| /* Doesn't matter which vcpu. */ |
| cur_context = hyp_vm->vcpus[0]; |
| |
| for ( ; (domain_id < KVM_IOMMU_MAX_DOMAINS) ; ++domain_id) { |
| domain = handle_to_domain(domain_id); |
| if (domain && domain->vm == hyp_vm) { |
| /* |
			 * The guest is dying and can't issue any operations on these domains,
			 * so it is safe to modify them without the lock.
			 * A guest can also die while attaching devices to a domain, so we don't
			 * care about the refcount as the pvIOMMU will block the device anyway.
| */ |
| atomic_set_release(&domain->refs, 1); |
| kvm_iommu_free_domain(domain_id); |
| } |
| } |
| |
| cur_context = NULL; |
| return 0; |
| } |
| |
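/*
 * Attach @endpoint_id behind @iommu_id to @domain_id, taking a domain
 * reference for the attached device. The first successful attach to the
 * idmapped domain also snapshots the host stage-2 and marks the IOMMU layer
 * ready for host stage-2 idmap updates.
 */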
| int kvm_iommu_attach_dev(pkvm_handle_t iommu_id, pkvm_handle_t domain_id, |
| u32 endpoint_id, u32 pasid, u32 pasid_bits, u64 flags) |
| { |
| int ret = -EINVAL; |
| struct kvm_hyp_iommu *iommu; |
| struct kvm_hyp_iommu_domain *domain; |
| struct pkvm_hyp_vcpu *ctxt = __get_ctxt(); |
| |
| /* |
	 * This can be called on behalf of a running guest!
	 * A guest is prevented from attaching to a host domain by the pvIOMMU
	 * handler, as it won't find a translation for the requested device.
| */ |
| if (!ctxt) { |
| ret = pkvm_pviommu_host_allowed(iommu_id, endpoint_id); |
| if (ret) |
| return ret; |
| } |
| |
| if (!kvm_iommu_ops) |
| return -ENODEV; |
| |
| iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id); |
| if (!iommu) |
| return -ENOENT; |
| hyp_spin_lock(&iommu->iommu_lock); |
| domain = handle_to_domain(domain_id); |
| if (!domain || domain_get(domain)) |
| goto out_unlock; |
| |
| ret = access_allowed(domain); |
| if (ret) |
| goto out_unlock; |
| |
| ret = kvm_iommu_ops->attach_dev(iommu, domain_id, domain, endpoint_id, |
| pasid, pasid_bits, flags); |
| |
| if (ret) |
| goto err_put_domain; |
| |
| if ((domain_id == KVM_IOMMU_IDMAPPED_DOMAIN) && kvm_iommu_acquire_init()) { |
| host_lock_component(); |
| snapshot_host_stage2(); |
| host_unlock_component(); |
| kvm_iommu_release_init(); |
| } |
| |
| out_unlock: |
| hyp_spin_unlock(&iommu->iommu_lock); |
| return ret; |
| |
| err_put_domain: |
| domain_put(domain); |
| hyp_spin_unlock(&iommu->iommu_lock); |
| return ret; |
| } |
| |
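/*
 * Detach @endpoint_id from @domain_id and drop the reference taken at attach
 * time.
 */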
| int kvm_iommu_detach_dev(pkvm_handle_t iommu_id, pkvm_handle_t domain_id, |
| u32 endpoint_id, u32 pasid) |
| { |
| int ret = -EINVAL; |
| struct kvm_hyp_iommu *iommu; |
| struct kvm_hyp_iommu_domain *domain; |
| struct pkvm_hyp_vcpu *ctxt = __get_ctxt(); |
| |
	/* As in attach: check that the host is allowed to touch this endpoint. */
| if (!ctxt) { |
| ret = pkvm_pviommu_host_allowed(iommu_id, endpoint_id); |
| if (ret) |
| return ret; |
| } |
| |
| if (!kvm_iommu_ops) |
| return -ENODEV; |
| |
| iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id); |
| if (!iommu) |
| return -ENOENT; |
| hyp_spin_lock(&iommu->iommu_lock); |
| domain = handle_to_domain(domain_id); |
| if (!domain || atomic_read(&domain->refs) <= 1) |
| goto out_unlock; |
| |
| ret = access_allowed(domain); |
| if (ret) |
| goto out_unlock; |
| |
| ret = kvm_iommu_ops->detach_dev(iommu, domain_id, domain, endpoint_id, pasid); |
| if (ret) |
| goto out_unlock; |
| |
| domain_put(domain); |
| out_unlock: |
| hyp_spin_unlock(&iommu->iommu_lock); |
| return ret; |
| } |
| |
#define IOMMU_PROT_MASK (IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE | \
			 IOMMU_NOEXEC | IOMMU_MMIO | IOMMU_PRIV)
| |
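/*
 * Map @pgcount pages of size @pgsize at @iova in @domain_id. The physical
 * range is first shared with the IOMMU, then mapped through io-pgtable. On
 * partial failure the not-yet-mapped remainder is unshared again;
 * @total_mapped reports what was actually mapped.
 */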
| int kvm_iommu_map_pages_ret(pkvm_handle_t domain_id, |
| unsigned long iova, phys_addr_t paddr, size_t pgsize, |
| size_t pgcount, int prot, size_t *total_mapped) |
| { |
| size_t size; |
| size_t mapped; |
| size_t granule; |
| int ret; |
| struct io_pgtable iopt; |
| struct kvm_hyp_iommu_domain *domain; |
| struct pkvm_hyp_vcpu *ctxt = __get_ctxt(); |
| |
| *total_mapped = 0; |
| |
| if (!kvm_iommu_ops) |
| return -ENODEV; |
| |
| if (domain_id == KVM_IOMMU_IDMAPPED_DOMAIN) |
| return -EINVAL; |
| |
| if (prot & ~IOMMU_PROT_MASK) |
| return -EINVAL; |
| |
| if (__builtin_mul_overflow(pgsize, pgcount, &size) || |
| iova + size < iova || paddr + size < paddr) |
| return -EINVAL; |
| |
| /* |
| * TODO: check whether it is safe here to call io-pgtable without a |
| * lock. Does the driver make assumptions that don't hold for the |
| * hypervisor, for example that device drivers don't call map/unmap |
| * concurrently on the same page? |
| * |
| * Command queue and iommu->power_is_off are also protected by the |
| * iommu_lock, taken by the TLB invalidation callbacks. |
| */ |
| |
| domain = handle_to_domain(domain_id); |
| if (!domain || domain_get(domain)) |
| return -EINVAL; |
| |
| ret = access_allowed(domain); |
| if (ret) |
| goto err_domain_put; |
| |
| granule = 1 << __ffs(domain->pgtable->cfg.pgsize_bitmap); |
| if (!IS_ALIGNED(iova | paddr | pgsize, granule)) { |
| ret = -EINVAL; |
| goto err_domain_put; |
| } |
| |
| ret = __pkvm_share_dma(paddr, size, ctxt); |
| if (ret) |
| goto err_domain_put; |
| |
| iopt = domain_to_iopt(domain, domain_id); |
| while (pgcount && !ret) { |
| mapped = 0; |
| ret = iopt_map_pages(&iopt, iova, paddr, pgsize, pgcount, prot, |
| 0, &mapped); |
| WARN_ON(!IS_ALIGNED(mapped, pgsize)); |
| WARN_ON(mapped > pgcount * pgsize); |
| pgcount -= mapped / pgsize; |
| |
| *total_mapped += mapped; |
| iova += mapped; |
| paddr += mapped; |
| } |
| |
| /* |
| * Unshare the bits that haven't been mapped yet. The host calls back |
| * either to continue mapping, or to unmap and unshare what's been done |
| * so far. |
| */ |
| if (pgcount) |
| __pkvm_unshare_dma(paddr, pgcount * pgsize); |
| err_domain_put: |
| domain_put(domain); |
| return ret; |
| } |
| |
| size_t kvm_iommu_map_pages(pkvm_handle_t domain_id, |
| unsigned long iova, phys_addr_t paddr, size_t pgsize, |
| size_t pgcount, int prot) |
| { |
| size_t mapped = 0; |
| |
| kvm_iommu_map_pages_ret(domain_id, iova, paddr, pgsize, |
| pgcount, prot, &mapped); |
| |
| return mapped; |
| } |
| |
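/*
 * Unmap @pgcount pages of size @pgsize from @iova, one leaf at a time so that
 * each physical range can be unshared. Returns the number of bytes unmapped.
 */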
| size_t kvm_iommu_unmap_pages(pkvm_handle_t domain_id, |
| unsigned long iova, size_t pgsize, size_t pgcount) |
| { |
| int ret; |
| size_t size; |
| size_t granule; |
| size_t unmapped; |
| phys_addr_t paddr = 0; |
| struct io_pgtable iopt; |
| size_t total_unmapped = 0; |
| struct kvm_hyp_iommu_domain *domain; |
| |
	if (domain_id == KVM_IOMMU_IDMAPPED_DOMAIN)
		return 0;
| |
| if (!kvm_iommu_ops) |
| return -ENODEV; |
| |
| if (!pgsize || !pgcount) |
| return 0; |
| |
| if (__builtin_mul_overflow(pgsize, pgcount, &size) || |
| iova + size < iova) |
| return 0; |
| |
| domain = handle_to_domain(domain_id); |
| if (!domain || domain_get(domain)) |
| return 0; |
| |
	ret = access_allowed(domain);
	if (ret)
		goto out_put_domain;
| |
| granule = 1 << __ffs(domain->pgtable->cfg.pgsize_bitmap); |
| if (!IS_ALIGNED(iova | pgsize, granule)) |
| goto out_put_domain; |
| |
| iopt = domain_to_iopt(domain, domain_id); |
| |
| while (total_unmapped < size) { |
| /* |
| * One page/block at a time so that we can unshare each page. |
| * The IOVA range provided may not be physically contiguous, and |
| * @pgsize may be larger than the one used when mapping. |
| */ |
| unmapped = iopt_unmap_leaf(&iopt, iova, pgsize, &paddr); |
| if (!unmapped || !paddr) |
| goto out_put_domain; |
| |
| ret = __pkvm_unshare_dma(paddr, unmapped); |
| if (WARN_ON(ret)) |
| goto out_put_domain; |
| |
| iova += unmapped; |
| total_unmapped += unmapped; |
| } |
| |
| out_put_domain: |
| domain_put(domain); |
| return total_unmapped; |
| } |
| |
| phys_addr_t kvm_iommu_iova_to_phys(pkvm_handle_t domain_id, unsigned long iova) |
| { |
| phys_addr_t phys = 0; |
| struct io_pgtable iopt; |
| struct kvm_hyp_iommu_domain *domain; |
| |
| if (!kvm_iommu_ops) |
| return -ENODEV; |
| |
| if (domain_id == KVM_IOMMU_IDMAPPED_DOMAIN) |
| return iova; |
| |
| domain = handle_to_domain(domain_id); |
| if (!domain || domain_get(domain)) |
| return 0; |
| |
| iopt = domain_to_iopt(domain, domain_id); |
| phys = iopt_iova_to_phys(&iopt, iova); |
| |
| domain_put(domain); |
| return phys; |
| } |
| |
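/*
 * Block DMA from @endpoint_id, e.g. when the owning guest is torn down before
 * detaching it. The driver op is optional as it is only used for guests.
 */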
| int kvm_iommu_block_dev(pkvm_handle_t iommu_id, u32 endpoint_id, struct pkvm_hyp_vm *hyp_vm) |
| { |
| int ret = -ENOENT; |
| struct kvm_hyp_iommu *iommu; |
| |
	iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id);
	if (!iommu)
		return -ENOENT;

	if (hyp_vm)
		cur_context = hyp_vm->vcpus[0];

	hyp_spin_lock(&iommu->iommu_lock);
	/* This op is optional as it is only used for guests. */
| if (kvm_iommu_ops->block_dev) |
| ret = kvm_iommu_ops->block_dev(iommu, endpoint_id); |
| |
| hyp_spin_unlock(&iommu->iommu_lock); |
| |
| cur_context = NULL; |
| |
| return ret; |
| } |
| |
| bool kvm_iommu_host_dabt_handler(struct kvm_cpu_context *host_ctxt, u64 esr, u64 addr) |
| { |
| bool ret = false; |
| |
| if (kvm_iommu_ops && kvm_iommu_ops->dabt_handler) |
| ret = kvm_iommu_ops->dabt_handler(host_ctxt, esr, addr); |
| |
| if (ret) |
| kvm_skip_host_instr(); |
| |
| return ret; |
| } |
| |
| static int iommu_power_on(struct kvm_power_domain *pd) |
| { |
| struct kvm_hyp_iommu *iommu = container_of(pd, struct kvm_hyp_iommu, |
| power_domain); |
| bool prev; |
| int ret; |
| |
| /* |
| * We currently assume that the device retains its architectural state |
| * across power off, hence no save/restore. |
| */ |
| hyp_spin_lock(&iommu->iommu_lock); |
| prev = iommu->power_is_off; |
| iommu->power_is_off = false; |
| |
| ret = kvm_iommu_ops->resume ? kvm_iommu_ops->resume(iommu) : 0; |
| if (ret) |
| iommu->power_is_off = prev; |
| hyp_spin_unlock(&iommu->iommu_lock); |
| return ret; |
| } |
| |
| static int iommu_power_off(struct kvm_power_domain *pd) |
| { |
| struct kvm_hyp_iommu *iommu = container_of(pd, struct kvm_hyp_iommu, |
| power_domain); |
| bool prev; |
| int ret; |
| |
| hyp_spin_lock(&iommu->iommu_lock); |
| prev = iommu->power_is_off; |
| iommu->power_is_off = true; |
| |
| ret = kvm_iommu_ops->suspend ? kvm_iommu_ops->suspend(iommu) : 0; |
| if (ret) |
| iommu->power_is_off = prev; |
| hyp_spin_unlock(&iommu->iommu_lock); |
| return ret; |
| } |
| |
| static const struct kvm_power_domain_ops iommu_power_ops = { |
| .power_on = iommu_power_on, |
| .power_off = iommu_power_off, |
| }; |
| |
| int kvm_iommu_init_device(struct kvm_hyp_iommu *iommu) |
| { |
| hyp_spin_lock_init(&iommu->iommu_lock); |
| |
| return pkvm_init_power_domain(&iommu->power_domain, &iommu_power_ops); |
| } |
| |
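/*
 * Mirror a host stage-2 change into the identity-mapped IOMMU domain: a
 * non-zero @prot maps [start, end) at the same addresses, while @prot == 0
 * unmaps the range.
 */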
| void __kvm_iommu_host_stage2_idmap(phys_addr_t start, phys_addr_t end, |
| enum kvm_pgtable_prot prot) |
| { |
| int pgcount = (end - start) >> PAGE_SHIFT; |
| size_t mapped, unmapped; |
| int ret; |
| struct io_pgtable iopt; |
| struct kvm_hyp_iommu_domain *domain; |
| |
| domain = handle_to_domain(KVM_IOMMU_IDMAPPED_DOMAIN); |
| iopt = domain_to_iopt(domain, KVM_IOMMU_IDMAPPED_DOMAIN); |
| if (prot) { |
| while (pgcount) { |
| mapped = 0; |
| ret = iopt_map_pages(&iopt, start, start, PAGE_SIZE, pgcount, prot, |
| 0, &mapped); |
| pgcount -= mapped / PAGE_SIZE; |
| start += mapped; |
| if (!mapped || ret) |
| return; |
| } |
| } else { |
| while (pgcount) { |
| unmapped = iopt_unmap_pages(&iopt, start, PAGE_SIZE, pgcount, NULL); |
| pgcount -= unmapped / PAGE_SIZE; |
| start += unmapped; |
			if (!unmapped)
| return; |
| } |
| } |
| } |
| |
| void kvm_iommu_host_stage2_idmap(phys_addr_t start, phys_addr_t end, |
| enum kvm_pgtable_prot prot) |
| { |
| if (!kvm_iommu_is_ready()) |
| return; |
| __kvm_iommu_host_stage2_idmap(start, end, prot); |
| } |
| |
| static int __snapshot_host_stage2(u64 start, u64 pa_max, u32 level, |
| kvm_pte_t *ptep, |
| enum kvm_pgtable_walk_flags flags, |
| void * const arg) |
| { |
| u64 end = start + kvm_granule_size(level); |
| kvm_pte_t pte = *ptep; |
| enum kvm_pgtable_prot prot; |
| |
| /* |
	 * We only snapshot memory for now, as the MMIO regions are unknown to
	 * the hypervisor and will be mapped once touched by the CPU.
	 * This is not ideal, but works for now...
| */ |
| if ((!pte || kvm_pte_valid(pte)) && addr_is_memory(start)) { |
| prot = default_host_prot(addr_is_memory(start)); |
| __kvm_iommu_host_stage2_idmap(start, end, prot); |
| } |
| |
| return 0; |
| } |
| |
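/*
 * Walk the host stage-2 and replicate the current identity mapping of memory
 * into the idmapped IOMMU domain. Called once, on the first attach to that
 * domain.
 */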
| static int snapshot_host_stage2(void) |
| { |
| struct kvm_pgtable_walker walker = { |
| .cb = __snapshot_host_stage2, |
| .flags = KVM_PGTABLE_WALK_LEAF, |
| }; |
| struct kvm_pgtable *pgt = &host_mmu.pgt; |
| |
| return kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker); |
| } |
| |
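/*
 * Register the IOMMU driver ops and initialize the allocator pools. When a
 * memcache is provided it seeds the idmap pool, and the identity-mapped
 * domain is created from it immediately.
 */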
| int kvm_iommu_init(struct kvm_iommu_ops *ops, struct kvm_hyp_memcache *mc, |
| unsigned long init_arg) |
| { |
| int ret; |
| void *idmap_pgd; |
| size_t idmap_pgd_sz; |
| void *p; |
| |
| BUILD_BUG_ON(sizeof(hyp_spinlock_t) != HYP_SPINLOCK_SIZE); |
| |
| if (WARN_ON(!ops->get_iommu_by_id || |
| !ops->free_domain || |
| !ops->alloc_domain || |
| !ops->attach_dev || |
| !ops->detach_dev || |
| !ops->pgd_size || |
| !ops->get_iommu_token_by_id)) |
| return -ENODEV; |
| |
| ret = ops->init ? ops->init(init_arg) : 0; |
| if (ret) |
| return ret; |
| |
| ret = pkvm_create_mappings(kvm_hyp_iommu_domains, kvm_hyp_iommu_domains + |
| KVM_IOMMU_DOMAINS_ROOT_ENTRIES, PAGE_HYP); |
| if (ret) |
| return ret; |
| |
| kvm_iommu_ops = ops; |
| |
	ret = hyp_pool_init(&iommu_host_pool, 0, 16 /* order = 4 */, 0, true);
	if (ret)
		return ret;

| /* Init IDMAPPED page tables. */ |
| if (mc->head) { |
| u8 order; |
		ret = hyp_pool_init(&iommu_idmap_pool, 0,
				    16 /* order = 4 */, 0, true);
| if (ret) |
| return ret; |
| |
| while (mc->nr_pages) { |
| order = mc->head & (PAGE_SIZE - 1); |
| p = pkvm_admit_host_page(mc, order); |
| hyp_set_page_refcounted(hyp_virt_to_page(p)); |
| hyp_virt_to_page(p)->order = order; |
| hyp_put_page(&iommu_idmap_pool, p); |
| } |
| |
| idmap_pgd_sz = kvm_iommu_ops->pgd_size(DOMAIN_IDMAPPED_TYPE); |
| idmap_pgd = hyp_alloc_pages(&iommu_idmap_pool, get_order(idmap_pgd_sz)); |
| if (!idmap_pgd) |
| return -ENOMEM; |
| |
		/*
		 * A bit hacky: pre-populate the first leaf of the domains table
		 * so the idmapped domain can be set up immediately.
		 */
| kvm_hyp_iommu_domains[0] = hyp_alloc_pages(&iommu_idmap_pool, 0); |
| ret = kvm_iommu_alloc_domain_nolock(KVM_IOMMU_IDMAPPED_DOMAIN, (u64)idmap_pgd, |
| idmap_pgd_sz, DOMAIN_IDMAPPED_TYPE); |
| } |
| |
| return ret; |
| } |
| |
/* Fill a request to be serviced by the host or guest on return from the hypervisor. */
| int kvm_iommu_request(struct kvm_hyp_req *req) |
| { |
| struct kvm_hyp_req *cur_req; |
| struct pkvm_hyp_vcpu *ctxt = __get_ctxt(); |
| |
| if (ctxt) |
| cur_req = pkvm_hyp_req_reserve(ctxt, KVM_HYP_REQ_EMP); |
| else |
| cur_req = this_cpu_ptr(&host_hyp_reqs); |
| |
| if (cur_req->type != KVM_HYP_REQ_EMP) |
| return -EBUSY; |
| |
| memcpy(cur_req, req, sizeof(struct kvm_hyp_req)); |
| |
| return 0; |
| } |
| |
| u64 kvm_iommu_id_to_token(pkvm_handle_t id) |
| { |
| return kvm_iommu_ops->get_iommu_token_by_id(id); |
| } |