| // SPDX-License-Identifier: GPL-2.0 |
| /* |
| * IOMMU operations for pKVM |
| * |
| * Copyright (C) 2022 Linaro Ltd. |
| */ |
| |
| #include <asm/kvm_hyp.h> |
| #include <kvm/iommu.h> |
| #include <kvm/pl011.h> |
| #include <nvhe/iommu.h> |
| #include <nvhe/mem_protect.h> |
| #include <nvhe/mm.h> |
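| |
| /* |
| * The entry points below are called from the hypervisor's host and guest |
| * handlers. A rough usage sketch (hypercall plumbing omitted): |
| * |
| * kvm_iommu_alloc_domain()  allocate a domain and its page table |
| * kvm_iommu_attach_dev()    attach an endpoint (and PASID) to the domain |
| * kvm_iommu_map_pages()     share the pages for DMA and map them |
| * kvm_iommu_unmap_pages()   undo the mapping and unshare the pages |
| * kvm_iommu_detach_dev()    detach the endpoint |
| * kvm_iommu_free_domain()   release the domain once it has no devices |
| */ |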
| |
| /* The hypervisor is non-preemptible, so cur_context can be a per-CPU variable. */ |
| DEFINE_PER_CPU(struct pkvm_hyp_vcpu *, __cur_context); |
| #define cur_context (*this_cpu_ptr(&__cur_context)) |
| |
| struct kvm_hyp_iommu_memcache __ro_after_init *kvm_hyp_iommu_memcaches; |
| |
| #define domain_to_iopt(_iommu, _domain, _domain_id) \ |
| (struct io_pgtable) { \ |
| .ops = &(_domain)->pgtable->ops, \ |
| .pgd = (_domain)->pgd, \ |
| .cookie = &(struct kvm_iommu_tlb_cookie) { \ |
| .iommu = (_iommu), \ |
| .domain_id = (_domain_id), \ |
| .domain = (_domain), \ |
| }, \ |
| } |
| |
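| /* |
| * Allocate one page for hypervisor-private IOMMU structures. |
| * |
| * In guest context (cur_context set), the page comes from the guest's own |
| * allocator so that the memory is accounted to that guest. Otherwise a page |
| * is admitted from the per-CPU memcache donated by the host and zeroed; if |
| * the cache is empty, needs_page is set to tell the host to refill it, and |
| * NULL is returned. |
| */ |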
| void *kvm_iommu_donate_page(void) |
| { |
| void *p; |
| int cpu = hyp_smp_processor_id(); |
| struct kvm_hyp_memcache tmp = kvm_hyp_iommu_memcaches[cpu].pages; |
| |
| /* In vCPU context, use the guest's own allocator so the memory is accounted to it. */ |
| if (cur_context) { |
| p = guest_alloc_contig_pages(cur_context, 1); |
| return p; |
| } |
| if (!tmp.nr_pages) { |
| kvm_hyp_iommu_memcaches[cpu].needs_page = true; |
| return NULL; |
| } |
| |
| p = pkvm_admit_host_page(&tmp); |
| if (!p) |
| return NULL; |
| |
| kvm_hyp_iommu_memcaches[cpu].pages = tmp; |
| memset(p, 0, PAGE_SIZE); |
| return p; |
| } |
| |
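| /* |
| * Give back a page obtained from kvm_iommu_donate_page(), returning it to |
| * the current CPU's memcache so the host can reclaim it. |
| */ |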
| void kvm_iommu_reclaim_page(void *p) |
| { |
| int cpu = hyp_smp_processor_id(); |
| |
| pkvm_teardown_donated_memory(&kvm_hyp_iommu_memcaches[cpu].pages, p, |
| PAGE_SIZE); |
| } |
| |
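| /* |
| * The host may only access domains that are not owned by a VM; a guest may |
| * only access domains owned by its own VM. |
| */ |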
| static int access_allowed(struct pkvm_hyp_vcpu *ctxt, |
| struct kvm_hyp_iommu_domain *domain) |
| { |
| if (!ctxt && domain->vm) |
| return -EPERM; |
| if (ctxt && (domain->vm != pkvm_hyp_vcpu_to_hyp_vm(ctxt))) |
| return -EPERM; |
| return 0; |
| } |
| |
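| /* |
| * Look up a domain in the IOMMU's two-level domains table, allocating the |
| * leaf page on first use. The domain ID space is split in half: the host |
| * owns the bottom half and guests the top half, so the two cannot collide. |
| */ |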
| static struct kvm_hyp_iommu_domain * |
| handle_to_domain(pkvm_handle_t iommu_id, pkvm_handle_t domain_id, |
| struct kvm_hyp_iommu **out_iommu, struct pkvm_hyp_vcpu *ctxt) |
| { |
| int idx; |
| struct kvm_hyp_iommu *iommu; |
| struct kvm_hyp_iommu_domain *domains; |
| |
| iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id); |
| if (!iommu) |
| return NULL; |
| |
| /* Guests only have access to the top half of the domain IDs. */ |
| if (ctxt && (domain_id < (iommu->nr_domains >> 1))) |
| return NULL; |
| |
| /* The host only has access to the bottom half of the domain IDs. */ |
| if (!ctxt && (domain_id >= (iommu->nr_domains >> 1))) |
| return NULL; |
| |
| domain_id = array_index_nospec(domain_id, iommu->nr_domains); |
| |
| idx = domain_id >> KVM_IOMMU_DOMAIN_ID_SPLIT; |
| domains = iommu->domains[idx]; |
| if (!domains) { |
| /* |
| * Even with a guest context, don't allocate domain pages from the guest |
| * memcache: one domain page can hold domains belonging to several VMs, |
| * so allocate from the host memcache to keep the data available after a |
| * single guest teardown. |
| */ |
| domains = kvm_iommu_donate_page(); |
| if (!domains) |
| return NULL; |
| iommu->domains[idx] = domains; |
| } |
| |
| *out_iommu = iommu; |
| return &domains[domain_id & KVM_IOMMU_DOMAIN_ID_LEAF_MASK]; |
| } |
| |
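| /* |
| * Set up a domain and its page table. For a guest caller the PGD is |
| * allocated from the guest's memory pool; for the host it is mapped from |
| * the memory donated through @pgd_hva. The caller must hold the IOMMU lock. |
| */ |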
| int kvm_iommu_alloc_domain_nolock(pkvm_handle_t iommu_id, pkvm_handle_t domain_id, |
| unsigned long pgd_hva, int type, struct pkvm_hyp_vcpu *ctxt) |
| { |
| int ret = -EINVAL; |
| struct io_pgtable iopt; |
| struct kvm_hyp_iommu *iommu; |
| struct kvm_hyp_iommu_domain *domain; |
| size_t pgd_size; |
| |
| domain = handle_to_domain(iommu_id, domain_id, &iommu, ctxt); |
| if (!domain) |
| return ret; |
| |
| if (domain->refs) |
| return ret; |
| |
| ret = kvm_iommu_ops->alloc_domain(iommu_id, &domain->pgtable, type); |
| if (ret) |
| return ret; |
| |
| iopt = domain_to_iopt(iommu, domain, domain_id); |
| |
| pgd_size = kvm_iommu_ops->iopt_size(&iopt); |
| |
| if (ctxt) { |
| pgd_hva = (unsigned long)guest_alloc_contig_pages(ctxt, pgd_size >> PAGE_SHIFT); |
| if (!pgd_hva) |
| return -ENOMEM; |
| domain->vm = pkvm_hyp_vcpu_to_hyp_vm(ctxt); |
| } else { |
| pgd_hva = (unsigned long)pkvm_map_donated_memory(pgd_hva, pgd_size); |
| if (!pgd_hva) |
| return -ENOMEM; |
| } |
| |
| ret = kvm_iommu_ops->alloc_iopt(&iopt, pgd_hva); |
| if (ret) |
| return ret; |
| |
| domain->refs = 1; |
| domain->pgd = iopt.pgd; |
| |
| return 0; |
| } |
| |
| int kvm_iommu_alloc_domain(pkvm_handle_t iommu_id, pkvm_handle_t domain_id, |
| unsigned long pgd_hva, int type, struct pkvm_hyp_vcpu *ctxt) |
| { |
| int ret; |
| struct kvm_hyp_iommu *iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id); |
| |
| hyp_spin_lock(&iommu->iommu_lock); |
| ret = kvm_iommu_alloc_domain_nolock(iommu_id, domain_id, pgd_hva, type, ctxt); |
| hyp_spin_unlock(&iommu->iommu_lock); |
| |
| return ret; |
| } |
| |
| int kvm_iommu_free_domain(pkvm_handle_t iommu_id, pkvm_handle_t domain_id, |
| struct pkvm_hyp_vcpu *ctxt) |
| { |
| int ret = -EINVAL; |
| struct io_pgtable iopt; |
| struct kvm_hyp_iommu *iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id); |
| struct kvm_hyp_iommu_domain *domain; |
| |
| hyp_spin_lock(&iommu->iommu_lock); |
| domain = handle_to_domain(iommu_id, domain_id, &iommu, ctxt); |
| if (!domain) |
| goto out_unlock; |
| |
| ret = access_allowed(ctxt, domain); |
| if (ret) |
| goto out_unlock; |
| |
| if (domain->refs != 1) |
| goto out_unlock; |
| |
| iopt = domain_to_iopt(iommu, domain, domain_id); |
| ret = kvm_iommu_ops->free_iopt(&iopt); |
| |
| memset(domain, 0, sizeof(*domain)); |
| |
| out_unlock: |
| hyp_spin_unlock(&iommu->iommu_lock); |
| return ret; |
| } |
| |
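| /* |
| * Attach @endpoint_id (and @pasid) to the domain and take a reference on it. |
| * Fails if the domain has not been allocated, if the caller does not own it, |
| * or if the reference count would overflow. |
| */ |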
| int kvm_iommu_attach_dev(pkvm_handle_t iommu_id, pkvm_handle_t domain_id, |
| u32 endpoint_id, u32 pasid, |
| u32 pasid_bits, struct pkvm_hyp_vcpu *ctxt) |
| { |
| int ret = -EINVAL; |
| struct kvm_hyp_iommu *iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id); |
| struct kvm_hyp_iommu_domain *domain; |
| |
| hyp_spin_lock(&iommu->iommu_lock); |
| cur_context = ctxt; |
| domain = handle_to_domain(iommu_id, domain_id, &iommu, ctxt); |
| if (!domain || !domain->refs || domain->refs == UINT_MAX) |
| goto out_unlock; |
| |
| ret = access_allowed(ctxt, domain); |
| if (ret) |
| goto out_unlock; |
| |
| ret = kvm_iommu_ops->attach_dev(iommu, domain_id, domain, endpoint_id, pasid, |
| pasid_bits); |
| if (ret) |
| goto out_unlock; |
| |
| domain->refs++; |
| out_unlock: |
| cur_context = NULL; |
| hyp_spin_unlock(&iommu->iommu_lock); |
| return ret; |
| } |
| |
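| /* |
| * Detach @endpoint_id (and @pasid) from the domain and drop the reference |
| * taken at attach time. |
| */ |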
| int kvm_iommu_detach_dev(pkvm_handle_t iommu_id, pkvm_handle_t domain_id, |
| u32 endpoint_id, u32 pasid, struct pkvm_hyp_vcpu *ctxt) |
| { |
| int ret = -EINVAL; |
| struct kvm_hyp_iommu *iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id); |
| struct kvm_hyp_iommu_domain *domain; |
| |
| hyp_spin_lock(&iommu->iommu_lock); |
| domain = handle_to_domain(iommu_id, domain_id, &iommu, ctxt); |
| if (!domain || domain->refs <= 1) |
| goto out_unlock; |
| |
| ret = access_allowed(ctxt, domain); |
| if (ret) |
| goto out_unlock; |
| |
| ret = kvm_iommu_ops->detach_dev(iommu, domain_id, domain, endpoint_id, pasid); |
| if (ret) |
| goto out_unlock; |
| |
| domain->refs--; |
| out_unlock: |
| hyp_spin_unlock(&iommu->iommu_lock); |
| return ret; |
| } |
| |
| int kvm_iommu_force_detach_dev(pkvm_handle_t iommu_id, u32 endpoint_id, u32 pasid) |
| { |
| int ret; |
| struct kvm_hyp_iommu *iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id); |
| |
| hyp_spin_lock(&iommu->iommu_lock); |
| /* The driver must not dereference the domain on detach, so pass NULL. */ |
| ret = kvm_iommu_ops->detach_dev(iommu, 0, NULL, endpoint_id, pasid); |
| hyp_spin_unlock(&iommu->iommu_lock); |
| return ret; |
| } |
| |
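| /* |
| * Unmap [@iova, @iova + @pgsize * @pgcount) one page/block at a time so that |
| * each physical page can be unshared when @unshare is true. Returns 0 on |
| * success, or a negative error if part of the range was not mapped. |
| */ |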
| static int __kvm_iommu_unmap_pages(struct io_pgtable *iopt, unsigned long iova, |
| size_t pgsize, size_t pgcount, bool unshare, |
| struct pkvm_hyp_vcpu *ctxt) |
| { |
| int ret; |
| size_t unmapped; |
| phys_addr_t paddr; |
| size_t total_unmapped = 0; |
| size_t size = pgsize * pgcount; |
| |
| while (total_unmapped < size) { |
| paddr = iopt_iova_to_phys(iopt, iova); |
| if (paddr == 0) |
| return -EINVAL; |
| |
| /* |
| * One page/block at a time, because the range provided may not |
| * be physically contiguous, and we need to unshare all physical |
| * pages. |
| */ |
| unmapped = iopt_unmap_pages(iopt, iova, pgsize, 1, NULL); |
| if (!unmapped) |
| return -EINVAL; |
| |
| if (unshare) { |
| ret = __pkvm_unshare_dma(paddr, pgsize, ctxt); |
| if (ret) |
| return ret; |
| } |
| |
| iova += unmapped; |
| pgcount -= unmapped / pgsize; |
| total_unmapped += unmapped; |
| } |
| |
| return 0; |
| } |
| |
| #define IOMMU_PROT_MASK (IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE | \ |
| IOMMU_NOEXEC | IOMMU_MMIO) |
| |
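| /* |
| * Map @pgcount pages of size @pgsize at @iova onto @paddr. The physical |
| * range is shared for DMA before being mapped; on failure any partial |
| * mapping is torn down and the range is unshared again. |
| */ |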
| int kvm_iommu_map_pages(pkvm_handle_t iommu_id, pkvm_handle_t domain_id, |
| unsigned long iova, phys_addr_t paddr, size_t pgsize, |
| size_t pgcount, int prot, struct pkvm_hyp_vcpu *ctxt) |
| { |
| size_t size; |
| size_t granule; |
| int ret = -EINVAL; |
| size_t mapped = 0; |
| struct io_pgtable iopt; |
| struct kvm_hyp_iommu *iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id); |
| size_t pgcount_orig = pgcount; |
| unsigned long iova_orig = iova; |
| phys_addr_t orig_paddr = paddr; |
| struct kvm_hyp_iommu_domain *domain; |
| |
| if (prot & ~IOMMU_PROT_MASK) |
| return -EINVAL; |
| |
| if (__builtin_mul_overflow(pgsize, pgcount, &size) || |
| iova + size < iova || paddr + size < paddr) |
| return -EOVERFLOW; |
| |
| hyp_spin_lock(&iommu->iommu_lock); |
| cur_context = ctxt; |
| |
| domain = handle_to_domain(iommu_id, domain_id, &iommu, ctxt); |
| if (!domain) |
| goto err_unlock; |
| |
| ret = access_allowed(ctxt, domain); |
| if (ret) |
| goto err_unlock; |
| |
| granule = 1 << __ffs(domain->pgtable->cfg.pgsize_bitmap); |
| if (!IS_ALIGNED(iova | paddr | pgsize, granule)) |
| goto err_unlock; |
| |
| ret = __pkvm_share_dma(paddr, size, ctxt); |
| if (ret) |
| goto err_unlock; |
| |
| iopt = domain_to_iopt(iommu, domain, domain_id); |
| while (pgcount) { |
| ret = iopt_map_pages(&iopt, iova, paddr, pgsize, pgcount, prot, |
| 0, &mapped); |
| WARN_ON(!IS_ALIGNED(mapped, pgsize)); |
| pgcount -= mapped / pgsize; |
| if (ret) |
| goto err_unmap; |
| iova += mapped; |
| paddr += mapped; |
| } |
| cur_context = NULL; |
| hyp_spin_unlock(&iommu->iommu_lock); |
| return 0; |
| |
| err_unmap: |
| __pkvm_unshare_dma(orig_paddr, size, ctxt); |
| __kvm_iommu_unmap_pages(&iopt, iova_orig, pgsize, pgcount_orig - pgcount, false, ctxt); |
| err_unlock: |
| cur_context = NULL; |
| hyp_spin_unlock(&iommu->iommu_lock); |
| return ret; |
| } |
| |
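| /* |
| * Unmap @pgcount pages of size @pgsize at @iova and unshare the underlying |
| * physical pages. |
| */ |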
| int kvm_iommu_unmap_pages(pkvm_handle_t iommu_id, pkvm_handle_t domain_id, |
| unsigned long iova, size_t pgsize, size_t pgcount, |
| struct pkvm_hyp_vcpu *ctxt) |
| { |
| size_t size; |
| size_t granule; |
| int ret = -EINVAL; |
| struct io_pgtable iopt; |
| struct kvm_hyp_iommu *iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id); |
| struct kvm_hyp_iommu_domain *domain; |
| |
| if (__builtin_mul_overflow(pgsize, pgcount, &size) || |
| iova + size < iova) |
| return -EOVERFLOW; |
| |
| hyp_spin_lock(&iommu->iommu_lock); |
| domain = handle_to_domain(iommu_id, domain_id, &iommu, ctxt); |
| if (!domain) |
| goto out_unlock; |
| |
| ret = access_allowed(ctxt, domain); |
| if (ret) |
| goto out_unlock; |
| |
| granule = 1 << __ffs(domain->pgtable->cfg.pgsize_bitmap); |
| if (!IS_ALIGNED(iova | pgsize, granule)) |
| goto out_unlock; |
| |
| iopt = domain_to_iopt(iommu, domain, domain_id); |
| ret = __kvm_iommu_unmap_pages(&iopt, iova, pgsize, pgcount, true, ctxt); |
| out_unlock: |
| hyp_spin_unlock(&iommu->iommu_lock); |
| return ret; |
| } |
| |
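| /* |
| * Translate @iova in the given domain. Returns 0 if the address is not |
| * mapped or the domain is not accessible to the caller. |
| */ |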
| phys_addr_t kvm_iommu_iova_to_phys(pkvm_handle_t iommu_id, |
| pkvm_handle_t domain_id, unsigned long iova, |
| struct pkvm_hyp_vcpu *ctxt) |
| { |
| phys_addr_t phys = 0; |
| int ret; |
| struct io_pgtable iopt; |
| struct kvm_hyp_iommu *iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id); |
| struct kvm_hyp_iommu_domain *domain; |
| |
| hyp_spin_lock(&iommu->iommu_lock); |
| domain = handle_to_domain(iommu_id, domain_id, &iommu, ctxt); |
| if (!domain) |
| goto out_unlock; |
| |
| ret = access_allowed(ctxt, domain); |
| if (ret) |
| goto out_unlock; |
| |
| iopt = domain_to_iopt(iommu, domain, domain_id); |
| phys = iopt_iova_to_phys(&iopt, iova); |
| out_unlock: |
| hyp_spin_unlock(&iommu->iommu_lock); |
| return phys; |
| } |
| |
| static int iommu_power_on(struct kvm_power_domain *pd) |
| { |
| struct kvm_hyp_iommu *iommu = container_of(pd, struct kvm_hyp_iommu, |
| power_domain); |
| |
| pkvm_debug("%s\n", __func__); |
| |
| /* |
| * We currently assume that the device retains its architectural state |
| * across power off, hence no save/restore. |
| */ |
| hyp_spin_lock(&iommu->iommu_lock); |
| iommu->power_is_off = false; |
| hyp_spin_unlock(&iommu->iommu_lock); |
| return 0; |
| } |
| |
| static int iommu_power_off(struct kvm_power_domain *pd) |
| { |
| struct kvm_hyp_iommu *iommu = container_of(pd, struct kvm_hyp_iommu, |
| power_domain); |
| |
| pkvm_debug("%s\n", __func__); |
| |
| hyp_spin_lock(&iommu->iommu_lock); |
| iommu->power_is_off = true; |
| hyp_spin_unlock(&iommu->iommu_lock); |
| return 0; |
| } |
| |
| static const struct kvm_power_domain_ops iommu_power_ops = { |
| .power_on = iommu_power_on, |
| .power_off = iommu_power_off, |
| }; |
| |
| /* |
| * Guests need to pass a domain ID when dealing with IOMMUs, and we must |
| * ensure that guest IDs don't collide with each other. We could reserve a |
| * fixed ID range per VM and keep track of it, but it is more flexible to |
| * allocate one domain ID at a time: there is no fixed maximum number of |
| * domains per VM and no per-guest, per-IOMMU range to track. |
| */ |
| int kvm_iommu_alloc_guest_domain(pkvm_handle_t iommu_id, struct pkvm_hyp_vcpu *ctxt, |
| pkvm_handle_t *ret_domain) |
| { |
| struct kvm_hyp_iommu *iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id); |
| pkvm_handle_t domain_id = (iommu->nr_domains >> 1); |
| struct kvm_hyp_iommu_domain *domain; |
| int ret; |
| unsigned long pgd_hva = 0; |
| |
| if (!ret_domain) |
| return -EINVAL; |
| |
| hyp_spin_lock(&iommu->iommu_lock); |
| cur_context = ctxt; |
| /* |
| * A linear search is not optimal, but it is good enough here: this |
| * operation is rare and guests don't allocate many domains. |
| */ |
| for ( ; domain_id < iommu->nr_domains; ++domain_id) { |
| domain = handle_to_domain(iommu_id, domain_id, &iommu, ctxt); |
| if (!domain) { |
| ret = -ENOMEM; |
| goto out_unlock; |
| } |
| /* A free domain we can use. */ |
| if (domain->refs == 0) |
| break; |
| } |
| |
| /* Out of domain IDs to allocate. */ |
| if (domain_id == iommu->nr_domains) { |
| ret = -EBUSY; |
| goto out_unlock; |
| } |
| |
| ret = kvm_iommu_alloc_domain_nolock(iommu_id, domain_id, pgd_hva, 2, ctxt); |
| if (!ret) |
| *ret_domain = domain_id; |
| out_unlock: |
| cur_context = NULL; |
| hyp_spin_unlock(&iommu->iommu_lock); |
| return ret; |
| } |
| |
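| /* |
| * Per-IOMMU initialization: register the power domain and map the root |
| * domains table into the hypervisor. |
| */ |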
| int kvm_iommu_init_device(struct kvm_hyp_iommu *iommu) |
| { |
| int ret; |
| void *domains; |
| |
| hyp_spin_lock_init(&iommu->iommu_lock); |
| |
| ret = pkvm_init_power_domain(&iommu->power_domain, &iommu_power_ops); |
| if (ret) |
| return ret; |
| |
| domains = iommu->domains; |
| iommu->domains = kern_hyp_va(domains); |
| return pkvm_create_mappings(iommu->domains, iommu->domains + |
| KVM_IOMMU_DOMAINS_ROOT_ENTRIES, PAGE_HYP); |
| } |
| |
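| /* |
| * Global initialization, called once the IOMMU driver has filled in |
| * kvm_iommu_ops: check that the mandatory ops are present and map the |
| * host-shared memcaches. |
| */ |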
| int kvm_iommu_init(void) |
| { |
| enum kvm_pgtable_prot prot; |
| |
| if (WARN_ON(!kvm_iommu_ops->get_iommu_by_id || |
| !kvm_iommu_ops->alloc_iopt || |
| !kvm_iommu_ops->free_iopt || |
| !kvm_iommu_ops->iopt_size || |
| !kvm_iommu_ops->attach_dev || |
| !kvm_iommu_ops->alloc_domain || |
| !kvm_iommu_ops->detach_dev)) |
| return -ENODEV; |
| |
| /* The memcache is shared with the host */ |
| prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_OWNED); |
| return pkvm_create_mappings(kvm_hyp_iommu_memcaches, |
| kvm_hyp_iommu_memcaches + NR_CPUS, prot); |
| } |