// SPDX-License-Identifier: GPL-2.0
/*
* IOMMU operations for pKVM
*
* Copyright (C) 2022 Linaro Ltd.
*/
#include <asm/kvm_hyp.h>
#include <kvm/iommu.h>
#include <kvm/pl011.h>
#include <nvhe/iommu.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>
/* The hypervisor is non-preemptible, so cur_context can be per-CPU. */
DEFINE_PER_CPU(struct pkvm_hyp_vcpu *, __cur_context);
#define cur_context (*this_cpu_ptr(&__cur_context))
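/*
 * cur_context is set by operations issued on behalf of a guest vCPU, so that
 * kvm_iommu_donate_page() allocates from (and accounts to) that guest's
 * allocator. It is NULL for host-initiated operations.
 */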
struct kvm_hyp_iommu_memcache __ro_after_init *kvm_hyp_iommu_memcaches;
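/*
 * Build a transient io_pgtable for @_domain, with a kvm_iommu_tlb_cookie
 * identifying the IOMMU, domain ID and domain.
 */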
#define domain_to_iopt(_iommu, _domain, _domain_id) \
(struct io_pgtable) { \
.ops = &(_domain)->pgtable->ops, \
.pgd = (_domain)->pgd, \
.cookie = &(struct kvm_iommu_tlb_cookie) { \
.iommu = (_iommu), \
.domain_id = (_domain_id), \
.domain = (_domain), \
}, \
}
void *kvm_iommu_donate_page(void)
{
void *p;
int cpu = hyp_smp_processor_id();
struct kvm_hyp_memcache tmp = kvm_hyp_iommu_memcaches[cpu].pages;
	/* For vCPUs, only use the guest's allocator, as its pages are accounted. */
if (cur_context) {
p = guest_alloc_contig_pages(cur_context, 1);
return p;
}
if (!tmp.nr_pages) {
kvm_hyp_iommu_memcaches[cpu].needs_page = true;
return NULL;
}
p = pkvm_admit_host_page(&tmp);
if (!p)
return NULL;
kvm_hyp_iommu_memcaches[cpu].pages = tmp;
memset(p, 0, PAGE_SIZE);
return p;
}
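/* Return a page obtained from kvm_iommu_donate_page() to the per-CPU memcache. */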
void kvm_iommu_reclaim_page(void *p)
{
int cpu = hyp_smp_processor_id();
pkvm_teardown_donated_memory(&kvm_hyp_iommu_memcaches[cpu].pages, p,
PAGE_SIZE);
}
static int access_allowed(struct pkvm_hyp_vcpu *ctxt,
struct kvm_hyp_iommu_domain *domain)
{
if (!ctxt && domain->vm)
return -EPERM;
if (ctxt && (domain->vm != pkvm_hyp_vcpu_to_hyp_vm(ctxt)))
return -EPERM;
return 0;
}
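/*
 * Return the domain entry for @domain_id, allocating the leaf page of the
 * two-level domains table if needed: the upper bits of @domain_id index the
 * root table (iommu->domains) and the low KVM_IOMMU_DOMAIN_ID_SPLIT bits index
 * into a leaf page. For illustration, assuming an 8-bit split, domain_id
 * 0x1234 would resolve to domains[0x12][0x34]. Guests may only use the top
 * half of the ID space, the host only the bottom half.
 */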
static struct kvm_hyp_iommu_domain *
handle_to_domain(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
struct kvm_hyp_iommu **out_iommu, struct pkvm_hyp_vcpu *ctxt)
{
int idx;
struct kvm_hyp_iommu *iommu;
struct kvm_hyp_iommu_domain *domains;
iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id);
if (!iommu)
return NULL;
	/* Guests only have access to the top half of the domain IDs. */
	if (ctxt && (domain_id < (iommu->nr_domains >> 1)))
		return NULL;
	/* Host only has access to the bottom half of the domain IDs. */
	if (!ctxt && (domain_id >= (iommu->nr_domains >> 1)))
		return NULL;
domain_id = array_index_nospec(domain_id, iommu->nr_domains);
idx = domain_id >> KVM_IOMMU_DOMAIN_ID_SPLIT;
domains = iommu->domains[idx];
if (!domains) {
		/*
		 * Even with a guest context, don't allocate new domain pages
		 * from the guest memcache: one domain page can hold the
		 * domains of several VMs, so allocate it from the host
		 * memcache to keep the data valid after a single guest
		 * teardown.
		 */
domains = kvm_iommu_donate_page();
if (!domains)
return NULL;
iommu->domains[idx] = domains;
}
*out_iommu = iommu;
return &domains[domain_id & KVM_IOMMU_DOMAIN_ID_LEAF_MASK];
}
int kvm_iommu_alloc_domain_nolock(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
unsigned long pgd_hva, int type, struct pkvm_hyp_vcpu *ctxt)
{
int ret = -EINVAL;
struct io_pgtable iopt;
struct kvm_hyp_iommu *iommu;
struct kvm_hyp_iommu_domain *domain;
size_t pgd_size;
domain = handle_to_domain(iommu_id, domain_id, &iommu, ctxt);
if (!domain)
return ret;
if (domain->refs)
return ret;
ret = kvm_iommu_ops->alloc_domain(iommu_id, &domain->pgtable, type);
if (ret)
return ret;
iopt = domain_to_iopt(iommu, domain, domain_id);
pgd_size = kvm_iommu_ops->iopt_size(&iopt);
if (ctxt) {
pgd_hva = (unsigned long)guest_alloc_contig_pages(ctxt, pgd_size >> PAGE_SHIFT);
if (!pgd_hva)
return -ENOMEM;
domain->vm = pkvm_hyp_vcpu_to_hyp_vm(ctxt);
	} else {
pgd_hva = (unsigned long)pkvm_map_donated_memory(pgd_hva, pgd_size);
}
ret = kvm_iommu_ops->alloc_iopt(&iopt, pgd_hva);
if (ret)
return ret;
domain->refs = 1;
domain->pgd = iopt.pgd;
return 0;
}
int kvm_iommu_alloc_domain(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
unsigned long pgd_hva, int type, struct pkvm_hyp_vcpu *ctxt)
{
int ret;
struct kvm_hyp_iommu *iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id);
hyp_spin_lock(&iommu->iommu_lock);
ret = kvm_iommu_alloc_domain_nolock(iommu_id, domain_id, pgd_hva, type, ctxt);
hyp_spin_unlock(&iommu->iommu_lock);
return ret;
}
int kvm_iommu_free_domain(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
struct pkvm_hyp_vcpu *ctxt)
{
int ret = -EINVAL;
struct io_pgtable iopt;
struct kvm_hyp_iommu *iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id);
struct kvm_hyp_iommu_domain *domain;
hyp_spin_lock(&iommu->iommu_lock);
domain = handle_to_domain(iommu_id, domain_id, &iommu, ctxt);
if (!domain)
goto out_unlock;
ret = access_allowed(ctxt, domain);
if (ret)
		goto out_unlock;
if (domain->refs != 1)
goto out_unlock;
iopt = domain_to_iopt(iommu, domain, domain_id);
ret = kvm_iommu_ops->free_iopt(&iopt);
memset(domain, 0, sizeof(*domain));
out_unlock:
hyp_spin_unlock(&iommu->iommu_lock);
return ret;
}
int kvm_iommu_attach_dev(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
u32 endpoint_id, u32 pasid,
u32 pasid_bits, struct pkvm_hyp_vcpu *ctxt)
{
int ret = -EINVAL;
struct kvm_hyp_iommu *iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id);
struct kvm_hyp_iommu_domain *domain;
hyp_spin_lock(&iommu->iommu_lock);
cur_context = ctxt;
domain = handle_to_domain(iommu_id, domain_id, &iommu, ctxt);
if (!domain || !domain->refs || domain->refs == UINT_MAX)
goto out_unlock;
ret = access_allowed(ctxt, domain);
if (ret)
		goto out_unlock;
ret = kvm_iommu_ops->attach_dev(iommu, domain_id, domain, endpoint_id, pasid,
pasid_bits);
if (ret)
goto out_unlock;
domain->refs++;
out_unlock:
cur_context = NULL;
hyp_spin_unlock(&iommu->iommu_lock);
return ret;
}
int kvm_iommu_detach_dev(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
u32 endpoint_id, u32 pasid, struct pkvm_hyp_vcpu *ctxt)
{
int ret = -EINVAL;
struct kvm_hyp_iommu *iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id);
struct kvm_hyp_iommu_domain *domain;
hyp_spin_lock(&iommu->iommu_lock);
domain = handle_to_domain(iommu_id, domain_id, &iommu, ctxt);
if (!domain || domain->refs <= 1)
goto out_unlock;
ret = access_allowed(ctxt, domain);
if (ret)
		goto out_unlock;
ret = kvm_iommu_ops->detach_dev(iommu, domain_id, domain, endpoint_id, pasid);
if (ret)
goto out_unlock;
domain->refs--;
out_unlock:
hyp_spin_unlock(&iommu->iommu_lock);
return ret;
}
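/*
 * Detach @endpoint_id/@pasid without going through a domain: no domain is
 * passed to the driver and no refcount is adjusted.
 */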
int kvm_iommu_force_detach_dev(pkvm_handle_t iommu_id, u32 endpoint_id, u32 pasid)
{
int ret;
struct kvm_hyp_iommu *iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id);
hyp_spin_lock(&iommu->iommu_lock);
	/*
	 * The driver is not expected to use the domain in a detach operation,
	 * so pass NULL.
	 */
ret = kvm_iommu_ops->detach_dev(iommu, 0, NULL, endpoint_id, pasid);
hyp_spin_unlock(&iommu->iommu_lock);
return ret;
}
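/*
 * Unmap [iova, iova + pgsize * pgcount) one page or block at a time,
 * optionally unsharing each physical page that backed the mapping.
 */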
static int __kvm_iommu_unmap_pages(struct io_pgtable *iopt, unsigned long iova,
size_t pgsize, size_t pgcount, bool unshare,
struct pkvm_hyp_vcpu *ctxt)
{
int ret;
size_t unmapped;
phys_addr_t paddr;
size_t total_unmapped = 0;
size_t size = pgsize * pgcount;
while (total_unmapped < size) {
paddr = iopt_iova_to_phys(iopt, iova);
if (paddr == 0)
return -EINVAL;
/*
* One page/block at a time, because the range provided may not
* be physically contiguous, and we need to unshare all physical
* pages.
*/
unmapped = iopt_unmap_pages(iopt, iova, pgsize, 1, NULL);
if (!unmapped)
return -EINVAL;
if (unshare) {
ret = __pkvm_unshare_dma(paddr, pgsize, ctxt);
if (ret)
return ret;
}
iova += unmapped;
pgcount -= unmapped / pgsize;
total_unmapped += unmapped;
}
return 0;
}
#define IOMMU_PROT_MASK (IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE |\
IOMMU_NOEXEC | IOMMU_MMIO)
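/*
 * Map @pgcount pages of size @pgsize at @iova in the domain's page table,
 * after sharing the physical range with the device via __pkvm_share_dma().
 * On failure, pages that were already mapped are unmapped and unshared again.
 */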
int kvm_iommu_map_pages(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
unsigned long iova, phys_addr_t paddr, size_t pgsize,
size_t pgcount, int prot, struct pkvm_hyp_vcpu *ctxt)
{
size_t size;
size_t granule;
int ret = -EINVAL;
size_t mapped = 0;
struct io_pgtable iopt;
struct kvm_hyp_iommu *iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id);
size_t pgcount_orig = pgcount;
unsigned long iova_orig = iova;
phys_addr_t orig_paddr = paddr;
struct kvm_hyp_iommu_domain *domain;
cur_context = ctxt;
if (prot & ~IOMMU_PROT_MASK)
return -EINVAL;
if (__builtin_mul_overflow(pgsize, pgcount, &size) ||
iova + size < iova || paddr + size < paddr)
return -EOVERFLOW;
hyp_spin_lock(&iommu->iommu_lock);
domain = handle_to_domain(iommu_id, domain_id, &iommu, ctxt);
if (!domain)
goto err_unlock;
ret = access_allowed(ctxt, domain);
if (ret)
		goto err_unlock;
granule = 1 << __ffs(domain->pgtable->cfg.pgsize_bitmap);
if (!IS_ALIGNED(iova | paddr | pgsize, granule))
goto err_unlock;
ret = __pkvm_share_dma(paddr, size, ctxt);
if (ret)
goto err_unlock;
iopt = domain_to_iopt(iommu, domain, domain_id);
while (pgcount) {
ret = iopt_map_pages(&iopt, iova, paddr, pgsize, pgcount, prot,
0, &mapped);
WARN_ON(!IS_ALIGNED(mapped, pgsize));
pgcount -= mapped / pgsize;
if (ret)
goto err_unmap;
iova += mapped;
paddr += mapped;
}
cur_context = NULL;
hyp_spin_unlock(&iommu->iommu_lock);
return 0;
err_unmap:
__pkvm_unshare_dma(orig_paddr, size, ctxt);
__kvm_iommu_unmap_pages(&iopt, iova_orig, pgsize, pgcount_orig - pgcount, false, ctxt);
err_unlock:
cur_context = NULL;
hyp_spin_unlock(&iommu->iommu_lock);
return ret;
}
int kvm_iommu_unmap_pages(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
unsigned long iova, size_t pgsize, size_t pgcount,
struct pkvm_hyp_vcpu *ctxt)
{
size_t size;
size_t granule;
int ret = -EINVAL;
struct io_pgtable iopt;
struct kvm_hyp_iommu *iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id);
struct kvm_hyp_iommu_domain *domain;
if (__builtin_mul_overflow(pgsize, pgcount, &size) ||
iova + size < iova)
return -EOVERFLOW;
hyp_spin_lock(&iommu->iommu_lock);
domain = handle_to_domain(iommu_id, domain_id, &iommu, ctxt);
if (!domain)
goto out_unlock;
ret = access_allowed(ctxt, domain);
if (ret)
		goto out_unlock;
granule = 1 << __ffs(domain->pgtable->cfg.pgsize_bitmap);
if (!IS_ALIGNED(iova | pgsize, granule))
goto out_unlock;
iopt = domain_to_iopt(iommu, domain, domain_id);
ret = __kvm_iommu_unmap_pages(&iopt, iova, pgsize, pgcount, true, ctxt);
out_unlock:
hyp_spin_unlock(&iommu->iommu_lock);
return ret;
}
phys_addr_t kvm_iommu_iova_to_phys(pkvm_handle_t iommu_id,
pkvm_handle_t domain_id, unsigned long iova,
struct pkvm_hyp_vcpu *ctxt)
{
phys_addr_t phys = 0;
int ret;
struct io_pgtable iopt;
struct kvm_hyp_iommu *iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id);
struct kvm_hyp_iommu_domain *domain;
hyp_spin_lock(&iommu->iommu_lock);
	domain = handle_to_domain(iommu_id, domain_id, &iommu, ctxt);
	if (domain) {
		ret = access_allowed(ctxt, domain);
		if (!ret) {
			iopt = domain_to_iopt(iommu, domain, domain_id);
			phys = iopt_iova_to_phys(&iopt, iova);
		}
	}
hyp_spin_unlock(&iommu->iommu_lock);
return phys;
}
static int iommu_power_on(struct kvm_power_domain *pd)
{
struct kvm_hyp_iommu *iommu = container_of(pd, struct kvm_hyp_iommu,
power_domain);
pkvm_debug("%s\n", __func__);
/*
* We currently assume that the device retains its architectural state
* across power off, hence no save/restore.
*/
hyp_spin_lock(&iommu->iommu_lock);
iommu->power_is_off = false;
hyp_spin_unlock(&iommu->iommu_lock);
return 0;
}
static int iommu_power_off(struct kvm_power_domain *pd)
{
struct kvm_hyp_iommu *iommu = container_of(pd, struct kvm_hyp_iommu,
power_domain);
pkvm_debug("%s\n", __func__);
hyp_spin_lock(&iommu->iommu_lock);
iommu->power_is_off = true;
hyp_spin_unlock(&iommu->iommu_lock);
return 0;
}
static const struct kvm_power_domain_ops iommu_power_ops = {
.power_on = iommu_power_on,
.power_off = iommu_power_off,
};
/*
 * Guests need to pass a domain ID when dealing with IOMMUs, and we must ensure
 * that guest IDs don't collide with each other. We could reserve a fixed range
 * per VM and keep track of it, but it is more flexible to allocate one domain
 * ID at a time: a VM then has no fixed maximum number of domains and we don't
 * need to track per-guest ranges for every IOMMU.
 */
int kvm_iommu_alloc_guest_domain(pkvm_handle_t iommu_id, struct pkvm_hyp_vcpu *ctxt,
pkvm_handle_t *ret_domain)
{
struct kvm_hyp_iommu *iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id);
pkvm_handle_t domain_id = (iommu->nr_domains >> 1);
struct kvm_hyp_iommu_domain *domain;
int ret;
unsigned long pgd_hva = 0;
if (!ret_domain)
return -EINVAL;
hyp_spin_lock(&iommu->iommu_lock);
cur_context = ctxt;
	/*
	 * Not optimal, but good enough for guests: this operation is rare and
	 * guests don't allocate many domains.
	 */
for ( ; domain_id < iommu->nr_domains; ++domain_id) {
domain = handle_to_domain(iommu_id, domain_id, &iommu, ctxt);
if (!domain) {
ret = -ENOMEM;
goto out_unlock;
}
/* A free domain we can use. */
if (domain->refs == 0)
break;
}
	/* Out of domains to allocate. */
	if (domain_id == iommu->nr_domains) {
ret = -EBUSY;
goto out_unlock;
}
ret = kvm_iommu_alloc_domain_nolock(iommu_id, domain_id, pgd_hva, 2, ctxt);
*ret_domain = domain_id;
out_unlock:
	cur_context = NULL;
hyp_spin_unlock(&iommu->iommu_lock);
return ret;
}
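/*
 * Per-IOMMU initialization: set up the lock and power domain, convert the
 * domains root table pointer to a hyp VA and map it into the hypervisor.
 */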
int kvm_iommu_init_device(struct kvm_hyp_iommu *iommu)
{
int ret;
void *domains;
hyp_spin_lock_init(&iommu->iommu_lock);
ret = pkvm_init_power_domain(&iommu->power_domain, &iommu_power_ops);
if (ret)
return ret;
domains = iommu->domains;
iommu->domains = kern_hyp_va(domains);
return pkvm_create_mappings(iommu->domains, iommu->domains +
KVM_IOMMU_DOMAINS_ROOT_ENTRIES, PAGE_HYP);
}
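/*
 * Global initialization: check that the driver implements all mandatory ops
 * and map the host-shared per-CPU memcaches into the hypervisor.
 */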
int kvm_iommu_init(void)
{
enum kvm_pgtable_prot prot;
if (WARN_ON(!kvm_iommu_ops->get_iommu_by_id ||
!kvm_iommu_ops->alloc_iopt ||
!kvm_iommu_ops->free_iopt ||
!kvm_iommu_ops->iopt_size ||
!kvm_iommu_ops->attach_dev ||
!kvm_iommu_ops->alloc_domain ||
!kvm_iommu_ops->detach_dev))
return -ENODEV;
/* The memcache is shared with the host */
prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_OWNED);
return pkvm_create_mappings(kvm_hyp_iommu_memcaches,
kvm_hyp_iommu_memcaches + NR_CPUS, prot);
}