// SPDX-License-Identifier: GPL-2.0
/*
* IOMMU operations for pKVM
*
* Copyright (C) 2022 Linaro Ltd.
*/
#include <asm/kvm_hyp.h>
#include <hyp/adjust_pc.h>
#include <kvm/iommu.h>
#include <nvhe/alloc_mgt.h>
#include <nvhe/iommu.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>
#include <nvhe/pviommu-host.h>
enum {
IOMMU_DRIVER_NOT_READY = 0,
IOMMU_DRIVER_INITIALIZING,
IOMMU_DRIVER_READY,
};
static atomic_t kvm_iommu_initialized;
void **kvm_hyp_iommu_domains;
static struct hyp_pool iommu_idmap_pool;
static struct hyp_pool iommu_host_pool;
static int snapshot_host_stage2(void);
/*
 * This lock protects domain operations that can't be done using the atomic
 * refcount. It is only used for alloc/free of domains, so it shouldn't add
 * much overhead as these are rare operations, while map/unmap are left
 * lockless.
 */
static DEFINE_HYP_SPINLOCK(iommu_domains_lock);
/* The hypervisor is non-preemptible, so cur_context can be per-CPU. */
DEFINE_PER_CPU(struct pkvm_hyp_vcpu *, __cur_context);
#define cur_context (*this_cpu_ptr(&__cur_context))
DECLARE_PER_CPU(struct kvm_hyp_req, host_hyp_reqs);
static void host_lock_component(void)
{
hyp_spin_lock(&host_mmu.lock);
}
static void host_unlock_component(void)
{
hyp_spin_unlock(&host_mmu.lock);
}
struct pkvm_hyp_vcpu *__get_ctxt(void)
{
return this_cpu_ptr(&kvm_host_data)->host_ctxt.__hyp_running_vcpu;
}
void hyp_iommu_lock(struct kvm_hyp_iommu *iommu)
{
hyp_spin_lock(&iommu->iommu_lock);
}
void hyp_iommu_unlock(struct kvm_hyp_iommu *iommu)
{
hyp_spin_unlock(&iommu->iommu_lock);
}
void hyp_assert_iommu_lock_held(struct kvm_hyp_iommu *iommu)
{
hyp_assert_lock_held(&iommu->iommu_lock);
}
void hyp_domains_lock(void)
{
hyp_spin_lock(&iommu_domains_lock);
}
void hyp_domains_unlock(void)
{
hyp_spin_unlock(&iommu_domains_lock);
}
static inline bool kvm_iommu_acquire_init(void)
{
return atomic_cmpxchg_acquire(&kvm_iommu_initialized, IOMMU_DRIVER_NOT_READY,
IOMMU_DRIVER_INITIALIZING) == IOMMU_DRIVER_NOT_READY;
}
static inline void kvm_iommu_release_init(void)
{
atomic_set_release(&kvm_iommu_initialized, IOMMU_DRIVER_READY);
}
static inline bool kvm_iommu_is_ready(void)
{
return atomic_read(&kvm_iommu_initialized) == IOMMU_DRIVER_READY;
}
void *kvm_iommu_donate_pages(u8 order, bool fill_req)
{
void *p;
struct pkvm_hyp_vcpu *ctxt = __get_ctxt();
struct kvm_hyp_req *req = this_cpu_ptr(&host_hyp_reqs);
	/* For vCPUs, only use the guest's own allocator. */
if (ctxt) {
p = guest_alloc_contig_pages(ctxt, order);
if (!p && fill_req) {
req = pkvm_hyp_req_reserve(ctxt, KVM_HYP_REQ_MEM);
goto ret_fill_req;
}
return p;
}
p = hyp_alloc_pages(&iommu_host_pool, order);
if (p)
return p;
ret_fill_req:
if (fill_req) {
req->type = KVM_HYP_REQ_MEM;
req->mem.dest = REQ_MEM_IOMMU;
req->mem.sz_alloc = (1 << order) * PAGE_SIZE;
req->mem.nr_pages = 1;
}
return NULL;
}
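/*
 * A minimal usage sketch (hypothetical caller): on failure, the
 * KVM_HYP_REQ_MEM request filled above tells the requester (host or guest)
 * to top up the IOMMU pool and retry the hypercall, so a caller simply
 * bails out with -ENOMEM:
 *
 *	void *p = kvm_iommu_donate_pages(0, true);
 *
 *	if (!p)
 *		return -ENOMEM;	// requester refills the pool and retries
 */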
void *kvm_iommu_donate_pgtable_pages(struct io_pgtable *iop, u8 order, bool fill_req)
{
if (iop && ((struct kvm_iommu_tlb_cookie *)iop->cookie)->domain_id ==
KVM_IOMMU_IDMAPPED_DOMAIN) {
return hyp_alloc_pages(&iommu_idmap_pool, order);
} else {
return kvm_iommu_donate_pages(order, fill_req);
}
}
void kvm_iommu_reclaim_pages(void *p, u8 order)
{
struct pkvm_hyp_vcpu *ctxt = __get_ctxt();
	/* The guest may not be loaded, but we may be in a teardown context. */
if (!ctxt)
ctxt = cur_context;
if (ctxt)
guest_free_contig_pages(ctxt, p, order);
else
hyp_put_page(&iommu_host_pool, p);
}
int kvm_iommu_refill(struct kvm_hyp_memcache *host_mc)
{
void *p;
unsigned long order;
while (host_mc->nr_pages) {
order = host_mc->head & (PAGE_SIZE - 1);
		p = pkvm_admit_host_page(host_mc, order);
		if (!p)
			return -EINVAL;
		hyp_virt_to_page(p)->order = order;
hyp_set_page_refcounted(hyp_virt_to_page(p));
hyp_put_page(&iommu_host_pool, p);
}
return 0;
}
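/*
 * A sketch of the memcache encoding relied on above (illustrative only, the
 * authoritative definition lives with the memcache helpers): each entry is
 * assumed to carry the page order in the low bits of the page-aligned
 * physical address, i.e.
 *
 *	order = head & (PAGE_SIZE - 1);
 *	phys  = head & PAGE_MASK;
 *
 * kvm_iommu_reclaim() below pushes order-0 pages back with
 * push_hyp_memcache(host_mc, p, hyp_virt_to_phys, 0), matching this layout.
 */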
void kvm_iommu_reclaim(struct kvm_hyp_memcache *host_mc, int target)
{
void *p;
while (target--) {
p = hyp_alloc_pages(&iommu_idmap_pool, 0);
if (!p)
return;
push_hyp_memcache(host_mc, p, hyp_virt_to_phys, 0);
WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(p), 1));
}
}
int kvm_iommu_reclaimable(void)
{
return hyp_pool_free_pages(&iommu_host_pool);
}
struct hyp_mgt_allocator_ops kvm_iommu_allocator_ops = {
.refill = kvm_iommu_refill,
.reclaim = kvm_iommu_reclaim,
.reclaimable = kvm_iommu_reclaimable,
};
static struct kvm_hyp_iommu_domain *
handle_to_domain(pkvm_handle_t domain_id)
{
int idx;
struct kvm_hyp_iommu_domain *domains;
if (domain_id >= KVM_IOMMU_MAX_DOMAINS)
return NULL;
domain_id = array_index_nospec(domain_id, KVM_IOMMU_MAX_DOMAINS);
idx = domain_id >> KVM_IOMMU_DOMAIN_ID_SPLIT;
domains = (struct kvm_hyp_iommu_domain *)READ_ONCE(kvm_hyp_iommu_domains[idx]);
if (!domains) {
domains = kvm_iommu_donate_pages(0, true);
if (!domains)
return NULL;
/*
* handle_to_domain() does not have to be called under a lock,
* but even though we allocate a leaf in all cases, it's only
* really a valid thing to do under alloc_domain(), which uses a
* lock. Races are therefore a host bug and we don't need to be
* delicate about it.
*/
if (WARN_ON(cmpxchg64_relaxed(&kvm_hyp_iommu_domains[idx], 0,
(void *)domains) != 0))
return NULL;
}
return &domains[domain_id & KVM_IOMMU_DOMAIN_ID_LEAF_MASK];
}
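/*
 * Worked example of the two-level lookup above (constants are illustrative,
 * the real values come from <kvm/iommu.h>): with KVM_IOMMU_DOMAIN_ID_SPLIT
 * == 9, each leaf holds 512 domains, so domain_id 1027 resolves to
 *
 *	idx  = 1027 >> 9;				// root slot 2
 *	leaf = 1027 & KVM_IOMMU_DOMAIN_ID_LEAF_MASK;	// entry 3 of that leaf
 *
 * Leaves are allocated lazily and published with cmpxchg64_relaxed(), so a
 * concurrent reader sees either NULL or a fully usable leaf.
 */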
static int domain_get(struct kvm_hyp_iommu_domain *domain)
{
int old = atomic_fetch_inc_acquire(&domain->refs);
if (WARN_ON(!old))
return -EINVAL;
else if (old < 0 || old + 1 < 0)
return -EOVERFLOW;
return 0;
}
static void domain_put(struct kvm_hyp_iommu_domain *domain)
{
BUG_ON(!atomic_dec_return_release(&domain->refs));
}
static int access_allowed(struct kvm_hyp_iommu_domain *domain)
{
struct pkvm_hyp_vcpu *ctxt = __get_ctxt();
	/* The guest may not be loaded, but we may be in a teardown context. */
if (!ctxt)
ctxt = cur_context;
if (!ctxt && domain->vm)
return -EPERM;
if (ctxt && (domain->vm != pkvm_hyp_vcpu_to_hyp_vm(ctxt)))
return -EPERM;
return 0;
}
static int kvm_iommu_alloc_domain_nolock(pkvm_handle_t domain_id, unsigned long pgd_hva,
unsigned long pgd_size, u32 type)
{
struct kvm_hyp_iommu_domain *domain;
int ret = -EINVAL;
struct pkvm_hyp_vcpu *ctxt = __get_ctxt();
struct pkvm_hyp_vm *vm;
if (!kvm_iommu_ops)
return -ENODEV;
domain = handle_to_domain(domain_id);
if (!domain)
return -EINVAL;
if (atomic_read(&domain->refs))
return -EINVAL;
ret = kvm_iommu_ops->alloc_domain(domain, domain_id, pgd_hva, pgd_size, type);
if (ret)
return ret;
atomic_set_release(&domain->refs, 1);
if (ctxt) {
vm = pkvm_hyp_vcpu_to_hyp_vm(ctxt);
domain->vm = vm;
}
return ret;
}
int kvm_iommu_alloc_domain(pkvm_handle_t domain_id, u32 type)
{
int ret;
unsigned long pgd_hva, pgd_size;
/* Host only has access to the lower half of the domain IDs. */
if (domain_id >= (KVM_IOMMU_MAX_DOMAINS >> 1))
return -EINVAL;
pgd_size = kvm_iommu_ops->pgd_size(type);
	/*
	 * Guest memory is already donated as it comes from the memcache,
	 * while host memory passed from the HVC needs to be donated.
	 */
pgd_hva = (unsigned long)kvm_iommu_donate_pages(get_order(pgd_size), true);
if (!pgd_hva)
return -ENOMEM;
hyp_spin_lock(&iommu_domains_lock);
ret = kvm_iommu_alloc_domain_nolock(domain_id, pgd_hva, pgd_size, type);
if (ret)
pkvm_unmap_donated_memory((void *)pgd_hva, pgd_size);
hyp_spin_unlock(&iommu_domains_lock);
return ret;
}
/*
* The domain ID space is shared between guests (second half), so this is a
* (dummy) allocator for guest domain IDs.
*/
int kvm_iommu_alloc_guest_domain(pkvm_handle_t *ret_domain)
{
pkvm_handle_t domain_id = KVM_IOMMU_MAX_DOMAINS >> 1;
struct kvm_hyp_iommu_domain *domain;
int ret = -EINVAL;
unsigned long pgd_hva, pgd_size;
pgd_size = kvm_iommu_ops->pgd_size(DOMAIN_ANY_TYPE);
if (!ret_domain)
return -EINVAL;
hyp_spin_lock(&iommu_domains_lock);
	/*
	 * Not optimal, but good enough for guests: this operation is rare
	 * since guests don't allocate many domains.
	 */
for ( ; domain_id < KVM_IOMMU_MAX_DOMAINS; ++domain_id) {
domain = handle_to_domain(domain_id);
if (!domain) {
ret = -ENOMEM;
goto out_unlock;
}
/* A free domain we can use... hopefully */
if (atomic_read(&domain->refs) == 0)
break;
}
if (domain_id == KVM_IOMMU_MAX_DOMAINS) {
ret = -EBUSY;
goto out_unlock;
}
pgd_hva = (u64)kvm_iommu_donate_pages(get_order(pgd_size), true);
if (!pgd_hva) {
ret = -ENOMEM;
goto out_unlock;
}
ret = kvm_iommu_alloc_domain_nolock(domain_id, pgd_hva, pgd_size, DOMAIN_ANY_TYPE);
if (ret)
kvm_iommu_reclaim_pages((void *)pgd_hva, get_order(pgd_size));
*ret_domain = domain_id;
out_unlock:
hyp_spin_unlock(&iommu_domains_lock);
return ret;
}
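/*
 * Illustration of the ID-space split (numbers are hypothetical, only the
 * halving is implied by the code): with KVM_IOMMU_MAX_DOMAINS == 1 << 16,
 * the host allocates IDs 0..32767 through kvm_iommu_alloc_domain(), while
 * guest domains are linearly scanned out of IDs 32768..65535 above.
 */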
int kvm_iommu_free_domain(pkvm_handle_t domain_id)
{
struct kvm_hyp_iommu_domain *domain;
int ret = -EINVAL;
if (!kvm_iommu_ops)
return -ENODEV;
if (domain_id == KVM_IOMMU_IDMAPPED_DOMAIN)
return -EINVAL;
domain = handle_to_domain(domain_id);
if (!domain)
return -EINVAL;
hyp_spin_lock(&iommu_domains_lock);
ret = access_allowed(domain);
if (ret)
goto out_unlock;
if (WARN_ON(atomic_cmpxchg_release(&domain->refs, 1, 0) != 1))
goto out_unlock;
ret = kvm_iommu_ops->free_domain(domain, domain_id);
memset(domain, 0, sizeof(*domain));
out_unlock:
hyp_spin_unlock(&iommu_domains_lock);
return ret;
}
/*
* A guest is dying before freeing its domains, free them for it.
*/
int kvm_iommu_free_guest_domains(struct pkvm_hyp_vm *hyp_vm)
{
pkvm_handle_t domain_id = (KVM_IOMMU_MAX_DOMAINS >> 1);
struct kvm_hyp_iommu_domain *domain;
/* Doesn't matter which vcpu. */
cur_context = hyp_vm->vcpus[0];
for ( ; (domain_id < KVM_IOMMU_MAX_DOMAINS) ; ++domain_id) {
domain = handle_to_domain(domain_id);
if (domain && domain->vm == hyp_vm) {
			/*
			 * The guest is dying and can't perform any operations on these
			 * domains, so it is safe to modify them without a lock.
			 * A guest can also die while attaching devices to a domain, so
			 * we don't care about the refcount as the pvIOMMU will block
			 * the device anyway.
			 */
atomic_set_release(&domain->refs, 1);
kvm_iommu_free_domain(domain_id);
}
}
cur_context = NULL;
return 0;
}
int kvm_iommu_attach_dev(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
u32 endpoint_id, u32 pasid, u32 pasid_bits, u64 flags)
{
int ret = -EINVAL;
struct kvm_hyp_iommu *iommu;
struct kvm_hyp_iommu_domain *domain;
struct pkvm_hyp_vcpu *ctxt = __get_ctxt();
	/*
	 * ctxt is set when this is called on behalf of a running guest.
	 * A guest is prevented from attaching to a host domain by the pvIOMMU
	 * handler, as it won't find a translation for the requested device.
	 */
if (!ctxt) {
ret = pkvm_pviommu_host_allowed(iommu_id, endpoint_id);
if (ret)
return ret;
}
if (!kvm_iommu_ops)
return -ENODEV;
iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id);
if (!iommu)
return -ENOENT;
hyp_spin_lock(&iommu->iommu_lock);
domain = handle_to_domain(domain_id);
if (!domain || domain_get(domain))
goto out_unlock;
ret = access_allowed(domain);
if (ret)
goto out_unlock;
ret = kvm_iommu_ops->attach_dev(iommu, domain_id, domain, endpoint_id,
pasid, pasid_bits, flags);
if (ret)
goto err_put_domain;
if ((domain_id == KVM_IOMMU_IDMAPPED_DOMAIN) && kvm_iommu_acquire_init()) {
host_lock_component();
snapshot_host_stage2();
host_unlock_component();
kvm_iommu_release_init();
}
out_unlock:
hyp_spin_unlock(&iommu->iommu_lock);
return ret;
err_put_domain:
domain_put(domain);
hyp_spin_unlock(&iommu->iommu_lock);
return ret;
}
int kvm_iommu_detach_dev(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
u32 endpoint_id, u32 pasid)
{
int ret = -EINVAL;
struct kvm_hyp_iommu *iommu;
struct kvm_hyp_iommu_domain *domain;
struct pkvm_hyp_vcpu *ctxt = __get_ctxt();
	/* As with attach: the host may only touch endpoints it is allowed to control. */
if (!ctxt) {
ret = pkvm_pviommu_host_allowed(iommu_id, endpoint_id);
if (ret)
return ret;
}
if (!kvm_iommu_ops)
return -ENODEV;
iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id);
if (!iommu)
return -ENOENT;
hyp_spin_lock(&iommu->iommu_lock);
domain = handle_to_domain(domain_id);
if (!domain || atomic_read(&domain->refs) <= 1)
goto out_unlock;
ret = access_allowed(domain);
if (ret)
goto out_unlock;
ret = kvm_iommu_ops->detach_dev(iommu, domain_id, domain, endpoint_id, pasid);
if (ret)
goto out_unlock;
domain_put(domain);
out_unlock:
hyp_spin_unlock(&iommu->iommu_lock);
return ret;
}
#define IOMMU_PROT_MASK (IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE |\
IOMMU_NOEXEC | IOMMU_MMIO | IOMMU_PRIV)
int kvm_iommu_map_pages_ret(pkvm_handle_t domain_id,
unsigned long iova, phys_addr_t paddr, size_t pgsize,
size_t pgcount, int prot, size_t *total_mapped)
{
size_t size;
size_t mapped;
size_t granule;
int ret;
struct io_pgtable iopt;
struct kvm_hyp_iommu_domain *domain;
struct pkvm_hyp_vcpu *ctxt = __get_ctxt();
*total_mapped = 0;
if (!kvm_iommu_ops)
return -ENODEV;
if (domain_id == KVM_IOMMU_IDMAPPED_DOMAIN)
return -EINVAL;
if (prot & ~IOMMU_PROT_MASK)
return -EINVAL;
if (__builtin_mul_overflow(pgsize, pgcount, &size) ||
iova + size < iova || paddr + size < paddr)
return -EINVAL;
/*
* TODO: check whether it is safe here to call io-pgtable without a
* lock. Does the driver make assumptions that don't hold for the
* hypervisor, for example that device drivers don't call map/unmap
* concurrently on the same page?
*
* Command queue and iommu->power_is_off are also protected by the
* iommu_lock, taken by the TLB invalidation callbacks.
*/
domain = handle_to_domain(domain_id);
if (!domain || domain_get(domain))
return -EINVAL;
ret = access_allowed(domain);
if (ret)
goto err_domain_put;
granule = 1 << __ffs(domain->pgtable->cfg.pgsize_bitmap);
if (!IS_ALIGNED(iova | paddr | pgsize, granule)) {
ret = -EINVAL;
goto err_domain_put;
}
ret = __pkvm_share_dma(paddr, size, ctxt);
if (ret)
goto err_domain_put;
iopt = domain_to_iopt(domain, domain_id);
while (pgcount && !ret) {
mapped = 0;
ret = iopt_map_pages(&iopt, iova, paddr, pgsize, pgcount, prot,
0, &mapped);
WARN_ON(!IS_ALIGNED(mapped, pgsize));
WARN_ON(mapped > pgcount * pgsize);
pgcount -= mapped / pgsize;
*total_mapped += mapped;
iova += mapped;
paddr += mapped;
}
/*
* Unshare the bits that haven't been mapped yet. The host calls back
* either to continue mapping, or to unmap and unshare what's been done
* so far.
*/
if (pgcount)
__pkvm_unshare_dma(paddr, pgcount * pgsize);
err_domain_put:
domain_put(domain);
return ret;
}
size_t kvm_iommu_map_pages(pkvm_handle_t domain_id,
unsigned long iova, phys_addr_t paddr, size_t pgsize,
size_t pgcount, int prot)
{
size_t mapped = 0;
kvm_iommu_map_pages_ret(domain_id, iova, paddr, pgsize,
pgcount, prot, &mapped);
return mapped;
}
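/*
 * Sketch of the host-side retry loop this return convention implies
 * (hypothetical, the actual caller lives in the host kernel's IOMMU driver):
 * the host advances by the number of bytes mapped and calls again, or unmaps
 * and unshares what was done so far when no progress is made.
 *
 *	while (pgcount) {
 *		mapped = hyp_map_pages(domain_id, iova, paddr, pgsize,
 *				       pgcount, prot);	// the HVC wrapper
 *		if (!mapped)
 *			break;	// roll back: unmap/unshare what succeeded
 *		iova += mapped;
 *		paddr += mapped;
 *		pgcount -= mapped / pgsize;
 *	}
 */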
size_t kvm_iommu_unmap_pages(pkvm_handle_t domain_id,
unsigned long iova, size_t pgsize, size_t pgcount)
{
int ret;
size_t size;
size_t granule;
size_t unmapped;
phys_addr_t paddr = 0;
struct io_pgtable iopt;
size_t total_unmapped = 0;
struct kvm_hyp_iommu_domain *domain;
if (domain_id == KVM_IOMMU_IDMAPPED_DOMAIN)
return -EINVAL;
if (!kvm_iommu_ops)
return -ENODEV;
if (!pgsize || !pgcount)
return 0;
if (__builtin_mul_overflow(pgsize, pgcount, &size) ||
iova + size < iova)
return 0;
domain = handle_to_domain(domain_id);
if (!domain || domain_get(domain))
return 0;
	ret = access_allowed(domain);
	if (ret)
		goto out_put_domain;
granule = 1 << __ffs(domain->pgtable->cfg.pgsize_bitmap);
if (!IS_ALIGNED(iova | pgsize, granule))
goto out_put_domain;
iopt = domain_to_iopt(domain, domain_id);
while (total_unmapped < size) {
/*
* One page/block at a time so that we can unshare each page.
* The IOVA range provided may not be physically contiguous, and
* @pgsize may be larger than the one used when mapping.
*/
unmapped = iopt_unmap_leaf(&iopt, iova, pgsize, &paddr);
if (!unmapped || !paddr)
goto out_put_domain;
ret = __pkvm_unshare_dma(paddr, unmapped);
if (WARN_ON(ret))
goto out_put_domain;
iova += unmapped;
total_unmapped += unmapped;
}
out_put_domain:
domain_put(domain);
return total_unmapped;
}
phys_addr_t kvm_iommu_iova_to_phys(pkvm_handle_t domain_id, unsigned long iova)
{
phys_addr_t phys = 0;
struct io_pgtable iopt;
struct kvm_hyp_iommu_domain *domain;
if (!kvm_iommu_ops)
return -ENODEV;
if (domain_id == KVM_IOMMU_IDMAPPED_DOMAIN)
return iova;
domain = handle_to_domain(domain_id);
if (!domain || domain_get(domain))
return 0;
iopt = domain_to_iopt(domain, domain_id);
phys = iopt_iova_to_phys(&iopt, iova);
domain_put(domain);
return phys;
}
int kvm_iommu_block_dev(pkvm_handle_t iommu_id, u32 endpoint_id, struct pkvm_hyp_vm *hyp_vm)
{
int ret = -ENOENT;
struct kvm_hyp_iommu *iommu;
	iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id);
	if (!iommu)
		return -ENOENT;
	if (hyp_vm)
		cur_context = hyp_vm->vcpus[0];
	hyp_spin_lock(&iommu->iommu_lock);
/* This is optional as it is used for guests only */
if (kvm_iommu_ops->block_dev)
ret = kvm_iommu_ops->block_dev(iommu, endpoint_id);
hyp_spin_unlock(&iommu->iommu_lock);
cur_context = NULL;
return ret;
}
bool kvm_iommu_host_dabt_handler(struct kvm_cpu_context *host_ctxt, u64 esr, u64 addr)
{
bool ret = false;
if (kvm_iommu_ops && kvm_iommu_ops->dabt_handler)
ret = kvm_iommu_ops->dabt_handler(host_ctxt, esr, addr);
if (ret)
kvm_skip_host_instr();
return ret;
}
static int iommu_power_on(struct kvm_power_domain *pd)
{
struct kvm_hyp_iommu *iommu = container_of(pd, struct kvm_hyp_iommu,
power_domain);
bool prev;
int ret;
/*
* We currently assume that the device retains its architectural state
* across power off, hence no save/restore.
*/
hyp_spin_lock(&iommu->iommu_lock);
prev = iommu->power_is_off;
iommu->power_is_off = false;
ret = kvm_iommu_ops->resume ? kvm_iommu_ops->resume(iommu) : 0;
if (ret)
iommu->power_is_off = prev;
hyp_spin_unlock(&iommu->iommu_lock);
return ret;
}
static int iommu_power_off(struct kvm_power_domain *pd)
{
struct kvm_hyp_iommu *iommu = container_of(pd, struct kvm_hyp_iommu,
power_domain);
bool prev;
int ret;
hyp_spin_lock(&iommu->iommu_lock);
prev = iommu->power_is_off;
iommu->power_is_off = true;
ret = kvm_iommu_ops->suspend ? kvm_iommu_ops->suspend(iommu) : 0;
if (ret)
iommu->power_is_off = prev;
hyp_spin_unlock(&iommu->iommu_lock);
return ret;
}
static const struct kvm_power_domain_ops iommu_power_ops = {
.power_on = iommu_power_on,
.power_off = iommu_power_off,
};
int kvm_iommu_init_device(struct kvm_hyp_iommu *iommu)
{
hyp_spin_lock_init(&iommu->iommu_lock);
return pkvm_init_power_domain(&iommu->power_domain, &iommu_power_ops);
}
void __kvm_iommu_host_stage2_idmap(phys_addr_t start, phys_addr_t end,
enum kvm_pgtable_prot prot)
{
int pgcount = (end - start) >> PAGE_SHIFT;
size_t mapped, unmapped;
	int ret = 0;
struct io_pgtable iopt;
struct kvm_hyp_iommu_domain *domain;
domain = handle_to_domain(KVM_IOMMU_IDMAPPED_DOMAIN);
iopt = domain_to_iopt(domain, KVM_IOMMU_IDMAPPED_DOMAIN);
if (prot) {
while (pgcount) {
mapped = 0;
ret = iopt_map_pages(&iopt, start, start, PAGE_SIZE, pgcount, prot,
0, &mapped);
pgcount -= mapped / PAGE_SIZE;
start += mapped;
if (!mapped || ret)
return;
}
} else {
while (pgcount) {
unmapped = iopt_unmap_pages(&iopt, start, PAGE_SIZE, pgcount, NULL);
pgcount -= unmapped / PAGE_SIZE;
start += unmapped;
if (!unmapped || ret)
return;
}
}
}
void kvm_iommu_host_stage2_idmap(phys_addr_t start, phys_addr_t end,
enum kvm_pgtable_prot prot)
{
if (!kvm_iommu_is_ready())
return;
__kvm_iommu_host_stage2_idmap(start, end, prot);
}
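/*
 * Example of the calling convention (a sketch; the real call sites are in the
 * host stage-2 code): a non-zero prot identity-maps [start, end) in the
 * IDMAPPED domain, prot == 0 removes it, e.g. when the host loses and later
 * regains ownership of a page:
 *
 *	kvm_iommu_host_stage2_idmap(phys, phys + PAGE_SIZE, 0);		// revoke DMA
 *	kvm_iommu_host_stage2_idmap(phys, phys + PAGE_SIZE,
 *				    default_host_prot(true));		// restore it
 */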
static int __snapshot_host_stage2(u64 start, u64 pa_max, u32 level,
kvm_pte_t *ptep,
enum kvm_pgtable_walk_flags flags,
void * const arg)
{
u64 end = start + kvm_granule_size(level);
kvm_pte_t pte = *ptep;
enum kvm_pgtable_prot prot;
	/*
	 * We only snapshot memory for now, as the MMIO regions are unknown to
	 * the hypervisor; they will be mapped once touched by the CPU.
	 * This is not ideal, but works for now...
	 */
if ((!pte || kvm_pte_valid(pte)) && addr_is_memory(start)) {
prot = default_host_prot(addr_is_memory(start));
__kvm_iommu_host_stage2_idmap(start, end, prot);
}
return 0;
}
static int snapshot_host_stage2(void)
{
struct kvm_pgtable_walker walker = {
.cb = __snapshot_host_stage2,
.flags = KVM_PGTABLE_WALK_LEAF,
};
struct kvm_pgtable *pgt = &host_mmu.pgt;
return kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker);
}
int kvm_iommu_init(struct kvm_iommu_ops *ops, struct kvm_hyp_memcache *mc,
unsigned long init_arg)
{
int ret;
void *idmap_pgd;
size_t idmap_pgd_sz;
void *p;
BUILD_BUG_ON(sizeof(hyp_spinlock_t) != HYP_SPINLOCK_SIZE);
if (WARN_ON(!ops->get_iommu_by_id ||
!ops->free_domain ||
!ops->alloc_domain ||
!ops->attach_dev ||
!ops->detach_dev ||
!ops->pgd_size ||
!ops->get_iommu_token_by_id))
return -ENODEV;
ret = ops->init ? ops->init(init_arg) : 0;
if (ret)
return ret;
ret = pkvm_create_mappings(kvm_hyp_iommu_domains, kvm_hyp_iommu_domains +
KVM_IOMMU_DOMAINS_ROOT_ENTRIES, PAGE_HYP);
if (ret)
return ret;
kvm_iommu_ops = ops;
	ret = hyp_pool_init(&iommu_host_pool, 0, 16 /* order = 4 */, 0, true);
	if (ret)
		return ret;
/* Init IDMAPPED page tables. */
if (mc->head) {
u8 order;
ret = hyp_pool_init(&iommu_idmap_pool, 0,
16 /* order = 4*/, 0, true);
if (ret)
return ret;
while (mc->nr_pages) {
order = mc->head & (PAGE_SIZE - 1);
			p = pkvm_admit_host_page(mc, order);
			if (!p)
				return -EINVAL;
hyp_set_page_refcounted(hyp_virt_to_page(p));
hyp_virt_to_page(p)->order = order;
hyp_put_page(&iommu_idmap_pool, p);
}
idmap_pgd_sz = kvm_iommu_ops->pgd_size(DOMAIN_IDMAPPED_TYPE);
idmap_pgd = hyp_alloc_pages(&iommu_idmap_pool, get_order(idmap_pgd_sz));
if (!idmap_pgd)
return -ENOMEM;
		/* A somewhat hacky way to populate the first domain so it can be used immediately. */
		kvm_hyp_iommu_domains[0] = hyp_alloc_pages(&iommu_idmap_pool, 0);
		if (!kvm_hyp_iommu_domains[0])
			return -ENOMEM;
ret = kvm_iommu_alloc_domain_nolock(KVM_IOMMU_IDMAPPED_DOMAIN, (u64)idmap_pgd,
idmap_pgd_sz, DOMAIN_IDMAPPED_TYPE);
}
return ret;
}
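/*
 * A minimal sketch of what an IOMMU driver registers (callback names are
 * hypothetical; only the mandatory ops checked above are shown):
 *
 *	static struct kvm_iommu_ops my_ops = {
 *		.get_iommu_by_id	= my_get_iommu_by_id,
 *		.get_iommu_token_by_id	= my_get_iommu_token_by_id,
 *		.alloc_domain		= my_alloc_domain,
 *		.free_domain		= my_free_domain,
 *		.attach_dev		= my_attach_dev,
 *		.detach_dev		= my_detach_dev,
 *		.pgd_size		= my_pgd_size,
 *	};
 *
 *	ret = kvm_iommu_init(&my_ops, mc, init_arg);
 */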
/* Request to hypervisor. */
int kvm_iommu_request(struct kvm_hyp_req *req)
{
struct kvm_hyp_req *cur_req;
struct pkvm_hyp_vcpu *ctxt = __get_ctxt();
if (ctxt)
cur_req = pkvm_hyp_req_reserve(ctxt, KVM_HYP_REQ_EMP);
else
cur_req = this_cpu_ptr(&host_hyp_reqs);
if (cur_req->type != KVM_HYP_REQ_EMP)
return -EBUSY;
memcpy(cur_req, req, sizeof(struct kvm_hyp_req));
return 0;
}
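/*
 * Example (sketch): a driver that cannot allocate in its current context can
 * queue a memory request for the requester to service before retrying. The
 * field values mirror those filled in kvm_iommu_donate_pages():
 *
 *	struct kvm_hyp_req req = {
 *		.type		= KVM_HYP_REQ_MEM,
 *		.mem.dest	= REQ_MEM_IOMMU,
 *		.mem.sz_alloc	= PAGE_SIZE,
 *		.mem.nr_pages	= 1,
 *	};
 *
 *	kvm_iommu_request(&req);
 */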
u64 kvm_iommu_id_to_token(pkvm_handle_t id)
{
return kvm_iommu_ops->get_iommu_token_by_id(id);
}