// SPDX-License-Identifier: GPL-2.0
/*
* pKVM host driver for the Arm SMMUv3
*
* Copyright (C) 2022 Linaro Ltd.
*/
#include <linux/kvm_host.h>
#include <linux/local_lock.h>
#include <linux/memblock.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

#include <asm/hyp_alloc.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pkvm.h>

#include "pkvm/arm_smmu_v3.h"
#include "arm-smmu-v3.h"
struct host_arm_smmu_device {
struct arm_smmu_device smmu;
pkvm_handle_t id;
u32 boot_gbpa;
unsigned int pgd_order_s1;
unsigned int pgd_order_s2;
unsigned long pgsize_bitmap_s1;
unsigned long pgsize_bitmap_s2;
atomic_t initialized;
bool hvc_pd;
};
#define smmu_to_host(_smmu) \
	container_of(_smmu, struct host_arm_smmu_device, smmu)
struct kvm_arm_smmu_master {
struct arm_smmu_device *smmu;
struct device *dev;
struct kvm_arm_smmu_domain *domain;
u32 ssid_bits;
	bool idmapped; /* Stage-2 is transparently identity mapped */
};
struct kvm_arm_smmu_domain {
struct iommu_domain domain;
struct arm_smmu_device *smmu;
struct mutex init_mutex;
unsigned long pgd;
pkvm_handle_t id;
};
#define to_kvm_smmu_domain(_domain) \
container_of(_domain, struct kvm_arm_smmu_domain, domain)
#ifdef MODULE
static unsigned long pkvm_module_token;
#define ksym_ref_addr_nvhe(x) \
((typeof(kvm_nvhe_sym(x)) *)(pkvm_el2_mod_va(&kvm_nvhe_sym(x), pkvm_module_token)))
#else
#define ksym_ref_addr_nvhe(x) \
((typeof(kvm_nvhe_sym(x)) *)(kern_hyp_va(lm_alias(&kvm_nvhe_sym(x)))))
#endif
static size_t kvm_arm_smmu_cur;
static size_t kvm_arm_smmu_count;
static struct hyp_arm_smmu_v3_device *kvm_arm_smmu_array;
static struct device **smmus_arr;
static DEFINE_IDA(kvm_arm_smmu_domain_ida);
extern struct kvm_iommu_ops kvm_nvhe_sym(smmu_ops);
int kvm_nvhe_sym(smmu_init_hyp_module)(const struct pkvm_module_ops *ops);
static int kvm_arm_smmu_topup_memcache(struct arm_smmu_device *smmu,
				       struct arm_smccc_res res)
{
	struct kvm_hyp_req req;

	hyp_reqs_smccc_decode(res, &req);

	if ((res.a1 == -ENOMEM) && (req.type != KVM_HYP_REQ_MEM)) {
		/*
		 * There is no way for drivers to populate hyp_alloc requests,
		 * so -ENOMEM with no request indicates a hyp_alloc shortage.
		 */
		return __pkvm_topup_hyp_alloc(req.mem.nr_pages);
	} else if (req.type != KVM_HYP_REQ_MEM) {
		return -EBADE;
	}

	if (req.mem.dest == REQ_MEM_IOMMU) {
		return __pkvm_topup_hyp_alloc_mgt(HYP_ALLOC_MGT_IOMMU_ID,
						  req.mem.nr_pages,
						  req.mem.sz_alloc);
	} else if (req.mem.dest == REQ_MEM_HYP_ALLOC) {
		/* Fill hyp_alloc. */
		return __pkvm_topup_hyp_alloc(req.mem.nr_pages);
	}

	dev_err(smmu->dev, "Bogus mem request\n");
	return -EBADE;
}
pkvm_handle_t kvm_arm_smmu_v3_id(struct device *dev)
{
struct arm_smmu_device *smmu = dev_get_drvdata(dev);
struct host_arm_smmu_device *host_smmu = smmu_to_host(smmu);
return host_smmu->id;
}
/*
* Issue hypercall, and retry after filling the memcache if necessary.
* After the call, reclaim pages pushed in the memcache by the hypervisor.
*/
#define kvm_call_hyp_nvhe_mc(smmu, ...) \
({ \
struct arm_smccc_res __res; \
do { \
__res = kvm_call_hyp_nvhe_smccc(__VA_ARGS__); \
} while (__res.a1 && !kvm_arm_smmu_topup_memcache(smmu, __res));\
__res.a1; \
})
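
/*
 * For example, domain allocation below issues:
 *
 *	ret = kvm_call_hyp_nvhe_mc(smmu, __pkvm_host_iommu_alloc_domain,
 *				   kvm_smmu_domain->id, ARM_64_LPAE_S1);
 *
 * and transparently retries the hypercall whenever the hypervisor reports a
 * memory shortage and kvm_arm_smmu_topup_memcache() manages to satisfy it.
 */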
static struct platform_driver kvm_arm_smmu_driver;
static struct arm_smmu_device *
kvm_arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
{
struct device *dev;
dev = driver_find_device_by_fwnode(&kvm_arm_smmu_driver.driver, fwnode);
put_device(dev);
return dev ? dev_get_drvdata(dev) : NULL;
}
static pkvm_handle_t kvm_arm_v3_id_by_of(struct device_node *np)
{
struct device *dev;
dev = driver_find_device_by_of_node(&kvm_arm_smmu_driver.driver, np);
put_device(dev);
return kvm_arm_smmu_v3_id(dev);
}
static struct iommu_ops kvm_arm_smmu_ops;
static struct iommu_device *kvm_arm_smmu_probe_device(struct device *dev)
{
int ret;
struct arm_smmu_device *smmu;
struct kvm_arm_smmu_master *master;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
if (!fwspec || fwspec->ops != &kvm_arm_smmu_ops)
return ERR_PTR(-ENODEV);
if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
return ERR_PTR(-EBUSY);
smmu = kvm_arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
if (!smmu)
return ERR_PTR(-ENODEV);
master = kzalloc(sizeof(*master), GFP_KERNEL);
if (!master)
return ERR_PTR(-ENOMEM);
master->dev = dev;
master->smmu = smmu;
device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
	/* master->idmapped = device_property_read_bool(dev, "iommu-idmapped"); */
master->idmapped = false;
dev_iommu_priv_set(dev, master);
if (!device_link_add(dev, smmu->dev,
DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE |
DL_FLAG_AUTOREMOVE_SUPPLIER)) {
ret = -ENOLINK;
goto err_free;
}
return &smmu->iommu;
err_free:
kfree(master);
return ERR_PTR(ret);
}
static void kvm_arm_smmu_release_device(struct device *dev)
{
struct kvm_arm_smmu_master *master = dev_iommu_priv_get(dev);
kfree(master);
iommu_fwspec_free(dev);
}
static struct iommu_domain *kvm_arm_smmu_domain_alloc(unsigned type)
{
struct kvm_arm_smmu_domain *kvm_smmu_domain;
/*
* We don't support
* - IOMMU_DOMAIN_DMA_FQ because lazy unmap would clash with memory
* donation to guests.
*/
if (type != IOMMU_DOMAIN_DMA &&
type != IOMMU_DOMAIN_UNMANAGED &&
type != IOMMU_DOMAIN_IDENTITY)
return NULL;
kvm_smmu_domain = kzalloc(sizeof(*kvm_smmu_domain), GFP_KERNEL);
if (!kvm_smmu_domain)
return NULL;
mutex_init(&kvm_smmu_domain->init_mutex);
return &kvm_smmu_domain->domain;
}
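
/*
 * Lazily bind a domain to an SMMU instance on first attach: identity domains
 * map onto the shared KVM_IOMMU_IDMAPPED_DOMAIN, while everything else gets
 * a fresh ID from the IDA and is allocated in the hypervisor with a stage-1
 * page-table format.
 */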
static int kvm_arm_smmu_domain_finalize(struct kvm_arm_smmu_domain *kvm_smmu_domain,
struct kvm_arm_smmu_master *master)
{
int ret = 0;
struct arm_smmu_device *smmu = master->smmu;
struct host_arm_smmu_device *host_smmu = smmu_to_host(smmu);
if (kvm_smmu_domain->smmu) {
if (kvm_smmu_domain->smmu != smmu)
return -EINVAL;
return 0;
}
if (kvm_smmu_domain->domain.type == IOMMU_DOMAIN_IDENTITY) {
kvm_smmu_domain->id = KVM_IOMMU_IDMAPPED_DOMAIN;
/* Nothing to do. */
return 0;
}
ret = ida_alloc_range(&kvm_arm_smmu_domain_ida, KVM_IOMMU_IDMAPPED_DOMAIN + 1,
1 << (smmu->vmid_bits - 1), GFP_KERNEL);
if (ret < 0)
return ret;
kvm_smmu_domain->id = ret;
ret = kvm_call_hyp_nvhe_mc(smmu, __pkvm_host_iommu_alloc_domain,
kvm_smmu_domain->id, ARM_64_LPAE_S1);
if (ret)
goto err_free;
kvm_smmu_domain->domain.pgsize_bitmap = host_smmu->pgsize_bitmap_s1;
kvm_smmu_domain->domain.geometry.aperture_end = (1UL << smmu->ias) - 1;
kvm_smmu_domain->domain.geometry.force_aperture = true;
kvm_smmu_domain->smmu = smmu;
return 0;
err_free:
ida_free(&kvm_arm_smmu_domain_ida, kvm_smmu_domain->id);
return ret;
}
static void kvm_arm_smmu_domain_free(struct iommu_domain *domain)
{
	int ret;
	struct kvm_arm_smmu_domain *kvm_smmu_domain = to_kvm_smmu_domain(domain);
	struct arm_smmu_device *smmu = kvm_smmu_domain->smmu;

	if (smmu && (kvm_smmu_domain->domain.type != IOMMU_DOMAIN_IDENTITY)) {
		ret = kvm_call_hyp_nvhe(__pkvm_host_iommu_free_domain, kvm_smmu_domain->id);
		if (ret)
			dev_err(smmu->dev, "Failed to free domain: %d\n", ret);
		ida_free(&kvm_arm_smmu_domain_ida, kvm_smmu_domain->id);
	}
	kfree(kvm_smmu_domain);
}
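
/*
 * Detach every stream ID of @master from its current domain for @pasid via
 * the hypervisor. Returns 0 if the master is not attached to any domain.
 */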
static int kvm_arm_smmu_detach_dev_pasid(struct host_arm_smmu_device *host_smmu,
struct kvm_arm_smmu_master *master,
ioasid_t pasid)
{
	int i, ret = 0;
struct arm_smmu_device *smmu = &host_smmu->smmu;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
if (!master->domain)
return 0;
for (i = 0; i < fwspec->num_ids; i++) {
int sid = fwspec->ids[i];
ret = kvm_call_hyp_nvhe(__pkvm_host_iommu_detach_dev,
host_smmu->id, master->domain->id, sid, pasid);
if (ret) {
dev_err(smmu->dev, "cannot detach device %s (0x%x): %d\n",
dev_name(master->dev), sid, ret);
break;
}
}
master->domain = NULL;
return ret;
}
static int kvm_arm_smmu_detach_dev(struct host_arm_smmu_device *host_smmu,
struct kvm_arm_smmu_master *master)
{
return kvm_arm_smmu_detach_dev_pasid(host_smmu, master, 0);
}
static void kvm_arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
{
struct kvm_arm_smmu_master *master = dev_iommu_priv_get(dev);
struct host_arm_smmu_device *host_smmu = smmu_to_host(master->smmu);
kvm_arm_smmu_detach_dev_pasid(host_smmu, master, pasid);
}
static int kvm_arm_smmu_set_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid)
{
int i, ret;
struct arm_smmu_device *smmu;
struct host_arm_smmu_device *host_smmu;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct kvm_arm_smmu_master *master = dev_iommu_priv_get(dev);
struct kvm_arm_smmu_domain *kvm_smmu_domain = to_kvm_smmu_domain(domain);
if (!master)
return -ENODEV;
smmu = master->smmu;
host_smmu = smmu_to_host(smmu);
ret = kvm_arm_smmu_detach_dev(host_smmu, master);
if (ret)
return ret;
mutex_lock(&kvm_smmu_domain->init_mutex);
ret = kvm_arm_smmu_domain_finalize(kvm_smmu_domain, master);
mutex_unlock(&kvm_smmu_domain->init_mutex);
if (ret)
return ret;
for (i = 0; i < fwspec->num_ids; i++) {
int sid = fwspec->ids[i];
ret = kvm_call_hyp_nvhe_mc(smmu, __pkvm_host_iommu_attach_dev,
host_smmu->id, kvm_smmu_domain->id,
sid, pasid, master->ssid_bits, 0);
if (ret) {
dev_err(smmu->dev, "cannot attach device %s (0x%x): %d\n",
dev_name(dev), sid, ret);
			goto out;
}
}
master->domain = kvm_smmu_domain;
out:
if (ret)
kvm_arm_smmu_detach_dev(host_smmu, master);
return ret;
}
static int kvm_arm_smmu_attach_dev(struct iommu_domain *domain,
struct device *dev)
{
return kvm_arm_smmu_set_dev_pasid(domain, dev, 0);
}
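
/*
 * Map a (possibly large) range through the hypervisor. The hypercall returns
 * the number of bytes actually mapped in a1; keep advancing iova/paddr and
 * topping up the memcache until the whole request is satisfied or the
 * top-up fails.
 */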
static int kvm_arm_smmu_map_pages(struct iommu_domain *domain,
unsigned long iova, phys_addr_t paddr,
size_t pgsize, size_t pgcount, int prot,
gfp_t gfp, size_t *total_mapped)
{
size_t mapped;
size_t size = pgsize * pgcount;
struct kvm_arm_smmu_domain *kvm_smmu_domain = to_kvm_smmu_domain(domain);
struct arm_smmu_device *smmu = kvm_smmu_domain->smmu;
struct arm_smccc_res res;
do {
res = kvm_call_hyp_nvhe_smccc(__pkvm_host_iommu_map_pages,
kvm_smmu_domain->id, iova, paddr,
pgsize, pgcount, prot);
mapped = res.a1;
iova += mapped;
paddr += mapped;
WARN_ON(mapped % pgsize);
WARN_ON(mapped > pgcount * pgsize);
pgcount -= mapped / pgsize;
*total_mapped += mapped;
} while (*total_mapped < size && !kvm_arm_smmu_topup_memcache(smmu, res));
if (*total_mapped < size)
return -EINVAL;
return 0;
}
static size_t kvm_arm_smmu_unmap_pages(struct iommu_domain *domain,
unsigned long iova, size_t pgsize,
size_t pgcount,
struct iommu_iotlb_gather *iotlb_gather)
{
size_t unmapped;
size_t total_unmapped = 0;
size_t size = pgsize * pgcount;
struct kvm_arm_smmu_domain *kvm_smmu_domain = to_kvm_smmu_domain(domain);
struct arm_smmu_device *smmu = kvm_smmu_domain->smmu;
struct arm_smccc_res res;
do {
res = kvm_call_hyp_nvhe_smccc(__pkvm_host_iommu_unmap_pages,
kvm_smmu_domain->id,
iova, pgsize, pgcount);
unmapped = res.a1;
total_unmapped += unmapped;
iova += unmapped;
WARN_ON(unmapped % pgsize);
pgcount -= unmapped / pgsize;
/*
* The page table driver can unmap less than we asked for. If it
* didn't unmap anything at all, then it either reached the end
* of the range, or it needs a page in the memcache to break a
* block mapping.
*/
} while (total_unmapped < size &&
(unmapped || !kvm_arm_smmu_topup_memcache(smmu, res)));
return total_unmapped;
}
static phys_addr_t kvm_arm_smmu_iova_to_phys(struct iommu_domain *domain,
dma_addr_t iova)
{
struct kvm_arm_smmu_domain *kvm_smmu_domain = to_kvm_smmu_domain(domain);
return kvm_call_hyp_nvhe(__pkvm_host_iommu_iova_to_phys, kvm_smmu_domain->id, iova);
}
static int kvm_arm_smmu_def_domain_type(struct device *dev)
{
struct kvm_arm_smmu_master *master = dev_iommu_priv_get(dev);
if (master->idmapped)
return IOMMU_DOMAIN_IDENTITY;
return 0;
}
static struct iommu_ops kvm_arm_smmu_ops = {
.capable = arm_smmu_capable,
.device_group = arm_smmu_device_group,
.of_xlate = arm_smmu_of_xlate,
.probe_device = kvm_arm_smmu_probe_device,
.release_device = kvm_arm_smmu_release_device,
.domain_alloc = kvm_arm_smmu_domain_alloc,
.remove_dev_pasid = kvm_arm_smmu_remove_dev_pasid,
.def_domain_type = kvm_arm_smmu_def_domain_type,
.owner = THIS_MODULE,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = kvm_arm_smmu_attach_dev,
.free = kvm_arm_smmu_domain_free,
.map_pages = kvm_arm_smmu_map_pages,
.unmap_pages = kvm_arm_smmu_unmap_pages,
.iova_to_phys = kvm_arm_smmu_iova_to_phys,
.set_dev_pasid = kvm_arm_smmu_set_dev_pasid,
}
};
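
/*
 * Sanity-check the probed hardware features. The SMMU must support stage-1
 * translation and little-endian tables and must not force stalls. Only the
 * features in keep_features are forwarded to the hypervisor; the rest are
 * masked out so the hyp driver never relies on them.
 */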
static bool kvm_arm_smmu_validate_features(struct arm_smmu_device *smmu)
{
unsigned int required_features =
ARM_SMMU_FEAT_TRANS_S1 |
ARM_SMMU_FEAT_TT_LE;
unsigned int forbidden_features =
ARM_SMMU_FEAT_STALL_FORCE;
unsigned int keep_features =
ARM_SMMU_FEAT_2_LVL_STRTAB |
ARM_SMMU_FEAT_2_LVL_CDTAB |
ARM_SMMU_FEAT_TT_LE |
ARM_SMMU_FEAT_SEV |
ARM_SMMU_FEAT_COHERENCY |
ARM_SMMU_FEAT_TRANS_S1 |
ARM_SMMU_FEAT_TRANS_S2 |
ARM_SMMU_FEAT_VAX |
ARM_SMMU_FEAT_RANGE_INV;
if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY) {
dev_err(smmu->dev, "unsupported layout\n");
return false;
}
if ((smmu->features & required_features) != required_features) {
dev_err(smmu->dev, "missing features 0x%x\n",
required_features & ~smmu->features);
return false;
}
if (smmu->features & forbidden_features) {
dev_err(smmu->dev, "features 0x%x forbidden\n",
smmu->features & forbidden_features);
return false;
}
smmu->features &= keep_features;
return true;
}
static irqreturn_t kvm_arm_smmu_evt_handler(int irq, void *dev)
{
int i;
struct arm_smmu_device *smmu = dev;
struct arm_smmu_queue *q = &smmu->evtq.q;
struct arm_smmu_ll_queue *llq = &q->llq;
static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
u64 evt[EVTQ_ENT_DWORDS];
do {
while (!queue_remove_raw(q, evt)) {
u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
if (!__ratelimit(&rs))
continue;
dev_info(smmu->dev, "event 0x%02x received:\n", id);
for (i = 0; i < ARRAY_SIZE(evt); ++i)
dev_info(smmu->dev, "\t0x%016llx\n",
(unsigned long long)evt[i]);
cond_resched();
}
/*
* Not much we can do on overflow, so scream and pretend we're
* trying harder.
*/
if (queue_sync_prod_in(q) == -EOVERFLOW)
dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
} while (!queue_empty(llq));
/* Sync our overflow flag, as we believe we're up to speed */
llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
Q_IDX(llq, llq->cons);
return IRQ_HANDLED;
}
static irqreturn_t kvm_arm_smmu_gerror_handler(int irq, void *dev)
{
u32 gerror, gerrorn, active;
struct arm_smmu_device *smmu = dev;
gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
active = gerror ^ gerrorn;
if (!(active & GERROR_ERR_MASK))
return IRQ_NONE; /* No errors pending */
dev_warn(smmu->dev,
"unexpected global error reported (0x%08x), this could be serious\n",
active);
if (active & GERROR_SFM_ERR) {
dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
//reset device?
}
if (active & GERROR_MSI_GERROR_ABT_ERR)
dev_warn(smmu->dev, "GERROR MSI write aborted\n");
if (active & GERROR_MSI_PRIQ_ABT_ERR)
dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
if (active & GERROR_MSI_EVTQ_ABT_ERR)
dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
if (active & GERROR_MSI_CMDQ_ABT_ERR)
dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
if (active & GERROR_PRIQ_ABT_ERR)
dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
if (active & GERROR_EVTQ_ABT_ERR)
dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
if (active & GERROR_CMDQ_ERR) {
dev_err(smmu->dev, "CMDQ ERR -- Hypervisor corruption\n");
BUG();
}
writel(gerror, smmu->base + ARM_SMMU_GERRORN);
return IRQ_HANDLED;
}
static irqreturn_t kvm_arm_smmu_pri_handler(int irq, void *dev)
{
struct arm_smmu_device *smmu = dev;
dev_err(smmu->dev, "PRI not supported in KVM driver!\n");
return IRQ_HANDLED;
}
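
/*
 * Bring the SMMU into a known state before handing it to the hypervisor:
 * save the boot GBPA and force bypass traffic to abort, disable the device,
 * program the stream-table, command-queue and event-queue base registers,
 * then re-wire and enable the event/GERROR (and optionally PRI) interrupts.
 */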
static int kvm_arm_smmu_device_reset(struct host_arm_smmu_device *host_smmu)
{
int ret;
u32 reg;
struct arm_smmu_device *smmu = &host_smmu->smmu;
u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
if (reg & CR0_SMMUEN)
dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
/* Disable bypass */
host_smmu->boot_gbpa = readl_relaxed(smmu->base + ARM_SMMU_GBPA);
ret = arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
if (ret)
return ret;
ret = arm_smmu_device_disable(smmu);
if (ret)
return ret;
/* Stream table */
writeq_relaxed(smmu->strtab_cfg.strtab_base,
smmu->base + ARM_SMMU_STRTAB_BASE);
writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
/* Command queue */
writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
/* Event queue */
writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
writel_relaxed(smmu->evtq.q.llq.prod, smmu->base + SZ_64K + ARM_SMMU_EVTQ_PROD);
writel_relaxed(smmu->evtq.q.llq.cons, smmu->base + SZ_64K + ARM_SMMU_EVTQ_CONS);
/* Disable IRQs first */
ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
ARM_SMMU_IRQ_CTRLACK);
if (ret) {
dev_err(smmu->dev, "failed to disable irqs\n");
return ret;
}
arm_smmu_setup_unique_irqs(smmu, kvm_arm_smmu_evt_handler,
kvm_arm_smmu_gerror_handler,
kvm_arm_smmu_pri_handler);
if (smmu->features & ARM_SMMU_FEAT_PRI)
irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
/* Enable interrupt generation on the SMMU */
ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
if (ret)
dev_warn(smmu->dev, "failed to enable irqs\n");
return 0;
}
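
/*
 * Describe an SCMI-over-SMC power domain to the hypervisor: the shared
 * memory region and "arm,smc-id" are parsed from the SCMI node so the
 * hypervisor can track power-state requests for this SMMU.
 */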
static int kvm_arm_probe_scmi_pd(struct device_node *scmi_node,
struct kvm_power_domain *pd)
{
int ret;
struct resource res;
struct of_phandle_args args;
pd->type = KVM_POWER_DOMAIN_ARM_SCMI;
ret = of_parse_phandle_with_args(scmi_node, "shmem", NULL, 0, &args);
if (ret)
return ret;
ret = of_address_to_resource(args.np, 0, &res);
if (ret)
goto out_put_nodes;
ret = of_property_read_u32(scmi_node, "arm,smc-id",
&pd->arm_scmi.smc_id);
if (ret)
goto out_put_nodes;
/*
* The shared buffer is unmapped from the host while a request is in
* flight, so it has to be on its own page.
*/
if (!IS_ALIGNED(res.start, SZ_64K) || resource_size(&res) < SZ_64K) {
ret = -EINVAL;
goto out_put_nodes;
}
pd->arm_scmi.shmem_base = res.start;
pd->arm_scmi.shmem_size = resource_size(&res);
out_put_nodes:
of_node_put(args.np);
return ret;
}
/* TODO: Move this. None of it is specific to SMMU */
static int kvm_arm_probe_power_domain(struct device *dev,
struct kvm_power_domain *pd)
{
int ret;
struct device_node *parent;
struct of_phandle_args args;
struct arm_smmu_device *smmu = dev_get_drvdata(dev);
struct host_arm_smmu_device *host_smmu = smmu_to_host(smmu);
if (!of_get_property(dev->of_node, "power-domains", NULL)) {
dev_warn(dev, "No power-domains assuming host control\n");
/* SMMU MUST RESET TO BLOCK DMA. */
pd->type = KVM_POWER_DOMAIN_HOST_HVC;
pd->device_id = kvm_arm_smmu_cur;
host_smmu->hvc_pd = true;
return 0;
}
ret = of_parse_phandle_with_args(dev->of_node, "power-domains",
"#power-domain-cells", 0, &args);
if (ret)
return ret;
parent = of_get_parent(args.np);
if (parent && of_device_is_compatible(parent, "arm,scmi-smc") &&
args.args_count > 0) {
pd->arm_scmi.domain_id = args.args[0];
ret = kvm_arm_probe_scmi_pd(parent, pd);
} else {
dev_warn(dev, "Unknown power-domains assuming host control\n");
/* SMMU MUST RESET TO BLOCK DMA. */
pd->type = KVM_POWER_DOMAIN_HOST_HVC;
pd->device_id = kvm_arm_smmu_cur;
host_smmu->hvc_pd = true;
}
of_node_put(parent);
of_node_put(args.np);
return ret;
}
int smmu_finalise_device(struct device *dev, void *data)
{
struct arm_smmu_device *smmu = dev_get_drvdata(dev);
struct platform_device *pdev = container_of(dev, struct platform_device, dev);
resource_size_t mmio_addr = platform_get_mem_or_io(pdev, 0)->start;
pm_runtime_put_noidle(dev);
	return arm_smmu_register_iommu(smmu, &kvm_arm_smmu_ops, mmio_addr);
}
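
/*
 * Pre-allocate a memcache big enough for the hypervisor to identity-map all
 * memblock memory plus a generous 1GB MMIO estimate; it is handed over to
 * EL2 through kvm_iommu_init_hyp() once every SMMU has probed.
 */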
static int alloc_idmapped_mc(struct kvm_hyp_memcache *mc)
{
u64 i, total = 0;
phys_addr_t start, end;
int ret;
for_each_mem_range(i, &start, &end) {
total += __hyp_pgtable_max_pages((end - start) >> PAGE_SHIFT);
}
	/* We don't know how much we will need for MMIO; 1GB is very generous. */
	total += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
	/* For the PGD. */
ret = topup_hyp_memcache(mc, 1, 0, 3);
if (ret)
return ret;
ret = topup_hyp_memcache(mc, total, 0, 0);
return ret;
}
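
/*
 * Probe one SMMUv3 instance: discover the hardware and its power domain,
 * size the stage-1/stage-2 page-table configurations, allocate the command
 * and event queues and the stream table, reset the device, and record the
 * parameters the hypervisor needs in the shared kvm_arm_smmu_array entry.
 * Once the last instance has probed, the hypervisor side is initialized and
 * the IOMMUs are registered with the core.
 */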
static int kvm_arm_smmu_probe(struct platform_device *pdev)
{
int ret, i;
bool bypass;
struct resource *res;
phys_addr_t mmio_addr;
struct io_pgtable_cfg cfg_s1, cfg_s2;
size_t mmio_size, pgd_size;
struct arm_smmu_device *smmu;
struct device *dev = &pdev->dev;
struct host_arm_smmu_device *host_smmu;
struct hyp_arm_smmu_v3_device *hyp_smmu;
struct kvm_power_domain power_domain = {};
unsigned long ias;
struct kvm_hyp_memcache mc = {0, 0};
if (kvm_arm_smmu_cur >= kvm_arm_smmu_count)
return -ENOSPC;
hyp_smmu = &kvm_arm_smmu_array[kvm_arm_smmu_cur];
smmus_arr[kvm_arm_smmu_cur] = &pdev->dev;
host_smmu = devm_kzalloc(dev, sizeof(*host_smmu), GFP_KERNEL);
if (!host_smmu)
return -ENOMEM;
smmu = &host_smmu->smmu;
smmu->dev = dev;
ret = arm_smmu_fw_probe(pdev, smmu, &bypass);
if (ret || bypass)
return ret ?: -EINVAL;
platform_set_drvdata(pdev, host_smmu);
ret = kvm_arm_probe_power_domain(dev, &power_domain);
if (ret)
return ret;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
mmio_size = resource_size(res);
if (mmio_size < SZ_128K) {
dev_err(dev, "unsupported MMIO region size (%pr)\n", res);
return -EINVAL;
}
mmio_addr = res->start;
host_smmu->id = kvm_arm_smmu_cur;
smmu->base = devm_ioremap_resource(dev, res);
if (IS_ERR(smmu->base))
return PTR_ERR(smmu->base);
arm_smmu_probe_irq(pdev, smmu);
ret = arm_smmu_device_hw_probe(smmu);
if (ret)
return ret;
if (!kvm_arm_smmu_validate_features(smmu))
return -ENODEV;
ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
/*
* SMMU will hold possible configuration for both S1 and S2 as any of
* them can be chosen when a device is attached.
*/
cfg_s1 = (struct io_pgtable_cfg) {
.fmt = ARM_64_LPAE_S1,
.pgsize_bitmap = smmu->pgsize_bitmap,
.ias = min_t(unsigned long, ias, VA_BITS),
.oas = smmu->ias,
.coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
};
cfg_s2 = (struct io_pgtable_cfg) {
.fmt = ARM_64_LPAE_S2,
.pgsize_bitmap = smmu->pgsize_bitmap,
.ias = smmu->ias,
.oas = smmu->oas,
.coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
};
/*
* Choose the page and address size. Compute the PGD size as well, so we
* know how much memory to pre-allocate.
*/
if (smmu->features & ARM_SMMU_FEAT_TRANS_S1) {
ret = io_pgtable_configure(&cfg_s1, &pgd_size);
if (ret)
return ret;
host_smmu->pgd_order_s1 = get_order(pgd_size);
host_smmu->pgsize_bitmap_s1 = cfg_s1.pgsize_bitmap;
}
if (smmu->features & ARM_SMMU_FEAT_TRANS_S2) {
ret = io_pgtable_configure(&cfg_s2, &pgd_size);
if (ret)
return ret;
host_smmu->pgd_order_s2 = get_order(pgd_size);
host_smmu->pgsize_bitmap_s2 = cfg_s2.pgsize_bitmap;
}
ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
CMDQ_ENT_DWORDS, "cmdq");
if (ret)
return ret;
/* evtq */
ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->base + SZ_64K,
ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
EVTQ_ENT_DWORDS, "evtq");
if (ret)
return ret;
ret = arm_smmu_init_strtab(smmu);
if (ret)
return ret;
ret = kvm_arm_smmu_device_reset(host_smmu);
if (ret)
return ret;
/* Hypervisor parameters */
hyp_smmu->mmio_addr = mmio_addr;
hyp_smmu->mmio_size = mmio_size;
hyp_smmu->features = smmu->features;
hyp_smmu->pgtable_cfg_s1 = cfg_s1;
hyp_smmu->pgtable_cfg_s2 = cfg_s2;
hyp_smmu->iommu.power_domain = power_domain;
hyp_smmu->ssid_bits = smmu->ssid_bits;
kvm_arm_smmu_cur++;
/*
* The state of endpoints dictates when the SMMU is powered off. To turn
* the SMMU on and off, a genpd driver uses SCMI over the SMC transport,
* or some other platform-specific SMC. Those power requests are caught
* by the hypervisor, so that the hyp driver doesn't touch the hardware
* state while it is off.
*
* We are making a big assumption here, that TLBs and caches are invalid
* on power on, and therefore we don't need to wake the SMMU when
* modifying page tables, stream tables and context tables. If this
* assumption does not hold on some systems, then we'll need to grab RPM
* reference in map(), attach(), etc, so the hyp driver can send
* invalidations.
*/
hyp_smmu->caches_clean_on_power_on = true;
pm_runtime_set_active(dev);
pm_runtime_enable(dev);
/*
* Take a reference to keep the SMMU powered on while the hypervisor
* initializes it.
*/
pm_runtime_resume_and_get(dev);
if (kvm_arm_smmu_cur == kvm_arm_smmu_count) {
ret = alloc_idmapped_mc(&mc);
if (ret)
pr_warn("No SMMUv3 IDMAPPED support err => %d\n", ret);
/* Topup hyp alloc so IOMMU driver can allocate domains. */
__pkvm_topup_hyp_alloc(1);
/* Go go go. */
ret = kvm_iommu_init_hyp(ksym_ref_addr_nvhe(smmu_ops), &mc, 0);
for (i = 0 ; i < kvm_arm_smmu_cur; ++i)
smmu_finalise_device(smmus_arr[i], NULL);
}
return ret;
}
static int kvm_arm_smmu_remove(struct platform_device *pdev)
{
struct host_arm_smmu_device *host_smmu = platform_get_drvdata(pdev);
struct arm_smmu_device *smmu = &host_smmu->smmu;
/*
* There was an error during hypervisor setup. The hyp driver may
* have already enabled the device, so disable it.
*/
if (!atomic_read(&host_smmu->initialized))
pm_runtime_put_noidle(&pdev->dev);
pm_runtime_disable(&pdev->dev);
pm_runtime_set_suspended(&pdev->dev);
arm_smmu_unregister_iommu(smmu);
arm_smmu_device_disable(smmu);
arm_smmu_update_gbpa(smmu, host_smmu->boot_gbpa, GBPA_ABORT);
return 0;
}
int kvm_arm_smmu_suspend(struct device *dev)
{
struct arm_smmu_device *smmu = dev_get_drvdata(dev);
struct host_arm_smmu_device *host_smmu = smmu_to_host(smmu);
if (host_smmu->hvc_pd)
return pkvm_iommu_suspend(dev);
return 0;
}
int kvm_arm_smmu_resume(struct device *dev)
{
struct arm_smmu_device *smmu = dev_get_drvdata(dev);
struct host_arm_smmu_device *host_smmu = smmu_to_host(smmu);
if (host_smmu->hvc_pd)
return pkvm_iommu_resume(dev);
return 0;
}
static const struct dev_pm_ops kvm_arm_smmu_pm_ops = {
SET_RUNTIME_PM_OPS(kvm_arm_smmu_suspend, kvm_arm_smmu_resume, NULL)
};
static const struct of_device_id arm_smmu_of_match[] = {
{ .compatible = "arm,smmu-v3", },
{ },
};
static struct platform_driver kvm_arm_smmu_driver = {
.driver = {
.name = "kvm-arm-smmu-v3",
.of_match_table = arm_smmu_of_match,
.pm = &kvm_arm_smmu_pm_ops,
},
.probe = kvm_arm_smmu_probe,
.remove = kvm_arm_smmu_remove,
};
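
/*
 * Count the "arm,smmu-v3" nodes and allocate the descriptor array shared
 * with the hypervisor, plus a host-side array of the corresponding devices
 * used to finalize registration after the hypervisor is initialized.
 */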
static int kvm_arm_smmu_array_alloc(void)
{
int smmu_order;
struct device_node *np;
kvm_arm_smmu_count = 0;
for_each_compatible_node(np, NULL, "arm,smmu-v3")
kvm_arm_smmu_count++;
if (!kvm_arm_smmu_count)
return 0;
/* Allocate the parameter list shared with the hypervisor */
smmu_order = get_order(kvm_arm_smmu_count * sizeof(*kvm_arm_smmu_array));
kvm_arm_smmu_array = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
smmu_order);
if (!kvm_arm_smmu_array)
return -ENOMEM;
	smmus_arr = kmalloc_array(kvm_arm_smmu_count, sizeof(struct device *), GFP_KERNEL);
	if (!smmus_arr) {
		free_pages((unsigned long)kvm_arm_smmu_array, smmu_order);
		return -ENOMEM;
	}
/*
* These variables are stored in the nVHE image, and won't be accessible
* after KVM initialization. Ownership of kvm_arm_smmu_array will be
* transferred to the hypervisor as well.
*/
kvm_hyp_arm_smmu_v3_smmus = kvm_kern_hyp_va(kvm_arm_smmu_array);
kvm_hyp_arm_smmu_v3_count = kvm_arm_smmu_count;
return 0;
}
static void kvm_arm_smmu_array_free(void)
{
int order;
order = get_order(kvm_arm_smmu_count * sizeof(*kvm_arm_smmu_array));
free_pages((unsigned long)kvm_arm_smmu_array, order);
}
/**
 * kvm_arm_smmu_v3_init() - Reserve the SMMUv3s for KVM
 *
 * Return: 0 if all present SMMUv3s were probed successfully, or an error.
 * If no SMMU was found, return 0, with a count of 0.
 */
static int kvm_arm_smmu_v3_init(void)
{
int ret;
/*
* Check whether any device owned by the host is behind an SMMU.
*/
ret = kvm_arm_smmu_array_alloc();
if (ret || !kvm_arm_smmu_count)
return ret;
#ifdef MODULE
ret = pkvm_load_el2_module(kvm_nvhe_sym(smmu_init_hyp_module),
&pkvm_module_token);
if (ret) {
pr_err("Failed to load SMMUv3 IOMMU EL2 module: %d\n", ret);
return ret;
}
#endif
ret = platform_driver_register(&kvm_arm_smmu_driver);
if (ret)
kvm_arm_smmu_array_free();
return ret;
}
static void kvm_arm_smmu_v3_remove(void)
{
platform_driver_unregister(&kvm_arm_smmu_driver);
}
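
/* Hooks registered with KVM's IOMMU driver framework. */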
struct kvm_iommu_driver kvm_smmu_v3_ops = {
.init_driver = kvm_arm_smmu_v3_init,
.remove_driver = kvm_arm_smmu_v3_remove,
.get_iommu_id = kvm_arm_smmu_v3_id,
.get_iommu_id_by_of = kvm_arm_v3_id_by_of,
};
static int kvm_arm_smmu_v3_register(void)
{
return kvm_iommu_register_driver(&kvm_smmu_v3_ops);
}
/*
 * Registration must run before deprivileging, i.e. before
 * kvm_iommu_init_driver(). In the module case the driver must be loaded
 * via pKVM early module loading, which runs before that point.
 * For built-in drivers we use core_initcall().
 */
#ifdef MODULE
module_init(kvm_arm_smmu_v3_register);
#else
core_initcall(kvm_arm_smmu_v3_register);
#endif
MODULE_LICENSE("GPL v2");