Merge branches 'arm/msm', 'arm/allwinner', 'arm/smmu', 'x86/vt-d', 'hyper-v', 'core' and 'x86/amd' into next
diff --git a/Documentation/devicetree/bindings/iommu/allwinner,sun50i-h6-iommu.yaml b/Documentation/devicetree/bindings/iommu/allwinner,sun50i-h6-iommu.yaml
new file mode 100644
index 0000000..5e125cf
--- /dev/null
+++ b/Documentation/devicetree/bindings/iommu/allwinner,sun50i-h6-iommu.yaml
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iommu/allwinner,sun50i-h6-iommu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner H6 IOMMU Device Tree Bindings
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <mripard@kernel.org>
+
+properties:
+ "#iommu-cells":
+ const: 1
+ description:
+ The content of the cell is the master ID.
+
+ compatible:
+ const: allwinner,sun50i-h6-iommu
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ resets:
+ maxItems: 1
+
+required:
+ - "#iommu-cells"
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - resets
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ #include <dt-bindings/clock/sun50i-h6-ccu.h>
+ #include <dt-bindings/reset/sun50i-h6-ccu.h>
+
+ iommu: iommu@30f0000 {
+ compatible = "allwinner,sun50i-h6-iommu";
+ reg = <0x030f0000 0x10000>;
+ interrupts = <GIC_SPI 57 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ccu CLK_BUS_IOMMU>;
+ resets = <&ccu RST_BUS_IOMMU>;
+ #iommu-cells = <1>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index 6515dbe..e3ef1c6 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -41,7 +41,9 @@
- const: arm,mmu-500
- const: arm,smmu-v2
- items:
- - const: arm,mmu-401
+ - enum:
+ - arm,mmu-400
+ - arm,mmu-401
- const: arm,smmu-v1
- enum:
- arm,smmu-v1
diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,q6v5.txt b/Documentation/devicetree/bindings/remoteproc/qcom,q6v5.txt
index 88dfa3f..130e50a 100644
--- a/Documentation/devicetree/bindings/remoteproc/qcom,q6v5.txt
+++ b/Documentation/devicetree/bindings/remoteproc/qcom,q6v5.txt
@@ -184,6 +184,9 @@
followed by the offset within syscon for conn_box_spare0
register.
+The Hexagon node must contain iommus property as described in ../iommu/iommu.txt
+on platforms which do not have TrustZone.
+
= SUBNODES:
The Hexagon node must contain two subnodes, named "mba" and "mpss" representing
the memory regions used by the Hexagon firmware. Each sub-node must contain:
diff --git a/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi b/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi
index 9070be4..07081da 100644
--- a/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi
@@ -631,6 +631,11 @@ &mdss_mdp {
status = "okay";
};
+&mss_pil {
+ iommus = <&apps_smmu 0x780 0x1>,
+ <&apps_smmu 0x724 0x3>;
+};
+
&pm8998_pwrkey {
status = "disabled";
};
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 2ab07ce..aca7638 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -303,6 +303,15 @@
Say Y here if you are using a Rockchip SoC that includes an IOMMU
device.
+config SUN50I_IOMMU
+ bool "Allwinner H6 IOMMU Support"
+ depends on ARCH_SUNXI || COMPILE_TEST
+ select ARM_DMA_USE_IOMMU
+ select IOMMU_API
+ select IOMMU_DMA
+ help
+ Support for the IOMMU introduced in the Allwinner H6 SoCs.
+
config TEGRA_IOMMU_GART
bool "Tegra GART IOMMU Support"
depends on ARCH_TEGRA_2x_SOC
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 9f33fdb..57cf4ba 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -29,6 +29,7 @@
obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o
obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o
obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o
+obj-$(CONFIG_SUN50I_IOMMU) += sun50i-iommu.o
obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o
obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o
obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 2883ac3..311ef71 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -22,7 +22,6 @@
#include <linux/dma-direct.h>
#include <linux/dma-iommu.h>
#include <linux/iommu-helper.h>
-#include <linux/iommu.h>
#include <linux/delay.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
@@ -43,8 +42,7 @@
#include <asm/gart.h>
#include <asm/dma.h>
-#include "amd_iommu_proto.h"
-#include "amd_iommu_types.h"
+#include "amd_iommu.h"
#include "irq_remapping.h"
#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
@@ -71,6 +69,8 @@
*/
#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38))
+#define DEFAULT_PGTABLE_LEVEL PAGE_MODE_3_LEVEL
+
static DEFINE_SPINLOCK(pd_bitmap_lock);
/* List of all available dev_data structures */
@@ -99,7 +99,6 @@ struct iommu_cmd {
struct kmem_cache *amd_iommu_irq_cache;
static void update_domain(struct protection_domain *domain);
-static int protection_domain_init(struct protection_domain *domain);
static void detach_device(struct device *dev);
static void update_and_flush_device_table(struct protection_domain *domain,
struct domain_pgtable *pgtable);
@@ -280,12 +279,6 @@ static struct iommu_dev_data *find_dev_data(u16 devid)
return dev_data;
}
-struct iommu_dev_data *get_dev_data(struct device *dev)
-{
- return dev->archdata.iommu;
-}
-EXPORT_SYMBOL(get_dev_data);
-
/*
* Find or create an IOMMU group for a acpihid device.
*/
@@ -314,16 +307,15 @@ static struct iommu_group *acpihid_device_group(struct device *dev)
static bool pci_iommuv2_capable(struct pci_dev *pdev)
{
static const int caps[] = {
- PCI_EXT_CAP_ID_ATS,
PCI_EXT_CAP_ID_PRI,
PCI_EXT_CAP_ID_PASID,
};
int i, pos;
- if (pci_ats_disabled())
+ if (!pci_ats_supported(pdev))
return false;
- for (i = 0; i < 3; ++i) {
+ for (i = 0; i < 2; ++i) {
pos = pci_find_ext_capability(pdev, caps[i]);
if (pos == 0)
return false;
@@ -336,7 +328,7 @@ static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum)
{
struct iommu_dev_data *dev_data;
- dev_data = get_dev_data(&pdev->dev);
+ dev_data = dev_iommu_priv_get(&pdev->dev);
return dev_data->errata & (1 << erratum) ? true : false;
}
@@ -349,7 +341,7 @@ static bool check_device(struct device *dev)
{
int devid;
- if (!dev || !dev->dma_mask)
+ if (!dev)
return false;
devid = get_device_id(dev);
@@ -366,32 +358,18 @@ static bool check_device(struct device *dev)
return true;
}
-static void init_iommu_group(struct device *dev)
-{
- struct iommu_group *group;
-
- group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group))
- return;
-
- iommu_group_put(group);
-}
-
static int iommu_init_device(struct device *dev)
{
struct iommu_dev_data *dev_data;
- struct amd_iommu *iommu;
int devid;
- if (dev->archdata.iommu)
+ if (dev_iommu_priv_get(dev))
return 0;
devid = get_device_id(dev);
if (devid < 0)
return devid;
- iommu = amd_iommu_rlookup_table[devid];
-
dev_data = find_dev_data(devid);
if (!dev_data)
return -ENOMEM;
@@ -412,9 +390,7 @@ static int iommu_init_device(struct device *dev)
dev_data->iommu_v2 = iommu->is_iommu_v2;
}
- dev->archdata.iommu = dev_data;
-
- iommu_device_link(&iommu->iommu, dev);
+ dev_iommu_priv_set(dev, dev_data);
return 0;
}
@@ -433,31 +409,18 @@ static void iommu_ignore_device(struct device *dev)
setup_aliases(dev);
}
-static void iommu_uninit_device(struct device *dev)
+static void amd_iommu_uninit_device(struct device *dev)
{
struct iommu_dev_data *dev_data;
- struct amd_iommu *iommu;
- int devid;
- devid = get_device_id(dev);
- if (devid < 0)
- return;
-
- iommu = amd_iommu_rlookup_table[devid];
-
- dev_data = search_dev_data(devid);
+ dev_data = dev_iommu_priv_get(dev);
if (!dev_data)
return;
if (dev_data->domain)
detach_device(dev);
- iommu_device_unlink(&iommu->iommu, dev);
-
- iommu_group_remove_device(dev);
-
- /* Remove dma-ops */
- dev->dma_ops = NULL;
+ dev_iommu_priv_set(dev, NULL);
/*
* We keep dev_data around for unplugged devices and reuse it when the
@@ -521,7 +484,7 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid),
devid & 0xff);
if (pdev)
- dev_data = get_dev_data(&pdev->dev);
+ dev_data = dev_iommu_priv_get(&pdev->dev);
if (dev_data && __ratelimit(&dev_data->rs)) {
pci_err(pdev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n",
@@ -1418,20 +1381,19 @@ static struct page *free_sub_pt(unsigned long root, int mode,
return freelist;
}
-static void free_pagetable(struct protection_domain *domain)
+static void free_pagetable(struct domain_pgtable *pgtable)
{
- struct domain_pgtable pgtable;
struct page *freelist = NULL;
unsigned long root;
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- atomic64_set(&domain->pt_root, 0);
+ if (pgtable->mode == PAGE_MODE_NONE)
+ return;
- BUG_ON(pgtable.mode < PAGE_MODE_NONE ||
- pgtable.mode > PAGE_MODE_6_LEVEL);
+ BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
+ pgtable->mode > PAGE_MODE_6_LEVEL);
- root = (unsigned long)pgtable.root;
- freelist = free_sub_pt(root, pgtable.mode, freelist);
+ root = (unsigned long)pgtable->root;
+ freelist = free_sub_pt(root, pgtable->mode, freelist);
free_page_list(freelist);
}
@@ -1844,70 +1806,6 @@ static void free_gcr3_table(struct protection_domain *domain)
free_page((unsigned long)domain->gcr3_tbl);
}
-/*
- * Free a domain, only used if something went wrong in the
- * allocation path and we need to free an already allocated page table
- */
-static void dma_ops_domain_free(struct protection_domain *domain)
-{
- if (!domain)
- return;
-
- iommu_put_dma_cookie(&domain->domain);
-
- free_pagetable(domain);
-
- if (domain->id)
- domain_id_free(domain->id);
-
- kfree(domain);
-}
-
-/*
- * Allocates a new protection domain usable for the dma_ops functions.
- * It also initializes the page table and the address allocator data
- * structures required for the dma_ops interface
- */
-static struct protection_domain *dma_ops_domain_alloc(void)
-{
- struct protection_domain *domain;
- u64 *pt_root, root;
-
- domain = kzalloc(sizeof(struct protection_domain), GFP_KERNEL);
- if (!domain)
- return NULL;
-
- if (protection_domain_init(domain))
- goto free_domain;
-
- pt_root = (void *)get_zeroed_page(GFP_KERNEL);
- if (!pt_root)
- goto free_domain;
-
- root = amd_iommu_domain_encode_pgtable(pt_root, PAGE_MODE_3_LEVEL);
- atomic64_set(&domain->pt_root, root);
- domain->flags = PD_DMA_OPS_MASK;
-
- if (iommu_get_dma_cookie(&domain->domain) == -ENOMEM)
- goto free_domain;
-
- return domain;
-
-free_domain:
- dma_ops_domain_free(domain);
-
- return NULL;
-}
-
-/*
- * little helper function to check whether a given protection domain is a
- * dma_ops domain
- */
-static bool dma_ops_domain(struct protection_domain *domain)
-{
- return domain->flags & PD_DMA_OPS_MASK;
-}
-
static void set_dte_entry(u16 devid, struct protection_domain *domain,
struct domain_pgtable *pgtable,
bool ats, bool ppr)
@@ -2119,14 +2017,14 @@ static int pdev_iommuv2_enable(struct pci_dev *pdev)
static int attach_device(struct device *dev,
struct protection_domain *domain)
{
- struct pci_dev *pdev;
struct iommu_dev_data *dev_data;
+ struct pci_dev *pdev;
unsigned long flags;
int ret;
spin_lock_irqsave(&domain->lock, flags);
- dev_data = get_dev_data(dev);
+ dev_data = dev_iommu_priv_get(dev);
spin_lock(&dev_data->lock);
@@ -2139,8 +2037,10 @@ static int attach_device(struct device *dev,
pdev = to_pci_dev(dev);
if (domain->flags & PD_IOMMUV2_MASK) {
+ struct iommu_domain *def_domain = iommu_get_dma_domain(dev);
+
ret = -EINVAL;
- if (!dev_data->passthrough)
+ if (def_domain->type != IOMMU_DOMAIN_IDENTITY)
goto out;
if (dev_data->iommu_v2) {
@@ -2188,7 +2088,7 @@ static void detach_device(struct device *dev)
struct iommu_dev_data *dev_data;
unsigned long flags;
- dev_data = get_dev_data(dev);
+ dev_data = dev_iommu_priv_get(dev);
domain = dev_data->domain;
spin_lock_irqsave(&domain->lock, flags);
@@ -2222,68 +2122,60 @@ static void detach_device(struct device *dev)
spin_unlock_irqrestore(&domain->lock, flags);
}
-static int amd_iommu_add_device(struct device *dev)
+static struct iommu_device *amd_iommu_probe_device(struct device *dev)
{
- struct iommu_dev_data *dev_data;
- struct iommu_domain *domain;
+ struct iommu_device *iommu_dev;
struct amd_iommu *iommu;
int ret, devid;
- if (!check_device(dev) || get_dev_data(dev))
- return 0;
+ if (!check_device(dev))
+ return ERR_PTR(-ENODEV);
devid = get_device_id(dev);
if (devid < 0)
- return devid;
+ return ERR_PTR(devid);
iommu = amd_iommu_rlookup_table[devid];
+ if (dev_iommu_priv_get(dev))
+ return &iommu->iommu;
+
ret = iommu_init_device(dev);
if (ret) {
if (ret != -ENOTSUPP)
dev_err(dev, "Failed to initialize - trying to proceed anyway\n");
-
+ iommu_dev = ERR_PTR(ret);
iommu_ignore_device(dev);
- dev->dma_ops = NULL;
- goto out;
+ } else {
+ iommu_dev = &iommu->iommu;
}
- init_iommu_group(dev);
- dev_data = get_dev_data(dev);
+ iommu_completion_wait(iommu);
- BUG_ON(!dev_data);
+ return iommu_dev;
+}
- if (dev_data->iommu_v2)
- iommu_request_dm_for_dev(dev);
+static void amd_iommu_probe_finalize(struct device *dev)
+{
+ struct iommu_domain *domain;
/* Domains are initialized for this device - have a look what we ended up with */
domain = iommu_get_domain_for_dev(dev);
- if (domain->type == IOMMU_DOMAIN_IDENTITY)
- dev_data->passthrough = true;
- else if (domain->type == IOMMU_DOMAIN_DMA)
+ if (domain->type == IOMMU_DOMAIN_DMA)
iommu_setup_dma_ops(dev, IOVA_START_PFN << PAGE_SHIFT, 0);
-
-out:
- iommu_completion_wait(iommu);
-
- return 0;
}
-static void amd_iommu_remove_device(struct device *dev)
+static void amd_iommu_release_device(struct device *dev)
{
+ int devid = get_device_id(dev);
struct amd_iommu *iommu;
- int devid;
if (!check_device(dev))
return;
- devid = get_device_id(dev);
- if (devid < 0)
- return;
-
iommu = amd_iommu_rlookup_table[devid];
- iommu_uninit_device(dev);
+ amd_iommu_uninit_device(dev);
iommu_completion_wait(iommu);
}
@@ -2418,27 +2310,46 @@ static void cleanup_domain(struct protection_domain *domain)
static void protection_domain_free(struct protection_domain *domain)
{
+ struct domain_pgtable pgtable;
+
if (!domain)
return;
if (domain->id)
domain_id_free(domain->id);
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ atomic64_set(&domain->pt_root, 0);
+ free_pagetable(&pgtable);
+
kfree(domain);
}
-static int protection_domain_init(struct protection_domain *domain)
+static int protection_domain_init(struct protection_domain *domain, int mode)
{
+ u64 *pt_root = NULL, root;
+
+ BUG_ON(mode < PAGE_MODE_NONE || mode > PAGE_MODE_6_LEVEL);
+
spin_lock_init(&domain->lock);
domain->id = domain_id_alloc();
if (!domain->id)
return -ENOMEM;
INIT_LIST_HEAD(&domain->dev_list);
+ if (mode != PAGE_MODE_NONE) {
+ pt_root = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!pt_root)
+ return -ENOMEM;
+ }
+
+ root = amd_iommu_domain_encode_pgtable(pt_root, mode);
+ atomic64_set(&domain->pt_root, root);
+
return 0;
}
-static struct protection_domain *protection_domain_alloc(void)
+static struct protection_domain *protection_domain_alloc(int mode)
{
struct protection_domain *domain;
@@ -2446,7 +2357,7 @@ static struct protection_domain *protection_domain_alloc(void)
if (!domain)
return NULL;
- if (protection_domain_init(domain))
+ if (protection_domain_init(domain, mode))
goto out_err;
return domain;
@@ -2459,54 +2370,35 @@ static struct protection_domain *protection_domain_alloc(void)
static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
{
- struct protection_domain *pdomain;
- u64 *pt_root, root;
+ struct protection_domain *domain;
+ int mode = DEFAULT_PGTABLE_LEVEL;
- switch (type) {
- case IOMMU_DOMAIN_UNMANAGED:
- pdomain = protection_domain_alloc();
- if (!pdomain)
- return NULL;
+ if (type == IOMMU_DOMAIN_IDENTITY)
+ mode = PAGE_MODE_NONE;
- pt_root = (void *)get_zeroed_page(GFP_KERNEL);
- if (!pt_root) {
- protection_domain_free(pdomain);
- return NULL;
- }
-
- root = amd_iommu_domain_encode_pgtable(pt_root, PAGE_MODE_3_LEVEL);
- atomic64_set(&pdomain->pt_root, root);
-
- pdomain->domain.geometry.aperture_start = 0;
- pdomain->domain.geometry.aperture_end = ~0ULL;
- pdomain->domain.geometry.force_aperture = true;
-
- break;
- case IOMMU_DOMAIN_DMA:
- pdomain = dma_ops_domain_alloc();
- if (!pdomain) {
- pr_err("Failed to allocate\n");
- return NULL;
- }
- break;
- case IOMMU_DOMAIN_IDENTITY:
- pdomain = protection_domain_alloc();
- if (!pdomain)
- return NULL;
-
- atomic64_set(&pdomain->pt_root, PAGE_MODE_NONE);
- break;
- default:
+ domain = protection_domain_alloc(mode);
+ if (!domain)
return NULL;
- }
- return &pdomain->domain;
+ domain->domain.geometry.aperture_start = 0;
+ domain->domain.geometry.aperture_end = ~0ULL;
+ domain->domain.geometry.force_aperture = true;
+
+ if (type == IOMMU_DOMAIN_DMA &&
+ iommu_get_dma_cookie(&domain->domain) == -ENOMEM)
+ goto free_domain;
+
+ return &domain->domain;
+
+free_domain:
+ protection_domain_free(domain);
+
+ return NULL;
}
static void amd_iommu_domain_free(struct iommu_domain *dom)
{
struct protection_domain *domain;
- struct domain_pgtable pgtable;
domain = to_pdomain(dom);
@@ -2518,29 +2410,19 @@ static void amd_iommu_domain_free(struct iommu_domain *dom)
if (!dom)
return;
- switch (dom->type) {
- case IOMMU_DOMAIN_DMA:
- /* Now release the domain */
- dma_ops_domain_free(domain);
- break;
- default:
- amd_iommu_domain_get_pgtable(domain, &pgtable);
+ if (dom->type == IOMMU_DOMAIN_DMA)
+ iommu_put_dma_cookie(&domain->domain);
- if (pgtable.mode != PAGE_MODE_NONE)
- free_pagetable(domain);
+ if (domain->flags & PD_IOMMUV2_MASK)
+ free_gcr3_table(domain);
- if (domain->flags & PD_IOMMUV2_MASK)
- free_gcr3_table(domain);
-
- protection_domain_free(domain);
- break;
- }
+ protection_domain_free(domain);
}
static void amd_iommu_detach_device(struct iommu_domain *dom,
struct device *dev)
{
- struct iommu_dev_data *dev_data = dev->archdata.iommu;
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
struct amd_iommu *iommu;
int devid;
@@ -2578,7 +2460,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
if (!check_device(dev))
return -EINVAL;
- dev_data = dev->archdata.iommu;
+ dev_data = dev_iommu_priv_get(dev);
dev_data->defer_attach = false;
iommu = amd_iommu_rlookup_table[dev_data->devid];
@@ -2734,12 +2616,14 @@ static void amd_iommu_get_resv_regions(struct device *dev,
 list_add_tail(&region->list, head);
}
-static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
- struct device *dev)
+bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
+ struct device *dev)
{
- struct iommu_dev_data *dev_data = dev->archdata.iommu;
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
+
return dev_data->defer_attach;
}
+EXPORT_SYMBOL_GPL(amd_iommu_is_attach_deferred);
static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
{
@@ -2758,6 +2642,20 @@ static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
amd_iommu_flush_iotlb_all(domain);
}
+static int amd_iommu_def_domain_type(struct device *dev)
+{
+ struct iommu_dev_data *dev_data;
+
+ dev_data = dev_iommu_priv_get(dev);
+ if (!dev_data)
+ return 0;
+
+ if (dev_data->iommu_v2)
+ return IOMMU_DOMAIN_IDENTITY;
+
+ return 0;
+}
+
const struct iommu_ops amd_iommu_ops = {
.capable = amd_iommu_capable,
.domain_alloc = amd_iommu_domain_alloc,
@@ -2767,8 +2665,9 @@ const struct iommu_ops amd_iommu_ops = {
.map = amd_iommu_map,
.unmap = amd_iommu_unmap,
.iova_to_phys = amd_iommu_iova_to_phys,
- .add_device = amd_iommu_add_device,
- .remove_device = amd_iommu_remove_device,
+ .probe_device = amd_iommu_probe_device,
+ .release_device = amd_iommu_release_device,
+ .probe_finalize = amd_iommu_probe_finalize,
.device_group = amd_iommu_device_group,
.domain_get_attr = amd_iommu_domain_get_attr,
.get_resv_regions = amd_iommu_get_resv_regions,
@@ -2777,6 +2676,7 @@ const struct iommu_ops amd_iommu_ops = {
.pgsize_bitmap = AMD_IOMMU_PGSIZES,
.flush_iotlb_all = amd_iommu_flush_iotlb_all,
.iotlb_sync = amd_iommu_iotlb_sync,
+ .def_domain_type = amd_iommu_def_domain_type,
};
/*****************************************************************************
@@ -2807,7 +2707,6 @@ void amd_iommu_domain_direct_map(struct iommu_domain *dom)
struct protection_domain *domain = to_pdomain(dom);
struct domain_pgtable pgtable;
unsigned long flags;
- u64 pt_root;
spin_lock_irqsave(&domain->lock, flags);
@@ -2815,18 +2714,13 @@ void amd_iommu_domain_direct_map(struct iommu_domain *dom)
amd_iommu_domain_get_pgtable(domain, &pgtable);
/* Update data structure */
- pt_root = amd_iommu_domain_encode_pgtable(NULL, PAGE_MODE_NONE);
- atomic64_set(&domain->pt_root, pt_root);
+ atomic64_set(&domain->pt_root, 0);
/* Make changes visible to IOMMUs */
update_domain(domain);
- /* Restore old pgtable in domain->ptroot to free page-table */
- pt_root = amd_iommu_domain_encode_pgtable(pgtable.root, pgtable.mode);
- atomic64_set(&domain->pt_root, pt_root);
-
/* Page-table is not visible to IOMMU anymore, so free it */
- free_pagetable(domain);
+ free_pagetable(&pgtable);
spin_unlock_irqrestore(&domain->lock, flags);
}
@@ -3085,7 +2979,7 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
struct amd_iommu *iommu;
struct iommu_cmd cmd;
- dev_data = get_dev_data(&pdev->dev);
+ dev_data = dev_iommu_priv_get(&pdev->dev);
iommu = amd_iommu_rlookup_table[dev_data->devid];
build_complete_ppr(&cmd, dev_data->devid, pasid, status,
@@ -3098,23 +2992,27 @@ EXPORT_SYMBOL(amd_iommu_complete_ppr);
struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev)
{
struct protection_domain *pdomain;
- struct iommu_domain *io_domain;
+ struct iommu_dev_data *dev_data;
struct device *dev = &pdev->dev;
+ struct iommu_domain *io_domain;
if (!check_device(dev))
return NULL;
- pdomain = get_dev_data(dev)->domain;
- if (pdomain == NULL && get_dev_data(dev)->defer_attach) {
- get_dev_data(dev)->defer_attach = false;
- io_domain = iommu_get_domain_for_dev(dev);
+ dev_data = dev_iommu_priv_get(&pdev->dev);
+ pdomain = dev_data->domain;
+ io_domain = iommu_get_domain_for_dev(dev);
+
+ if (pdomain == NULL && dev_data->defer_attach) {
+ dev_data->defer_attach = false;
pdomain = to_pdomain(io_domain);
attach_device(dev, pdomain);
}
+
if (pdomain == NULL)
return NULL;
- if (!dma_ops_domain(pdomain))
+ if (io_domain->type != IOMMU_DOMAIN_DMA)
return NULL;
/* Only return IOMMUv2 domains */
@@ -3132,7 +3030,7 @@ void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum)
if (!amd_iommu_v2_supported())
return;
- dev_data = get_dev_data(&pdev->dev);
+ dev_data = dev_iommu_priv_get(&pdev->dev);
dev_data->errata |= (1 << erratum);
}
EXPORT_SYMBOL(amd_iommu_enable_device_erratum);
@@ -3151,11 +3049,8 @@ int amd_iommu_device_info(struct pci_dev *pdev,
memset(info, 0, sizeof(*info));
- if (!pci_ats_disabled()) {
- pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS);
- if (pos)
- info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP;
- }
+ if (pci_ats_supported(pdev))
+ info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP;
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
if (pos)
diff --git a/drivers/iommu/amd_iommu.h b/drivers/iommu/amd_iommu.h
index 12d540d..f892992 100644
--- a/drivers/iommu/amd_iommu.h
+++ b/drivers/iommu/amd_iommu.h
@@ -1,9 +1,103 @@
/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2009-2010 Advanced Micro Devices, Inc.
+ * Author: Joerg Roedel <jroedel@suse.de>
+ */
#ifndef AMD_IOMMU_H
#define AMD_IOMMU_H
-int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line);
+#include <linux/iommu.h>
+
+#include "amd_iommu_types.h"
+
+extern int amd_iommu_get_num_iommus(void);
+extern int amd_iommu_init_dma_ops(void);
+extern int amd_iommu_init_passthrough(void);
+extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
+extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
+extern void amd_iommu_apply_erratum_63(u16 devid);
+extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
+extern int amd_iommu_init_devices(void);
+extern void amd_iommu_uninit_devices(void);
+extern void amd_iommu_init_notifier(void);
+extern int amd_iommu_init_api(void);
+
+#ifdef CONFIG_AMD_IOMMU_DEBUGFS
+void amd_iommu_debugfs_setup(struct amd_iommu *iommu);
+#else
+static inline void amd_iommu_debugfs_setup(struct amd_iommu *iommu) {}
+#endif
+
+/* Needed for interrupt remapping */
+extern int amd_iommu_prepare(void);
+extern int amd_iommu_enable(void);
+extern void amd_iommu_disable(void);
+extern int amd_iommu_reenable(int);
+extern int amd_iommu_enable_faulting(void);
+extern int amd_iommu_guest_ir;
+
+/* IOMMUv2 specific functions */
+struct iommu_domain;
+
+extern bool amd_iommu_v2_supported(void);
+extern int amd_iommu_register_ppr_notifier(struct notifier_block *nb);
+extern int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb);
+extern void amd_iommu_domain_direct_map(struct iommu_domain *dom);
+extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
+extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
+ u64 address);
+extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid);
+extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
+ unsigned long cr3);
+extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid);
+extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev);
+
+#ifdef CONFIG_IRQ_REMAP
+extern int amd_iommu_create_irq_domain(struct amd_iommu *iommu);
+#else
+static inline int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
+{
+ return 0;
+}
+#endif
+
+#define PPR_SUCCESS 0x0
+#define PPR_INVALID 0x1
+#define PPR_FAILURE 0xf
+
+extern int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
+ int status, int tag);
+
+static inline bool is_rd890_iommu(struct pci_dev *pdev)
+{
+ return (pdev->vendor == PCI_VENDOR_ID_ATI) &&
+ (pdev->device == PCI_DEVICE_ID_RD890_IOMMU);
+}
+
+static inline bool iommu_feature(struct amd_iommu *iommu, u64 f)
+{
+ if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
+ return false;
+
+ return !!(iommu->features & f);
+}
+
+static inline u64 iommu_virt_to_phys(void *vaddr)
+{
+ return (u64)__sme_set(virt_to_phys(vaddr));
+}
+
+static inline void *iommu_phys_to_virt(unsigned long paddr)
+{
+ return phys_to_virt(__sme_clr(paddr));
+}
+
+extern bool translation_pre_enabled(struct amd_iommu *iommu);
+extern bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
+ struct device *dev);
+extern int __init add_special_device(u8 type, u8 id, u16 *devid,
+ bool cmd_line);
#ifdef CONFIG_DMI
void amd_iommu_apply_ivrs_quirks(void);
diff --git a/drivers/iommu/amd_iommu_debugfs.c b/drivers/iommu/amd_iommu_debugfs.c
index c6a5c73..545372f 100644
--- a/drivers/iommu/amd_iommu_debugfs.c
+++ b/drivers/iommu/amd_iommu_debugfs.c
@@ -8,10 +8,9 @@
*/
#include <linux/debugfs.h>
-#include <linux/iommu.h>
#include <linux/pci.h>
-#include "amd_iommu_proto.h"
-#include "amd_iommu_types.h"
+
+#include "amd_iommu.h"
static struct dentry *amd_iommu_debugfs;
static DEFINE_MUTEX(amd_iommu_debugfs_lock);
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 5b81fd1..3faff7f 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -18,7 +18,6 @@
#include <linux/msi.h>
#include <linux/amd-iommu.h>
#include <linux/export.h>
-#include <linux/iommu.h>
#include <linux/kmemleak.h>
#include <linux/mem_encrypt.h>
#include <asm/pci-direct.h>
@@ -32,9 +31,8 @@
#include <asm/irq_remapping.h>
#include <linux/crash_dump.h>
+
#include "amd_iommu.h"
-#include "amd_iommu_proto.h"
-#include "amd_iommu_types.h"
#include "irq_remapping.h"
/*
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
deleted file mode 100644
index 92c2ba6..0000000
--- a/drivers/iommu/amd_iommu_proto.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2009-2010 Advanced Micro Devices, Inc.
- * Author: Joerg Roedel <jroedel@suse.de>
- */
-
-#ifndef _ASM_X86_AMD_IOMMU_PROTO_H
-#define _ASM_X86_AMD_IOMMU_PROTO_H
-
-#include "amd_iommu_types.h"
-
-extern int amd_iommu_get_num_iommus(void);
-extern int amd_iommu_init_dma_ops(void);
-extern int amd_iommu_init_passthrough(void);
-extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
-extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
-extern void amd_iommu_apply_erratum_63(u16 devid);
-extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
-extern int amd_iommu_init_devices(void);
-extern void amd_iommu_uninit_devices(void);
-extern void amd_iommu_init_notifier(void);
-extern int amd_iommu_init_api(void);
-
-#ifdef CONFIG_AMD_IOMMU_DEBUGFS
-void amd_iommu_debugfs_setup(struct amd_iommu *iommu);
-#else
-static inline void amd_iommu_debugfs_setup(struct amd_iommu *iommu) {}
-#endif
-
-/* Needed for interrupt remapping */
-extern int amd_iommu_prepare(void);
-extern int amd_iommu_enable(void);
-extern void amd_iommu_disable(void);
-extern int amd_iommu_reenable(int);
-extern int amd_iommu_enable_faulting(void);
-extern int amd_iommu_guest_ir;
-
-/* IOMMUv2 specific functions */
-struct iommu_domain;
-
-extern bool amd_iommu_v2_supported(void);
-extern int amd_iommu_register_ppr_notifier(struct notifier_block *nb);
-extern int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb);
-extern void amd_iommu_domain_direct_map(struct iommu_domain *dom);
-extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
-extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
- u64 address);
-extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid);
-extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
- unsigned long cr3);
-extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid);
-extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev);
-
-#ifdef CONFIG_IRQ_REMAP
-extern int amd_iommu_create_irq_domain(struct amd_iommu *iommu);
-#else
-static inline int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
-{
- return 0;
-}
-#endif
-
-#define PPR_SUCCESS 0x0
-#define PPR_INVALID 0x1
-#define PPR_FAILURE 0xf
-
-extern int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
- int status, int tag);
-
-static inline bool is_rd890_iommu(struct pci_dev *pdev)
-{
- return (pdev->vendor == PCI_VENDOR_ID_ATI) &&
- (pdev->device == PCI_DEVICE_ID_RD890_IOMMU);
-}
-
-static inline bool iommu_feature(struct amd_iommu *iommu, u64 f)
-{
- if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
- return false;
-
- return !!(iommu->features & f);
-}
-
-static inline u64 iommu_virt_to_phys(void *vaddr)
-{
- return (u64)__sme_set(virt_to_phys(vaddr));
-}
-
-static inline void *iommu_phys_to_virt(unsigned long paddr)
-{
- return phys_to_virt(__sme_clr(paddr));
-}
-
-extern bool translation_pre_enabled(struct amd_iommu *iommu);
-extern struct iommu_dev_data *get_dev_data(struct device *dev);
-#endif /* _ASM_X86_AMD_IOMMU_PROTO_H */
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 7a8fdec..30a5d41 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -395,10 +395,10 @@
#define PD_IOMMUV2_MASK (1UL << 3) /* domain has gcr3 table */
extern bool amd_iommu_dump;
-#define DUMP_printk(format, arg...) \
- do { \
- if (amd_iommu_dump) \
- printk(KERN_INFO "AMD-Vi: " format, ## arg); \
+#define DUMP_printk(format, arg...) \
+ do { \
+ if (amd_iommu_dump) \
+ pr_info("AMD-Vi: " format, ## arg); \
} while(0);
/* global flag if IOMMUs cache non-present entries */
@@ -645,7 +645,6 @@ struct iommu_dev_data {
struct pci_dev *pdev;
u16 devid; /* PCI Device ID */
bool iommu_v2; /* Device can make use of IOMMUv2 */
- bool passthrough; /* Device is identity mapped */
struct {
bool enabled;
int qdep;
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index d6d85de..c8a7b6b 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -13,13 +13,11 @@
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
-#include <linux/iommu.h>
#include <linux/wait.h>
#include <linux/pci.h>
#include <linux/gfp.h>
-#include "amd_iommu_types.h"
-#include "amd_iommu_proto.h"
+#include "amd_iommu.h"
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Joerg Roedel <jroedel@suse.de>");
@@ -517,13 +515,12 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data)
struct amd_iommu_fault *iommu_fault;
struct pasid_state *pasid_state;
struct device_state *dev_state;
+ struct pci_dev *pdev = NULL;
unsigned long flags;
struct fault *fault;
bool finish;
u16 tag, devid;
int ret;
- struct iommu_dev_data *dev_data;
- struct pci_dev *pdev = NULL;
iommu_fault = data;
tag = iommu_fault->tag & 0x1ff;
@@ -534,12 +531,11 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data)
devid & 0xff);
if (!pdev)
return -ENODEV;
- dev_data = get_dev_data(&pdev->dev);
+
+ ret = NOTIFY_DONE;
/* In kdump kernel pci dev is not initialized yet -> send INVALID */
- ret = NOTIFY_DONE;
- if (translation_pre_enabled(amd_iommu_rlookup_table[devid])
- && dev_data->defer_attach) {
+ if (amd_iommu_is_attach_deferred(NULL, &pdev->dev)) {
amd_iommu_complete_ppr(pdev, iommu_fault->pasid,
PPR_INVALID, tag);
goto out;
diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c
index 74d97a8..c75b9d9 100644
--- a/drivers/iommu/arm-smmu-impl.c
+++ b/drivers/iommu/arm-smmu-impl.c
@@ -150,6 +150,8 @@ static const struct arm_smmu_impl arm_mmu500_impl = {
struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu)
{
+ const struct device_node *np = smmu->dev->of_node;
+
/*
* We will inevitably have to combine model-specific implementation
* quirks with platform-specific integration quirks, but everything
@@ -166,11 +168,11 @@ struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu)
break;
}
- if (of_property_read_bool(smmu->dev->of_node,
- "calxeda,smmu-secure-config-access"))
+ if (of_property_read_bool(np, "calxeda,smmu-secure-config-access"))
smmu->impl = &calxeda_impl;
- if (of_device_is_compatible(smmu->dev->of_node, "qcom,sdm845-smmu-500"))
+ if (of_device_is_compatible(np, "qcom,sdm845-smmu-500") ||
+ of_device_is_compatible(np, "qcom,sc7180-smmu-500"))
return qcom_smmu_impl_init(smmu);
return smmu;
diff --git a/drivers/iommu/arm-smmu-qcom.c b/drivers/iommu/arm-smmu-qcom.c
index 24c071c..cf01d02 100644
--- a/drivers/iommu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm-smmu-qcom.c
@@ -3,6 +3,7 @@
* Copyright (c) 2019, The Linux Foundation. All rights reserved.
*/
+#include <linux/of_device.h>
#include <linux/qcom_scm.h>
#include "arm-smmu.h"
@@ -11,12 +12,29 @@ struct qcom_smmu {
struct arm_smmu_device smmu;
};
+/*
+ * Compatible strings checked by qcom_smmu_def_domain_type(): a device whose
+ * node matches any entry makes that function return IOMMU_DOMAIN_IDENTITY.
+ * The empty entry terminates the table.
+ */
+static const struct of_device_id qcom_smmu_client_of_match[] = {
+ { .compatible = "qcom,adreno" },
+ { .compatible = "qcom,mdp4" },
+ { .compatible = "qcom,mdss" },
+ { .compatible = "qcom,sc7180-mdss" },
+ { .compatible = "qcom,sc7180-mss-pil" },
+ { .compatible = "qcom,sdm845-mdss" },
+ { .compatible = "qcom,sdm845-mss-pil" },
+ { }
+};
+
+/*
+ * .def_domain_type hook installed in qcom_smmu_impl.
+ *
+ * Returns IOMMU_DOMAIN_IDENTITY when @dev matches an entry of
+ * qcom_smmu_client_of_match, and 0 otherwise.
+ * NOTE(review): 0 presumably tells the caller "no driver preference" -
+ * confirm against the iommu core.
+ */
+static int qcom_smmu_def_domain_type(struct device *dev)
+{
+ const struct of_device_id *match =
+ of_match_device(qcom_smmu_client_of_match, dev);
+
+ return match ? IOMMU_DOMAIN_IDENTITY : 0;
+}
+
static int qcom_sdm845_smmu500_reset(struct arm_smmu_device *smmu)
{
int ret;
- arm_mmu500_reset(smmu);
-
/*
* To address performance degradation in non-real time clients,
* such as USB and UFS, turn off wait-for-safe on sdm845 based boards,
@@ -30,8 +48,21 @@ static int qcom_sdm845_smmu500_reset(struct arm_smmu_device *smmu)
return ret;
}
+/*
+ * .reset hook installed in qcom_smmu_impl.
+ *
+ * Runs the common arm_mmu500_reset() first, then the sdm845-specific reset
+ * only when the SMMU node is compatible with "qcom,sdm845-smmu-500".
+ * Returns the sdm845 reset's result in that case and 0 for any other node.
+ */
+static int qcom_smmu500_reset(struct arm_smmu_device *smmu)
+{
+ const struct device_node *np = smmu->dev->of_node;
+
+ arm_mmu500_reset(smmu);
+
+ if (of_device_is_compatible(np, "qcom,sdm845-smmu-500"))
+ return qcom_sdm845_smmu500_reset(smmu);
+
+ return 0;
+}
+
static const struct arm_smmu_impl qcom_smmu_impl = {
- .reset = qcom_sdm845_smmu500_reset,
+ .def_domain_type = qcom_smmu_def_domain_type,
+ .reset = qcom_smmu500_reset,
};
struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu)
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 8250873..f578677a 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -171,6 +171,8 @@
#define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8
#define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc
+#define ARM_SMMU_REG_SZ 0xe00
+
/* Common MSI config fields */
#define MSI_CFG0_ADDR_MASK GENMASK_ULL(51, 2)
#define MSI_CFG2_SH GENMASK(5, 4)
@@ -628,6 +630,7 @@ struct arm_smmu_strtab_cfg {
struct arm_smmu_device {
struct device *dev;
void __iomem *base;
+ void __iomem *page1;
#define ARM_SMMU_FEAT_2_LVL_STRTAB (1 << 0)
#define ARM_SMMU_FEAT_2_LVL_CDTAB (1 << 1)
@@ -664,7 +667,6 @@ struct arm_smmu_device {
#define ARM_SMMU_MAX_ASIDS (1 << 16)
unsigned int asid_bits;
- DECLARE_BITMAP(asid_map, ARM_SMMU_MAX_ASIDS);
#define ARM_SMMU_MAX_VMIDS (1 << 16)
unsigned int vmid_bits;
@@ -724,6 +726,8 @@ struct arm_smmu_option_prop {
const char *prop;
};
+static DEFINE_XARRAY_ALLOC1(asid_xa);
+
static struct arm_smmu_option_prop arm_smmu_options[] = {
{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
@@ -733,9 +737,8 @@ static struct arm_smmu_option_prop arm_smmu_options[] = {
static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
struct arm_smmu_device *smmu)
{
- if ((offset > SZ_64K) &&
- (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY))
- offset -= SZ_64K;
+ if (offset > SZ_64K)
+ return smmu->page1 + offset - SZ_64K;
return smmu->base + offset;
}
@@ -1763,6 +1766,14 @@ static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
cdcfg->cdtab = NULL;
}
+/*
+ * Release @cd's ASID from asid_xa. An asid of 0 is treated as "nothing
+ * allocated": asid_xa is declared with DEFINE_XARRAY_ALLOC1 and the xa_alloc()
+ * call in arm_smmu_domain_finalise_s1() uses a limit starting at 1.
+ */
+static void arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
+{
+ if (!cd->asid)
+ return;
+
+ xa_erase(&asid_xa, cd->asid);
+}
+
/* Stream table manipulation functions */
static void
arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
@@ -2448,10 +2459,9 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
- if (cfg->cdcfg.cdtab) {
+ if (cfg->cdcfg.cdtab)
arm_smmu_free_cd_tables(smmu_domain);
- arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
- }
+ arm_smmu_free_asid(&cfg->cd);
} else {
struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
if (cfg->vmid)
@@ -2466,14 +2476,15 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
struct io_pgtable_cfg *pgtbl_cfg)
{
int ret;
- int asid;
+ u32 asid;
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
- asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits);
- if (asid < 0)
- return asid;
+ ret = xa_alloc(&asid_xa, &asid, &cfg->cd,
+ XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
+ if (ret)
+ return ret;
cfg->s1cdmax = master->ssid_bits;
@@ -2506,7 +2517,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
out_free_cd_tables:
arm_smmu_free_cd_tables(smmu_domain);
out_free_asid:
- arm_smmu_bitmap_free(smmu->asid_map, asid);
+ arm_smmu_free_asid(&cfg->cd);
return ret;
}
@@ -2652,26 +2663,20 @@ static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
}
}
-#ifdef CONFIG_PCI_ATS
static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
{
- struct pci_dev *pdev;
+ struct device *dev = master->dev;
struct arm_smmu_device *smmu = master->smmu;
- struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- if (!(smmu->features & ARM_SMMU_FEAT_ATS) || !dev_is_pci(master->dev) ||
- !(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS) || pci_ats_disabled())
+ if (!(smmu->features & ARM_SMMU_FEAT_ATS))
return false;
- pdev = to_pci_dev(master->dev);
- return !pdev->untrusted && pdev->ats_cap;
+ if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
+ return false;
+
+ return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
}
-#else
-static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
-{
- return false;
-}
-#endif
static void arm_smmu_enable_ats(struct arm_smmu_master *master)
{
@@ -2914,27 +2919,26 @@ static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
static struct iommu_ops arm_smmu_ops;
-static int arm_smmu_add_device(struct device *dev)
+static struct iommu_device *arm_smmu_probe_device(struct device *dev)
{
int i, ret;
struct arm_smmu_device *smmu;
struct arm_smmu_master *master;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- struct iommu_group *group;
if (!fwspec || fwspec->ops != &arm_smmu_ops)
- return -ENODEV;
+ return ERR_PTR(-ENODEV);
if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
- return -EBUSY;
+ return ERR_PTR(-EBUSY);
smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
if (!smmu)
- return -ENODEV;
+ return ERR_PTR(-ENODEV);
master = kzalloc(sizeof(*master), GFP_KERNEL);
if (!master)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
master->dev = dev;
master->smmu = smmu;
@@ -2975,43 +2979,24 @@ static int arm_smmu_add_device(struct device *dev)
master->ssid_bits = min_t(u8, master->ssid_bits,
CTXDESC_LINEAR_CDMAX);
- ret = iommu_device_link(&smmu->iommu, dev);
- if (ret)
- goto err_disable_pasid;
+ return &smmu->iommu;
- group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group)) {
- ret = PTR_ERR(group);
- goto err_unlink;
- }
-
- iommu_group_put(group);
- return 0;
-
-err_unlink:
- iommu_device_unlink(&smmu->iommu, dev);
-err_disable_pasid:
- arm_smmu_disable_pasid(master);
err_free_master:
kfree(master);
dev_iommu_priv_set(dev, NULL);
- return ret;
+ return ERR_PTR(ret);
}
-static void arm_smmu_remove_device(struct device *dev)
+static void arm_smmu_release_device(struct device *dev)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct arm_smmu_master *master;
- struct arm_smmu_device *smmu;
if (!fwspec || fwspec->ops != &arm_smmu_ops)
return;
master = dev_iommu_priv_get(dev);
- smmu = master->smmu;
arm_smmu_detach_dev(master);
- iommu_group_remove_device(dev);
- iommu_device_unlink(&smmu->iommu, dev);
arm_smmu_disable_pasid(master);
kfree(master);
iommu_fwspec_free(dev);
@@ -3138,8 +3123,8 @@ static struct iommu_ops arm_smmu_ops = {
.flush_iotlb_all = arm_smmu_flush_iotlb_all,
.iotlb_sync = arm_smmu_iotlb_sync,
.iova_to_phys = arm_smmu_iova_to_phys,
- .add_device = arm_smmu_add_device,
- .remove_device = arm_smmu_remove_device,
+ .probe_device = arm_smmu_probe_device,
+ .release_device = arm_smmu_release_device,
.device_group = arm_smmu_device_group,
.domain_get_attr = arm_smmu_domain_get_attr,
.domain_set_attr = arm_smmu_domain_set_attr,
@@ -4021,6 +4006,18 @@ err_reset_pci_ops: __maybe_unused;
return err;
}
+/*
+ * Map @size bytes starting at @start through devm_ioremap_resource(), using a
+ * temporary local IORESOURCE_MEM resource covering [start, start + size - 1].
+ * Returns whatever devm_ioremap_resource() returns; the probe path checks the
+ * result with IS_ERR().
+ */
+static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
+ resource_size_t size)
+{
+ struct resource res = {
+ .flags = IORESOURCE_MEM,
+ .start = start,
+ .end = start + size - 1,
+ };
+
+ return devm_ioremap_resource(dev, &res);
+}
+
static int arm_smmu_device_probe(struct platform_device *pdev)
{
int irq, ret;
@@ -4056,10 +4053,23 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
}
ioaddr = res->start;
- smmu->base = devm_ioremap_resource(dev, res);
+ /*
+ * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
+ * the PMCG registers which are reserved by the PMU driver.
+ */
+ smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
if (IS_ERR(smmu->base))
return PTR_ERR(smmu->base);
+ if (arm_smmu_resource_size(smmu) > SZ_64K) {
+ smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
+ ARM_SMMU_REG_SZ);
+ if (IS_ERR(smmu->page1))
+ return PTR_ERR(smmu->page1);
+ } else {
+ smmu->page1 = smmu->base;
+ }
+
/* Interrupt lines */
irq = platform_get_irq_byname_optional(pdev, "combined");
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index a6a5796..243bc4c 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -220,7 +220,7 @@ static int arm_smmu_register_legacy_master(struct device *dev,
* With the legacy DT binding in play, we have no guarantees about
* probe order, but then we're also not doing default domains, so we can
* delay setting bus ops until we're sure every possible SMMU is ready,
- * and that way ensure that no add_device() calls get missed.
+ * and that way ensure that no probe_device() calls get missed.
*/
static int arm_smmu_legacy_bus_init(void)
{
@@ -1062,7 +1062,6 @@ static int arm_smmu_master_alloc_smes(struct device *dev)
struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
struct arm_smmu_device *smmu = cfg->smmu;
struct arm_smmu_smr *smrs = smmu->smrs;
- struct iommu_group *group;
int i, idx, ret;
mutex_lock(&smmu->stream_map_mutex);
@@ -1090,18 +1089,9 @@ static int arm_smmu_master_alloc_smes(struct device *dev)
cfg->smendx[i] = (s16)idx;
}
- group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group)) {
- ret = PTR_ERR(group);
- goto out_err;
- }
- iommu_group_put(group);
-
/* It worked! Now, poke the actual hardware */
- for_each_cfg_sme(cfg, fwspec, i, idx) {
+ for_each_cfg_sme(cfg, fwspec, i, idx)
arm_smmu_write_sme(smmu, idx);
- smmu->s2crs[idx].group = group;
- }
mutex_unlock(&smmu->stream_map_mutex);
return 0;
@@ -1172,7 +1162,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
/*
* FIXME: The arch/arm DMA API code tries to attach devices to its own
- * domains between of_xlate() and add_device() - we have no way to cope
+ * domains between of_xlate() and probe_device() - we have no way to cope
* with that, so until ARM gets converted to rely on groups and default
* domains, just say no (but more politely than by dereferencing NULL).
* This should be at least a WARN_ON once that's sorted.
@@ -1382,7 +1372,7 @@ struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
return dev ? dev_get_drvdata(dev) : NULL;
}
-static int arm_smmu_add_device(struct device *dev)
+static struct iommu_device *arm_smmu_probe_device(struct device *dev)
{
struct arm_smmu_device *smmu = NULL;
struct arm_smmu_master_cfg *cfg;
@@ -1403,7 +1393,7 @@ static int arm_smmu_add_device(struct device *dev)
} else if (fwspec && fwspec->ops == &arm_smmu_ops) {
smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
} else {
- return -ENODEV;
+ return ERR_PTR(-ENODEV);
}
ret = -EINVAL;
@@ -1444,21 +1434,19 @@ static int arm_smmu_add_device(struct device *dev)
if (ret)
goto out_cfg_free;
- iommu_device_link(&smmu->iommu, dev);
-
device_link_add(dev, smmu->dev,
DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
- return 0;
+ return &smmu->iommu;
out_cfg_free:
kfree(cfg);
out_free:
iommu_fwspec_free(dev);
- return ret;
+ return ERR_PTR(ret);
}
-static void arm_smmu_remove_device(struct device *dev)
+static void arm_smmu_release_device(struct device *dev)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct arm_smmu_master_cfg *cfg;
@@ -1475,13 +1463,11 @@ static void arm_smmu_remove_device(struct device *dev)
if (ret < 0)
return;
- iommu_device_unlink(&smmu->iommu, dev);
arm_smmu_master_free_smes(cfg, fwspec);
arm_smmu_rpm_put(smmu);
dev_iommu_priv_set(dev, NULL);
- iommu_group_remove_device(dev);
kfree(cfg);
iommu_fwspec_free(dev);
}
@@ -1512,6 +1498,11 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev)
else
group = generic_device_group(dev);
+ /* Remember group for faster lookups */
+ if (!IS_ERR(group))
+ for_each_cfg_sme(cfg, fwspec, i, idx)
+ smmu->s2crs[idx].group = group;
+
return group;
}
@@ -1618,6 +1609,17 @@ static void arm_smmu_get_resv_regions(struct device *dev,
iommu_dma_get_resv_regions(dev, head);
}
+/*
+ * .def_domain_type callback of arm_smmu_ops: forwards to the implementation's
+ * def_domain_type hook when the SMMU has an impl that provides one, and
+ * returns 0 otherwise.
+ * NOTE(review): the result of dev_iommu_priv_get() is dereferenced unchecked -
+ * presumably this is only called for devices that already passed
+ * probe_device(); confirm.
+ */
+static int arm_smmu_def_domain_type(struct device *dev)
+{
+ struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
+ const struct arm_smmu_impl *impl = cfg->smmu->impl;
+
+ if (impl && impl->def_domain_type)
+ return impl->def_domain_type(dev);
+
+ return 0;
+}
+
static struct iommu_ops arm_smmu_ops = {
.capable = arm_smmu_capable,
.domain_alloc = arm_smmu_domain_alloc,
@@ -1628,14 +1630,15 @@ static struct iommu_ops arm_smmu_ops = {
.flush_iotlb_all = arm_smmu_flush_iotlb_all,
.iotlb_sync = arm_smmu_iotlb_sync,
.iova_to_phys = arm_smmu_iova_to_phys,
- .add_device = arm_smmu_add_device,
- .remove_device = arm_smmu_remove_device,
+ .probe_device = arm_smmu_probe_device,
+ .release_device = arm_smmu_release_device,
.device_group = arm_smmu_device_group,
.domain_get_attr = arm_smmu_domain_get_attr,
.domain_set_attr = arm_smmu_domain_set_attr,
.of_xlate = arm_smmu_of_xlate,
.get_resv_regions = arm_smmu_get_resv_regions,
.put_resv_regions = generic_iommu_put_resv_regions,
+ .def_domain_type = arm_smmu_def_domain_type,
.pgsize_bitmap = -1UL, /* Restricted during device attach */
};
@@ -2253,7 +2256,7 @@ static int arm_smmu_device_remove(struct platform_device *pdev)
return -ENODEV;
if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
- dev_err(&pdev->dev, "removing device with active domains!\n");
+ dev_notice(&pdev->dev, "disabling translation\n");
arm_smmu_bus_init(NULL);
iommu_device_unregister(&smmu->iommu);
diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h
index 8d1cd54..d172c02 100644
--- a/drivers/iommu/arm-smmu.h
+++ b/drivers/iommu/arm-smmu.h
@@ -386,6 +386,7 @@ struct arm_smmu_impl {
int (*init_context)(struct arm_smmu_domain *smmu_domain);
void (*tlb_sync)(struct arm_smmu_device *smmu, int page, int sync,
int status);
+ int (*def_domain_type)(struct device *dev);
};
static inline void __iomem *arm_smmu_page(struct arm_smmu_device *smmu, int n)
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index f77dae7..60a2970 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -963,6 +963,7 @@ static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
warn_invalid_dmar(phys_addr, " returns all ones");
goto unmap;
}
+ iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);
/* the registers might be more than one page */
map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
@@ -1156,12 +1157,11 @@ static inline void reclaim_free_desc(struct q_inval *qi)
}
}
-static int qi_check_fault(struct intel_iommu *iommu, int index)
+static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
{
u32 fault;
int head, tail;
struct q_inval *qi = iommu->qi;
- int wait_index = (index + 1) % QI_LENGTH;
int shift = qi_shift(iommu);
if (qi->desc_status[wait_index] == QI_ABORT)
@@ -1224,17 +1224,21 @@ static int qi_check_fault(struct intel_iommu *iommu, int index)
}
/*
- * Submit the queued invalidation descriptor to the remapping
- * hardware unit and wait for its completion.
+ * Submit invalidation descriptors of all types to the queued invalidation
+ * interface (QI). Multiple descriptors can be submitted at a time; a wait
+ * descriptor is appended to each submission to ensure hardware has completed
+ * the invalidation before returning. Wait descriptors can be part of the
+ * submission, but they will not be polled for completion.
*/
-int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
+int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
+ unsigned int count, unsigned long options)
{
- int rc;
struct q_inval *qi = iommu->qi;
- int offset, shift, length;
struct qi_desc wait_desc;
int wait_index, index;
unsigned long flags;
+ int offset, shift;
+ int rc, i;
if (!qi)
return 0;
@@ -1243,32 +1247,41 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
rc = 0;
raw_spin_lock_irqsave(&qi->q_lock, flags);
- while (qi->free_cnt < 3) {
+ /*
+ * Check if we have enough empty slots in the queue to submit,
+ * the calculation is based on:
+ * # of desc + 1 wait desc + 1 space between head and tail
+ */
+ while (qi->free_cnt < count + 2) {
raw_spin_unlock_irqrestore(&qi->q_lock, flags);
cpu_relax();
raw_spin_lock_irqsave(&qi->q_lock, flags);
}
index = qi->free_head;
- wait_index = (index + 1) % QI_LENGTH;
+ wait_index = (index + count) % QI_LENGTH;
shift = qi_shift(iommu);
- length = 1 << shift;
- qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
+ for (i = 0; i < count; i++) {
+ offset = ((index + i) % QI_LENGTH) << shift;
+ memcpy(qi->desc + offset, &desc[i], 1 << shift);
+ qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
+ }
+ qi->desc_status[wait_index] = QI_IN_USE;
- offset = index << shift;
- memcpy(qi->desc + offset, desc, length);
wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
+ if (options & QI_OPT_WAIT_DRAIN)
+ wait_desc.qw0 |= QI_IWD_PRQ_DRAIN;
wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
wait_desc.qw2 = 0;
wait_desc.qw3 = 0;
offset = wait_index << shift;
- memcpy(qi->desc + offset, &wait_desc, length);
+ memcpy(qi->desc + offset, &wait_desc, 1 << shift);
- qi->free_head = (qi->free_head + 2) % QI_LENGTH;
- qi->free_cnt -= 2;
+ qi->free_head = (qi->free_head + count + 1) % QI_LENGTH;
+ qi->free_cnt -= count + 1;
/*
* update the HW tail register indicating the presence of
@@ -1284,7 +1297,7 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
* a deadlock where the interrupt context can wait indefinitely
* for free slots in the queue.
*/
- rc = qi_check_fault(iommu, index);
+ rc = qi_check_fault(iommu, index, wait_index);
if (rc)
break;
@@ -1293,7 +1306,8 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
raw_spin_lock(&qi->q_lock);
}
- qi->desc_status[index] = QI_DONE;
+ for (i = 0; i < count; i++)
+ qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;
reclaim_free_desc(qi);
raw_spin_unlock_irqrestore(&qi->q_lock, flags);
@@ -1317,7 +1331,7 @@ void qi_global_iec(struct intel_iommu *iommu)
desc.qw3 = 0;
/* should never fail */
- qi_submit_sync(&desc, iommu);
+ qi_submit_sync(iommu, &desc, 1, 0);
}
void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
@@ -1331,7 +1345,7 @@ void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
desc.qw2 = 0;
desc.qw3 = 0;
- qi_submit_sync(&desc, iommu);
+ qi_submit_sync(iommu, &desc, 1, 0);
}
void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
@@ -1355,7 +1369,7 @@ void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
desc.qw2 = 0;
desc.qw3 = 0;
- qi_submit_sync(&desc, iommu);
+ qi_submit_sync(iommu, &desc, 1, 0);
}
void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
@@ -1377,7 +1391,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
desc.qw2 = 0;
desc.qw3 = 0;
- qi_submit_sync(&desc, iommu);
+ qi_submit_sync(iommu, &desc, 1, 0);
}
/* PASID-based IOTLB invalidation */
@@ -1418,7 +1432,46 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
QI_EIOTLB_AM(mask);
}
- qi_submit_sync(&desc, iommu);
+ qi_submit_sync(iommu, &desc, 1, 0);
+}
+
+/*
+ * qi_flush_dev_iotlb_pasid - PASID-based device IOTLB invalidate
+ * @iommu: IOMMU whose invalidation queue receives the descriptor
+ * @sid: source ID encoded into qw0
+ * @pfsid: physical function source ID encoded into qw0
+ * @pasid: PASID encoded into qw0
+ * @qdep: queue depth value encoded into qw0
+ * @addr: start address of the range to invalidate
+ * @size_order: log2 of the number of VT-d pages to invalidate (0 = one page)
+ * @granu: granularity value, encoded via QI_DEV_EIOTLB_GLOB()
+ *
+ * Builds a single descriptor and submits it through qi_submit_sync(); the
+ * result of the submission is not checked.
+ */
+void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+			      u32 pasid, u16 qdep, u64 addr,
+			      unsigned int size_order, u64 granu)
+{
+	const u64 page_offset_bits = (1ULL << VTD_PAGE_SHIFT) - 1;
+	struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
+	/*
+	 * Only the page address goes into the descriptor: the bits below
+	 * VTD_PAGE_SHIFT of qw1 carry control fields (the granularity bit and
+	 * the S bit), so a sub-page offset in @addr must not leak into them.
+	 */
+	u64 inv_addr = addr & ~page_offset_bits;
+
+	desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
+		QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
+		QI_DEV_IOTLB_PFSID(pfsid);
+	desc.qw1 = QI_DEV_EIOTLB_GLOB(granu);
+
+	/*
+	 * If the S bit is 0, only a single page is flushed. If the S bit is
+	 * set, the least significant zero bit of the address indicates the
+	 * invalidation range (VT-d spec 6.5.2.6), e.g. address bit 12[0]
+	 * indicates 8KB, 13[0] indicates 16KB. size_order = 0 is PAGE_SIZE
+	 * (4KB).
+	 * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
+	 * ECAP.
+	 */
+	if (size_order) {
+		u64 size_bit = 1ULL << (VTD_PAGE_SHIFT + size_order - 1);
+
+		/*
+		 * Zero bits that an aligned address already has below the size
+		 * bit would be taken as the least significant zero and shrink
+		 * the invalidated range, so force them to 1 ...
+		 */
+		inv_addr |= size_bit - (1ULL << VTD_PAGE_SHIFT);
+		/* ... and make the size bit itself the lowest zero bit. */
+		inv_addr &= ~size_bit;
+		/* S bit: more than one page is being flushed. */
+		desc.qw1 |= QI_DEV_EIOTLB_SIZE;
+	}
+	desc.qw1 |= inv_addr;
+
+	qi_submit_sync(iommu, &desc, 1, 0);
+}
+
+/*
+ * Submit a single descriptor whose qw0 is built from @pasid, @did and @granu
+ * via QI_PC_PASID(), QI_PC_DID() and QI_PC_GRAN(), tagged with QI_PC_TYPE.
+ * qw1-qw3 are left zero. The result of qi_submit_sync() is not checked.
+ */
+void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did,
+ u64 granu, int pasid)
+{
+ struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
+
+ desc.qw0 = QI_PC_PASID(pasid) | QI_PC_DID(did) |
+ QI_PC_GRAN(granu) | QI_PC_TYPE;
+ qi_submit_sync(iommu, &desc, 1, 0);
+}
/*
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 186ff5c..60c8a56 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -1235,19 +1235,13 @@ static phys_addr_t exynos_iommu_iova_to_phys(struct iommu_domain *iommu_domain,
return phys;
}
-static int exynos_iommu_add_device(struct device *dev)
+static struct iommu_device *exynos_iommu_probe_device(struct device *dev)
{
struct exynos_iommu_owner *owner = dev->archdata.iommu;
struct sysmmu_drvdata *data;
- struct iommu_group *group;
if (!has_sysmmu(dev))
- return -ENODEV;
-
- group = iommu_group_get_for_dev(dev);
-
- if (IS_ERR(group))
- return PTR_ERR(group);
+ return ERR_PTR(-ENODEV);
list_for_each_entry(data, &owner->controllers, owner_node) {
/*
@@ -1259,12 +1253,15 @@ static int exynos_iommu_add_device(struct device *dev)
DL_FLAG_STATELESS |
DL_FLAG_PM_RUNTIME);
}
- iommu_group_put(group);
- return 0;
+ /* There is always at least one entry, see exynos_iommu_of_xlate() */
+ data = list_first_entry(&owner->controllers,
+ struct sysmmu_drvdata, owner_node);
+
+ return &data->iommu;
}
-static void exynos_iommu_remove_device(struct device *dev)
+static void exynos_iommu_release_device(struct device *dev)
{
struct exynos_iommu_owner *owner = dev->archdata.iommu;
struct sysmmu_drvdata *data;
@@ -1282,7 +1279,6 @@ static void exynos_iommu_remove_device(struct device *dev)
iommu_group_put(group);
}
}
- iommu_group_remove_device(dev);
list_for_each_entry(data, &owner->controllers, owner_node)
device_link_del(data->link);
@@ -1331,8 +1327,8 @@ static const struct iommu_ops exynos_iommu_ops = {
.unmap = exynos_iommu_unmap,
.iova_to_phys = exynos_iommu_iova_to_phys,
.device_group = generic_device_group,
- .add_device = exynos_iommu_add_device,
- .remove_device = exynos_iommu_remove_device,
+ .probe_device = exynos_iommu_probe_device,
+ .release_device = exynos_iommu_release_device,
.pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE,
.of_xlate = exynos_iommu_of_xlate,
};
diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
index 06828e2..928d377 100644
--- a/drivers/iommu/fsl_pamu_domain.c
+++ b/drivers/iommu/fsl_pamu_domain.c
@@ -1016,25 +1016,13 @@ static struct iommu_group *fsl_pamu_device_group(struct device *dev)
return group;
}
-static int fsl_pamu_add_device(struct device *dev)
+static struct iommu_device *fsl_pamu_probe_device(struct device *dev)
{
- struct iommu_group *group;
-
- group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group))
- return PTR_ERR(group);
-
- iommu_group_put(group);
-
- iommu_device_link(&pamu_iommu, dev);
-
- return 0;
+ return &pamu_iommu;
}
-static void fsl_pamu_remove_device(struct device *dev)
+static void fsl_pamu_release_device(struct device *dev)
{
- iommu_device_unlink(&pamu_iommu, dev);
- iommu_group_remove_device(dev);
}
static const struct iommu_ops fsl_pamu_ops = {
@@ -1048,8 +1036,8 @@ static const struct iommu_ops fsl_pamu_ops = {
.iova_to_phys = fsl_pamu_iova_to_phys,
.domain_set_attr = fsl_pamu_set_domain_attr,
.domain_get_attr = fsl_pamu_get_domain_attr,
- .add_device = fsl_pamu_add_device,
- .remove_device = fsl_pamu_remove_device,
+ .probe_device = fsl_pamu_probe_device,
+ .release_device = fsl_pamu_release_device,
.device_group = fsl_pamu_device_group,
};
diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c
index a386b83..3c0c67a 100644
--- a/drivers/iommu/hyperv-iommu.c
+++ b/drivers/iommu/hyperv-iommu.c
@@ -131,7 +131,7 @@ static int hyperv_irq_remapping_activate(struct irq_domain *domain,
return 0;
}
-static struct irq_domain_ops hyperv_ir_domain_ops = {
+static const struct irq_domain_ops hyperv_ir_domain_ops = {
.alloc = hyperv_irq_remapping_alloc,
.free = hyperv_irq_remapping_free,
.activate = hyperv_irq_remapping_activate,
diff --git a/drivers/iommu/intel-iommu-debugfs.c b/drivers/iommu/intel-iommu-debugfs.c
index 3eb1fe2..cf1ebb9 100644
--- a/drivers/iommu/intel-iommu-debugfs.c
+++ b/drivers/iommu/intel-iommu-debugfs.c
@@ -372,6 +372,66 @@ static int domain_translation_struct_show(struct seq_file *m, void *unused)
}
DEFINE_SHOW_ATTRIBUTE(domain_translation_struct);
+/*
+ * Print a header line and then one row per invalidation queue slot, for all
+ * QI_LENGTH slots of @iommu. When ecap_smts() is set each row shows qw0-qw3,
+ * otherwise only qw0 and qw1; the slot's desc_status value ends every row.
+ * The caller in this file, invalidation_queue_show(), holds qi->q_lock around
+ * the call.
+ */
+static void invalidation_queue_entry_show(struct seq_file *m,
+ struct intel_iommu *iommu)
+{
+ int index, shift = qi_shift(iommu);
+ struct qi_desc *desc;
+ int offset;
+
+ if (ecap_smts(iommu->ecap))
+ seq_puts(m, "Index\t\tqw0\t\t\tqw1\t\t\tqw2\t\t\tqw3\t\t\tstatus\n");
+ else
+ seq_puts(m, "Index\t\tqw0\t\t\tqw1\t\t\tstatus\n");
+
+ for (index = 0; index < QI_LENGTH; index++) {
+ /* Slot position is derived from the index and qi_shift(). */
+ offset = index << shift;
+ desc = iommu->qi->desc + offset;
+ if (ecap_smts(iommu->ecap))
+ seq_printf(m, "%5d\t%016llx\t%016llx\t%016llx\t%016llx\t%016x\n",
+ index, desc->qw0, desc->qw1,
+ desc->qw2, desc->qw3,
+ iommu->qi->desc_status[index]);
+ else
+ seq_printf(m, "%5d\t%016llx\t%016llx\t%016x\n",
+ index, desc->qw0, desc->qw1,
+ iommu->qi->desc_status[index]);
+ }
+}
+
+/*
+ * Show routine behind the "invalidation_queue" debugfs file.
+ *
+ * For every active IOMMU that has a queue (iommu->qi) and reports ecap_qis(),
+ * prints the queue base, the DMAR_IQH_REG and DMAR_IQT_REG values shifted
+ * right by qi_shift(), and then every queue entry. Always returns 0.
+ */
+static int invalidation_queue_show(struct seq_file *m, void *unused)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+ unsigned long flags;
+ struct q_inval *qi;
+ int shift;
+
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd) {
+ qi = iommu->qi;
+ shift = qi_shift(iommu);
+
+ /* Nothing to dump for this IOMMU: no queue, or no ecap_qis(). */
+ if (!qi || !ecap_qis(iommu->ecap))
+ continue;
+
+ seq_printf(m, "Invalidation queue on IOMMU: %s\n", iommu->name);
+
+ /* Register reads and the entry dump both happen under q_lock. */
+ raw_spin_lock_irqsave(&qi->q_lock, flags);
+ seq_printf(m, " Base: 0x%llx\tHead: %lld\tTail: %lld\n",
+ (u64)virt_to_phys(qi->desc),
+ dmar_readq(iommu->reg + DMAR_IQH_REG) >> shift,
+ dmar_readq(iommu->reg + DMAR_IQT_REG) >> shift);
+ invalidation_queue_entry_show(m, iommu);
+ raw_spin_unlock_irqrestore(&qi->q_lock, flags);
+ seq_putc(m, '\n');
+ }
+ rcu_read_unlock();
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(invalidation_queue);
+
#ifdef CONFIG_IRQ_REMAP
static void ir_tbl_remap_entry_show(struct seq_file *m,
struct intel_iommu *iommu)
@@ -490,6 +550,8 @@ void __init intel_iommu_debugfs_init(void)
debugfs_create_file("domain_translation_struct", 0444,
intel_iommu_debug, NULL,
&domain_translation_struct_fops);
+ debugfs_create_file("invalidation_queue", 0444, intel_iommu_debug,
+ NULL, &invalidation_queue_fops);
#ifdef CONFIG_IRQ_REMAP
debugfs_create_file("ir_translation_struct", 0444, intel_iommu_debug,
NULL, &ir_translation_struct_fops);
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 0182cff..648a785 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -296,31 +296,6 @@ static inline void context_clear_entry(struct context_entry *context)
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;
-/* si_domain contains mulitple devices */
-#define DOMAIN_FLAG_STATIC_IDENTITY BIT(0)
-
-/*
- * This is a DMA domain allocated through the iommu domain allocation
- * interface. But one or more devices belonging to this domain have
- * been chosen to use a private domain. We should avoid to use the
- * map/unmap/iova_to_phys APIs on it.
- */
-#define DOMAIN_FLAG_LOSE_CHILDREN BIT(1)
-
-/*
- * When VT-d works in the scalable mode, it allows DMA translation to
- * happen through either first level or second level page table. This
- * bit marks that the DMA translation for the domain goes through the
- * first level page table, otherwise, it goes through the second level.
- */
-#define DOMAIN_FLAG_USE_FIRST_LEVEL BIT(2)
-
-/*
- * Domain represents a virtual machine which demands iommu nested
- * translation mode support.
- */
-#define DOMAIN_FLAG_NESTING_MODE BIT(3)
-
#define for_each_domain_iommu(idx, domain) \
for (idx = 0; idx < g_num_of_iommus; idx++) \
if (domain->iommu_refcnt[idx])
@@ -355,11 +330,6 @@ static void domain_exit(struct dmar_domain *domain);
static void domain_remove_dev_info(struct dmar_domain *domain);
static void dmar_remove_one_dev_info(struct device *dev);
static void __dmar_remove_one_dev_info(struct device_domain_info *info);
-static void domain_context_clear(struct intel_iommu *iommu,
- struct device *dev);
-static int domain_detach_iommu(struct dmar_domain *domain,
- struct intel_iommu *iommu);
-static bool device_is_rmrr_locked(struct device *dev);
static int intel_iommu_attach_device(struct iommu_domain *domain,
struct device *dev);
static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -395,6 +365,21 @@ EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
#define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
+struct device_domain_info *get_domain_info(struct device *dev)
+{
+ struct device_domain_info *info;
+
+ if (!dev)
+ return NULL;
+
+ info = dev->archdata.iommu;
+ if (unlikely(info == DUMMY_DEVICE_DOMAIN_INFO ||
+ info == DEFER_DEVICE_DOMAIN_INFO))
+ return NULL;
+
+ return info;
+}
+
DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);
@@ -446,12 +431,6 @@ static void init_translation_status(struct intel_iommu *iommu)
iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
}
-/* Convert generic 'struct iommu_domain to private struct dmar_domain */
-static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
-{
- return container_of(dom, struct dmar_domain, domain);
-}
-
static int __init intel_iommu_setup(char *str)
{
if (!str)
@@ -480,8 +459,7 @@ static int __init intel_iommu_setup(char *str)
pr_info("Intel-IOMMU: scalable mode supported\n");
intel_iommu_sm = 1;
} else if (!strncmp(str, "tboot_noforce", 13)) {
- printk(KERN_INFO
- "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
+ pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
intel_iommu_tboot_noforce = 1;
} else if (!strncmp(str, "nobounce", 8)) {
pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n");
@@ -1454,8 +1432,7 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info)
!pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
info->pri_enabled = 1;
#endif
- if (!pdev->untrusted && info->ats_supported &&
- pci_ats_page_aligned(pdev) &&
+ if (info->ats_supported && pci_ats_page_aligned(pdev) &&
!pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
info->ats_enabled = 1;
domain_update_iotlb(info->domain);
@@ -1763,6 +1740,9 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
if (ecap_prs(iommu->ecap))
intel_svm_finish_prq(iommu);
}
+ if (ecap_vcs(iommu->ecap) && vccap_pasid(iommu->vccap))
+ ioasid_unregister_allocator(&iommu->pasid_allocator);
+
#endif
}
@@ -1911,11 +1891,6 @@ static int dmar_init_reserved_ranges(void)
return 0;
}
-static void domain_reserve_special_ranges(struct dmar_domain *domain)
-{
- copy_reserved_iova(&reserved_iova_list, &domain->iovad);
-}
-
static inline int guestwidth_to_adjustwidth(int gaw)
{
int agaw;
@@ -1930,65 +1905,6 @@ static inline int guestwidth_to_adjustwidth(int gaw)
return agaw;
}
-static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
- int guest_width)
-{
- int adjust_width, agaw;
- unsigned long sagaw;
- int ret;
-
- init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
-
- if (!intel_iommu_strict) {
- ret = init_iova_flush_queue(&domain->iovad,
- iommu_flush_iova, iova_entry_free);
- if (ret)
- pr_info("iova flush queue initialization failed\n");
- }
-
- domain_reserve_special_ranges(domain);
-
- /* calculate AGAW */
- if (guest_width > cap_mgaw(iommu->cap))
- guest_width = cap_mgaw(iommu->cap);
- domain->gaw = guest_width;
- adjust_width = guestwidth_to_adjustwidth(guest_width);
- agaw = width_to_agaw(adjust_width);
- sagaw = cap_sagaw(iommu->cap);
- if (!test_bit(agaw, &sagaw)) {
- /* hardware doesn't support it, choose a bigger one */
- pr_debug("Hardware doesn't support agaw %d\n", agaw);
- agaw = find_next_bit(&sagaw, 5, agaw);
- if (agaw >= 5)
- return -ENODEV;
- }
- domain->agaw = agaw;
-
- if (ecap_coherent(iommu->ecap))
- domain->iommu_coherency = 1;
- else
- domain->iommu_coherency = 0;
-
- if (ecap_sc_support(iommu->ecap))
- domain->iommu_snooping = 1;
- else
- domain->iommu_snooping = 0;
-
- if (intel_iommu_superpage)
- domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
- else
- domain->iommu_superpage = 0;
-
- domain->nid = iommu->node;
-
- /* always allocate the top pgd */
- domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
- if (!domain->pgd)
- return -ENOMEM;
- __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
- return 0;
-}
-
static void domain_exit(struct dmar_domain *domain)
{
@@ -1996,7 +1912,8 @@ static void domain_exit(struct dmar_domain *domain)
domain_remove_dev_info(domain);
/* destroy iovas */
- put_iova_domain(&domain->iovad);
+ if (domain->domain.type == IOMMU_DOMAIN_DMA)
+ put_iova_domain(&domain->iovad);
if (domain->pgd) {
struct page *freelist;
@@ -2518,11 +2435,8 @@ struct dmar_domain *find_domain(struct device *dev)
if (unlikely(attach_deferred(dev) || iommu_dummy(dev)))
return NULL;
- if (dev_is_pci(dev))
- dev = &pci_real_dma_dev(to_pci_dev(dev))->dev;
-
/* No lock here, assumes no domain exit in normal case */
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
if (likely(info))
return info->domain;
@@ -2545,7 +2459,7 @@ dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
struct device_domain_info *info;
list_for_each_entry(info, &device_domain_list, global)
- if (info->iommu->segment == segment && info->bus == bus &&
+ if (info->segment == segment && info->bus == bus &&
info->devfn == devfn)
return info;
@@ -2582,6 +2496,12 @@ static int domain_setup_first_level(struct intel_iommu *iommu,
flags);
}
+static bool dev_is_real_dma_subdevice(struct device *dev)
+{
+ return dev && dev_is_pci(dev) &&
+ pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev);
+}
+
static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
int bus, int devfn,
struct device *dev,
@@ -2596,8 +2516,18 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
if (!info)
return NULL;
- info->bus = bus;
- info->devfn = devfn;
+ if (!dev_is_real_dma_subdevice(dev)) {
+ info->bus = bus;
+ info->devfn = devfn;
+ info->segment = iommu->segment;
+ } else {
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ info->bus = pdev->bus->number;
+ info->devfn = pdev->devfn;
+ info->segment = pci_domain_nr(pdev->bus);
+ }
+
info->ats_supported = info->pasid_supported = info->pri_supported = 0;
info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
info->ats_qdep = 0;
@@ -2611,10 +2541,8 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
if (dev && dev_is_pci(dev)) {
struct pci_dev *pdev = to_pci_dev(info->dev);
- if (!pdev->untrusted &&
- !pci_ats_disabled() &&
- ecap_dev_iotlb_support(iommu->ecap) &&
- pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
+ if (ecap_dev_iotlb_support(iommu->ecap) &&
+ pci_ats_supported(pdev) &&
dmar_find_matched_atsr_unit(pdev))
info->ats_supported = 1;
@@ -2637,7 +2565,8 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
if (!found) {
struct device_domain_info *info2;
- info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
+ info2 = dmar_search_domain_by_dev_info(info->segment, info->bus,
+ info->devfn);
if (info2) {
found = info2->domain;
info2->dev = dev;
@@ -2704,108 +2633,10 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
return domain;
}
-static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
-{
- *(u16 *)opaque = alias;
- return 0;
-}
-
-static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
-{
- struct device_domain_info *info;
- struct dmar_domain *domain = NULL;
- struct intel_iommu *iommu;
- u16 dma_alias;
- unsigned long flags;
- u8 bus, devfn;
-
- iommu = device_to_iommu(dev, &bus, &devfn);
- if (!iommu)
- return NULL;
-
- if (dev_is_pci(dev)) {
- struct pci_dev *pdev = to_pci_dev(dev);
-
- pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
-
- spin_lock_irqsave(&device_domain_lock, flags);
- info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
- PCI_BUS_NUM(dma_alias),
- dma_alias & 0xff);
- if (info) {
- iommu = info->iommu;
- domain = info->domain;
- }
- spin_unlock_irqrestore(&device_domain_lock, flags);
-
- /* DMA alias already has a domain, use it */
- if (info)
- goto out;
- }
-
- /* Allocate and initialize new domain for the device */
- domain = alloc_domain(0);
- if (!domain)
- return NULL;
- if (domain_init(domain, iommu, gaw)) {
- domain_exit(domain);
- return NULL;
- }
-
-out:
- return domain;
-}
-
-static struct dmar_domain *set_domain_for_dev(struct device *dev,
- struct dmar_domain *domain)
-{
- struct intel_iommu *iommu;
- struct dmar_domain *tmp;
- u16 req_id, dma_alias;
- u8 bus, devfn;
-
- iommu = device_to_iommu(dev, &bus, &devfn);
- if (!iommu)
- return NULL;
-
- req_id = ((u16)bus << 8) | devfn;
-
- if (dev_is_pci(dev)) {
- struct pci_dev *pdev = to_pci_dev(dev);
-
- pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
-
- /* register PCI DMA alias device */
- if (req_id != dma_alias) {
- tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
- dma_alias & 0xff, NULL, domain);
-
- if (!tmp || tmp != domain)
- return tmp;
- }
- }
-
- tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
- if (!tmp || tmp != domain)
- return tmp;
-
- return domain;
-}
-
static int iommu_domain_identity_map(struct dmar_domain *domain,
- unsigned long long start,
- unsigned long long end)
+ unsigned long first_vpfn,
+ unsigned long last_vpfn)
{
- unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
- unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
-
- if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
- dma_to_mm_pfn(last_vpfn))) {
- pr_err("Reserving iova failed\n");
- return -ENOMEM;
- }
-
- pr_debug("Mapping reserved region %llx-%llx\n", start, end);
/*
* RMRR range might have overlap with physical memory range,
* clear it first
@@ -2817,45 +2648,6 @@ static int iommu_domain_identity_map(struct dmar_domain *domain,
DMA_PTE_READ|DMA_PTE_WRITE);
}
-static int domain_prepare_identity_map(struct device *dev,
- struct dmar_domain *domain,
- unsigned long long start,
- unsigned long long end)
-{
- /* For _hardware_ passthrough, don't bother. But for software
- passthrough, we do it anyway -- it may indicate a memory
- range which is reserved in E820, so which didn't get set
- up to start with in si_domain */
- if (domain == si_domain && hw_pass_through) {
- dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n",
- start, end);
- return 0;
- }
-
- dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end);
-
- if (end < start) {
- WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
- "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
- dmi_get_system_info(DMI_BIOS_VENDOR),
- dmi_get_system_info(DMI_BIOS_VERSION),
- dmi_get_system_info(DMI_PRODUCT_VERSION));
- return -EIO;
- }
-
- if (end >> agaw_to_width(domain->agaw)) {
- WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
- "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
- agaw_to_width(domain->agaw),
- dmi_get_system_info(DMI_BIOS_VENDOR),
- dmi_get_system_info(DMI_BIOS_VERSION),
- dmi_get_system_info(DMI_PRODUCT_VERSION));
- return -EIO;
- }
-
- return iommu_domain_identity_map(domain, start, end);
-}
-
static int md_domain_init(struct dmar_domain *domain, int guest_width);
static int __init si_domain_init(int hw)
@@ -2882,7 +2674,8 @@ static int __init si_domain_init(int hw)
for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
ret = iommu_domain_identity_map(si_domain,
- PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
+ mm_to_dma_pfn(start_pfn),
+ mm_to_dma_pfn(end_pfn));
if (ret)
return ret;
}
@@ -2911,17 +2704,6 @@ static int __init si_domain_init(int hw)
return 0;
}
-static int identity_mapping(struct device *dev)
-{
- struct device_domain_info *info;
-
- info = dev->archdata.iommu;
- if (info)
- return (info->domain == si_domain);
-
- return 0;
-}
-
static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
{
struct dmar_domain *ndomain;
@@ -3048,31 +2830,6 @@ static int device_def_domain_type(struct device *dev)
if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
return IOMMU_DOMAIN_IDENTITY;
-
- /*
- * We want to start off with all devices in the 1:1 domain, and
- * take them out later if we find they can't access all of memory.
- *
- * However, we can't do this for PCI devices behind bridges,
- * because all PCI devices behind the same bridge will end up
- * with the same source-id on their transactions.
- *
- * Practically speaking, we can't change things around for these
- * devices at run-time, because we can't be sure there'll be no
- * DMA transactions in flight for any of their siblings.
- *
- * So PCI devices (unless they're on the root bus) as well as
- * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
- * the 1:1 domain, just in _case_ one of their siblings turns out
- * not to be able to map all of memory.
- */
- if (!pci_is_pcie(pdev)) {
- if (!pci_is_root_bus(pdev->bus))
- return IOMMU_DOMAIN_DMA;
- if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
- return IOMMU_DOMAIN_DMA;
- } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
- return IOMMU_DOMAIN_DMA;
}
return 0;
@@ -3297,6 +3054,85 @@ static int copy_translation_tables(struct intel_iommu *iommu)
return ret;
}
+#ifdef CONFIG_INTEL_IOMMU_SVM
+static ioasid_t intel_vcmd_ioasid_alloc(ioasid_t min, ioasid_t max, void *data)
+{
+ struct intel_iommu *iommu = data;
+ ioasid_t ioasid;
+
+ if (!iommu)
+ return INVALID_IOASID;
+ /*
+ * VT-d virtual command interface always uses the full 20 bit
+ * PASID range. Host can partition guest PASID range based on
+ * policies but it is out of guest's control.
+ */
+ if (min < PASID_MIN || max > intel_pasid_max_id)
+ return INVALID_IOASID;
+
+ if (vcmd_alloc_pasid(iommu, &ioasid))
+ return INVALID_IOASID;
+
+ return ioasid;
+}
+
+static void intel_vcmd_ioasid_free(ioasid_t ioasid, void *data)
+{
+ struct intel_iommu *iommu = data;
+
+ if (!iommu)
+ return;
+ /*
+ * The ioasid owner is sanity-checked by the upper layer, e.g. VFIO.
+ * We can only free the PASID when all the devices are unbound.
+ */
+ if (ioasid_find(NULL, ioasid, NULL)) {
+ pr_alert("Cannot free active IOASID %d\n", ioasid);
+ return;
+ }
+ vcmd_free_pasid(iommu, ioasid);
+}
+
+static void register_pasid_allocator(struct intel_iommu *iommu)
+{
+ /*
+ * If we are running in the host, there is no need for a custom
+ * allocator because PASIDs are allocated from the host system-wide.
+ */
+ if (!cap_caching_mode(iommu->cap))
+ return;
+
+ if (!sm_supported(iommu)) {
+ pr_warn("VT-d Scalable Mode not enabled, no PASID allocation\n");
+ return;
+ }
+
+ /*
+ * Register a custom PASID allocator if we are running in a guest,
+ * guest PASID must be obtained via virtual command interface.
+ * There can be multiple vIOMMUs in each guest but only one allocator
+ * is active. All vIOMMU allocators will eventually be calling the same
+ * host allocator.
+ */
+ if (!ecap_vcs(iommu->ecap) || !vccap_pasid(iommu->vccap))
+ return;
+
+ pr_info("Register custom PASID allocator\n");
+ iommu->pasid_allocator.alloc = intel_vcmd_ioasid_alloc;
+ iommu->pasid_allocator.free = intel_vcmd_ioasid_free;
+ iommu->pasid_allocator.pdata = (void *)iommu;
+ if (ioasid_register_allocator(&iommu->pasid_allocator)) {
+ pr_warn("Custom PASID allocator failed, scalable mode disabled\n");
+ /*
+ * Disable scalable mode on this IOMMU if there
+ * is no custom allocator. Mixing SM-capable vIOMMUs
+ * and non-SM vIOMMUs is not supported.
+ */
+ intel_iommu_sm = 0;
+ }
+}
+#endif
+
static int __init init_dmars(void)
{
struct dmar_drhd_unit *drhd;
@@ -3414,6 +3250,9 @@ static int __init init_dmars(void)
*/
for_each_active_iommu(iommu, drhd) {
iommu_flush_write_buffer(iommu);
+#ifdef CONFIG_INTEL_IOMMU_SVM
+ register_pasid_allocator(iommu);
+#endif
iommu_set_root_entry(iommu);
iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
@@ -3531,100 +3370,6 @@ static unsigned long intel_alloc_iova(struct device *dev,
return iova_pfn;
}
-static struct dmar_domain *get_private_domain_for_dev(struct device *dev)
-{
- struct dmar_domain *domain, *tmp;
- struct dmar_rmrr_unit *rmrr;
- struct device *i_dev;
- int i, ret;
-
- /* Device shouldn't be attached by any domains. */
- domain = find_domain(dev);
- if (domain)
- return NULL;
-
- domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
- if (!domain)
- goto out;
-
- /* We have a new domain - setup possible RMRRs for the device */
- rcu_read_lock();
- for_each_rmrr_units(rmrr) {
- for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
- i, i_dev) {
- if (i_dev != dev)
- continue;
-
- ret = domain_prepare_identity_map(dev, domain,
- rmrr->base_address,
- rmrr->end_address);
- if (ret)
- dev_err(dev, "Mapping reserved region failed\n");
- }
- }
- rcu_read_unlock();
-
- tmp = set_domain_for_dev(dev, domain);
- if (!tmp || domain != tmp) {
- domain_exit(domain);
- domain = tmp;
- }
-
-out:
- if (!domain)
- dev_err(dev, "Allocating domain failed\n");
- else
- domain->domain.type = IOMMU_DOMAIN_DMA;
-
- return domain;
-}
-
-/* Check if the dev needs to go through non-identity map and unmap process.*/
-static bool iommu_need_mapping(struct device *dev)
-{
- int ret;
-
- if (iommu_dummy(dev))
- return false;
-
- if (unlikely(attach_deferred(dev)))
- do_deferred_attach(dev);
-
- ret = identity_mapping(dev);
- if (ret) {
- u64 dma_mask = *dev->dma_mask;
-
- if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask)
- dma_mask = dev->coherent_dma_mask;
-
- if (dma_mask >= dma_direct_get_required_mask(dev))
- return false;
-
- /*
- * 32 bit DMA is removed from si_domain and fall back to
- * non-identity mapping.
- */
- dmar_remove_one_dev_info(dev);
- ret = iommu_request_dma_domain_for_dev(dev);
- if (ret) {
- struct iommu_domain *domain;
- struct dmar_domain *dmar_domain;
-
- domain = iommu_get_domain_for_dev(dev);
- if (domain) {
- dmar_domain = to_dmar_domain(domain);
- dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
- }
- dmar_remove_one_dev_info(dev);
- get_private_domain_for_dev(dev);
- }
-
- dev_info(dev, "32bit DMA uses non-identity mapping\n");
- }
-
- return true;
-}
-
static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
size_t size, int dir, u64 dma_mask)
{
@@ -3638,6 +3383,9 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
BUG_ON(dir == DMA_NONE);
+ if (unlikely(attach_deferred(dev)))
+ do_deferred_attach(dev);
+
domain = find_domain(dev);
if (!domain)
return DMA_MAPPING_ERROR;
@@ -3689,20 +3437,15 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page,
enum dma_data_direction dir,
unsigned long attrs)
{
- if (iommu_need_mapping(dev))
- return __intel_map_single(dev, page_to_phys(page) + offset,
- size, dir, *dev->dma_mask);
- return dma_direct_map_page(dev, page, offset, size, dir, attrs);
+ return __intel_map_single(dev, page_to_phys(page) + offset,
+ size, dir, *dev->dma_mask);
}
static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr,
size_t size, enum dma_data_direction dir,
unsigned long attrs)
{
- if (iommu_need_mapping(dev))
- return __intel_map_single(dev, phys_addr, size, dir,
- *dev->dma_mask);
- return dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
+ return __intel_map_single(dev, phys_addr, size, dir, *dev->dma_mask);
}
static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
@@ -3753,17 +3496,13 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
size_t size, enum dma_data_direction dir,
unsigned long attrs)
{
- if (iommu_need_mapping(dev))
- intel_unmap(dev, dev_addr, size);
- else
- dma_direct_unmap_page(dev, dev_addr, size, dir, attrs);
+ intel_unmap(dev, dev_addr, size);
}
static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr,
size_t size, enum dma_data_direction dir, unsigned long attrs)
{
- if (iommu_need_mapping(dev))
- intel_unmap(dev, dev_addr, size);
+ intel_unmap(dev, dev_addr, size);
}
static void *intel_alloc_coherent(struct device *dev, size_t size,
@@ -3773,8 +3512,8 @@ static void *intel_alloc_coherent(struct device *dev, size_t size,
struct page *page = NULL;
int order;
- if (!iommu_need_mapping(dev))
- return dma_direct_alloc(dev, size, dma_handle, flags, attrs);
+ if (unlikely(attach_deferred(dev)))
+ do_deferred_attach(dev);
size = PAGE_ALIGN(size);
order = get_order(size);
@@ -3809,9 +3548,6 @@ static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
int order;
struct page *page = virt_to_page(vaddr);
- if (!iommu_need_mapping(dev))
- return dma_direct_free(dev, size, vaddr, dma_handle, attrs);
-
size = PAGE_ALIGN(size);
order = get_order(size);
@@ -3829,9 +3565,6 @@ static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
struct scatterlist *sg;
int i;
- if (!iommu_need_mapping(dev))
- return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs);
-
for_each_sg(sglist, sg, nelems, i) {
nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
}
@@ -3855,8 +3588,9 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele
struct intel_iommu *iommu;
BUG_ON(dir == DMA_NONE);
- if (!iommu_need_mapping(dev))
- return dma_direct_map_sg(dev, sglist, nelems, dir, attrs);
+
+ if (unlikely(attach_deferred(dev)))
+ do_deferred_attach(dev);
domain = find_domain(dev);
if (!domain)
@@ -3903,8 +3637,6 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele
static u64 intel_get_required_mask(struct device *dev)
{
- if (!iommu_need_mapping(dev))
- return dma_direct_get_required_mask(dev);
return DMA_BIT_MASK(32);
}
@@ -4813,58 +4545,37 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb,
unsigned long val, void *v)
{
struct memory_notify *mhp = v;
- unsigned long long start, end;
- unsigned long start_vpfn, last_vpfn;
+ unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
+ unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn +
+ mhp->nr_pages - 1);
switch (val) {
case MEM_GOING_ONLINE:
- start = mhp->start_pfn << PAGE_SHIFT;
- end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
- if (iommu_domain_identity_map(si_domain, start, end)) {
- pr_warn("Failed to build identity map for [%llx-%llx]\n",
- start, end);
+ if (iommu_domain_identity_map(si_domain,
+ start_vpfn, last_vpfn)) {
+ pr_warn("Failed to build identity map for [%lx-%lx]\n",
+ start_vpfn, last_vpfn);
return NOTIFY_BAD;
}
break;
case MEM_OFFLINE:
case MEM_CANCEL_ONLINE:
- start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
- last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
- while (start_vpfn <= last_vpfn) {
- struct iova *iova;
+ {
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
struct page *freelist;
- iova = find_iova(&si_domain->iovad, start_vpfn);
- if (iova == NULL) {
- pr_debug("Failed get IOVA for PFN %lx\n",
- start_vpfn);
- break;
- }
-
- iova = split_and_remove_iova(&si_domain->iovad, iova,
- start_vpfn, last_vpfn);
- if (iova == NULL) {
- pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
- start_vpfn, last_vpfn);
- return NOTIFY_BAD;
- }
-
- freelist = domain_unmap(si_domain, iova->pfn_lo,
- iova->pfn_hi);
+ freelist = domain_unmap(si_domain,
+ start_vpfn, last_vpfn);
rcu_read_lock();
for_each_active_iommu(iommu, drhd)
iommu_flush_iotlb_psi(iommu, si_domain,
- iova->pfn_lo, iova_size(iova),
+ start_vpfn, mhp->nr_pages,
!freelist, 0);
rcu_read_unlock();
dma_free_pagelist(freelist);
-
- start_vpfn = iova->pfn_hi + 1;
- free_iova_mem(iova);
}
break;
}
@@ -4892,8 +4603,9 @@ static void free_all_cpu_cached_iovas(unsigned int cpu)
for (did = 0; did < cap_ndoms(iommu->cap); did++) {
domain = get_iommu_domain(iommu, (u16)did);
- if (!domain)
+ if (!domain || domain->domain.type != IOMMU_DOMAIN_DMA)
continue;
+
free_cpu_cached_iovas(cpu, &domain->iovad);
}
}
@@ -5186,18 +4898,6 @@ int __init intel_iommu_init(void)
}
up_write(&dmar_global_lock);
-#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
- /*
- * If the system has no untrusted device or the user has decided
- * to disable the bounce page mechanisms, we don't need swiotlb.
- * Mark this and the pre-allocated bounce pages will be released
- * later.
- */
- if (!has_untrusted_dev() || intel_no_bounce)
- swiotlb = 0;
-#endif
- dma_ops = &intel_dma_ops;
-
init_iommu_pm_ops();
down_read(&dmar_global_lock);
@@ -5283,10 +4983,11 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info)
if (info->dev) {
if (dev_is_pci(info->dev) && sm_supported(iommu))
intel_pasid_tear_down_entry(iommu, info->dev,
- PASID_RID2PASID);
+ PASID_RID2PASID, false);
iommu_disable_dev_iotlb(info);
- domain_context_clear(iommu, info->dev);
+ if (!dev_is_real_dma_subdevice(info->dev))
+ domain_context_clear(iommu, info->dev);
intel_pasid_free_table(info->dev);
}
@@ -5296,12 +4997,6 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info)
domain_detach_iommu(domain, iommu);
spin_unlock_irqrestore(&iommu->lock, flags);
- /* free the private domain */
- if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN &&
- !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
- list_empty(&domain->devices))
- domain_exit(info->domain);
-
free_devinfo_mem(info);
}
@@ -5311,9 +5006,8 @@ static void dmar_remove_one_dev_info(struct device *dev)
unsigned long flags;
spin_lock_irqsave(&device_domain_lock, flags);
- info = dev->archdata.iommu;
- if (info && info != DEFER_DEVICE_DOMAIN_INFO
- && info != DUMMY_DEVICE_DOMAIN_INFO)
+ info = get_domain_info(dev);
+ if (info)
__dmar_remove_one_dev_info(info);
spin_unlock_irqrestore(&device_domain_lock, flags);
}
@@ -5322,9 +5016,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
{
int adjust_width;
- init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
- domain_reserve_special_ranges(domain);
-
/* calculate AGAW */
domain->gaw = guest_width;
adjust_width = guestwidth_to_adjustwidth(guest_width);
@@ -5343,11 +5034,21 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
return 0;
}
+static void intel_init_iova_domain(struct dmar_domain *dmar_domain)
+{
+ init_iova_domain(&dmar_domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
+ copy_reserved_iova(&reserved_iova_list, &dmar_domain->iovad);
+
+ if (!intel_iommu_strict &&
+ init_iova_flush_queue(&dmar_domain->iovad,
+ iommu_flush_iova, iova_entry_free))
+ pr_info("iova flush queue initialization failed\n");
+}
+
static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
{
struct dmar_domain *dmar_domain;
struct iommu_domain *domain;
- int ret;
switch (type) {
case IOMMU_DOMAIN_DMA:
@@ -5364,13 +5065,8 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
return NULL;
}
- if (!intel_iommu_strict && type == IOMMU_DOMAIN_DMA) {
- ret = init_iova_flush_queue(&dmar_domain->iovad,
- iommu_flush_iova,
- iova_entry_free);
- if (ret)
- pr_info("iova flush queue initialization failed\n");
- }
+ if (type == IOMMU_DOMAIN_DMA)
+ intel_init_iova_domain(dmar_domain);
domain_update_iommu_cap(dmar_domain);
@@ -5403,7 +5099,7 @@ static void intel_iommu_domain_free(struct iommu_domain *domain)
static inline bool
is_aux_domain(struct device *dev, struct iommu_domain *domain)
{
- struct device_domain_info *info = dev->archdata.iommu;
+ struct device_domain_info *info = get_domain_info(dev);
return info && info->auxd_enabled &&
domain->type == IOMMU_DOMAIN_UNMANAGED;
@@ -5412,7 +5108,7 @@ is_aux_domain(struct device *dev, struct iommu_domain *domain)
static void auxiliary_link_device(struct dmar_domain *domain,
struct device *dev)
{
- struct device_domain_info *info = dev->archdata.iommu;
+ struct device_domain_info *info = get_domain_info(dev);
assert_spin_locked(&device_domain_lock);
if (WARN_ON(!info))
@@ -5425,7 +5121,7 @@ static void auxiliary_link_device(struct dmar_domain *domain,
static void auxiliary_unlink_device(struct dmar_domain *domain,
struct device *dev)
{
- struct device_domain_info *info = dev->archdata.iommu;
+ struct device_domain_info *info = get_domain_info(dev);
assert_spin_locked(&device_domain_lock);
if (WARN_ON(!info))
@@ -5513,13 +5209,13 @@ static void aux_domain_remove_dev(struct dmar_domain *domain,
return;
spin_lock_irqsave(&device_domain_lock, flags);
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
iommu = info->iommu;
auxiliary_unlink_device(domain, dev);
spin_lock(&iommu->lock);
- intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid);
+ intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid, false);
domain_detach_iommu(domain, iommu);
spin_unlock(&iommu->lock);
@@ -5626,6 +5322,176 @@ static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
aux_domain_remove_dev(to_dmar_domain(domain), dev);
}
+/*
+ * 2D array for converting and sanitizing IOMMU generic TLB granularity to
+ * VT-d granularity. Invalidation is typically included in the unmap operation
+ * as a result of DMA or VFIO unmap. However, for assigned devices the guest
+ * owns the first level page tables. Invalidations of translation caches in
+ * the guest are trapped and passed down to the host.
+ *
+ * The vIOMMU in the guest will only expose first level page tables, so we
+ * do not support IOTLB granularity for requests without PASID (second level).
+ *
+ * For example, to find the VT-d granularity encoding for IOTLB
+ * type and page selective granularity within PASID:
+ * X: indexed by the bit position of the iommu cache type
+ * Y: indexed by enum iommu_inv_granularity
+ * [ilog2(IOMMU_CACHE_INV_TYPE_IOTLB)][IOMMU_INV_GRANU_ADDR]
+ */
+
+static const int
+inv_type_granu_table[IOMMU_CACHE_INV_TYPE_NR][IOMMU_INV_GRANU_NR] = {
+ /*
+ * PASID based IOTLB invalidation: PASID selective (per PASID),
+ * page selective (address granularity)
+ */
+ {-EINVAL, QI_GRAN_NONG_PASID, QI_GRAN_PSI_PASID},
+ /* PASID based dev TLBs */
+ {-EINVAL, -EINVAL, QI_DEV_IOTLB_GRAN_PASID_SEL},
+ /* PASID cache */
+ {-EINVAL, -EINVAL, -EINVAL}
+};
+
+static inline int to_vtd_granularity(int type, int granu)
+{
+ return inv_type_granu_table[type][granu];
+}
+
+static inline u64 to_vtd_size(u64 granu_size, u64 nr_granules)
+{
+ u64 nr_pages = (granu_size * nr_granules) >> VTD_PAGE_SHIFT;
+
+ /* VT-d size is encoded as 2^size of 4K pages: 0 for 4K, 9 for 2MB, etc.
+ * The IOMMU cache invalidate API passes granu_size in bytes and the
+ * number of contiguous granules of that size.
+ */
+ */
+ return order_base_2(nr_pages);
+}
+
+#ifdef CONFIG_INTEL_IOMMU_SVM
+static int
+intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
+ struct iommu_cache_invalidate_info *inv_info)
+{
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+ struct device_domain_info *info;
+ struct intel_iommu *iommu;
+ unsigned long flags;
+ int cache_type;
+ u8 bus, devfn;
+ u16 did, sid;
+ int ret = 0;
+ u64 size = 0;
+
+ if (!inv_info || !dmar_domain ||
+ inv_info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1)
+ return -EINVAL;
+
+ if (!dev || !dev_is_pci(dev))
+ return -ENODEV;
+
+ iommu = device_to_iommu(dev, &bus, &devfn);
+ if (!iommu)
+ return -ENODEV;
+
+ if (!(dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE))
+ return -EINVAL;
+
+ spin_lock_irqsave(&device_domain_lock, flags);
+ spin_lock(&iommu->lock);
+ info = get_domain_info(dev);
+ if (!info) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ did = dmar_domain->iommu_did[iommu->seq_id];
+ sid = PCI_DEVID(bus, devfn);
+
+ /* Size is only valid in address selective invalidation */
+ if (inv_info->granularity != IOMMU_INV_GRANU_PASID)
+ size = to_vtd_size(inv_info->addr_info.granule_size,
+ inv_info->addr_info.nb_granules);
+
+ for_each_set_bit(cache_type,
+ (unsigned long *)&inv_info->cache,
+ IOMMU_CACHE_INV_TYPE_NR) {
+ int granu = 0;
+ u64 pasid = 0;
+
+ granu = to_vtd_granularity(cache_type, inv_info->granularity);
+ if (granu == -EINVAL) {
+ pr_err_ratelimited("Invalid cache type and granu combination %d/%d\n",
+ cache_type, inv_info->granularity);
+ break;
+ }
+
+ /*
+ * PASID is stored in different locations based on the
+ * granularity.
+ */
+ if (inv_info->granularity == IOMMU_INV_GRANU_PASID &&
+ (inv_info->pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID))
+ pasid = inv_info->pasid_info.pasid;
+ else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
+ (inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID))
+ pasid = inv_info->addr_info.pasid;
+
+ switch (BIT(cache_type)) {
+ case IOMMU_CACHE_INV_TYPE_IOTLB:
+ if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
+ size &&
+ (inv_info->addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) {
+ pr_err_ratelimited("Address out of range, 0x%llx, size order %llu\n",
+ inv_info->addr_info.addr, size);
+ ret = -ERANGE;
+ goto out_unlock;
+ }
+
+ /*
+ * If granu is PASID-selective, address is ignored.
+ * We use npages = -1 to indicate that.
+ */
+ qi_flush_piotlb(iommu, did, pasid,
+ mm_to_dma_pfn(inv_info->addr_info.addr),
+ (granu == QI_GRAN_NONG_PASID) ? -1 : 1 << size,
+ inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF);
+
+ /*
+ * Always flush device IOTLB if ATS is enabled. vIOMMU
+ * in the guest may assume IOTLB flush is inclusive,
+ * which is more efficient.
+ */
+ if (info->ats_enabled)
+ qi_flush_dev_iotlb_pasid(iommu, sid,
+ info->pfsid, pasid,
+ info->ats_qdep,
+ inv_info->addr_info.addr,
+ size, granu);
+ break;
+ case IOMMU_CACHE_INV_TYPE_DEV_IOTLB:
+ if (info->ats_enabled)
+ qi_flush_dev_iotlb_pasid(iommu, sid,
+ info->pfsid, pasid,
+ info->ats_qdep,
+ inv_info->addr_info.addr,
+ size, granu);
+ else
+ pr_warn_ratelimited("Passdown device IOTLB flush w/o ATS!\n");
+ break;
+ default:
+ dev_err_ratelimited(dev, "Unsupported IOMMU invalidation type %d\n",
+ cache_type);
+ ret = -EINVAL;
+ }
+ }
+out_unlock:
+ spin_unlock(&iommu->lock);
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+
+ return ret;
+}
+#endif
+
static int intel_iommu_map(struct iommu_domain *domain,
unsigned long iova, phys_addr_t hpa,
size_t size, int iommu_prot, gfp_t gfp)
@@ -5781,78 +5647,22 @@ static bool intel_iommu_capable(enum iommu_cap cap)
return false;
}
-static int intel_iommu_add_device(struct device *dev)
+static struct iommu_device *intel_iommu_probe_device(struct device *dev)
{
- struct dmar_domain *dmar_domain;
- struct iommu_domain *domain;
struct intel_iommu *iommu;
- struct iommu_group *group;
u8 bus, devfn;
- int ret;
iommu = device_to_iommu(dev, &bus, &devfn);
if (!iommu)
- return -ENODEV;
-
- iommu_device_link(&iommu->iommu, dev);
+ return ERR_PTR(-ENODEV);
if (translation_pre_enabled(iommu))
dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO;
- group = iommu_group_get_for_dev(dev);
-
- if (IS_ERR(group)) {
- ret = PTR_ERR(group);
- goto unlink;
- }
-
- iommu_group_put(group);
-
- domain = iommu_get_domain_for_dev(dev);
- dmar_domain = to_dmar_domain(domain);
- if (domain->type == IOMMU_DOMAIN_DMA) {
- if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) {
- ret = iommu_request_dm_for_dev(dev);
- if (ret) {
- dmar_remove_one_dev_info(dev);
- dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
- domain_add_dev_info(si_domain, dev);
- dev_info(dev,
- "Device uses a private identity domain.\n");
- }
- }
- } else {
- if (device_def_domain_type(dev) == IOMMU_DOMAIN_DMA) {
- ret = iommu_request_dma_domain_for_dev(dev);
- if (ret) {
- dmar_remove_one_dev_info(dev);
- dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
- if (!get_private_domain_for_dev(dev)) {
- dev_warn(dev,
- "Failed to get a private domain.\n");
- ret = -ENOMEM;
- goto unlink;
- }
-
- dev_info(dev,
- "Device uses a private dma domain.\n");
- }
- }
- }
-
- if (device_needs_bounce(dev)) {
- dev_info(dev, "Use Intel IOMMU bounce page dma_ops\n");
- set_dma_ops(dev, &bounce_dma_ops);
- }
-
- return 0;
-
-unlink:
- iommu_device_unlink(&iommu->iommu, dev);
- return ret;
+ return &iommu->iommu;
}
-static void intel_iommu_remove_device(struct device *dev)
+static void intel_iommu_release_device(struct device *dev)
{
struct intel_iommu *iommu;
u8 bus, devfn;
@@ -5863,11 +5673,19 @@ static void intel_iommu_remove_device(struct device *dev)
dmar_remove_one_dev_info(dev);
- iommu_group_remove_device(dev);
+ set_dma_ops(dev, NULL);
+}
- iommu_device_unlink(&iommu->iommu, dev);
+static void intel_iommu_probe_finalize(struct device *dev)
+{
+ struct iommu_domain *domain;
+ domain = iommu_get_domain_for_dev(dev);
if (device_needs_bounce(dev))
+ set_dma_ops(dev, &bounce_dma_ops);
+ else if (domain && domain->type == IOMMU_DOMAIN_DMA)
+ set_dma_ops(dev, &intel_dma_ops);
+ else
set_dma_ops(dev, NULL);
}
@@ -5945,7 +5763,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
spin_lock(&iommu->lock);
ret = -EINVAL;
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
if (!info || !info->pasid_supported)
goto out;
@@ -6041,7 +5859,7 @@ static int intel_iommu_enable_auxd(struct device *dev)
return -ENODEV;
spin_lock_irqsave(&device_domain_lock, flags);
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
info->auxd_enabled = 1;
spin_unlock_irqrestore(&device_domain_lock, flags);
@@ -6054,7 +5872,7 @@ static int intel_iommu_disable_auxd(struct device *dev)
unsigned long flags;
spin_lock_irqsave(&device_domain_lock, flags);
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
if (!WARN_ON(!info))
info->auxd_enabled = 0;
spin_unlock_irqrestore(&device_domain_lock, flags);
@@ -6107,6 +5925,14 @@ intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
return !!siov_find_pci_dvsec(to_pci_dev(dev));
}
+ if (feat == IOMMU_DEV_FEAT_SVA) {
+ struct device_domain_info *info = get_domain_info(dev);
+
+ return info && (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) &&
+ info->pasid_supported && info->pri_supported &&
+ info->ats_supported;
+ }
+
return false;
}
@@ -6116,6 +5942,16 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
if (feat == IOMMU_DEV_FEAT_AUX)
return intel_iommu_enable_auxd(dev);
+ if (feat == IOMMU_DEV_FEAT_SVA) {
+ struct device_domain_info *info = get_domain_info(dev);
+
+ if (!info)
+ return -EINVAL;
+
+ if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE)
+ return 0;
+ }
+
return -ENODEV;
}
@@ -6131,7 +5967,7 @@ intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
static bool
intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
{
- struct device_domain_info *info = dev->archdata.iommu;
+ struct device_domain_info *info = get_domain_info(dev);
if (feat == IOMMU_DEV_FEAT_AUX)
return scalable_mode_support() && info && info->auxd_enabled;
@@ -6198,8 +6034,9 @@ const struct iommu_ops intel_iommu_ops = {
.map = intel_iommu_map,
.unmap = intel_iommu_unmap,
.iova_to_phys = intel_iommu_iova_to_phys,
- .add_device = intel_iommu_add_device,
- .remove_device = intel_iommu_remove_device,
+ .probe_device = intel_iommu_probe_device,
+ .probe_finalize = intel_iommu_probe_finalize,
+ .release_device = intel_iommu_release_device,
.get_resv_regions = intel_iommu_get_resv_regions,
.put_resv_regions = generic_iommu_put_resv_regions,
.apply_resv_region = intel_iommu_apply_resv_region,
@@ -6209,7 +6046,16 @@ const struct iommu_ops intel_iommu_ops = {
.dev_enable_feat = intel_iommu_dev_enable_feat,
.dev_disable_feat = intel_iommu_dev_disable_feat,
.is_attach_deferred = intel_iommu_is_attach_deferred,
+ .def_domain_type = device_def_domain_type,
.pgsize_bitmap = INTEL_IOMMU_PGSIZES,
+#ifdef CONFIG_INTEL_IOMMU_SVM
+ .cache_invalidate = intel_iommu_sva_invalidate,
+ .sva_bind_gpasid = intel_svm_bind_gpasid,
+ .sva_unbind_gpasid = intel_svm_unbind_gpasid,
+ .sva_bind = intel_svm_bind,
+ .sva_unbind = intel_svm_unbind,
+ .sva_get_pasid = intel_svm_get_pasid,
+#endif
};
static void quirk_iommu_igfx(struct pci_dev *dev)
diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
index 22b30f1..c81f0f1 100644
--- a/drivers/iommu/intel-pasid.c
+++ b/drivers/iommu/intel-pasid.c
@@ -27,6 +27,63 @@
static DEFINE_SPINLOCK(pasid_lock);
u32 intel_pasid_max_id = PASID_MAX;
+/*
+ * Request a PASID through the virtual command interface.
+ *
+ * Writes VCMD_CMD_ALLOC to DMAR_VCMD_REG and polls DMAR_VCRSP_REG, under
+ * iommu->register_lock, until the VCMD_VRSP_IP bit is clear. The status
+ * code of the response is then decoded:
+ *
+ * Returns 0 and stores the allocated PASID in @pasid on success, -ENOSPC
+ * when the response reports that no PASID is available, and -ENODEV for
+ * any other status code.
+ */
+int vcmd_alloc_pasid(struct intel_iommu *iommu, unsigned int *pasid)
+{
+	unsigned long irqflags;
+	u64 rsp;
+	u8 sc;
+
+	raw_spin_lock_irqsave(&iommu->register_lock, irqflags);
+	dmar_writeq(iommu->reg + DMAR_VCMD_REG, VCMD_CMD_ALLOC);
+	IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq,
+		      !(rsp & VCMD_VRSP_IP), rsp);
+	raw_spin_unlock_irqrestore(&iommu->register_lock, irqflags);
+
+	sc = VCMD_VRSP_SC(rsp);
+
+	if (sc == VCMD_VRSP_SC_SUCCESS) {
+		*pasid = VCMD_VRSP_RESULT_PASID(rsp);
+		return 0;
+	}
+
+	if (sc == VCMD_VRSP_SC_NO_PASID_AVAIL) {
+		pr_info("IOMMU: %s: No PASID available\n", iommu->name);
+		return -ENOSPC;
+	}
+
+	pr_warn("IOMMU: %s: Unexpected error code %d\n",
+		iommu->name, sc);
+	return -ENODEV;
+}
+
+/*
+ * Hand @pasid back through the virtual command interface.
+ *
+ * Writes the free command, with @pasid as its operand, to DMAR_VCMD_REG
+ * and polls DMAR_VCRSP_REG, under iommu->register_lock, until the
+ * VCMD_VRSP_IP bit is clear. Failures are only logged; nothing is
+ * returned to the caller.
+ */
+void vcmd_free_pasid(struct intel_iommu *iommu, unsigned int pasid)
+{
+	unsigned long irqflags;
+	u64 rsp;
+	u8 sc;
+
+	raw_spin_lock_irqsave(&iommu->register_lock, irqflags);
+	dmar_writeq(iommu->reg + DMAR_VCMD_REG,
+		    VCMD_CMD_OPERAND(pasid) | VCMD_CMD_FREE);
+	IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq,
+		      !(rsp & VCMD_VRSP_IP), rsp);
+	raw_spin_unlock_irqrestore(&iommu->register_lock, irqflags);
+
+	sc = VCMD_VRSP_SC(rsp);
+	if (sc == VCMD_VRSP_SC_SUCCESS)
+		return;
+
+	if (sc == VCMD_VRSP_SC_INVALID_PASID)
+		pr_info("IOMMU: %s: Invalid PASID\n", iommu->name);
+	else
+		pr_warn("IOMMU: %s: Unexpected error code %d\n",
+			iommu->name, sc);
+}
+
/*
* Per device pasid table management:
*/
@@ -94,7 +151,7 @@ int intel_pasid_alloc_table(struct device *dev)
int size;
might_sleep();
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table))
return -EINVAL;
@@ -141,7 +198,7 @@ void intel_pasid_free_table(struct device *dev)
struct pasid_entry *table;
int i, max_pde;
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
if (!info || !dev_is_pci(dev) || !info->pasid_table)
return;
@@ -167,7 +224,7 @@ struct pasid_table *intel_pasid_get_table(struct device *dev)
{
struct device_domain_info *info;
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
if (!info)
return NULL;
@@ -178,7 +235,7 @@ int intel_pasid_get_dev_max_id(struct device *dev)
{
struct device_domain_info *info;
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
if (!info || !info->pasid_table)
return 0;
@@ -199,7 +256,7 @@ struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid)
return NULL;
dir = pasid_table->table;
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
dir_index = pasid >> PASID_PDE_SHIFT;
index = pasid & PASID_PTE_MASK;
@@ -235,7 +292,20 @@ static inline void pasid_clear_entry(struct pasid_entry *pe)
WRITE_ONCE(pe->val[7], 0);
}
-static void intel_pasid_clear_entry(struct device *dev, int pasid)
+/*
+ * Wipe a PASID entry but leave PASID_PTE_FPD set in its first quadword.
+ * val[0] is written first, then the remaining seven quadwords are zeroed
+ * in ascending order.
+ */
+static inline void pasid_clear_entry_with_fpd(struct pasid_entry *pe)
+{
+	int qw;
+
+	WRITE_ONCE(pe->val[0], PASID_PTE_FPD);
+	for (qw = 1; qw < 8; qw++)
+		WRITE_ONCE(pe->val[qw], 0);
+}
+
+static void
+intel_pasid_clear_entry(struct device *dev, int pasid, bool fault_ignore)
{
struct pasid_entry *pe;
@@ -243,7 +313,10 @@ static void intel_pasid_clear_entry(struct device *dev, int pasid)
if (WARN_ON(!pe))
return;
- pasid_clear_entry(pe);
+ if (fault_ignore && pasid_pte_is_present(pe))
+ pasid_clear_entry_with_fpd(pe);
+ else
+ pasid_clear_entry(pe);
}
static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits)
@@ -359,18 +432,29 @@ pasid_set_flpm(struct pasid_entry *pe, u64 value)
pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), value << 2);
}
+/*
+ * Turn on the Extended Access Flag Enable (EAFE) field of a scalable
+ * mode PASID entry. EAFE is bit 135 of the entry, which is bit 7 of
+ * the third quadword (val[2]).
+ */
+static inline void
+pasid_set_eafe(struct pasid_entry *pe)
+{
+	const u64 eafe = 1 << 7;
+
+	pasid_set_bits(&pe->val[2], eafe, eafe);
+}
+
static void
pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
u16 did, int pasid)
{
struct qi_desc desc;
- desc.qw0 = QI_PC_DID(did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid);
+ desc.qw0 = QI_PC_DID(did) | QI_PC_GRAN(QI_PC_PASID_SEL) |
+ QI_PC_PASID(pasid) | QI_PC_TYPE;
desc.qw1 = 0;
desc.qw2 = 0;
desc.qw3 = 0;
- qi_submit_sync(&desc, iommu);
+ qi_submit_sync(iommu, &desc, 1, 0);
}
static void
@@ -384,7 +468,7 @@ iotlb_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, u32 pasid)
desc.qw2 = 0;
desc.qw3 = 0;
- qi_submit_sync(&desc, iommu);
+ qi_submit_sync(iommu, &desc, 1, 0);
}
static void
@@ -394,7 +478,7 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
struct device_domain_info *info;
u16 sid, qdep, pfsid;
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
if (!info || !info->ats_enabled)
return;
@@ -405,8 +489,8 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
}
-void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
- struct device *dev, int pasid)
+void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
+ int pasid, bool fault_ignore)
{
struct pasid_entry *pte;
u16 did;
@@ -416,7 +500,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
return;
did = pasid_get_domain_id(pte);
- intel_pasid_clear_entry(dev, pasid);
+ intel_pasid_clear_entry(dev, pasid, fault_ignore);
if (!ecap_coherent(iommu->ecap))
clflush_cache_range(pte, sizeof(*pte));
@@ -492,7 +576,7 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
/* Setup Present and PASID Granular Transfer Type: */
- pasid_set_translation_type(pte, 1);
+ pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
pasid_set_present(pte);
pasid_flush_caches(iommu, pte, pasid, did);
@@ -500,6 +584,25 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
}
/*
+ * Skip top levels of page tables for iommu which has less agaw
+ * than default. Unnecessary for PT mode.
+ */
+static inline int iommu_skip_agaw(struct dmar_domain *domain,
+				  struct intel_iommu *iommu,
+				  struct dma_pte **pgd)
+{
+	int level = domain->agaw;
+
+	/*
+	 * Descend one table per step until the domain's agaw no longer
+	 * exceeds the IOMMU's. *pgd is advanced in place, so it is updated
+	 * even when a non-present entry makes us bail out with -EINVAL.
+	 */
+	while (level > iommu->agaw) {
+		*pgd = phys_to_virt(dma_pte_addr(*pgd));
+		if (!dma_pte_present(*pgd))
+			return -EINVAL;
+		level--;
+	}
+
+	/* The agaw that matches the table *pgd now points at. */
+	return level;
+}
+
+/*
* Set up the scalable mode pasid entry for second only translation type.
*/
int intel_pasid_setup_second_level(struct intel_iommu *iommu,
@@ -522,17 +625,11 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
return -EINVAL;
}
- /*
- * Skip top levels of page tables for iommu which has less agaw
- * than default. Unnecessary for PT mode.
- */
pgd = domain->pgd;
- for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
- pgd = phys_to_virt(dma_pte_addr(pgd));
- if (!dma_pte_present(pgd)) {
- dev_err(dev, "Invalid domain page table\n");
- return -EINVAL;
- }
+ agaw = iommu_skip_agaw(domain, iommu, &pgd);
+ if (agaw < 0) {
+ dev_err(dev, "Invalid domain page table\n");
+ return -EINVAL;
}
pgd_val = virt_to_phys(pgd);
@@ -548,7 +645,7 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
pasid_set_domain_id(pte, did);
pasid_set_slptr(pte, pgd_val);
pasid_set_address_width(pte, agaw);
- pasid_set_translation_type(pte, 2);
+ pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
pasid_set_fault_enable(pte);
pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
@@ -582,7 +679,7 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
pasid_clear_entry(pte);
pasid_set_domain_id(pte, did);
pasid_set_address_width(pte, iommu->agaw);
- pasid_set_translation_type(pte, 4);
+ pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT);
pasid_set_fault_enable(pte);
pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
@@ -596,3 +693,161 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
return 0;
}
+
+/*
+ * Apply the guest-dependent parts of a bind request to @pte.
+ *
+ * Not every guest PASID table entry field is passed down during bind; only
+ * the ones that depend on guest settings are programmed here. Execution
+ * related bits such as NXE and SMEP are not supported, and other fields,
+ * such as the snoop related ones, are set from host needs regardless of
+ * the guest settings.
+ *
+ * Returns 0 on success, or -EINVAL when the request asks for supervisor
+ * requests or extended access flags that the IOMMU does not advertise, or
+ * carries any memory type flag.
+ */
+static int
+intel_pasid_setup_bind_data(struct intel_iommu *iommu, struct pasid_entry *pte,
+			    struct iommu_gpasid_bind_data_vtd *pasid_data)
+{
+	if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_SRE) {
+		if (ecap_srs(iommu->ecap)) {
+			pasid_set_sre(pte);
+		} else {
+			pr_err_ratelimited("No supervisor request support on %s\n",
+					   iommu->name);
+			return -EINVAL;
+		}
+	}
+
+	if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_EAFE) {
+		if (ecap_eafs(iommu->ecap)) {
+			pasid_set_eafe(pte);
+		} else {
+			pr_err_ratelimited("No extended access flag support on %s\n",
+					   iommu->name);
+			return -EINVAL;
+		}
+	}
+
+	/*
+	 * Memory type is only applicable to devices inside processor coherent
+	 * domain. Will add MTS support once coherent devices are available.
+	 */
+	if (!(pasid_data->flags & IOMMU_SVA_VTD_GPASID_MTS_MASK))
+		return 0;
+
+	pr_warn_ratelimited("No memory type support %s\n",
+			    iommu->name);
+	return -EINVAL;
+}
+
+/**
+ * intel_pasid_setup_nested() - Set up PASID entry for nested translation.
+ * This could be used for guest shared virtual address. In this case, the
+ * first level page tables are used for GVA-GPA translation in the guest,
+ * second level page tables are used for GPA-HPA translation.
+ *
+ * @iommu: IOMMU which the device belongs to
+ * @dev: Device to be set up for translation
+ * @gpgd: FLPTPTR: First Level Page translation pointer in GPA
+ * @pasid: PASID to be programmed in the device PASID table
+ * @pasid_data: Additional PASID info from the guest bind request
+ * @domain: Domain info for setting up second level page tables
+ * @addr_width: Address width of the first level (guest)
+ *
+ * Return: 0 on success; -EBUSY if the PASID entry is already present;
+ * -EINVAL if nested translation is not supported by @iommu, @domain is not
+ * in nesting mode, no PASID entry can be found, @addr_width or @gpgd is
+ * not acceptable, or the domain page table is invalid; otherwise the error
+ * returned by intel_pasid_setup_bind_data().
+ */
+int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
+ pgd_t *gpgd, int pasid,
+ struct iommu_gpasid_bind_data_vtd *pasid_data,
+ struct dmar_domain *domain, int addr_width)
+{
+ struct pasid_entry *pte;
+ struct dma_pte *pgd;
+ int ret = 0;
+ u64 pgd_val;
+ int agaw;
+ u16 did;
+
+ if (!ecap_nest(iommu->ecap)) {
+ pr_err_ratelimited("IOMMU: %s: No nested translation support\n",
+ iommu->name);
+ return -EINVAL;
+ }
+
+ if (!(domain->flags & DOMAIN_FLAG_NESTING_MODE)) {
+ pr_err_ratelimited("Domain is not in nesting mode, %x\n",
+ domain->flags);
+ return -EINVAL;
+ }
+
+ pte = intel_pasid_get_entry(dev, pasid);
+ if (WARN_ON(!pte))
+ return -EINVAL;
+
+ /*
+ * Caller must ensure PASID entry is not in use, i.e. not bind the
+ * same PASID to the same device twice.
+ */
+ if (pasid_pte_is_present(pte))
+ return -EBUSY;
+
+ /*
+ * Start from an all-zero entry. The present bit is only set at the
+ * very end, so every error return below leaves the entry non-present,
+ * although fields written before the failure are not undone.
+ */
+ pasid_clear_entry(pte);
+
+ /* Sanity checking performed by caller to make sure address
+ * width matching in two dimensions:
+ * 1. CPU vs. IOMMU
+ * 2. Guest vs. Host.
+ */
+ switch (addr_width) {
+#ifdef CONFIG_X86
+ case ADDR_WIDTH_5LEVEL:
+ if (!cpu_feature_enabled(X86_FEATURE_LA57) ||
+ !cap_5lp_support(iommu->cap)) {
+ dev_err_ratelimited(dev,
+ "5-level paging not supported\n");
+ return -EINVAL;
+ }
+
+ pasid_set_flpm(pte, 1);
+ break;
+#endif
+ case ADDR_WIDTH_4LEVEL:
+ pasid_set_flpm(pte, 0);
+ break;
+ default:
+ dev_err_ratelimited(dev, "Invalid guest address width %d\n",
+ addr_width);
+ return -EINVAL;
+ }
+
+ /* First level PGD is in GPA, must be supported by the second level */
+ if ((uintptr_t)gpgd > domain->max_addr) {
+ dev_err_ratelimited(dev,
+ "Guest PGD %lx not supported, max %llx\n",
+ (uintptr_t)gpgd, domain->max_addr);
+ return -EINVAL;
+ }
+ pasid_set_flptr(pte, (uintptr_t)gpgd);
+
+ /* Program the guest-dependent bits (SRE/EAFE) or reject the request */
+ ret = intel_pasid_setup_bind_data(iommu, pte, pasid_data);
+ if (ret)
+ return ret;
+
+ /* Setup the second level based on the given domain */
+ pgd = domain->pgd;
+
+ /* On success pgd points at the table level matching the returned agaw */
+ agaw = iommu_skip_agaw(domain, iommu, &pgd);
+ if (agaw < 0) {
+ dev_err_ratelimited(dev, "Invalid domain page table\n");
+ return -EINVAL;
+ }
+ pgd_val = virt_to_phys(pgd);
+ pasid_set_slptr(pte, pgd_val);
+ pasid_set_fault_enable(pte);
+
+ did = domain->iommu_did[iommu->seq_id];
+ pasid_set_domain_id(pte, did);
+
+ pasid_set_address_width(pte, agaw);
+ pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+
+ /* Mark the entry nested and present last, then flush the caches */
+ pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
+ pasid_set_present(pte);
+ pasid_flush_caches(iommu, pte, pasid, did);
+
+ return ret;
+}
diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h
index 92de6df..c5318d40 100644
--- a/drivers/iommu/intel-pasid.h
+++ b/drivers/iommu/intel-pasid.h
@@ -15,6 +15,7 @@
#define PASID_MAX 0x100000
#define PASID_PTE_MASK 0x3F
#define PASID_PTE_PRESENT 1
+#define PASID_PTE_FPD 2
#define PDE_PFN_MASK PAGE_MASK
#define PASID_PDE_SHIFT 6
#define MAX_NR_PASID_BITS 20
@@ -23,6 +24,16 @@
#define is_pasid_enabled(entry) (((entry)->lo >> 3) & 0x1)
#define get_pasid_dir_size(entry) (1 << ((((entry)->lo >> 9) & 0x7) + 7))
+/*
+ * Virtual command interface for enlightened pasid management.
+ *
+ * Command register: opcode in the low bits, operand shifted up by 8.
+ * Response register: bit 0 is the IP flag polled by the vcmd helpers,
+ * bits 2:1 carry the status code and the resulting PASID is the 20-bit
+ * field starting at bit 8.
+ *
+ * Status code 2 is the command-specific failure for both commands:
+ * "No PASID available" for allocate and "Invalid PASID" for free.
+ * NOTE(review): value taken from the VT-d specification's Virtual Command
+ * Response Register description; confirm against the spec revision in use.
+ */
+#define VCMD_CMD_ALLOC			0x1
+#define VCMD_CMD_FREE			0x2
+#define VCMD_VRSP_IP			0x1
+#define VCMD_VRSP_SC(e)			(((e) >> 1) & 0x3)
+#define VCMD_VRSP_SC_SUCCESS		0
+#define VCMD_VRSP_SC_NO_PASID_AVAIL	2
+#define VCMD_VRSP_SC_INVALID_PASID	2
+#define VCMD_VRSP_RESULT_PASID(e)	(((e) >> 8) & 0xfffff)
+#define VCMD_CMD_OPERAND(e)		((e) << 8)
/*
* Domain ID reserved for pasid entries programmed for first-level
* only and pass-through transfer modes.
@@ -36,6 +47,7 @@
* to vmalloc or even module mappings.
*/
#define PASID_FLAG_SUPERVISOR_MODE BIT(0)
+#define PASID_FLAG_NESTED BIT(1)
/*
* The PASID_FLAG_FL5LP flag Indicates using 5-level paging for first-
@@ -51,6 +63,11 @@ struct pasid_entry {
u64 val[8];
};
+#define PASID_ENTRY_PGTT_FL_ONLY (1)
+#define PASID_ENTRY_PGTT_SL_ONLY (2)
+#define PASID_ENTRY_PGTT_NESTED (3)
+#define PASID_ENTRY_PGTT_PT (4)
+
/* The representative of a PASID table */
struct pasid_table {
void *table; /* pasid table pointer */
@@ -99,7 +116,13 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
struct dmar_domain *domain,
struct device *dev, int pasid);
+int intel_pasid_setup_nested(struct intel_iommu *iommu,
+ struct device *dev, pgd_t *pgd, int pasid,
+ struct iommu_gpasid_bind_data_vtd *pasid_data,
+ struct dmar_domain *domain, int addr_width);
void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
- struct device *dev, int pasid);
-
+ struct device *dev, int pasid,
+ bool fault_ignore);
+int vcmd_alloc_pasid(struct intel_iommu *iommu, unsigned int *pasid);
+void vcmd_free_pasid(struct intel_iommu *iommu, unsigned int pasid);
#endif /* __INTEL_PASID_H */
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 2998418..a035ef9 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -23,6 +23,7 @@
#include "intel-pasid.h"
static irqreturn_t prq_event_thread(int irq, void *d);
+static void intel_svm_drain_prq(struct device *dev, int pasid);
#define PRQ_ORDER 0
@@ -66,6 +67,8 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);
+ init_completion(&iommu->prq_complete);
+
return 0;
}
@@ -138,7 +141,7 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d
}
desc.qw2 = 0;
desc.qw3 = 0;
- qi_submit_sync(&desc, svm->iommu);
+ qi_submit_sync(svm->iommu, &desc, 1, 0);
if (sdev->dev_iotlb) {
desc.qw0 = QI_DEV_EIOTLB_PASID(svm->pasid) |
@@ -162,7 +165,7 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d
}
desc.qw2 = 0;
desc.qw3 = 0;
- qi_submit_sync(&desc, svm->iommu);
+ qi_submit_sync(svm->iommu, &desc, 1, 0);
}
}
@@ -206,10 +209,9 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
* *has* to handle gracefully without affecting other processes.
*/
rcu_read_lock();
- list_for_each_entry_rcu(sdev, &svm->devs, list) {
- intel_pasid_tear_down_entry(svm->iommu, sdev->dev, svm->pasid);
- intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
- }
+ list_for_each_entry_rcu(sdev, &svm->devs, list)
+ intel_pasid_tear_down_entry(svm->iommu, sdev->dev,
+ svm->pasid, true);
rcu_read_unlock();
}
@@ -226,13 +228,212 @@ static LIST_HEAD(global_svm_list);
list_for_each_entry((sdev), &(svm)->devs, list) \
if ((d) != (sdev)->dev) {} else
-int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops)
+/*
+ * Bind a guest PASID to @dev by programming a nested-translation PASID
+ * entry for the host PASID data->hpasid.
+ *
+ * The whole lookup/setup sequence runs under pasid_mutex. The per-PASID
+ * svm tracking structure is looked up via ioasid_find() and created if the
+ * PASID has no private data yet; a per-device sdev is then allocated and
+ * added to svm->devs once intel_pasid_setup_nested() has succeeded.
+ *
+ * Returns 0 on success. Returns -EINVAL for a missing IOMMU or @data, an
+ * unsupported version/format, a device whose PASID width is not PASID_MAX,
+ * or an out-of-range host PASID; -ENOTSUPP for non-PCI devices; -EBUSY if
+ * the device is already bound to this PASID and aux domains are not
+ * enabled; -ENOMEM on allocation failure; otherwise the error reported by
+ * ioasid_find(), intel_iommu_enable_pasid() or intel_pasid_setup_nested().
+ */
+int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
+ struct iommu_gpasid_bind_data *data)
+{
+ struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
+ struct dmar_domain *dmar_domain;
+ struct intel_svm_dev *sdev;
+ struct intel_svm *svm;
+ int ret = 0;
+
+ if (WARN_ON(!iommu) || !data)
+ return -EINVAL;
+
+ if (data->version != IOMMU_GPASID_BIND_VERSION_1 ||
+ data->format != IOMMU_PASID_FORMAT_INTEL_VTD)
+ return -EINVAL;
+
+ if (!dev_is_pci(dev))
+ return -ENOTSUPP;
+
+ /* VT-d supports devices with full 20 bit PASIDs only */
+ if (pci_max_pasids(to_pci_dev(dev)) != PASID_MAX)
+ return -EINVAL;
+
+ /*
+ * We only check host PASID range, we have no knowledge to check
+ * guest PASID range.
+ */
+ if (data->hpasid <= 0 || data->hpasid >= PASID_MAX)
+ return -EINVAL;
+
+ dmar_domain = to_dmar_domain(domain);
+
+ mutex_lock(&pasid_mutex);
+ svm = ioasid_find(NULL, data->hpasid, NULL);
+ if (IS_ERR(svm)) {
+ ret = PTR_ERR(svm);
+ goto out;
+ }
+
+ if (svm) {
+ /*
+ * If we found svm for the PASID, there must be at
+ * least one device bound, otherwise svm should be freed.
+ */
+ if (WARN_ON(list_empty(&svm->devs))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* The loop body only runs for an sdev that matches @dev */
+ for_each_svm_dev(sdev, svm, dev) {
+ /*
+ * For devices with aux domains, we should allow
+ * multiple bind calls with the same PASID and pdev.
+ */
+ if (iommu_dev_feature_enabled(dev,
+ IOMMU_DEV_FEAT_AUX)) {
+ sdev->users++;
+ } else {
+ dev_warn_ratelimited(dev,
+ "Already bound with PASID %u\n",
+ svm->pasid);
+ ret = -EBUSY;
+ }
+ goto out;
+ }
+ } else {
+ /* We come here when PASID has never been bound to a device. */
+ svm = kzalloc(sizeof(*svm), GFP_KERNEL);
+ if (!svm) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ /* REVISIT: upper layer/VFIO can track host process that bind
+ * the PASID. ioasid_set = mm might be sufficient for vfio to
+ * check pasid VMM ownership. We can drop the following line
+ * once VFIO and IOASID set check is in place.
+ */
+ svm->mm = get_task_mm(current);
+ svm->pasid = data->hpasid;
+ if (data->flags & IOMMU_SVA_GPASID_VAL) {
+ svm->gpasid = data->gpasid;
+ svm->flags |= SVM_FLAG_GUEST_PASID;
+ }
+ ioasid_set_data(data->hpasid, svm);
+ INIT_LIST_HEAD_RCU(&svm->devs);
+ /*
+ * NOTE(review): the reference taken by get_task_mm() above is
+ * dropped again here, so svm->mm is kept without a held
+ * reference.
+ */
+ mmput(svm->mm);
+ }
+ sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
+ if (!sdev) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ sdev->dev = dev;
+
+ /* Only count users if device has aux domains */
+ if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX))
+ sdev->users = 1;
+
+ /* Set up device context entry for PASID if not enabled already */
+ ret = intel_iommu_enable_pasid(iommu, sdev->dev);
+ if (ret) {
+ dev_err_ratelimited(dev, "Failed to enable PASID capability\n");
+ kfree(sdev);
+ goto out;
+ }
+
+ /*
+ * PASID table is per device for better security. Therefore, for
+ * each bind of a new device even with an existing PASID, we need to
+ * call the nested mode setup function here.
+ */
+ spin_lock(&iommu->lock);
+ ret = intel_pasid_setup_nested(iommu, dev,
+ (pgd_t *)(uintptr_t)data->gpgd,
+ data->hpasid, &data->vtd, dmar_domain,
+ data->addr_width);
+ spin_unlock(&iommu->lock);
+ if (ret) {
+ dev_err_ratelimited(dev, "Failed to set up PASID %llu in nested mode, Err %d\n",
+ data->hpasid, ret);
+ /*
+ * PASID entry should be in cleared state if nested mode
+ * set up failed. So we only need to clear IOASID tracking
+ * data such that free call will succeed.
+ */
+ kfree(sdev);
+ goto out;
+ }
+
+ svm->flags |= SVM_FLAG_GUEST_MODE;
+
+ init_rcu_head(&sdev->rcu);
+ list_add_rcu(&sdev->list, &svm->devs);
+ out:
+ /*
+ * An svm with no devices on its list is released here together with
+ * its IOASID private data; this is the case for an svm allocated
+ * above whose sdev never made it onto the list.
+ */
+ if (!IS_ERR_OR_NULL(svm) && list_empty(&svm->devs)) {
+ ioasid_set_data(data->hpasid, NULL);
+ kfree(svm);
+ }
+
+ mutex_unlock(&pasid_mutex);
+ return ret;
+}
+
+/*
+ * Undo a guest bind of @pasid on @dev.
+ *
+ * Runs under pasid_mutex. When @dev is found on the PASID's device list,
+ * its user count is dropped (aux-domain devices only) and, once no users
+ * remain, the PASID entry is torn down, pending page requests are drained
+ * and the per-device tracking data is freed. The svm itself is freed when
+ * its device list becomes empty.
+ *
+ * Returns 0 if @dev was bound to @pasid, the error from ioasid_find() if
+ * the lookup failed, and -EINVAL otherwise.
+ */
+int intel_svm_unbind_gpasid(struct device *dev, int pasid)
+{
+	struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
+	struct intel_svm_dev *sdev;
+	struct intel_svm *svm;
+	int ret = -EINVAL;
+
+	if (WARN_ON(!iommu))
+		return -EINVAL;
+
+	mutex_lock(&pasid_mutex);
+
+	svm = ioasid_find(NULL, pasid, NULL);
+	if (!svm)
+		goto out;	/* ret already holds -EINVAL */
+
+	if (IS_ERR(svm)) {
+		ret = PTR_ERR(svm);
+		goto out;
+	}
+
+	for_each_svm_dev(sdev, svm, dev) {
+		ret = 0;
+		if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX))
+			sdev->users--;
+
+		/* Other users of this binding remain: nothing to tear down */
+		if (sdev->users)
+			break;
+
+		list_del_rcu(&sdev->list);
+		intel_pasid_tear_down_entry(iommu, dev,
+					    svm->pasid, false);
+		intel_svm_drain_prq(dev, svm->pasid);
+		kfree_rcu(sdev, rcu);
+
+		if (list_empty(&svm->devs)) {
+			/*
+			 * We do not free the IOASID here in that
+			 * IOMMU driver did not allocate it.
+			 * Unlike native SVM, IOASID for guest use was
+			 * allocated prior to the bind call.
+			 * In any case, if the free call comes before
+			 * the unbind, IOMMU driver will get notified
+			 * and perform cleanup.
+			 */
+			ioasid_set_data(pasid, NULL);
+			kfree(svm);
+		}
+		break;
+	}
+out:
+	mutex_unlock(&pasid_mutex);
+	return ret;
+}
+
+/* Caller must hold pasid_mutex, mm reference */
+static int
+intel_svm_bind_mm(struct device *dev, int flags, struct svm_dev_ops *ops,
+ struct mm_struct *mm, struct intel_svm_dev **sd)
{
struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
struct device_domain_info *info;
struct intel_svm_dev *sdev;
struct intel_svm *svm = NULL;
- struct mm_struct *mm = NULL;
int pasid_max;
int ret;
@@ -249,16 +450,15 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
} else
pasid_max = 1 << 20;
+ /* Bind supervisor PASID should have mm = NULL */
if (flags & SVM_FLAG_SUPERVISOR_MODE) {
- if (!ecap_srs(iommu->ecap))
+ if (!ecap_srs(iommu->ecap) || mm) {
+ pr_err("Supervisor PASID with user provided mm.\n");
return -EINVAL;
- } else if (pasid) {
- mm = get_task_mm(current);
- BUG_ON(!mm);
+ }
}
- mutex_lock(&pasid_mutex);
- if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
+ if (!(flags & SVM_FLAG_PRIVATE_PASID)) {
struct intel_svm *t;
list_for_each_entry(t, &global_svm_list, list) {
@@ -296,19 +496,12 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
sdev->dev = dev;
ret = intel_iommu_enable_pasid(iommu, dev);
- if (ret || !pasid) {
- /* If they don't actually want to assign a PASID, this is
- * just an enabling check/preparation. */
+ if (ret) {
kfree(sdev);
goto out;
}
- info = dev->archdata.iommu;
- if (!info || !info->pasid_supported) {
- kfree(sdev);
- goto out;
- }
-
+ info = get_domain_info(dev);
sdev->did = FLPT_DEFAULT_DID;
sdev->sid = PCI_DEVID(info->bus, info->devfn);
if (info->ats_enabled) {
@@ -397,26 +590,24 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
}
}
list_add_rcu(&sdev->list, &svm->devs);
-
- success:
- *pasid = svm->pasid;
+success:
+ sdev->pasid = svm->pasid;
+ sdev->sva.dev = dev;
+ if (sd)
+ *sd = sdev;
ret = 0;
out:
- mutex_unlock(&pasid_mutex);
- if (mm)
- mmput(mm);
return ret;
}
-EXPORT_SYMBOL_GPL(intel_svm_bind_mm);
-int intel_svm_unbind_mm(struct device *dev, int pasid)
+/* Caller must hold pasid_mutex */
+static int intel_svm_unbind_mm(struct device *dev, int pasid)
{
struct intel_svm_dev *sdev;
struct intel_iommu *iommu;
struct intel_svm *svm;
int ret = -EINVAL;
- mutex_lock(&pasid_mutex);
iommu = intel_svm_device_to_iommu(dev);
if (!iommu)
goto out;
@@ -442,8 +633,9 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
* to use. We have a *shared* PASID table, because it's
* large and has to be physically contiguous. So it's
* hard to be as defensive as we might like. */
- intel_pasid_tear_down_entry(iommu, dev, svm->pasid);
- intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
+ intel_pasid_tear_down_entry(iommu, dev,
+ svm->pasid, false);
+ intel_svm_drain_prq(dev, svm->pasid);
kfree_rcu(sdev, rcu);
if (list_empty(&svm->devs)) {
@@ -462,45 +654,9 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
break;
}
out:
- mutex_unlock(&pasid_mutex);
return ret;
}
-EXPORT_SYMBOL_GPL(intel_svm_unbind_mm);
-
-int intel_svm_is_pasid_valid(struct device *dev, int pasid)
-{
- struct intel_iommu *iommu;
- struct intel_svm *svm;
- int ret = -EINVAL;
-
- mutex_lock(&pasid_mutex);
- iommu = intel_svm_device_to_iommu(dev);
- if (!iommu)
- goto out;
-
- svm = ioasid_find(NULL, pasid, NULL);
- if (!svm)
- goto out;
-
- if (IS_ERR(svm)) {
- ret = PTR_ERR(svm);
- goto out;
- }
- /* init_mm is used in this case */
- if (!svm->mm)
- ret = 1;
- else if (atomic_read(&svm->mm->mm_users) > 0)
- ret = 1;
- else
- ret = 0;
-
- out:
- mutex_unlock(&pasid_mutex);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(intel_svm_is_pasid_valid);
/* Page request queue descriptor */
struct page_req_dsc {
@@ -557,6 +713,93 @@ static bool is_canonical_address(u64 addr)
return (((saddr << shift) >> shift) == saddr);
}
+/**
+ * intel_svm_drain_prq - Drain page requests and responses for a pasid
+ * @dev: target device
+ * @pasid: pasid for draining
+ *
+ * Drain all pending page requests and responses related to @pasid in both
+ * software and hardware. This is supposed to be called after the device
+ * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB
+ * and DevTLB have been invalidated.
+ *
+ * It waits until all pending page requests for @pasid in the page fault
+ * queue are completed by the prq handling thread. Then follow the steps
+ * described in VT-d spec CH7.10 to drain all page requests and page
+ * responses pending in the hardware.
+ */
+static void intel_svm_drain_prq(struct device *dev, int pasid)
+{
+ struct device_domain_info *info;
+ struct dmar_domain *domain;
+ struct intel_iommu *iommu;
+ struct qi_desc desc[3];
+ struct pci_dev *pdev;
+ int head, tail;
+ u16 sid, did;
+ int qdep;
+
+ info = get_domain_info(dev);
+ if (WARN_ON(!info || !dev_is_pci(dev)))
+ return;
+
+ if (!info->pri_enabled)
+ return;
+
+ iommu = info->iommu;
+ domain = info->domain;
+ pdev = to_pci_dev(dev);
+ sid = PCI_DEVID(info->bus, info->devfn);
+ did = domain->iommu_did[iommu->seq_id];
+ qdep = pci_ats_queue_depth(pdev);
+
+ /*
+ * Check and wait until all pending page requests in the queue are
+ * handled by the prq handling thread.
+ */
+prq_retry:
+ reinit_completion(&iommu->prq_complete);
+ tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
+ head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
+ while (head != tail) {
+ struct page_req_dsc *req;
+
+ req = &iommu->prq[head / sizeof(*req)];
+ if (!req->pasid_present || req->pasid != pasid) {
+ head = (head + sizeof(*req)) & PRQ_RING_MASK;
+ continue;
+ }
+
+ wait_for_completion(&iommu->prq_complete);
+ goto prq_retry;
+ }
+
+ /*
+ * Perform steps described in VT-d spec CH7.10 to drain page
+ * requests and responses in hardware.
+ */
+ memset(desc, 0, sizeof(desc));
+ desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
+ QI_IWD_FENCE |
+ QI_IWD_TYPE;
+ desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
+ QI_EIOTLB_DID(did) |
+ QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
+ QI_EIOTLB_TYPE;
+ desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
+ QI_DEV_EIOTLB_SID(sid) |
+ QI_DEV_EIOTLB_QDEP(qdep) |
+ QI_DEIOTLB_TYPE |
+ QI_DEV_IOTLB_PFSID(info->pfsid);
+qi_retry:
+ reinit_completion(&iommu->prq_complete);
+ qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
+ if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
+ wait_for_completion(&iommu->prq_complete);
+ goto qi_retry;
+ }
+}
+
static irqreturn_t prq_event_thread(int irq, void *d)
{
struct intel_iommu *iommu = d;
@@ -685,12 +928,75 @@ static irqreturn_t prq_event_thread(int irq, void *d)
sizeof(req->priv_data));
resp.qw2 = 0;
resp.qw3 = 0;
- qi_submit_sync(&resp, iommu);
+ qi_submit_sync(iommu, &resp, 1, 0);
}
head = (head + sizeof(*req)) & PRQ_RING_MASK;
}
dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);
+ /*
+ * Clear the page request overflow bit and wake up all threads that
+ * are waiting for the completion of this handling.
+ */
+ if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO)
+ writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
+
+ if (!completion_done(&iommu->prq_complete))
+ complete(&iommu->prq_complete);
+
return IRQ_RETVAL(handled);
}
+
+#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)
+struct iommu_sva *
+intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
+{
+ struct iommu_sva *sva = ERR_PTR(-EINVAL);
+ struct intel_svm_dev *sdev = NULL;
+ int flags = 0;
+ int ret;
+
+ /*
+ * TODO: Consolidate with generic iommu-sva bind after it is merged.
+ * It will require shared SVM data structures, i.e. combine io_mm
+ * and intel_svm etc.
+ */
+ if (drvdata)
+ flags = *(int *)drvdata;
+ mutex_lock(&pasid_mutex);
+ ret = intel_svm_bind_mm(dev, flags, NULL, mm, &sdev);
+ if (ret)
+ sva = ERR_PTR(ret);
+ else if (sdev)
+ sva = &sdev->sva;
+ else
+ WARN(!sdev, "SVM bind succeeded with no sdev!\n");
+
+ mutex_unlock(&pasid_mutex);
+
+ return sva;
+}
+
+void intel_svm_unbind(struct iommu_sva *sva)
+{
+ struct intel_svm_dev *sdev;
+
+ mutex_lock(&pasid_mutex);
+ sdev = to_intel_svm_dev(sva);
+ intel_svm_unbind_mm(sdev->dev, sdev->pasid);
+ mutex_unlock(&pasid_mutex);
+}
+
+int intel_svm_get_pasid(struct iommu_sva *sva)
+{
+ struct intel_svm_dev *sdev;
+ int pasid;
+
+ mutex_lock(&pasid_mutex);
+ sdev = to_intel_svm_dev(sva);
+ pasid = sdev->pasid;
+ mutex_unlock(&pasid_mutex);
+
+ return pasid;
+}
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 81e43c1..a042f12 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -151,7 +151,7 @@ static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
desc.qw2 = 0;
desc.qw3 = 0;
- return qi_submit_sync(&desc, iommu);
+ return qi_submit_sync(iommu, &desc, 1, 0);
}
static int modify_irte(struct irq_2_iommu *irq_iommu,
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 03d6a26..b5ea203 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -44,6 +44,7 @@ struct iommu_group {
int id;
struct iommu_domain *default_domain;
struct iommu_domain *domain;
+ struct list_head entry;
};
struct group_device {
@@ -79,6 +80,20 @@ static bool iommu_cmd_line_dma_api(void)
return !!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API);
}
+static int iommu_alloc_default_domain(struct iommu_group *group,
+ struct device *dev);
+static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
+ unsigned type);
+static int __iommu_attach_device(struct iommu_domain *domain,
+ struct device *dev);
+static int __iommu_attach_group(struct iommu_domain *domain,
+ struct iommu_group *group);
+static void __iommu_detach_group(struct iommu_domain *domain,
+ struct iommu_group *group);
+static int iommu_create_device_direct_mappings(struct iommu_group *group,
+ struct device *dev);
+static struct iommu_group *iommu_group_get_for_dev(struct device *dev);
+
#define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \
struct iommu_group_attribute iommu_group_attr_##_name = \
__ATTR(_name, _mode, _show, _store)
@@ -175,58 +190,119 @@ static void dev_iommu_free(struct device *dev)
dev->iommu = NULL;
}
-int iommu_probe_device(struct device *dev)
+static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
{
const struct iommu_ops *ops = dev->bus->iommu_ops;
+ struct iommu_device *iommu_dev;
+ struct iommu_group *group;
int ret;
- WARN_ON(dev->iommu_group);
if (!ops)
- return -EINVAL;
+ return -ENODEV;
if (!dev_iommu_get(dev))
return -ENOMEM;
if (!try_module_get(ops->owner)) {
ret = -EINVAL;
- goto err_free_dev_param;
+ goto err_free;
}
- ret = ops->add_device(dev);
- if (ret)
- goto err_module_put;
+ iommu_dev = ops->probe_device(dev);
+ if (IS_ERR(iommu_dev)) {
+ ret = PTR_ERR(iommu_dev);
+ goto out_module_put;
+ }
+
+ dev->iommu->iommu_dev = iommu_dev;
+
+ group = iommu_group_get_for_dev(dev);
+ if (IS_ERR(group)) {
+ ret = PTR_ERR(group);
+ goto out_release;
+ }
+ iommu_group_put(group);
+
+ if (group_list && !group->default_domain && list_empty(&group->entry))
+ list_add_tail(&group->entry, group_list);
+
+ iommu_device_link(iommu_dev, dev);
return 0;
-err_module_put:
+out_release:
+ ops->release_device(dev);
+
+out_module_put:
module_put(ops->owner);
-err_free_dev_param:
+
+err_free:
dev_iommu_free(dev);
+
return ret;
}
+int iommu_probe_device(struct device *dev)
+{
+ const struct iommu_ops *ops = dev->bus->iommu_ops;
+ struct iommu_group *group;
+ int ret;
+
+ ret = __iommu_probe_device(dev, NULL);
+ if (ret)
+ goto err_out;
+
+ group = iommu_group_get(dev);
+ if (!group)
+ goto err_release;
+
+ /*
+ * Try to allocate a default domain - needs support from the
+ * IOMMU driver. There are still some drivers which don't
+ * support default domains, so the return value is not yet
+ * checked.
+ */
+ iommu_alloc_default_domain(group, dev);
+
+ if (group->default_domain)
+ ret = __iommu_attach_device(group->default_domain, dev);
+
+ iommu_create_device_direct_mappings(group, dev);
+
+ iommu_group_put(group);
+
+ if (ret)
+ goto err_release;
+
+ if (ops->probe_finalize)
+ ops->probe_finalize(dev);
+
+ return 0;
+
+err_release:
+ iommu_release_device(dev);
+
+err_out:
+ return ret;
+
+}
+
void iommu_release_device(struct device *dev)
{
const struct iommu_ops *ops = dev->bus->iommu_ops;
- if (dev->iommu_group)
- ops->remove_device(dev);
+ if (!dev->iommu)
+ return;
- if (dev->iommu) {
- module_put(ops->owner);
- dev_iommu_free(dev);
- }
+ iommu_device_unlink(dev->iommu->iommu_dev, dev);
+ iommu_group_remove_device(dev);
+
+ ops->release_device(dev);
+
+ module_put(ops->owner);
+ dev_iommu_free(dev);
}
-static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
- unsigned type);
-static int __iommu_attach_device(struct iommu_domain *domain,
- struct device *dev);
-static int __iommu_attach_group(struct iommu_domain *domain,
- struct iommu_group *group);
-static void __iommu_detach_group(struct iommu_domain *domain,
- struct iommu_group *group);
-
static int __init iommu_set_def_domain_type(char *str)
{
bool pt;
@@ -497,6 +573,7 @@ struct iommu_group *iommu_group_alloc(void)
group->kobj.kset = iommu_group_kset;
mutex_init(&group->mutex);
INIT_LIST_HEAD(&group->devices);
+ INIT_LIST_HEAD(&group->entry);
BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
ret = ida_simple_get(&iommu_group_ida, 0, 0, GFP_KERNEL);
@@ -638,8 +715,8 @@ int iommu_group_set_name(struct iommu_group *group, const char *name)
}
EXPORT_SYMBOL_GPL(iommu_group_set_name);
-static int iommu_group_create_direct_mappings(struct iommu_group *group,
- struct device *dev)
+static int iommu_create_device_direct_mappings(struct iommu_group *group,
+ struct device *dev)
{
struct iommu_domain *domain = group->default_domain;
struct iommu_resv_region *entry;
@@ -752,8 +829,6 @@ int iommu_group_add_device(struct iommu_group *group, struct device *dev)
dev->iommu_group = group;
- iommu_group_create_direct_mappings(group, dev);
-
mutex_lock(&group->mutex);
list_add_tail(&device->list, &group->devices);
if (group->domain && !iommu_is_attach_deferred(group->domain, dev))
@@ -1371,6 +1446,61 @@ struct iommu_group *fsl_mc_device_group(struct device *dev)
}
EXPORT_SYMBOL_GPL(fsl_mc_device_group);
+static int iommu_get_def_domain_type(struct device *dev)
+{
+ const struct iommu_ops *ops = dev->bus->iommu_ops;
+ unsigned int type = 0;
+
+ if (ops->def_domain_type)
+ type = ops->def_domain_type(dev);
+
+ return (type == 0) ? iommu_def_domain_type : type;
+}
+
+static int iommu_group_alloc_default_domain(struct bus_type *bus,
+ struct iommu_group *group,
+ unsigned int type)
+{
+ struct iommu_domain *dom;
+
+ dom = __iommu_domain_alloc(bus, type);
+ if (!dom && type != IOMMU_DOMAIN_DMA) {
+ dom = __iommu_domain_alloc(bus, IOMMU_DOMAIN_DMA);
+ if (dom)
+ pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA",
+ type, group->name);
+ }
+
+ if (!dom)
+ return -ENOMEM;
+
+ group->default_domain = dom;
+ if (!group->domain)
+ group->domain = dom;
+
+ if (!iommu_dma_strict) {
+ int attr = 1;
+ iommu_domain_set_attr(dom,
+ DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
+ &attr);
+ }
+
+ return 0;
+}
+
+static int iommu_alloc_default_domain(struct iommu_group *group,
+ struct device *dev)
+{
+ unsigned int type;
+
+ if (group->default_domain)
+ return 0;
+
+ type = iommu_get_def_domain_type(dev);
+
+ return iommu_group_alloc_default_domain(dev->bus, group, type);
+}
+
/**
* iommu_group_get_for_dev - Find or create the IOMMU group for a device
* @dev: target device
@@ -1381,7 +1511,7 @@ EXPORT_SYMBOL_GPL(fsl_mc_device_group);
* to the returned IOMMU group, which will already include the provided
* device. The reference should be released with iommu_group_put().
*/
-struct iommu_group *iommu_group_get_for_dev(struct device *dev)
+static struct iommu_group *iommu_group_get_for_dev(struct device *dev)
{
const struct iommu_ops *ops = dev->bus->iommu_ops;
struct iommu_group *group;
@@ -1401,59 +1531,37 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev)
if (IS_ERR(group))
return group;
- /*
- * Try to allocate a default domain - needs support from the
- * IOMMU driver.
- */
- if (!group->default_domain) {
- struct iommu_domain *dom;
-
- dom = __iommu_domain_alloc(dev->bus, iommu_def_domain_type);
- if (!dom && iommu_def_domain_type != IOMMU_DOMAIN_DMA) {
- dom = __iommu_domain_alloc(dev->bus, IOMMU_DOMAIN_DMA);
- if (dom) {
- dev_warn(dev,
- "failed to allocate default IOMMU domain of type %u; falling back to IOMMU_DOMAIN_DMA",
- iommu_def_domain_type);
- }
- }
-
- group->default_domain = dom;
- if (!group->domain)
- group->domain = dom;
-
- if (dom && !iommu_dma_strict) {
- int attr = 1;
- iommu_domain_set_attr(dom,
- DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
- &attr);
- }
- }
-
ret = iommu_group_add_device(group, dev);
- if (ret) {
- iommu_group_put(group);
- return ERR_PTR(ret);
- }
+ if (ret)
+ goto out_put_group;
return group;
+
+out_put_group:
+ iommu_group_put(group);
+
+ return ERR_PTR(ret);
}
-EXPORT_SYMBOL_GPL(iommu_group_get_for_dev);
struct iommu_domain *iommu_group_default_domain(struct iommu_group *group)
{
return group->default_domain;
}
-static int add_iommu_group(struct device *dev, void *data)
+static int probe_iommu_group(struct device *dev, void *data)
{
- int ret = iommu_probe_device(dev);
+ struct list_head *group_list = data;
+ struct iommu_group *group;
+ int ret;
- /*
- * We ignore -ENODEV errors for now, as they just mean that the
- * device is not translated by an IOMMU. We still care about
- * other errors and fail to initialize when they happen.
- */
+ /* Device is probed already if in a group */
+ group = iommu_group_get(dev);
+ if (group) {
+ iommu_group_put(group);
+ return 0;
+ }
+
+ ret = __iommu_probe_device(dev, group_list);
if (ret == -ENODEV)
ret = 0;
@@ -1519,10 +1627,148 @@ static int iommu_bus_notifier(struct notifier_block *nb,
return 0;
}
+struct __group_domain_type {
+ struct device *dev;
+ unsigned int type;
+};
+
+static int probe_get_default_domain_type(struct device *dev, void *data)
+{
+ const struct iommu_ops *ops = dev->bus->iommu_ops;
+ struct __group_domain_type *gtype = data;
+ unsigned int type = 0;
+
+ if (ops->def_domain_type)
+ type = ops->def_domain_type(dev);
+
+ if (type) {
+ if (gtype->type && gtype->type != type) {
+ dev_warn(dev, "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n",
+ iommu_domain_type_str(type),
+ dev_name(gtype->dev),
+ iommu_domain_type_str(gtype->type));
+ gtype->type = 0;
+ }
+
+ if (!gtype->dev) {
+ gtype->dev = dev;
+ gtype->type = type;
+ }
+ }
+
+ return 0;
+}
+
+static void probe_alloc_default_domain(struct bus_type *bus,
+ struct iommu_group *group)
+{
+ struct __group_domain_type gtype;
+
+ memset(&gtype, 0, sizeof(gtype));
+
+ /* Ask for default domain requirements of all devices in the group */
+ __iommu_group_for_each_dev(group, &gtype,
+ probe_get_default_domain_type);
+
+ if (!gtype.type)
+ gtype.type = iommu_def_domain_type;
+
+ iommu_group_alloc_default_domain(bus, group, gtype.type);
+
+}
+
+static int iommu_group_do_dma_attach(struct device *dev, void *data)
+{
+ struct iommu_domain *domain = data;
+
+ return __iommu_attach_device(domain, dev);
+}
+
+static int __iommu_group_dma_attach(struct iommu_group *group)
+{
+ return __iommu_group_for_each_dev(group, group->default_domain,
+ iommu_group_do_dma_attach);
+}
+
+static int iommu_group_do_probe_finalize(struct device *dev, void *data)
+{
+ struct iommu_domain *domain = data;
+
+ if (domain->ops->probe_finalize)
+ domain->ops->probe_finalize(dev);
+
+ return 0;
+}
+
+static void __iommu_group_dma_finalize(struct iommu_group *group)
+{
+ __iommu_group_for_each_dev(group, group->default_domain,
+ iommu_group_do_probe_finalize);
+}
+
+static int iommu_do_create_direct_mappings(struct device *dev, void *data)
+{
+ struct iommu_group *group = data;
+
+ iommu_create_device_direct_mappings(group, dev);
+
+ return 0;
+}
+
+static int iommu_group_create_direct_mappings(struct iommu_group *group)
+{
+ return __iommu_group_for_each_dev(group, group,
+ iommu_do_create_direct_mappings);
+}
+
+int bus_iommu_probe(struct bus_type *bus)
+{
+ struct iommu_group *group, *next;
+ LIST_HEAD(group_list);
+ int ret;
+
+ /*
+ * This code-path does not allocate the default domain when
+ * creating the iommu group, so do it after the groups are
+ * created.
+ */
+ ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group);
+ if (ret)
+ return ret;
+
+ list_for_each_entry_safe(group, next, &group_list, entry) {
+ /* Remove item from the list */
+ list_del_init(&group->entry);
+
+ mutex_lock(&group->mutex);
+
+ /* Try to allocate default domain */
+ probe_alloc_default_domain(bus, group);
+
+ if (!group->default_domain) {
+ mutex_unlock(&group->mutex);
+ continue;
+ }
+
+ iommu_group_create_direct_mappings(group);
+
+ ret = __iommu_group_dma_attach(group);
+
+ mutex_unlock(&group->mutex);
+
+ if (ret)
+ break;
+
+ __iommu_group_dma_finalize(group);
+ }
+
+ return ret;
+}
+
static int iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops)
{
- int err;
struct notifier_block *nb;
+ int err;
nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL);
if (!nb)
@@ -1534,7 +1780,7 @@ static int iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops)
if (err)
goto out_free;
- err = bus_for_each_dev(bus, NULL, NULL, add_iommu_group);
+ err = bus_iommu_probe(bus);
if (err)
goto out_err;
@@ -2301,71 +2547,6 @@ struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start,
}
EXPORT_SYMBOL_GPL(iommu_alloc_resv_region);
-static int
-request_default_domain_for_dev(struct device *dev, unsigned long type)
-{
- struct iommu_domain *domain;
- struct iommu_group *group;
- int ret;
-
- /* Device must already be in a group before calling this function */
- group = iommu_group_get(dev);
- if (!group)
- return -EINVAL;
-
- mutex_lock(&group->mutex);
-
- ret = 0;
- if (group->default_domain && group->default_domain->type == type)
- goto out;
-
- /* Don't change mappings of existing devices */
- ret = -EBUSY;
- if (iommu_group_device_count(group) != 1)
- goto out;
-
- ret = -ENOMEM;
- domain = __iommu_domain_alloc(dev->bus, type);
- if (!domain)
- goto out;
-
- /* Attach the device to the domain */
- ret = __iommu_attach_group(domain, group);
- if (ret) {
- iommu_domain_free(domain);
- goto out;
- }
-
- /* Make the domain the default for this group */
- if (group->default_domain)
- iommu_domain_free(group->default_domain);
- group->default_domain = domain;
-
- iommu_group_create_direct_mappings(group, dev);
-
- dev_info(dev, "Using iommu %s mapping\n",
- type == IOMMU_DOMAIN_DMA ? "dma" : "direct");
-
- ret = 0;
-out:
- mutex_unlock(&group->mutex);
- iommu_group_put(group);
-
- return ret;
-}
-
-/* Request that a device is direct mapped by the IOMMU */
-int iommu_request_dm_for_dev(struct device *dev)
-{
- return request_default_domain_for_dev(dev, IOMMU_DOMAIN_IDENTITY);
-}
-
-/* Request that a device can't be direct mapped by the IOMMU */
-int iommu_request_dma_domain_for_dev(struct device *dev)
-{
- return request_default_domain_for_dev(dev, IOMMU_DOMAIN_DMA);
-}
-
void iommu_set_default_passthrough(bool cmd_line)
{
if (cmd_line)
@@ -2643,17 +2824,6 @@ void iommu_sva_unbind_device(struct iommu_sva *handle)
}
EXPORT_SYMBOL_GPL(iommu_sva_unbind_device);
-int iommu_sva_set_ops(struct iommu_sva *handle,
- const struct iommu_sva_ops *sva_ops)
-{
- if (handle->ops && handle->ops != sva_ops)
- return -EEXIST;
-
- handle->ops = sva_ops;
- return 0;
-}
-EXPORT_SYMBOL_GPL(iommu_sva_set_ops);
-
int iommu_sva_get_pasid(struct iommu_sva *handle)
{
const struct iommu_ops *ops = handle->dev->bus->iommu_ops;
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 0e6a953..49fc01f 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -253,7 +253,7 @@ int iova_cache_get(void)
SLAB_HWCACHE_ALIGN, NULL);
if (!iova_cache) {
mutex_unlock(&iova_cache_mutex);
- printk(KERN_ERR "Couldn't create iova cache\n");
+ pr_err("Couldn't create iova cache\n");
return -ENOMEM;
}
}
@@ -718,8 +718,8 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)
new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi);
if (!new_iova)
- printk(KERN_ERR "Reserve iova range %lx@%lx failed\n",
- iova->pfn_lo, iova->pfn_lo);
+ pr_err("Reserve iova range %lx@%lx failed\n",
+ iova->pfn_lo, iova->pfn_lo);
}
spin_unlock_irqrestore(&from->iova_rbtree_lock, flags);
}
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 310cf09..4c2972f 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -805,24 +805,8 @@ static int ipmmu_of_xlate(struct device *dev,
static int ipmmu_init_arm_mapping(struct device *dev)
{
struct ipmmu_vmsa_device *mmu = to_ipmmu(dev);
- struct iommu_group *group;
int ret;
- /* Create a device group and add the device to it. */
- group = iommu_group_alloc();
- if (IS_ERR(group)) {
- dev_err(dev, "Failed to allocate IOMMU group\n");
- return PTR_ERR(group);
- }
-
- ret = iommu_group_add_device(group, dev);
- iommu_group_put(group);
-
- if (ret < 0) {
- dev_err(dev, "Failed to add device to IPMMU group\n");
- return ret;
- }
-
/*
* Create the ARM mapping, used by the ARM DMA mapping core to allocate
* VAs. This will allocate a corresponding IOMMU domain.
@@ -856,48 +840,39 @@ static int ipmmu_init_arm_mapping(struct device *dev)
return 0;
error:
- iommu_group_remove_device(dev);
if (mmu->mapping)
arm_iommu_release_mapping(mmu->mapping);
return ret;
}
-static int ipmmu_add_device(struct device *dev)
+static struct iommu_device *ipmmu_probe_device(struct device *dev)
{
struct ipmmu_vmsa_device *mmu = to_ipmmu(dev);
- struct iommu_group *group;
- int ret;
/*
* Only let through devices that have been verified in xlate()
*/
if (!mmu)
- return -ENODEV;
+ return ERR_PTR(-ENODEV);
- if (IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA)) {
- ret = ipmmu_init_arm_mapping(dev);
- if (ret)
- return ret;
- } else {
- group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group))
- return PTR_ERR(group);
-
- iommu_group_put(group);
- }
-
- iommu_device_link(&mmu->iommu, dev);
- return 0;
+ return &mmu->iommu;
}
-static void ipmmu_remove_device(struct device *dev)
+static void ipmmu_probe_finalize(struct device *dev)
{
- struct ipmmu_vmsa_device *mmu = to_ipmmu(dev);
+ int ret = 0;
- iommu_device_unlink(&mmu->iommu, dev);
+ if (IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA))
+ ret = ipmmu_init_arm_mapping(dev);
+
+ if (ret)
+ dev_err(dev, "Can't create IOMMU mapping - DMA-OPS will not work\n");
+}
+
+static void ipmmu_release_device(struct device *dev)
+{
arm_iommu_detach_device(dev);
- iommu_group_remove_device(dev);
}
static struct iommu_group *ipmmu_find_group(struct device *dev)
@@ -925,9 +900,11 @@ static const struct iommu_ops ipmmu_ops = {
.flush_iotlb_all = ipmmu_flush_iotlb_all,
.iotlb_sync = ipmmu_iotlb_sync,
.iova_to_phys = ipmmu_iova_to_phys,
- .add_device = ipmmu_add_device,
- .remove_device = ipmmu_remove_device,
- .device_group = ipmmu_find_group,
+ .probe_device = ipmmu_probe_device,
+ .release_device = ipmmu_release_device,
+ .probe_finalize = ipmmu_probe_finalize,
+ .device_group = IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA)
+ ? generic_device_group : ipmmu_find_group,
.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K,
.of_xlate = ipmmu_of_xlate,
};
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 94a6df1..3d8a635 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -34,7 +34,7 @@ __asm__ __volatile__ ( \
/* bitmap of the page sizes currently supported */
#define MSM_IOMMU_PGSIZES (SZ_4K | SZ_64K | SZ_1M | SZ_16M)
-DEFINE_SPINLOCK(msm_iommu_lock);
+static DEFINE_SPINLOCK(msm_iommu_lock);
static LIST_HEAD(qcom_iommu_devices);
static struct iommu_ops msm_iommu_ops;
@@ -388,43 +388,23 @@ static struct msm_iommu_dev *find_iommu_for_dev(struct device *dev)
return ret;
}
-static int msm_iommu_add_device(struct device *dev)
+static struct iommu_device *msm_iommu_probe_device(struct device *dev)
{
struct msm_iommu_dev *iommu;
- struct iommu_group *group;
unsigned long flags;
spin_lock_irqsave(&msm_iommu_lock, flags);
iommu = find_iommu_for_dev(dev);
spin_unlock_irqrestore(&msm_iommu_lock, flags);
- if (iommu)
- iommu_device_link(&iommu->iommu, dev);
- else
- return -ENODEV;
+ if (!iommu)
+ return ERR_PTR(-ENODEV);
- group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group))
- return PTR_ERR(group);
-
- iommu_group_put(group);
-
- return 0;
+ return &iommu->iommu;
}
-static void msm_iommu_remove_device(struct device *dev)
+static void msm_iommu_release_device(struct device *dev)
{
- struct msm_iommu_dev *iommu;
- unsigned long flags;
-
- spin_lock_irqsave(&msm_iommu_lock, flags);
- iommu = find_iommu_for_dev(dev);
- spin_unlock_irqrestore(&msm_iommu_lock, flags);
-
- if (iommu)
- iommu_device_unlink(&iommu->iommu, dev);
-
- iommu_group_remove_device(dev);
}
static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
@@ -708,8 +688,8 @@ static struct iommu_ops msm_iommu_ops = {
*/
.iotlb_sync = NULL,
.iova_to_phys = msm_iommu_iova_to_phys,
- .add_device = msm_iommu_add_device,
- .remove_device = msm_iommu_remove_device,
+ .probe_device = msm_iommu_probe_device,
+ .release_device = msm_iommu_release_device,
.device_group = generic_device_group,
.pgsize_bitmap = MSM_IOMMU_PGSIZES,
.of_xlate = qcom_iommu_of_xlate,
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 5f4d6df..2be96f1 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -441,38 +441,26 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
return pa;
}
-static int mtk_iommu_add_device(struct device *dev)
+static struct iommu_device *mtk_iommu_probe_device(struct device *dev)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct mtk_iommu_data *data;
- struct iommu_group *group;
if (!fwspec || fwspec->ops != &mtk_iommu_ops)
- return -ENODEV; /* Not a iommu client device */
+ return ERR_PTR(-ENODEV); /* Not a iommu client device */
data = dev_iommu_priv_get(dev);
- iommu_device_link(&data->iommu, dev);
- group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group))
- return PTR_ERR(group);
-
- iommu_group_put(group);
- return 0;
+ return &data->iommu;
}
-static void mtk_iommu_remove_device(struct device *dev)
+static void mtk_iommu_release_device(struct device *dev)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- struct mtk_iommu_data *data;
if (!fwspec || fwspec->ops != &mtk_iommu_ops)
return;
- data = dev_iommu_priv_get(dev);
- iommu_device_unlink(&data->iommu, dev);
-
- iommu_group_remove_device(dev);
iommu_fwspec_free(dev);
}
@@ -526,8 +514,8 @@ static const struct iommu_ops mtk_iommu_ops = {
.flush_iotlb_all = mtk_iommu_flush_iotlb_all,
.iotlb_sync = mtk_iommu_iotlb_sync,
.iova_to_phys = mtk_iommu_iova_to_phys,
- .add_device = mtk_iommu_add_device,
- .remove_device = mtk_iommu_remove_device,
+ .probe_device = mtk_iommu_probe_device,
+ .release_device = mtk_iommu_release_device,
.device_group = mtk_iommu_device_group,
.of_xlate = mtk_iommu_of_xlate,
.pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index a31be05..c9d79cff 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -265,10 +265,13 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain,
{
struct mtk_iommu_data *data = dev_iommu_priv_get(dev);
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+ struct dma_iommu_mapping *mtk_mapping;
int ret;
- if (!data)
- return -ENODEV;
+ /* Only allow the domain created internally. */
+ mtk_mapping = data->dev->archdata.iommu;
+ if (mtk_mapping->domain != domain)
+ return 0;
if (!data->m4u_dom) {
data->m4u_dom = dom;
@@ -288,9 +291,6 @@ static void mtk_iommu_detach_device(struct iommu_domain *domain,
{
struct mtk_iommu_data *data = dev_iommu_priv_get(dev);
- if (!data)
- return;
-
mtk_iommu_config(data, dev, false);
}
@@ -416,14 +416,17 @@ static int mtk_iommu_create_mapping(struct device *dev,
return 0;
}
-static int mtk_iommu_add_device(struct device *dev)
+static int mtk_iommu_def_domain_type(struct device *dev)
+{
+ return IOMMU_DOMAIN_UNMANAGED;
+}
+
+static struct iommu_device *mtk_iommu_probe_device(struct device *dev)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- struct dma_iommu_mapping *mtk_mapping;
struct of_phandle_args iommu_spec;
struct of_phandle_iterator it;
struct mtk_iommu_data *data;
- struct iommu_group *group;
int err;
of_for_each_phandle(&it, err, dev->of_node, "iommus",
@@ -442,46 +445,34 @@ static int mtk_iommu_add_device(struct device *dev)
}
if (!fwspec || fwspec->ops != &mtk_iommu_ops)
- return -ENODEV; /* Not a iommu client device */
-
- /*
- * This is a short-term bodge because the ARM DMA code doesn't
- * understand multi-device groups, but we have to call into it
- * successfully (and not just rely on a normal IOMMU API attach
- * here) in order to set the correct DMA API ops on @dev.
- */
- group = iommu_group_alloc();
- if (IS_ERR(group))
- return PTR_ERR(group);
-
- err = iommu_group_add_device(group, dev);
- iommu_group_put(group);
- if (err)
- return err;
+ return ERR_PTR(-ENODEV); /* Not a iommu client device */
data = dev_iommu_priv_get(dev);
- mtk_mapping = data->dev->archdata.iommu;
- err = arm_iommu_attach_device(dev, mtk_mapping);
- if (err) {
- iommu_group_remove_device(dev);
- return err;
- }
- return iommu_device_link(&data->iommu, dev);
+ return &data->iommu;
}
-static void mtk_iommu_remove_device(struct device *dev)
+static void mtk_iommu_probe_finalize(struct device *dev)
+{
+ struct dma_iommu_mapping *mtk_mapping;
+ struct mtk_iommu_data *data;
+ int err;
+
+ data = dev_iommu_priv_get(dev);
+ mtk_mapping = data->dev->archdata.iommu;
+
+ err = arm_iommu_attach_device(dev, mtk_mapping);
+ if (err)
+ dev_err(dev, "Can't create IOMMU mapping - DMA-OPS will not work\n");
+}
+
+static void mtk_iommu_release_device(struct device *dev)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- struct mtk_iommu_data *data;
if (!fwspec || fwspec->ops != &mtk_iommu_ops)
return;
- data = dev_iommu_priv_get(dev);
- iommu_device_unlink(&data->iommu, dev);
-
- iommu_group_remove_device(dev);
iommu_fwspec_free(dev);
}
@@ -534,8 +525,11 @@ static const struct iommu_ops mtk_iommu_ops = {
.map = mtk_iommu_map,
.unmap = mtk_iommu_unmap,
.iova_to_phys = mtk_iommu_iova_to_phys,
- .add_device = mtk_iommu_add_device,
- .remove_device = mtk_iommu_remove_device,
+ .probe_device = mtk_iommu_probe_device,
+ .probe_finalize = mtk_iommu_probe_finalize,
+ .release_device = mtk_iommu_release_device,
+ .def_domain_type = mtk_iommu_def_domain_type,
+ .device_group = generic_device_group,
.pgsize_bitmap = ~0UL << MT2701_IOMMU_PAGE_SHIFT,
};
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 887fefc..c8282cc 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -35,15 +35,6 @@
static const struct iommu_ops omap_iommu_ops;
-struct orphan_dev {
- struct device *dev;
- struct list_head node;
-};
-
-static LIST_HEAD(orphan_dev_list);
-
-static DEFINE_SPINLOCK(orphan_lock);
-
#define to_iommu(dev) ((struct omap_iommu *)dev_get_drvdata(dev))
/* bitmap of the page sizes currently supported */
@@ -62,8 +53,6 @@ static DEFINE_SPINLOCK(orphan_lock);
static struct platform_driver omap_iommu_driver;
static struct kmem_cache *iopte_cachep;
-static int _omap_iommu_add_device(struct device *dev);
-
/**
* to_omap_domain - Get struct omap_iommu_domain from generic iommu_domain
* @dom: generic iommu domain handle
@@ -1177,7 +1166,6 @@ static int omap_iommu_probe(struct platform_device *pdev)
struct omap_iommu *obj;
struct resource *res;
struct device_node *of = pdev->dev.of_node;
- struct orphan_dev *orphan_dev, *tmp;
if (!of) {
pr_err("%s: only DT-based devices are supported\n", __func__);
@@ -1248,6 +1236,7 @@ static int omap_iommu_probe(struct platform_device *pdev)
goto out_group;
iommu_device_set_ops(&obj->iommu, &omap_iommu_ops);
+ iommu_device_set_fwnode(&obj->iommu, &of->fwnode);
err = iommu_device_register(&obj->iommu);
if (err)
@@ -1260,13 +1249,8 @@ static int omap_iommu_probe(struct platform_device *pdev)
dev_info(&pdev->dev, "%s registered\n", obj->name);
- list_for_each_entry_safe(orphan_dev, tmp, &orphan_dev_list, node) {
- err = _omap_iommu_add_device(orphan_dev->dev);
- if (!err) {
- list_del(&orphan_dev->node);
- kfree(orphan_dev);
- }
- }
+ /* Re-probe bus to probe device attached to this IOMMU */
+ bus_iommu_probe(&platform_bus_type);
return 0;
@@ -1657,17 +1641,13 @@ static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain,
return ret;
}
-static int _omap_iommu_add_device(struct device *dev)
+static struct iommu_device *omap_iommu_probe_device(struct device *dev)
{
struct omap_iommu_arch_data *arch_data, *tmp;
- struct omap_iommu *oiommu;
- struct iommu_group *group;
- struct device_node *np;
struct platform_device *pdev;
+ struct omap_iommu *oiommu;
+ struct device_node *np;
int num_iommus, i;
- int ret;
- struct orphan_dev *orphan_dev;
- unsigned long flags;
/*
* Allocate the archdata iommu structure for DT-based devices.
@@ -1676,7 +1656,7 @@ static int _omap_iommu_add_device(struct device *dev)
* IOMMU users.
*/
if (!dev->of_node)
- return 0;
+ return ERR_PTR(-ENODEV);
/*
* retrieve the count of IOMMU nodes using phandle size as element size
@@ -1689,43 +1669,27 @@ static int _omap_iommu_add_device(struct device *dev)
arch_data = kcalloc(num_iommus + 1, sizeof(*arch_data), GFP_KERNEL);
if (!arch_data)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
for (i = 0, tmp = arch_data; i < num_iommus; i++, tmp++) {
np = of_parse_phandle(dev->of_node, "iommus", i);
if (!np) {
kfree(arch_data);
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
}
pdev = of_find_device_by_node(np);
if (!pdev) {
of_node_put(np);
kfree(arch_data);
- spin_lock_irqsave(&orphan_lock, flags);
- list_for_each_entry(orphan_dev, &orphan_dev_list,
- node) {
- if (orphan_dev->dev == dev)
- break;
- }
- spin_unlock_irqrestore(&orphan_lock, flags);
-
- if (orphan_dev && orphan_dev->dev == dev)
- return -EPROBE_DEFER;
-
- orphan_dev = kzalloc(sizeof(*orphan_dev), GFP_KERNEL);
- orphan_dev->dev = dev;
- spin_lock_irqsave(&orphan_lock, flags);
- list_add(&orphan_dev->node, &orphan_dev_list);
- spin_unlock_irqrestore(&orphan_lock, flags);
- return -EPROBE_DEFER;
+ return ERR_PTR(-ENODEV);
}
oiommu = platform_get_drvdata(pdev);
if (!oiommu) {
of_node_put(np);
kfree(arch_data);
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
}
tmp->iommu_dev = oiommu;
@@ -1734,57 +1698,25 @@ static int _omap_iommu_add_device(struct device *dev)
of_node_put(np);
}
+ dev->archdata.iommu = arch_data;
+
/*
* use the first IOMMU alone for the sysfs device linking.
* TODO: Evaluate if a single iommu_group needs to be
* maintained for both IOMMUs
*/
oiommu = arch_data->iommu_dev;
- ret = iommu_device_link(&oiommu->iommu, dev);
- if (ret) {
- kfree(arch_data);
- return ret;
- }
- dev->archdata.iommu = arch_data;
-
- /*
- * IOMMU group initialization calls into omap_iommu_device_group, which
- * needs a valid dev->archdata.iommu pointer
- */
- group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group)) {
- iommu_device_unlink(&oiommu->iommu, dev);
- dev->archdata.iommu = NULL;
- kfree(arch_data);
- return PTR_ERR(group);
- }
- iommu_group_put(group);
-
- return 0;
+ return &oiommu->iommu;
}
-static int omap_iommu_add_device(struct device *dev)
-{
- int ret;
-
- ret = _omap_iommu_add_device(dev);
- if (ret == -EPROBE_DEFER)
- return 0;
-
- return ret;
-}
-
-static void omap_iommu_remove_device(struct device *dev)
+static void omap_iommu_release_device(struct device *dev)
{
struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
if (!dev->of_node || !arch_data)
return;
- iommu_device_unlink(&arch_data->iommu_dev->iommu, dev);
- iommu_group_remove_device(dev);
-
dev->archdata.iommu = NULL;
kfree(arch_data);
@@ -1795,6 +1727,9 @@ static struct iommu_group *omap_iommu_device_group(struct device *dev)
struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
struct iommu_group *group = ERR_PTR(-EINVAL);
+ if (!arch_data)
+ return ERR_PTR(-ENODEV);
+
if (arch_data->iommu_dev)
group = iommu_group_ref_get(arch_data->iommu_dev->group);
@@ -1809,8 +1744,8 @@ static const struct iommu_ops omap_iommu_ops = {
.map = omap_iommu_map,
.unmap = omap_iommu_unmap,
.iova_to_phys = omap_iommu_iova_to_phys,
- .add_device = omap_iommu_add_device,
- .remove_device = omap_iommu_remove_device,
+ .probe_device = omap_iommu_probe_device,
+ .release_device = omap_iommu_release_device,
.device_group = omap_iommu_device_group,
.pgsize_bitmap = OMAP_IOMMU_PGSIZES,
};
diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
index 5b3b270..c3e1fbd 100644
--- a/drivers/iommu/qcom_iommu.c
+++ b/drivers/iommu/qcom_iommu.c
@@ -524,14 +524,13 @@ static bool qcom_iommu_capable(enum iommu_cap cap)
}
}
-static int qcom_iommu_add_device(struct device *dev)
+static struct iommu_device *qcom_iommu_probe_device(struct device *dev)
{
struct qcom_iommu_dev *qcom_iommu = to_iommu(dev);
- struct iommu_group *group;
struct device_link *link;
if (!qcom_iommu)
- return -ENODEV;
+ return ERR_PTR(-ENODEV);
/*
* Establish the link between iommu and master, so that the
@@ -542,28 +541,19 @@ static int qcom_iommu_add_device(struct device *dev)
if (!link) {
dev_err(qcom_iommu->dev, "Unable to create device link between %s and %s\n",
dev_name(qcom_iommu->dev), dev_name(dev));
- return -ENODEV;
+ return ERR_PTR(-ENODEV);
}
- group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group))
- return PTR_ERR(group);
-
- iommu_group_put(group);
- iommu_device_link(&qcom_iommu->iommu, dev);
-
- return 0;
+ return &qcom_iommu->iommu;
}
-static void qcom_iommu_remove_device(struct device *dev)
+static void qcom_iommu_release_device(struct device *dev)
{
struct qcom_iommu_dev *qcom_iommu = to_iommu(dev);
if (!qcom_iommu)
return;
- iommu_device_unlink(&qcom_iommu->iommu, dev);
- iommu_group_remove_device(dev);
iommu_fwspec_free(dev);
}
@@ -619,8 +609,8 @@ static const struct iommu_ops qcom_iommu_ops = {
.flush_iotlb_all = qcom_iommu_flush_iotlb_all,
.iotlb_sync = qcom_iommu_iotlb_sync,
.iova_to_phys = qcom_iommu_iova_to_phys,
- .add_device = qcom_iommu_add_device,
- .remove_device = qcom_iommu_remove_device,
+ .probe_device = qcom_iommu_probe_device,
+ .release_device = qcom_iommu_release_device,
.device_group = generic_device_group,
.of_xlate = qcom_iommu_of_xlate,
.pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index b33cdd5..d25c248 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1054,40 +1054,28 @@ static void rk_iommu_domain_free(struct iommu_domain *domain)
kfree(rk_domain);
}
-static int rk_iommu_add_device(struct device *dev)
+static struct iommu_device *rk_iommu_probe_device(struct device *dev)
{
- struct iommu_group *group;
- struct rk_iommu *iommu;
struct rk_iommudata *data;
+ struct rk_iommu *iommu;
data = dev->archdata.iommu;
if (!data)
- return -ENODEV;
+ return ERR_PTR(-ENODEV);
iommu = rk_iommu_from_dev(dev);
- group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group))
- return PTR_ERR(group);
- iommu_group_put(group);
-
- iommu_device_link(&iommu->iommu, dev);
data->link = device_link_add(dev, iommu->dev,
DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME);
- return 0;
+ return &iommu->iommu;
}
-static void rk_iommu_remove_device(struct device *dev)
+static void rk_iommu_release_device(struct device *dev)
{
- struct rk_iommu *iommu;
struct rk_iommudata *data = dev->archdata.iommu;
- iommu = rk_iommu_from_dev(dev);
-
device_link_del(data->link);
- iommu_device_unlink(&iommu->iommu, dev);
- iommu_group_remove_device(dev);
}
static struct iommu_group *rk_iommu_device_group(struct device *dev)
@@ -1126,8 +1114,8 @@ static const struct iommu_ops rk_iommu_ops = {
.detach_dev = rk_iommu_detach_device,
.map = rk_iommu_map,
.unmap = rk_iommu_unmap,
- .add_device = rk_iommu_add_device,
- .remove_device = rk_iommu_remove_device,
+ .probe_device = rk_iommu_probe_device,
+ .release_device = rk_iommu_release_device,
.iova_to_phys = rk_iommu_iova_to_phys,
.device_group = rk_iommu_device_group,
.pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP,
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 1137f3d..610f082 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -166,21 +166,14 @@ static void s390_iommu_detach_device(struct iommu_domain *domain,
}
}
-static int s390_iommu_add_device(struct device *dev)
+static struct iommu_device *s390_iommu_probe_device(struct device *dev)
{
- struct iommu_group *group = iommu_group_get_for_dev(dev);
struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
- if (IS_ERR(group))
- return PTR_ERR(group);
-
- iommu_group_put(group);
- iommu_device_link(&zdev->iommu_dev, dev);
-
- return 0;
+ return &zdev->iommu_dev;
}
-static void s390_iommu_remove_device(struct device *dev)
+static void s390_iommu_release_device(struct device *dev)
{
struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
struct iommu_domain *domain;
@@ -191,7 +184,7 @@ static void s390_iommu_remove_device(struct device *dev)
* to vfio-pci and completing the VFIO_SET_IOMMU ioctl (which triggers
* the attach_dev), removing the device via
* "echo 1 > /sys/bus/pci/devices/.../remove" won't trigger detach_dev,
- * only remove_device will be called via the BUS_NOTIFY_REMOVED_DEVICE
+ * only release_device will be called via the BUS_NOTIFY_REMOVED_DEVICE
* notifier.
*
* So let's call detach_dev from here if it hasn't been called before.
@@ -201,9 +194,6 @@ static void s390_iommu_remove_device(struct device *dev)
if (domain)
s390_iommu_detach_device(domain, dev);
}
-
- iommu_device_unlink(&zdev->iommu_dev, dev);
- iommu_group_remove_device(dev);
}
static int s390_iommu_update_trans(struct s390_domain *s390_domain,
@@ -373,8 +363,8 @@ static const struct iommu_ops s390_iommu_ops = {
.map = s390_iommu_map,
.unmap = s390_iommu_unmap,
.iova_to_phys = s390_iommu_iova_to_phys,
- .add_device = s390_iommu_add_device,
- .remove_device = s390_iommu_remove_device,
+ .probe_device = s390_iommu_probe_device,
+ .release_device = s390_iommu_release_device,
.device_group = generic_device_group,
.pgsize_bitmap = S390_IOMMU_PGSIZES,
};
diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c
new file mode 100644
index 0000000..fce605e
--- /dev/null
+++ b/drivers/iommu/sun50i-iommu.c
@@ -0,0 +1,1023 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (C) 2016-2018, Allwinner Technology CO., LTD.
+// Copyright (C) 2019-2020, Cerno
+
+#include <linux/bitfield.h>
+#include <linux/bug.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/dma-direction.h>
+#include <linux/dma-iommu.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/iommu.h>
+#include <linux/iopoll.h>
+#include <linux/ioport.h>
+#include <linux/log2.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#define IOMMU_RESET_REG 0x010
+#define IOMMU_ENABLE_REG 0x020
+#define IOMMU_ENABLE_ENABLE BIT(0)
+
+#define IOMMU_BYPASS_REG 0x030
+#define IOMMU_AUTO_GATING_REG 0x040
+#define IOMMU_AUTO_GATING_ENABLE BIT(0)
+
+#define IOMMU_WBUF_CTRL_REG 0x044
+#define IOMMU_OOO_CTRL_REG 0x048
+#define IOMMU_4KB_BDY_PRT_CTRL_REG 0x04c
+#define IOMMU_TTB_REG 0x050
+#define IOMMU_TLB_ENABLE_REG 0x060
+#define IOMMU_TLB_PREFETCH_REG 0x070
+#define IOMMU_TLB_PREFETCH_MASTER_ENABLE(m) BIT(m)
+
+#define IOMMU_TLB_FLUSH_REG 0x080
+#define IOMMU_TLB_FLUSH_PTW_CACHE BIT(17)
+#define IOMMU_TLB_FLUSH_MACRO_TLB BIT(16)
+#define IOMMU_TLB_FLUSH_MICRO_TLB(i) (BIT(i) & GENMASK(5, 0))
+
+#define IOMMU_TLB_IVLD_ADDR_REG 0x090
+#define IOMMU_TLB_IVLD_ADDR_MASK_REG 0x094
+#define IOMMU_TLB_IVLD_ENABLE_REG 0x098
+#define IOMMU_TLB_IVLD_ENABLE_ENABLE BIT(0)
+
+#define IOMMU_PC_IVLD_ADDR_REG 0x0a0
+#define IOMMU_PC_IVLD_ENABLE_REG 0x0a8
+#define IOMMU_PC_IVLD_ENABLE_ENABLE BIT(0)
+
+/*
+ * Two domains share one IOMMU_DM_AUT_CTRL register: an odd domain index
+ * selects the upper 16 bits. Within a half, master m owns bit 2*m
+ * (RD_UNAVAIL) and bit 2*m + 1 (WR_UNAVAIL).
+ *
+ * Every macro argument is parenthesized so that expressions can be passed.
+ */
+#define IOMMU_DM_AUT_CTRL_REG(d) (0x0b0 + ((d) / 2) * 4)
+#define IOMMU_DM_AUT_CTRL_RD_UNAVAIL(d, m) (1 << ((((d) & 1) * 16) + ((m) * 2)))
+#define IOMMU_DM_AUT_CTRL_WR_UNAVAIL(d, m) (1 << ((((d) & 1) * 16) + ((m) * 2) + 1))
+
+#define IOMMU_DM_AUT_OVWT_REG 0x0d0
+#define IOMMU_INT_ENABLE_REG 0x100
+#define IOMMU_INT_CLR_REG 0x104
+#define IOMMU_INT_STA_REG 0x108
+#define IOMMU_INT_ERR_ADDR_REG(i) (0x110 + (i) * 4)
+#define IOMMU_INT_ERR_ADDR_L1_REG 0x130
+#define IOMMU_INT_ERR_ADDR_L2_REG 0x134
+#define IOMMU_INT_ERR_DATA_REG(i) (0x150 + (i) * 4)
+#define IOMMU_L1PG_INT_REG 0x0180
+#define IOMMU_L2PG_INT_REG 0x0184
+
+#define IOMMU_INT_INVALID_L2PG BIT(17)
+#define IOMMU_INT_INVALID_L1PG BIT(16)
+#define IOMMU_INT_MASTER_PERMISSION(m) BIT(m)
+#define IOMMU_INT_MASTER_MASK (IOMMU_INT_MASTER_PERMISSION(0) | \
+ IOMMU_INT_MASTER_PERMISSION(1) | \
+ IOMMU_INT_MASTER_PERMISSION(2) | \
+ IOMMU_INT_MASTER_PERMISSION(3) | \
+ IOMMU_INT_MASTER_PERMISSION(4) | \
+ IOMMU_INT_MASTER_PERMISSION(5))
+#define IOMMU_INT_MASK (IOMMU_INT_INVALID_L1PG | \
+ IOMMU_INT_INVALID_L2PG | \
+ IOMMU_INT_MASTER_MASK)
+
+/* Directory and page table entries are both one 32-bit word */
+#define PT_ENTRY_SIZE sizeof(u32)
+
+/* L1 Directory Table: entry count and size in bytes */
+#define NUM_DT_ENTRIES 4096
+#define DT_SIZE (NUM_DT_ENTRIES * PT_ENTRY_SIZE)
+
+/* L2 Page Table: entry count and size in bytes */
+#define NUM_PT_ENTRIES 256
+#define PT_SIZE (NUM_PT_ENTRIES * PT_ENTRY_SIZE)
+
+/* Driver state for one IOMMU instance */
+struct sun50i_iommu {
+ /* Handle returned to the IOMMU core from .probe_device */
+ struct iommu_device iommu;
+
+ /* Lock to modify the IOMMU registers */
+ spinlock_t iommu_lock;
+
+ /* Device used for DMA mapping of the tables and for log messages */
+ struct device *dev;
+ /* MMIO register base used by iommu_read() / iommu_write() */
+ void __iomem *base;
+ /* Reset line and clock toggled when the IOMMU is enabled/disabled */
+ struct reset_control *reset;
+ struct clk *clk;
+
+ /* Domain currently attached to the hardware, NULL when detached */
+ struct iommu_domain *domain;
+ /* Group handed out to every master from .device_group */
+ struct iommu_group *group;
+ /* Slab cache the L2 page tables are allocated from */
+ struct kmem_cache *pt_pool;
+};
+
+/* Driver-side wrapper around a generic iommu_domain */
+struct sun50i_iommu_domain {
+ /* Embedded generic domain; see to_sun50i_domain() */
+ struct iommu_domain domain;
+
+ /* Number of devices attached to the domain */
+ refcount_t refcnt;
+
+ /* L1 Page Table */
+ u32 *dt;
+ /* DMA address of @dt, written to IOMMU_TTB_REG when enabling */
+ dma_addr_t dt_dma;
+
+ /* IOMMU this domain is attached to; NULL until the first attach */
+ struct sun50i_iommu *iommu;
+};
+
+/* Map a generic iommu_domain back to the sun50i domain that embeds it. */
+static struct sun50i_iommu_domain *to_sun50i_domain(struct iommu_domain *domain)
+{
+	struct sun50i_iommu_domain *sun50i_domain;
+
+	sun50i_domain = container_of(domain, struct sun50i_iommu_domain, domain);
+
+	return sun50i_domain;
+}
+
+/* Fetch the IOMMU instance stored as @dev's IOMMU private data. */
+static struct sun50i_iommu *sun50i_iommu_from_dev(struct device *dev)
+{
+	struct sun50i_iommu *iommu = dev_iommu_priv_get(dev);
+
+	return iommu;
+}
+
+/* Read the 32-bit register located @offset bytes past the MMIO base. */
+static u32 iommu_read(struct sun50i_iommu *iommu, u32 offset)
+{
+	void __iomem *reg = iommu->base + offset;
+
+	return readl(reg);
+}
+
+/* Write @value to the 32-bit register located @offset bytes past the MMIO base. */
+static void iommu_write(struct sun50i_iommu *iommu, u32 offset, u32 value)
+{
+	void __iomem *reg = iommu->base + offset;
+
+	writel(value, reg);
+}
+
+/*
+ * The Allwinner H6 IOMMU uses a 2-level page table.
+ *
+ * The first level is the usual Directory Table (DT), that consists of
+ * 4096 4-bytes Directory Table Entries (DTE), each pointing to a Page
+ * Table (PT).
+ *
+ * Each PT consists of 256 4-bytes Page Table Entries (PTE), each
+ * pointing to a 4kB page of physical memory.
+ *
+ * The IOMMU supports a single DT, pointed by the IOMMU_TTB_REG
+ * register that contains its physical address.
+ */
+
+#define SUN50I_IOVA_DTE_MASK GENMASK(31, 20)
+#define SUN50I_IOVA_PTE_MASK GENMASK(19, 12)
+#define SUN50I_IOVA_PAGE_MASK GENMASK(11, 0)
+
+/* Directory Table index of @iova (bits 31:20). */
+static u32 sun50i_iova_get_dte_index(dma_addr_t iova)
+{
+	u32 dte_index = FIELD_GET(SUN50I_IOVA_DTE_MASK, iova);
+
+	return dte_index;
+}
+
+/* Page Table index of @iova (bits 19:12). */
+static u32 sun50i_iova_get_pte_index(dma_addr_t iova)
+{
+	u32 pte_index = FIELD_GET(SUN50I_IOVA_PTE_MASK, iova);
+
+	return pte_index;
+}
+
+/* Offset of @iova inside its 4kB page (bits 11:0). */
+static u32 sun50i_iova_get_page_offset(dma_addr_t iova)
+{
+	u32 offset = FIELD_GET(SUN50I_IOVA_PAGE_MASK, iova);
+
+	return offset;
+}
+
+/*
+ * Each Directory Table Entry has a Page Table address and a valid
+ * bit:
+ *
+ * +---------------------+-----------+-+
+ * | PT address | Reserved |V|
+ * +---------------------+-----------+-+
+ * 31:10 - Page Table address
+ * 9:2 - Reserved
+ * 1:0 - 1 if the entry is valid
+ */
+
+#define SUN50I_DTE_PT_ADDRESS_MASK GENMASK(31, 10)
+#define SUN50I_DTE_PT_ATTRS GENMASK(1, 0)
+#define SUN50I_DTE_PT_VALID 1
+
+/* Physical address of the page table referenced by @dte. */
+static phys_addr_t sun50i_dte_get_pt_address(u32 dte)
+{
+	phys_addr_t entry = dte;
+
+	return entry & SUN50I_DTE_PT_ADDRESS_MASK;
+}
+
+/* True when the two attribute bits of @dte hold exactly the "valid" value. */
+static bool sun50i_dte_is_pt_valid(u32 dte)
+{
+	u32 attrs = dte & SUN50I_DTE_PT_ATTRS;
+
+	return attrs == SUN50I_DTE_PT_VALID;
+}
+
+/* Build a valid DTE pointing at the page table located at @pt_dma. */
+static u32 sun50i_mk_dte(dma_addr_t pt_dma)
+{
+	u32 dte = pt_dma & SUN50I_DTE_PT_ADDRESS_MASK;
+
+	dte |= SUN50I_DTE_PT_VALID;
+
+	return dte;
+}
+
+/*
+ * Each PTE has a Page address, an authority index and a valid bit:
+ *
+ * +----------------+-----+-----+-----+---+-----+
+ * | Page address | Rsv | ACI | Rsv | V | Rsv |
+ * +----------------+-----+-----+-----+---+-----+
+ * 31:12 - Page address
+ * 11:8 - Reserved
+ * 7:4 - Authority Control Index
+ * 3:2 - Reserved
+ * 1 - 1 if the entry is valid
+ * 0 - Reserved
+ *
+ * The way permissions work is that the IOMMU has 16 "domains" that
+ * can be configured to give each master either read or write
+ * permissions through the IOMMU_DM_AUT_CTRL_REG registers. The domain
+ * 0 seems like the default domain, and its permissions in the
+ * IOMMU_DM_AUT_CTRL_REG are only read-only, so it's not really
+ * useful to enforce any particular permission.
+ *
+ * Each page entry will then have a reference to the domain they are
+ * assigned to, so that we can actually enforce them on a per-page
+ * basis.
+ *
+ * In order to make it work with the IOMMU framework, we will be using
+ * 4 different domains, starting at 1: RD_WR, RD, WR and NONE
+ * depending on the permission we want to enforce. Each domain will
+ * have each master setup in the same way, since the IOMMU framework
+ * doesn't seem to restrict page access on a per-device basis. And
+ * then we will use the relevant domain index when generating the page
+ * table entry depending on the permissions we want to be enforced.
+ */
+
+/*
+ * Authority Control Index stored in each PTE. It selects the hardware
+ * "domain" whose permissions apply to the page.
+ */
+enum sun50i_iommu_aci {
+ /* Hardware domain 0, left alone by this driver */
+ SUN50I_IOMMU_ACI_DO_NOT_USE = 0,
+ /* Reads and writes both marked unavailable for masters 0-5 */
+ SUN50I_IOMMU_ACI_NONE,
+ /* Writes marked unavailable for masters 0-5 */
+ SUN50I_IOMMU_ACI_RD,
+ /* Reads marked unavailable for masters 0-5 */
+ SUN50I_IOMMU_ACI_WR,
+ /* No restriction programmed by sun50i_iommu_enable() */
+ SUN50I_IOMMU_ACI_RD_WR,
+};
+
+#define SUN50I_PTE_PAGE_ADDRESS_MASK GENMASK(31, 12)
+#define SUN50I_PTE_ACI_MASK GENMASK(7, 4)
+#define SUN50I_PTE_PAGE_VALID BIT(1)
+
+/* Physical address of the 4kB page referenced by @pte. */
+static phys_addr_t sun50i_pte_get_page_address(u32 pte)
+{
+	phys_addr_t entry = pte;
+
+	return entry & SUN50I_PTE_PAGE_ADDRESS_MASK;
+}
+
+/* Authority Control Index (bits 7:4) stored in @pte. */
+static enum sun50i_iommu_aci sun50i_get_pte_aci(u32 pte)
+{
+	enum sun50i_iommu_aci aci = FIELD_GET(SUN50I_PTE_ACI_MASK, pte);
+
+	return aci;
+}
+
+/* True when the valid bit (bit 1) of @pte is set. */
+static bool sun50i_pte_is_page_valid(u32 pte)
+{
+	return (pte & SUN50I_PTE_PAGE_VALID) != 0;
+}
+
+/*
+ * Build a valid PTE pointing at @page, tagged with the authority domain
+ * (ACI) matching the IOMMU_READ / IOMMU_WRITE bits of @prot.
+ */
+static u32 sun50i_mk_pte(phys_addr_t page, int prot)
+{
+	enum sun50i_iommu_aci aci;
+	u32 flags = 0;
+
+	/*
+	 * Compare against the full mask: a plain non-zero test also matches
+	 * read-only and write-only requests, which would then get RD_WR and
+	 * make the two branches below unreachable.
+	 */
+	if ((prot & (IOMMU_READ | IOMMU_WRITE)) == (IOMMU_READ | IOMMU_WRITE))
+		aci = SUN50I_IOMMU_ACI_RD_WR;
+	else if (prot & IOMMU_READ)
+		aci = SUN50I_IOMMU_ACI_RD;
+	else if (prot & IOMMU_WRITE)
+		aci = SUN50I_IOMMU_ACI_WR;
+	else
+		aci = SUN50I_IOMMU_ACI_NONE;
+
+	flags |= FIELD_PREP(SUN50I_PTE_ACI_MASK, aci);
+	page &= SUN50I_PTE_PAGE_ADDRESS_MASK;
+	return page | flags | SUN50I_PTE_PAGE_VALID;
+}
+
+/*
+ * Sync @count table entries starting at @vaddr towards the device.
+ * @count is a number of 4-byte entries, not a size in bytes.
+ */
+static void sun50i_table_flush(struct sun50i_iommu_domain *sun50i_domain,
+			       void *vaddr, unsigned int count)
+{
+	dma_sync_single_for_device(sun50i_domain->iommu->dev,
+				   virt_to_phys(vaddr),
+				   count * PT_ENTRY_SIZE,
+				   DMA_TO_DEVICE);
+}
+
+/*
+ * Flush the PTW cache, the macro TLB and the six micro TLBs, then wait for
+ * the hardware to clear the flush register.
+ *
+ * Must be called with iommu->iommu_lock held. Returns 0 on success, or the
+ * error from the poll helper when the register has not cleared after 2ms.
+ */
+static int sun50i_iommu_flush_all_tlb(struct sun50i_iommu *iommu)
+{
+	u32 reg;
+	int ret;
+
+	assert_spin_locked(&iommu->iommu_lock);
+
+	iommu_write(iommu,
+		    IOMMU_TLB_FLUSH_REG,
+		    IOMMU_TLB_FLUSH_PTW_CACHE |
+		    IOMMU_TLB_FLUSH_MACRO_TLB |
+		    IOMMU_TLB_FLUSH_MICRO_TLB(5) |
+		    IOMMU_TLB_FLUSH_MICRO_TLB(4) |
+		    IOMMU_TLB_FLUSH_MICRO_TLB(3) |
+		    IOMMU_TLB_FLUSH_MICRO_TLB(2) |
+		    IOMMU_TLB_FLUSH_MICRO_TLB(1) |
+		    IOMMU_TLB_FLUSH_MICRO_TLB(0));
+
+	/*
+	 * The callers hold iommu_lock with interrupts disabled, so use the
+	 * busy-waiting variant: readl_poll_timeout() with a non-zero sleep
+	 * interval may sleep between reads.
+	 */
+	ret = readl_poll_timeout_atomic(iommu->base + IOMMU_TLB_FLUSH_REG,
+					reg, !reg,
+					1, 2000);
+	if (ret)
+		dev_warn(iommu->dev, "TLB Flush timed out!\n");
+
+	return ret;
+}
+
+/* .flush_iotlb_all callback: flush every TLB of the IOMMU behind @domain. */
+static void sun50i_iommu_flush_iotlb_all(struct iommu_domain *domain)
+{
+	struct sun50i_iommu *iommu = to_sun50i_domain(domain)->iommu;
+	unsigned long irqflags;
+
+	/*
+	 * The first call arrives at boot right after .probe_device, before
+	 * the attach callback has linked our (single) domain to the IOMMU,
+	 * so the pointer can still be unset here.
+	 *
+	 * Skipping the flush is harmless in that case: all the caches are
+	 * flushed as part of the device powerup.
+	 */
+	if (iommu) {
+		spin_lock_irqsave(&iommu->iommu_lock, irqflags);
+		sun50i_iommu_flush_all_tlb(iommu);
+		spin_unlock_irqrestore(&iommu->iommu_lock, irqflags);
+	}
+}
+
+/*
+ * .iotlb_sync callback: @gather is ignored, the whole TLB is flushed
+ * instead of only the gathered range.
+ */
+static void sun50i_iommu_iotlb_sync(struct iommu_domain *domain,
+ struct iommu_iotlb_gather *gather)
+{
+ sun50i_iommu_flush_iotlb_all(domain);
+}
+
+/*
+ * Power up the IOMMU and program it for the currently attached domain.
+ *
+ * Does nothing and returns 0 when no domain is attached. Otherwise
+ * releases the reset, enables the clock, programs the directory table
+ * address, prefetch, interrupts and per-domain permissions, flushes the
+ * TLBs and finally enables translation.
+ *
+ * Returns 0 on success or a negative error code, in which case the clock
+ * and reset are put back in their disabled state.
+ */
+static int sun50i_iommu_enable(struct sun50i_iommu *iommu)
+{
+ struct sun50i_iommu_domain *sun50i_domain;
+ unsigned long flags;
+ int ret;
+
+ if (!iommu->domain)
+ return 0;
+
+ sun50i_domain = to_sun50i_domain(iommu->domain);
+
+ ret = reset_control_deassert(iommu->reset);
+ if (ret)
+ return ret;
+
+ ret = clk_prepare_enable(iommu->clk);
+ if (ret)
+ goto err_reset_assert;
+
+ spin_lock_irqsave(&iommu->iommu_lock, flags);
+
+ /* Point the hardware at the L1 directory table of the domain */
+ iommu_write(iommu, IOMMU_TTB_REG, sun50i_domain->dt_dma);
+ iommu_write(iommu, IOMMU_TLB_PREFETCH_REG,
+ IOMMU_TLB_PREFETCH_MASTER_ENABLE(0) |
+ IOMMU_TLB_PREFETCH_MASTER_ENABLE(1) |
+ IOMMU_TLB_PREFETCH_MASTER_ENABLE(2) |
+ IOMMU_TLB_PREFETCH_MASTER_ENABLE(3) |
+ IOMMU_TLB_PREFETCH_MASTER_ENABLE(4) |
+ IOMMU_TLB_PREFETCH_MASTER_ENABLE(5));
+ iommu_write(iommu, IOMMU_INT_ENABLE_REG, IOMMU_INT_MASK);
+ /* ACI_NONE: neither reads nor writes allowed for masters 0-5 */
+ iommu_write(iommu, IOMMU_DM_AUT_CTRL_REG(SUN50I_IOMMU_ACI_NONE),
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 0) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 0) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 1) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 1) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 2) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 2) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 3) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 3) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 4) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 4) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 5) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 5));
+
+ /* ACI_RD: writes unavailable for masters 0-5 */
+ iommu_write(iommu, IOMMU_DM_AUT_CTRL_REG(SUN50I_IOMMU_ACI_RD),
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 0) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 1) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 2) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 3) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 4) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 5));
+
+ /* ACI_WR: reads unavailable for masters 0-5 */
+ iommu_write(iommu, IOMMU_DM_AUT_CTRL_REG(SUN50I_IOMMU_ACI_WR),
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 0) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 1) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 2) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 3) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 4) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 5));
+
+ ret = sun50i_iommu_flush_all_tlb(iommu);
+ if (ret) {
+ spin_unlock_irqrestore(&iommu->iommu_lock, flags);
+ goto err_clk_disable;
+ }
+
+ /* Translation is switched on last, once everything else is set up */
+ iommu_write(iommu, IOMMU_AUTO_GATING_REG, IOMMU_AUTO_GATING_ENABLE);
+ iommu_write(iommu, IOMMU_ENABLE_REG, IOMMU_ENABLE_ENABLE);
+
+ spin_unlock_irqrestore(&iommu->iommu_lock, flags);
+
+ return 0;
+
+err_clk_disable:
+ clk_disable_unprepare(iommu->clk);
+
+err_reset_assert:
+ reset_control_assert(iommu->reset);
+
+ return ret;
+}
+
+/*
+ * Turn translation off, clear the directory table address, then gate the
+ * clock and assert the reset.
+ */
+static void sun50i_iommu_disable(struct sun50i_iommu *iommu)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&iommu->iommu_lock, flags);
+
+ iommu_write(iommu, IOMMU_ENABLE_REG, 0);
+ iommu_write(iommu, IOMMU_TTB_REG, 0);
+
+ spin_unlock_irqrestore(&iommu->iommu_lock, flags);
+
+ clk_disable_unprepare(iommu->clk);
+ reset_control_assert(iommu->reset);
+}
+
+/*
+ * Allocate a zeroed L2 page table from the pool and DMA-map it towards the
+ * device. Returns the table or ERR_PTR(-ENOMEM).
+ */
+static void *sun50i_iommu_alloc_page_table(struct sun50i_iommu *iommu,
+					   gfp_t gfp)
+{
+	dma_addr_t dma;
+	u32 *pt;
+
+	pt = kmem_cache_zalloc(iommu->pt_pool, gfp);
+	if (!pt)
+		return ERR_PTR(-ENOMEM);
+
+	dma = dma_map_single(iommu->dev, pt, PT_SIZE, DMA_TO_DEVICE);
+	if (!dma_mapping_error(iommu->dev, dma)) {
+		/* We rely on the physical address and DMA address being the same */
+		WARN_ON(dma != virt_to_phys(pt));
+		return pt;
+	}
+
+	dev_err(iommu->dev, "Couldn't map L2 Page Table\n");
+	kmem_cache_free(iommu->pt_pool, pt);
+
+	return ERR_PTR(-ENOMEM);
+}
+
+/* Undo sun50i_iommu_alloc_page_table(): unmap @page_table and free it. */
+static void sun50i_iommu_free_page_table(struct sun50i_iommu *iommu,
+					 u32 *page_table)
+{
+	dma_unmap_single(iommu->dev, virt_to_phys(page_table), PT_SIZE,
+			 DMA_TO_DEVICE);
+	kmem_cache_free(iommu->pt_pool, page_table);
+}
+
+/*
+ * Return the L2 page table covering @iova, allocating and installing a new
+ * one in the directory table when the DTE is not valid yet.
+ *
+ * Returns the kernel virtual address of the page table, or an ERR_PTR()
+ * when the allocation failed.
+ */
+static u32 *sun50i_dte_get_page_table(struct sun50i_iommu_domain *sun50i_domain,
+				      dma_addr_t iova, gfp_t gfp)
+{
+	struct sun50i_iommu *iommu = sun50i_domain->iommu;
+	u32 *page_table;
+	u32 *dte_addr;
+	u32 old_dte;
+	u32 dte;
+
+	dte_addr = &sun50i_domain->dt[sun50i_iova_get_dte_index(iova)];
+	dte = *dte_addr;
+	if (sun50i_dte_is_pt_valid(dte)) {
+		phys_addr_t pt_phys = sun50i_dte_get_pt_address(dte);
+		return (u32 *)phys_to_virt(pt_phys);
+	}
+
+	page_table = sun50i_iommu_alloc_page_table(iommu, gfp);
+	if (IS_ERR(page_table))
+		return page_table;
+
+	dte = sun50i_mk_dte(virt_to_phys(page_table));
+	old_dte = cmpxchg(dte_addr, 0, dte);
+	if (old_dte) {
+		/* Somebody else installed a table first: use theirs, drop ours */
+		phys_addr_t installed_pt_phys =
+			sun50i_dte_get_pt_address(old_dte);
+		u32 *installed_pt = phys_to_virt(installed_pt_phys);
+		u32 *drop_pt = page_table;
+
+		page_table = installed_pt;
+		dte = old_dte;
+		sun50i_iommu_free_page_table(iommu, drop_pt);
+	}
+
+	/*
+	 * sun50i_table_flush() takes a number of entries, not a byte count:
+	 * passing PT_SIZE would sync four times the size of the table.
+	 */
+	sun50i_table_flush(sun50i_domain, page_table, NUM_PT_ENTRIES);
+	sun50i_table_flush(sun50i_domain, dte_addr, 1);
+
+	return page_table;
+}
+
+/*
+ * .map callback: install a single PTE mapping @iova to @paddr with the
+ * permissions in @prot. @size is not used; one PTE is written per call.
+ *
+ * Returns 0 on success, -EBUSY when @iova already has a valid PTE, or the
+ * error from the page table lookup/allocation.
+ */
+static int sun50i_iommu_map(struct iommu_domain *domain, unsigned long iova,
+			    phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+{
+	struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
+	struct sun50i_iommu *iommu = sun50i_domain->iommu;
+	u32 pte_index;
+	u32 *page_table, *pte_addr;
+	int ret = 0;
+
+	page_table = sun50i_dte_get_page_table(sun50i_domain, iova, gfp);
+	if (IS_ERR(page_table)) {
+		ret = PTR_ERR(page_table);
+		goto out;
+	}
+
+	pte_index = sun50i_iova_get_pte_index(iova);
+	pte_addr = &page_table[pte_index];
+	if (unlikely(sun50i_pte_is_page_valid(*pte_addr))) {
+		phys_addr_t page_phys = sun50i_pte_get_page_address(*pte_addr);
+		/* %pad dereferences a dma_addr_t, which @iova is not */
+		dma_addr_t iova_dma = iova;
+
+		dev_err(iommu->dev,
+			"iova %pad already mapped to %pa cannot remap to %pa prot: %#x\n",
+			&iova_dma, &page_phys, &paddr, prot);
+		ret = -EBUSY;
+		goto out;
+	}
+
+	*pte_addr = sun50i_mk_pte(paddr, prot);
+	sun50i_table_flush(sun50i_domain, pte_addr, 1);
+
+out:
+	return ret;
+}
+
+/*
+ * .unmap callback: clear the PTE covering @iova.
+ *
+ * Returns SZ_4K when a valid PTE was cleared, 0 when @iova had no valid
+ * directory or page table entry. @size and @gather are not used.
+ */
+static size_t sun50i_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
+				 size_t size, struct iommu_iotlb_gather *gather)
+{
+	struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
+	phys_addr_t pt_phys;
+	u32 *pte_addr;
+	u32 dte;
+
+	dte = sun50i_domain->dt[sun50i_iova_get_dte_index(iova)];
+	if (!sun50i_dte_is_pt_valid(dte))
+		return 0;
+
+	pt_phys = sun50i_dte_get_pt_address(dte);
+	pte_addr = (u32 *)phys_to_virt(pt_phys) + sun50i_iova_get_pte_index(iova);
+
+	if (!sun50i_pte_is_page_valid(*pte_addr))
+		return 0;
+
+	memset(pte_addr, 0, sizeof(*pte_addr));
+	sun50i_table_flush(sun50i_domain, pte_addr, 1);
+
+	return SZ_4K;
+}
+
+/*
+ * .iova_to_phys callback: walk the two table levels by hand. Returns the
+ * physical address behind @iova, or 0 when either level has no valid entry.
+ */
+static phys_addr_t sun50i_iommu_iova_to_phys(struct iommu_domain *domain,
+					     dma_addr_t iova)
+{
+	struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
+	u32 dte_index = sun50i_iova_get_dte_index(iova);
+	u32 pte_index = sun50i_iova_get_pte_index(iova);
+	u32 *pt;
+	u32 dte, pte;
+
+	dte = sun50i_domain->dt[dte_index];
+	if (!sun50i_dte_is_pt_valid(dte))
+		return 0;
+
+	pt = phys_to_virt(sun50i_dte_get_pt_address(dte));
+	pte = pt[pte_index];
+	if (!sun50i_pte_is_page_valid(pte))
+		return 0;
+
+	return sun50i_pte_get_page_address(pte) +
+	       sun50i_iova_get_page_offset(iova);
+}
+
+/*
+ * .domain_alloc callback: allocate a domain with an empty directory table
+ * and a 32-bit aperture. Only DMA, identity and unmanaged domains are
+ * supported. Returns NULL on failure.
+ */
+static struct iommu_domain *sun50i_iommu_domain_alloc(unsigned type)
+{
+	struct sun50i_iommu_domain *sun50i_domain;
+	struct iommu_domain *domain;
+
+	switch (type) {
+	case IOMMU_DOMAIN_DMA:
+	case IOMMU_DOMAIN_IDENTITY:
+	case IOMMU_DOMAIN_UNMANAGED:
+		break;
+	default:
+		return NULL;
+	}
+
+	sun50i_domain = kzalloc(sizeof(*sun50i_domain), GFP_KERNEL);
+	if (!sun50i_domain)
+		return NULL;
+
+	domain = &sun50i_domain->domain;
+
+	/* Only DMA domains carry a DMA cookie */
+	if (type == IOMMU_DOMAIN_DMA && iommu_get_dma_cookie(domain))
+		goto err_free_domain;
+
+	sun50i_domain->dt = (u32 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+						    get_order(DT_SIZE));
+	if (!sun50i_domain->dt)
+		goto err_put_cookie;
+
+	refcount_set(&sun50i_domain->refcnt, 1);
+
+	domain->geometry.aperture_start = 0;
+	domain->geometry.aperture_end = DMA_BIT_MASK(32);
+	domain->geometry.force_aperture = true;
+
+	return domain;
+
+err_put_cookie:
+	if (type == IOMMU_DOMAIN_DMA)
+		iommu_put_dma_cookie(domain);
+
+err_free_domain:
+	kfree(sun50i_domain);
+
+	return NULL;
+}
+
+/*
+ * .domain_free callback: release the directory table, the DMA cookie and
+ * the domain itself.
+ */
+static void sun50i_iommu_domain_free(struct iommu_domain *domain)
+{
+ struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
+
+ free_pages((unsigned long)sun50i_domain->dt, get_order(DT_SIZE));
+ sun50i_domain->dt = NULL;
+
+ /* Called for every domain type, not only IOMMU_DOMAIN_DMA */
+ iommu_put_dma_cookie(domain);
+
+ kfree(sun50i_domain);
+}
+
+/*
+ * Link @sun50i_domain and @iommu to each other, DMA-map the directory
+ * table and enable the hardware.
+ *
+ * Returns -ENOMEM when the directory table cannot be mapped, otherwise the
+ * result of sun50i_iommu_enable().
+ */
+static int sun50i_iommu_attach_domain(struct sun50i_iommu *iommu,
+ struct sun50i_iommu_domain *sun50i_domain)
+{
+ /* NOTE(review): both links stay set when the mapping below fails */
+ iommu->domain = &sun50i_domain->domain;
+ sun50i_domain->iommu = iommu;
+
+ sun50i_domain->dt_dma = dma_map_single(iommu->dev, sun50i_domain->dt,
+ DT_SIZE, DMA_TO_DEVICE);
+ if (dma_mapping_error(iommu->dev, sun50i_domain->dt_dma)) {
+ dev_err(iommu->dev, "Couldn't map L1 Page Table\n");
+ return -ENOMEM;
+ }
+
+ return sun50i_iommu_enable(iommu);
+}
+
+/*
+ * Free every L2 page table still referenced by the directory table, switch
+ * the IOMMU off, unmap the directory table and unlink the domain from
+ * @iommu.
+ */
+static void sun50i_iommu_detach_domain(struct sun50i_iommu *iommu,
+				       struct sun50i_iommu_domain *sun50i_domain)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < NUM_DT_ENTRIES; idx++) {
+		u32 *slot = &sun50i_domain->dt[idx];
+		u32 dte = *slot;
+		u32 *pt;
+
+		if (!sun50i_dte_is_pt_valid(dte))
+			continue;
+
+		/* Clear and sync the DTE before the table behind it goes away */
+		memset(slot, 0, sizeof(*slot));
+		sun50i_table_flush(sun50i_domain, slot, 1);
+
+		pt = phys_to_virt(sun50i_dte_get_pt_address(dte));
+		sun50i_iommu_free_page_table(iommu, pt);
+	}
+
+	sun50i_iommu_disable(iommu);
+
+	dma_unmap_single(iommu->dev, virt_to_phys(sun50i_domain->dt),
+			 DT_SIZE, DMA_TO_DEVICE);
+
+	iommu->domain = NULL;
+}
+
+/*
+ * .detach_dev callback: drop @dev's reference on @domain and tear the
+ * domain down when that was the last one. Does nothing when @domain is not
+ * the one currently attached to the IOMMU.
+ */
+static void sun50i_iommu_detach_device(struct iommu_domain *domain,
+				       struct device *dev)
+{
+	struct sun50i_iommu *iommu = dev_iommu_priv_get(dev);
+	struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
+
+	dev_dbg(dev, "Detaching from IOMMU domain\n");
+
+	if (iommu->domain == domain &&
+	    refcount_dec_and_test(&sun50i_domain->refcnt))
+		sun50i_iommu_detach_domain(iommu, sun50i_domain);
+}
+
+/*
+ * .attach_dev callback: take a reference on @domain for @dev and, when the
+ * IOMMU is not already using @domain, switch the IOMMU over to it.
+ *
+ * Returns -ENODEV when @dev has no IOMMU private data, 0 otherwise.
+ */
+static int sun50i_iommu_attach_device(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
+ struct sun50i_iommu *iommu;
+
+ iommu = sun50i_iommu_from_dev(dev);
+ if (!iommu)
+ return -ENODEV;
+
+ dev_dbg(dev, "Attaching to IOMMU domain\n");
+
+ refcount_inc(&sun50i_domain->refcnt);
+
+ if (iommu->domain == domain)
+ return 0;
+
+ /* A different domain is in use: release it before taking over */
+ if (iommu->domain)
+ sun50i_iommu_detach_device(iommu->domain, dev);
+
+ /*
+ * NOTE(review): the result of sun50i_iommu_attach_domain() is dropped,
+ * so a failed mapping or enable still reports success - confirm.
+ */
+ sun50i_iommu_attach_domain(iommu, sun50i_domain);
+
+ return 0;
+}
+
+/*
+ * Look up the IOMMU instance behind @dev.
+ *
+ * Returns the instance's struct iommu_device, or ERR_PTR(-ENODEV) when
+ * sun50i_iommu_from_dev() finds none.
+ */
+static struct iommu_device *sun50i_iommu_probe_device(struct device *dev)
+{
+	struct sun50i_iommu *iommu = sun50i_iommu_from_dev(dev);
+
+	if (iommu)
+		return &iommu->iommu;
+
+	return ERR_PTR(-ENODEV);
+}
+
+/* Nothing to undo: sun50i_iommu_probe_device() acquires no resources. */
+static void sun50i_iommu_release_device(struct device *dev)
+{
+}
+
+/*
+ * Return the group of the IOMMU instance behind @dev, obtained through
+ * iommu_group_ref_get().
+ */
+static struct iommu_group *sun50i_iommu_device_group(struct device *dev)
+{
+	struct sun50i_iommu *iommu;
+
+	iommu = sun50i_iommu_from_dev(dev);
+
+	return iommu_group_ref_get(iommu->group);
+}
+
+/*
+ * Translate one IOMMU specifier from the device tree for @dev.
+ *
+ * The driver data of the IOMMU platform device referenced by @args is
+ * stored as @dev's IOMMU private data, and the master ID found in the
+ * first specifier cell is added to @dev's fwspec.
+ *
+ * Returns 0 on success, -ENODEV if no platform device exists for the
+ * IOMMU node, or the error from iommu_fwspec_add_ids().
+ */
+static int sun50i_iommu_of_xlate(struct device *dev,
+				 struct of_phandle_args *args)
+{
+	struct platform_device *iommu_pdev = of_find_device_by_node(args->np);
+	unsigned id = args->args[0];
+
+	if (!iommu_pdev)
+		return -ENODEV;
+
+	dev_iommu_priv_set(dev, platform_get_drvdata(iommu_pdev));
+
+	/*
+	 * of_find_device_by_node() returned the device with a reference
+	 * held. Only its drvdata pointer is kept, so drop the reference.
+	 */
+	put_device(&iommu_pdev->dev);
+
+	return iommu_fwspec_add_ids(dev, &id, 1);
+}
+
+/* IOMMU core callbacks implemented by this driver; only 4 KiB pages. */
+static const struct iommu_ops sun50i_iommu_ops = {
+	.pgsize_bitmap	= SZ_4K,
+
+	/* Device discovery and grouping */
+	.of_xlate	= sun50i_iommu_of_xlate,
+	.probe_device	= sun50i_iommu_probe_device,
+	.release_device	= sun50i_iommu_release_device,
+	.device_group	= sun50i_iommu_device_group,
+
+	/* Domain lifetime and attachment */
+	.domain_alloc	= sun50i_iommu_domain_alloc,
+	.domain_free	= sun50i_iommu_domain_free,
+	.attach_dev	= sun50i_iommu_attach_device,
+	.detach_dev	= sun50i_iommu_detach_device,
+
+	/* Mappings and TLB maintenance */
+	.map		= sun50i_iommu_map,
+	.unmap		= sun50i_iommu_unmap,
+	.iova_to_phys	= sun50i_iommu_iova_to_phys,
+	.flush_iotlb_all = sun50i_iommu_flush_iotlb_all,
+	.iotlb_sync	= sun50i_iommu_iotlb_sync,
+};
+
+/*
+ * Log a translation fault and forward it to the attached domain.
+ *
+ * @iommu:  IOMMU instance that raised the fault
+ * @master: index of the master blamed for the access
+ * @iova:   faulting address
+ * @prot:   IOMMU_FAULT_READ or IOMMU_FAULT_WRITE
+ *
+ * When no domain is attached the fault cannot be forwarded and a second
+ * error message is logged instead.
+ */
+static void sun50i_iommu_report_fault(struct sun50i_iommu *iommu,
+				      unsigned master, phys_addr_t iova,
+				      unsigned prot)
+{
+	/*
+	 * @iova is a phys_addr_t, so it must be printed with %pa; %pad is
+	 * reserved for dma_addr_t and the two types may differ in size.
+	 * @master is unsigned, hence %u.
+	 */
+	dev_err(iommu->dev, "Page fault for %pa (master %u, dir %s)\n",
+		&iova, master, (prot == IOMMU_FAULT_WRITE) ? "wr" : "rd");
+
+	if (iommu->domain)
+		report_iommu_fault(iommu->domain, iommu->dev, iova, prot);
+	else
+		dev_err(iommu->dev, "Page fault while iommu not attached to any domain?\n");
+}
+
+/*
+ * Handle an invalid page-table interrupt.
+ *
+ * @addr_reg:  register holding the faulting address
+ * @blame_reg: register whose master bits identify the faulting master
+ *
+ * Must be called with iommu->iommu_lock held. Reports the fault and
+ * returns the faulting address.
+ */
+static phys_addr_t sun50i_iommu_handle_pt_irq(struct sun50i_iommu *iommu,
+					      unsigned addr_reg,
+					      unsigned blame_reg)
+{
+	phys_addr_t fault_addr;
+	unsigned master;
+	u32 blame;
+
+	assert_spin_locked(&iommu->iommu_lock);
+
+	fault_addr = iommu_read(iommu, addr_reg);
+	blame = iommu_read(iommu, blame_reg);
+	master = ilog2(blame & IOMMU_INT_MASTER_MASK);
+
+	/*
+	 * There is no page-table entry for this address, hence nothing that
+	 * tells which kind of access faulted: report it as a read.
+	 */
+	sun50i_iommu_report_fault(iommu, master, fault_addr,
+				  IOMMU_FAULT_READ);
+
+	return fault_addr;
+}
+
+/*
+ * Handle a permission-fault interrupt.
+ *
+ * The blamed master is taken from the interrupt status register, then the
+ * faulting address and the faulting entry are read from that master's
+ * error registers. The access-control index (ACI) of the entry is used to
+ * guess the direction of the access that was refused.
+ *
+ * Must be called with iommu->iommu_lock held. Reports the fault and
+ * returns the faulting address.
+ */
+static phys_addr_t sun50i_iommu_handle_perm_irq(struct sun50i_iommu *iommu)
+{
+	enum sun50i_iommu_aci aci;
+	phys_addr_t fault_addr;
+	unsigned master;
+	unsigned dir;
+	u32 blame;
+	u32 err_data;
+
+	assert_spin_locked(&iommu->iommu_lock);
+
+	blame = iommu_read(iommu, IOMMU_INT_STA_REG);
+	master = ilog2(blame & IOMMU_INT_MASTER_MASK);
+	fault_addr = iommu_read(iommu, IOMMU_INT_ERR_ADDR_REG(master));
+	err_data = iommu_read(iommu, IOMMU_INT_ERR_DATA_REG(master));
+	aci = sun50i_get_pte_aci(err_data);
+
+	/*
+	 * Only a read-only entry tells us for sure what happened: the
+	 * access must have been a write. A write-only entry implies a read;
+	 * an entry without any permission does not let us tell, and a
+	 * read-write entry should not fault at all. All of those, and any
+	 * unknown value, are reported as a read.
+	 */
+	if (aci == SUN50I_IOMMU_ACI_RD)
+		dir = IOMMU_FAULT_WRITE;
+	else
+		dir = IOMMU_FAULT_READ;
+
+	sun50i_iommu_report_fault(iommu, master, fault_addr, dir);
+
+	return fault_addr;
+}
+
+/*
+ * Interrupt handler for the IOMMU.
+ *
+ * Reads the interrupt status, dispatches to the matching fault handler
+ * (invalid L2 table, invalid L1 table, otherwise permission fault),
+ * acknowledges the interrupt and pulses the reset bits selected by the
+ * status word.
+ *
+ * Returns IRQ_NONE when none of the bits in IOMMU_INT_MASK is pending,
+ * IRQ_HANDLED otherwise.
+ */
+static irqreturn_t sun50i_iommu_irq(int irq, void *dev_id)
+{
+	struct sun50i_iommu *iommu = dev_id;
+	u32 status;
+
+	spin_lock(&iommu->iommu_lock);
+
+	status = iommu_read(iommu, IOMMU_INT_STA_REG);
+	if (!(status & IOMMU_INT_MASK)) {
+		spin_unlock(&iommu->iommu_lock);
+		return IRQ_NONE;
+	}
+
+	/* The handlers report the fault; the address they return is unused. */
+	if (status & IOMMU_INT_INVALID_L2PG)
+		sun50i_iommu_handle_pt_irq(iommu,
+					   IOMMU_INT_ERR_ADDR_L2_REG,
+					   IOMMU_L2PG_INT_REG);
+	else if (status & IOMMU_INT_INVALID_L1PG)
+		sun50i_iommu_handle_pt_irq(iommu,
+					   IOMMU_INT_ERR_ADDR_L1_REG,
+					   IOMMU_L1PG_INT_REG);
+	else
+		sun50i_iommu_handle_perm_irq(iommu);
+
+	iommu_write(iommu, IOMMU_INT_CLR_REG, status);
+
+	/*
+	 * Pulse the reset of whatever faulted: the first write clears the
+	 * bits set in @status, the second one must set every bit again.
+	 * Writing @status back, as was done before, would leave all the
+	 * other bits cleared, i.e. in the state the first write uses to
+	 * trigger a reset.
+	 *
+	 * NOTE(review): assumes a 0 bit in IOMMU_RESET_REG asserts reset,
+	 * which is what the first write relies on - confirm against the
+	 * register description.
+	 */
+	iommu_write(iommu, IOMMU_RESET_REG, ~status);
+	iommu_write(iommu, IOMMU_RESET_REG, ~0U);
+
+	spin_unlock(&iommu->iommu_lock);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Probe one IOMMU instance.
+ *
+ * Allocates the driver state, a slab cache for page tables and an IOMMU
+ * group, looks up the MMIO region, interrupt, clock and reset line, then
+ * registers the instance with the IOMMU core, requests the interrupt and
+ * installs the driver's ops on the platform bus.
+ *
+ * Returns 0 on success or a negative error code. On failure the error
+ * labels at the end undo, in reverse order, the steps that are not
+ * devm-managed.
+ */
+static int sun50i_iommu_probe(struct platform_device *pdev)
+{
+ struct sun50i_iommu *iommu;
+ int ret, irq;
+
+ iommu = devm_kzalloc(&pdev->dev, sizeof(*iommu), GFP_KERNEL);
+ if (!iommu)
+ return -ENOMEM;
+ spin_lock_init(&iommu->iommu_lock);
+ /* sun50i_iommu_of_xlate() reads this drvdata back for client devices. */
+ platform_set_drvdata(pdev, iommu);
+ iommu->dev = &pdev->dev;
+
+ /* Cache of PT_SIZE-byte objects, each aligned on PT_SIZE. */
+ iommu->pt_pool = kmem_cache_create(dev_name(&pdev->dev),
+ PT_SIZE, PT_SIZE,
+ SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!iommu->pt_pool)
+ return -ENOMEM;
+
+ /* Single group, returned by sun50i_iommu_device_group(). */
+ iommu->group = iommu_group_alloc();
+ if (IS_ERR(iommu->group)) {
+ ret = PTR_ERR(iommu->group);
+ goto err_free_cache;
+ }
+
+ iommu->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(iommu->base)) {
+ ret = PTR_ERR(iommu->base);
+ goto err_free_group;
+ }
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ ret = irq;
+ goto err_free_group;
+ }
+
+ iommu->clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(iommu->clk)) {
+ dev_err(&pdev->dev, "Couldn't get our clock.\n");
+ ret = PTR_ERR(iommu->clk);
+ goto err_free_group;
+ }
+
+ iommu->reset = devm_reset_control_get(&pdev->dev, NULL);
+ if (IS_ERR(iommu->reset)) {
+ dev_err(&pdev->dev, "Couldn't get our reset line.\n");
+ ret = PTR_ERR(iommu->reset);
+ goto err_free_group;
+ }
+
+ /* Register with the IOMMU core: sysfs entry, ops, fwnode, device. */
+ ret = iommu_device_sysfs_add(&iommu->iommu, &pdev->dev,
+ NULL, dev_name(&pdev->dev));
+ if (ret)
+ goto err_free_group;
+
+ iommu_device_set_ops(&iommu->iommu, &sun50i_iommu_ops);
+ iommu_device_set_fwnode(&iommu->iommu, &pdev->dev.of_node->fwnode);
+
+ ret = iommu_device_register(&iommu->iommu);
+ if (ret)
+ goto err_remove_sysfs;
+
+ ret = devm_request_irq(&pdev->dev, irq, sun50i_iommu_irq, 0,
+ dev_name(&pdev->dev), iommu);
+ if (ret < 0)
+ goto err_unregister;
+
+ /* NOTE(review): the return value of bus_set_iommu() is not checked. */
+ bus_set_iommu(&platform_bus_type, &sun50i_iommu_ops);
+
+ return 0;
+
+/* Error unwinding: each label falls through to the ones below it. */
+err_unregister:
+ iommu_device_unregister(&iommu->iommu);
+
+err_remove_sysfs:
+ iommu_device_sysfs_remove(&iommu->iommu);
+
+err_free_group:
+ iommu_group_put(iommu->group);
+
+err_free_cache:
+ kmem_cache_destroy(iommu->pt_pool);
+
+ return ret;
+}
+
+/* Device-tree compatible strings handled by this driver. */
+static const struct of_device_id sun50i_iommu_dt[] = {
+ { .compatible = "allwinner,sun50i-h6-iommu", },
+ { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, sun50i_iommu_dt);
+
+/*
+ * Platform driver description. No .probe member is set here: the probe
+ * routine is passed to builtin_platform_driver_probe() below.
+ * suppress_bind_attrs is set, so the driver exposes no bind/unbind
+ * attributes in sysfs.
+ */
+static struct platform_driver sun50i_iommu_driver = {
+ .driver = {
+ .name = "sun50i-iommu",
+ .of_match_table = sun50i_iommu_dt,
+ .suppress_bind_attrs = true,
+ }
+};
+builtin_platform_driver_probe(sun50i_iommu_driver, sun50i_iommu_probe);
+
+MODULE_DESCRIPTION("Allwinner H6 IOMMU driver");
+MODULE_AUTHOR("Maxime Ripard <maxime@cerno.tech>");
+MODULE_AUTHOR("zhuxianbin <zhuxianbin@allwinnertech.com>");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
index db6559e..5fbdff6 100644
--- a/drivers/iommu/tegra-gart.c
+++ b/drivers/iommu/tegra-gart.c
@@ -243,28 +243,16 @@ static bool gart_iommu_capable(enum iommu_cap cap)
return false;
}
-static int gart_iommu_add_device(struct device *dev)
+static struct iommu_device *gart_iommu_probe_device(struct device *dev)
{
- struct iommu_group *group;
-
if (!dev_iommu_fwspec_get(dev))
- return -ENODEV;
+ return ERR_PTR(-ENODEV);
- group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group))
- return PTR_ERR(group);
-
- iommu_group_put(group);
-
- iommu_device_link(&gart_handle->iommu, dev);
-
- return 0;
+ return &gart_handle->iommu;
}
-static void gart_iommu_remove_device(struct device *dev)
+static void gart_iommu_release_device(struct device *dev)
{
- iommu_group_remove_device(dev);
- iommu_device_unlink(&gart_handle->iommu, dev);
}
static int gart_iommu_of_xlate(struct device *dev,
@@ -290,8 +278,8 @@ static const struct iommu_ops gart_iommu_ops = {
.domain_free = gart_iommu_domain_free,
.attach_dev = gart_iommu_attach_dev,
.detach_dev = gart_iommu_detach_dev,
- .add_device = gart_iommu_add_device,
- .remove_device = gart_iommu_remove_device,
+ .probe_device = gart_iommu_probe_device,
+ .release_device = gart_iommu_release_device,
.device_group = generic_device_group,
.map = gart_iommu_map,
.unmap = gart_iommu_unmap,
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 63a147b..7426b76 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -757,11 +757,10 @@ static int tegra_smmu_configure(struct tegra_smmu *smmu, struct device *dev,
return 0;
}
-static int tegra_smmu_add_device(struct device *dev)
+static struct iommu_device *tegra_smmu_probe_device(struct device *dev)
{
struct device_node *np = dev->of_node;
struct tegra_smmu *smmu = NULL;
- struct iommu_group *group;
struct of_phandle_args args;
unsigned int index = 0;
int err;
@@ -774,7 +773,7 @@ static int tegra_smmu_add_device(struct device *dev)
of_node_put(args.np);
if (err < 0)
- return err;
+ return ERR_PTR(err);
/*
* Only a single IOMMU master interface is currently
@@ -783,8 +782,6 @@ static int tegra_smmu_add_device(struct device *dev)
*/
dev->archdata.iommu = smmu;
- iommu_device_link(&smmu->iommu, dev);
-
break;
}
@@ -793,26 +790,14 @@ static int tegra_smmu_add_device(struct device *dev)
}
if (!smmu)
- return -ENODEV;
+ return ERR_PTR(-ENODEV);
- group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group))
- return PTR_ERR(group);
-
- iommu_group_put(group);
-
- return 0;
+ return &smmu->iommu;
}
-static void tegra_smmu_remove_device(struct device *dev)
+static void tegra_smmu_release_device(struct device *dev)
{
- struct tegra_smmu *smmu = dev->archdata.iommu;
-
- if (smmu)
- iommu_device_unlink(&smmu->iommu, dev);
-
dev->archdata.iommu = NULL;
- iommu_group_remove_device(dev);
}
static const struct tegra_smmu_group_soc *
@@ -895,8 +880,8 @@ static const struct iommu_ops tegra_smmu_ops = {
.domain_free = tegra_smmu_domain_free,
.attach_dev = tegra_smmu_attach_dev,
.detach_dev = tegra_smmu_detach_dev,
- .add_device = tegra_smmu_add_device,
- .remove_device = tegra_smmu_remove_device,
+ .probe_device = tegra_smmu_probe_device,
+ .release_device = tegra_smmu_release_device,
.device_group = tegra_smmu_device_group,
.map = tegra_smmu_map,
.unmap = tegra_smmu_unmap,
@@ -1015,7 +1000,7 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev,
* value. However the IOMMU registration process will attempt to add
* all devices to the IOMMU when bus_set_iommu() is called. In order
* not to rely on global variables to track the IOMMU instance, we
- * set it here so that it can be looked up from the .add_device()
+ * set it here so that it can be looked up from the .probe_device()
* callback via the IOMMU device's .drvdata field.
*/
mc->smmu = smmu;
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 4e1d11a..f6f0748 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -865,24 +865,23 @@ static struct viommu_dev *viommu_get_by_fwnode(struct fwnode_handle *fwnode)
return dev ? dev_to_virtio(dev)->priv : NULL;
}
-static int viommu_add_device(struct device *dev)
+static struct iommu_device *viommu_probe_device(struct device *dev)
{
int ret;
- struct iommu_group *group;
struct viommu_endpoint *vdev;
struct viommu_dev *viommu = NULL;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
if (!fwspec || fwspec->ops != &viommu_ops)
- return -ENODEV;
+ return ERR_PTR(-ENODEV);
viommu = viommu_get_by_fwnode(fwspec->iommu_fwnode);
if (!viommu)
- return -ENODEV;
+ return ERR_PTR(-ENODEV);
vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
if (!vdev)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
vdev->dev = dev;
vdev->viommu = viommu;
@@ -896,45 +895,25 @@ static int viommu_add_device(struct device *dev)
goto err_free_dev;
}
- ret = iommu_device_link(&viommu->iommu, dev);
- if (ret)
- goto err_free_dev;
+ return &viommu->iommu;
- /*
- * Last step creates a default domain and attaches to it. Everything
- * must be ready.
- */
- group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group)) {
- ret = PTR_ERR(group);
- goto err_unlink_dev;
- }
-
- iommu_group_put(group);
-
- return PTR_ERR_OR_ZERO(group);
-
-err_unlink_dev:
- iommu_device_unlink(&viommu->iommu, dev);
err_free_dev:
generic_iommu_put_resv_regions(dev, &vdev->resv_regions);
kfree(vdev);
- return ret;
+ return ERR_PTR(ret);
}
-static void viommu_remove_device(struct device *dev)
+static void viommu_release_device(struct device *dev)
{
- struct viommu_endpoint *vdev;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+ struct viommu_endpoint *vdev;
if (!fwspec || fwspec->ops != &viommu_ops)
return;
vdev = dev_iommu_priv_get(dev);
- iommu_group_remove_device(dev);
- iommu_device_unlink(&vdev->viommu->iommu, dev);
generic_iommu_put_resv_regions(dev, &vdev->resv_regions);
kfree(vdev);
}
@@ -960,8 +939,8 @@ static struct iommu_ops viommu_ops = {
.unmap = viommu_unmap,
.iova_to_phys = viommu_iova_to_phys,
.iotlb_sync = viommu_iotlb_sync,
- .add_device = viommu_add_device,
- .remove_device = viommu_remove_device,
+ .probe_device = viommu_probe_device,
+ .release_device = viommu_release_device,
.device_group = viommu_device_group,
.get_resv_regions = viommu_get_resv_regions,
.put_resv_regions = generic_iommu_put_resv_regions,
diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c
index d39307f..107028e 100644
--- a/drivers/misc/uacce/uacce.c
+++ b/drivers/misc/uacce/uacce.c
@@ -90,109 +90,39 @@ static long uacce_fops_compat_ioctl(struct file *filep,
}
#endif
-static int uacce_sva_exit(struct device *dev, struct iommu_sva *handle,
- void *data)
+static int uacce_bind_queue(struct uacce_device *uacce, struct uacce_queue *q)
{
- struct uacce_mm *uacce_mm = data;
- struct uacce_queue *q;
+ int pasid;
+ struct iommu_sva *handle;
- /*
- * No new queue can be added concurrently because no caller can have a
- * reference to this mm. But there may be concurrent calls to
- * uacce_mm_put(), so we need the lock.
- */
- mutex_lock(&uacce_mm->lock);
- list_for_each_entry(q, &uacce_mm->queues, list)
- uacce_put_queue(q);
- uacce_mm->mm = NULL;
- mutex_unlock(&uacce_mm->lock);
+ if (!(uacce->flags & UACCE_DEV_SVA))
+ return 0;
+ handle = iommu_sva_bind_device(uacce->parent, current->mm, NULL);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ pasid = iommu_sva_get_pasid(handle);
+ if (pasid == IOMMU_PASID_INVALID) {
+ iommu_sva_unbind_device(handle);
+ return -ENODEV;
+ }
+
+ q->handle = handle;
+ q->pasid = pasid;
return 0;
}
-static struct iommu_sva_ops uacce_sva_ops = {
- .mm_exit = uacce_sva_exit,
-};
-
-static struct uacce_mm *uacce_mm_get(struct uacce_device *uacce,
- struct uacce_queue *q,
- struct mm_struct *mm)
+static void uacce_unbind_queue(struct uacce_queue *q)
{
- struct uacce_mm *uacce_mm = NULL;
- struct iommu_sva *handle = NULL;
- int ret;
-
- lockdep_assert_held(&uacce->mm_lock);
-
- list_for_each_entry(uacce_mm, &uacce->mm_list, list) {
- if (uacce_mm->mm == mm) {
- mutex_lock(&uacce_mm->lock);
- list_add(&q->list, &uacce_mm->queues);
- mutex_unlock(&uacce_mm->lock);
- return uacce_mm;
- }
- }
-
- uacce_mm = kzalloc(sizeof(*uacce_mm), GFP_KERNEL);
- if (!uacce_mm)
- return NULL;
-
- if (uacce->flags & UACCE_DEV_SVA) {
- /*
- * Safe to pass an incomplete uacce_mm, since mm_exit cannot
- * fire while we hold a reference to the mm.
- */
- handle = iommu_sva_bind_device(uacce->parent, mm, uacce_mm);
- if (IS_ERR(handle))
- goto err_free;
-
- ret = iommu_sva_set_ops(handle, &uacce_sva_ops);
- if (ret)
- goto err_unbind;
-
- uacce_mm->pasid = iommu_sva_get_pasid(handle);
- if (uacce_mm->pasid == IOMMU_PASID_INVALID)
- goto err_unbind;
- }
-
- uacce_mm->mm = mm;
- uacce_mm->handle = handle;
- INIT_LIST_HEAD(&uacce_mm->queues);
- mutex_init(&uacce_mm->lock);
- list_add(&q->list, &uacce_mm->queues);
- list_add(&uacce_mm->list, &uacce->mm_list);
-
- return uacce_mm;
-
-err_unbind:
- if (handle)
- iommu_sva_unbind_device(handle);
-err_free:
- kfree(uacce_mm);
- return NULL;
-}
-
-static void uacce_mm_put(struct uacce_queue *q)
-{
- struct uacce_mm *uacce_mm = q->uacce_mm;
-
- lockdep_assert_held(&q->uacce->mm_lock);
-
- mutex_lock(&uacce_mm->lock);
- list_del(&q->list);
- mutex_unlock(&uacce_mm->lock);
-
- if (list_empty(&uacce_mm->queues)) {
- if (uacce_mm->handle)
- iommu_sva_unbind_device(uacce_mm->handle);
- list_del(&uacce_mm->list);
- kfree(uacce_mm);
- }
+ if (!q->handle)
+ return;
+ iommu_sva_unbind_device(q->handle);
+ q->handle = NULL;
}
static int uacce_fops_open(struct inode *inode, struct file *filep)
{
- struct uacce_mm *uacce_mm = NULL;
struct uacce_device *uacce;
struct uacce_queue *q;
int ret = 0;
@@ -205,21 +135,16 @@ static int uacce_fops_open(struct inode *inode, struct file *filep)
if (!q)
return -ENOMEM;
- mutex_lock(&uacce->mm_lock);
- uacce_mm = uacce_mm_get(uacce, q, current->mm);
- mutex_unlock(&uacce->mm_lock);
- if (!uacce_mm) {
- ret = -ENOMEM;
+ ret = uacce_bind_queue(uacce, q);
+ if (ret)
goto out_with_mem;
- }
q->uacce = uacce;
- q->uacce_mm = uacce_mm;
if (uacce->ops->get_queue) {
- ret = uacce->ops->get_queue(uacce, uacce_mm->pasid, q);
+ ret = uacce->ops->get_queue(uacce, q->pasid, q);
if (ret < 0)
- goto out_with_mm;
+ goto out_with_bond;
}
init_waitqueue_head(&q->wait);
@@ -227,12 +152,14 @@ static int uacce_fops_open(struct inode *inode, struct file *filep)
uacce->inode = inode;
q->state = UACCE_Q_INIT;
+ mutex_lock(&uacce->queues_lock);
+ list_add(&q->list, &uacce->queues);
+ mutex_unlock(&uacce->queues_lock);
+
return 0;
-out_with_mm:
- mutex_lock(&uacce->mm_lock);
- uacce_mm_put(q);
- mutex_unlock(&uacce->mm_lock);
+out_with_bond:
+ uacce_unbind_queue(q);
out_with_mem:
kfree(q);
return ret;
@@ -241,14 +168,12 @@ static int uacce_fops_open(struct inode *inode, struct file *filep)
static int uacce_fops_release(struct inode *inode, struct file *filep)
{
struct uacce_queue *q = filep->private_data;
- struct uacce_device *uacce = q->uacce;
+ mutex_lock(&q->uacce->queues_lock);
+ list_del(&q->list);
+ mutex_unlock(&q->uacce->queues_lock);
uacce_put_queue(q);
-
- mutex_lock(&uacce->mm_lock);
- uacce_mm_put(q);
- mutex_unlock(&uacce->mm_lock);
-
+ uacce_unbind_queue(q);
kfree(q);
return 0;
@@ -513,8 +438,8 @@ struct uacce_device *uacce_alloc(struct device *parent,
if (ret < 0)
goto err_with_uacce;
- INIT_LIST_HEAD(&uacce->mm_list);
- mutex_init(&uacce->mm_lock);
+ INIT_LIST_HEAD(&uacce->queues);
+ mutex_init(&uacce->queues_lock);
device_initialize(&uacce->dev);
uacce->dev.devt = MKDEV(MAJOR(uacce_devt), uacce->dev_id);
uacce->dev.class = uacce_class;
@@ -561,8 +486,7 @@ EXPORT_SYMBOL_GPL(uacce_register);
*/
void uacce_remove(struct uacce_device *uacce)
{
- struct uacce_mm *uacce_mm;
- struct uacce_queue *q;
+ struct uacce_queue *q, *next_q;
if (!uacce)
return;
@@ -574,24 +498,12 @@ void uacce_remove(struct uacce_device *uacce)
unmap_mapping_range(uacce->inode->i_mapping, 0, 0, 1);
/* ensure no open queue remains */
- mutex_lock(&uacce->mm_lock);
- list_for_each_entry(uacce_mm, &uacce->mm_list, list) {
- /*
- * We don't take the uacce_mm->lock here. Since we hold the
- * device's mm_lock, no queue can be added to or removed from
- * this uacce_mm. We may run concurrently with mm_exit, but
- * uacce_put_queue() is serialized and iommu_sva_unbind_device()
- * waits for the lock that mm_exit is holding.
- */
- list_for_each_entry(q, &uacce_mm->queues, list)
- uacce_put_queue(q);
-
- if (uacce->flags & UACCE_DEV_SVA) {
- iommu_sva_unbind_device(uacce_mm->handle);
- uacce_mm->handle = NULL;
- }
+ mutex_lock(&uacce->queues_lock);
+ list_for_each_entry_safe(q, next_q, &uacce->queues, list) {
+ uacce_put_queue(q);
+ uacce_unbind_queue(q);
}
- mutex_unlock(&uacce->mm_lock);
+ mutex_unlock(&uacce->queues_lock);
/* disable sva now since no opened queues */
if (uacce->flags & UACCE_DEV_SVA)
diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index 390e92f..b761c1f 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -31,6 +31,22 @@ void pci_ats_init(struct pci_dev *dev)
}
/**
+ * pci_ats_supported - check if the device can use ATS
+ * @dev: the PCI device
+ *
+ * Returns true if the device supports ATS and is allowed to use it, false
+ * otherwise.
+ */
+bool pci_ats_supported(struct pci_dev *dev)
+{
+	/* ATS needs the capability and a device not flagged as untrusted. */
+	return dev->ats_cap && !dev->untrusted;
+}
+EXPORT_SYMBOL_GPL(pci_ats_supported);
+
+/**
* pci_enable_ats - enable the ATS capability
* @dev: the PCI device
* @ps: the IOMMU page shift
@@ -42,7 +58,7 @@ int pci_enable_ats(struct pci_dev *dev, int ps)
u16 ctrl;
struct pci_dev *pdev;
- if (!dev->ats_cap)
+ if (!pci_ats_supported(dev))
return -EINVAL;
if (WARN_ON(dev->ats_enabled))
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 980234a..4100bd2 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -19,6 +19,7 @@
#include <linux/iommu.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/dmar.h>
+#include <linux/ioasid.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
@@ -42,6 +43,9 @@
#define DMA_FL_PTE_PRESENT BIT_ULL(0)
#define DMA_FL_PTE_XD BIT_ULL(63)
+#define ADDR_WIDTH_5LEVEL (57)
+#define ADDR_WIDTH_4LEVEL (48)
+
#define CONTEXT_TT_MULTI_LEVEL 0
#define CONTEXT_TT_DEV_IOTLB 1
#define CONTEXT_TT_PASS_THROUGH 2
@@ -166,6 +170,7 @@
#define ecap_smpwc(e) (((e) >> 48) & 0x1)
#define ecap_flts(e) (((e) >> 47) & 0x1)
#define ecap_slts(e) (((e) >> 46) & 0x1)
+#define ecap_vcs(e) (((e) >> 44) & 0x1)
#define ecap_smts(e) (((e) >> 43) & 0x1)
#define ecap_dit(e) ((e >> 41) & 0x1)
#define ecap_pasid(e) ((e >> 40) & 0x1)
@@ -191,6 +196,9 @@
#define ecap_max_handle_mask(e) ((e >> 20) & 0xf)
#define ecap_sc_support(e) ((e >> 7) & 0x1) /* Snooping Control */
+/* Virtual command interface capability */
+#define vccap_pasid(v) (((v) & DMA_VCS_PAS)) /* PASID allocation */
+
/* IOTLB_REG */
#define DMA_TLB_FLUSH_GRANU_OFFSET 60
#define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60)
@@ -284,6 +292,9 @@
/* PRS_REG */
#define DMA_PRS_PPR ((u32)1)
+#define DMA_PRS_PRO ((u32)2)
+
+#define DMA_VCS_PAS ((u64)1)
#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
do { \
@@ -324,6 +335,8 @@ enum {
#define QI_IWD_STATUS_DATA(d) (((u64)d) << 32)
#define QI_IWD_STATUS_WRITE (((u64)1) << 5)
+#define QI_IWD_FENCE (((u64)1) << 6)
+#define QI_IWD_PRQ_DRAIN (((u64)1) << 7)
#define QI_IOTLB_DID(did) (((u64)did) << 16)
#define QI_IOTLB_DR(dr) (((u64)dr) << 7)
@@ -331,7 +344,7 @@ enum {
#define QI_IOTLB_GRAN(gran) (((u64)gran) >> (DMA_TLB_FLUSH_GRANU_OFFSET-4))
#define QI_IOTLB_ADDR(addr) (((u64)addr) & VTD_PAGE_MASK)
#define QI_IOTLB_IH(ih) (((u64)ih) << 6)
-#define QI_IOTLB_AM(am) (((u8)am))
+#define QI_IOTLB_AM(am) (((u8)am) & 0x3f)
#define QI_CC_FM(fm) (((u64)fm) << 48)
#define QI_CC_SID(sid) (((u64)sid) << 32)
@@ -350,16 +363,21 @@ enum {
#define QI_PC_DID(did) (((u64)did) << 16)
#define QI_PC_GRAN(gran) (((u64)gran) << 4)
-#define QI_PC_ALL_PASIDS (QI_PC_TYPE | QI_PC_GRAN(0))
-#define QI_PC_PASID_SEL (QI_PC_TYPE | QI_PC_GRAN(1))
+/* PASID cache invalidation granu */
+#define QI_PC_ALL_PASIDS 0
+#define QI_PC_PASID_SEL 1
#define QI_EIOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK)
#define QI_EIOTLB_IH(ih) (((u64)ih) << 6)
-#define QI_EIOTLB_AM(am) (((u64)am))
+#define QI_EIOTLB_AM(am) (((u64)am) & 0x3f)
#define QI_EIOTLB_PASID(pasid) (((u64)pasid) << 32)
#define QI_EIOTLB_DID(did) (((u64)did) << 16)
#define QI_EIOTLB_GRAN(gran) (((u64)gran) << 4)
+/* QI Dev-IOTLB inv granu */
+#define QI_DEV_IOTLB_GRAN_ALL 1
+#define QI_DEV_IOTLB_GRAN_PASID_SEL 0
+
#define QI_DEV_EIOTLB_ADDR(a) ((u64)(a) & VTD_PAGE_MASK)
#define QI_DEV_EIOTLB_SIZE (((u64)1) << 11)
#define QI_DEV_EIOTLB_GLOB(g) ((u64)g)
@@ -480,6 +498,23 @@ struct context_entry {
u64 hi;
};
+/* si_domain contains multiple devices */
+#define DOMAIN_FLAG_STATIC_IDENTITY BIT(0)
+
+/*
+ * When VT-d works in the scalable mode, it allows DMA translation to
+ * happen through either first level or second level page table. This
+ * bit marks that the DMA translation for the domain goes through the
+ * first level page table, otherwise, it goes through the second level.
+ */
+#define DOMAIN_FLAG_USE_FIRST_LEVEL BIT(1)
+
+/*
+ * Domain represents a virtual machine which demands iommu nested
+ * translation mode support.
+ */
+#define DOMAIN_FLAG_NESTING_MODE BIT(2)
+
struct dmar_domain {
int nid; /* node id */
@@ -529,6 +564,7 @@ struct intel_iommu {
u64 reg_size; /* size of hw register set */
u64 cap;
u64 ecap;
+ u64 vccap;
u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
raw_spinlock_t register_lock; /* protect register handling */
int seq_id; /* sequence id of the iommu */
@@ -549,6 +585,8 @@ struct intel_iommu {
#ifdef CONFIG_INTEL_IOMMU_SVM
struct page_req_dsc *prq;
unsigned char prq_name[16]; /* Name for PRQ interrupt */
+ struct completion prq_complete;
+ struct ioasid_allocator_ops pasid_allocator; /* Custom allocator for PASIDs */
#endif
struct q_inval *qi; /* Queued invalidation info */
u32 *iommu_state; /* Store iommu states between suspend and resume.*/
@@ -571,6 +609,7 @@ struct device_domain_info {
struct list_head auxiliary_domains; /* auxiliary domains
* attached to this device
*/
+ u32 segment; /* PCI segment number */
u8 bus; /* PCI bus number */
u8 devfn; /* PCI devfn number */
u16 pfsid; /* SRIOV physical function source ID */
@@ -595,6 +634,12 @@ static inline void __iommu_flush_cache(
clflush_cache_range(addr, size);
}
+/*
+ * Convert generic struct iommu_domain to private struct dmar_domain.
+ * Uses container_of(), so @dom must be the "domain" member embedded in a
+ * struct dmar_domain.
+ */
+static inline struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
+{
+ return container_of(dom, struct dmar_domain, domain);
+}
+
/*
* 0: readable
* 1: writable
@@ -653,9 +698,23 @@ extern void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
unsigned int size_order, u64 type);
extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
u16 qdep, u64 addr, unsigned mask);
+
void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
unsigned long npages, bool ih);
-extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
+
+void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+ u32 pasid, u16 qdep, u64 addr,
+ unsigned int size_order, u64 granu);
+void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu,
+ int pasid);
+
+int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
+ unsigned int count, unsigned long options);
+/*
+ * Options used in qi_submit_sync:
+ * QI_OPT_WAIT_DRAIN - Wait for PRQ drain completion, spec 6.5.2.8.
+ */
+#define QI_OPT_WAIT_DRAIN BIT(0)
extern int dmar_ir_support(void);
@@ -667,12 +726,19 @@ int for_each_device_domain(int (*fn)(struct device_domain_info *info,
void iommu_flush_write_buffer(struct intel_iommu *iommu);
int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev);
struct dmar_domain *find_domain(struct device *dev);
+struct device_domain_info *get_domain_info(struct device *dev);
#ifdef CONFIG_INTEL_IOMMU_SVM
extern void intel_svm_check(struct intel_iommu *iommu);
extern int intel_svm_enable_prq(struct intel_iommu *iommu);
extern int intel_svm_finish_prq(struct intel_iommu *iommu);
-
+int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
+ struct iommu_gpasid_bind_data *data);
+int intel_svm_unbind_gpasid(struct device *dev, int pasid);
+struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm,
+ void *drvdata);
+void intel_svm_unbind(struct iommu_sva *handle);
+int intel_svm_get_pasid(struct iommu_sva *handle);
struct svm_dev_ops;
struct intel_svm_dev {
@@ -680,6 +746,8 @@ struct intel_svm_dev {
struct rcu_head rcu;
struct device *dev;
struct svm_dev_ops *ops;
+ struct iommu_sva sva;
+ int pasid;
int users;
u16 did;
u16 dev_iotlb:1;
@@ -689,9 +757,11 @@ struct intel_svm_dev {
struct intel_svm {
struct mmu_notifier notifier;
struct mm_struct *mm;
+
struct intel_iommu *iommu;
int flags;
int pasid;
+ int gpasid; /* In case that guest PASID is different from host PASID */
struct list_head devs;
struct list_head list;
};
diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h
index d7c403d..c9e7e60 100644
--- a/include/linux/intel-svm.h
+++ b/include/linux/intel-svm.h
@@ -21,7 +21,6 @@ struct svm_dev_ops {
#define SVM_REQ_EXEC (1<<1)
#define SVM_REQ_PRIV (1<<0)
-
/*
* The SVM_FLAG_PRIVATE_PASID flag requests a PASID which is *not* the "main"
* PASID for the current process. Even if a PASID already exists, a new one
@@ -44,90 +43,17 @@ struct svm_dev_ops {
* do such IOTLB flushes automatically.
*/
#define SVM_FLAG_SUPERVISOR_MODE (1<<1)
-
-#ifdef CONFIG_INTEL_IOMMU_SVM
-
-/**
- * intel_svm_bind_mm() - Bind the current process to a PASID
- * @dev: Device to be granted access
- * @pasid: Address for allocated PASID
- * @flags: Flags. Later for requesting supervisor mode, etc.
- * @ops: Callbacks to device driver
- *
- * This function attempts to enable PASID support for the given device.
- * If the @pasid argument is non-%NULL, a PASID is allocated for access
- * to the MM of the current process.
- *
- * By using a %NULL value for the @pasid argument, this function can
- * be used to simply validate that PASID support is available for the
- * given device — i.e. that it is behind an IOMMU which has the
- * requisite support, and is enabled.
- *
- * Page faults are handled transparently by the IOMMU code, and there
- * should be no need for the device driver to be involved. If a page
- * fault cannot be handled (i.e. is an invalid address rather than
- * just needs paging in), then the page request will be completed by
- * the core IOMMU code with appropriate status, and the device itself
- * can then report the resulting fault to its driver via whatever
- * mechanism is appropriate.
- *
- * Multiple calls from the same process may result in the same PASID
- * being re-used. A reference count is kept.
+/*
+ * The SVM_FLAG_GUEST_MODE flag is used when a PASID bind is for guest
+ * processes. Compared to the host bind, the primary differences are:
+ * 1. mm life cycle management
+ * 2. fault reporting
*/
-extern int intel_svm_bind_mm(struct device *dev, int *pasid, int flags,
- struct svm_dev_ops *ops);
-
-/**
- * intel_svm_unbind_mm() - Unbind a specified PASID
- * @dev: Device for which PASID was allocated
- * @pasid: PASID value to be unbound
- *
- * This function allows a PASID to be retired when the device no
- * longer requires access to the address space of a given process.
- *
- * If the use count for the PASID in question reaches zero, the
- * PASID is revoked and may no longer be used by hardware.
- *
- * Device drivers are required to ensure that no access (including
- * page requests) is currently outstanding for the PASID in question,
- * before calling this function.
+#define SVM_FLAG_GUEST_MODE (1<<2)
+/*
+ * The SVM_FLAG_GUEST_PASID flag is used when a guest has its own PASID space,
+ * which requires guest and host PASID translation at both directions.
*/
-extern int intel_svm_unbind_mm(struct device *dev, int pasid);
-
-/**
- * intel_svm_is_pasid_valid() - check if pasid is valid
- * @dev: Device for which PASID was allocated
- * @pasid: PASID value to be checked
- *
- * This function checks if the specified pasid is still valid. A
- * valid pasid means the backing mm is still having a valid user.
- * For kernel callers init_mm is always valid. for other mm, if mm->mm_users
- * is non-zero, it is valid.
- *
- * returns -EINVAL if invalid pasid, 0 if pasid ref count is invalid
- * 1 if pasid is valid.
- */
-extern int intel_svm_is_pasid_valid(struct device *dev, int pasid);
-
-#else /* CONFIG_INTEL_IOMMU_SVM */
-
-static inline int intel_svm_bind_mm(struct device *dev, int *pasid,
- int flags, struct svm_dev_ops *ops)
-{
- return -ENOSYS;
-}
-
-static inline int intel_svm_unbind_mm(struct device *dev, int pasid)
-{
- BUG();
-}
-
-static inline int intel_svm_is_pasid_valid(struct device *dev, int pasid)
-{
- return -EINVAL;
-}
-#endif /* CONFIG_INTEL_IOMMU_SVM */
-
-#define intel_svm_available(dev) (!intel_svm_bind_mm((dev), NULL, 0, NULL))
+#define SVM_FLAG_GUEST_PASID (1<<3)
#endif /* __INTEL_SVM_H__ */
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 7ef8b0b..4e3cd32 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -53,8 +53,6 @@ struct iommu_fault_event;
typedef int (*iommu_fault_handler_t)(struct iommu_domain *,
struct device *, unsigned long, int, void *);
-typedef int (*iommu_mm_exit_handler_t)(struct device *dev, struct iommu_sva *,
- void *);
typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault *, void *);
struct iommu_domain_geometry {
@@ -171,25 +169,6 @@ enum iommu_dev_features {
#define IOMMU_PASID_INVALID (-1U)
-/**
- * struct iommu_sva_ops - device driver callbacks for an SVA context
- *
- * @mm_exit: called when the mm is about to be torn down by exit_mmap. After
- * @mm_exit returns, the device must not issue any more transaction
- * with the PASID given as argument.
- *
- * The @mm_exit handler is allowed to sleep. Be careful about the
- * locks taken in @mm_exit, because they might lead to deadlocks if
- * they are also held when dropping references to the mm. Consider the
- * following call chain:
- * mutex_lock(A); mmput(mm) -> exit_mm() -> @mm_exit() -> mutex_lock(A)
- * Using mmput_async() prevents this scenario.
- *
- */
-struct iommu_sva_ops {
- iommu_mm_exit_handler_t mm_exit;
-};
-
#ifdef CONFIG_IOMMU_API
/**
@@ -223,8 +202,10 @@ struct iommu_iotlb_gather {
* @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush
* queue
* @iova_to_phys: translate iova to physical address
- * @add_device: add device to iommu grouping
- * @remove_device: remove device from iommu grouping
+ * @probe_device: Add device to iommu driver handling
+ * @release_device: Remove device from iommu driver handling
+ * @probe_finalize: Do final setup work after the device is added to an IOMMU
+ * group and attached to the group's domain
* @device_group: find iommu group for a particular device
* @domain_get_attr: Query domain attributes
* @domain_set_attr: Change domain attributes
@@ -248,6 +229,10 @@ struct iommu_iotlb_gather {
* @cache_invalidate: invalidate translation caches
* @sva_bind_gpasid: bind guest pasid and mm
* @sva_unbind_gpasid: unbind guest pasid and mm
+ * @def_domain_type: device default domain type, return value:
+ * - IOMMU_DOMAIN_IDENTITY: must use an identity domain
+ * - IOMMU_DOMAIN_DMA: must use a dma domain
+ * - 0: use the default setting
* @pgsize_bitmap: bitmap of all possible supported page sizes
* @owner: Driver module providing these ops
*/
@@ -269,8 +254,9 @@ struct iommu_ops {
void (*iotlb_sync)(struct iommu_domain *domain,
struct iommu_iotlb_gather *iotlb_gather);
phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova);
- int (*add_device)(struct device *dev);
- void (*remove_device)(struct device *dev);
+ struct iommu_device *(*probe_device)(struct device *dev);
+ void (*release_device)(struct device *dev);
+ void (*probe_finalize)(struct device *dev);
struct iommu_group *(*device_group)(struct device *dev);
int (*domain_get_attr)(struct iommu_domain *domain,
enum iommu_attr attr, void *data);
@@ -318,6 +304,8 @@ struct iommu_ops {
int (*sva_unbind_gpasid)(struct device *dev, int pasid);
+ int (*def_domain_type)(struct device *dev);
+
unsigned long pgsize_bitmap;
struct module *owner;
};
@@ -369,6 +357,7 @@ struct iommu_fault_param {
*
* @fault_param: IOMMU detected device fault reporting data
* @fwspec: IOMMU fwspec data
+ * @iommu_dev: IOMMU device this device is linked to
* @priv: IOMMU Driver private data
*
* TODO: migrate other per device data pointers under iommu_dev_data, e.g.
@@ -378,6 +367,7 @@ struct dev_iommu {
struct mutex lock;
struct iommu_fault_param *fault_param;
struct iommu_fwspec *fwspec;
+ struct iommu_device *iommu_dev;
void *priv;
};
@@ -430,6 +420,7 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather)
#define IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER 6 /* Post Driver unbind */
extern int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops);
+extern int bus_iommu_probe(struct bus_type *bus);
extern bool iommu_present(struct bus_type *bus);
extern bool iommu_capable(struct bus_type *bus, enum iommu_cap cap);
extern struct iommu_domain *iommu_domain_alloc(struct bus_type *bus);
@@ -470,8 +461,6 @@ extern void iommu_get_resv_regions(struct device *dev, struct list_head *list);
extern void iommu_put_resv_regions(struct device *dev, struct list_head *list);
extern void generic_iommu_put_resv_regions(struct device *dev,
struct list_head *list);
-extern int iommu_request_dm_for_dev(struct device *dev);
-extern int iommu_request_dma_domain_for_dev(struct device *dev);
extern void iommu_set_default_passthrough(bool cmd_line);
extern void iommu_set_default_translated(bool cmd_line);
extern bool iommu_default_passthrough(void);
@@ -515,7 +504,6 @@ extern int iommu_page_response(struct device *dev,
struct iommu_page_response *msg);
extern int iommu_group_id(struct iommu_group *group);
-extern struct iommu_group *iommu_group_get_for_dev(struct device *dev);
extern struct iommu_domain *iommu_group_default_domain(struct iommu_group *);
extern int iommu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr,
@@ -605,7 +593,6 @@ struct iommu_fwspec {
*/
struct iommu_sva {
struct device *dev;
- const struct iommu_sva_ops *ops;
};
int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
@@ -653,8 +640,6 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev,
struct mm_struct *mm,
void *drvdata);
void iommu_sva_unbind_device(struct iommu_sva *handle);
-int iommu_sva_set_ops(struct iommu_sva *handle,
- const struct iommu_sva_ops *ops);
int iommu_sva_get_pasid(struct iommu_sva *handle);
#else /* CONFIG_IOMMU_API */
@@ -793,16 +778,6 @@ static inline int iommu_get_group_resv_regions(struct iommu_group *group,
return -ENODEV;
}
-static inline int iommu_request_dm_for_dev(struct device *dev)
-{
- return -ENODEV;
-}
-
-static inline int iommu_request_dma_domain_for_dev(struct device *dev)
-{
- return -ENODEV;
-}
-
static inline void iommu_set_default_passthrough(bool cmd_line)
{
}
@@ -1058,12 +1033,6 @@ static inline void iommu_sva_unbind_device(struct iommu_sva *handle)
{
}
-static inline int iommu_sva_set_ops(struct iommu_sva *handle,
- const struct iommu_sva_ops *ops)
-{
- return -EINVAL;
-}
-
static inline int iommu_sva_get_pasid(struct iommu_sva *handle)
{
return IOMMU_PASID_INVALID;
diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
index d08f0869..f75c307 100644
--- a/include/linux/pci-ats.h
+++ b/include/linux/pci-ats.h
@@ -6,11 +6,14 @@
#ifdef CONFIG_PCI_ATS
/* Address Translation Service */
+bool pci_ats_supported(struct pci_dev *dev);
int pci_enable_ats(struct pci_dev *dev, int ps);
void pci_disable_ats(struct pci_dev *dev);
int pci_ats_queue_depth(struct pci_dev *dev);
int pci_ats_page_aligned(struct pci_dev *dev);
#else /* CONFIG_PCI_ATS */
+static inline bool pci_ats_supported(struct pci_dev *d)
+{ return false; }
static inline int pci_enable_ats(struct pci_dev *d, int ps)
{ return -ENODEV; }
static inline void pci_disable_ats(struct pci_dev *d) { }
diff --git a/include/linux/uacce.h b/include/linux/uacce.h
index 0e215e6..454c2f6 100644
--- a/include/linux/uacce.h
+++ b/include/linux/uacce.h
@@ -68,19 +68,21 @@ enum uacce_q_state {
* @uacce: pointer to uacce
* @priv: private pointer
* @wait: wait queue head
- * @list: index into uacce_mm
- * @uacce_mm: the corresponding mm
+ * @list: index into uacce queues list
* @qfrs: pointer of qfr regions
* @state: queue state machine
+ * @pasid: pasid associated with the mm
+ * @handle: iommu_sva handle returned by iommu_sva_bind_device()
*/
struct uacce_queue {
struct uacce_device *uacce;
void *priv;
wait_queue_head_t wait;
struct list_head list;
- struct uacce_mm *uacce_mm;
struct uacce_qfile_region *qfrs[UACCE_MAX_REGION];
enum uacce_q_state state;
+ int pasid;
+ struct iommu_sva *handle;
};
/**
@@ -96,8 +98,8 @@ struct uacce_queue {
* @cdev: cdev of the uacce
* @dev: dev of the uacce
* @priv: private pointer of the uacce
- * @mm_list: list head of uacce_mm->list
- * @mm_lock: lock for mm_list
+ * @queues: list of queues
+ * @queues_lock: lock for queues list
* @inode: core vfs
*/
struct uacce_device {
@@ -112,27 +114,9 @@ struct uacce_device {
struct cdev *cdev;
struct device dev;
void *priv;
- struct list_head mm_list;
- struct mutex mm_lock;
- struct inode *inode;
-};
-
-/**
- * struct uacce_mm - keep track of queues bound to a process
- * @list: index into uacce_device
- * @queues: list of queues
- * @mm: the mm struct
- * @lock: protects the list of queues
- * @pasid: pasid of the uacce_mm
- * @handle: iommu_sva handle return from iommu_sva_bind_device
- */
-struct uacce_mm {
- struct list_head list;
struct list_head queues;
- struct mm_struct *mm;
- struct mutex lock;
- int pasid;
- struct iommu_sva *handle;
+ struct mutex queues_lock;
+ struct inode *inode;
};
#if IS_ENABLED(CONFIG_UACCE)
diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
index 4ad3496e..e907b70 100644
--- a/include/uapi/linux/iommu.h
+++ b/include/uapi/linux/iommu.h
@@ -285,6 +285,11 @@ struct iommu_gpasid_bind_data_vtd {
__u32 emt;
};
+#define IOMMU_SVA_VTD_GPASID_MTS_MASK (IOMMU_SVA_VTD_GPASID_CD | \
+ IOMMU_SVA_VTD_GPASID_EMTE | \
+ IOMMU_SVA_VTD_GPASID_PCD | \
+ IOMMU_SVA_VTD_GPASID_PWT)
+
/**
* struct iommu_gpasid_bind_data - Information about device and guest PASID binding
* @version: Version of this data structure