Merge tag 'tegra-for-4.8-i2c' of git://git.kernel.org/pub/scm/linux/kernel/git/tegra...
[cascardo/linux.git] / drivers / iommu / arm-smmu-v3.c
index 641e887..15c01c3 100644 (file)
 #include <linux/msi.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/of_iommu.h>
 #include <linux/of_platform.h>
 #include <linux/pci.h>
 #include <linux/platform_device.h>
 
+#include <linux/amba/bus.h>
+
 #include "io-pgtable.h"
 
 /* MMIO registers */
 #define CR2_RECINVSID                  (1 << 1)
 #define CR2_E2H                                (1 << 0)
 
+#define ARM_SMMU_GBPA                  0x44
+#define GBPA_ABORT                     (1 << 20)
+#define GBPA_UPDATE                    (1 << 31)
+
 #define ARM_SMMU_IRQ_CTRL              0x50
 #define IRQ_CTRL_EVTQ_IRQEN            (1 << 2)
 #define IRQ_CTRL_PRIQ_IRQEN            (1 << 1)
 #define STRTAB_STE_1_SHCFG_INCOMING    1UL
 #define STRTAB_STE_1_SHCFG_SHIFT       44
 
+#define STRTAB_STE_1_PRIVCFG_UNPRIV    2UL
+#define STRTAB_STE_1_PRIVCFG_SHIFT     48
+
 #define STRTAB_STE_2_S2VMID_SHIFT      0
 #define STRTAB_STE_2_S2VMID_MASK       0xffffUL
 #define STRTAB_STE_2_VTCR_SHIFT                32
@@ -606,12 +616,9 @@ struct arm_smmu_device {
        struct arm_smmu_strtab_cfg      strtab_cfg;
 };
 
-/* SMMU private data for an IOMMU group */
-struct arm_smmu_group {
+/* SMMU private data for each master */
+struct arm_smmu_master_data {
        struct arm_smmu_device          *smmu;
-       struct arm_smmu_domain          *domain;
-       int                             num_sids;
-       u32                             *sids;
        struct arm_smmu_strtab_ent      ste;
 };
 
@@ -713,19 +720,15 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
        writel(q->prod, q->prod_reg);
 }
 
-static bool __queue_cons_before(struct arm_smmu_queue *q, u32 until)
-{
-       if (Q_WRP(q, q->cons) == Q_WRP(q, until))
-               return Q_IDX(q, q->cons) < Q_IDX(q, until);
-
-       return Q_IDX(q, q->cons) >= Q_IDX(q, until);
-}
-
-static int queue_poll_cons(struct arm_smmu_queue *q, u32 until, bool wfe)
+/*
+ * Wait for the SMMU to consume items. If drain is true, wait until the queue
+ * is empty. Otherwise, wait until there is at least one free slot.
+ */
+static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
 {
        ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
 
-       while (queue_sync_cons(q), __queue_cons_before(q, until)) {
+       while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) {
                if (ktime_compare(ktime_get(), timeout) > 0)
                        return -ETIMEDOUT;
 
@@ -896,8 +899,8 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
                                    struct arm_smmu_cmdq_ent *ent)
 {
-       u32 until;
        u64 cmd[CMDQ_ENT_DWORDS];
+       unsigned long flags;
        bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
        struct arm_smmu_queue *q = &smmu->cmdq.q;
 
@@ -907,20 +910,15 @@ static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
                return;
        }
 
-       spin_lock(&smmu->cmdq.lock);
-       while (until = q->prod + 1, queue_insert_raw(q, cmd) == -ENOSPC) {
-               /*
-                * Keep the queue locked, otherwise the producer could wrap
-                * twice and we could see a future consumer pointer that looks
-                * like it's behind us.
-                */
-               if (queue_poll_cons(q, until, wfe))
+       spin_lock_irqsave(&smmu->cmdq.lock, flags);
+       while (queue_insert_raw(q, cmd) == -ENOSPC) {
+               if (queue_poll_cons(q, false, wfe))
                        dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
        }
 
-       if (ent->opcode == CMDQ_OP_CMD_SYNC && queue_poll_cons(q, until, wfe))
+       if (ent->opcode == CMDQ_OP_CMD_SYNC && queue_poll_cons(q, true, wfe))
                dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
-       spin_unlock(&smmu->cmdq.lock);
+       spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
 }
 
 /* Context descriptor manipulation functions */
@@ -1073,7 +1071,9 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
 #ifdef CONFIG_PCI_ATS
                         STRTAB_STE_1_EATS_TRANS << STRTAB_STE_1_EATS_SHIFT |
 #endif
-                        STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT);
+                        STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT |
+                        STRTAB_STE_1_PRIVCFG_UNPRIV <<
+                        STRTAB_STE_1_PRIVCFG_SHIFT);
 
                if (smmu->features & ARM_SMMU_FEAT_STALLS)
                        dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
@@ -1161,36 +1161,66 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
        struct arm_smmu_queue *q = &smmu->evtq.q;
        u64 evt[EVTQ_ENT_DWORDS];
 
-       while (!queue_remove_raw(q, evt)) {
-               u8 id = evt[0] >> EVTQ_0_ID_SHIFT & EVTQ_0_ID_MASK;
+       do {
+               while (!queue_remove_raw(q, evt)) {
+                       u8 id = evt[0] >> EVTQ_0_ID_SHIFT & EVTQ_0_ID_MASK;
 
-               dev_info(smmu->dev, "event 0x%02x received:\n", id);
-               for (i = 0; i < ARRAY_SIZE(evt); ++i)
-                       dev_info(smmu->dev, "\t0x%016llx\n",
-                                (unsigned long long)evt[i]);
-       }
+                       dev_info(smmu->dev, "event 0x%02x received:\n", id);
+                       for (i = 0; i < ARRAY_SIZE(evt); ++i)
+                               dev_info(smmu->dev, "\t0x%016llx\n",
+                                        (unsigned long long)evt[i]);
+
+               }
+
+               /*
+                * Not much we can do on overflow, so scream and pretend we're
+                * trying harder.
+                */
+               if (queue_sync_prod(q) == -EOVERFLOW)
+                       dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
+       } while (!queue_empty(q));
 
        /* Sync our overflow flag, as we believe we're up to speed */
        q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
        return IRQ_HANDLED;
 }
 
-static irqreturn_t arm_smmu_evtq_handler(int irq, void *dev)
-{
-       irqreturn_t ret = IRQ_WAKE_THREAD;
-       struct arm_smmu_device *smmu = dev;
-       struct arm_smmu_queue *q = &smmu->evtq.q;
+static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
+{
+       u32 sid, ssid;
+       u16 grpid;
+       bool ssv, last;
+
+       sid = evt[0] >> PRIQ_0_SID_SHIFT & PRIQ_0_SID_MASK;
+       ssv = evt[0] & PRIQ_0_SSID_V;
+       ssid = ssv ? evt[0] >> PRIQ_0_SSID_SHIFT & PRIQ_0_SSID_MASK : 0;
+       last = evt[0] & PRIQ_0_PRG_LAST;
+       grpid = evt[1] >> PRIQ_1_PRG_IDX_SHIFT & PRIQ_1_PRG_IDX_MASK;
+
+       dev_info(smmu->dev, "unexpected PRI request received:\n");
+       dev_info(smmu->dev,
+                "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
+                sid, ssid, grpid, last ? "L" : "",
+                evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
+                evt[0] & PRIQ_0_PERM_READ ? "R" : "",
+                evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
+                evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
+                evt[1] & PRIQ_1_ADDR_MASK << PRIQ_1_ADDR_SHIFT);
+
+       if (last) {
+               struct arm_smmu_cmdq_ent cmd = {
+                       .opcode                 = CMDQ_OP_PRI_RESP,
+                       .substream_valid        = ssv,
+                       .pri                    = {
+                               .sid    = sid,
+                               .ssid   = ssid,
+                               .grpid  = grpid,
+                               .resp   = PRI_RESP_DENY,
+                       },
+               };
 
-       /*
-        * Not much we can do on overflow, so scream and pretend we're
-        * trying harder.
-        */
-       if (queue_sync_prod(q) == -EOVERFLOW)
-               dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
-       else if (queue_empty(q))
-               ret = IRQ_NONE;
-
-       return ret;
+               arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+       }
 }
 
 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
@@ -1199,63 +1229,19 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
        struct arm_smmu_queue *q = &smmu->priq.q;
        u64 evt[PRIQ_ENT_DWORDS];
 
-       while (!queue_remove_raw(q, evt)) {
-               u32 sid, ssid;
-               u16 grpid;
-               bool ssv, last;
+       do {
+               while (!queue_remove_raw(q, evt))
+                       arm_smmu_handle_ppr(smmu, evt);
 
-               sid = evt[0] >> PRIQ_0_SID_SHIFT & PRIQ_0_SID_MASK;
-               ssv = evt[0] & PRIQ_0_SSID_V;
-               ssid = ssv ? evt[0] >> PRIQ_0_SSID_SHIFT & PRIQ_0_SSID_MASK : 0;
-               last = evt[0] & PRIQ_0_PRG_LAST;
-               grpid = evt[1] >> PRIQ_1_PRG_IDX_SHIFT & PRIQ_1_PRG_IDX_MASK;
-
-               dev_info(smmu->dev, "unexpected PRI request received:\n");
-               dev_info(smmu->dev,
-                        "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
-                        sid, ssid, grpid, last ? "L" : "",
-                        evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
-                        evt[0] & PRIQ_0_PERM_READ ? "R" : "",
-                        evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
-                        evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
-                        evt[1] & PRIQ_1_ADDR_MASK << PRIQ_1_ADDR_SHIFT);
-
-               if (last) {
-                       struct arm_smmu_cmdq_ent cmd = {
-                               .opcode                 = CMDQ_OP_PRI_RESP,
-                               .substream_valid        = ssv,
-                               .pri                    = {
-                                       .sid    = sid,
-                                       .ssid   = ssid,
-                                       .grpid  = grpid,
-                                       .resp   = PRI_RESP_DENY,
-                               },
-                       };
-
-                       arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-               }
-       }
+               if (queue_sync_prod(q) == -EOVERFLOW)
+                       dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
+       } while (!queue_empty(q));
 
        /* Sync our overflow flag, as we believe we're up to speed */
        q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
        return IRQ_HANDLED;
 }
 
-static irqreturn_t arm_smmu_priq_handler(int irq, void *dev)
-{
-       irqreturn_t ret = IRQ_WAKE_THREAD;
-       struct arm_smmu_device *smmu = dev;
-       struct arm_smmu_queue *q = &smmu->priq.q;
-
-       /* PRIQ overflow indicates a programming error */
-       if (queue_sync_prod(q) == -EOVERFLOW)
-               dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
-       else if (queue_empty(q))
-               ret = IRQ_NONE;
-
-       return ret;
-}
-
 static irqreturn_t arm_smmu_cmdq_sync_handler(int irq, void *dev)
 {
        /* We don't actually use CMD_SYNC interrupts for anything */
@@ -1288,15 +1274,11 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
        if (active & GERROR_MSI_GERROR_ABT_ERR)
                dev_warn(smmu->dev, "GERROR MSI write aborted\n");
 
-       if (active & GERROR_MSI_PRIQ_ABT_ERR) {
+       if (active & GERROR_MSI_PRIQ_ABT_ERR)
                dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
-               arm_smmu_priq_handler(irq, smmu->dev);
-       }
 
-       if (active & GERROR_MSI_EVTQ_ABT_ERR) {
+       if (active & GERROR_MSI_EVTQ_ABT_ERR)
                dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
-               arm_smmu_evtq_handler(irq, smmu->dev);
-       }
 
        if (active & GERROR_MSI_CMDQ_ABT_ERR) {
                dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
@@ -1569,6 +1551,8 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
                return -ENOMEM;
 
        domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
+       domain->geometry.aperture_end = (1UL << ias) - 1;
+       domain->geometry.force_aperture = true;
        smmu_domain->pgtbl_ops = pgtbl_ops;
 
        ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
@@ -1578,20 +1562,6 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
        return ret;
 }
 
-static struct arm_smmu_group *arm_smmu_group_get(struct device *dev)
-{
-       struct iommu_group *group;
-       struct arm_smmu_group *smmu_group;
-
-       group = iommu_group_get(dev);
-       if (!group)
-               return NULL;
-
-       smmu_group = iommu_group_get_iommudata(group);
-       iommu_group_put(group);
-       return smmu_group;
-}
-
 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
 {
        __le64 *step;
@@ -1614,27 +1584,17 @@ static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
        return step;
 }
 
-static int arm_smmu_install_ste_for_group(struct arm_smmu_group *smmu_group)
+static int arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
 {
        int i;
-       struct arm_smmu_domain *smmu_domain = smmu_group->domain;
-       struct arm_smmu_strtab_ent *ste = &smmu_group->ste;
-       struct arm_smmu_device *smmu = smmu_group->smmu;
+       struct arm_smmu_master_data *master = fwspec->iommu_priv;
+       struct arm_smmu_device *smmu = master->smmu;
 
-       if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
-               ste->s1_cfg = &smmu_domain->s1_cfg;
-               ste->s2_cfg = NULL;
-               arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
-       } else {
-               ste->s1_cfg = NULL;
-               ste->s2_cfg = &smmu_domain->s2_cfg;
-       }
-
-       for (i = 0; i < smmu_group->num_sids; ++i) {
-               u32 sid = smmu_group->sids[i];
+       for (i = 0; i < fwspec->num_ids; ++i) {
+               u32 sid = fwspec->ids[i];
                __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
 
-               arm_smmu_write_strtab_ent(smmu, sid, step, ste);
+               arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste);
        }
 
        return 0;
@@ -1642,13 +1602,11 @@ static int arm_smmu_install_ste_for_group(struct arm_smmu_group *smmu_group)
 
 static void arm_smmu_detach_dev(struct device *dev)
 {
-       struct arm_smmu_group *smmu_group = arm_smmu_group_get(dev);
+       struct arm_smmu_master_data *master = dev->iommu_fwspec->iommu_priv;
 
-       smmu_group->ste.bypass = true;
-       if (arm_smmu_install_ste_for_group(smmu_group) < 0)
+       master->ste.bypass = true;
+       if (arm_smmu_install_ste_for_dev(dev->iommu_fwspec) < 0)
                dev_warn(dev, "failed to install bypass STE\n");
-
-       smmu_group->domain = NULL;
 }
 
 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
@@ -1656,16 +1614,20 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
        int ret = 0;
        struct arm_smmu_device *smmu;
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-       struct arm_smmu_group *smmu_group = arm_smmu_group_get(dev);
+       struct arm_smmu_master_data *master;
+       struct arm_smmu_strtab_ent *ste;
 
-       if (!smmu_group)
+       if (!dev->iommu_fwspec)
                return -ENOENT;
 
+       master = dev->iommu_fwspec->iommu_priv;
+       smmu = master->smmu;
+       ste = &master->ste;
+
        /* Already attached to a different domain? */
-       if (smmu_group->domain && smmu_group->domain != smmu_domain)
+       if (!ste->bypass)
                arm_smmu_detach_dev(dev);
 
-       smmu = smmu_group->smmu;
        mutex_lock(&smmu_domain->init_mutex);
 
        if (!smmu_domain->smmu) {
@@ -1684,21 +1646,21 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
                goto out_unlock;
        }
 
-       /* Group already attached to this domain? */
-       if (smmu_group->domain)
-               goto out_unlock;
-
-       smmu_group->domain      = smmu_domain;
+       ste->bypass = false;
+       ste->valid = true;
 
-       /*
-        * FIXME: This should always be "false" once we have IOMMU-backed
-        * DMA ops for all devices behind the SMMU.
-        */
-       smmu_group->ste.bypass  = domain->type == IOMMU_DOMAIN_DMA;
+       if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
+               ste->s1_cfg = &smmu_domain->s1_cfg;
+               ste->s2_cfg = NULL;
+               arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
+       } else {
+               ste->s1_cfg = NULL;
+               ste->s2_cfg = &smmu_domain->s2_cfg;
+       }
 
-       ret = arm_smmu_install_ste_for_group(smmu_group);
+       ret = arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
        if (ret < 0)
-               smmu_group->domain = NULL;
+               ste->valid = false;
 
 out_unlock:
        mutex_unlock(&smmu_domain->init_mutex);
@@ -1757,40 +1719,19 @@ arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
        return ret;
 }
 
-static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *sidp)
-{
-       *(u32 *)sidp = alias;
-       return 0; /* Continue walking */
-}
+static struct platform_driver arm_smmu_driver;
 
-static void __arm_smmu_release_pci_iommudata(void *data)
+static int arm_smmu_match_node(struct device *dev, void *data)
 {
-       kfree(data);
+       return dev->of_node == data;
 }
 
-static struct arm_smmu_device *arm_smmu_get_for_pci_dev(struct pci_dev *pdev)
+static struct arm_smmu_device *arm_smmu_get_by_node(struct device_node *np)
 {
-       struct device_node *of_node;
-       struct platform_device *smmu_pdev;
-       struct arm_smmu_device *smmu = NULL;
-       struct pci_bus *bus = pdev->bus;
-
-       /* Walk up to the root bus */
-       while (!pci_is_root_bus(bus))
-               bus = bus->parent;
-
-       /* Follow the "iommus" phandle from the host controller */
-       of_node = of_parse_phandle(bus->bridge->parent->of_node, "iommus", 0);
-       if (!of_node)
-               return NULL;
-
-       /* See if we can find an SMMU corresponding to the phandle */
-       smmu_pdev = of_find_device_by_node(of_node);
-       if (smmu_pdev)
-               smmu = platform_get_drvdata(smmu_pdev);
-
-       of_node_put(of_node);
-       return smmu;
+       struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
+                                               np, arm_smmu_match_node);
+       put_device(dev);
+       return dev ? dev_get_drvdata(dev) : NULL;
 }
 
 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
@@ -1803,94 +1744,91 @@ static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
        return sid < limit;
 }
 
+static struct iommu_ops arm_smmu_ops;
+
 static int arm_smmu_add_device(struct device *dev)
 {
        int i, ret;
-       u32 sid, *sids;
-       struct pci_dev *pdev;
-       struct iommu_group *group;
-       struct arm_smmu_group *smmu_group;
        struct arm_smmu_device *smmu;
+       struct arm_smmu_master_data *master;
+       struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+       struct iommu_group *group;
 
-       /* We only support PCI, for now */
-       if (!dev_is_pci(dev))
+       if (!fwspec || fwspec->ops != &arm_smmu_ops)
                return -ENODEV;
-
-       pdev = to_pci_dev(dev);
-       group = iommu_group_get_for_dev(dev);
-       if (IS_ERR(group))
-               return PTR_ERR(group);
-
-       smmu_group = iommu_group_get_iommudata(group);
-       if (!smmu_group) {
-               smmu = arm_smmu_get_for_pci_dev(pdev);
-               if (!smmu) {
-                       ret = -ENOENT;
-                       goto out_remove_dev;
-               }
-
-               smmu_group = kzalloc(sizeof(*smmu_group), GFP_KERNEL);
-               if (!smmu_group) {
-                       ret = -ENOMEM;
-                       goto out_remove_dev;
-               }
-
-               smmu_group->ste.valid   = true;
-               smmu_group->smmu        = smmu;
-               iommu_group_set_iommudata(group, smmu_group,
-                                         __arm_smmu_release_pci_iommudata);
+       /*
+        * We _can_ actually withstand dodgy bus code re-calling add_device()
+        * without an intervening remove_device()/of_xlate() sequence, but
+        * we're not going to do so quietly...
+        */
+       if (WARN_ON_ONCE(fwspec->iommu_priv)) {
+               master = fwspec->iommu_priv;
+               smmu = master->smmu;
        } else {
-               smmu = smmu_group->smmu;
-       }
+               smmu = arm_smmu_get_by_node(to_of_node(fwspec->iommu_fwnode));
+               if (!smmu)
+                       return -ENODEV;
+               master = kzalloc(sizeof(*master), GFP_KERNEL);
+               if (!master)
+                       return -ENOMEM;
 
-       /* Assume SID == RID until firmware tells us otherwise */
-       pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid, &sid);
-       for (i = 0; i < smmu_group->num_sids; ++i) {
-               /* If we already know about this SID, then we're done */
-               if (smmu_group->sids[i] == sid)
-                       goto out_put_group;
+               master->smmu = smmu;
+               fwspec->iommu_priv = master;
        }
 
-       /* Check the SID is in range of the SMMU and our stream table */
-       if (!arm_smmu_sid_in_range(smmu, sid)) {
-               ret = -ERANGE;
-               goto out_remove_dev;
-       }
+       /* Check the SIDs are in range of the SMMU and our stream table */
+       for (i = 0; i < fwspec->num_ids; i++) {
+               u32 sid = fwspec->ids[i];
 
-       /* Ensure l2 strtab is initialised */
-       if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
-               ret = arm_smmu_init_l2_strtab(smmu, sid);
-               if (ret)
-                       goto out_remove_dev;
-       }
+               if (!arm_smmu_sid_in_range(smmu, sid))
+                       return -ERANGE;
 
-       /* Resize the SID array for the group */
-       smmu_group->num_sids++;
-       sids = krealloc(smmu_group->sids, smmu_group->num_sids * sizeof(*sids),
-                       GFP_KERNEL);
-       if (!sids) {
-               smmu_group->num_sids--;
-               ret = -ENOMEM;
-               goto out_remove_dev;
+               /* Ensure l2 strtab is initialised */
+               if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
+                       ret = arm_smmu_init_l2_strtab(smmu, sid);
+                       if (ret)
+                               return ret;
+               }
        }
 
-       /* Add the new SID */
-       sids[smmu_group->num_sids - 1] = sid;
-       smmu_group->sids = sids;
-
-out_put_group:
-       iommu_group_put(group);
-       return 0;
+       group = iommu_group_get_for_dev(dev);
+       if (!IS_ERR(group))
+               iommu_group_put(group);
 
-out_remove_dev:
-       iommu_group_remove_device(dev);
-       iommu_group_put(group);
-       return ret;
+       return PTR_ERR_OR_ZERO(group);
 }
 
 static void arm_smmu_remove_device(struct device *dev)
 {
+       struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+       struct arm_smmu_master_data *master;
+
+       if (!fwspec || fwspec->ops != &arm_smmu_ops)
+               return;
+
+       master = fwspec->iommu_priv;
+       if (master && master->ste.valid)
+               arm_smmu_detach_dev(dev);
        iommu_group_remove_device(dev);
+       kfree(master);
+       iommu_fwspec_free(dev);
+}
+
+static struct iommu_group *arm_smmu_device_group(struct device *dev)
+{
+       struct iommu_group *group;
+
+       /*
+        * We don't support devices sharing stream IDs other than PCI RID
+        * aliases, since the necessary ID-to-device lookup becomes rather
+        * impractical given a potential sparse 32-bit stream ID space.
+        */
+       if (dev_is_pci(dev))
+               group = pci_device_group(dev);
+       else
+               group = generic_device_group(dev);
+
+       return group;
 }
 
 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
@@ -1937,6 +1875,11 @@ out_unlock:
        return ret;
 }
 
+static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
+{
+       return iommu_fwspec_add_ids(dev, args->args, 1);
+}
+
 static struct iommu_ops arm_smmu_ops = {
        .capable                = arm_smmu_capable,
        .domain_alloc           = arm_smmu_domain_alloc,
@@ -1948,9 +1891,10 @@ static struct iommu_ops arm_smmu_ops = {
        .iova_to_phys           = arm_smmu_iova_to_phys,
        .add_device             = arm_smmu_add_device,
        .remove_device          = arm_smmu_remove_device,
-       .device_group           = pci_device_group,
+       .device_group           = arm_smmu_device_group,
        .domain_get_attr        = arm_smmu_domain_get_attr,
        .domain_set_attr        = arm_smmu_domain_set_attr,
+       .of_xlate               = arm_smmu_of_xlate,
        .pgsize_bitmap          = -1UL, /* Restricted during device attach */
 };
 
@@ -2151,6 +2095,24 @@ static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
                                          1, ARM_SMMU_POLL_TIMEOUT_US);
 }
 
+/* GBPA is "special" */
+static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
+{
+       int ret;
+       u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
+
+       ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
+                                        1, ARM_SMMU_POLL_TIMEOUT_US);
+       if (ret)
+               return ret;
+
+       reg &= ~clr;
+       reg |= set;
+       writel_relaxed(reg | GBPA_UPDATE, gbpa);
+       return readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
+                                         1, ARM_SMMU_POLL_TIMEOUT_US);
+}
+
 static void arm_smmu_free_msis(void *data)
 {
        struct device *dev = data;
@@ -2235,10 +2197,10 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
        /* Request interrupt lines */
        irq = smmu->evtq.q.irq;
        if (irq) {
-               ret = devm_request_threaded_irq(smmu->dev, irq,
-                                               arm_smmu_evtq_handler,
+               ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
                                                arm_smmu_evtq_thread,
-                                               0, "arm-smmu-v3-evtq", smmu);
+                                               IRQF_ONESHOT,
+                                               "arm-smmu-v3-evtq", smmu);
                if (ret < 0)
                        dev_warn(smmu->dev, "failed to enable evtq irq\n");
        }
@@ -2263,10 +2225,10 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
        if (smmu->features & ARM_SMMU_FEAT_PRI) {
                irq = smmu->priq.q.irq;
                if (irq) {
-                       ret = devm_request_threaded_irq(smmu->dev, irq,
-                                                       arm_smmu_priq_handler,
+                       ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
                                                        arm_smmu_priq_thread,
-                                                       0, "arm-smmu-v3-priq",
+                                                       IRQF_ONESHOT,
+                                                       "arm-smmu-v3-priq",
                                                        smmu);
                        if (ret < 0)
                                dev_warn(smmu->dev,
@@ -2296,7 +2258,7 @@ static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
        return ret;
 }
 
-static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
+static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
 {
        int ret;
        u32 reg, enables;
@@ -2397,8 +2359,17 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
                return ret;
        }
 
-       /* Enable the SMMU interface */
-       enables |= CR0_SMMUEN;
+
+       /* Enable the SMMU interface, or ensure bypass */
+       if (!bypass || disable_bypass) {
+               enables |= CR0_SMMUEN;
+       } else {
+               ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
+               if (ret) {
+                       dev_err(smmu->dev, "GBPA not responding to update\n");
+                       return ret;
+               }
+       }
        ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
                                      ARM_SMMU_CR0ACK);
        if (ret) {
@@ -2597,6 +2568,15 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
        struct resource *res;
        struct arm_smmu_device *smmu;
        struct device *dev = &pdev->dev;
+       bool bypass = true;
+       u32 cells;
+
+       if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
+               dev_err(dev, "missing #iommu-cells property\n");
+       else if (cells != 1)
+               dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
+       else
+               bypass = false;
 
        smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
        if (!smmu) {
@@ -2649,7 +2629,24 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, smmu);
 
        /* Reset the device */
-       return arm_smmu_device_reset(smmu);
+       ret = arm_smmu_device_reset(smmu, bypass);
+       if (ret)
+               return ret;
+
+       /* And we're up. Go go go! */
+       of_iommu_set_ops(dev->of_node, &arm_smmu_ops);
+#ifdef CONFIG_PCI
+       pci_request_acs();
+       ret = bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
+       if (ret)
+               return ret;
+#endif
+#ifdef CONFIG_ARM_AMBA
+       ret = bus_set_iommu(&amba_bustype, &arm_smmu_ops);
+       if (ret)
+               return ret;
+#endif
+       return bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
 }
 
 static int arm_smmu_device_remove(struct platform_device *pdev)
@@ -2677,22 +2674,14 @@ static struct platform_driver arm_smmu_driver = {
 
 static int __init arm_smmu_init(void)
 {
-       struct device_node *np;
-       int ret;
-
-       np = of_find_matching_node(NULL, arm_smmu_of_match);
-       if (!np)
-               return 0;
-
-       of_node_put(np);
-
-       ret = platform_driver_register(&arm_smmu_driver);
-       if (ret)
-               return ret;
-
-       pci_request_acs();
+       static bool registered;
+       int ret = 0;
 
-       return bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
+       if (!registered) {
+               ret = platform_driver_register(&arm_smmu_driver);
+               registered = !ret;
+       }
+       return ret;
 }
 
 static void __exit arm_smmu_exit(void)
@@ -2703,6 +2692,20 @@ static void __exit arm_smmu_exit(void)
 subsys_initcall(arm_smmu_init);
 module_exit(arm_smmu_exit);
 
+static int __init arm_smmu_of_init(struct device_node *np)
+{
+       int ret = arm_smmu_init();
+
+       if (ret)
+               return ret;
+
+       if (!of_platform_device_create(np, NULL, platform_bus_type.dev_root))
+               return -ENODEV;
+
+       return 0;
+}
+IOMMU_OF_DECLARE(arm_smmuv3, "arm,smmu-v3", arm_smmu_of_init);
+
 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
 MODULE_LICENSE("GPL v2");