drivers/iommu/arm-smmu.c (Linux 4.8-rc3)
/*
 * IOMMU API for ARM architected SMMU implementations.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) 2013 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver currently supports:
 *      - SMMUv1 and v2 implementations
 *      - Stream-matching and stream-indexing
 *      - v7/v8 long-descriptor format
 *      - Non-secure access to the SMMU
 *      - Context fault reporting
 */

#define pr_fmt(fmt) "arm-smmu: " fmt

#include <linux/delay.h>
#include <linux/dma-iommu.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/io-64-nonatomic-hi-lo.h>
#include <linux/iommu.h>
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

#include <linux/amba/bus.h>

#include "io-pgtable.h"

/* Maximum number of stream IDs assigned to a single device */
#define MAX_MASTER_STREAMIDS            128

/* Maximum number of context banks per SMMU */
#define ARM_SMMU_MAX_CBS                128

/* Maximum number of mapping groups per SMMU */
#define ARM_SMMU_MAX_SMRS               128

/* SMMU global address space */
#define ARM_SMMU_GR0(smmu)              ((smmu)->base)
#define ARM_SMMU_GR1(smmu)              ((smmu)->base + (1 << (smmu)->pgshift))

/*
 * SMMU global address space with conditional offset to access secure
 * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
 * nsGFSYNR0: 0x450)
 */
#define ARM_SMMU_GR0_NS(smmu)                                           \
        ((smmu)->base +                                                 \
                ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)       \
                        ? 0x400 : 0))
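
/*
 * Worked example (illustrative): with ARM_SMMU_OPT_SECURE_CFG_ACCESS
 * set, a read of sGFSR (offset 0x48) through ARM_SMMU_GR0_NS() lands
 * on the secure alias nsGFSR at 0x400 + 0x48 = 0x448; without the
 * option, the macro degenerates to plain ARM_SMMU_GR0().
 */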

/*
 * Some 64-bit registers only make sense to write atomically, but in such
 * cases all the data relevant to AArch32 formats lies within the lower word,
 * therefore this actually makes more sense than it might first appear.
 */
#ifdef CONFIG_64BIT
#define smmu_write_atomic_lq            writeq_relaxed
#else
#define smmu_write_atomic_lq            writel_relaxed
#endif
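
/*
 * Illustrative consequence of the above: on a 32-bit kernel,
 * smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR) issues a
 * single writel_relaxed() of va[31:0], which covers everything an
 * AArch32-format context actually consumes.
 */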

/* Configuration registers */
#define ARM_SMMU_GR0_sCR0               0x0
#define sCR0_CLIENTPD                   (1 << 0)
#define sCR0_GFRE                       (1 << 1)
#define sCR0_GFIE                       (1 << 2)
#define sCR0_GCFGFRE                    (1 << 4)
#define sCR0_GCFGFIE                    (1 << 5)
#define sCR0_USFCFG                     (1 << 10)
#define sCR0_VMIDPNE                    (1 << 11)
#define sCR0_PTM                        (1 << 12)
#define sCR0_FB                         (1 << 13)
#define sCR0_VMID16EN                   (1 << 31)
#define sCR0_BSU_SHIFT                  14
#define sCR0_BSU_MASK                   0x3

/* Auxiliary Configuration register */
#define ARM_SMMU_GR0_sACR               0x10

/* Identification registers */
#define ARM_SMMU_GR0_ID0                0x20
#define ARM_SMMU_GR0_ID1                0x24
#define ARM_SMMU_GR0_ID2                0x28
#define ARM_SMMU_GR0_ID3                0x2c
#define ARM_SMMU_GR0_ID4                0x30
#define ARM_SMMU_GR0_ID5                0x34
#define ARM_SMMU_GR0_ID6                0x38
#define ARM_SMMU_GR0_ID7                0x3c
#define ARM_SMMU_GR0_sGFSR              0x48
#define ARM_SMMU_GR0_sGFSYNR0           0x50
#define ARM_SMMU_GR0_sGFSYNR1           0x54
#define ARM_SMMU_GR0_sGFSYNR2           0x58

#define ID0_S1TS                        (1 << 30)
#define ID0_S2TS                        (1 << 29)
#define ID0_NTS                         (1 << 28)
#define ID0_SMS                         (1 << 27)
#define ID0_ATOSNS                      (1 << 26)
#define ID0_PTFS_NO_AARCH32             (1 << 25)
#define ID0_PTFS_NO_AARCH32S            (1 << 24)
#define ID0_CTTW                        (1 << 14)
#define ID0_NUMIRPT_SHIFT               16
#define ID0_NUMIRPT_MASK                0xff
#define ID0_NUMSIDB_SHIFT               9
#define ID0_NUMSIDB_MASK                0xf
#define ID0_NUMSMRG_SHIFT               0
#define ID0_NUMSMRG_MASK                0xff

#define ID1_PAGESIZE                    (1 << 31)
#define ID1_NUMPAGENDXB_SHIFT           28
#define ID1_NUMPAGENDXB_MASK            7
#define ID1_NUMS2CB_SHIFT               16
#define ID1_NUMS2CB_MASK                0xff
#define ID1_NUMCB_SHIFT                 0
#define ID1_NUMCB_MASK                  0xff

#define ID2_OAS_SHIFT                   4
#define ID2_OAS_MASK                    0xf
#define ID2_IAS_SHIFT                   0
#define ID2_IAS_MASK                    0xf
#define ID2_UBS_SHIFT                   8
#define ID2_UBS_MASK                    0xf
#define ID2_PTFS_4K                     (1 << 12)
#define ID2_PTFS_16K                    (1 << 13)
#define ID2_PTFS_64K                    (1 << 14)
#define ID2_VMID16                      (1 << 15)

#define ID7_MAJOR_SHIFT                 4
#define ID7_MAJOR_MASK                  0xf

/* Global TLB invalidation */
#define ARM_SMMU_GR0_TLBIVMID           0x64
#define ARM_SMMU_GR0_TLBIALLNSNH        0x68
#define ARM_SMMU_GR0_TLBIALLH           0x6c
#define ARM_SMMU_GR0_sTLBGSYNC          0x70
#define ARM_SMMU_GR0_sTLBGSTATUS        0x74
#define sTLBGSTATUS_GSACTIVE            (1 << 0)
#define TLB_LOOP_TIMEOUT                1000000 /* 1s! */

/* Stream mapping registers */
#define ARM_SMMU_GR0_SMR(n)             (0x800 + ((n) << 2))
#define SMR_VALID                       (1 << 31)
#define SMR_MASK_SHIFT                  16
#define SMR_MASK_MASK                   0x7fff
#define SMR_ID_SHIFT                    0
#define SMR_ID_MASK                     0x7fff
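
/*
 * Worked example (illustrative): an SMR matching stream ID 0x42 exactly
 * (mask 0, so every ID bit is compared) is encoded as
 *
 *      SMR_VALID | (0 << SMR_MASK_SHIFT) | (0x42 << SMR_ID_SHIFT) == 0x80000042
 *
 * which is precisely what arm_smmu_master_configure_smrs() programs below.
 */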

#define ARM_SMMU_GR0_S2CR(n)            (0xc00 + ((n) << 2))
#define S2CR_CBNDX_SHIFT                0
#define S2CR_CBNDX_MASK                 0xff
#define S2CR_TYPE_SHIFT                 16
#define S2CR_TYPE_MASK                  0x3
#define S2CR_TYPE_TRANS                 (0 << S2CR_TYPE_SHIFT)
#define S2CR_TYPE_BYPASS                (1 << S2CR_TYPE_SHIFT)
#define S2CR_TYPE_FAULT                 (2 << S2CR_TYPE_SHIFT)

#define S2CR_PRIVCFG_SHIFT              24
#define S2CR_PRIVCFG_UNPRIV             (2 << S2CR_PRIVCFG_SHIFT)

/* Context bank attribute registers */
#define ARM_SMMU_GR1_CBAR(n)            (0x0 + ((n) << 2))
#define CBAR_VMID_SHIFT                 0
#define CBAR_VMID_MASK                  0xff
#define CBAR_S1_BPSHCFG_SHIFT           8
#define CBAR_S1_BPSHCFG_MASK            3
#define CBAR_S1_BPSHCFG_NSH             3
#define CBAR_S1_MEMATTR_SHIFT           12
#define CBAR_S1_MEMATTR_MASK            0xf
#define CBAR_S1_MEMATTR_WB              0xf
#define CBAR_TYPE_SHIFT                 16
#define CBAR_TYPE_MASK                  0x3
#define CBAR_TYPE_S2_TRANS              (0 << CBAR_TYPE_SHIFT)
#define CBAR_TYPE_S1_TRANS_S2_BYPASS    (1 << CBAR_TYPE_SHIFT)
#define CBAR_TYPE_S1_TRANS_S2_FAULT     (2 << CBAR_TYPE_SHIFT)
#define CBAR_TYPE_S1_TRANS_S2_TRANS     (3 << CBAR_TYPE_SHIFT)
#define CBAR_IRPTNDX_SHIFT              24
#define CBAR_IRPTNDX_MASK               0xff

#define ARM_SMMU_GR1_CBA2R(n)           (0x800 + ((n) << 2))
#define CBA2R_RW64_32BIT                (0 << 0)
#define CBA2R_RW64_64BIT                (1 << 0)
#define CBA2R_VMID_SHIFT                16
#define CBA2R_VMID_MASK                 0xffff

/* Translation context bank */
#define ARM_SMMU_CB_BASE(smmu)          ((smmu)->base + ((smmu)->size >> 1))
#define ARM_SMMU_CB(smmu, n)            ((n) * (1 << (smmu)->pgshift))
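
/*
 * Illustrative layout, assuming a 64KB-page SMMU (pgshift == 16) with an
 * 8-page register region (size == 0x80000): GR0 sits at base, GR1 at
 * base + 0x10000, and context bank n at base + 0x40000 + n * 0x10000,
 * i.e. ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, n).
 */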

#define ARM_SMMU_CB_SCTLR               0x0
#define ARM_SMMU_CB_ACTLR               0x4
#define ARM_SMMU_CB_RESUME              0x8
#define ARM_SMMU_CB_TTBCR2              0x10
#define ARM_SMMU_CB_TTBR0               0x20
#define ARM_SMMU_CB_TTBR1               0x28
#define ARM_SMMU_CB_TTBCR               0x30
#define ARM_SMMU_CB_S1_MAIR0            0x38
#define ARM_SMMU_CB_S1_MAIR1            0x3c
#define ARM_SMMU_CB_PAR                 0x50
#define ARM_SMMU_CB_FSR                 0x58
#define ARM_SMMU_CB_FAR                 0x60
#define ARM_SMMU_CB_FSYNR0              0x68
#define ARM_SMMU_CB_S1_TLBIVA           0x600
#define ARM_SMMU_CB_S1_TLBIASID         0x610
#define ARM_SMMU_CB_S1_TLBIVAL          0x620
#define ARM_SMMU_CB_S2_TLBIIPAS2        0x630
#define ARM_SMMU_CB_S2_TLBIIPAS2L       0x638
#define ARM_SMMU_CB_ATS1PR              0x800
#define ARM_SMMU_CB_ATSR                0x8f0

#define SCTLR_S1_ASIDPNE                (1 << 12)
#define SCTLR_CFCFG                     (1 << 7)
#define SCTLR_CFIE                      (1 << 6)
#define SCTLR_CFRE                      (1 << 5)
#define SCTLR_E                         (1 << 4)
#define SCTLR_AFE                       (1 << 2)
#define SCTLR_TRE                       (1 << 1)
#define SCTLR_M                         (1 << 0)
#define SCTLR_EAE_SBOP                  (SCTLR_AFE | SCTLR_TRE)

#define ARM_MMU500_ACTLR_CPRE           (1 << 1)

#define ARM_MMU500_ACR_CACHE_LOCK       (1 << 26)

#define CB_PAR_F                        (1 << 0)

#define ATSR_ACTIVE                     (1 << 0)

#define RESUME_RETRY                    (0 << 0)
#define RESUME_TERMINATE                (1 << 0)

#define TTBCR2_SEP_SHIFT                15
#define TTBCR2_SEP_UPSTREAM             (0x7 << TTBCR2_SEP_SHIFT)

#define TTBRn_ASID_SHIFT                48

#define FSR_MULTI                       (1 << 31)
#define FSR_SS                          (1 << 30)
#define FSR_UUT                         (1 << 8)
#define FSR_ASF                         (1 << 7)
#define FSR_TLBLKF                      (1 << 6)
#define FSR_TLBMCF                      (1 << 5)
#define FSR_EF                          (1 << 4)
#define FSR_PF                          (1 << 3)
#define FSR_AFF                         (1 << 2)
#define FSR_TF                          (1 << 1)

#define FSR_IGN                         (FSR_AFF | FSR_ASF | \
                                         FSR_TLBMCF | FSR_TLBLKF)
#define FSR_FAULT                       (FSR_MULTI | FSR_SS | FSR_UUT | \
                                         FSR_EF | FSR_PF | FSR_TF | FSR_IGN)

#define FSYNR0_WNR                      (1 << 4)

static int force_stage;
module_param(force_stage, int, S_IRUGO);
MODULE_PARM_DESC(force_stage,
        "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
static bool disable_bypass;
module_param(disable_bypass, bool, S_IRUGO);
MODULE_PARM_DESC(disable_bypass,
        "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");

enum arm_smmu_arch_version {
        ARM_SMMU_V1,
        ARM_SMMU_V1_64K,
        ARM_SMMU_V2,
};

enum arm_smmu_implementation {
        GENERIC_SMMU,
        ARM_MMU500,
        CAVIUM_SMMUV2,
};

struct arm_smmu_smr {
        u8                              idx;
        u16                             mask;
        u16                             id;
};

struct arm_smmu_master_cfg {
        int                             num_streamids;
        u16                             streamids[MAX_MASTER_STREAMIDS];
        struct arm_smmu_smr             *smrs;
};

struct arm_smmu_master {
        struct device_node              *of_node;
        struct rb_node                  node;
        struct arm_smmu_master_cfg      cfg;
};

struct arm_smmu_device {
        struct device                   *dev;

        void __iomem                    *base;
        unsigned long                   size;
        unsigned long                   pgshift;

#define ARM_SMMU_FEAT_COHERENT_WALK     (1 << 0)
#define ARM_SMMU_FEAT_STREAM_MATCH      (1 << 1)
#define ARM_SMMU_FEAT_TRANS_S1          (1 << 2)
#define ARM_SMMU_FEAT_TRANS_S2          (1 << 3)
#define ARM_SMMU_FEAT_TRANS_NESTED      (1 << 4)
#define ARM_SMMU_FEAT_TRANS_OPS         (1 << 5)
#define ARM_SMMU_FEAT_VMID16            (1 << 6)
#define ARM_SMMU_FEAT_FMT_AARCH64_4K    (1 << 7)
#define ARM_SMMU_FEAT_FMT_AARCH64_16K   (1 << 8)
#define ARM_SMMU_FEAT_FMT_AARCH64_64K   (1 << 9)
#define ARM_SMMU_FEAT_FMT_AARCH32_L     (1 << 10)
#define ARM_SMMU_FEAT_FMT_AARCH32_S     (1 << 11)
        u32                             features;

#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
        u32                             options;
        enum arm_smmu_arch_version      version;
        enum arm_smmu_implementation    model;

        u32                             num_context_banks;
        u32                             num_s2_context_banks;
        DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
        atomic_t                        irptndx;

        u32                             num_mapping_groups;
        DECLARE_BITMAP(smr_map, ARM_SMMU_MAX_SMRS);

        unsigned long                   va_size;
        unsigned long                   ipa_size;
        unsigned long                   pa_size;
        unsigned long                   pgsize_bitmap;

        u32                             num_global_irqs;
        u32                             num_context_irqs;
        unsigned int                    *irqs;

        struct list_head                list;
        struct rb_root                  masters;

        u32                             cavium_id_base; /* Specific to Cavium */
};

enum arm_smmu_context_fmt {
        ARM_SMMU_CTX_FMT_NONE,
        ARM_SMMU_CTX_FMT_AARCH64,
        ARM_SMMU_CTX_FMT_AARCH32_L,
        ARM_SMMU_CTX_FMT_AARCH32_S,
};

struct arm_smmu_cfg {
        u8                              cbndx;
        u8                              irptndx;
        u32                             cbar;
        enum arm_smmu_context_fmt       fmt;
};
#define INVALID_IRPTNDX                 0xff

#define ARM_SMMU_CB_ASID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx)
#define ARM_SMMU_CB_VMID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx + 1)
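
/*
 * Note: cavium_id_base is zero everywhere except on Cavium SMMUs, whose
 * TLBs key maintenance operations on an ASID/VMID namespace shared by
 * all SMMUs in the system; each instance therefore offsets its context
 * bank indices by a base carved out of cavium_smmu_context_count.
 */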

enum arm_smmu_domain_stage {
        ARM_SMMU_DOMAIN_S1 = 0,
        ARM_SMMU_DOMAIN_S2,
        ARM_SMMU_DOMAIN_NESTED,
};

struct arm_smmu_domain {
        struct arm_smmu_device          *smmu;
        struct io_pgtable_ops           *pgtbl_ops;
        spinlock_t                      pgtbl_lock;
        struct arm_smmu_cfg             cfg;
        enum arm_smmu_domain_stage      stage;
        struct mutex                    init_mutex; /* Protects smmu pointer */
        struct iommu_domain             domain;
};

struct arm_smmu_phandle_args {
        struct device_node *np;
        int args_count;
        uint32_t args[MAX_MASTER_STREAMIDS];
};

static DEFINE_SPINLOCK(arm_smmu_devices_lock);
static LIST_HEAD(arm_smmu_devices);

struct arm_smmu_option_prop {
        u32 opt;
        const char *prop;
};

static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);

static struct arm_smmu_option_prop arm_smmu_options[] = {
        { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
        { 0, NULL},
};

static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
        return container_of(dom, struct arm_smmu_domain, domain);
}

static void parse_driver_options(struct arm_smmu_device *smmu)
{
        int i = 0;

        do {
                if (of_property_read_bool(smmu->dev->of_node,
                                                arm_smmu_options[i].prop)) {
                        smmu->options |= arm_smmu_options[i].opt;
                        dev_notice(smmu->dev, "option %s\n",
                                arm_smmu_options[i].prop);
                }
        } while (arm_smmu_options[++i].opt);
}

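/*
 * For PCI masters the SMMU is described against the host controller
 * rather than the endpoints, so walk up to the root bus and use the
 * host bridge's OF node for master lookups.
 */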
static struct device_node *dev_get_dev_node(struct device *dev)
{
        if (dev_is_pci(dev)) {
                struct pci_bus *bus = to_pci_dev(dev)->bus;

                while (!pci_is_root_bus(bus))
                        bus = bus->parent;
                return bus->bridge->parent->of_node;
        }

        return dev->of_node;
}

static struct arm_smmu_master *find_smmu_master(struct arm_smmu_device *smmu,
                                                struct device_node *dev_node)
{
        struct rb_node *node = smmu->masters.rb_node;

        while (node) {
                struct arm_smmu_master *master;

                master = container_of(node, struct arm_smmu_master, node);

                if (dev_node < master->of_node)
                        node = node->rb_left;
                else if (dev_node > master->of_node)
                        node = node->rb_right;
                else
                        return master;
        }

        return NULL;
}

static struct arm_smmu_master_cfg *
find_smmu_master_cfg(struct device *dev)
{
        struct arm_smmu_master_cfg *cfg = NULL;
        struct iommu_group *group = iommu_group_get(dev);

        if (group) {
                cfg = iommu_group_get_iommudata(group);
                iommu_group_put(group);
        }

        return cfg;
}

static int insert_smmu_master(struct arm_smmu_device *smmu,
                              struct arm_smmu_master *master)
{
        struct rb_node **new, *parent;

        new = &smmu->masters.rb_node;
        parent = NULL;
        while (*new) {
                struct arm_smmu_master *this
                        = container_of(*new, struct arm_smmu_master, node);

                parent = *new;
                if (master->of_node < this->of_node)
                        new = &((*new)->rb_left);
                else if (master->of_node > this->of_node)
                        new = &((*new)->rb_right);
                else
                        return -EEXIST;
        }

        rb_link_node(&master->node, parent, new);
        rb_insert_color(&master->node, &smmu->masters);
        return 0;
}

static int register_smmu_master(struct arm_smmu_device *smmu,
                                struct device *dev,
                                struct arm_smmu_phandle_args *masterspec)
{
        int i;
        struct arm_smmu_master *master;

        master = find_smmu_master(smmu, masterspec->np);
        if (master) {
                dev_err(dev,
                        "rejecting multiple registrations for master device %s\n",
                        masterspec->np->name);
                return -EBUSY;
        }

        if (masterspec->args_count > MAX_MASTER_STREAMIDS) {
                dev_err(dev,
                        "reached maximum number (%d) of stream IDs for master device %s\n",
                        MAX_MASTER_STREAMIDS, masterspec->np->name);
                return -ENOSPC;
        }

        master = devm_kzalloc(dev, sizeof(*master), GFP_KERNEL);
        if (!master)
                return -ENOMEM;

        master->of_node                 = masterspec->np;
        master->cfg.num_streamids       = masterspec->args_count;

        for (i = 0; i < master->cfg.num_streamids; ++i) {
                u16 streamid = masterspec->args[i];

                if (!(smmu->features & ARM_SMMU_FEAT_STREAM_MATCH) &&
                     (streamid >= smmu->num_mapping_groups)) {
                        dev_err(dev,
                                "stream ID for master device %s greater than maximum allowed (%d)\n",
                                masterspec->np->name, smmu->num_mapping_groups);
                        return -ERANGE;
                }
                master->cfg.streamids[i] = streamid;
        }
        return insert_smmu_master(smmu, master);
}

static struct arm_smmu_device *find_smmu_for_device(struct device *dev)
{
        struct arm_smmu_device *smmu;
        struct arm_smmu_master *master = NULL;
        struct device_node *dev_node = dev_get_dev_node(dev);

        spin_lock(&arm_smmu_devices_lock);
        list_for_each_entry(smmu, &arm_smmu_devices, list) {
                master = find_smmu_master(smmu, dev_node);
                if (master)
                        break;
        }
        spin_unlock(&arm_smmu_devices_lock);

        return master ? smmu : NULL;
}

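/*
 * Claim the first free index in [start, end) of @map. The
 * test_and_set_bit() loop makes the claim safe against concurrent
 * allocators racing between the find and the set.
 */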
static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
{
        int idx;

        do {
                idx = find_next_zero_bit(map, end, start);
                if (idx == end)
                        return -ENOSPC;
        } while (test_and_set_bit(idx, map));

        return idx;
}

static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
{
        clear_bit(idx, map);
}

/* Wait for any pending TLB invalidations to complete */
static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
{
        int count = 0;
        void __iomem *gr0_base = ARM_SMMU_GR0(smmu);

        writel_relaxed(0, gr0_base + ARM_SMMU_GR0_sTLBGSYNC);
        while (readl_relaxed(gr0_base + ARM_SMMU_GR0_sTLBGSTATUS)
               & sTLBGSTATUS_GSACTIVE) {
                cpu_relax();
                if (++count == TLB_LOOP_TIMEOUT) {
                        dev_err_ratelimited(smmu->dev,
                        "TLB sync timed out -- SMMU may be deadlocked\n");
                        return;
                }
                udelay(1);
        }
}

static void arm_smmu_tlb_sync(void *cookie)
{
        struct arm_smmu_domain *smmu_domain = cookie;
        __arm_smmu_tlb_sync(smmu_domain->smmu);
}

static void arm_smmu_tlb_inv_context(void *cookie)
{
        struct arm_smmu_domain *smmu_domain = cookie;
        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
        void __iomem *base;

        if (stage1) {
                base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
                writel_relaxed(ARM_SMMU_CB_ASID(smmu, cfg),
                               base + ARM_SMMU_CB_S1_TLBIASID);
        } else {
                base = ARM_SMMU_GR0(smmu);
                writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg),
                               base + ARM_SMMU_GR0_TLBIVMID);
        }

        __arm_smmu_tlb_sync(smmu);
}

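/*
 * Issue one by-VA/by-IPA invalidation per @granule across @size bytes.
 * For AArch32-format stage 1 contexts the TLBIVA(L) registers take
 * VA[31:12] with the ASID in the low byte; for AArch64 formats they
 * take the address shifted right by 12 with the ASID in bits [63:48].
 * By-IPA stage 2 invalidation only exists on SMMUv2, so older SMMUs
 * fall back to invalidating the whole VMID.
 */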
static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
                                          size_t granule, bool leaf, void *cookie)
{
        struct arm_smmu_domain *smmu_domain = cookie;
        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
        void __iomem *reg;

        if (stage1) {
                reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
                reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;

                if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
                        /* The register takes VA[31:12], so mask the page offset */
                        iova &= ~0xfffUL;
                        iova |= ARM_SMMU_CB_ASID(smmu, cfg);
                        do {
                                writel_relaxed(iova, reg);
                                iova += granule;
                        } while (size -= granule);
                } else {
                        iova >>= 12;
                        iova |= (u64)ARM_SMMU_CB_ASID(smmu, cfg) << 48;
                        do {
                                writeq_relaxed(iova, reg);
                                iova += granule >> 12;
                        } while (size -= granule);
                }
        } else if (smmu->version == ARM_SMMU_V2) {
                reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
                reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
                              ARM_SMMU_CB_S2_TLBIIPAS2;
                iova >>= 12;
                do {
                        smmu_write_atomic_lq(iova, reg);
                        iova += granule >> 12;
                } while (size -= granule);
        } else {
                reg = ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_TLBIVMID;
                writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg), reg);
        }
}

static struct iommu_gather_ops arm_smmu_gather_ops = {
        .tlb_flush_all  = arm_smmu_tlb_inv_context,
        .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
        .tlb_sync       = arm_smmu_tlb_sync,
};

static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
{
        int flags, ret;
        u32 fsr, fsynr, resume;
        unsigned long iova;
        struct iommu_domain *domain = dev;
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        void __iomem *cb_base;

        cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
        fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);

        if (!(fsr & FSR_FAULT))
                return IRQ_NONE;

        if (fsr & FSR_IGN)
                dev_err_ratelimited(smmu->dev,
                                    "Unexpected context fault (fsr 0x%x)\n",
                                    fsr);

        fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
        flags = fsynr & FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ;

        iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
        if (!report_iommu_fault(domain, smmu->dev, iova, flags)) {
                ret = IRQ_HANDLED;
                resume = RESUME_RETRY;
        } else {
                dev_err_ratelimited(smmu->dev,
                    "Unhandled context fault: iova=0x%08lx, fsynr=0x%x, cb=%d\n",
                    iova, fsynr, cfg->cbndx);
                ret = IRQ_NONE;
                resume = RESUME_TERMINATE;
        }

        /* Clear the faulting FSR */
        writel(fsr, cb_base + ARM_SMMU_CB_FSR);

        /* Retry or terminate any stalled transactions */
        if (fsr & FSR_SS)
                writel_relaxed(resume, cb_base + ARM_SMMU_CB_RESUME);

        return ret;
}

static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
{
        u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
        struct arm_smmu_device *smmu = dev;
        void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);

        gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
        gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
        gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
        gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);

        if (!gfsr)
                return IRQ_NONE;

        dev_err_ratelimited(smmu->dev,
                "Unexpected global fault, this could be serious\n");
        dev_err_ratelimited(smmu->dev,
                "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
                gfsr, gfsynr0, gfsynr1, gfsynr2);

        writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
        return IRQ_HANDLED;
}

static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
                                       struct io_pgtable_cfg *pgtbl_cfg)
{
        u32 reg;
        u64 reg64;
        bool stage1;
        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        void __iomem *cb_base, *gr1_base;

        gr1_base = ARM_SMMU_GR1(smmu);
        stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
        cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);

        if (smmu->version > ARM_SMMU_V1) {
                if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
                        reg = CBA2R_RW64_64BIT;
                else
                        reg = CBA2R_RW64_32BIT;
                /* 16-bit VMIDs live in CBA2R */
                if (smmu->features & ARM_SMMU_FEAT_VMID16)
                        reg |= ARM_SMMU_CB_VMID(smmu, cfg) << CBA2R_VMID_SHIFT;

                writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(cfg->cbndx));
        }

        /* CBAR */
        reg = cfg->cbar;
        if (smmu->version < ARM_SMMU_V2)
                reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;

        /*
         * Use the weakest shareability/memory types, so they are
         * overridden by the ttbcr/pte.
         */
        if (stage1) {
                reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
                        (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
        } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
                /* 8-bit VMIDs live in CBAR */
                reg |= ARM_SMMU_CB_VMID(smmu, cfg) << CBAR_VMID_SHIFT;
        }
        writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(cfg->cbndx));

        /* TTBRs */
        if (stage1) {
                reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];

                reg64 |= ((u64)ARM_SMMU_CB_ASID(smmu, cfg)) << TTBRn_ASID_SHIFT;
                writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0);

                reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
                reg64 |= ((u64)ARM_SMMU_CB_ASID(smmu, cfg)) << TTBRn_ASID_SHIFT;
                writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR1);
        } else {
                reg64 = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
                writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0);
        }

        /* TTBCR */
        if (stage1) {
                reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
                writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR);
                if (smmu->version > ARM_SMMU_V1) {
                        reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
                        reg |= TTBCR2_SEP_UPSTREAM;
                        writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR2);
                }
        } else {
                reg = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
                writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR);
        }

        /* MAIRs (stage-1 only) */
        if (stage1) {
                reg = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
                writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR0);
                reg = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
                writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR1);
        }

        /* SCTLR */
        reg = SCTLR_CFCFG | SCTLR_CFIE | SCTLR_CFRE | SCTLR_M | SCTLR_EAE_SBOP;
        if (stage1)
                reg |= SCTLR_S1_ASIDPNE;
#ifdef __BIG_ENDIAN
        reg |= SCTLR_E;
#endif
        writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
}

static int arm_smmu_init_domain_context(struct iommu_domain *domain,
                                        struct arm_smmu_device *smmu)
{
        int irq, start, ret = 0;
        unsigned long ias, oas;
        struct io_pgtable_ops *pgtbl_ops;
        struct io_pgtable_cfg pgtbl_cfg;
        enum io_pgtable_fmt fmt;
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;

        mutex_lock(&smmu_domain->init_mutex);
        if (smmu_domain->smmu)
                goto out_unlock;

        /* We're bypassing these SIDs, so don't allocate an actual context */
        if (domain->type == IOMMU_DOMAIN_DMA) {
                smmu_domain->smmu = smmu;
                goto out_unlock;
        }

        /*
         * Mapping the requested stage onto what we support is surprisingly
         * complicated, mainly because the spec allows S1+S2 SMMUs without
         * support for nested translation. That means we end up with the
         * following table:
         *
         * Requested        Supported        Actual
         *     S1               N              S1
         *     S1             S1+S2            S1
         *     S1               S2             S2
         *     S1               S1             S1
         *     N                N              N
         *     N              S1+S2            S2
         *     N                S2             S2
         *     N                S1             S1
         *
         * Note that you can't actually request stage-2 mappings.
         */
        if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
                smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
        if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
                smmu_domain->stage = ARM_SMMU_DOMAIN_S1;

        /*
         * Choosing a suitable context format is even more fiddly. Until we
         * grow some way for the caller to express a preference, and/or move
         * the decision into the io-pgtable code where it arguably belongs,
         * just aim for the closest thing to the rest of the system, and hope
         * that the hardware isn't esoteric enough that we can't assume AArch64
         * support to be a superset of AArch32 support...
         */
        if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
                cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
        if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
            (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
                               ARM_SMMU_FEAT_FMT_AARCH64_16K |
                               ARM_SMMU_FEAT_FMT_AARCH64_4K)))
                cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;

        if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
                ret = -EINVAL;
                goto out_unlock;
        }

        switch (smmu_domain->stage) {
        case ARM_SMMU_DOMAIN_S1:
                cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
                start = smmu->num_s2_context_banks;
                ias = smmu->va_size;
                oas = smmu->ipa_size;
                if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
                        fmt = ARM_64_LPAE_S1;
                } else {
                        fmt = ARM_32_LPAE_S1;
                        ias = min(ias, 32UL);
                        oas = min(oas, 40UL);
                }
                break;
        case ARM_SMMU_DOMAIN_NESTED:
                /*
                 * We will likely want to change this if/when KVM gets
                 * involved.
                 */
        case ARM_SMMU_DOMAIN_S2:
                cfg->cbar = CBAR_TYPE_S2_TRANS;
                start = 0;
                ias = smmu->ipa_size;
                oas = smmu->pa_size;
                if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
                        fmt = ARM_64_LPAE_S2;
                } else {
                        fmt = ARM_32_LPAE_S2;
                        ias = min(ias, 40UL);
                        oas = min(oas, 40UL);
                }
                break;
        default:
                ret = -EINVAL;
                goto out_unlock;
        }

        ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
                                      smmu->num_context_banks);
        if (ret < 0)
                goto out_unlock;

        cfg->cbndx = ret;
        if (smmu->version < ARM_SMMU_V2) {
                cfg->irptndx = atomic_inc_return(&smmu->irptndx);
                cfg->irptndx %= smmu->num_context_irqs;
        } else {
                cfg->irptndx = cfg->cbndx;
        }

        pgtbl_cfg = (struct io_pgtable_cfg) {
                .pgsize_bitmap  = smmu->pgsize_bitmap,
                .ias            = ias,
                .oas            = oas,
                .tlb            = &arm_smmu_gather_ops,
                .iommu_dev      = smmu->dev,
        };

        smmu_domain->smmu = smmu;
        pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
        if (!pgtbl_ops) {
                ret = -ENOMEM;
                goto out_clear_smmu;
        }

        /* Update the domain's page sizes to reflect the page table format */
        domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;

        /* Initialise the context bank with our page table cfg */
        arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);

        /*
         * Request context fault interrupt. Do this last to avoid the
         * handler seeing a half-initialised domain state.
         */
        irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
        ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
                               IRQF_SHARED, "arm-smmu-context-fault", domain);
        if (ret < 0) {
                dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
                        cfg->irptndx, irq);
                cfg->irptndx = INVALID_IRPTNDX;
        }

        mutex_unlock(&smmu_domain->init_mutex);

        /* Publish page table ops for map/unmap */
        smmu_domain->pgtbl_ops = pgtbl_ops;
        return 0;

out_clear_smmu:
        smmu_domain->smmu = NULL;
out_unlock:
        mutex_unlock(&smmu_domain->init_mutex);
        return ret;
}

static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
{
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
        void __iomem *cb_base;
        int irq;

        if (!smmu || domain->type == IOMMU_DOMAIN_DMA)
                return;

        /*
         * Disable the context bank and free the page tables before
         * freeing the context bank itself.
         */
        cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
        writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);

        if (cfg->irptndx != INVALID_IRPTNDX) {
                irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
                devm_free_irq(smmu->dev, irq, domain);
        }

        free_io_pgtable_ops(smmu_domain->pgtbl_ops);
        __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
}

static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
{
        struct arm_smmu_domain *smmu_domain;

        if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
                return NULL;
        /*
         * Allocate the domain and initialise some of its data structures.
         * We can't really do anything meaningful until we've added a
         * master.
         */
        smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
        if (!smmu_domain)
                return NULL;

        if (type == IOMMU_DOMAIN_DMA &&
            iommu_get_dma_cookie(&smmu_domain->domain)) {
                kfree(smmu_domain);
                return NULL;
        }

        mutex_init(&smmu_domain->init_mutex);
        spin_lock_init(&smmu_domain->pgtbl_lock);

        return &smmu_domain->domain;
}

static void arm_smmu_domain_free(struct iommu_domain *domain)
{
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

        /*
         * Free the domain resources. We assume that all devices have
         * already been detached.
         */
        iommu_put_dma_cookie(domain);
        arm_smmu_destroy_domain_context(domain);
        kfree(smmu_domain);
}

static int arm_smmu_master_configure_smrs(struct arm_smmu_device *smmu,
                                          struct arm_smmu_master_cfg *cfg)
{
        int i;
        struct arm_smmu_smr *smrs;
        void __iomem *gr0_base = ARM_SMMU_GR0(smmu);

        if (!(smmu->features & ARM_SMMU_FEAT_STREAM_MATCH))
                return 0;

        if (cfg->smrs)
                return -EEXIST;

        smrs = kmalloc_array(cfg->num_streamids, sizeof(*smrs), GFP_KERNEL);
        if (!smrs) {
                dev_err(smmu->dev, "failed to allocate %d SMRs\n",
                        cfg->num_streamids);
                return -ENOMEM;
        }

        /* Allocate the SMRs on the SMMU */
        for (i = 0; i < cfg->num_streamids; ++i) {
                int idx = __arm_smmu_alloc_bitmap(smmu->smr_map, 0,
                                                  smmu->num_mapping_groups);
                if (idx < 0) {
                        dev_err(smmu->dev, "failed to allocate free SMR\n");
                        goto err_free_smrs;
                }

                smrs[i] = (struct arm_smmu_smr) {
                        .idx    = idx,
                        .mask   = 0, /* We don't currently share SMRs */
                        .id     = cfg->streamids[i],
                };
        }

        /* It worked! Now, poke the actual hardware */
        for (i = 0; i < cfg->num_streamids; ++i) {
                u32 reg = SMR_VALID | smrs[i].id << SMR_ID_SHIFT |
                          smrs[i].mask << SMR_MASK_SHIFT;
                writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_SMR(smrs[i].idx));
        }

        cfg->smrs = smrs;
        return 0;

err_free_smrs:
        while (--i >= 0)
                __arm_smmu_free_bitmap(smmu->smr_map, smrs[i].idx);
        kfree(smrs);
        return -ENOSPC;
}

static void arm_smmu_master_free_smrs(struct arm_smmu_device *smmu,
                                      struct arm_smmu_master_cfg *cfg)
{
        int i;
        void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
        struct arm_smmu_smr *smrs = cfg->smrs;

        if (!smrs)
                return;

        /* Invalidate the SMRs before freeing back to the allocator */
        for (i = 0; i < cfg->num_streamids; ++i) {
                u8 idx = smrs[i].idx;

                writel_relaxed(~SMR_VALID, gr0_base + ARM_SMMU_GR0_SMR(idx));
                __arm_smmu_free_bitmap(smmu->smr_map, idx);
        }

        cfg->smrs = NULL;
        kfree(smrs);
}

static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
                                      struct arm_smmu_master_cfg *cfg)
{
        int i, ret;
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        void __iomem *gr0_base = ARM_SMMU_GR0(smmu);

        /*
         * FIXME: This won't be needed once we have IOMMU-backed DMA ops
         * for all devices behind the SMMU. Note that we need to take
         * care when configuring SMRs for devices that are both a
         * platform_device and a PCI device (i.e. a PCI host controller).
         */
        if (smmu_domain->domain.type == IOMMU_DOMAIN_DMA)
                return 0;

        /* Devices in an IOMMU group may already be configured */
        ret = arm_smmu_master_configure_smrs(smmu, cfg);
        if (ret)
                return ret == -EEXIST ? 0 : ret;

        for (i = 0; i < cfg->num_streamids; ++i) {
                u32 idx, s2cr;

                idx = cfg->smrs ? cfg->smrs[i].idx : cfg->streamids[i];
                s2cr = S2CR_TYPE_TRANS | S2CR_PRIVCFG_UNPRIV |
                       (smmu_domain->cfg.cbndx << S2CR_CBNDX_SHIFT);
                writel_relaxed(s2cr, gr0_base + ARM_SMMU_GR0_S2CR(idx));
        }

        return 0;
}

static void arm_smmu_domain_remove_master(struct arm_smmu_domain *smmu_domain,
                                          struct arm_smmu_master_cfg *cfg)
{
        int i;
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        void __iomem *gr0_base = ARM_SMMU_GR0(smmu);

        /* An IOMMU group is torn down by the first device to be removed */
        if ((smmu->features & ARM_SMMU_FEAT_STREAM_MATCH) && !cfg->smrs)
                return;

        /*
         * We *must* clear the S2CR first, because freeing the SMR means
         * that it can be re-allocated immediately.
         */
        for (i = 0; i < cfg->num_streamids; ++i) {
                u32 idx = cfg->smrs ? cfg->smrs[i].idx : cfg->streamids[i];
                u32 reg = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS;

                writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_S2CR(idx));
        }

        arm_smmu_master_free_smrs(smmu, cfg);
}

static void arm_smmu_detach_dev(struct device *dev,
                                struct arm_smmu_master_cfg *cfg)
{
        struct iommu_domain *domain = dev->archdata.iommu;
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

        dev->archdata.iommu = NULL;
        arm_smmu_domain_remove_master(smmu_domain, cfg);
}

static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
        int ret;
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct arm_smmu_device *smmu;
        struct arm_smmu_master_cfg *cfg;

        smmu = find_smmu_for_device(dev);
        if (!smmu) {
                dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
                return -ENXIO;
        }

        /* Ensure that the domain is finalised */
        ret = arm_smmu_init_domain_context(domain, smmu);
        if (ret < 0)
                return ret;

        /*
         * Sanity check the domain. We don't support domains across
         * different SMMUs.
         */
        if (smmu_domain->smmu != smmu) {
                dev_err(dev,
                        "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
                        dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
                return -EINVAL;
        }

        /* Looks ok, so add the device to the domain */
        cfg = find_smmu_master_cfg(dev);
        if (!cfg)
                return -ENODEV;

        /* Detach the dev from its current domain */
        if (dev->archdata.iommu)
                arm_smmu_detach_dev(dev, cfg);

        ret = arm_smmu_domain_add_master(smmu_domain, cfg);
        if (!ret)
                dev->archdata.iommu = domain;
        return ret;
}

static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
                        phys_addr_t paddr, size_t size, int prot)
{
        int ret;
        unsigned long flags;
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;

        if (!ops)
                return -ENODEV;

        spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
        ret = ops->map(ops, iova, paddr, size, prot);
        spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
        return ret;
}

static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
                             size_t size)
{
        size_t ret;
        unsigned long flags;
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;

        if (!ops)
                return 0;

        spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
        ret = ops->unmap(ops, iova, size);
        spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
        return ret;
}

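/*
 * Translate an IOVA by asking the hardware to do it: write the
 * page-aligned address to ATS1PR, poll ATSR until the translation
 * completes, then read the result from PAR, falling back to a software
 * table walk on timeout.
 */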
1299 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1300                                               dma_addr_t iova)
1301 {
1302         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1303         struct arm_smmu_device *smmu = smmu_domain->smmu;
1304         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1305         struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
1306         struct device *dev = smmu->dev;
1307         void __iomem *cb_base;
1308         u32 tmp;
1309         u64 phys;
1310         unsigned long va;
1311
1312         cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
1313
1314         /* ATS1 registers can only be written atomically */
1315         va = iova & ~0xfffUL;
1316         if (smmu->version == ARM_SMMU_V2)
1317                 smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
1318         else /* Register is only 32-bit in v1 */
1319                 writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
1320
1321         if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
1322                                       !(tmp & ATSR_ACTIVE), 5, 50)) {
1323                 dev_err(dev,
1324                         "iova to phys timed out on %pad. Falling back to software table walk.\n",
1325                         &iova);
1326                 return ops->iova_to_phys(ops, iova);
1327         }
1328
1329         phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
1330         if (phys & CB_PAR_F) {
1331                 dev_err(dev, "translation fault!\n");
1332                 dev_err(dev, "PAR = 0x%llx\n", phys);
1333                 return 0;
1334         }
1335
1336         return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1337 }
1338
1339 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1340                                         dma_addr_t iova)
1341 {
1342         phys_addr_t ret;
1343         unsigned long flags;
1344         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1345         struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
1346
1347         if (!ops)
1348                 return 0;
1349
1350         spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1351         if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1352                         smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1353                 ret = arm_smmu_iova_to_phys_hard(domain, iova);
1354         } else {
1355                 ret = ops->iova_to_phys(ops, iova);
1356         }
1357
1358         spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1359
1360         return ret;
1361 }
1362
1363 static bool arm_smmu_capable(enum iommu_cap cap)
1364 {
1365         switch (cap) {
1366         case IOMMU_CAP_CACHE_COHERENCY:
1367                 /*
1368                  * Return true here as the SMMU can always send out coherent
1369                  * requests.
1370                  */
1371                 return true;
1372         case IOMMU_CAP_INTR_REMAP:
1373                 return true; /* MSIs are just memory writes */
1374         case IOMMU_CAP_NOEXEC:
1375                 return true;
1376         default:
1377                 return false;
1378         }
1379 }
1380
1381 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
1382 {
1383         *((u16 *)data) = alias;
1384         return 0; /* Continue walking */
1385 }
1386
1387 static void __arm_smmu_release_pci_iommudata(void *data)
1388 {
1389         kfree(data);
1390 }
1391
1392 static int arm_smmu_init_pci_device(struct pci_dev *pdev,
1393                                     struct iommu_group *group)
1394 {
1395         struct arm_smmu_master_cfg *cfg;
1396         u16 sid;
1397         int i;
1398
1399         cfg = iommu_group_get_iommudata(group);
1400         if (!cfg) {
1401                 cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
1402                 if (!cfg)
1403                         return -ENOMEM;
1404
1405                 iommu_group_set_iommudata(group, cfg,
1406                                           __arm_smmu_release_pci_iommudata);
1407         }
1408
1409         if (cfg->num_streamids >= MAX_MASTER_STREAMIDS)
1410                 return -ENOSPC;
1411
1412         /*
1413          * Assume Stream ID == Requester ID for now.
1414          * We need a way to describe the ID mappings in FDT.
1415          */
1416         pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid, &sid);
1417         for (i = 0; i < cfg->num_streamids; ++i)
1418                 if (cfg->streamids[i] == sid)
1419                         break;
1420
1421         /* Avoid duplicate SIDs, as this can lead to SMR conflicts */
1422         if (i == cfg->num_streamids)
1423                 cfg->streamids[cfg->num_streamids++] = sid;
1424
1425         return 0;
1426 }
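
/*
 * Note (for orientation): pci_for_each_dma_alias() invokes
 * __arm_smmu_get_pci_sid() once for the device's own requester ID and once
 * for each bridge alias on the way to the root, overwriting 'sid' each
 * time, so the value used above is the last alias visited. The duplicate
 * check then keeps a shared alias from appearing twice in the group's
 * stream ID list.
 */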
1427
1428 static int arm_smmu_init_platform_device(struct device *dev,
1429                                          struct iommu_group *group)
1430 {
1431         struct arm_smmu_device *smmu = find_smmu_for_device(dev);
1432         struct arm_smmu_master *master;
1433
1434         if (!smmu)
1435                 return -ENODEV;
1436
1437         master = find_smmu_master(smmu, dev->of_node);
1438         if (!master)
1439                 return -ENODEV;
1440
1441         iommu_group_set_iommudata(group, &master->cfg, NULL);
1442
1443         return 0;
1444 }
1445
1446 static int arm_smmu_add_device(struct device *dev)
1447 {
1448         struct iommu_group *group;
1449
1450         group = iommu_group_get_for_dev(dev);
1451         if (IS_ERR(group))
1452                 return PTR_ERR(group);
1453
1454         iommu_group_put(group);
1455         return 0;
1456 }
1457
1458 static void arm_smmu_remove_device(struct device *dev)
1459 {
1460         iommu_group_remove_device(dev);
1461 }
1462
1463 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1464 {
1465         struct iommu_group *group;
1466         int ret;
1467
1468         if (dev_is_pci(dev))
1469                 group = pci_device_group(dev);
1470         else
1471                 group = generic_device_group(dev);
1472
1473         if (IS_ERR(group))
1474                 return group;
1475
1476         if (dev_is_pci(dev))
1477                 ret = arm_smmu_init_pci_device(to_pci_dev(dev), group);
1478         else
1479                 ret = arm_smmu_init_platform_device(dev, group);
1480
1481         if (ret) {
1482                 iommu_group_put(group);
1483                 group = ERR_PTR(ret);
1484         }
1485
1486         return group;
1487 }
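
/*
 * Flow sketch (for orientation): iommu_group_get_for_dev(), called from
 * arm_smmu_add_device() above, invokes this ->device_group() callback to
 * find or allocate the group; the PCI or platform init hook then hangs
 * the master configuration off the group via iommu_group_set_iommudata().
 */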
1488
1489 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1490                                     enum iommu_attr attr, void *data)
1491 {
1492         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1493
1494         switch (attr) {
1495         case DOMAIN_ATTR_NESTING:
1496                 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1497                 return 0;
1498         default:
1499                 return -ENODEV;
1500         }
1501 }
1502
1503 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1504                                     enum iommu_attr attr, void *data)
1505 {
1506         int ret = 0;
1507         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1508
1509         mutex_lock(&smmu_domain->init_mutex);
1510
1511         switch (attr) {
1512         case DOMAIN_ATTR_NESTING:
1513                 if (smmu_domain->smmu) {
1514                         ret = -EPERM;
1515                         goto out_unlock;
1516                 }
1517
1518                 if (*(int *)data)
1519                         smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1520                 else
1521                         smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1522
1523                 break;
1524         default:
1525                 ret = -ENODEV;
1526         }
1527
1528 out_unlock:
1529         mutex_unlock(&smmu_domain->init_mutex);
1530         return ret;
1531 }
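
/*
 * Usage sketch (hedged, not taken from this file): a caller such as VFIO
 * selects nested mode before the first attach, e.g.
 *
 *	int nesting = 1;
 *
 *	ret = iommu_domain_set_attr(domain, DOMAIN_ATTR_NESTING, &nesting);
 *
 * Once a device has been attached (smmu_domain->smmu is non-NULL), the
 * stage is fixed and the call above returns -EPERM.
 */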
1532
1533 static struct iommu_ops arm_smmu_ops = {
1534         .capable                = arm_smmu_capable,
1535         .domain_alloc           = arm_smmu_domain_alloc,
1536         .domain_free            = arm_smmu_domain_free,
1537         .attach_dev             = arm_smmu_attach_dev,
1538         .map                    = arm_smmu_map,
1539         .unmap                  = arm_smmu_unmap,
1540         .map_sg                 = default_iommu_map_sg,
1541         .iova_to_phys           = arm_smmu_iova_to_phys,
1542         .add_device             = arm_smmu_add_device,
1543         .remove_device          = arm_smmu_remove_device,
1544         .device_group           = arm_smmu_device_group,
1545         .domain_get_attr        = arm_smmu_domain_get_attr,
1546         .domain_set_attr        = arm_smmu_domain_set_attr,
1547         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
1548 };
1549
1550 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1551 {
1552         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1553         void __iomem *cb_base;
1554         int i = 0;
1555         u32 reg, major;
1556
1557         /* clear global FSR */
1558         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1559         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1560
1561         /* Mark all SMRn as invalid and all S2CRn as bypass unless overridden */
1562         reg = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS;
1563         for (i = 0; i < smmu->num_mapping_groups; ++i) {
1564                 writel_relaxed(0, gr0_base + ARM_SMMU_GR0_SMR(i));
1565                 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_S2CR(i));
1566         }
1567
1568         /*
1569          * Before clearing ARM_MMU500_ACTLR_CPRE, we must first clear the
1570          * CACHE_LOCK bit of the ACR, noting that CACHE_LOCK is only
1571          * present in MMU-500 r2 onwards.
1572          */
1573         reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
1574         major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
1575         if ((smmu->model == ARM_MMU500) && (major >= 2)) {
1576                 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
1577                 reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
1578                 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
1579         }
1580
1581         /* Make sure all context banks are disabled and clear CB_FSR */
1582         for (i = 0; i < smmu->num_context_banks; ++i) {
1583                 cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, i);
1584                 writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
1585                 writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
1586                 /*
1587                  * Disable MMU-500's not-particularly-beneficial next-page
1588                  * prefetcher for the sake of errata #841119 and #826419.
1589                  */
1590                 if (smmu->model == ARM_MMU500) {
1591                         reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
1592                         reg &= ~ARM_MMU500_ACTLR_CPRE;
1593                         writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
1594                 }
1595         }
1596
1597         /* Invalidate the TLB, just in case */
1598         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
1599         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
1600
1601         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1602
1603         /* Enable fault reporting */
1604         reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1605
1606         /* Disable TLB broadcasting. */
1607         reg |= (sCR0_VMIDPNE | sCR0_PTM);
1608
1609         /* Enable client access, handling unmatched streams as appropriate */
1610         reg &= ~sCR0_CLIENTPD;
1611         if (disable_bypass)
1612                 reg |= sCR0_USFCFG;
1613         else
1614                 reg &= ~sCR0_USFCFG;
1615
1616         /* Disable forced broadcasting */
1617         reg &= ~sCR0_FB;
1618
1619         /* Don't upgrade barriers */
1620         reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
1621
1622         if (smmu->features & ARM_SMMU_FEAT_VMID16)
1623                 reg |= sCR0_VMID16EN;
1624
1625         /* Push the button */
1626         __arm_smmu_tlb_sync(smmu);
1627         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1628 }
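
/*
 * Note on the sequencing above: the TLB sync is completed before the final
 * sCR0 write that re-enables the SMMU, and that write deliberately uses
 * writel() rather than writel_relaxed() so that the preceding invalidations
 * and configuration writes are observed before translation is switched on.
 */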
1629
1630 static int arm_smmu_id_size_to_bits(int size)
1631 {
1632         switch (size) {
1633         case 0:
1634                 return 32;
1635         case 1:
1636                 return 36;
1637         case 2:
1638                 return 40;
1639         case 3:
1640                 return 42;
1641         case 4:
1642                 return 44;
1643         case 5:
1644         default:
1645                 return 48;
1646         }
1647 }
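
/*
 * Worked example: a size field of 2 decodes to 40 address bits, i.e. a
 * 1TB range (1ULL << 40 bytes); fields of 5 and above map to the
 * architectural maximum of 48 bits.
 */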
1648
1649 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1650 {
1651         unsigned long size;
1652         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1653         u32 id;
1654         bool cttw_dt, cttw_reg;
1655
1656         dev_notice(smmu->dev, "probing hardware configuration...\n");
1657         dev_notice(smmu->dev, "SMMUv%d with:\n",
1658                         smmu->version == ARM_SMMU_V2 ? 2 : 1);
1659
1660         /* ID0 */
1661         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
1662
1663         /* Restrict available stages based on module parameter */
1664         if (force_stage == 1)
1665                 id &= ~(ID0_S2TS | ID0_NTS);
1666         else if (force_stage == 2)
1667                 id &= ~(ID0_S1TS | ID0_NTS);
1668
1669         if (id & ID0_S1TS) {
1670                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1671                 dev_notice(smmu->dev, "\tstage 1 translation\n");
1672         }
1673
1674         if (id & ID0_S2TS) {
1675                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1676                 dev_notice(smmu->dev, "\tstage 2 translation\n");
1677         }
1678
1679         if (id & ID0_NTS) {
1680                 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1681                 dev_notice(smmu->dev, "\tnested translation\n");
1682         }
1683
1684         if (!(smmu->features &
1685                 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1686                 dev_err(smmu->dev, "\tno translation support!\n");
1687                 return -ENODEV;
1688         }
1689
1690         if ((id & ID0_S1TS) &&
1691                 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1692                 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1693                 dev_notice(smmu->dev, "\taddress translation ops\n");
1694         }
1695
1696         /*
1697          * In order for DMA API calls to work properly, we must defer to what
1698          * the DT says about coherency, regardless of what the hardware claims.
1699          * Fortunately, this also opens up a workaround for systems where the
1700          * ID register value has ended up configured incorrectly.
1701          */
1702         cttw_dt = of_dma_is_coherent(smmu->dev->of_node);
1703         cttw_reg = !!(id & ID0_CTTW);
1704         if (cttw_dt)
1705                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1706         if (cttw_dt || cttw_reg)
1707                 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1708                            cttw_dt ? "" : "non-");
1709         if (cttw_dt != cttw_reg)
1710                 dev_notice(smmu->dev,
1711                            "\t(IDR0.CTTW overridden by dma-coherent property)\n");
1712
1713         if (id & ID0_SMS) {
1714                 u32 smr, sid, mask;
1715
1716                 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1717                 smmu->num_mapping_groups = (id >> ID0_NUMSMRG_SHIFT) &
1718                                            ID0_NUMSMRG_MASK;
1719                 if (smmu->num_mapping_groups == 0) {
1720                         dev_err(smmu->dev,
1721                                 "stream-matching supported, but no SMRs present!\n");
1722                         return -ENODEV;
1723                 }
1724
1725                 smr = SMR_MASK_MASK << SMR_MASK_SHIFT;
1726                 smr |= (SMR_ID_MASK << SMR_ID_SHIFT);
1727                 writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1728                 smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1729
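                /*
                 * Illustrative example: if the hardware implements all 15
                 * stream ID bits (sid reads back as 0x7fff) but only 12
                 * mask bits (mask reads back as 0xfff), then
                 * (mask & sid) != sid and we bail out below, since such
                 * SMRs could never match the upper stream ID bits.
                 */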
1730                 mask = (smr >> SMR_MASK_SHIFT) & SMR_MASK_MASK;
1731                 sid = (smr >> SMR_ID_SHIFT) & SMR_ID_MASK;
1732                 if ((mask & sid) != sid) {
1733                         dev_err(smmu->dev,
1734                                 "SMR mask bits (0x%x) insufficient for ID field (0x%x)\n",
1735                                 mask, sid);
1736                         return -ENODEV;
1737                 }
1738
1739                 dev_notice(smmu->dev,
1740                            "\tstream matching with %u register groups, mask 0x%x\n",
1741                            smmu->num_mapping_groups, mask);
1742         } else {
1743                 smmu->num_mapping_groups = (id >> ID0_NUMSIDB_SHIFT) &
1744                                            ID0_NUMSIDB_MASK;
1745         }
1746
1747         if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1748                 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1749                 if (!(id & ID0_PTFS_NO_AARCH32S))
1750                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1751         }
1752
1753         /* ID1 */
1754         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
1755         smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1756
1757         /* Check for size mismatch of SMMU address space from mapped region */
1758         size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
1759         size *= 2 << smmu->pgshift;
1760         if (smmu->size != size)
1761                 dev_warn(smmu->dev,
1762                         "SMMU address space size (0x%lx) differs from mapped region size (0x%lx)!\n",
1763                         size, smmu->size);
1764
1765         smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
1766         smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
1767         if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1768                 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1769                 return -ENODEV;
1770         }
1771         dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1772                    smmu->num_context_banks, smmu->num_s2_context_banks);
1773         /*
1774          * Cavium CN88xx erratum #27704.
1775          * Ensure ASID and VMID allocation is unique across all SMMUs in
1776          * the system.
1777          */
1778         if (smmu->model == CAVIUM_SMMUV2) {
1779                 smmu->cavium_id_base =
1780                         atomic_add_return(smmu->num_context_banks,
1781                                           &cavium_smmu_context_count);
1782                 smmu->cavium_id_base -= smmu->num_context_banks;
1783         }
1784
1785         /* ID2 */
1786         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
1787         size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
1788         smmu->ipa_size = size;
1789
1790         /* The output mask is also applied for bypass */
1791         size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
1792         smmu->pa_size = size;
1793
1794         if (id & ID2_VMID16)
1795                 smmu->features |= ARM_SMMU_FEAT_VMID16;
1796
1797         /*
1798          * What the page table walker can address actually depends on which
1799          * descriptor format is in use, but since a) we don't know that yet,
1800          * and b) it can vary per context bank, this will have to do...
1801          */
1802         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1803                 dev_warn(smmu->dev,
1804                          "failed to set DMA mask for table walker\n");
1805
1806         if (smmu->version < ARM_SMMU_V2) {
1807                 smmu->va_size = smmu->ipa_size;
1808                 if (smmu->version == ARM_SMMU_V1_64K)
1809                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1810         } else {
1811                 size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
1812                 smmu->va_size = arm_smmu_id_size_to_bits(size);
1813                 if (id & ID2_PTFS_4K)
1814                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1815                 if (id & ID2_PTFS_16K)
1816                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1817                 if (id & ID2_PTFS_64K)
1818                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1819         }
1820
1821         /* Now we've corralled the various formats, what'll it do? */
1822         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1823                 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1824         if (smmu->features &
1825             (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1826                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1827         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1828                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1829         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1830                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1831
1832         if (arm_smmu_ops.pgsize_bitmap == -1UL)
1833                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1834         else
1835                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1836         dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1837                    smmu->pgsize_bitmap);
1838
1839
1840         if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1841                 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1842                            smmu->va_size, smmu->ipa_size);
1843
1844         if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1845                 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1846                            smmu->ipa_size, smmu->pa_size);
1847
1848         return 0;
1849 }
1850
1851 struct arm_smmu_match_data {
1852         enum arm_smmu_arch_version version;
1853         enum arm_smmu_implementation model;
1854 };
1855
1856 #define ARM_SMMU_MATCH_DATA(name, ver, imp)     \
1857 static struct arm_smmu_match_data name = { .version = ver, .model = imp }
1858
1859 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1860 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1861 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1862 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1863 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1864
1865 static const struct of_device_id arm_smmu_of_match[] = {
1866         { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1867         { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1868         { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1869         { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1870         { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1871         { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1872         { },
1873 };
1874 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1875
1876 static int arm_smmu_device_dt_probe(struct platform_device *pdev)
1877 {
1878         const struct of_device_id *of_id;
1879         const struct arm_smmu_match_data *data;
1880         struct resource *res;
1881         struct arm_smmu_device *smmu;
1882         struct device *dev = &pdev->dev;
1883         struct rb_node *node;
1884         struct of_phandle_iterator it;
1885         struct arm_smmu_phandle_args *masterspec;
1886         int num_irqs, i, err;
1887
1888         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
1889         if (!smmu) {
1890                 dev_err(dev, "failed to allocate arm_smmu_device\n");
1891                 return -ENOMEM;
1892         }
1893         smmu->dev = dev;
1894
1895         of_id = of_match_node(arm_smmu_of_match, dev->of_node);
1896         data = of_id->data;
1897         smmu->version = data->version;
1898         smmu->model = data->model;
1899
1900         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
1901         smmu->base = devm_ioremap_resource(dev, res);
1902         if (IS_ERR(smmu->base))
1903                 return PTR_ERR(smmu->base);
1904         smmu->size = resource_size(res);
1905
1906         if (of_property_read_u32(dev->of_node, "#global-interrupts",
1907                                  &smmu->num_global_irqs)) {
1908                 dev_err(dev, "missing #global-interrupts property\n");
1909                 return -ENODEV;
1910         }
1911
1912         num_irqs = 0;
1913         while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
1914                 num_irqs++;
1915                 if (num_irqs > smmu->num_global_irqs)
1916                         smmu->num_context_irqs++;
1917         }
1918
1919         if (!smmu->num_context_irqs) {
1920                 dev_err(dev, "found %d interrupts but expected at least %d\n",
1921                         num_irqs, smmu->num_global_irqs + 1);
1922                 return -ENODEV;
1923         }
1924
1925         smmu->irqs = devm_kzalloc(dev, sizeof(*smmu->irqs) * num_irqs,
1926                                   GFP_KERNEL);
1927         if (!smmu->irqs) {
1928                 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
1929                 return -ENOMEM;
1930         }
1931
1932         for (i = 0; i < num_irqs; ++i) {
1933                 int irq = platform_get_irq(pdev, i);
1934
1935                 if (irq < 0) {
1936                         dev_err(dev, "failed to get irq index %d\n", i);
1937                         return -ENODEV;
1938                 }
1939                 smmu->irqs[i] = irq;
1940         }
1941
1942         err = arm_smmu_device_cfg_probe(smmu);
1943         if (err)
1944                 return err;
1945
1946         i = 0;
1947         smmu->masters = RB_ROOT;
1948
1949         err = -ENOMEM;
1950         /* No need to zero the memory for masterspec */
1951         masterspec = kmalloc(sizeof(*masterspec), GFP_KERNEL);
1952         if (!masterspec)
1953                 goto out_put_masters;
1954
1955         of_for_each_phandle(&it, err, dev->of_node,
1956                             "mmu-masters", "#stream-id-cells", 0) {
1957                 int count = of_phandle_iterator_args(&it, masterspec->args,
1958                                                      MAX_MASTER_STREAMIDS);
1959                 masterspec->np          = of_node_get(it.node);
1960                 masterspec->args_count  = count;
1961
1962                 err = register_smmu_master(smmu, dev, masterspec);
1963                 if (err) {
1964                         dev_err(dev, "failed to add master %s\n",
1965                                 masterspec->np->name);
1966                         kfree(masterspec);
1967                         goto out_put_masters;
1968                 }
1969
1970                 i++;
1971         }
1972
1973         dev_notice(dev, "registered %d master devices\n", i);
1974
1975         kfree(masterspec);
1976
1977         parse_driver_options(smmu);
1978
1979         if (smmu->version == ARM_SMMU_V2 &&
1980             smmu->num_context_banks != smmu->num_context_irqs) {
1981                 dev_err(dev,
1982                         "found only %d context interrupt(s) but %d required\n",
1983                         smmu->num_context_irqs, smmu->num_context_banks);
1984                 err = -ENODEV;
1985                 goto out_put_masters;
1986         }
1987
1988         for (i = 0; i < smmu->num_global_irqs; ++i) {
1989                 err = devm_request_irq(smmu->dev, smmu->irqs[i],
1990                                        arm_smmu_global_fault,
1991                                        IRQF_SHARED,
1992                                        "arm-smmu global fault",
1993                                        smmu);
1994                 if (err) {
1995                         dev_err(dev, "failed to request global IRQ %d (%u)\n",
1996                                 i, smmu->irqs[i]);
1997                         goto out_put_masters;
1998                 }
1999         }
2000
2001         INIT_LIST_HEAD(&smmu->list);
2002         spin_lock(&arm_smmu_devices_lock);
2003         list_add(&smmu->list, &arm_smmu_devices);
2004         spin_unlock(&arm_smmu_devices_lock);
2005
2006         arm_smmu_device_reset(smmu);
2007         return 0;
2008
2009 out_put_masters:
2010         for (node = rb_first(&smmu->masters); node; node = rb_next(node)) {
2011                 struct arm_smmu_master *master
2012                         = container_of(node, struct arm_smmu_master, node);
2013                 of_node_put(master->of_node);
2014         }
2015
2016         return err;
2017 }
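
/*
 * A minimal devicetree sketch of what the probe routine above consumes
 * (node names, addresses and stream IDs are illustrative only; the
 * arm,smmu DT binding document is authoritative):
 *
 *	smmu: iommu@2b400000 {
 *		compatible = "arm,mmu-500";
 *		reg = <0x2b400000 0x10000>;
 *		#global-interrupts = <1>;
 *		interrupts = <0 32 4>,	// global fault
 *			     <0 33 4>;	// context bank 0
 *		mmu-masters = <&dma0 0xd01d>;
 *	};
 *
 * where the hypothetical master node (&dma0) declares
 * "#stream-id-cells = <1>" so that of_for_each_phandle() can size each
 * masterspec.
 */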
2018
2019 static int arm_smmu_device_remove(struct platform_device *pdev)
2020 {
2021         int i;
2022         struct device *dev = &pdev->dev;
2023         struct arm_smmu_device *curr, *smmu = NULL;
2024         struct rb_node *node;
2025
2026         spin_lock(&arm_smmu_devices_lock);
2027         list_for_each_entry(curr, &arm_smmu_devices, list) {
2028                 if (curr->dev == dev) {
2029                         smmu = curr;
2030                         list_del(&smmu->list);
2031                         break;
2032                 }
2033         }
2034         spin_unlock(&arm_smmu_devices_lock);
2035
2036         if (!smmu)
2037                 return -ENODEV;
2038
2039         for (node = rb_first(&smmu->masters); node; node = rb_next(node)) {
2040                 struct arm_smmu_master *master
2041                         = container_of(node, struct arm_smmu_master, node);
2042                 of_node_put(master->of_node);
2043         }
2044
2045         if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2046                 dev_err(dev, "removing device with active domains!\n");
2047
2048         for (i = 0; i < smmu->num_global_irqs; ++i)
2049                 devm_free_irq(smmu->dev, smmu->irqs[i], smmu);
2050
2051         /* Turn the thing off */
2052         writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
2053         return 0;
2054 }
2055
2056 static struct platform_driver arm_smmu_driver = {
2057         .driver = {
2058                 .name           = "arm-smmu",
2059                 .of_match_table = of_match_ptr(arm_smmu_of_match),
2060         },
2061         .probe  = arm_smmu_device_dt_probe,
2062         .remove = arm_smmu_device_remove,
2063 };
2064
2065 static int __init arm_smmu_init(void)
2066 {
2067         struct device_node *np;
2068         int ret;
2069
2070         /*
2071          * Play nice with systems that don't have an ARM SMMU by checking that
2072          * an ARM SMMU exists in the system before proceeding with the driver
2073          * and IOMMU bus operation registration.
2074          */
2075         np = of_find_matching_node(NULL, arm_smmu_of_match);
2076         if (!np)
2077                 return 0;
2078
2079         of_node_put(np);
2080
2081         ret = platform_driver_register(&arm_smmu_driver);
2082         if (ret)
2083                 return ret;
2084
2085         /* Oh, for a proper bus abstraction */
2086         if (!iommu_present(&platform_bus_type))
2087                 bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2088
2089 #ifdef CONFIG_ARM_AMBA
2090         if (!iommu_present(&amba_bustype))
2091                 bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2092 #endif
2093
2094 #ifdef CONFIG_PCI
2095         if (!iommu_present(&pci_bus_type)) {
2096                 pci_request_acs();
2097                 bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2098         }
2099 #endif
2100
2101         return 0;
2102 }
2103
2104 static void __exit arm_smmu_exit(void)
2105 {
2106         platform_driver_unregister(&arm_smmu_driver);
2107 }
2108
2109 subsys_initcall(arm_smmu_init);
2110 module_exit(arm_smmu_exit);
2111
2112 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2113 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2114 MODULE_LICENSE("GPL v2");