1 /*
2  * IOMMU API for ARM architected SMMUv3 implementations.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
15  *
16  * Copyright (C) 2015 ARM Limited
17  *
18  * Author: Will Deacon <will.deacon@arm.com>
19  *
20  * This driver is powered by bad coffee and bombay mix.
21  */
22
23 #include <linux/delay.h>
24 #include <linux/dma-iommu.h>
25 #include <linux/err.h>
26 #include <linux/interrupt.h>
27 #include <linux/iommu.h>
28 #include <linux/iopoll.h>
29 #include <linux/module.h>
30 #include <linux/msi.h>
31 #include <linux/of.h>
32 #include <linux/of_address.h>
33 #include <linux/of_platform.h>
34 #include <linux/pci.h>
35 #include <linux/platform_device.h>
36
37 #include "io-pgtable.h"
38
39 /* MMIO registers */
40 #define ARM_SMMU_IDR0                   0x0
41 #define IDR0_ST_LVL_SHIFT               27
42 #define IDR0_ST_LVL_MASK                0x3
43 #define IDR0_ST_LVL_2LVL                (1 << IDR0_ST_LVL_SHIFT)
44 #define IDR0_STALL_MODEL_SHIFT          24
45 #define IDR0_STALL_MODEL_MASK           0x3
46 #define IDR0_STALL_MODEL_STALL          (0 << IDR0_STALL_MODEL_SHIFT)
47 #define IDR0_STALL_MODEL_FORCE          (2 << IDR0_STALL_MODEL_SHIFT)
48 #define IDR0_TTENDIAN_SHIFT             21
49 #define IDR0_TTENDIAN_MASK              0x3
50 #define IDR0_TTENDIAN_LE                (2 << IDR0_TTENDIAN_SHIFT)
51 #define IDR0_TTENDIAN_BE                (3 << IDR0_TTENDIAN_SHIFT)
52 #define IDR0_TTENDIAN_MIXED             (0 << IDR0_TTENDIAN_SHIFT)
53 #define IDR0_CD2L                       (1 << 19)
54 #define IDR0_VMID16                     (1 << 18)
55 #define IDR0_PRI                        (1 << 16)
56 #define IDR0_SEV                        (1 << 14)
57 #define IDR0_MSI                        (1 << 13)
58 #define IDR0_ASID16                     (1 << 12)
59 #define IDR0_ATS                        (1 << 10)
60 #define IDR0_HYP                        (1 << 9)
61 #define IDR0_COHACC                     (1 << 4)
62 #define IDR0_TTF_SHIFT                  2
63 #define IDR0_TTF_MASK                   0x3
64 #define IDR0_TTF_AARCH64                (2 << IDR0_TTF_SHIFT)
65 #define IDR0_TTF_AARCH32_64             (3 << IDR0_TTF_SHIFT)
66 #define IDR0_S1P                        (1 << 1)
67 #define IDR0_S2P                        (1 << 0)
68
69 #define ARM_SMMU_IDR1                   0x4
70 #define IDR1_TABLES_PRESET              (1 << 30)
71 #define IDR1_QUEUES_PRESET              (1 << 29)
72 #define IDR1_REL                        (1 << 28)
73 #define IDR1_CMDQ_SHIFT                 21
74 #define IDR1_CMDQ_MASK                  0x1f
75 #define IDR1_EVTQ_SHIFT                 16
76 #define IDR1_EVTQ_MASK                  0x1f
77 #define IDR1_PRIQ_SHIFT                 11
78 #define IDR1_PRIQ_MASK                  0x1f
79 #define IDR1_SSID_SHIFT                 6
80 #define IDR1_SSID_MASK                  0x1f
81 #define IDR1_SID_SHIFT                  0
82 #define IDR1_SID_MASK                   0x3f
83
84 #define ARM_SMMU_IDR5                   0x14
85 #define IDR5_STALL_MAX_SHIFT            16
86 #define IDR5_STALL_MAX_MASK             0xffff
87 #define IDR5_GRAN64K                    (1 << 6)
88 #define IDR5_GRAN16K                    (1 << 5)
89 #define IDR5_GRAN4K                     (1 << 4)
90 #define IDR5_OAS_SHIFT                  0
91 #define IDR5_OAS_MASK                   0x7
92 #define IDR5_OAS_32_BIT                 (0 << IDR5_OAS_SHIFT)
93 #define IDR5_OAS_36_BIT                 (1 << IDR5_OAS_SHIFT)
94 #define IDR5_OAS_40_BIT                 (2 << IDR5_OAS_SHIFT)
95 #define IDR5_OAS_42_BIT                 (3 << IDR5_OAS_SHIFT)
96 #define IDR5_OAS_44_BIT                 (4 << IDR5_OAS_SHIFT)
97 #define IDR5_OAS_48_BIT                 (5 << IDR5_OAS_SHIFT)
98
99 #define ARM_SMMU_CR0                    0x20
100 #define CR0_CMDQEN                      (1 << 3)
101 #define CR0_EVTQEN                      (1 << 2)
102 #define CR0_PRIQEN                      (1 << 1)
103 #define CR0_SMMUEN                      (1 << 0)
104
105 #define ARM_SMMU_CR0ACK                 0x24
106
107 #define ARM_SMMU_CR1                    0x28
108 #define CR1_SH_NSH                      0
109 #define CR1_SH_OSH                      2
110 #define CR1_SH_ISH                      3
111 #define CR1_CACHE_NC                    0
112 #define CR1_CACHE_WB                    1
113 #define CR1_CACHE_WT                    2
114 #define CR1_TABLE_SH_SHIFT              10
115 #define CR1_TABLE_OC_SHIFT              8
116 #define CR1_TABLE_IC_SHIFT              6
117 #define CR1_QUEUE_SH_SHIFT              4
118 #define CR1_QUEUE_OC_SHIFT              2
119 #define CR1_QUEUE_IC_SHIFT              0
120
121 #define ARM_SMMU_CR2                    0x2c
122 #define CR2_PTM                         (1 << 2)
123 #define CR2_RECINVSID                   (1 << 1)
124 #define CR2_E2H                         (1 << 0)
125
126 #define ARM_SMMU_IRQ_CTRL               0x50
127 #define IRQ_CTRL_EVTQ_IRQEN             (1 << 2)
128 #define IRQ_CTRL_PRIQ_IRQEN             (1 << 1)
129 #define IRQ_CTRL_GERROR_IRQEN           (1 << 0)
130
131 #define ARM_SMMU_IRQ_CTRLACK            0x54
132
133 #define ARM_SMMU_GERROR                 0x60
134 #define GERROR_SFM_ERR                  (1 << 8)
135 #define GERROR_MSI_GERROR_ABT_ERR       (1 << 7)
136 #define GERROR_MSI_PRIQ_ABT_ERR         (1 << 6)
137 #define GERROR_MSI_EVTQ_ABT_ERR         (1 << 5)
138 #define GERROR_MSI_CMDQ_ABT_ERR         (1 << 4)
139 #define GERROR_PRIQ_ABT_ERR             (1 << 3)
140 #define GERROR_EVTQ_ABT_ERR             (1 << 2)
141 #define GERROR_CMDQ_ERR                 (1 << 0)
142 #define GERROR_ERR_MASK                 0xfd
143
144 #define ARM_SMMU_GERRORN                0x64
145
146 #define ARM_SMMU_GERROR_IRQ_CFG0        0x68
147 #define ARM_SMMU_GERROR_IRQ_CFG1        0x70
148 #define ARM_SMMU_GERROR_IRQ_CFG2        0x74
149
150 #define ARM_SMMU_STRTAB_BASE            0x80
151 #define STRTAB_BASE_RA                  (1UL << 62)
152 #define STRTAB_BASE_ADDR_SHIFT          6
153 #define STRTAB_BASE_ADDR_MASK           0x3ffffffffffUL
154
155 #define ARM_SMMU_STRTAB_BASE_CFG        0x88
156 #define STRTAB_BASE_CFG_LOG2SIZE_SHIFT  0
157 #define STRTAB_BASE_CFG_LOG2SIZE_MASK   0x3f
158 #define STRTAB_BASE_CFG_SPLIT_SHIFT     6
159 #define STRTAB_BASE_CFG_SPLIT_MASK      0x1f
160 #define STRTAB_BASE_CFG_FMT_SHIFT       16
161 #define STRTAB_BASE_CFG_FMT_MASK        0x3
162 #define STRTAB_BASE_CFG_FMT_LINEAR      (0 << STRTAB_BASE_CFG_FMT_SHIFT)
163 #define STRTAB_BASE_CFG_FMT_2LVL        (1 << STRTAB_BASE_CFG_FMT_SHIFT)
164
165 #define ARM_SMMU_CMDQ_BASE              0x90
166 #define ARM_SMMU_CMDQ_PROD              0x98
167 #define ARM_SMMU_CMDQ_CONS              0x9c
168
169 #define ARM_SMMU_EVTQ_BASE              0xa0
170 #define ARM_SMMU_EVTQ_PROD              0x100a8
171 #define ARM_SMMU_EVTQ_CONS              0x100ac
172 #define ARM_SMMU_EVTQ_IRQ_CFG0          0xb0
173 #define ARM_SMMU_EVTQ_IRQ_CFG1          0xb8
174 #define ARM_SMMU_EVTQ_IRQ_CFG2          0xbc
175
176 #define ARM_SMMU_PRIQ_BASE              0xc0
177 #define ARM_SMMU_PRIQ_PROD              0x100c8
178 #define ARM_SMMU_PRIQ_CONS              0x100cc
179 #define ARM_SMMU_PRIQ_IRQ_CFG0          0xd0
180 #define ARM_SMMU_PRIQ_IRQ_CFG1          0xd8
181 #define ARM_SMMU_PRIQ_IRQ_CFG2          0xdc
182
183 /* Common MSI config fields */
184 #define MSI_CFG0_ADDR_SHIFT             2
185 #define MSI_CFG0_ADDR_MASK              0x3fffffffffffUL
186 #define MSI_CFG2_SH_SHIFT               4
187 #define MSI_CFG2_SH_NSH                 (0UL << MSI_CFG2_SH_SHIFT)
188 #define MSI_CFG2_SH_OSH                 (2UL << MSI_CFG2_SH_SHIFT)
189 #define MSI_CFG2_SH_ISH                 (3UL << MSI_CFG2_SH_SHIFT)
190 #define MSI_CFG2_MEMATTR_SHIFT          0
191 #define MSI_CFG2_MEMATTR_DEVICE_nGnRE   (0x1 << MSI_CFG2_MEMATTR_SHIFT)
192
193 #define Q_IDX(q, p)                     ((p) & ((1 << (q)->max_n_shift) - 1))
194 #define Q_WRP(q, p)                     ((p) & (1 << (q)->max_n_shift))
195 #define Q_OVERFLOW_FLAG                 (1 << 31)
196 #define Q_OVF(q, p)                     ((p) & Q_OVERFLOW_FLAG)
197 #define Q_ENT(q, p)                     ((q)->base +                    \
198                                          Q_IDX(q, p) * (q)->ent_dwords)
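/*
 * Illustrative note (added commentary, not in the original source): the
 * producer/consumer values handled by the macros above pack three fields
 * for a queue of 1 << max_n_shift entries, e.g. with max_n_shift == 8:
 *
 *   bit  31     Q_OVERFLOW_FLAG (producer overflow indicator)
 *   bit   8     wrap bit, toggled each time the index wraps around
 *   bits 7:0    entry index into the queue
 *
 * The queue is full when the indices match but the wrap bits differ, and
 * empty when both the indices and the wrap bits match (see queue_full()
 * and queue_empty() below).
 */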
199
200 #define Q_BASE_RWA                      (1UL << 62)
201 #define Q_BASE_ADDR_SHIFT               5
202 #define Q_BASE_ADDR_MASK                0xfffffffffffUL
203 #define Q_BASE_LOG2SIZE_SHIFT           0
204 #define Q_BASE_LOG2SIZE_MASK            0x1fUL
205
206 /*
207  * Stream table.
208  *
209  * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
210  * 2lvl: 128k L1 entries,
211  *       256 lazy entries per table (each table covers a PCI bus)
212  */
213 #define STRTAB_L1_SZ_SHIFT              20
214 #define STRTAB_SPLIT                    8
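/*
 * Illustrative example (added): with STRTAB_SPLIT == 8, a StreamID is split
 * as SID[sid_bits-1:8] -> L1 descriptor index and SID[7:0] -> entry within
 * the 256-entry L2 table, so e.g. SID 0x1234 selects L1 descriptor 0x12 and
 * STE 0x34 within the L2 table it points to. When StreamIDs are PCI
 * Requester IDs, SID[7:0] is the devfn and the L1 index is the bus number,
 * which is why each lazily-allocated L2 table covers one PCI bus.
 */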
215
216 #define STRTAB_L1_DESC_DWORDS           1
217 #define STRTAB_L1_DESC_SPAN_SHIFT       0
218 #define STRTAB_L1_DESC_SPAN_MASK        0x1fUL
219 #define STRTAB_L1_DESC_L2PTR_SHIFT      6
220 #define STRTAB_L1_DESC_L2PTR_MASK       0x3ffffffffffUL
221
222 #define STRTAB_STE_DWORDS               8
223 #define STRTAB_STE_0_V                  (1UL << 0)
224 #define STRTAB_STE_0_CFG_SHIFT          1
225 #define STRTAB_STE_0_CFG_MASK           0x7UL
226 #define STRTAB_STE_0_CFG_ABORT          (0UL << STRTAB_STE_0_CFG_SHIFT)
227 #define STRTAB_STE_0_CFG_BYPASS         (4UL << STRTAB_STE_0_CFG_SHIFT)
228 #define STRTAB_STE_0_CFG_S1_TRANS       (5UL << STRTAB_STE_0_CFG_SHIFT)
229 #define STRTAB_STE_0_CFG_S2_TRANS       (6UL << STRTAB_STE_0_CFG_SHIFT)
230
231 #define STRTAB_STE_0_S1FMT_SHIFT        4
232 #define STRTAB_STE_0_S1FMT_LINEAR       (0UL << STRTAB_STE_0_S1FMT_SHIFT)
233 #define STRTAB_STE_0_S1CTXPTR_SHIFT     6
234 #define STRTAB_STE_0_S1CTXPTR_MASK      0x3ffffffffffUL
235 #define STRTAB_STE_0_S1CDMAX_SHIFT      59
236 #define STRTAB_STE_0_S1CDMAX_MASK       0x1fUL
237
238 #define STRTAB_STE_1_S1C_CACHE_NC       0UL
239 #define STRTAB_STE_1_S1C_CACHE_WBRA     1UL
240 #define STRTAB_STE_1_S1C_CACHE_WT       2UL
241 #define STRTAB_STE_1_S1C_CACHE_WB       3UL
242 #define STRTAB_STE_1_S1C_SH_NSH         0UL
243 #define STRTAB_STE_1_S1C_SH_OSH         2UL
244 #define STRTAB_STE_1_S1C_SH_ISH         3UL
245 #define STRTAB_STE_1_S1CIR_SHIFT        2
246 #define STRTAB_STE_1_S1COR_SHIFT        4
247 #define STRTAB_STE_1_S1CSH_SHIFT        6
248
249 #define STRTAB_STE_1_S1STALLD           (1UL << 27)
250
251 #define STRTAB_STE_1_EATS_ABT           0UL
252 #define STRTAB_STE_1_EATS_TRANS         1UL
253 #define STRTAB_STE_1_EATS_S1CHK         2UL
254 #define STRTAB_STE_1_EATS_SHIFT         28
255
256 #define STRTAB_STE_1_STRW_NSEL1         0UL
257 #define STRTAB_STE_1_STRW_EL2           2UL
258 #define STRTAB_STE_1_STRW_SHIFT         30
259
260 #define STRTAB_STE_1_SHCFG_INCOMING     1UL
261 #define STRTAB_STE_1_SHCFG_SHIFT        44
262
263 #define STRTAB_STE_2_S2VMID_SHIFT       0
264 #define STRTAB_STE_2_S2VMID_MASK        0xffffUL
265 #define STRTAB_STE_2_VTCR_SHIFT         32
266 #define STRTAB_STE_2_VTCR_MASK          0x7ffffUL
267 #define STRTAB_STE_2_S2AA64             (1UL << 51)
268 #define STRTAB_STE_2_S2ENDI             (1UL << 52)
269 #define STRTAB_STE_2_S2PTW              (1UL << 54)
270 #define STRTAB_STE_2_S2R                (1UL << 58)
271
272 #define STRTAB_STE_3_S2TTB_SHIFT        4
273 #define STRTAB_STE_3_S2TTB_MASK         0xfffffffffffUL
274
275 /* Context descriptor (stage-1 only) */
276 #define CTXDESC_CD_DWORDS               8
277 #define CTXDESC_CD_0_TCR_T0SZ_SHIFT     0
278 #define ARM64_TCR_T0SZ_SHIFT            0
279 #define ARM64_TCR_T0SZ_MASK             0x1fUL
280 #define CTXDESC_CD_0_TCR_TG0_SHIFT      6
281 #define ARM64_TCR_TG0_SHIFT             14
282 #define ARM64_TCR_TG0_MASK              0x3UL
283 #define CTXDESC_CD_0_TCR_IRGN0_SHIFT    8
284 #define ARM64_TCR_IRGN0_SHIFT           8
285 #define ARM64_TCR_IRGN0_MASK            0x3UL
286 #define CTXDESC_CD_0_TCR_ORGN0_SHIFT    10
287 #define ARM64_TCR_ORGN0_SHIFT           10
288 #define ARM64_TCR_ORGN0_MASK            0x3UL
289 #define CTXDESC_CD_0_TCR_SH0_SHIFT      12
290 #define ARM64_TCR_SH0_SHIFT             12
291 #define ARM64_TCR_SH0_MASK              0x3UL
292 #define CTXDESC_CD_0_TCR_EPD0_SHIFT     14
293 #define ARM64_TCR_EPD0_SHIFT            7
294 #define ARM64_TCR_EPD0_MASK             0x1UL
295 #define CTXDESC_CD_0_TCR_EPD1_SHIFT     30
296 #define ARM64_TCR_EPD1_SHIFT            23
297 #define ARM64_TCR_EPD1_MASK             0x1UL
298
299 #define CTXDESC_CD_0_ENDI               (1UL << 15)
300 #define CTXDESC_CD_0_V                  (1UL << 31)
301
302 #define CTXDESC_CD_0_TCR_IPS_SHIFT      32
303 #define ARM64_TCR_IPS_SHIFT             32
304 #define ARM64_TCR_IPS_MASK              0x7UL
305 #define CTXDESC_CD_0_TCR_TBI0_SHIFT     38
306 #define ARM64_TCR_TBI0_SHIFT            37
307 #define ARM64_TCR_TBI0_MASK             0x1UL
308
309 #define CTXDESC_CD_0_AA64               (1UL << 41)
310 #define CTXDESC_CD_0_R                  (1UL << 45)
311 #define CTXDESC_CD_0_A                  (1UL << 46)
312 #define CTXDESC_CD_0_ASET_SHIFT         47
313 #define CTXDESC_CD_0_ASET_SHARED        (0UL << CTXDESC_CD_0_ASET_SHIFT)
314 #define CTXDESC_CD_0_ASET_PRIVATE       (1UL << CTXDESC_CD_0_ASET_SHIFT)
315 #define CTXDESC_CD_0_ASID_SHIFT         48
316 #define CTXDESC_CD_0_ASID_MASK          0xffffUL
317
318 #define CTXDESC_CD_1_TTB0_SHIFT         4
319 #define CTXDESC_CD_1_TTB0_MASK          0xfffffffffffUL
320
321 #define CTXDESC_CD_3_MAIR_SHIFT         0
322
323 /* Convert between AArch64 (CPU) TCR format and SMMU CD format */
324 #define ARM_SMMU_TCR2CD(tcr, fld)                                       \
325         (((tcr) >> ARM64_TCR_##fld##_SHIFT & ARM64_TCR_##fld##_MASK)    \
326          << CTXDESC_CD_0_TCR_##fld##_SHIFT)
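/*
 * Worked example (added for clarity): ARM_SMMU_TCR2CD(tcr, TG0) expands to
 *
 *      (((tcr) >> ARM64_TCR_TG0_SHIFT & ARM64_TCR_TG0_MASK)
 *              << CTXDESC_CD_0_TCR_TG0_SHIFT)
 *
 * i.e. ((tcr >> 14) & 0x3) << 6, which moves the CPU's TCR_EL1.TG0 field
 * into the TG0 position of context descriptor word 0. arm_smmu_cpu_tcr_to_cd()
 * below applies this for each field it repacks.
 */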
327
328 /* Command queue */
329 #define CMDQ_ENT_DWORDS                 2
330 #define CMDQ_MAX_SZ_SHIFT               8
331
332 #define CMDQ_ERR_SHIFT                  24
333 #define CMDQ_ERR_MASK                   0x7f
334 #define CMDQ_ERR_CERROR_NONE_IDX        0
335 #define CMDQ_ERR_CERROR_ILL_IDX         1
336 #define CMDQ_ERR_CERROR_ABT_IDX         2
337
338 #define CMDQ_0_OP_SHIFT                 0
339 #define CMDQ_0_OP_MASK                  0xffUL
340 #define CMDQ_0_SSV                      (1UL << 11)
341
342 #define CMDQ_PREFETCH_0_SID_SHIFT       32
343 #define CMDQ_PREFETCH_1_SIZE_SHIFT      0
344 #define CMDQ_PREFETCH_1_ADDR_MASK       ~0xfffUL
345
346 #define CMDQ_CFGI_0_SID_SHIFT           32
347 #define CMDQ_CFGI_0_SID_MASK            0xffffffffUL
348 #define CMDQ_CFGI_1_LEAF                (1UL << 0)
349 #define CMDQ_CFGI_1_RANGE_SHIFT         0
350 #define CMDQ_CFGI_1_RANGE_MASK          0x1fUL
351
352 #define CMDQ_TLBI_0_VMID_SHIFT          32
353 #define CMDQ_TLBI_0_ASID_SHIFT          48
354 #define CMDQ_TLBI_1_LEAF                (1UL << 0)
355 #define CMDQ_TLBI_1_VA_MASK             ~0xfffUL
356 #define CMDQ_TLBI_1_IPA_MASK            0xfffffffff000UL
357
358 #define CMDQ_PRI_0_SSID_SHIFT           12
359 #define CMDQ_PRI_0_SSID_MASK            0xfffffUL
360 #define CMDQ_PRI_0_SID_SHIFT            32
361 #define CMDQ_PRI_0_SID_MASK             0xffffffffUL
362 #define CMDQ_PRI_1_GRPID_SHIFT          0
363 #define CMDQ_PRI_1_GRPID_MASK           0x1ffUL
364 #define CMDQ_PRI_1_RESP_SHIFT           12
365 #define CMDQ_PRI_1_RESP_DENY            (0UL << CMDQ_PRI_1_RESP_SHIFT)
366 #define CMDQ_PRI_1_RESP_FAIL            (1UL << CMDQ_PRI_1_RESP_SHIFT)
367 #define CMDQ_PRI_1_RESP_SUCC            (2UL << CMDQ_PRI_1_RESP_SHIFT)
368
369 #define CMDQ_SYNC_0_CS_SHIFT            12
370 #define CMDQ_SYNC_0_CS_NONE             (0UL << CMDQ_SYNC_0_CS_SHIFT)
371 #define CMDQ_SYNC_0_CS_SEV              (2UL << CMDQ_SYNC_0_CS_SHIFT)
372
373 /* Event queue */
374 #define EVTQ_ENT_DWORDS                 4
375 #define EVTQ_MAX_SZ_SHIFT               7
376
377 #define EVTQ_0_ID_SHIFT                 0
378 #define EVTQ_0_ID_MASK                  0xffUL
379
380 /* PRI queue */
381 #define PRIQ_ENT_DWORDS                 2
382 #define PRIQ_MAX_SZ_SHIFT               8
383
384 #define PRIQ_0_SID_SHIFT                0
385 #define PRIQ_0_SID_MASK                 0xffffffffUL
386 #define PRIQ_0_SSID_SHIFT               32
387 #define PRIQ_0_SSID_MASK                0xfffffUL
388 #define PRIQ_0_PERM_PRIV                (1UL << 58)
389 #define PRIQ_0_PERM_EXEC                (1UL << 59)
390 #define PRIQ_0_PERM_READ                (1UL << 60)
391 #define PRIQ_0_PERM_WRITE               (1UL << 61)
392 #define PRIQ_0_PRG_LAST                 (1UL << 62)
393 #define PRIQ_0_SSID_V                   (1UL << 63)
394
395 #define PRIQ_1_PRG_IDX_SHIFT            0
396 #define PRIQ_1_PRG_IDX_MASK             0x1ffUL
397 #define PRIQ_1_ADDR_SHIFT               12
398 #define PRIQ_1_ADDR_MASK                0xfffffffffffffUL
399
400 /* High-level queue structures */
401 #define ARM_SMMU_POLL_TIMEOUT_US        100
402
403 static bool disable_bypass;
404 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
405 MODULE_PARM_DESC(disable_bypass,
406         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
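/*
 * Usage note (added, not in the original): as a rough illustration, the
 * parameter above can typically be set at boot with something like
 * "arm-smmu-v3.disable_bypass=1" on the kernel command line, or at module
 * load time with "modprobe arm_smmu_v3 disable_bypass=1"; the exact prefix
 * follows the module name.
 */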
407
408 enum pri_resp {
409         PRI_RESP_DENY,
410         PRI_RESP_FAIL,
411         PRI_RESP_SUCC,
412 };
413
414 enum arm_smmu_msi_index {
415         EVTQ_MSI_INDEX,
416         GERROR_MSI_INDEX,
417         PRIQ_MSI_INDEX,
418         ARM_SMMU_MAX_MSIS,
419 };
420
421 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
422         [EVTQ_MSI_INDEX] = {
423                 ARM_SMMU_EVTQ_IRQ_CFG0,
424                 ARM_SMMU_EVTQ_IRQ_CFG1,
425                 ARM_SMMU_EVTQ_IRQ_CFG2,
426         },
427         [GERROR_MSI_INDEX] = {
428                 ARM_SMMU_GERROR_IRQ_CFG0,
429                 ARM_SMMU_GERROR_IRQ_CFG1,
430                 ARM_SMMU_GERROR_IRQ_CFG2,
431         },
432         [PRIQ_MSI_INDEX] = {
433                 ARM_SMMU_PRIQ_IRQ_CFG0,
434                 ARM_SMMU_PRIQ_IRQ_CFG1,
435                 ARM_SMMU_PRIQ_IRQ_CFG2,
436         },
437 };
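/*
 * Added note: each row of arm_smmu_msi_cfg lists the register offsets used
 * to program one of the SMMU's own MSI targets: CFG0 carries the doorbell
 * address, CFG1 the payload written to it, and CFG2 the shareability and
 * memory attributes (see the MSI_CFG0/MSI_CFG2 fields above). These are
 * written when MSIs are used in place of wired interrupts.
 */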
438
439 struct arm_smmu_cmdq_ent {
440         /* Common fields */
441         u8                              opcode;
442         bool                            substream_valid;
443
444         /* Command-specific fields */
445         union {
446                 #define CMDQ_OP_PREFETCH_CFG    0x1
447                 struct {
448                         u32                     sid;
449                         u8                      size;
450                         u64                     addr;
451                 } prefetch;
452
453                 #define CMDQ_OP_CFGI_STE        0x3
454                 #define CMDQ_OP_CFGI_ALL        0x4
455                 struct {
456                         u32                     sid;
457                         union {
458                                 bool            leaf;
459                                 u8              span;
460                         };
461                 } cfgi;
462
463                 #define CMDQ_OP_TLBI_NH_ASID    0x11
464                 #define CMDQ_OP_TLBI_NH_VA      0x12
465                 #define CMDQ_OP_TLBI_EL2_ALL    0x20
466                 #define CMDQ_OP_TLBI_S12_VMALL  0x28
467                 #define CMDQ_OP_TLBI_S2_IPA     0x2a
468                 #define CMDQ_OP_TLBI_NSNH_ALL   0x30
469                 struct {
470                         u16                     asid;
471                         u16                     vmid;
472                         bool                    leaf;
473                         u64                     addr;
474                 } tlbi;
475
476                 #define CMDQ_OP_PRI_RESP        0x41
477                 struct {
478                         u32                     sid;
479                         u32                     ssid;
480                         u16                     grpid;
481                         enum pri_resp           resp;
482                 } pri;
483
484                 #define CMDQ_OP_CMD_SYNC        0x46
485         };
486 };
487
488 struct arm_smmu_queue {
489         int                             irq; /* Wired interrupt */
490
491         __le64                          *base;
492         dma_addr_t                      base_dma;
493         u64                             q_base;
494
495         size_t                          ent_dwords;
496         u32                             max_n_shift;
497         u32                             prod;
498         u32                             cons;
499
500         u32 __iomem                     *prod_reg;
501         u32 __iomem                     *cons_reg;
502 };
503
504 struct arm_smmu_cmdq {
505         struct arm_smmu_queue           q;
506         spinlock_t                      lock;
507 };
508
509 struct arm_smmu_evtq {
510         struct arm_smmu_queue           q;
511         u32                             max_stalls;
512 };
513
514 struct arm_smmu_priq {
515         struct arm_smmu_queue           q;
516 };
517
518 /* High-level stream table and context descriptor structures */
519 struct arm_smmu_strtab_l1_desc {
520         u8                              span;
521
522         __le64                          *l2ptr;
523         dma_addr_t                      l2ptr_dma;
524 };
525
526 struct arm_smmu_s1_cfg {
527         __le64                          *cdptr;
528         dma_addr_t                      cdptr_dma;
529
530         struct arm_smmu_ctx_desc {
531                 u16     asid;
532                 u64     ttbr;
533                 u64     tcr;
534                 u64     mair;
535         }                               cd;
536 };
537
538 struct arm_smmu_s2_cfg {
539         u16                             vmid;
540         u64                             vttbr;
541         u64                             vtcr;
542 };
543
544 struct arm_smmu_strtab_ent {
545         bool                            valid;
546
547         bool                            bypass; /* Overrides s1/s2 config */
548         struct arm_smmu_s1_cfg          *s1_cfg;
549         struct arm_smmu_s2_cfg          *s2_cfg;
550 };
551
552 struct arm_smmu_strtab_cfg {
553         __le64                          *strtab;
554         dma_addr_t                      strtab_dma;
555         struct arm_smmu_strtab_l1_desc  *l1_desc;
556         unsigned int                    num_l1_ents;
557
558         u64                             strtab_base;
559         u32                             strtab_base_cfg;
560 };
561
562 /* An SMMUv3 instance */
563 struct arm_smmu_device {
564         struct device                   *dev;
565         void __iomem                    *base;
566
567 #define ARM_SMMU_FEAT_2_LVL_STRTAB      (1 << 0)
568 #define ARM_SMMU_FEAT_2_LVL_CDTAB       (1 << 1)
569 #define ARM_SMMU_FEAT_TT_LE             (1 << 2)
570 #define ARM_SMMU_FEAT_TT_BE             (1 << 3)
571 #define ARM_SMMU_FEAT_PRI               (1 << 4)
572 #define ARM_SMMU_FEAT_ATS               (1 << 5)
573 #define ARM_SMMU_FEAT_SEV               (1 << 6)
574 #define ARM_SMMU_FEAT_MSI               (1 << 7)
575 #define ARM_SMMU_FEAT_COHERENCY         (1 << 8)
576 #define ARM_SMMU_FEAT_TRANS_S1          (1 << 9)
577 #define ARM_SMMU_FEAT_TRANS_S2          (1 << 10)
578 #define ARM_SMMU_FEAT_STALLS            (1 << 11)
579 #define ARM_SMMU_FEAT_HYP               (1 << 12)
580         u32                             features;
581
582 #define ARM_SMMU_OPT_SKIP_PREFETCH      (1 << 0)
583         u32                             options;
584
585         struct arm_smmu_cmdq            cmdq;
586         struct arm_smmu_evtq            evtq;
587         struct arm_smmu_priq            priq;
588
589         int                             gerr_irq;
590
591         unsigned long                   ias; /* IPA */
592         unsigned long                   oas; /* PA */
593         unsigned long                   pgsize_bitmap;
594
595 #define ARM_SMMU_MAX_ASIDS              (1 << 16)
596         unsigned int                    asid_bits;
597         DECLARE_BITMAP(asid_map, ARM_SMMU_MAX_ASIDS);
598
599 #define ARM_SMMU_MAX_VMIDS              (1 << 16)
600         unsigned int                    vmid_bits;
601         DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
602
603         unsigned int                    ssid_bits;
604         unsigned int                    sid_bits;
605
606         struct arm_smmu_strtab_cfg      strtab_cfg;
607 };
608
609 /* SMMU private data for an IOMMU group */
610 struct arm_smmu_group {
611         struct arm_smmu_device          *smmu;
612         struct arm_smmu_domain          *domain;
613         int                             num_sids;
614         u32                             *sids;
615         struct arm_smmu_strtab_ent      ste;
616 };
617
618 /* SMMU private data for an IOMMU domain */
619 enum arm_smmu_domain_stage {
620         ARM_SMMU_DOMAIN_S1 = 0,
621         ARM_SMMU_DOMAIN_S2,
622         ARM_SMMU_DOMAIN_NESTED,
623 };
624
625 struct arm_smmu_domain {
626         struct arm_smmu_device          *smmu;
627         struct mutex                    init_mutex; /* Protects smmu pointer */
628
629         struct io_pgtable_ops           *pgtbl_ops;
630         spinlock_t                      pgtbl_lock;
631
632         enum arm_smmu_domain_stage      stage;
633         union {
634                 struct arm_smmu_s1_cfg  s1_cfg;
635                 struct arm_smmu_s2_cfg  s2_cfg;
636         };
637
638         struct iommu_domain             domain;
639 };
640
641 struct arm_smmu_option_prop {
642         u32 opt;
643         const char *prop;
644 };
645
646 static struct arm_smmu_option_prop arm_smmu_options[] = {
647         { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
648         { 0, NULL},
649 };
650
651 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
652 {
653         return container_of(dom, struct arm_smmu_domain, domain);
654 }
655
656 static void parse_driver_options(struct arm_smmu_device *smmu)
657 {
658         int i = 0;
659
660         do {
661                 if (of_property_read_bool(smmu->dev->of_node,
662                                                 arm_smmu_options[i].prop)) {
663                         smmu->options |= arm_smmu_options[i].opt;
664                         dev_notice(smmu->dev, "option %s\n",
665                                 arm_smmu_options[i].prop);
666                 }
667         } while (arm_smmu_options[++i].opt);
668 }
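/*
 * Illustrative example (added): an option in arm_smmu_options[] is enabled
 * by a boolean property on the SMMU's devicetree node, e.g. something like:
 *
 *      smmu@2b400000 {
 *              compatible = "arm,smmu-v3";
 *              ...
 *              hisilicon,broken-prefetch-cmd;
 *      };
 *
 * (node name and address are made up for the example).
 */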
669
670 /* Low-level queue manipulation functions */
671 static bool queue_full(struct arm_smmu_queue *q)
672 {
673         return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
674                Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
675 }
676
677 static bool queue_empty(struct arm_smmu_queue *q)
678 {
679         return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
680                Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
681 }
682
683 static void queue_sync_cons(struct arm_smmu_queue *q)
684 {
685         q->cons = readl_relaxed(q->cons_reg);
686 }
687
688 static void queue_inc_cons(struct arm_smmu_queue *q)
689 {
690         u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
691
692         q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
693         writel(q->cons, q->cons_reg);
694 }
695
696 static int queue_sync_prod(struct arm_smmu_queue *q)
697 {
698         int ret = 0;
699         u32 prod = readl_relaxed(q->prod_reg);
700
701         if (Q_OVF(q, prod) != Q_OVF(q, q->prod))
702                 ret = -EOVERFLOW;
703
704         q->prod = prod;
705         return ret;
706 }
707
708 static void queue_inc_prod(struct arm_smmu_queue *q)
709 {
710         u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1;
711
712         q->prod = Q_OVF(q, q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
713         writel(q->prod, q->prod_reg);
714 }
715
716 static bool __queue_cons_before(struct arm_smmu_queue *q, u32 until)
717 {
718         if (Q_WRP(q, q->cons) == Q_WRP(q, until))
719                 return Q_IDX(q, q->cons) < Q_IDX(q, until);
720
721         return Q_IDX(q, q->cons) >= Q_IDX(q, until);
722 }
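/*
 * Added commentary: "until" here is a producer-side pointer (prod + 1 in
 * arm_smmu_cmdq_issue_cmd()), so it is at most one full queue ahead of the
 * consumer. When the wrap bits match, a plain index comparison decides
 * whether the consumer has caught up; when they differ, the producer has
 * already wrapped and the consumer still has to finish its current lap,
 * which the second comparison captures.
 */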
723
724 static int queue_poll_cons(struct arm_smmu_queue *q, u32 until, bool wfe)
725 {
726         ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
727
728         while (queue_sync_cons(q), __queue_cons_before(q, until)) {
729                 if (ktime_compare(ktime_get(), timeout) > 0)
730                         return -ETIMEDOUT;
731
732                 if (wfe) {
733                         wfe();
734                 } else {
735                         cpu_relax();
736                         udelay(1);
737                 }
738         }
739
740         return 0;
741 }
742
743 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
744 {
745         int i;
746
747         for (i = 0; i < n_dwords; ++i)
748                 *dst++ = cpu_to_le64(*src++);
749 }
750
751 static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
752 {
753         if (queue_full(q))
754                 return -ENOSPC;
755
756         queue_write(Q_ENT(q, q->prod), ent, q->ent_dwords);
757         queue_inc_prod(q);
758         return 0;
759 }
760
761 static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
762 {
763         int i;
764
765         for (i = 0; i < n_dwords; ++i)
766                 *dst++ = le64_to_cpu(*src++);
767 }
768
769 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
770 {
771         if (queue_empty(q))
772                 return -EAGAIN;
773
774         queue_read(ent, Q_ENT(q, q->cons), q->ent_dwords);
775         queue_inc_cons(q);
776         return 0;
777 }
778
779 /* High-level queue accessors */
780 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
781 {
782         memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
783         cmd[0] |= (ent->opcode & CMDQ_0_OP_MASK) << CMDQ_0_OP_SHIFT;
784
785         switch (ent->opcode) {
786         case CMDQ_OP_TLBI_EL2_ALL:
787         case CMDQ_OP_TLBI_NSNH_ALL:
788                 break;
789         case CMDQ_OP_PREFETCH_CFG:
790                 cmd[0] |= (u64)ent->prefetch.sid << CMDQ_PREFETCH_0_SID_SHIFT;
791                 cmd[1] |= ent->prefetch.size << CMDQ_PREFETCH_1_SIZE_SHIFT;
792                 cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
793                 break;
794         case CMDQ_OP_CFGI_STE:
795                 cmd[0] |= (u64)ent->cfgi.sid << CMDQ_CFGI_0_SID_SHIFT;
796                 cmd[1] |= ent->cfgi.leaf ? CMDQ_CFGI_1_LEAF : 0;
797                 break;
798         case CMDQ_OP_CFGI_ALL:
799                 /* Cover the entire SID range */
800                 cmd[1] |= CMDQ_CFGI_1_RANGE_MASK << CMDQ_CFGI_1_RANGE_SHIFT;
801                 break;
802         case CMDQ_OP_TLBI_NH_VA:
803                 cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
804                 cmd[1] |= ent->tlbi.leaf ? CMDQ_TLBI_1_LEAF : 0;
805                 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
806                 break;
807         case CMDQ_OP_TLBI_S2_IPA:
808                 cmd[0] |= (u64)ent->tlbi.vmid << CMDQ_TLBI_0_VMID_SHIFT;
809                 cmd[1] |= ent->tlbi.leaf ? CMDQ_TLBI_1_LEAF : 0;
810                 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
811                 break;
812         case CMDQ_OP_TLBI_NH_ASID:
813                 cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
814                 /* Fallthrough */
815         case CMDQ_OP_TLBI_S12_VMALL:
816                 cmd[0] |= (u64)ent->tlbi.vmid << CMDQ_TLBI_0_VMID_SHIFT;
817                 break;
818         case CMDQ_OP_PRI_RESP:
819                 cmd[0] |= ent->substream_valid ? CMDQ_0_SSV : 0;
820                 cmd[0] |= ent->pri.ssid << CMDQ_PRI_0_SSID_SHIFT;
821                 cmd[0] |= (u64)ent->pri.sid << CMDQ_PRI_0_SID_SHIFT;
822                 cmd[1] |= ent->pri.grpid << CMDQ_PRI_1_GRPID_SHIFT;
823                 switch (ent->pri.resp) {
824                 case PRI_RESP_DENY:
825                         cmd[1] |= CMDQ_PRI_1_RESP_DENY;
826                         break;
827                 case PRI_RESP_FAIL:
828                         cmd[1] |= CMDQ_PRI_1_RESP_FAIL;
829                         break;
830                 case PRI_RESP_SUCC:
831                         cmd[1] |= CMDQ_PRI_1_RESP_SUCC;
832                         break;
833                 default:
834                         return -EINVAL;
835                 }
836                 break;
837         case CMDQ_OP_CMD_SYNC:
838                 cmd[0] |= CMDQ_SYNC_0_CS_SEV;
839                 break;
840         default:
841                 return -ENOENT;
842         }
843
844         return 0;
845 }
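/*
 * Worked example (added): for a CMDQ_OP_CMD_SYNC entry the function above
 * produces
 *
 *      cmd[0] = 0x46 | (2UL << 12) = 0x2046;   (opcode | CS_SEV)
 *      cmd[1] = 0;
 *
 * and for CMDQ_OP_TLBI_NH_VA it places the ASID in cmd[0] bits [63:48],
 * the leaf flag in cmd[1] bit 0 and the page-aligned VA in the upper bits
 * of cmd[1].
 */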
846
847 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
848 {
849         static const char *cerror_str[] = {
850                 [CMDQ_ERR_CERROR_NONE_IDX]      = "No error",
851                 [CMDQ_ERR_CERROR_ILL_IDX]       = "Illegal command",
852                 [CMDQ_ERR_CERROR_ABT_IDX]       = "Abort on command fetch",
853         };
854
855         int i;
856         u64 cmd[CMDQ_ENT_DWORDS];
857         struct arm_smmu_queue *q = &smmu->cmdq.q;
858         u32 cons = readl_relaxed(q->cons_reg);
859         u32 idx = cons >> CMDQ_ERR_SHIFT & CMDQ_ERR_MASK;
860         struct arm_smmu_cmdq_ent cmd_sync = {
861                 .opcode = CMDQ_OP_CMD_SYNC,
862         };
863
864         dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
865                 idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
866
867         switch (idx) {
868         case CMDQ_ERR_CERROR_ABT_IDX:
869                 dev_err(smmu->dev, "retrying command fetch\n"); /* Fallthrough */
870         case CMDQ_ERR_CERROR_NONE_IDX:
871                 return;
872         case CMDQ_ERR_CERROR_ILL_IDX:
873                 /* Fallthrough */
874         default:
875                 break;
876         }
877
878         /*
879          * We may have concurrent producers, so we need to be careful
880          * not to touch any of the shadow cmdq state.
881          */
882         queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
883         dev_err(smmu->dev, "skipping command in error state:\n");
884         for (i = 0; i < ARRAY_SIZE(cmd); ++i)
885                 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
886
887         /* Convert the erroneous command into a CMD_SYNC */
888         if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
889                 dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
890                 return;
891         }
892
893         queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
894 }
895
896 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
897                                     struct arm_smmu_cmdq_ent *ent)
898 {
899         u32 until;
900         u64 cmd[CMDQ_ENT_DWORDS];
901         bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
902         struct arm_smmu_queue *q = &smmu->cmdq.q;
903
904         if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
905                 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
906                          ent->opcode);
907                 return;
908         }
909
910         spin_lock(&smmu->cmdq.lock);
911         while (until = q->prod + 1, queue_insert_raw(q, cmd) == -ENOSPC) {
912                 /*
913                  * Keep the queue locked, otherwise the producer could wrap
914                  * twice and we could see a future consumer pointer that looks
915                  * like it's behind us.
916                  */
917                 if (queue_poll_cons(q, until, wfe))
918                         dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
919         }
920
921         if (ent->opcode == CMDQ_OP_CMD_SYNC && queue_poll_cons(q, until, wfe))
922                 dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
923         spin_unlock(&smmu->cmdq.lock);
924 }
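/*
 * Usage note (added): callers that need a completion guarantee follow a
 * command with an explicit CMDQ_OP_CMD_SYNC, e.g.:
 *
 *      struct arm_smmu_cmdq_ent cmd = { .opcode = CMDQ_OP_TLBI_NSNH_ALL };
 *
 *      arm_smmu_cmdq_issue_cmd(smmu, &cmd);
 *      cmd.opcode = CMDQ_OP_CMD_SYNC;
 *      arm_smmu_cmdq_issue_cmd(smmu, &cmd);
 *
 * arm_smmu_sync_ste_for_sid() below follows exactly this pattern.
 */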
925
926 /* Context descriptor manipulation functions */
927 static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
928 {
929         u64 val = 0;
930
931         /* Repack the TCR. Just care about TTBR0 for now */
932         val |= ARM_SMMU_TCR2CD(tcr, T0SZ);
933         val |= ARM_SMMU_TCR2CD(tcr, TG0);
934         val |= ARM_SMMU_TCR2CD(tcr, IRGN0);
935         val |= ARM_SMMU_TCR2CD(tcr, ORGN0);
936         val |= ARM_SMMU_TCR2CD(tcr, SH0);
937         val |= ARM_SMMU_TCR2CD(tcr, EPD0);
938         val |= ARM_SMMU_TCR2CD(tcr, EPD1);
939         val |= ARM_SMMU_TCR2CD(tcr, IPS);
940         val |= ARM_SMMU_TCR2CD(tcr, TBI0);
941
942         return val;
943 }
944
945 static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
946                                     struct arm_smmu_s1_cfg *cfg)
947 {
948         u64 val;
949
950         /*
951          * We don't need to issue any invalidation here, as we'll invalidate
952          * the STE when installing the new entry anyway.
953          */
954         val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
955 #ifdef __BIG_ENDIAN
956               CTXDESC_CD_0_ENDI |
957 #endif
958               CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET_PRIVATE |
959               CTXDESC_CD_0_AA64 | (u64)cfg->cd.asid << CTXDESC_CD_0_ASID_SHIFT |
960               CTXDESC_CD_0_V;
961         cfg->cdptr[0] = cpu_to_le64(val);
962
963         val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK << CTXDESC_CD_1_TTB0_SHIFT;
964         cfg->cdptr[1] = cpu_to_le64(val);
965
966         cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair << CTXDESC_CD_3_MAIR_SHIFT);
967 }
968
969 /* Stream table manipulation functions */
970 static void
971 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
972 {
973         u64 val = 0;
974
975         val |= (desc->span & STRTAB_L1_DESC_SPAN_MASK)
976                 << STRTAB_L1_DESC_SPAN_SHIFT;
977         val |= desc->l2ptr_dma &
978                STRTAB_L1_DESC_L2PTR_MASK << STRTAB_L1_DESC_L2PTR_SHIFT;
979
980         *dst = cpu_to_le64(val);
981 }
982
983 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
984 {
985         struct arm_smmu_cmdq_ent cmd = {
986                 .opcode = CMDQ_OP_CFGI_STE,
987                 .cfgi   = {
988                         .sid    = sid,
989                         .leaf   = true,
990                 },
991         };
992
993         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
994         cmd.opcode = CMDQ_OP_CMD_SYNC;
995         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
996 }
997
998 static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
999                                       __le64 *dst, struct arm_smmu_strtab_ent *ste)
1000 {
1001         /*
1002          * This is hideously complicated, but we only really care about
1003          * three cases at the moment:
1004          *
1005          * 1. Invalid (all zero) -> bypass  (init)
1006          * 2. Bypass -> translation (attach)
1007          * 3. Translation -> bypass (detach)
1008          *
1009          * Given that we can't update the STE atomically and the SMMU
1010          * doesn't read the thing in a defined order, that leaves us
1011          * with the following maintenance requirements:
1012          *
1013          * 1. Update Config, return (init time STEs aren't live)
1014          * 2. Write everything apart from dword 0, sync, write dword 0, sync
1015          * 3. Update Config, sync
1016          */
1017         u64 val = le64_to_cpu(dst[0]);
1018         bool ste_live = false;
1019         struct arm_smmu_cmdq_ent prefetch_cmd = {
1020                 .opcode         = CMDQ_OP_PREFETCH_CFG,
1021                 .prefetch       = {
1022                         .sid    = sid,
1023                 },
1024         };
1025
1026         if (val & STRTAB_STE_0_V) {
1027                 u64 cfg;
1028
1029                 cfg = val & STRTAB_STE_0_CFG_MASK << STRTAB_STE_0_CFG_SHIFT;
1030                 switch (cfg) {
1031                 case STRTAB_STE_0_CFG_BYPASS:
1032                         break;
1033                 case STRTAB_STE_0_CFG_S1_TRANS:
1034                 case STRTAB_STE_0_CFG_S2_TRANS:
1035                         ste_live = true;
1036                         break;
1037                 case STRTAB_STE_0_CFG_ABORT:
1038                         if (disable_bypass)
1039                                 break;
1040                 default:
1041                         BUG(); /* STE corruption */
1042                 }
1043         }
1044
1045         /* Nuke the existing Config, as we're going to rewrite it */
1046         val &= ~(STRTAB_STE_0_CFG_MASK << STRTAB_STE_0_CFG_SHIFT);
1047
1048         if (ste->valid)
1049                 val |= STRTAB_STE_0_V;
1050         else
1051                 val &= ~STRTAB_STE_0_V;
1052
1053         if (ste->bypass) {
1054                 val |= disable_bypass ? STRTAB_STE_0_CFG_ABORT
1055                                       : STRTAB_STE_0_CFG_BYPASS;
1056                 dst[0] = cpu_to_le64(val);
1057                 dst[1] = cpu_to_le64(STRTAB_STE_1_SHCFG_INCOMING
1058                          << STRTAB_STE_1_SHCFG_SHIFT);
1059                 dst[2] = 0; /* Nuke the VMID */
1060                 if (ste_live)
1061                         arm_smmu_sync_ste_for_sid(smmu, sid);
1062                 return;
1063         }
1064
1065         if (ste->s1_cfg) {
1066                 BUG_ON(ste_live);
1067                 dst[1] = cpu_to_le64(
1068                          STRTAB_STE_1_S1C_CACHE_WBRA
1069                          << STRTAB_STE_1_S1CIR_SHIFT |
1070                          STRTAB_STE_1_S1C_CACHE_WBRA
1071                          << STRTAB_STE_1_S1COR_SHIFT |
1072                          STRTAB_STE_1_S1C_SH_ISH << STRTAB_STE_1_S1CSH_SHIFT |
1073 #ifdef CONFIG_PCI_ATS
1074                          STRTAB_STE_1_EATS_TRANS << STRTAB_STE_1_EATS_SHIFT |
1075 #endif
1076                          STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT);
1077
1078                 if (smmu->features & ARM_SMMU_FEAT_STALLS)
1079                         dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1080
1081                 val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK
1082                         << STRTAB_STE_0_S1CTXPTR_SHIFT) |
1083                         STRTAB_STE_0_CFG_S1_TRANS;
1084
1085         }
1086
1087         if (ste->s2_cfg) {
1088                 BUG_ON(ste_live);
1089                 dst[2] = cpu_to_le64(
1090                          ste->s2_cfg->vmid << STRTAB_STE_2_S2VMID_SHIFT |
1091                          (ste->s2_cfg->vtcr & STRTAB_STE_2_VTCR_MASK)
1092                           << STRTAB_STE_2_VTCR_SHIFT |
1093 #ifdef __BIG_ENDIAN
1094                          STRTAB_STE_2_S2ENDI |
1095 #endif
1096                          STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1097                          STRTAB_STE_2_S2R);
1098
1099                 dst[3] = cpu_to_le64(ste->s2_cfg->vttbr &
1100                          STRTAB_STE_3_S2TTB_MASK << STRTAB_STE_3_S2TTB_SHIFT);
1101
1102                 val |= STRTAB_STE_0_CFG_S2_TRANS;
1103         }
1104
1105         arm_smmu_sync_ste_for_sid(smmu, sid);
1106         dst[0] = cpu_to_le64(val);
1107         arm_smmu_sync_ste_for_sid(smmu, sid);
1108
1109         /* It's likely that we'll want to use the new STE soon */
1110         if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1111                 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1112 }
1113
1114 static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
1115 {
1116         unsigned int i;
1117         struct arm_smmu_strtab_ent ste = {
1118                 .valid  = true,
1119                 .bypass = true,
1120         };
1121
1122         for (i = 0; i < nent; ++i) {
1123                 arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste);
1124                 strtab += STRTAB_STE_DWORDS;
1125         }
1126 }
1127
1128 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1129 {
1130         size_t size;
1131         void *strtab;
1132         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1133         struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1134
1135         if (desc->l2ptr)
1136                 return 0;
1137
1138         size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1139         strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1140
1141         desc->span = STRTAB_SPLIT + 1;
1142         desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1143                                           GFP_KERNEL | __GFP_ZERO);
1144         if (!desc->l2ptr) {
1145                 dev_err(smmu->dev,
1146                         "failed to allocate l2 stream table for SID %u\n",
1147                         sid);
1148                 return -ENOMEM;
1149         }
1150
1151         arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1152         arm_smmu_write_strtab_l1_desc(strtab, desc);
1153         return 0;
1154 }
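/*
 * Sizing note (added): with STRTAB_SPLIT == 8 and STRTAB_STE_DWORDS == 8,
 * the allocation above is 1 << (8 + 3 + 3) = 16KiB per L2 table: 256 STEs
 * of 64 bytes each, covering one PCI bus worth of StreamIDs.
 */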
1155
1156 /* IRQ and event handlers */
1157 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1158 {
1159         int i;
1160         struct arm_smmu_device *smmu = dev;
1161         struct arm_smmu_queue *q = &smmu->evtq.q;
1162         u64 evt[EVTQ_ENT_DWORDS];
1163
1164         while (!queue_remove_raw(q, evt)) {
1165                 u8 id = evt[0] >> EVTQ_0_ID_SHIFT & EVTQ_0_ID_MASK;
1166
1167                 dev_info(smmu->dev, "event 0x%02x received:\n", id);
1168                 for (i = 0; i < ARRAY_SIZE(evt); ++i)
1169                         dev_info(smmu->dev, "\t0x%016llx\n",
1170                                  (unsigned long long)evt[i]);
1171         }
1172
1173         /* Sync our overflow flag, as we believe we're up to speed */
1174         q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1175         return IRQ_HANDLED;
1176 }
1177
1178 static irqreturn_t arm_smmu_evtq_handler(int irq, void *dev)
1179 {
1180         irqreturn_t ret = IRQ_WAKE_THREAD;
1181         struct arm_smmu_device *smmu = dev;
1182         struct arm_smmu_queue *q = &smmu->evtq.q;
1183
1184         /*
1185          * Not much we can do on overflow, so scream and pretend we're
1186          * trying harder.
1187          */
1188         if (queue_sync_prod(q) == -EOVERFLOW)
1189                 dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1190         else if (queue_empty(q))
1191                 ret = IRQ_NONE;
1192
1193         return ret;
1194 }
1195
1196 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1197 {
1198         struct arm_smmu_device *smmu = dev;
1199         struct arm_smmu_queue *q = &smmu->priq.q;
1200         u64 evt[PRIQ_ENT_DWORDS];
1201
1202         while (!queue_remove_raw(q, evt)) {
1203                 u32 sid, ssid;
1204                 u16 grpid;
1205                 bool ssv, last;
1206
1207                 sid = evt[0] >> PRIQ_0_SID_SHIFT & PRIQ_0_SID_MASK;
1208                 ssv = evt[0] & PRIQ_0_SSID_V;
1209                 ssid = ssv ? evt[0] >> PRIQ_0_SSID_SHIFT & PRIQ_0_SSID_MASK : 0;
1210                 last = evt[0] & PRIQ_0_PRG_LAST;
1211                 grpid = evt[1] >> PRIQ_1_PRG_IDX_SHIFT & PRIQ_1_PRG_IDX_MASK;
1212
1213                 dev_info(smmu->dev, "unexpected PRI request received:\n");
1214                 dev_info(smmu->dev,
1215                          "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1216                          sid, ssid, grpid, last ? "L" : "",
1217                          evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1218                          evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1219                          evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1220                          evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1221                          evt[1] & PRIQ_1_ADDR_MASK << PRIQ_1_ADDR_SHIFT);
1222
1223                 if (last) {
1224                         struct arm_smmu_cmdq_ent cmd = {
1225                                 .opcode                 = CMDQ_OP_PRI_RESP,
1226                                 .substream_valid        = ssv,
1227                                 .pri                    = {
1228                                         .sid    = sid,
1229                                         .ssid   = ssid,
1230                                         .grpid  = grpid,
1231                                         .resp   = PRI_RESP_DENY,
1232                                 },
1233                         };
1234
1235                         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1236                 }
1237         }
1238
1239         /* Sync our overflow flag, as we believe we're up to speed */
1240         q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1241         return IRQ_HANDLED;
1242 }
1243
1244 static irqreturn_t arm_smmu_priq_handler(int irq, void *dev)
1245 {
1246         irqreturn_t ret = IRQ_WAKE_THREAD;
1247         struct arm_smmu_device *smmu = dev;
1248         struct arm_smmu_queue *q = &smmu->priq.q;
1249
1250         /* PRIQ overflow indicates a programming error */
1251         if (queue_sync_prod(q) == -EOVERFLOW)
1252                 dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1253         else if (queue_empty(q))
1254                 ret = IRQ_NONE;
1255
1256         return ret;
1257 }
1258
1259 static irqreturn_t arm_smmu_cmdq_sync_handler(int irq, void *dev)
1260 {
1261         /* We don't actually use CMD_SYNC interrupts for anything */
1262         return IRQ_HANDLED;
1263 }
1264
1265 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1266
1267 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1268 {
1269         u32 gerror, gerrorn, active;
1270         struct arm_smmu_device *smmu = dev;
1271
1272         gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1273         gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1274
1275         active = gerror ^ gerrorn;
1276         if (!(active & GERROR_ERR_MASK))
1277                 return IRQ_NONE; /* No errors pending */
1278
1279         dev_warn(smmu->dev,
1280                  "unexpected global error reported (0x%08x), this could be serious\n",
1281                  active);
1282
1283         if (active & GERROR_SFM_ERR) {
1284                 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1285                 arm_smmu_device_disable(smmu);
1286         }
1287
1288         if (active & GERROR_MSI_GERROR_ABT_ERR)
1289                 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1290
1291         if (active & GERROR_MSI_PRIQ_ABT_ERR) {
1292                 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1293                 arm_smmu_priq_handler(irq, smmu->dev);
1294         }
1295
1296         if (active & GERROR_MSI_EVTQ_ABT_ERR) {
1297                 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1298                 arm_smmu_evtq_handler(irq, smmu->dev);
1299         }
1300
1301         if (active & GERROR_MSI_CMDQ_ABT_ERR) {
1302                 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1303                 arm_smmu_cmdq_sync_handler(irq, smmu->dev);
1304         }
1305
1306         if (active & GERROR_PRIQ_ABT_ERR)
1307                 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1308
1309         if (active & GERROR_EVTQ_ABT_ERR)
1310                 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1311
1312         if (active & GERROR_CMDQ_ERR)
1313                 arm_smmu_cmdq_skip_err(smmu);
1314
1315         writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1316         return IRQ_HANDLED;
1317 }
1318
1319 /* IO_PGTABLE API */
1320 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
1321 {
1322         struct arm_smmu_cmdq_ent cmd;
1323
1324         cmd.opcode = CMDQ_OP_CMD_SYNC;
1325         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1326 }
1327
1328 static void arm_smmu_tlb_sync(void *cookie)
1329 {
1330         struct arm_smmu_domain *smmu_domain = cookie;
1331         __arm_smmu_tlb_sync(smmu_domain->smmu);
1332 }
1333
1334 static void arm_smmu_tlb_inv_context(void *cookie)
1335 {
1336         struct arm_smmu_domain *smmu_domain = cookie;
1337         struct arm_smmu_device *smmu = smmu_domain->smmu;
1338         struct arm_smmu_cmdq_ent cmd;
1339
1340         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1341                 cmd.opcode      = CMDQ_OP_TLBI_NH_ASID;
1342                 cmd.tlbi.asid   = smmu_domain->s1_cfg.cd.asid;
1343                 cmd.tlbi.vmid   = 0;
1344         } else {
1345                 cmd.opcode      = CMDQ_OP_TLBI_S12_VMALL;
1346                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1347         }
1348
1349         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1350         __arm_smmu_tlb_sync(smmu);
1351 }
1352
1353 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
1354                                           size_t granule, bool leaf, void *cookie)
1355 {
1356         struct arm_smmu_domain *smmu_domain = cookie;
1357         struct arm_smmu_device *smmu = smmu_domain->smmu;
1358         struct arm_smmu_cmdq_ent cmd = {
1359                 .tlbi = {
1360                         .leaf   = leaf,
1361                         .addr   = iova,
1362                 },
1363         };
1364
1365         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1366                 cmd.opcode      = CMDQ_OP_TLBI_NH_VA;
1367                 cmd.tlbi.asid   = smmu_domain->s1_cfg.cd.asid;
1368         } else {
1369                 cmd.opcode      = CMDQ_OP_TLBI_S2_IPA;
1370                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1371         }
1372
1373         do {
1374                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1375                 cmd.tlbi.addr += granule;
1376         } while (size -= granule);
1377 }
1378
1379 static struct iommu_gather_ops arm_smmu_gather_ops = {
1380         .tlb_flush_all  = arm_smmu_tlb_inv_context,
1381         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
1382         .tlb_sync       = arm_smmu_tlb_sync,
1383 };
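/*
 * Added note: these callbacks are invoked by the io-pgtable code as
 * mappings change: tlb_add_flush() queues TLBI commands for the affected
 * range without waiting, and tlb_sync() issues the CMD_SYNC that waits for
 * them to complete, so the expensive synchronisation is batched rather
 * than performed per page.
 */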
1384
1385 /* IOMMU API */
1386 static bool arm_smmu_capable(enum iommu_cap cap)
1387 {
1388         switch (cap) {
1389         case IOMMU_CAP_CACHE_COHERENCY:
1390                 return true;
1391         case IOMMU_CAP_INTR_REMAP:
1392                 return true; /* MSIs are just memory writes */
1393         case IOMMU_CAP_NOEXEC:
1394                 return true;
1395         default:
1396                 return false;
1397         }
1398 }
1399
1400 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1401 {
1402         struct arm_smmu_domain *smmu_domain;
1403
1404         if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
1405                 return NULL;
1406
1407         /*
1408          * Allocate the domain and initialise some of its data structures.
1409          * We can't really do anything meaningful until we've added a
1410          * master.
1411          */
1412         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1413         if (!smmu_domain)
1414                 return NULL;
1415
1416         if (type == IOMMU_DOMAIN_DMA &&
1417             iommu_get_dma_cookie(&smmu_domain->domain)) {
1418                 kfree(smmu_domain);
1419                 return NULL;
1420         }
1421
1422         mutex_init(&smmu_domain->init_mutex);
1423         spin_lock_init(&smmu_domain->pgtbl_lock);
1424         return &smmu_domain->domain;
1425 }
1426
1427 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1428 {
1429         int idx, size = 1 << span;
1430
1431         do {
1432                 idx = find_first_zero_bit(map, size);
1433                 if (idx == size)
1434                         return -ENOSPC;
1435         } while (test_and_set_bit(idx, map));
1436
1437         return idx;
1438 }
1439
1440 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1441 {
1442         clear_bit(idx, map);
1443 }
1444
1445 static void arm_smmu_domain_free(struct iommu_domain *domain)
1446 {
1447         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1448         struct arm_smmu_device *smmu = smmu_domain->smmu;
1449
1450         iommu_put_dma_cookie(domain);
1451         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1452
1453         /* Free the CD and ASID, if we allocated them */
1454         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1455                 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1456
1457                 if (cfg->cdptr) {
1458                         dmam_free_coherent(smmu_domain->smmu->dev,
1459                                            CTXDESC_CD_DWORDS << 3,
1460                                            cfg->cdptr,
1461                                            cfg->cdptr_dma);
1462
1463                         arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
1464                 }
1465         } else {
1466                 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1467                 if (cfg->vmid)
1468                         arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1469         }
1470
1471         kfree(smmu_domain);
1472 }
1473
1474 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1475                                        struct io_pgtable_cfg *pgtbl_cfg)
1476 {
1477         int ret;
1478         int asid;
1479         struct arm_smmu_device *smmu = smmu_domain->smmu;
1480         struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1481
1482         asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits);
1483         if (asid < 0)
1484                 return asid;
1485
1486         cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
1487                                          &cfg->cdptr_dma,
1488                                          GFP_KERNEL | __GFP_ZERO);
1489         if (!cfg->cdptr) {
1490                 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1491                 ret = -ENOMEM;
1492                 goto out_free_asid;
1493         }
1494
1495         cfg->cd.asid    = (u16)asid;
1496         cfg->cd.ttbr    = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
1497         cfg->cd.tcr     = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1498         cfg->cd.mair    = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
1499         return 0;
1500
1501 out_free_asid:
1502         arm_smmu_bitmap_free(smmu->asid_map, asid);
1503         return ret;
1504 }
1505
1506 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1507                                        struct io_pgtable_cfg *pgtbl_cfg)
1508 {
1509         int vmid;
1510         struct arm_smmu_device *smmu = smmu_domain->smmu;
1511         struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1512
1513         vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1514         if (vmid < 0)
1515                 return vmid;
1516
1517         cfg->vmid       = (u16)vmid;
1518         cfg->vttbr      = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1519         cfg->vtcr       = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1520         return 0;
1521 }
1522
1523 static int arm_smmu_domain_finalise(struct iommu_domain *domain)
1524 {
1525         int ret;
1526         unsigned long ias, oas;
1527         enum io_pgtable_fmt fmt;
1528         struct io_pgtable_cfg pgtbl_cfg;
1529         struct io_pgtable_ops *pgtbl_ops;
1530         int (*finalise_stage_fn)(struct arm_smmu_domain *,
1531                                  struct io_pgtable_cfg *);
1532         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1533         struct arm_smmu_device *smmu = smmu_domain->smmu;
1534
1535         /* Restrict the stage to what we can actually support */
1536         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1537                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1538         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1539                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1540
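        /*
         * Note: a NESTED domain is programmed as stage-2 only below; true
         * nested (stage-1 + stage-2) translation is not implemented by this
         * driver.
         */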
1541         switch (smmu_domain->stage) {
1542         case ARM_SMMU_DOMAIN_S1:
1543                 ias = VA_BITS;
1544                 oas = smmu->ias;
1545                 fmt = ARM_64_LPAE_S1;
1546                 finalise_stage_fn = arm_smmu_domain_finalise_s1;
1547                 break;
1548         case ARM_SMMU_DOMAIN_NESTED:
1549         case ARM_SMMU_DOMAIN_S2:
1550                 ias = smmu->ias;
1551                 oas = smmu->oas;
1552                 fmt = ARM_64_LPAE_S2;
1553                 finalise_stage_fn = arm_smmu_domain_finalise_s2;
1554                 break;
1555         default:
1556                 return -EINVAL;
1557         }
1558
1559         pgtbl_cfg = (struct io_pgtable_cfg) {
1560                 .pgsize_bitmap  = smmu->pgsize_bitmap,
1561                 .ias            = ias,
1562                 .oas            = oas,
1563                 .tlb            = &arm_smmu_gather_ops,
1564                 .iommu_dev      = smmu->dev,
1565         };
1566
1567         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1568         if (!pgtbl_ops)
1569                 return -ENOMEM;
1570
1571         ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
1572         if (ret < 0) {
1573                 free_io_pgtable_ops(pgtbl_ops);
1574                 return ret;
1575         }
1576         domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1577         smmu_domain->pgtbl_ops = pgtbl_ops;
1578         return 0;
1579 }
1580
1581 static struct arm_smmu_group *arm_smmu_group_get(struct device *dev)
1582 {
1583         struct iommu_group *group;
1584         struct arm_smmu_group *smmu_group;
1585
1586         group = iommu_group_get(dev);
1587         if (!group)
1588                 return NULL;
1589
1590         smmu_group = iommu_group_get_iommudata(group);
1591         iommu_group_put(group);
1592         return smmu_group;
1593 }
1594
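/*
 * Return a pointer to the Stream Table Entry for 'sid'. With a 2-level
 * stream table the upper SID bits index the L1 descriptor array and the
 * low STRTAB_SPLIT bits index the STE within that descriptor's L2 table;
 * otherwise the SID indexes a flat array of STEs directly.
 */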
1595 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
1596 {
1597         __le64 *step;
1598         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1599
1600         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1601                 struct arm_smmu_strtab_l1_desc *l1_desc;
1602                 int idx;
1603
1604                 /* Two-level walk */
1605                 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
1606                 l1_desc = &cfg->l1_desc[idx];
1607                 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
1608                 step = &l1_desc->l2ptr[idx];
1609         } else {
1610                 /* Simple linear lookup */
1611                 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
1612         }
1613
1614         return step;
1615 }
1616
1617 static int arm_smmu_install_ste_for_group(struct arm_smmu_group *smmu_group)
1618 {
1619         int i;
1620         struct arm_smmu_domain *smmu_domain = smmu_group->domain;
1621         struct arm_smmu_strtab_ent *ste = &smmu_group->ste;
1622         struct arm_smmu_device *smmu = smmu_group->smmu;
1623
1624         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1625                 ste->s1_cfg = &smmu_domain->s1_cfg;
1626                 ste->s2_cfg = NULL;
1627                 arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
1628         } else {
1629                 ste->s1_cfg = NULL;
1630                 ste->s2_cfg = &smmu_domain->s2_cfg;
1631         }
1632
1633         for (i = 0; i < smmu_group->num_sids; ++i) {
1634                 u32 sid = smmu_group->sids[i];
1635                 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
1636
1637                 arm_smmu_write_strtab_ent(smmu, sid, step, ste);
1638         }
1639
1640         return 0;
1641 }
1642
1643 static void arm_smmu_detach_dev(struct device *dev)
1644 {
1645         struct arm_smmu_group *smmu_group = arm_smmu_group_get(dev);
1646
1647         smmu_group->ste.bypass = true;
1648         if (arm_smmu_install_ste_for_group(smmu_group) < 0)
1649                 dev_warn(dev, "failed to install bypass STE\n");
1650
1651         smmu_group->domain = NULL;
1652 }
1653
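/*
 * Attach 'dev' to 'domain': detach it from any previous domain first,
 * finalise the domain against this SMMU on first use, and then install
 * STEs for every SID in the device's group.
 */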
1654 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1655 {
1656         int ret = 0;
1657         struct arm_smmu_device *smmu;
1658         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1659         struct arm_smmu_group *smmu_group = arm_smmu_group_get(dev);
1660
1661         if (!smmu_group)
1662                 return -ENOENT;
1663
1664         /* Already attached to a different domain? */
1665         if (smmu_group->domain && smmu_group->domain != smmu_domain)
1666                 arm_smmu_detach_dev(dev);
1667
1668         smmu = smmu_group->smmu;
1669         mutex_lock(&smmu_domain->init_mutex);
1670
1671         if (!smmu_domain->smmu) {
1672                 smmu_domain->smmu = smmu;
1673                 ret = arm_smmu_domain_finalise(domain);
1674                 if (ret) {
1675                         smmu_domain->smmu = NULL;
1676                         goto out_unlock;
1677                 }
1678         } else if (smmu_domain->smmu != smmu) {
1679                 dev_err(dev,
1680                         "cannot attach to SMMU %s (upstream of %s)\n",
1681                         dev_name(smmu_domain->smmu->dev),
1682                         dev_name(smmu->dev));
1683                 ret = -ENXIO;
1684                 goto out_unlock;
1685         }
1686
1687         /* Group already attached to this domain? */
1688         if (smmu_group->domain)
1689                 goto out_unlock;
1690
1691         smmu_group->domain      = smmu_domain;
1692
1693         /*
1694          * FIXME: This should always be "false" once we have IOMMU-backed
1695          * DMA ops for all devices behind the SMMU.
1696          */
1697         smmu_group->ste.bypass  = domain->type == IOMMU_DOMAIN_DMA;
1698
1699         ret = arm_smmu_install_ste_for_group(smmu_group);
1700         if (ret < 0)
1701                 smmu_group->domain = NULL;
1702
1703 out_unlock:
1704         mutex_unlock(&smmu_domain->init_mutex);
1705         return ret;
1706 }
1707
1708 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1709                         phys_addr_t paddr, size_t size, int prot)
1710 {
1711         int ret;
1712         unsigned long flags;
1713         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1714         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1715
1716         if (!ops)
1717                 return -ENODEV;
1718
1719         spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1720         ret = ops->map(ops, iova, paddr, size, prot);
1721         spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1722         return ret;
1723 }
1724
1725 static size_t
1726 arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
1727 {
1728         size_t ret;
1729         unsigned long flags;
1730         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1731         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1732
1733         if (!ops)
1734                 return 0;
1735
1736         spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1737         ret = ops->unmap(ops, iova, size);
1738         spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1739         return ret;
1740 }
1741
1742 static phys_addr_t
1743 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
1744 {
1745         phys_addr_t ret;
1746         unsigned long flags;
1747         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1748         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1749
1750         if (!ops)
1751                 return 0;
1752
1753         spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1754         ret = ops->iova_to_phys(ops, iova);
1755         spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1756
1757         return ret;
1758 }
1759
1760 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *sidp)
1761 {
1762         *(u32 *)sidp = alias;
1763         return 0; /* Continue walking */
1764 }
1765
1766 static void __arm_smmu_release_pci_iommudata(void *data)
1767 {
1768         kfree(data);
1769 }
1770
1771 static struct arm_smmu_device *arm_smmu_get_for_pci_dev(struct pci_dev *pdev)
1772 {
1773         struct device_node *of_node;
1774         struct platform_device *smmu_pdev;
1775         struct arm_smmu_device *smmu = NULL;
1776         struct pci_bus *bus = pdev->bus;
1777
1778         /* Walk up to the root bus */
1779         while (!pci_is_root_bus(bus))
1780                 bus = bus->parent;
1781
1782         /* Follow the "iommus" phandle from the host controller */
1783         of_node = of_parse_phandle(bus->bridge->parent->of_node, "iommus", 0);
1784         if (!of_node)
1785                 return NULL;
1786
1787         /* See if we can find an SMMU corresponding to the phandle */
1788         smmu_pdev = of_find_device_by_node(of_node);
1789         if (smmu_pdev)
1790                 smmu = platform_get_drvdata(smmu_pdev);
1791
1792         of_node_put(of_node);
1793         return smmu;
1794 }
1795
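/*
 * num_l1_ents counts L1 descriptors for a 2-level stream table, and each
 * descriptor covers 1 << STRTAB_SPLIT STEs, so scale the SID limit
 * accordingly.
 */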
1796 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
1797 {
1798         unsigned long limit = smmu->strtab_cfg.num_l1_ents;
1799
1800         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
1801                 limit *= 1UL << STRTAB_SPLIT;
1802
1803         return sid < limit;
1804 }
1805
1806 static int arm_smmu_add_device(struct device *dev)
1807 {
1808         int i, ret;
1809         u32 sid, *sids;
1810         struct pci_dev *pdev;
1811         struct iommu_group *group;
1812         struct arm_smmu_group *smmu_group;
1813         struct arm_smmu_device *smmu;
1814
1815         /* We only support PCI, for now */
1816         if (!dev_is_pci(dev))
1817                 return -ENODEV;
1818
1819         pdev = to_pci_dev(dev);
1820         group = iommu_group_get_for_dev(dev);
1821         if (IS_ERR(group))
1822                 return PTR_ERR(group);
1823
1824         smmu_group = iommu_group_get_iommudata(group);
1825         if (!smmu_group) {
1826                 smmu = arm_smmu_get_for_pci_dev(pdev);
1827                 if (!smmu) {
1828                         ret = -ENOENT;
1829                         goto out_remove_dev;
1830                 }
1831
1832                 smmu_group = kzalloc(sizeof(*smmu_group), GFP_KERNEL);
1833                 if (!smmu_group) {
1834                         ret = -ENOMEM;
1835                         goto out_remove_dev;
1836                 }
1837
1838                 smmu_group->ste.valid   = true;
1839                 smmu_group->smmu        = smmu;
1840                 iommu_group_set_iommudata(group, smmu_group,
1841                                           __arm_smmu_release_pci_iommudata);
1842         } else {
1843                 smmu = smmu_group->smmu;
1844         }
1845
1846         /* Assume SID == RID until firmware tells us otherwise */
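        /*
         * __arm_smmu_get_pci_sid() overwrites 'sid' for every alias it is
         * called for, so the last alias visited by the walk wins.
         */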
1847         pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid, &sid);
1848         for (i = 0; i < smmu_group->num_sids; ++i) {
1849                 /* If we already know about this SID, then we're done */
1850                 if (smmu_group->sids[i] == sid)
1851                         goto out_put_group;
1852         }
1853
1854         /* Check the SID is in range of the SMMU and our stream table */
1855         if (!arm_smmu_sid_in_range(smmu, sid)) {
1856                 ret = -ERANGE;
1857                 goto out_remove_dev;
1858         }
1859
1860         /* Ensure l2 strtab is initialised */
1861         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1862                 ret = arm_smmu_init_l2_strtab(smmu, sid);
1863                 if (ret)
1864                         goto out_remove_dev;
1865         }
1866
1867         /* Resize the SID array for the group */
1868         smmu_group->num_sids++;
1869         sids = krealloc(smmu_group->sids, smmu_group->num_sids * sizeof(*sids),
1870                         GFP_KERNEL);
1871         if (!sids) {
1872                 smmu_group->num_sids--;
1873                 ret = -ENOMEM;
1874                 goto out_remove_dev;
1875         }
1876
1877         /* Add the new SID */
1878         sids[smmu_group->num_sids - 1] = sid;
1879         smmu_group->sids = sids;
1880
1881 out_put_group:
1882         iommu_group_put(group);
1883         return 0;
1884
1885 out_remove_dev:
1886         iommu_group_remove_device(dev);
1887         iommu_group_put(group);
1888         return ret;
1889 }
1890
1891 static void arm_smmu_remove_device(struct device *dev)
1892 {
1893         iommu_group_remove_device(dev);
1894 }
1895
1896 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1897                                     enum iommu_attr attr, void *data)
1898 {
1899         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1900
1901         switch (attr) {
1902         case DOMAIN_ATTR_NESTING:
1903                 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1904                 return 0;
1905         default:
1906                 return -ENODEV;
1907         }
1908 }
1909
1910 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1911                                     enum iommu_attr attr, void *data)
1912 {
1913         int ret = 0;
1914         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1915
1916         mutex_lock(&smmu_domain->init_mutex);
1917
1918         switch (attr) {
1919         case DOMAIN_ATTR_NESTING:
1920                 if (smmu_domain->smmu) {
1921                         ret = -EPERM;
1922                         goto out_unlock;
1923                 }
1924
1925                 if (*(int *)data)
1926                         smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1927                 else
1928                         smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1929
1930                 break;
1931         default:
1932                 ret = -ENODEV;
1933         }
1934
1935 out_unlock:
1936         mutex_unlock(&smmu_domain->init_mutex);
1937         return ret;
1938 }
1939
1940 static struct iommu_ops arm_smmu_ops = {
1941         .capable                = arm_smmu_capable,
1942         .domain_alloc           = arm_smmu_domain_alloc,
1943         .domain_free            = arm_smmu_domain_free,
1944         .attach_dev             = arm_smmu_attach_dev,
1945         .map                    = arm_smmu_map,
1946         .unmap                  = arm_smmu_unmap,
1947         .map_sg                 = default_iommu_map_sg,
1948         .iova_to_phys           = arm_smmu_iova_to_phys,
1949         .add_device             = arm_smmu_add_device,
1950         .remove_device          = arm_smmu_remove_device,
1951         .device_group           = pci_device_group,
1952         .domain_get_attr        = arm_smmu_domain_get_attr,
1953         .domain_set_attr        = arm_smmu_domain_set_attr,
1954         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
1955 };
1956
1957 /* Probing and initialisation functions */
1958 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
1959                                    struct arm_smmu_queue *q,
1960                                    unsigned long prod_off,
1961                                    unsigned long cons_off,
1962                                    size_t dwords)
1963 {
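        /*
         * Queue size in bytes: (1 << max_n_shift) entries of 'dwords'
         * 64-bit words each (hence the << 3).
         */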
1964         size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
1965
1966         q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
1967         if (!q->base) {
1968                 dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
1969                         qsz);
1970                 return -ENOMEM;
1971         }
1972
1973         q->prod_reg     = smmu->base + prod_off;
1974         q->cons_reg     = smmu->base + cons_off;
1975         q->ent_dwords   = dwords;
1976
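        /*
         * Pre-compute the value programmed into the queue's BASE register:
         * the RWA hint, the DMA address of the ring and log2 of its number
         * of entries.
         */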
1977         q->q_base  = Q_BASE_RWA;
1978         q->q_base |= q->base_dma & Q_BASE_ADDR_MASK << Q_BASE_ADDR_SHIFT;
1979         q->q_base |= (q->max_n_shift & Q_BASE_LOG2SIZE_MASK)
1980                      << Q_BASE_LOG2SIZE_SHIFT;
1981
1982         q->prod = q->cons = 0;
1983         return 0;
1984 }
1985
1986 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
1987 {
1988         int ret;
1989
1990         /* cmdq */
1991         spin_lock_init(&smmu->cmdq.lock);
1992         ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
1993                                       ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
1994         if (ret)
1995                 return ret;
1996
1997         /* evtq */
1998         ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
1999                                       ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
2000         if (ret)
2001                 return ret;
2002
2003         /* priq */
2004         if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2005                 return 0;
2006
2007         return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2008                                        ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
2009 }
2010
2011 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2012 {
2013         unsigned int i;
2014         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2015         size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2016         void *strtab = smmu->strtab_cfg.strtab;
2017
2018         cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2019         if (!cfg->l1_desc) {
2020                 dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2021                 return -ENOMEM;
2022         }
2023
2024         for (i = 0; i < cfg->num_l1_ents; ++i) {
2025                 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2026                 strtab += STRTAB_L1_DESC_DWORDS << 3;
2027         }
2028
2029         return 0;
2030 }
2031
2032 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2033 {
2034         void *strtab;
2035         u64 reg;
2036         u32 size, l1size;
2037         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2038
2039         /*
2040          * If we can resolve everything with a single L2 table, then we
2041          * just need a single L1 descriptor. Otherwise, calculate the L1
2042          * size, capped to the SIDSIZE.
2043          */
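        /*
         * For example, with STRTAB_SPLIT == 8, STRTAB_L1_DESC_DWORDS == 1
         * and STRTAB_L1_SZ_SHIFT == 20, a 16-bit SID space gives
         * size = min(17, 8) = 8: 256 L1 descriptors, each pointing at a
         * 256-entry L2 table of STEs.
         */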
2044         if (smmu->sid_bits < STRTAB_SPLIT) {
2045                 size = 0;
2046         } else {
2047                 size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2048                 size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2049         }
2050         cfg->num_l1_ents = 1 << size;
2051
2052         size += STRTAB_SPLIT;
2053         if (size < smmu->sid_bits)
2054                 dev_warn(smmu->dev,
2055                          "2-level strtab only covers %u/%u bits of SID\n",
2056                          size, smmu->sid_bits);
2057
2058         l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2059         strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2060                                      GFP_KERNEL | __GFP_ZERO);
2061         if (!strtab) {
2062                 dev_err(smmu->dev,
2063                         "failed to allocate l1 stream table (%u bytes)\n",
2064                         l1size);
2065                 return -ENOMEM;
2066         }
2067         cfg->strtab = strtab;
2068
2069         /* Configure strtab_base_cfg for 2 levels */
2070         reg  = STRTAB_BASE_CFG_FMT_2LVL;
2071         reg |= (size & STRTAB_BASE_CFG_LOG2SIZE_MASK)
2072                 << STRTAB_BASE_CFG_LOG2SIZE_SHIFT;
2073         reg |= (STRTAB_SPLIT & STRTAB_BASE_CFG_SPLIT_MASK)
2074                 << STRTAB_BASE_CFG_SPLIT_SHIFT;
2075         cfg->strtab_base_cfg = reg;
2076
2077         return arm_smmu_init_l1_strtab(smmu);
2078 }
2079
2080 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2081 {
2082         void *strtab;
2083         u64 reg;
2084         u32 size;
2085         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2086
2087         size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2088         strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2089                                      GFP_KERNEL | __GFP_ZERO);
2090         if (!strtab) {
2091                 dev_err(smmu->dev,
2092                         "failed to allocate linear stream table (%u bytes)\n",
2093                         size);
2094                 return -ENOMEM;
2095         }
2096         cfg->strtab = strtab;
2097         cfg->num_l1_ents = 1 << smmu->sid_bits;
2098
2099         /* Configure strtab_base_cfg for a linear table covering all SIDs */
2100         reg  = STRTAB_BASE_CFG_FMT_LINEAR;
2101         reg |= (smmu->sid_bits & STRTAB_BASE_CFG_LOG2SIZE_MASK)
2102                 << STRTAB_BASE_CFG_LOG2SIZE_SHIFT;
2103         cfg->strtab_base_cfg = reg;
2104
2105         arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2106         return 0;
2107 }
2108
2109 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2110 {
2111         u64 reg;
2112         int ret;
2113
2114         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2115                 ret = arm_smmu_init_strtab_2lvl(smmu);
2116         else
2117                 ret = arm_smmu_init_strtab_linear(smmu);
2118
2119         if (ret)
2120                 return ret;
2121
2122         /* Set the strtab base address */
2123         reg  = smmu->strtab_cfg.strtab_dma &
2124                STRTAB_BASE_ADDR_MASK << STRTAB_BASE_ADDR_SHIFT;
2125         reg |= STRTAB_BASE_RA;
2126         smmu->strtab_cfg.strtab_base = reg;
2127
2128         /* Allocate the first VMID for stage-2 bypass STEs */
2129         set_bit(0, smmu->vmid_map);
2130         return 0;
2131 }
2132
2133 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2134 {
2135         int ret;
2136
2137         ret = arm_smmu_init_queues(smmu);
2138         if (ret)
2139                 return ret;
2140
2141         return arm_smmu_init_strtab(smmu);
2142 }
2143
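/*
 * Write 'val' to a control register and poll the corresponding ACK
 * register until the update is reflected (or the poll times out).
 */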
2144 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2145                                    unsigned int reg_off, unsigned int ack_off)
2146 {
2147         u32 reg;
2148
2149         writel_relaxed(val, smmu->base + reg_off);
2150         return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2151                                           1, ARM_SMMU_POLL_TIMEOUT_US);
2152 }
2153
2154 static void arm_smmu_free_msis(void *data)
2155 {
2156         struct device *dev = data;
2157         platform_msi_domain_free_irqs(dev);
2158 }
2159
2160 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2161 {
2162         phys_addr_t doorbell;
2163         struct device *dev = msi_desc_to_dev(desc);
2164         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2165         phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2166
2167         doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2168         doorbell &= MSI_CFG0_ADDR_MASK << MSI_CFG0_ADDR_SHIFT;
2169
2170         writeq_relaxed(doorbell, smmu->base + cfg[0]);
2171         writel_relaxed(msg->data, smmu->base + cfg[1]);
2172         writel_relaxed(MSI_CFG2_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2173 }
2174
2175 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2176 {
2177         struct msi_desc *desc;
2178         int ret, nvec = ARM_SMMU_MAX_MSIS;
2179         struct device *dev = smmu->dev;
2180
2181         /* Clear the MSI address regs */
2182         writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2183         writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2184
2185         if (smmu->features & ARM_SMMU_FEAT_PRI)
2186                 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2187         else
2188                 nvec--;
2189
2190         if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2191                 return;
2192
2193         /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2194         ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2195         if (ret) {
2196                 dev_warn(dev, "failed to allocate MSIs\n");
2197                 return;
2198         }
2199
2200         for_each_msi_entry(desc, dev) {
2201                 switch (desc->platform.msi_index) {
2202                 case EVTQ_MSI_INDEX:
2203                         smmu->evtq.q.irq = desc->irq;
2204                         break;
2205                 case GERROR_MSI_INDEX:
2206                         smmu->gerr_irq = desc->irq;
2207                         break;
2208                 case PRIQ_MSI_INDEX:
2209                         smmu->priq.q.irq = desc->irq;
2210                         break;
2211                 default:        /* Unknown */
2212                         continue;
2213                 }
2214         }
2215
2216         /* Add callback to free MSIs on teardown */
2217         devm_add_action(dev, arm_smmu_free_msis, dev);
2218 }
2219
2220 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2221 {
2222         int ret, irq;
2223         u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2224
2225         /* Disable IRQs first */
2226         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2227                                       ARM_SMMU_IRQ_CTRLACK);
2228         if (ret) {
2229                 dev_err(smmu->dev, "failed to disable irqs\n");
2230                 return ret;
2231         }
2232
2233         arm_smmu_setup_msis(smmu);
2234
2235         /* Request interrupt lines */
2236         irq = smmu->evtq.q.irq;
2237         if (irq) {
2238                 ret = devm_request_threaded_irq(smmu->dev, irq,
2239                                                 arm_smmu_evtq_handler,
2240                                                 arm_smmu_evtq_thread,
2241                                                 0, "arm-smmu-v3-evtq", smmu);
2242                 if (ret < 0)
2243                         dev_warn(smmu->dev, "failed to enable evtq irq\n");
2244         }
2245
2246         irq = smmu->cmdq.q.irq;
2247         if (irq) {
2248                 ret = devm_request_irq(smmu->dev, irq,
2249                                        arm_smmu_cmdq_sync_handler, 0,
2250                                        "arm-smmu-v3-cmdq-sync", smmu);
2251                 if (ret < 0)
2252                         dev_warn(smmu->dev, "failed to enable cmdq-sync irq\n");
2253         }
2254
2255         irq = smmu->gerr_irq;
2256         if (irq) {
2257                 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2258                                        0, "arm-smmu-v3-gerror", smmu);
2259                 if (ret < 0)
2260                         dev_warn(smmu->dev, "failed to enable gerror irq\n");
2261         }
2262
2263         if (smmu->features & ARM_SMMU_FEAT_PRI) {
2264                 irq = smmu->priq.q.irq;
2265                 if (irq) {
2266                         ret = devm_request_threaded_irq(smmu->dev, irq,
2267                                                         arm_smmu_priq_handler,
2268                                                         arm_smmu_priq_thread,
2269                                                         0, "arm-smmu-v3-priq",
2270                                                         smmu);
2271                         if (ret < 0)
2272                                 dev_warn(smmu->dev,
2273                                          "failed to enable priq irq\n");
2274                         else
2275                                 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
2276                 }
2277         }
2278
2279         /* Enable interrupt generation on the SMMU */
2280         ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
2281                                       ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
2282         if (ret)
2283                 dev_warn(smmu->dev, "failed to enable irqs\n");
2284
2285         return 0;
2286 }
2287
2288 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
2289 {
2290         int ret;
2291
2292         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
2293         if (ret)
2294                 dev_err(smmu->dev, "failed to clear cr0\n");
2295
2296         return ret;
2297 }
2298
2299 static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
2300 {
2301         int ret;
2302         u32 reg, enables;
2303         struct arm_smmu_cmdq_ent cmd;
2304
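        /*
         * Bring-up order below: disable the SMMU, set table/queue memory
         * attributes, program the stream table and command queue, use the
         * command queue to invalidate stale configuration and TLB entries,
         * then enable the event/PRI queues and IRQs before finally setting
         * SMMUEN.
         */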
2305         /* Clear CR0 and sync (disables SMMU and queue processing) */
2306         reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
2307         if (reg & CR0_SMMUEN)
2308                 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
2309
2310         ret = arm_smmu_device_disable(smmu);
2311         if (ret)
2312                 return ret;
2313
2314         /* CR1 (table and queue memory attributes) */
2315         reg = (CR1_SH_ISH << CR1_TABLE_SH_SHIFT) |
2316               (CR1_CACHE_WB << CR1_TABLE_OC_SHIFT) |
2317               (CR1_CACHE_WB << CR1_TABLE_IC_SHIFT) |
2318               (CR1_SH_ISH << CR1_QUEUE_SH_SHIFT) |
2319               (CR1_CACHE_WB << CR1_QUEUE_OC_SHIFT) |
2320               (CR1_CACHE_WB << CR1_QUEUE_IC_SHIFT);
2321         writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
2322
2323         /* CR2 (PTM, RECINVSID and E2H) */
2324         reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
2325         writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
2326
2327         /* Stream table */
2328         writeq_relaxed(smmu->strtab_cfg.strtab_base,
2329                        smmu->base + ARM_SMMU_STRTAB_BASE);
2330         writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
2331                        smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
2332
2333         /* Command queue */
2334         writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
2335         writel_relaxed(smmu->cmdq.q.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
2336         writel_relaxed(smmu->cmdq.q.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
2337
2338         enables = CR0_CMDQEN;
2339         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2340                                       ARM_SMMU_CR0ACK);
2341         if (ret) {
2342                 dev_err(smmu->dev, "failed to enable command queue\n");
2343                 return ret;
2344         }
2345
2346         /* Invalidate any cached configuration */
2347         cmd.opcode = CMDQ_OP_CFGI_ALL;
2348         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2349         cmd.opcode = CMDQ_OP_CMD_SYNC;
2350         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2351
2352         /* Invalidate any stale TLB entries */
2353         if (smmu->features & ARM_SMMU_FEAT_HYP) {
2354                 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
2355                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2356         }
2357
2358         cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
2359         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2360         cmd.opcode = CMDQ_OP_CMD_SYNC;
2361         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2362
2363         /* Event queue */
2364         writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
2365         writel_relaxed(smmu->evtq.q.prod, smmu->base + ARM_SMMU_EVTQ_PROD);
2366         writel_relaxed(smmu->evtq.q.cons, smmu->base + ARM_SMMU_EVTQ_CONS);
2367
2368         enables |= CR0_EVTQEN;
2369         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2370                                       ARM_SMMU_CR0ACK);
2371         if (ret) {
2372                 dev_err(smmu->dev, "failed to enable event queue\n");
2373                 return ret;
2374         }
2375
2376         /* PRI queue */
2377         if (smmu->features & ARM_SMMU_FEAT_PRI) {
2378                 writeq_relaxed(smmu->priq.q.q_base,
2379                                smmu->base + ARM_SMMU_PRIQ_BASE);
2380                 writel_relaxed(smmu->priq.q.prod,
2381                                smmu->base + ARM_SMMU_PRIQ_PROD);
2382                 writel_relaxed(smmu->priq.q.cons,
2383                                smmu->base + ARM_SMMU_PRIQ_CONS);
2384
2385                 enables |= CR0_PRIQEN;
2386                 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2387                                               ARM_SMMU_CR0ACK);
2388                 if (ret) {
2389                         dev_err(smmu->dev, "failed to enable PRI queue\n");
2390                         return ret;
2391                 }
2392         }
2393
2394         ret = arm_smmu_setup_irqs(smmu);
2395         if (ret) {
2396                 dev_err(smmu->dev, "failed to setup irqs\n");
2397                 return ret;
2398         }
2399
2400         /* Enable the SMMU interface */
2401         enables |= CR0_SMMUEN;
2402         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2403                                       ARM_SMMU_CR0ACK);
2404         if (ret) {
2405                 dev_err(smmu->dev, "failed to enable SMMU interface\n");
2406                 return ret;
2407         }
2408
2409         return 0;
2410 }
2411
2412 static int arm_smmu_device_probe(struct arm_smmu_device *smmu)
2413 {
2414         u32 reg;
2415         bool coherent;
2416
2417         /* IDR0 */
2418         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
2419
2420         /* 2-level structures */
2421         if ((reg & IDR0_ST_LVL_MASK << IDR0_ST_LVL_SHIFT) == IDR0_ST_LVL_2LVL)
2422                 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
2423
2424         if (reg & IDR0_CD2L)
2425                 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
2426
2427         /*
2428          * Translation table endianness.
2429          * We currently require the same endianness as the CPU, but this
2430          * could be changed later by adding a new IO_PGTABLE_QUIRK.
2431          */
2432         switch (reg & IDR0_TTENDIAN_MASK << IDR0_TTENDIAN_SHIFT) {
2433         case IDR0_TTENDIAN_MIXED:
2434                 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
2435                 break;
2436 #ifdef __BIG_ENDIAN
2437         case IDR0_TTENDIAN_BE:
2438                 smmu->features |= ARM_SMMU_FEAT_TT_BE;
2439                 break;
2440 #else
2441         case IDR0_TTENDIAN_LE:
2442                 smmu->features |= ARM_SMMU_FEAT_TT_LE;
2443                 break;
2444 #endif
2445         default:
2446                 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
2447                 return -ENXIO;
2448         }
2449
2450         /* Boolean feature flags */
2451         if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
2452                 smmu->features |= ARM_SMMU_FEAT_PRI;
2453
2454         if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
2455                 smmu->features |= ARM_SMMU_FEAT_ATS;
2456
2457         if (reg & IDR0_SEV)
2458                 smmu->features |= ARM_SMMU_FEAT_SEV;
2459
2460         if (reg & IDR0_MSI)
2461                 smmu->features |= ARM_SMMU_FEAT_MSI;
2462
2463         if (reg & IDR0_HYP)
2464                 smmu->features |= ARM_SMMU_FEAT_HYP;
2465
2466         /*
2467          * The dma-coherent property is used in preference to the ID
2468          * register, but warn on mismatch.
2469          */
2470         coherent = of_dma_is_coherent(smmu->dev->of_node);
2471         if (coherent)
2472                 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2473
2474         if (!!(reg & IDR0_COHACC) != coherent)
2475                 dev_warn(smmu->dev, "IDR0.COHACC overridden by dma-coherent property (%s)\n",
2476                          coherent ? "true" : "false");
2477
2478         switch (reg & IDR0_STALL_MODEL_MASK << IDR0_STALL_MODEL_SHIFT) {
2479         case IDR0_STALL_MODEL_STALL:
2480                 /* Fallthrough */
2481         case IDR0_STALL_MODEL_FORCE:
2482                 smmu->features |= ARM_SMMU_FEAT_STALLS;
2483         }
2484
2485         if (reg & IDR0_S1P)
2486                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
2487
2488         if (reg & IDR0_S2P)
2489                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
2490
2491         if (!(reg & (IDR0_S1P | IDR0_S2P))) {
2492                 dev_err(smmu->dev, "no translation support!\n");
2493                 return -ENXIO;
2494         }
2495
2496         /* We only support the AArch64 table format at present */
2497         switch (reg & IDR0_TTF_MASK << IDR0_TTF_SHIFT) {
2498         case IDR0_TTF_AARCH32_64:
2499                 smmu->ias = 40;
2500                 /* Fallthrough */
2501         case IDR0_TTF_AARCH64:
2502                 break;
2503         default:
2504                 dev_err(smmu->dev, "AArch64 table format not supported!\n");
2505                 return -ENXIO;
2506         }
2507
2508         /* ASID/VMID sizes */
2509         smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
2510         smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
2511
2512         /* IDR1 */
2513         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
2514         if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
2515                 dev_err(smmu->dev, "embedded implementation not supported\n");
2516                 return -ENXIO;
2517         }
2518
2519         /* Queue sizes, capped at 4k */
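        /*
         * max_n_shift is log2 of the number of queue entries; the 4K cap
         * corresponds to e.g. 256 16-byte commands for the command queue.
         */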
2520         smmu->cmdq.q.max_n_shift = min((u32)CMDQ_MAX_SZ_SHIFT,
2521                                        reg >> IDR1_CMDQ_SHIFT & IDR1_CMDQ_MASK);
2522         if (!smmu->cmdq.q.max_n_shift) {
2523                 /* Odd alignment restrictions on the base, so ignore for now */
2524                 dev_err(smmu->dev, "unit-length command queue not supported\n");
2525                 return -ENXIO;
2526         }
2527
2528         smmu->evtq.q.max_n_shift = min((u32)EVTQ_MAX_SZ_SHIFT,
2529                                        reg >> IDR1_EVTQ_SHIFT & IDR1_EVTQ_MASK);
2530         smmu->priq.q.max_n_shift = min((u32)PRIQ_MAX_SZ_SHIFT,
2531                                        reg >> IDR1_PRIQ_SHIFT & IDR1_PRIQ_MASK);
2532
2533         /* SID/SSID sizes */
2534         smmu->ssid_bits = reg >> IDR1_SSID_SHIFT & IDR1_SSID_MASK;
2535         smmu->sid_bits = reg >> IDR1_SID_SHIFT & IDR1_SID_MASK;
2536
2537         /* IDR5 */
2538         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
2539
2540         /* Maximum number of outstanding stalls */
2541         smmu->evtq.max_stalls = reg >> IDR5_STALL_MAX_SHIFT
2542                                 & IDR5_STALL_MAX_MASK;
2543
2544         /* Page sizes */
2545         if (reg & IDR5_GRAN64K)
2546                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
2547         if (reg & IDR5_GRAN16K)
2548                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
2549         if (reg & IDR5_GRAN4K)
2550                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
2551
2552         if (arm_smmu_ops.pgsize_bitmap == -1UL)
2553                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
2554         else
2555                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
2556
2557         /* Output address size */
2558         switch (reg & IDR5_OAS_MASK << IDR5_OAS_SHIFT) {
2559         case IDR5_OAS_32_BIT:
2560                 smmu->oas = 32;
2561                 break;
2562         case IDR5_OAS_36_BIT:
2563                 smmu->oas = 36;
2564                 break;
2565         case IDR5_OAS_40_BIT:
2566                 smmu->oas = 40;
2567                 break;
2568         case IDR5_OAS_42_BIT:
2569                 smmu->oas = 42;
2570                 break;
2571         case IDR5_OAS_44_BIT:
2572                 smmu->oas = 44;
2573                 break;
2574         default:
2575                 dev_info(smmu->dev,
2576                         "unknown output address size. Truncating to 48-bit\n");
2577                 /* Fallthrough */
2578         case IDR5_OAS_48_BIT:
2579                 smmu->oas = 48;
2580         }
2581
2582         /* Set the DMA mask for our table walker */
2583         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
2584                 dev_warn(smmu->dev,
2585                          "failed to set DMA mask for table walker\n");
2586
2587         smmu->ias = max(smmu->ias, smmu->oas);
2588
2589         dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
2590                  smmu->ias, smmu->oas, smmu->features);
2591         return 0;
2592 }
2593
2594 static int arm_smmu_device_dt_probe(struct platform_device *pdev)
2595 {
2596         int irq, ret;
2597         struct resource *res;
2598         struct arm_smmu_device *smmu;
2599         struct device *dev = &pdev->dev;
2600
2601         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2602         if (!smmu) {
2603                 dev_err(dev, "failed to allocate arm_smmu_device\n");
2604                 return -ENOMEM;
2605         }
2606         smmu->dev = dev;
2607
2608         /* Base address */
2609         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2610         if (resource_size(res) + 1 < SZ_128K) {
2611                 dev_err(dev, "MMIO region too small (%pr)\n", res);
2612                 return -EINVAL;
2613         }
2614
2615         smmu->base = devm_ioremap_resource(dev, res);
2616         if (IS_ERR(smmu->base))
2617                 return PTR_ERR(smmu->base);
2618
2619         /* Interrupt lines */
2620         irq = platform_get_irq_byname(pdev, "eventq");
2621         if (irq > 0)
2622                 smmu->evtq.q.irq = irq;
2623
2624         irq = platform_get_irq_byname(pdev, "priq");
2625         if (irq > 0)
2626                 smmu->priq.q.irq = irq;
2627
2628         irq = platform_get_irq_byname(pdev, "cmdq-sync");
2629         if (irq > 0)
2630                 smmu->cmdq.q.irq = irq;
2631
2632         irq = platform_get_irq_byname(pdev, "gerror");
2633         if (irq > 0)
2634                 smmu->gerr_irq = irq;
2635
2636         parse_driver_options(smmu);
2637
2638         /* Probe the h/w */
2639         ret = arm_smmu_device_probe(smmu);
2640         if (ret)
2641                 return ret;
2642
2643         /* Initialise in-memory data structures */
2644         ret = arm_smmu_init_structures(smmu);
2645         if (ret)
2646                 return ret;
2647
2648         /* Record our private device structure */
2649         platform_set_drvdata(pdev, smmu);
2650
2651         /* Reset the device */
2652         return arm_smmu_device_reset(smmu);
2653 }
2654
2655 static int arm_smmu_device_remove(struct platform_device *pdev)
2656 {
2657         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2658
2659         arm_smmu_device_disable(smmu);
2660         return 0;
2661 }
2662
2663 static struct of_device_id arm_smmu_of_match[] = {
2664         { .compatible = "arm,smmu-v3", },
2665         { },
2666 };
2667 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
2668
2669 static struct platform_driver arm_smmu_driver = {
2670         .driver = {
2671                 .name           = "arm-smmu-v3",
2672                 .of_match_table = of_match_ptr(arm_smmu_of_match),
2673         },
2674         .probe  = arm_smmu_device_dt_probe,
2675         .remove = arm_smmu_device_remove,
2676 };
2677
2678 static int __init arm_smmu_init(void)
2679 {
2680         struct device_node *np;
2681         int ret;
2682
2683         np = of_find_matching_node(NULL, arm_smmu_of_match);
2684         if (!np)
2685                 return 0;
2686
2687         of_node_put(np);
2688
2689         ret = platform_driver_register(&arm_smmu_driver);
2690         if (ret)
2691                 return ret;
2692
2693         pci_request_acs();
2694
2695         return bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2696 }
2697
2698 static void __exit arm_smmu_exit(void)
2699 {
2700         return platform_driver_unregister(&arm_smmu_driver);
2701 }
2702
2703 subsys_initcall(arm_smmu_init);
2704 module_exit(arm_smmu_exit);
2705
2706 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
2707 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2708 MODULE_LICENSE("GPL v2");