drivers/iommu/dmar.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  *
22  * This file implements early detection/parsing of Remapping Devices
23  * reported to the OS by the BIOS via the DMA remapping reporting (DMAR) ACPI
24  * tables.
25  *
26  * These routines are used by both DMA-remapping and Interrupt-remapping
27  */
28
29 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt /* has to precede printk.h */
30
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/iova.h>
34 #include <linux/intel-iommu.h>
35 #include <linux/timer.h>
36 #include <linux/irq.h>
37 #include <linux/interrupt.h>
38 #include <linux/tboot.h>
39 #include <linux/dmi.h>
40 #include <linux/slab.h>
41 #include <linux/iommu.h>
42 #include <asm/irq_remapping.h>
43 #include <asm/iommu_table.h>
44
45 #include "irq_remapping.h"
46
47 /*
48  * Assumptions:
49  * 1) The hotplug framework guarantees that a DMAR unit will be hot-added
50  *    before the I/O devices managed by that unit.
51  * 2) The hotplug framework guarantees that a DMAR unit will be hot-removed
52  *    after the I/O devices managed by that unit.
53  * 3) Hotplug events are rare.
54  *
55  * Locking rules for DMA and interrupt remapping related global data structures:
56  * 1) Use dmar_global_lock in process context
57  * 2) Use RCU in interrupt context
58  */
59 DECLARE_RWSEM(dmar_global_lock);
60 LIST_HEAD(dmar_drhd_units);
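
/*
 * Illustrative sketch (not used by this file): how the two locking rules
 * above look in practice.  dmar_global_lock, dmar_drhd_units and
 * for_each_drhd_unit() are the real symbols defined here; use() is a
 * placeholder.
 *
 *	struct dmar_drhd_unit *drhd;
 *
 *	// Process context: take the global rwsem.
 *	down_read(&dmar_global_lock);
 *	for_each_drhd_unit(drhd)
 *		use(drhd);
 *	up_read(&dmar_global_lock);
 *
 *	// Interrupt/atomic context: rely on RCU instead.
 *	rcu_read_lock();
 *	for_each_drhd_unit(drhd)
 *		use(drhd);
 *	rcu_read_unlock();
 */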
61
62 struct acpi_table_header * __initdata dmar_tbl;
63 static acpi_size dmar_tbl_size;
64 static int dmar_dev_scope_status = 1;
65
66 static int alloc_iommu(struct dmar_drhd_unit *drhd);
67 static void free_iommu(struct intel_iommu *iommu);
68
69 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
70 {
71         /*
72          * add INCLUDE_ALL at the tail, so a scan of the list will find it
73          * at the very end.
74          */
75         if (drhd->include_all)
76                 list_add_tail_rcu(&drhd->list, &dmar_drhd_units);
77         else
78                 list_add_rcu(&drhd->list, &dmar_drhd_units);
79 }
80
81 void *dmar_alloc_dev_scope(void *start, void *end, int *cnt)
82 {
83         struct acpi_dmar_device_scope *scope;
84
85         *cnt = 0;
86         while (start < end) {
87                 scope = start;
88                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_NAMESPACE ||
89                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
90                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
91                         (*cnt)++;
92                 else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
93                         scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
94                         pr_warn("Unsupported device scope\n");
95                 }
96                 start += scope->length;
97         }
98         if (*cnt == 0)
99                 return NULL;
100
101         return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL);
102 }
103
104 void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt)
105 {
106         int i;
107         struct device *tmp_dev;
108
109         if (*devices && *cnt) {
110                 for_each_active_dev_scope(*devices, *cnt, i, tmp_dev)
111                         put_device(tmp_dev);
112                 kfree(*devices);
113         }
114
115         *devices = NULL;
116         *cnt = 0;
117 }
118
119 /* Optimize out kzalloc()/kfree() for normal cases */
120 static char dmar_pci_notify_info_buf[64];
121
122 static struct dmar_pci_notify_info *
123 dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
124 {
125         int level = 0;
126         size_t size;
127         struct pci_dev *tmp;
128         struct dmar_pci_notify_info *info;
129
130         BUG_ON(dev->is_virtfn);
131
132         /* Only generate path[] for device addition event */
133         if (event == BUS_NOTIFY_ADD_DEVICE)
134                 for (tmp = dev; tmp; tmp = tmp->bus->self)
135                         level++;
136
137         size = sizeof(*info) + level * sizeof(struct acpi_dmar_pci_path);
138         if (size <= sizeof(dmar_pci_notify_info_buf)) {
139                 info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf;
140         } else {
141                 info = kzalloc(size, GFP_KERNEL);
142                 if (!info) {
143                         pr_warn("Out of memory when allocating notify_info "
144                                 "for %s.\n", pci_name(dev));
145                         if (dmar_dev_scope_status == 0)
146                                 dmar_dev_scope_status = -ENOMEM;
147                         return NULL;
148                 }
149         }
150
151         info->event = event;
152         info->dev = dev;
153         info->seg = pci_domain_nr(dev->bus);
154         info->level = level;
155         if (event == BUS_NOTIFY_ADD_DEVICE) {
156                 for (tmp = dev; tmp; tmp = tmp->bus->self) {
157                         level--;
158                         info->path[level].bus = tmp->bus->number;
159                         info->path[level].device = PCI_SLOT(tmp->devfn);
160                         info->path[level].function = PCI_FUNC(tmp->devfn);
161                         if (pci_is_root_bus(tmp->bus))
162                                 info->bus = tmp->bus->number;
163                 }
164         }
165
166         return info;
167 }
168
169 static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info)
170 {
171         if ((void *)info != dmar_pci_notify_info_buf)
172                 kfree(info);
173 }
174
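/*
 * Match the PCI path recorded in @info (built by dmar_alloc_pci_notify_info())
 * against an ACPI device scope path of @count entries starting at @bus.  If
 * the strict match fails and the scope describes a single entry, fall back to
 * comparing only the device's own bus/device/function, to cope with broken
 * firmware (see the FW_BUG message below).
 */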
175 static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus,
176                                 struct acpi_dmar_pci_path *path, int count)
177 {
178         int i;
179
180         if (info->bus != bus)
181                 goto fallback;
182         if (info->level != count)
183                 goto fallback;
184
185         for (i = 0; i < count; i++) {
186                 if (path[i].device != info->path[i].device ||
187                     path[i].function != info->path[i].function)
188                         goto fallback;
189         }
190
191         return true;
192
193 fallback:
194
195         if (count != 1)
196                 return false;
197
198         i = info->level - 1;
199         if (bus              == info->path[i].bus &&
200             path[0].device   == info->path[i].device &&
201             path[0].function == info->path[i].function) {
202                 pr_info(FW_BUG "RMRR entry for device %02x:%02x.%x is broken - applying workaround\n",
203                         bus, path[0].device, path[0].function);
204                 return true;
205         }
206
207         return false;
208 }
209
210 /* Return: > 0 if match found, 0 if no match found, < 0 if error happens */
211 int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
212                           void *start, void *end, u16 segment,
213                           struct dmar_dev_scope *devices,
214                           int devices_cnt)
215 {
216         int i, level;
217         struct device *tmp, *dev = &info->dev->dev;
218         struct acpi_dmar_device_scope *scope;
219         struct acpi_dmar_pci_path *path;
220
221         if (segment != info->seg)
222                 return 0;
223
224         for (; start < end; start += scope->length) {
225                 scope = start;
226                 if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
227                     scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
228                         continue;
229
230                 path = (struct acpi_dmar_pci_path *)(scope + 1);
231                 level = (scope->length - sizeof(*scope)) / sizeof(*path);
232                 if (!dmar_match_pci_path(info, scope->bus, path, level))
233                         continue;
234
235                 if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT) ^
236                     (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL)) {
237                         pr_warn("Device scope type does not match for %s\n",
238                                 pci_name(info->dev));
239                         return -EINVAL;
240                 }
241
242                 for_each_dev_scope(devices, devices_cnt, i, tmp)
243                         if (tmp == NULL) {
244                                 devices[i].bus = info->dev->bus->number;
245                                 devices[i].devfn = info->dev->devfn;
246                                 rcu_assign_pointer(devices[i].dev,
247                                                    get_device(dev));
248                                 return 1;
249                         }
250                 BUG_ON(i >= devices_cnt);
251         }
252
253         return 0;
254 }
255
256 int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment,
257                           struct dmar_dev_scope *devices, int count)
258 {
259         int index;
260         struct device *tmp;
261
262         if (info->seg != segment)
263                 return 0;
264
265         for_each_active_dev_scope(devices, count, index, tmp)
266                 if (tmp == &info->dev->dev) {
267                         RCU_INIT_POINTER(devices[index].dev, NULL);
268                         synchronize_rcu();
269                         put_device(tmp);
270                         return 1;
271                 }
272
273         return 0;
274 }
275
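/*
 * Handle a newly added PCI device: try to attach it to the device scope of
 * each non-INCLUDE_ALL DRHD unit, then let the DMA-remapping code update its
 * RMRR/ATSR scopes via dmar_iommu_notify_scope_dev().  Failures are latched
 * in dmar_dev_scope_status.
 */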
276 static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
277 {
278         int ret = 0;
279         struct dmar_drhd_unit *dmaru;
280         struct acpi_dmar_hardware_unit *drhd;
281
282         for_each_drhd_unit(dmaru) {
283                 if (dmaru->include_all)
284                         continue;
285
286                 drhd = container_of(dmaru->hdr,
287                                     struct acpi_dmar_hardware_unit, header);
288                 ret = dmar_insert_dev_scope(info, (void *)(drhd + 1),
289                                 ((void *)drhd) + drhd->header.length,
290                                 dmaru->segment,
291                                 dmaru->devices, dmaru->devices_cnt);
292                 if (ret != 0)
293                         break;
294         }
295         if (ret >= 0)
296                 ret = dmar_iommu_notify_scope_dev(info);
297         if (ret < 0 && dmar_dev_scope_status == 0)
298                 dmar_dev_scope_status = ret;
299
300         return ret;
301 }
302
303 static void dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
304 {
305         struct dmar_drhd_unit *dmaru;
306
307         for_each_drhd_unit(dmaru)
308                 if (dmar_remove_dev_scope(info, dmaru->segment,
309                         dmaru->devices, dmaru->devices_cnt))
310                         break;
311         dmar_iommu_notify_scope_dev(info);
312 }
313
314 static int dmar_pci_bus_notifier(struct notifier_block *nb,
315                                  unsigned long action, void *data)
316 {
317         struct pci_dev *pdev = to_pci_dev(data);
318         struct dmar_pci_notify_info *info;
319
320         /* Only care about add/remove events for physical functions */
321         if (pdev->is_virtfn)
322                 return NOTIFY_DONE;
323         if (action != BUS_NOTIFY_ADD_DEVICE && action != BUS_NOTIFY_DEL_DEVICE)
324                 return NOTIFY_DONE;
325
326         info = dmar_alloc_pci_notify_info(pdev, action);
327         if (!info)
328                 return NOTIFY_DONE;
329
330         down_write(&dmar_global_lock);
331         if (action == BUS_NOTIFY_ADD_DEVICE)
332                 dmar_pci_bus_add_dev(info);
333         else if (action == BUS_NOTIFY_DEL_DEVICE)
334                 dmar_pci_bus_del_dev(info);
335         up_write(&dmar_global_lock);
336
337         dmar_free_pci_notify_info(info);
338
339         return NOTIFY_OK;
340 }
341
342 static struct notifier_block dmar_pci_bus_nb = {
343         .notifier_call = dmar_pci_bus_notifier,
344         .priority = INT_MIN,
345 };
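
/*
 * dmar_pci_bus_nb is registered against pci_bus_type in
 * dmar_dev_scope_init().  Its INT_MIN priority makes it run after all
 * other PCI bus notifiers for the same event.
 */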
346
347 /**
348  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
349  * structure which uniquely represents one DMA remapping hardware unit
350  * present in the platform
351  */
352 static int __init
353 dmar_parse_one_drhd(struct acpi_dmar_header *header)
354 {
355         struct acpi_dmar_hardware_unit *drhd;
356         struct dmar_drhd_unit *dmaru;
357         int ret = 0;
358
359         drhd = (struct acpi_dmar_hardware_unit *)header;
360         dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
361         if (!dmaru)
362                 return -ENOMEM;
363
364         dmaru->hdr = header;
365         dmaru->reg_base_addr = drhd->address;
366         dmaru->segment = drhd->segment;
367         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
368         dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
369                                               ((void *)drhd) + drhd->header.length,
370                                               &dmaru->devices_cnt);
371         if (dmaru->devices_cnt && dmaru->devices == NULL) {
372                 kfree(dmaru);
373                 return -ENOMEM;
374         }
375
376         ret = alloc_iommu(dmaru);
377         if (ret) {
378                 dmar_free_dev_scope(&dmaru->devices,
379                                     &dmaru->devices_cnt);
380                 kfree(dmaru);
381                 return ret;
382         }
383         dmar_register_drhd_unit(dmaru);
384         return 0;
385 }
386
387 static void dmar_free_drhd(struct dmar_drhd_unit *dmaru)
388 {
389         if (dmaru->devices && dmaru->devices_cnt)
390                 dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
391         if (dmaru->iommu)
392                 free_iommu(dmaru->iommu);
393         kfree(dmaru);
394 }
395
396 static int __init dmar_parse_one_andd(struct acpi_dmar_header *header)
397 {
398         struct acpi_dmar_andd *andd = (void *)header;
399
400         /* Check for NUL termination within the designated length */
401         if (strnlen(andd->device_name, header->length - 8) == header->length - 8) {
402                 WARN_TAINT(1, TAINT_FIRMWARE_WORKAROUND,
403                            "Your BIOS is broken; ANDD object name is not NUL-terminated\n"
404                            "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
405                            dmi_get_system_info(DMI_BIOS_VENDOR),
406                            dmi_get_system_info(DMI_BIOS_VERSION),
407                            dmi_get_system_info(DMI_PRODUCT_VERSION));
408                 return -EINVAL;
409         }
410         pr_info("ANDD device: %x name: %s\n", andd->device_number,
411                 andd->device_name);
412
413         return 0;
414 }
415
416 #ifdef CONFIG_ACPI_NUMA
417 static int __init
418 dmar_parse_one_rhsa(struct acpi_dmar_header *header)
419 {
420         struct acpi_dmar_rhsa *rhsa;
421         struct dmar_drhd_unit *drhd;
422
423         rhsa = (struct acpi_dmar_rhsa *)header;
424         for_each_drhd_unit(drhd) {
425                 if (drhd->reg_base_addr == rhsa->base_address) {
426                         int node = acpi_map_pxm_to_node(rhsa->proximity_domain);
427
428                         if (!node_online(node))
429                                 node = -1;
430                         drhd->iommu->node = node;
431                         return 0;
432                 }
433         }
434         WARN_TAINT(
435                 1, TAINT_FIRMWARE_WORKAROUND,
436                 "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
437                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
438                 rhsa->base_address,
439                 dmi_get_system_info(DMI_BIOS_VENDOR),
440                 dmi_get_system_info(DMI_BIOS_VERSION),
441                 dmi_get_system_info(DMI_PRODUCT_VERSION));
442
443         return 0;
444 }
445 #endif
446
447 static void __init
448 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
449 {
450         struct acpi_dmar_hardware_unit *drhd;
451         struct acpi_dmar_reserved_memory *rmrr;
452         struct acpi_dmar_atsr *atsr;
453         struct acpi_dmar_rhsa *rhsa;
454
455         switch (header->type) {
456         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
457                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
458                                     header);
459                 pr_info("DRHD base: %#016Lx flags: %#x\n",
460                         (unsigned long long)drhd->address, drhd->flags);
461                 break;
462         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
463                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
464                                     header);
465                 pr_info("RMRR base: %#016Lx end: %#016Lx\n",
466                         (unsigned long long)rmrr->base_address,
467                         (unsigned long long)rmrr->end_address);
468                 break;
469         case ACPI_DMAR_TYPE_ROOT_ATS:
470                 atsr = container_of(header, struct acpi_dmar_atsr, header);
471                 pr_info("ATSR flags: %#x\n", atsr->flags);
472                 break;
473         case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
474                 rhsa = container_of(header, struct acpi_dmar_rhsa, header);
475                 pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
476                        (unsigned long long)rhsa->base_address,
477                        rhsa->proximity_domain);
478                 break;
479         case ACPI_DMAR_TYPE_NAMESPACE:
480                 /* We don't print this here because we need to sanity-check
481                    it first. So print it in dmar_parse_one_andd() instead. */
482                 break;
483         }
484 }
485
486 /**
487  * dmar_table_detect - checks to see if the platform supports DMAR devices
488  */
489 static int __init dmar_table_detect(void)
490 {
491         acpi_status status = AE_OK;
492
493         /* if we can find the DMAR table, then there are DMAR devices */
494         status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
495                                 (struct acpi_table_header **)&dmar_tbl,
496                                 &dmar_tbl_size);
497
498         if (ACPI_SUCCESS(status) && !dmar_tbl) {
499                 pr_warn("Unable to map DMAR\n");
500                 status = AE_NOT_FOUND;
501         }
502
503         return (ACPI_SUCCESS(status) ? 1 : 0);
504 }
505
506 /**
507  * parse_dmar_table - parses the DMA reporting table
508  */
509 static int __init
510 parse_dmar_table(void)
511 {
512         struct acpi_table_dmar *dmar;
513         struct acpi_dmar_header *entry_header;
514         int ret = 0;
515         int drhd_count = 0;
516
517         /*
518          * Do it again; the earlier dmar_tbl mapping could have been done
519          * with the fixed (early) map.
520          */
521         dmar_table_detect();
522
523         /*
524          * ACPI tables may not be DMA protected by tboot, so use DMAR copy
525          * SINIT saved in SinitMleData in TXT heap (which is DMA protected)
526          */
527         dmar_tbl = tboot_get_dmar_table(dmar_tbl);
528
529         dmar = (struct acpi_table_dmar *)dmar_tbl;
530         if (!dmar)
531                 return -ENODEV;
532
533         if (dmar->width < PAGE_SHIFT - 1) {
534                 pr_warn("Invalid DMAR haw\n");
535                 return -EINVAL;
536         }
537
538         pr_info("Host address width %d\n", dmar->width + 1);
539
540         entry_header = (struct acpi_dmar_header *)(dmar + 1);
541         while (((unsigned long)entry_header) <
542                         (((unsigned long)dmar) + dmar_tbl->length)) {
543                 /* Avoid looping forever on bad ACPI tables */
544                 if (entry_header->length == 0) {
545                         pr_warn("Invalid 0-length structure\n");
546                         ret = -EINVAL;
547                         break;
548                 }
549
550                 dmar_table_print_dmar_entry(entry_header);
551
552                 switch (entry_header->type) {
553                 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
554                         drhd_count++;
555                         ret = dmar_parse_one_drhd(entry_header);
556                         break;
557                 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
558                         ret = dmar_parse_one_rmrr(entry_header);
559                         break;
560                 case ACPI_DMAR_TYPE_ROOT_ATS:
561                         ret = dmar_parse_one_atsr(entry_header);
562                         break;
563                 case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
564 #ifdef CONFIG_ACPI_NUMA
565                         ret = dmar_parse_one_rhsa(entry_header);
566 #endif
567                         break;
568                 case ACPI_DMAR_TYPE_NAMESPACE:
569                         ret = dmar_parse_one_andd(entry_header);
570                         break;
571                 default:
572                         pr_warn("Unknown DMAR structure type %d\n",
573                                 entry_header->type);
574                         ret = 0; /* for forward compatibility */
575                         break;
576                 }
577                 if (ret)
578                         break;
579
580                 entry_header = ((void *)entry_header + entry_header->length);
581         }
582         if (drhd_count == 0)
583                 pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");
584         return ret;
585 }
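
/*
 * Layout of the ACPI DMAR table walked by parse_dmar_table() above
 * (illustrative):
 *
 *	struct acpi_table_dmar   - ACPI header, host address width, flags
 *	struct acpi_dmar_header  - type (DRHD/RMRR/ATSR/RHSA/ANDD), length
 *	  ... type-specific body and device scope entries ...
 *	struct acpi_dmar_header
 *	  ...
 *
 * Each remapping structure begins with the common header, whose length
 * field is used to step to the next entry; a zero length aborts the walk.
 */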
586
587 static int dmar_pci_device_match(struct dmar_dev_scope devices[],
588                                  int cnt, struct pci_dev *dev)
589 {
590         int index;
591         struct device *tmp;
592
593         while (dev) {
594                 for_each_active_dev_scope(devices, cnt, index, tmp)
595                         if (dev_is_pci(tmp) && dev == to_pci_dev(tmp))
596                                 return 1;
597
598                 /* Check our parent */
599                 dev = dev->bus->self;
600         }
601
602         return 0;
603 }
604
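/*
 * Find the DRHD unit covering @dev: either a unit whose device scope lists
 * the device (or one of its parent bridges), or an INCLUDE_ALL unit on the
 * same PCI segment.  A VF is mapped to its PF first.  The list is walked
 * under rcu_read_lock(), so this is safe from any context.
 */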
605 struct dmar_drhd_unit *
606 dmar_find_matched_drhd_unit(struct pci_dev *dev)
607 {
608         struct dmar_drhd_unit *dmaru;
609         struct acpi_dmar_hardware_unit *drhd;
610
611         dev = pci_physfn(dev);
612
613         rcu_read_lock();
614         for_each_drhd_unit(dmaru) {
615                 drhd = container_of(dmaru->hdr,
616                                     struct acpi_dmar_hardware_unit,
617                                     header);
618
619                 if (dmaru->include_all &&
620                     drhd->segment == pci_domain_nr(dev->bus))
621                         goto out;
622
623                 if (dmar_pci_device_match(dmaru->devices,
624                                           dmaru->devices_cnt, dev))
625                         goto out;
626         }
627         dmaru = NULL;
628 out:
629         rcu_read_unlock();
630
631         return dmaru;
632 }
633
634 static void __init dmar_acpi_insert_dev_scope(u8 device_number,
635                                               struct acpi_device *adev)
636 {
637         struct dmar_drhd_unit *dmaru;
638         struct acpi_dmar_hardware_unit *drhd;
639         struct acpi_dmar_device_scope *scope;
640         struct device *tmp;
641         int i;
642         struct acpi_dmar_pci_path *path;
643
644         for_each_drhd_unit(dmaru) {
645                 drhd = container_of(dmaru->hdr,
646                                     struct acpi_dmar_hardware_unit,
647                                     header);
648
649                 for (scope = (void *)(drhd + 1);
650                      (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length;
651                      scope = ((void *)scope) + scope->length) {
652                         if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_NAMESPACE)
653                                 continue;
654                         if (scope->enumeration_id != device_number)
655                                 continue;
656
657                         path = (void *)(scope + 1);
658                         pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n",
659                                 dev_name(&adev->dev), dmaru->reg_base_addr,
660                                 scope->bus, path->device, path->function);
661                         for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp)
662                                 if (tmp == NULL) {
663                                         dmaru->devices[i].bus = scope->bus;
664                                         dmaru->devices[i].devfn = PCI_DEVFN(path->device,
665                                                                             path->function);
666                                         rcu_assign_pointer(dmaru->devices[i].dev,
667                                                            get_device(&adev->dev));
668                                         return;
669                                 }
670                         BUG_ON(i >= dmaru->devices_cnt);
671                 }
672         }
673         pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n",
674                 device_number, dev_name(&adev->dev));
675 }
676
677 static int __init dmar_acpi_dev_scope_init(void)
678 {
679         struct acpi_dmar_andd *andd;
680
681         if (dmar_tbl == NULL)
682                 return -ENODEV;
683
684         for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar);
685              ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length;
686              andd = ((void *)andd) + andd->header.length) {
687                 if (andd->header.type == ACPI_DMAR_TYPE_NAMESPACE) {
688                         acpi_handle h;
689                         struct acpi_device *adev;
690
691                         if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT,
692                                                           andd->device_name,
693                                                           &h))) {
694                                 pr_err("Failed to find handle for ACPI object %s\n",
695                                        andd->device_name);
696                                 continue;
697                         }
698                         if (acpi_bus_get_device(h, &adev)) {
699                                 pr_err("Failed to get device for ACPI object %s\n",
700                                        andd->device_name);
701                                 continue;
702                         }
703                         dmar_acpi_insert_dev_scope(andd->device_number, adev);
704                 }
705         }
706         return 0;
707 }
708
709 int __init dmar_dev_scope_init(void)
710 {
711         struct pci_dev *dev = NULL;
712         struct dmar_pci_notify_info *info;
713
714         if (dmar_dev_scope_status != 1)
715                 return dmar_dev_scope_status;
716
717         if (list_empty(&dmar_drhd_units)) {
718                 dmar_dev_scope_status = -ENODEV;
719         } else {
720                 dmar_dev_scope_status = 0;
721
722                 dmar_acpi_dev_scope_init();
723
724                 for_each_pci_dev(dev) {
725                         if (dev->is_virtfn)
726                                 continue;
727
728                         info = dmar_alloc_pci_notify_info(dev,
729                                         BUS_NOTIFY_ADD_DEVICE);
730                         if (!info) {
731                                 return dmar_dev_scope_status;
732                         } else {
733                                 dmar_pci_bus_add_dev(info);
734                                 dmar_free_pci_notify_info(info);
735                         }
736                 }
737
738                 bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
739         }
740
741         return dmar_dev_scope_status;
742 }
743
744
745 int __init dmar_table_init(void)
746 {
747         static int dmar_table_initialized;
748         int ret;
749
750         if (dmar_table_initialized == 0) {
751                 ret = parse_dmar_table();
752                 if (ret < 0) {
753                         if (ret != -ENODEV)
754                                 pr_info("Failed to parse DMAR table.\n");
755                 } else if (list_empty(&dmar_drhd_units)) {
756                         pr_info("No DMAR devices found\n");
757                         ret = -ENODEV;
758                 }
759
760                 if (ret < 0)
761                         dmar_table_initialized = ret;
762                 else
763                         dmar_table_initialized = 1;
764         }
765
766         return dmar_table_initialized < 0 ? dmar_table_initialized : 0;
767 }
768
769 static void warn_invalid_dmar(u64 addr, const char *message)
770 {
771         WARN_TAINT_ONCE(
772                 1, TAINT_FIRMWARE_WORKAROUND,
773                 "Your BIOS is broken; DMAR reported at address %llx%s!\n"
774                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
775                 addr, message,
776                 dmi_get_system_info(DMI_BIOS_VENDOR),
777                 dmi_get_system_info(DMI_BIOS_VERSION),
778                 dmi_get_system_info(DMI_PRODUCT_VERSION));
779 }
780
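/*
 * Early sanity check of the DMAR table: every DRHD must report a non-zero
 * register base address, and the capability registers at that address must
 * not read back as all ones.  Returns 1 if the table looks usable, 0 if not.
 */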
781 static int __init check_zero_address(void)
782 {
783         struct acpi_table_dmar *dmar;
784         struct acpi_dmar_header *entry_header;
785         struct acpi_dmar_hardware_unit *drhd;
786
787         dmar = (struct acpi_table_dmar *)dmar_tbl;
788         entry_header = (struct acpi_dmar_header *)(dmar + 1);
789
790         while (((unsigned long)entry_header) <
791                         (((unsigned long)dmar) + dmar_tbl->length)) {
792                 /* Avoid looping forever on bad ACPI tables */
793                 if (entry_header->length == 0) {
794                         pr_warn("Invalid 0-length structure\n");
795                         return 0;
796                 }
797
798                 if (entry_header->type == ACPI_DMAR_TYPE_HARDWARE_UNIT) {
799                         void __iomem *addr;
800                         u64 cap, ecap;
801
802                         drhd = (void *)entry_header;
803                         if (!drhd->address) {
804                                 warn_invalid_dmar(0, "");
805                                 goto failed;
806                         }
807
808                         addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
809                         if (!addr) {
810                                 pr_warn("IOMMU: can't validate: %llx\n", drhd->address);
811                                 goto failed;
812                         }
813                         cap = dmar_readq(addr + DMAR_CAP_REG);
814                         ecap = dmar_readq(addr + DMAR_ECAP_REG);
815                         early_iounmap(addr, VTD_PAGE_SIZE);
816                         if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
817                                 warn_invalid_dmar(drhd->address,
818                                                   " returns all ones");
819                                 goto failed;
820                         }
821                 }
822
823                 entry_header = ((void *)entry_header + entry_header->length);
824         }
825         return 1;
826
827 failed:
828         return 0;
829 }
830
831 int __init detect_intel_iommu(void)
832 {
833         int ret;
834
835         down_write(&dmar_global_lock);
836         ret = dmar_table_detect();
837         if (ret)
838                 ret = check_zero_address();
839
840         if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
841                 iommu_detected = 1;
842                 /* Make sure ACS will be enabled */
843                 pci_request_acs();
844         }
845
846 #ifdef CONFIG_X86
847         if (ret)
848                 x86_init.iommu.iommu_init = intel_iommu_init;
849 #endif
850
851         early_acpi_os_unmap_memory((void __iomem *)dmar_tbl, dmar_tbl_size);
852         dmar_tbl = NULL;
853         up_write(&dmar_global_lock);
854
855         return ret ? 1 : -ENODEV;
856 }
857
858
859 static void unmap_iommu(struct intel_iommu *iommu)
860 {
861         iounmap(iommu->reg);
862         release_mem_region(iommu->reg_phys, iommu->reg_size);
863 }
864
865 /**
866  * map_iommu: map the iommu's registers
867  * @iommu: the iommu to map
868  * @phys_addr: the physical address of the base register
869  *
870  * Memory map the iommu's registers.  Start with a single page, and
871  * possibly expand if that turns out to be insufficient.
872  */
873 static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
874 {
875         int map_size, err = 0;
876
877         iommu->reg_phys = phys_addr;
878         iommu->reg_size = VTD_PAGE_SIZE;
879
880         if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
881                 pr_err("IOMMU: can't reserve memory\n");
882                 err = -EBUSY;
883                 goto out;
884         }
885
886         iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
887         if (!iommu->reg) {
888                 pr_err("IOMMU: can't map the region\n");
889                 err = -ENOMEM;
890                 goto release;
891         }
892
893         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
894         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
895
896         if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
897                 err = -EINVAL;
898                 warn_invalid_dmar(phys_addr, " returns all ones");
899                 goto unmap;
900         }
901
902         /* the registers might be more than one page */
903         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
904                          cap_max_fault_reg_offset(iommu->cap));
905         map_size = VTD_PAGE_ALIGN(map_size);
906         if (map_size > iommu->reg_size) {
907                 iounmap(iommu->reg);
908                 release_mem_region(iommu->reg_phys, iommu->reg_size);
909                 iommu->reg_size = map_size;
910                 if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
911                                         iommu->name)) {
912                         pr_err("IOMMU: can't reserve memory\n");
913                         err = -EBUSY;
914                         goto out;
915                 }
916                 iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
917                 if (!iommu->reg) {
918                         pr_err("IOMMU: can't map the region\n");
919                         err = -ENOMEM;
920                         goto release;
921                 }
922         }
923         err = 0;
924         goto out;
925
926 unmap:
927         iounmap(iommu->reg);
928 release:
929         release_mem_region(iommu->reg_phys, iommu->reg_size);
930 out:
931         return err;
932 }
933
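/*
 * Allocate and minimally initialize the intel_iommu for one DRHD: map the
 * register window, read CAP/ECAP, compute the supported and maximum AGAW,
 * and mirror the enable bits already set by firmware from GSTS into
 * iommu->gcmd so later code sees a consistent view.
 */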
934 static int alloc_iommu(struct dmar_drhd_unit *drhd)
935 {
936         struct intel_iommu *iommu;
937         u32 ver, sts;
938         static int iommu_allocated = 0;
939         int agaw = 0;
940         int msagaw = 0;
941         int err;
942
943         if (!drhd->reg_base_addr) {
944                 warn_invalid_dmar(0, "");
945                 return -EINVAL;
946         }
947
948         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
949         if (!iommu)
950                 return -ENOMEM;
951
952         iommu->seq_id = iommu_allocated++;
953         sprintf(iommu->name, "dmar%d", iommu->seq_id);
954
955         err = map_iommu(iommu, drhd->reg_base_addr);
956         if (err) {
957                 pr_err("IOMMU: failed to map %s\n", iommu->name);
958                 goto error;
959         }
960
961         err = -EINVAL;
962         agaw = iommu_calculate_agaw(iommu);
963         if (agaw < 0) {
964                 pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
965                         iommu->seq_id);
966                 goto err_unmap;
967         }
968         msagaw = iommu_calculate_max_sagaw(iommu);
969         if (msagaw < 0) {
970                 pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
971                         iommu->seq_id);
972                 goto err_unmap;
973         }
974         iommu->agaw = agaw;
975         iommu->msagaw = msagaw;
976         iommu->segment = drhd->segment;
977
978         iommu->node = -1;
979
980         ver = readl(iommu->reg + DMAR_VER_REG);
981         pr_info("IOMMU %d: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
982                 iommu->seq_id,
983                 (unsigned long long)drhd->reg_base_addr,
984                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
985                 (unsigned long long)iommu->cap,
986                 (unsigned long long)iommu->ecap);
987
988         /* Reflect status in gcmd */
989         sts = readl(iommu->reg + DMAR_GSTS_REG);
990         if (sts & DMA_GSTS_IRES)
991                 iommu->gcmd |= DMA_GCMD_IRE;
992         if (sts & DMA_GSTS_TES)
993                 iommu->gcmd |= DMA_GCMD_TE;
994         if (sts & DMA_GSTS_QIES)
995                 iommu->gcmd |= DMA_GCMD_QIE;
996
997         raw_spin_lock_init(&iommu->register_lock);
998
999         drhd->iommu = iommu;
1000
1001         if (intel_iommu_enabled)
1002                 iommu->iommu_dev = iommu_device_create(NULL, iommu,
1003                                                        intel_iommu_groups,
1004                                                        iommu->name);
1005
1006         return 0;
1007
1008  err_unmap:
1009         unmap_iommu(iommu);
1010  error:
1011         kfree(iommu);
1012         return err;
1013 }
1014
1015 static void free_iommu(struct intel_iommu *iommu)
1016 {
1017         iommu_device_destroy(iommu->iommu_dev);
1018
1019         if (iommu->irq) {
1020                 free_irq(iommu->irq, iommu);
1021                 irq_set_handler_data(iommu->irq, NULL);
1022                 dmar_free_hwirq(iommu->irq);
1023         }
1024
1025         if (iommu->qi) {
1026                 free_page((unsigned long)iommu->qi->desc);
1027                 kfree(iommu->qi->desc_status);
1028                 kfree(iommu->qi);
1029         }
1030
1031         if (iommu->reg)
1032                 unmap_iommu(iommu);
1033
1034         kfree(iommu);
1035 }
1036
1037 /*
1038  * Reclaim all the submitted descriptors which have completed their work.
1039  */
1040 static inline void reclaim_free_desc(struct q_inval *qi)
1041 {
1042         while (qi->desc_status[qi->free_tail] == QI_DONE ||
1043                qi->desc_status[qi->free_tail] == QI_ABORT) {
1044                 qi->desc_status[qi->free_tail] = QI_FREE;
1045                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
1046                 qi->free_cnt++;
1047         }
1048 }
1049
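/*
 * Check DMAR_FSTS_REG for queued-invalidation errors affecting the
 * descriptor at @index:
 *  - IQE: the queue head points at an invalid descriptor; if it is ours,
 *    overwrite it with the following wait descriptor so the queue can make
 *    progress, and return -EINVAL.
 *  - ITE: a device invalidation timed out; in-flight wait descriptors are
 *    marked aborted, and -EAGAIN is returned if ours was, so the caller
 *    restarts the submission.
 *  - ICE: only the completion error bit is cleared.
 */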
1050 static int qi_check_fault(struct intel_iommu *iommu, int index)
1051 {
1052         u32 fault;
1053         int head, tail;
1054         struct q_inval *qi = iommu->qi;
1055         int wait_index = (index + 1) % QI_LENGTH;
1056
1057         if (qi->desc_status[wait_index] == QI_ABORT)
1058                 return -EAGAIN;
1059
1060         fault = readl(iommu->reg + DMAR_FSTS_REG);
1061
1062         /*
1063          * If IQE happens, the head points to the descriptor associated
1064          * with the error. No new descriptors are fetched until the IQE
1065          * is cleared.
1066          */
1067         if (fault & DMA_FSTS_IQE) {
1068                 head = readl(iommu->reg + DMAR_IQH_REG);
1069                 if ((head >> DMAR_IQ_SHIFT) == index) {
1070                         pr_err("VT-d detected invalid descriptor: "
1071                                 "low=%llx, high=%llx\n",
1072                                 (unsigned long long)qi->desc[index].low,
1073                                 (unsigned long long)qi->desc[index].high);
1074                         memcpy(&qi->desc[index], &qi->desc[wait_index],
1075                                         sizeof(struct qi_desc));
1076                         __iommu_flush_cache(iommu, &qi->desc[index],
1077                                         sizeof(struct qi_desc));
1078                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
1079                         return -EINVAL;
1080                 }
1081         }
1082
1083         /*
1084          * If ITE happens, all pending wait_desc commands are aborted.
1085          * No new descriptors are fetched until the ITE is cleared.
1086          */
1087         if (fault & DMA_FSTS_ITE) {
1088                 head = readl(iommu->reg + DMAR_IQH_REG);
1089                 head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
1090                 head |= 1;
1091                 tail = readl(iommu->reg + DMAR_IQT_REG);
1092                 tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
1093
1094                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
1095
1096                 do {
1097                         if (qi->desc_status[head] == QI_IN_USE)
1098                                 qi->desc_status[head] = QI_ABORT;
1099                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
1100                 } while (head != tail);
1101
1102                 if (qi->desc_status[wait_index] == QI_ABORT)
1103                         return -EAGAIN;
1104         }
1105
1106         if (fault & DMA_FSTS_ICE)
1107                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
1108
1109         return 0;
1110 }
1111
1112 /*
1113  * Submit the queued invalidation descriptor to the remapping
1114  * hardware unit and wait for its completion.
1115  */
1116 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
1117 {
1118         int rc;
1119         struct q_inval *qi = iommu->qi;
1120         struct qi_desc *hw, wait_desc;
1121         int wait_index, index;
1122         unsigned long flags;
1123
1124         if (!qi)
1125                 return 0;
1126
1127         hw = qi->desc;
1128
1129 restart:
1130         rc = 0;
1131
1132         raw_spin_lock_irqsave(&qi->q_lock, flags);
1133         while (qi->free_cnt < 3) {
1134                 raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1135                 cpu_relax();
1136                 raw_spin_lock_irqsave(&qi->q_lock, flags);
1137         }
1138
1139         index = qi->free_head;
1140         wait_index = (index + 1) % QI_LENGTH;
1141
1142         qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
1143
1144         hw[index] = *desc;
1145
1146         wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
1147                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
1148         wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
1149
1150         hw[wait_index] = wait_desc;
1151
1152         __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
1153         __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
1154
1155         qi->free_head = (qi->free_head + 2) % QI_LENGTH;
1156         qi->free_cnt -= 2;
1157
1158         /*
1159          * update the HW tail register indicating the presence of
1160          * new descriptors.
1161          */
1162         writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
1163
1164         while (qi->desc_status[wait_index] != QI_DONE) {
1165                 /*
1166                  * We leave interrupts disabled to prevent the interrupt
1167                  * context from queueing another cmd while a cmd is already submitted
1168                  * and waiting for completion on this cpu. This is to avoid
1169                  * a deadlock where the interrupt context can wait indefinitely
1170                  * for free slots in the queue.
1171                  */
1172                 rc = qi_check_fault(iommu, index);
1173                 if (rc)
1174                         break;
1175
1176                 raw_spin_unlock(&qi->q_lock);
1177                 cpu_relax();
1178                 raw_spin_lock(&qi->q_lock);
1179         }
1180
1181         qi->desc_status[index] = QI_DONE;
1182
1183         reclaim_free_desc(qi);
1184         raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1185
1186         if (rc == -EAGAIN)
1187                 goto restart;
1188
1189         return rc;
1190 }
1191
1192 /*
1193  * Flush the global interrupt entry cache.
1194  */
1195 void qi_global_iec(struct intel_iommu *iommu)
1196 {
1197         struct qi_desc desc;
1198
1199         desc.low = QI_IEC_TYPE;
1200         desc.high = 0;
1201
1202         /* should never fail */
1203         qi_submit_sync(&desc, iommu);
1204 }
1205
1206 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
1207                       u64 type)
1208 {
1209         struct qi_desc desc;
1210
1211         desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
1212                         | QI_CC_GRAN(type) | QI_CC_TYPE;
1213         desc.high = 0;
1214
1215         qi_submit_sync(&desc, iommu);
1216 }
1217
1218 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
1219                     unsigned int size_order, u64 type)
1220 {
1221         u8 dw = 0, dr = 0;
1222
1223         struct qi_desc desc;
1224         int ih = 0;
1225
1226         if (cap_write_drain(iommu->cap))
1227                 dw = 1;
1228
1229         if (cap_read_drain(iommu->cap))
1230                 dr = 1;
1231
1232         desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
1233                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
1234         desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
1235                 | QI_IOTLB_AM(size_order);
1236
1237         qi_submit_sync(&desc, iommu);
1238 }
1239
1240 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
1241                         u64 addr, unsigned mask)
1242 {
1243         struct qi_desc desc;
1244
1245         if (mask) {
1246                 BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
1247                 addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
1248                 desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
1249         } else
1250                 desc.high = QI_DEV_IOTLB_ADDR(addr);
1251
1252         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
1253                 qdep = 0;
1254
1255         desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
1256                    QI_DIOTLB_TYPE;
1257
1258         qi_submit_sync(&desc, iommu);
1259 }
1260
1261 /*
1262  * Disable Queued Invalidation interface.
1263  */
1264 void dmar_disable_qi(struct intel_iommu *iommu)
1265 {
1266         unsigned long flags;
1267         u32 sts;
1268         cycles_t start_time = get_cycles();
1269
1270         if (!ecap_qis(iommu->ecap))
1271                 return;
1272
1273         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1274
1275         sts = dmar_readq(iommu->reg + DMAR_GSTS_REG);
1276         if (!(sts & DMA_GSTS_QIES))
1277                 goto end;
1278
1279         /*
1280          * Give the hardware a chance to complete the pending invalidation requests.
1281          */
1282         while ((readl(iommu->reg + DMAR_IQT_REG) !=
1283                 readl(iommu->reg + DMAR_IQH_REG)) &&
1284                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
1285                 cpu_relax();
1286
1287         iommu->gcmd &= ~DMA_GCMD_QIE;
1288         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1289
1290         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
1291                       !(sts & DMA_GSTS_QIES), sts);
1292 end:
1293         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1294 }
1295
1296 /*
1297  * Enable queued invalidation.
1298  */
1299 static void __dmar_enable_qi(struct intel_iommu *iommu)
1300 {
1301         u32 sts;
1302         unsigned long flags;
1303         struct q_inval *qi = iommu->qi;
1304
1305         qi->free_head = qi->free_tail = 0;
1306         qi->free_cnt = QI_LENGTH;
1307
1308         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1309
1310         /* write zero to the tail reg */
1311         writel(0, iommu->reg + DMAR_IQT_REG);
1312
1313         dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
1314
1315         iommu->gcmd |= DMA_GCMD_QIE;
1316         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1317
1318         /* Make sure hardware completes it */
1319         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1320
1321         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1322 }
1323
1324 /*
1325  * Enable Queued Invalidation interface. This is a must to support
1326  * interrupt-remapping. Also used by DMA-remapping, which replaces
1327  * register based IOTLB invalidation.
1328  */
1329 int dmar_enable_qi(struct intel_iommu *iommu)
1330 {
1331         struct q_inval *qi;
1332         struct page *desc_page;
1333
1334         if (!ecap_qis(iommu->ecap))
1335                 return -ENOENT;
1336
1337         /*
1338          * queued invalidation is already set up and enabled.
1339          */
1340         if (iommu->qi)
1341                 return 0;
1342
1343         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1344         if (!iommu->qi)
1345                 return -ENOMEM;
1346
1347         qi = iommu->qi;
1348
1349
1350         desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0);
1351         if (!desc_page) {
1352                 kfree(qi);
1353                 iommu->qi = NULL;
1354                 return -ENOMEM;
1355         }
1356
1357         qi->desc = page_address(desc_page);
1358
1359         qi->desc_status = kzalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
1360         if (!qi->desc_status) {
1361                 free_page((unsigned long) qi->desc);
1362                 kfree(qi);
1363                 iommu->qi = NULL;
1364                 return -ENOMEM;
1365         }
1366
1367         raw_spin_lock_init(&qi->q_lock);
1368
1369         __dmar_enable_qi(iommu);
1370
1371         return 0;
1372 }
1373
1374 /* iommu interrupt handling. Most of it is MSI-like. */
1375
1376 enum faulttype {
1377         DMA_REMAP,
1378         INTR_REMAP,
1379         UNKNOWN,
1380 };
1381
1382 static const char *dma_remap_fault_reasons[] =
1383 {
1384         "Software",
1385         "Present bit in root entry is clear",
1386         "Present bit in context entry is clear",
1387         "Invalid context entry",
1388         "Access beyond MGAW",
1389         "PTE Write access is not set",
1390         "PTE Read access is not set",
1391         "Next page table ptr is invalid",
1392         "Root table address invalid",
1393         "Context table ptr is invalid",
1394         "non-zero reserved fields in RTP",
1395         "non-zero reserved fields in CTP",
1396         "non-zero reserved fields in PTE",
1397         "PCE for translation request specifies blocking",
1398 };
1399
1400 static const char *irq_remap_fault_reasons[] =
1401 {
1402         "Detected reserved fields in the decoded interrupt-remapped request",
1403         "Interrupt index exceeded the interrupt-remapping table size",
1404         "Present field in the IRTE entry is clear",
1405         "Error accessing interrupt-remapping table pointed by IRTA_REG",
1406         "Detected reserved fields in the IRTE entry",
1407         "Blocked a compatibility format interrupt request",
1408         "Blocked an interrupt request due to source-id verification failure",
1409 };
1410
1411 static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1412 {
1413         if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1414                                         ARRAY_SIZE(irq_remap_fault_reasons))) {
1415                 *fault_type = INTR_REMAP;
1416                 return irq_remap_fault_reasons[fault_reason - 0x20];
1417         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1418                 *fault_type = DMA_REMAP;
1419                 return dma_remap_fault_reasons[fault_reason];
1420         } else {
1421                 *fault_type = UNKNOWN;
1422                 return "Unknown";
1423         }
1424 }
1425
1426 void dmar_msi_unmask(struct irq_data *data)
1427 {
1428         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1429         unsigned long flag;
1430
1431         /* unmask it */
1432         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1433         writel(0, iommu->reg + DMAR_FECTL_REG);
1434         /* Read a reg to force flush the post write */
1435         readl(iommu->reg + DMAR_FECTL_REG);
1436         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1437 }
1438
1439 void dmar_msi_mask(struct irq_data *data)
1440 {
1441         unsigned long flag;
1442         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1443
1444         /* mask it */
1445         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1446         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1447         /* Read a reg to force flush the post write */
1448         readl(iommu->reg + DMAR_FECTL_REG);
1449         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1450 }
1451
1452 void dmar_msi_write(int irq, struct msi_msg *msg)
1453 {
1454         struct intel_iommu *iommu = irq_get_handler_data(irq);
1455         unsigned long flag;
1456
1457         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1458         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1459         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1460         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1461         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1462 }
1463
1464 void dmar_msi_read(int irq, struct msi_msg *msg)
1465 {
1466         struct intel_iommu *iommu = irq_get_handler_data(irq);
1467         unsigned long flag;
1468
1469         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1470         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1471         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1472         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1473         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1474 }
1475
1476 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1477                 u8 fault_reason, u16 source_id, unsigned long long addr)
1478 {
1479         const char *reason;
1480         int fault_type;
1481
1482         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1483
1484         if (fault_type == INTR_REMAP)
1485                 pr_err("INTR-REMAP: Request device [[%02x:%02x.%d] "
1486                        "fault index %llx\n"
1487                         "INTR-REMAP:[fault reason %02d] %s\n",
1488                         (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1489                         PCI_FUNC(source_id & 0xFF), addr >> 48,
1490                         fault_reason, reason);
1491         else
1492                 pr_err("DMAR:[%s] Request device [%02x:%02x.%d] "
1493                        "fault addr %llx \n"
1494                        "DMAR:[fault reason %02d] %s\n",
1495                        (type ? "DMA Read" : "DMA Write"),
1496                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1497                        PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1498         return 0;
1499 }
1500
1501 #define PRIMARY_FAULT_REG_LEN (16)
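/*
 * Primary fault interrupt handler: walk the fault recording registers
 * (PRIMARY_FAULT_REG_LEN bytes each, starting at the offset reported in the
 * capability register), log and clear each pending fault, then clear the
 * overflow/pending bits in DMAR_FSTS_REG.
 */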
1502 irqreturn_t dmar_fault(int irq, void *dev_id)
1503 {
1504         struct intel_iommu *iommu = dev_id;
1505         int reg, fault_index;
1506         u32 fault_status;
1507         unsigned long flag;
1508
1509         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1510         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1511         if (fault_status)
1512                 pr_err("DRHD: handling fault status reg %x\n", fault_status);
1513
1514         /* TBD: ignore advanced fault log currently */
1515         if (!(fault_status & DMA_FSTS_PPF))
1516                 goto unlock_exit;
1517
1518         fault_index = dma_fsts_fault_record_index(fault_status);
1519         reg = cap_fault_reg_offset(iommu->cap);
1520         while (1) {
1521                 u8 fault_reason;
1522                 u16 source_id;
1523                 u64 guest_addr;
1524                 int type;
1525                 u32 data;
1526
1527                 /* highest 32 bits */
1528                 data = readl(iommu->reg + reg +
1529                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1530                 if (!(data & DMA_FRCD_F))
1531                         break;
1532
1533                 fault_reason = dma_frcd_fault_reason(data);
1534                 type = dma_frcd_type(data);
1535
1536                 data = readl(iommu->reg + reg +
1537                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1538                 source_id = dma_frcd_source_id(data);
1539
1540                 guest_addr = dmar_readq(iommu->reg + reg +
1541                                 fault_index * PRIMARY_FAULT_REG_LEN);
1542                 guest_addr = dma_frcd_page_addr(guest_addr);
1543                 /* clear the fault */
1544                 writel(DMA_FRCD_F, iommu->reg + reg +
1545                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1546
1547                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1548
1549                 dmar_fault_do_one(iommu, type, fault_reason,
1550                                 source_id, guest_addr);
1551
1552                 fault_index++;
1553                 if (fault_index >= cap_num_fault_regs(iommu->cap))
1554                         fault_index = 0;
1555                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1556         }
1557
1558         writel(DMA_FSTS_PFO | DMA_FSTS_PPF, iommu->reg + DMAR_FSTS_REG);
1559
1560 unlock_exit:
1561         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1562         return IRQ_HANDLED;
1563 }
1564
1565 int dmar_set_interrupt(struct intel_iommu *iommu)
1566 {
1567         int irq, ret;
1568
1569         /*
1570          * Check if the fault interrupt is already initialized.
1571          */
1572         if (iommu->irq)
1573                 return 0;
1574
1575         irq = dmar_alloc_hwirq();
1576         if (irq <= 0) {
1577                 pr_err("IOMMU: no free vectors\n");
1578                 return -EINVAL;
1579         }
1580
1581         irq_set_handler_data(irq, iommu);
1582         iommu->irq = irq;
1583
1584         ret = arch_setup_dmar_msi(irq);
1585         if (ret) {
1586                 irq_set_handler_data(irq, NULL);
1587                 iommu->irq = 0;
1588                 dmar_free_hwirq(irq);
1589                 return ret;
1590         }
1591
1592         ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
1593         if (ret)
1594                 pr_err("IOMMU: can't request irq\n");
1595         return ret;
1596 }
1597
1598 int __init enable_drhd_fault_handling(void)
1599 {
1600         struct dmar_drhd_unit *drhd;
1601         struct intel_iommu *iommu;
1602
1603         /*
1604          * Enable fault control interrupt.
1605          */
1606         for_each_iommu(iommu, drhd) {
1607                 u32 fault_status;
1608                 int ret = dmar_set_interrupt(iommu);
1609
1610                 if (ret) {
1611                         pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n",
1612                                (unsigned long long)drhd->reg_base_addr, ret);
1613                         return -1;
1614                 }
1615
1616                 /*
1617                  * Clear any previous faults.
1618                  */
1619                 dmar_fault(iommu->irq, iommu);
1620                 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1621                 writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1622         }
1623
1624         return 0;
1625 }
1626
1627 /*
1628  * Re-enable Queued Invalidation interface.
1629  */
1630 int dmar_reenable_qi(struct intel_iommu *iommu)
1631 {
1632         if (!ecap_qis(iommu->ecap))
1633                 return -ENOENT;
1634
1635         if (!iommu->qi)
1636                 return -ENOENT;
1637
1638         /*
1639          * First disable queued invalidation.
1640          */
1641         dmar_disable_qi(iommu);
1642         /*
1643          * Then enable queued invalidation again. Since there is no pending
1644          * invalidation requests now, it's safe to re-enable queued
1645          * invalidation.
1646          */
1647         __dmar_enable_qi(iommu);
1648
1649         return 0;
1650 }
1651
1652 /*
1653  * Check interrupt remapping support in DMAR table description.
1654  */
1655 int __init dmar_ir_support(void)
1656 {
1657         struct acpi_table_dmar *dmar;
1658         dmar = (struct acpi_table_dmar *)dmar_tbl;
1659         if (!dmar)
1660                 return 0;
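        /* Bit 0 of the DMAR table flags is INTR_REMAP support */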
1661         return dmar->flags & 0x1;
1662 }
1663
1664 static int __init dmar_free_unused_resources(void)
1665 {
1666         struct dmar_drhd_unit *dmaru, *dmaru_n;
1667
1668         /* DMAR units are in use */
1669         if (irq_remapping_enabled || intel_iommu_enabled)
1670                 return 0;
1671
1672         if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
1673                 bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);
1674
1675         down_write(&dmar_global_lock);
1676         list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
1677                 list_del(&dmaru->list);
1678                 dmar_free_drhd(dmaru);
1679         }
1680         up_write(&dmar_global_lock);
1681
1682         return 0;
1683 }
1684
1685 late_initcall(dmar_free_unused_resources);
1686 IOMMU_INIT_POST(detect_intel_iommu);