drivers/iommu/dmar.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  *
22  * This file implements early detection/parsing of Remapping Devices
23  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24  * tables.
25  *
26  * These routines are used by both DMA-remapping and Interrupt-remapping
27  */
28
29 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt /* has to precede printk.h */
30
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/iova.h>
34 #include <linux/intel-iommu.h>
35 #include <linux/timer.h>
36 #include <linux/irq.h>
37 #include <linux/interrupt.h>
38 #include <linux/tboot.h>
39 #include <linux/dmi.h>
40 #include <linux/slab.h>
41 #include <linux/iommu.h>
42 #include <asm/irq_remapping.h>
43 #include <asm/iommu_table.h>
44
45 #include "irq_remapping.h"
46
47 /*
48  * Assumptions:
49  * 1) The hotplug framework guarantees that a DMAR unit will be hot-added
50  *    before the I/O devices managed by that unit.
51  * 2) The hotplug framework guarantees that a DMAR unit will be hot-removed
52  *    after the I/O devices managed by that unit.
53  * 3) Hotplug events are rare.
54  *
55  * Locking rules for DMA and interrupt remapping related global data structures:
56  * 1) Use dmar_global_lock in process context
57  * 2) Use RCU in interrupt context
58  */
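/*
 * Illustrative sketch of the two access patterns described above (expected
 * usage, not something enforced here): process context takes the rwsem,
 *
 *	down_read(&dmar_global_lock);
 *	for_each_drhd_unit(drhd) { ... }
 *	up_read(&dmar_global_lock);
 *
 * while atomic and interrupt context relies on RCU instead,
 *
 *	rcu_read_lock();
 *	for_each_drhd_unit(drhd) { ... }
 *	rcu_read_unlock();
 *
 * as dmar_find_matched_drhd_unit() does below.
 */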
59 DECLARE_RWSEM(dmar_global_lock);
60 LIST_HEAD(dmar_drhd_units);
61
62 struct acpi_table_header * __initdata dmar_tbl;
63 static acpi_size dmar_tbl_size;
64 static int dmar_dev_scope_status = 1;
65
66 static int alloc_iommu(struct dmar_drhd_unit *drhd);
67 static void free_iommu(struct intel_iommu *iommu);
68
69 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
70 {
71         /*
72          * Add the INCLUDE_ALL unit at the tail, so a scan of the list will
73          * find it at the very end.
74          */
75         if (drhd->include_all)
76                 list_add_tail_rcu(&drhd->list, &dmar_drhd_units);
77         else
78                 list_add_rcu(&drhd->list, &dmar_drhd_units);
79 }
80
81 void *dmar_alloc_dev_scope(void *start, void *end, int *cnt)
82 {
83         struct acpi_dmar_device_scope *scope;
84
85         *cnt = 0;
86         while (start < end) {
87                 scope = start;
88                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_NAMESPACE ||
89                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
90                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
91                         (*cnt)++;
92                 else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
93                         scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
94                         pr_warn("Unsupported device scope\n");
95                 }
96                 start += scope->length;
97         }
98         if (*cnt == 0)
99                 return NULL;
100
101         return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL);
102 }
103
104 void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt)
105 {
106         int i;
107         struct device *tmp_dev;
108
109         if (*devices && *cnt) {
110                 for_each_active_dev_scope(*devices, *cnt, i, tmp_dev)
111                         put_device(tmp_dev);
112                 kfree(*devices);
113         }
114
115         *devices = NULL;
116         *cnt = 0;
117 }
118
119 /* Optimize out kzalloc()/kfree() for normal cases */
120 static char dmar_pci_notify_info_buf[64];
121
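/*
 * Build a dmar_pci_notify_info record for a PCI bus notifier event.  For
 * device-add events the PCI path from the device up to the root bus is
 * recorded so it can later be matched against DMAR device scopes.  The
 * small static buffer above is used when it is large enough, falling back
 * to kzalloc() otherwise.
 */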
122 static struct dmar_pci_notify_info *
123 dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
124 {
125         int level = 0;
126         size_t size;
127         struct pci_dev *tmp;
128         struct dmar_pci_notify_info *info;
129
130         BUG_ON(dev->is_virtfn);
131
132         /* Only generate path[] for device addition event */
133         if (event == BUS_NOTIFY_ADD_DEVICE)
134                 for (tmp = dev; tmp; tmp = tmp->bus->self)
135                         level++;
136
137         size = sizeof(*info) + level * sizeof(struct acpi_dmar_pci_path);
138         if (size <= sizeof(dmar_pci_notify_info_buf)) {
139                 info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf;
140         } else {
141                 info = kzalloc(size, GFP_KERNEL);
142                 if (!info) {
143                         pr_warn("Out of memory when allocating notify_info "
144                                 "for %s.\n", pci_name(dev));
145                         if (dmar_dev_scope_status == 0)
146                                 dmar_dev_scope_status = -ENOMEM;
147                         return NULL;
148                 }
149         }
150
151         info->event = event;
152         info->dev = dev;
153         info->seg = pci_domain_nr(dev->bus);
154         info->level = level;
155         if (event == BUS_NOTIFY_ADD_DEVICE) {
156                 for (tmp = dev; tmp; tmp = tmp->bus->self) {
157                         level--;
158                         info->path[level].device = PCI_SLOT(tmp->devfn);
159                         info->path[level].function = PCI_FUNC(tmp->devfn);
160                         if (pci_is_root_bus(tmp->bus))
161                                 info->bus = tmp->bus->number;
162                 }
163         }
164
165         return info;
166 }
167
168 static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info)
169 {
170         if ((void *)info != dmar_pci_notify_info_buf)
171                 kfree(info);
172 }
173
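/*
 * Check whether the PCI path recorded in @info matches a device scope
 * path from the DMAR table: same start bus, same depth, and the same
 * device/function pair at every level.
 */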
174 static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus,
175                                 struct acpi_dmar_pci_path *path, int count)
176 {
177         int i;
178
179         if (info->bus != bus)
180                 return false;
181         if (info->level != count)
182                 return false;
183
184         for (i = 0; i < count; i++) {
185                 if (path[i].device != info->path[i].device ||
186                     path[i].function != info->path[i].function)
187                         return false;
188         }
189
190         return true;
191 }
192
193 /* Return: > 0 if match found, 0 if no match found, < 0 if error happens */
194 int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
195                           void *start, void *end, u16 segment,
196                           struct dmar_dev_scope *devices,
197                           int devices_cnt)
198 {
199         int i, level;
200         struct device *tmp, *dev = &info->dev->dev;
201         struct acpi_dmar_device_scope *scope;
202         struct acpi_dmar_pci_path *path;
203
204         if (segment != info->seg)
205                 return 0;
206
207         for (; start < end; start += scope->length) {
208                 scope = start;
209                 if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
210                     scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
211                         continue;
212
213                 path = (struct acpi_dmar_pci_path *)(scope + 1);
214                 level = (scope->length - sizeof(*scope)) / sizeof(*path);
215                 if (!dmar_match_pci_path(info, scope->bus, path, level))
216                         continue;
217
218                 if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT) ^
219                     (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL)) {
220                         pr_warn("Device scope type does not match for %s\n",
221                                 pci_name(info->dev));
222                         return -EINVAL;
223                 }
224
225                 for_each_dev_scope(devices, devices_cnt, i, tmp)
226                         if (tmp == NULL) {
227                                 devices[i].bus = info->dev->bus->number;
228                                 devices[i].devfn = info->dev->devfn;
229                                 rcu_assign_pointer(devices[i].dev,
230                                                    get_device(dev));
231                                 return 1;
232                         }
233                 BUG_ON(i >= devices_cnt);
234         }
235
236         return 0;
237 }
238
239 int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment,
240                           struct dmar_dev_scope *devices, int count)
241 {
242         int index;
243         struct device *tmp;
244
245         if (info->seg != segment)
246                 return 0;
247
248         for_each_active_dev_scope(devices, count, index, tmp)
249                 if (tmp == &info->dev->dev) {
250                         rcu_assign_pointer(devices[index].dev, NULL);
251                         synchronize_rcu();
252                         put_device(tmp);
253                         return 1;
254                 }
255
256         return 0;
257 }
258
259 static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
260 {
261         int ret = 0;
262         struct dmar_drhd_unit *dmaru;
263         struct acpi_dmar_hardware_unit *drhd;
264
265         for_each_drhd_unit(dmaru) {
266                 if (dmaru->include_all)
267                         continue;
268
269                 drhd = container_of(dmaru->hdr,
270                                     struct acpi_dmar_hardware_unit, header);
271                 ret = dmar_insert_dev_scope(info, (void *)(drhd + 1),
272                                 ((void *)drhd) + drhd->header.length,
273                                 dmaru->segment,
274                                 dmaru->devices, dmaru->devices_cnt);
275                 if (ret != 0)
276                         break;
277         }
278         if (ret >= 0)
279                 ret = dmar_iommu_notify_scope_dev(info);
280         if (ret < 0 && dmar_dev_scope_status == 0)
281                 dmar_dev_scope_status = ret;
282
283         return ret;
284 }
285
286 static void dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
287 {
288         struct dmar_drhd_unit *dmaru;
289
290         for_each_drhd_unit(dmaru)
291                 if (dmar_remove_dev_scope(info, dmaru->segment,
292                         dmaru->devices, dmaru->devices_cnt))
293                         break;
294         dmar_iommu_notify_scope_dev(info);
295 }
296
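/*
 * PCI bus notifier: keep the DRHD device scopes in sync as physical
 * functions are hot-added or removed.  Virtual functions are ignored and
 * all updates are done under dmar_global_lock.
 */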
297 static int dmar_pci_bus_notifier(struct notifier_block *nb,
298                                  unsigned long action, void *data)
299 {
300         struct pci_dev *pdev = to_pci_dev(data);
301         struct dmar_pci_notify_info *info;
302
303         /* Only care about add/remove events for physical functions */
304         if (pdev->is_virtfn)
305                 return NOTIFY_DONE;
306         if (action != BUS_NOTIFY_ADD_DEVICE && action != BUS_NOTIFY_DEL_DEVICE)
307                 return NOTIFY_DONE;
308
309         info = dmar_alloc_pci_notify_info(pdev, action);
310         if (!info)
311                 return NOTIFY_DONE;
312
313         down_write(&dmar_global_lock);
314         if (action == BUS_NOTIFY_ADD_DEVICE)
315                 dmar_pci_bus_add_dev(info);
316         else if (action == BUS_NOTIFY_DEL_DEVICE)
317                 dmar_pci_bus_del_dev(info);
318         up_write(&dmar_global_lock);
319
320         dmar_free_pci_notify_info(info);
321
322         return NOTIFY_OK;
323 }
324
325 static struct notifier_block dmar_pci_bus_nb = {
326         .notifier_call = dmar_pci_bus_notifier,
327         .priority = INT_MIN,
328 };
329
330 /**
331  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
332  * structure which uniquely represents one DMA remapping hardware unit
333  * present in the platform
334  */
335 static int __init
336 dmar_parse_one_drhd(struct acpi_dmar_header *header)
337 {
338         struct acpi_dmar_hardware_unit *drhd;
339         struct dmar_drhd_unit *dmaru;
340         int ret = 0;
341
342         drhd = (struct acpi_dmar_hardware_unit *)header;
343         dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
344         if (!dmaru)
345                 return -ENOMEM;
346
347         dmaru->hdr = header;
348         dmaru->reg_base_addr = drhd->address;
349         dmaru->segment = drhd->segment;
350         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
351         dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
352                                               ((void *)drhd) + drhd->header.length,
353                                               &dmaru->devices_cnt);
354         if (dmaru->devices_cnt && dmaru->devices == NULL) {
355                 kfree(dmaru);
356                 return -ENOMEM;
357         }
358
359         ret = alloc_iommu(dmaru);
360         if (ret) {
361                 dmar_free_dev_scope(&dmaru->devices,
362                                     &dmaru->devices_cnt);
363                 kfree(dmaru);
364                 return ret;
365         }
366         dmar_register_drhd_unit(dmaru);
367         return 0;
368 }
369
370 static void dmar_free_drhd(struct dmar_drhd_unit *dmaru)
371 {
372         if (dmaru->devices && dmaru->devices_cnt)
373                 dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
374         if (dmaru->iommu)
375                 free_iommu(dmaru->iommu);
376         kfree(dmaru);
377 }
378
379 static int __init dmar_parse_one_andd(struct acpi_dmar_header *header)
380 {
381         struct acpi_dmar_andd *andd = (void *)header;
382
383         /* Check for NUL termination within the designated length */
384         if (strnlen(andd->device_name, header->length - 8) == header->length - 8) {
385                 WARN_TAINT(1, TAINT_FIRMWARE_WORKAROUND,
386                            "Your BIOS is broken; ANDD object name is not NUL-terminated\n"
387                            "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
388                            dmi_get_system_info(DMI_BIOS_VENDOR),
389                            dmi_get_system_info(DMI_BIOS_VERSION),
390                            dmi_get_system_info(DMI_PRODUCT_VERSION));
391                 return -EINVAL;
392         }
393         pr_info("ANDD device: %x name: %s\n", andd->device_number,
394                 andd->device_name);
395
396         return 0;
397 }
398
399 #ifdef CONFIG_ACPI_NUMA
400 static int __init
401 dmar_parse_one_rhsa(struct acpi_dmar_header *header)
402 {
403         struct acpi_dmar_rhsa *rhsa;
404         struct dmar_drhd_unit *drhd;
405
406         rhsa = (struct acpi_dmar_rhsa *)header;
407         for_each_drhd_unit(drhd) {
408                 if (drhd->reg_base_addr == rhsa->base_address) {
409                         int node = acpi_map_pxm_to_node(rhsa->proximity_domain);
410
411                         if (!node_online(node))
412                                 node = -1;
413                         drhd->iommu->node = node;
414                         return 0;
415                 }
416         }
417         WARN_TAINT(
418                 1, TAINT_FIRMWARE_WORKAROUND,
419                 "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
420                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
421                 rhsa->base_address,
422                 dmi_get_system_info(DMI_BIOS_VENDOR),
423                 dmi_get_system_info(DMI_BIOS_VERSION),
424                 dmi_get_system_info(DMI_PRODUCT_VERSION));
425
426         return 0;
427 }
428 #endif
429
430 static void __init
431 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
432 {
433         struct acpi_dmar_hardware_unit *drhd;
434         struct acpi_dmar_reserved_memory *rmrr;
435         struct acpi_dmar_atsr *atsr;
436         struct acpi_dmar_rhsa *rhsa;
437
438         switch (header->type) {
439         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
440                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
441                                     header);
442                 pr_info("DRHD base: %#016Lx flags: %#x\n",
443                         (unsigned long long)drhd->address, drhd->flags);
444                 break;
445         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
446                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
447                                     header);
448                 pr_info("RMRR base: %#016Lx end: %#016Lx\n",
449                         (unsigned long long)rmrr->base_address,
450                         (unsigned long long)rmrr->end_address);
451                 break;
452         case ACPI_DMAR_TYPE_ROOT_ATS:
453                 atsr = container_of(header, struct acpi_dmar_atsr, header);
454                 pr_info("ATSR flags: %#x\n", atsr->flags);
455                 break;
456         case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
457                 rhsa = container_of(header, struct acpi_dmar_rhsa, header);
458                 pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
459                        (unsigned long long)rhsa->base_address,
460                        rhsa->proximity_domain);
461                 break;
462         case ACPI_DMAR_TYPE_NAMESPACE:
463                 /* We don't print this here because we need to sanity-check
464                    it first. So print it in dmar_parse_one_andd() instead. */
465                 break;
466         }
467 }
468
469 /**
470  * dmar_table_detect - checks to see if the platform supports DMAR devices
471  */
472 static int __init dmar_table_detect(void)
473 {
474         acpi_status status = AE_OK;
475
476         /* if we can find the DMAR table, then there are DMAR devices */
477         status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
478                                 (struct acpi_table_header **)&dmar_tbl,
479                                 &dmar_tbl_size);
480
481         if (ACPI_SUCCESS(status) && !dmar_tbl) {
482                 pr_warn("Unable to map DMAR\n");
483                 status = AE_NOT_FOUND;
484         }
485
486         return (ACPI_SUCCESS(status) ? 1 : 0);
487 }
488
489 /**
490  * parse_dmar_table - parses the DMA reporting table
491  */
492 static int __init
493 parse_dmar_table(void)
494 {
495         struct acpi_table_dmar *dmar;
496         struct acpi_dmar_header *entry_header;
497         int ret = 0;
498         int drhd_count = 0;
499
500         /*
501          * Do it again; the earlier dmar_tbl mapping may have been done with
502          * the early fixed map and has since been unmapped.
503          */
504         dmar_table_detect();
505
506         /*
507          * ACPI tables may not be DMA protected by tboot, so use DMAR copy
508          * SINIT saved in SinitMleData in TXT heap (which is DMA protected)
509          */
510         dmar_tbl = tboot_get_dmar_table(dmar_tbl);
511
512         dmar = (struct acpi_table_dmar *)dmar_tbl;
513         if (!dmar)
514                 return -ENODEV;
515
516         if (dmar->width < PAGE_SHIFT - 1) {
517                 pr_warn("Invalid DMAR haw\n");
518                 return -EINVAL;
519         }
520
521         pr_info("Host address width %d\n", dmar->width + 1);
522
523         entry_header = (struct acpi_dmar_header *)(dmar + 1);
524         while (((unsigned long)entry_header) <
525                         (((unsigned long)dmar) + dmar_tbl->length)) {
526                 /* Avoid looping forever on bad ACPI tables */
527                 if (entry_header->length == 0) {
528                         pr_warn("Invalid 0-length structure\n");
529                         ret = -EINVAL;
530                         break;
531                 }
532
533                 dmar_table_print_dmar_entry(entry_header);
534
535                 switch (entry_header->type) {
536                 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
537                         drhd_count++;
538                         ret = dmar_parse_one_drhd(entry_header);
539                         break;
540                 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
541                         ret = dmar_parse_one_rmrr(entry_header);
542                         break;
543                 case ACPI_DMAR_TYPE_ROOT_ATS:
544                         ret = dmar_parse_one_atsr(entry_header);
545                         break;
546                 case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
547 #ifdef CONFIG_ACPI_NUMA
548                         ret = dmar_parse_one_rhsa(entry_header);
549 #endif
550                         break;
551                 case ACPI_DMAR_TYPE_NAMESPACE:
552                         ret = dmar_parse_one_andd(entry_header);
553                         break;
554                 default:
555                         pr_warn("Unknown DMAR structure type %d\n",
556                                 entry_header->type);
557                         ret = 0; /* for forward compatibility */
558                         break;
559                 }
560                 if (ret)
561                         break;
562
563                 entry_header = ((void *)entry_header + entry_header->length);
564         }
565         if (drhd_count == 0)
566                 pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");
567         return ret;
568 }
569
570 static int dmar_pci_device_match(struct dmar_dev_scope devices[],
571                                  int cnt, struct pci_dev *dev)
572 {
573         int index;
574         struct device *tmp;
575
576         while (dev) {
577                 for_each_active_dev_scope(devices, cnt, index, tmp)
578                         if (dev_is_pci(tmp) && dev == to_pci_dev(tmp))
579                                 return 1;
580
581                 /* Check our parent */
582                 dev = dev->bus->self;
583         }
584
585         return 0;
586 }
587
588 struct dmar_drhd_unit *
589 dmar_find_matched_drhd_unit(struct pci_dev *dev)
590 {
591         struct dmar_drhd_unit *dmaru;
592         struct acpi_dmar_hardware_unit *drhd;
593
594         dev = pci_physfn(dev);
595
596         rcu_read_lock();
597         for_each_drhd_unit(dmaru) {
598                 drhd = container_of(dmaru->hdr,
599                                     struct acpi_dmar_hardware_unit,
600                                     header);
601
602                 if (dmaru->include_all &&
603                     drhd->segment == pci_domain_nr(dev->bus))
604                         goto out;
605
606                 if (dmar_pci_device_match(dmaru->devices,
607                                           dmaru->devices_cnt, dev))
608                         goto out;
609         }
610         dmaru = NULL;
611 out:
612         rcu_read_unlock();
613
614         return dmaru;
615 }
616
617 static void __init dmar_acpi_insert_dev_scope(u8 device_number,
618                                               struct acpi_device *adev)
619 {
620         struct dmar_drhd_unit *dmaru;
621         struct acpi_dmar_hardware_unit *drhd;
622         struct acpi_dmar_device_scope *scope;
623         struct device *tmp;
624         int i;
625         struct acpi_dmar_pci_path *path;
626
627         for_each_drhd_unit(dmaru) {
628                 drhd = container_of(dmaru->hdr,
629                                     struct acpi_dmar_hardware_unit,
630                                     header);
631
632                 for (scope = (void *)(drhd + 1);
633                      (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length;
634                      scope = ((void *)scope) + scope->length) {
635                         if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_NAMESPACE)
636                                 continue;
637                         if (scope->enumeration_id != device_number)
638                                 continue;
639
640                         path = (void *)(scope + 1);
641                         pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n",
642                                 dev_name(&adev->dev), dmaru->reg_base_addr,
643                                 scope->bus, path->device, path->function);
644                         for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp)
645                                 if (tmp == NULL) {
646                                         dmaru->devices[i].bus = scope->bus;
647                                         dmaru->devices[i].devfn = PCI_DEVFN(path->device,
648                                                                             path->function);
649                                         rcu_assign_pointer(dmaru->devices[i].dev,
650                                                            get_device(&adev->dev));
651                                         return;
652                                 }
653                         BUG_ON(i >= dmaru->devices_cnt);
654                 }
655         }
656         pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n",
657                 device_number, dev_name(&adev->dev));
658 }
659
660 static int __init dmar_acpi_dev_scope_init(void)
661 {
662         struct acpi_dmar_andd *andd;
663
664         if (dmar_tbl == NULL)
665                 return -ENODEV;
666
667         for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar);
668              ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length;
669              andd = ((void *)andd) + andd->header.length) {
670                 if (andd->header.type == ACPI_DMAR_TYPE_NAMESPACE) {
671                         acpi_handle h;
672                         struct acpi_device *adev;
673
674                         if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT,
675                                                           andd->device_name,
676                                                           &h))) {
677                                 pr_err("Failed to find handle for ACPI object %s\n",
678                                        andd->device_name);
679                                 continue;
680                         }
681                         if (acpi_bus_get_device(h, &adev)) {
682                                 pr_err("Failed to get device for ACPI object %s\n",
683                                        andd->device_name);
684                                 continue;
685                         }
686                         dmar_acpi_insert_dev_scope(andd->device_number, adev);
687                 }
688         }
689         return 0;
690 }
691
692 int __init dmar_dev_scope_init(void)
693 {
694         struct pci_dev *dev = NULL;
695         struct dmar_pci_notify_info *info;
696
697         if (dmar_dev_scope_status != 1)
698                 return dmar_dev_scope_status;
699
700         if (list_empty(&dmar_drhd_units)) {
701                 dmar_dev_scope_status = -ENODEV;
702         } else {
703                 dmar_dev_scope_status = 0;
704
705                 dmar_acpi_dev_scope_init();
706
707                 for_each_pci_dev(dev) {
708                         if (dev->is_virtfn)
709                                 continue;
710
711                         info = dmar_alloc_pci_notify_info(dev,
712                                         BUS_NOTIFY_ADD_DEVICE);
713                         if (!info) {
714                                 return dmar_dev_scope_status;
715                         } else {
716                                 dmar_pci_bus_add_dev(info);
717                                 dmar_free_pci_notify_info(info);
718                         }
719                 }
720
721                 bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
722         }
723
724         return dmar_dev_scope_status;
725 }
726
727
728 int __init dmar_table_init(void)
729 {
730         static int dmar_table_initialized;
731         int ret;
732
733         if (dmar_table_initialized == 0) {
734                 ret = parse_dmar_table();
735                 if (ret < 0) {
736                         if (ret != -ENODEV)
737                                 pr_info("Failed to parse DMAR table\n");
738                 } else if (list_empty(&dmar_drhd_units)) {
739                         pr_info("No DMAR devices found\n");
740                         ret = -ENODEV;
741                 }
742
743                 if (ret < 0)
744                         dmar_table_initialized = ret;
745                 else
746                         dmar_table_initialized = 1;
747         }
748
749         return dmar_table_initialized < 0 ? dmar_table_initialized : 0;
750 }
751
752 static void warn_invalid_dmar(u64 addr, const char *message)
753 {
754         WARN_TAINT_ONCE(
755                 1, TAINT_FIRMWARE_WORKAROUND,
756                 "Your BIOS is broken; DMAR reported at address %llx%s!\n"
757                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
758                 addr, message,
759                 dmi_get_system_info(DMI_BIOS_VENDOR),
760                 dmi_get_system_info(DMI_BIOS_VERSION),
761                 dmi_get_system_info(DMI_PRODUCT_VERSION));
762 }
763
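/*
 * Sanity-check every DRHD entry in the DMAR table: the register base
 * address must be non-zero and reading CAP/ECAP through an early mapping
 * must not return all ones.  Returns 1 if all units look usable, 0
 * otherwise.
 */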
764 static int __init check_zero_address(void)
765 {
766         struct acpi_table_dmar *dmar;
767         struct acpi_dmar_header *entry_header;
768         struct acpi_dmar_hardware_unit *drhd;
769
770         dmar = (struct acpi_table_dmar *)dmar_tbl;
771         entry_header = (struct acpi_dmar_header *)(dmar + 1);
772
773         while (((unsigned long)entry_header) <
774                         (((unsigned long)dmar) + dmar_tbl->length)) {
775                 /* Avoid looping forever on bad ACPI tables */
776                 if (entry_header->length == 0) {
777                         pr_warn("Invalid 0-length structure\n");
778                         return 0;
779                 }
780
781                 if (entry_header->type == ACPI_DMAR_TYPE_HARDWARE_UNIT) {
782                         void __iomem *addr;
783                         u64 cap, ecap;
784
785                         drhd = (void *)entry_header;
786                         if (!drhd->address) {
787                                 warn_invalid_dmar(0, "");
788                                 goto failed;
789                         }
790
791                         addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
792                         if (!addr) {
793                                 pr_warn("IOMMU: can't validate: %llx\n", drhd->address);
794                                 goto failed;
795                         }
796                         cap = dmar_readq(addr + DMAR_CAP_REG);
797                         ecap = dmar_readq(addr + DMAR_ECAP_REG);
798                         early_iounmap(addr, VTD_PAGE_SIZE);
799                         if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
800                                 warn_invalid_dmar(drhd->address,
801                                                   " returns all ones");
802                                 goto failed;
803                         }
804                 }
805
806                 entry_header = ((void *)entry_header + entry_header->length);
807         }
808         return 1;
809
810 failed:
811         return 0;
812 }
813
814 int __init detect_intel_iommu(void)
815 {
816         int ret;
817
818         down_write(&dmar_global_lock);
819         ret = dmar_table_detect();
820         if (ret)
821                 ret = check_zero_address();
823         if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
824                 iommu_detected = 1;
825                 /* Make sure ACS will be enabled */
826                 pci_request_acs();
827         }
828
829 #ifdef CONFIG_X86
830         if (ret)
831                 x86_init.iommu.iommu_init = intel_iommu_init;
832 #endif
834         early_acpi_os_unmap_memory((void __iomem *)dmar_tbl, dmar_tbl_size);
835         dmar_tbl = NULL;
836         up_write(&dmar_global_lock);
837
838         return ret ? 1 : -ENODEV;
839 }
840
841
842 static void unmap_iommu(struct intel_iommu *iommu)
843 {
844         iounmap(iommu->reg);
845         release_mem_region(iommu->reg_phys, iommu->reg_size);
846 }
847
848 /**
849  * map_iommu: map the iommu's registers
850  * @iommu: the iommu to map
851  * @phys_addr: the physical address of the base register
852  *
853  * Memory map the iommu's registers.  Start with a single page, and
854  * possibly expand if that turns out to be insufficient.
855  */
856 static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
857 {
858         int map_size, err = 0;
859
860         iommu->reg_phys = phys_addr;
861         iommu->reg_size = VTD_PAGE_SIZE;
862
863         if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
864                 pr_err("IOMMU: can't reserve memory\n");
865                 err = -EBUSY;
866                 goto out;
867         }
868
869         iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
870         if (!iommu->reg) {
871                 pr_err("IOMMU: can't map the region\n");
872                 err = -ENOMEM;
873                 goto release;
874         }
875
876         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
877         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
878
879         if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
880                 err = -EINVAL;
881                 warn_invalid_dmar(phys_addr, " returns all ones");
882                 goto unmap;
883         }
884
885         /* the registers might be more than one page */
886         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
887                          cap_max_fault_reg_offset(iommu->cap));
888         map_size = VTD_PAGE_ALIGN(map_size);
889         if (map_size > iommu->reg_size) {
890                 iounmap(iommu->reg);
891                 release_mem_region(iommu->reg_phys, iommu->reg_size);
892                 iommu->reg_size = map_size;
893                 if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
894                                         iommu->name)) {
895                         pr_err("IOMMU: can't reserve memory\n");
896                         err = -EBUSY;
897                         goto out;
898                 }
899                 iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
900                 if (!iommu->reg) {
901                         pr_err("IOMMU: can't map the region\n");
902                         err = -ENOMEM;
903                         goto release;
904                 }
905         }
906         err = 0;
907         goto out;
908
909 unmap:
910         iounmap(iommu->reg);
911 release:
912         release_mem_region(iommu->reg_phys, iommu->reg_size);
913 out:
914         return err;
915 }
916
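/*
 * Allocate and initialize an intel_iommu instance for a DRHD unit: map its
 * registers, read CAP/ECAP, compute the supported (max) AGAW values,
 * mirror the currently enabled features from the global status register
 * into gcmd, and register the unit with the IOMMU core when intel_iommu is
 * enabled.
 */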
917 static int alloc_iommu(struct dmar_drhd_unit *drhd)
918 {
919         struct intel_iommu *iommu;
920         u32 ver, sts;
921         static int iommu_allocated = 0;
922         int agaw = 0;
923         int msagaw = 0;
924         int err;
925
926         if (!drhd->reg_base_addr) {
927                 warn_invalid_dmar(0, "");
928                 return -EINVAL;
929         }
930
931         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
932         if (!iommu)
933                 return -ENOMEM;
934
935         iommu->seq_id = iommu_allocated++;
936         sprintf(iommu->name, "dmar%d", iommu->seq_id);
937
938         err = map_iommu(iommu, drhd->reg_base_addr);
939         if (err) {
940                 pr_err("IOMMU: failed to map %s\n", iommu->name);
941                 goto error;
942         }
943
944         err = -EINVAL;
945         agaw = iommu_calculate_agaw(iommu);
946         if (agaw < 0) {
947                 pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
948                         iommu->seq_id);
949                 goto err_unmap;
950         }
951         msagaw = iommu_calculate_max_sagaw(iommu);
952         if (msagaw < 0) {
953                 pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
954                         iommu->seq_id);
955                 goto err_unmap;
956         }
957         iommu->agaw = agaw;
958         iommu->msagaw = msagaw;
959         iommu->segment = drhd->segment;
960
961         iommu->node = -1;
962
963         ver = readl(iommu->reg + DMAR_VER_REG);
964         pr_info("IOMMU %d: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
965                 iommu->seq_id,
966                 (unsigned long long)drhd->reg_base_addr,
967                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
968                 (unsigned long long)iommu->cap,
969                 (unsigned long long)iommu->ecap);
970
971         /* Reflect status in gcmd */
972         sts = readl(iommu->reg + DMAR_GSTS_REG);
973         if (sts & DMA_GSTS_IRES)
974                 iommu->gcmd |= DMA_GCMD_IRE;
975         if (sts & DMA_GSTS_TES)
976                 iommu->gcmd |= DMA_GCMD_TE;
977         if (sts & DMA_GSTS_QIES)
978                 iommu->gcmd |= DMA_GCMD_QIE;
979
980         raw_spin_lock_init(&iommu->register_lock);
981
982         drhd->iommu = iommu;
983
984         if (intel_iommu_enabled)
985                 iommu->iommu_dev = iommu_device_create(NULL, iommu,
986                                                        intel_iommu_groups,
987                                                        iommu->name);
988
989         return 0;
990
991  err_unmap:
992         unmap_iommu(iommu);
993  error:
994         kfree(iommu);
995         return err;
996 }
997
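/*
 * Tear down everything alloc_iommu() and later setup created: the sysfs
 * representation, the fault interrupt, the queued-invalidation queue and
 * the register mapping.
 */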
998 static void free_iommu(struct intel_iommu *iommu)
999 {
1000         iommu_device_destroy(iommu->iommu_dev);
1001
1002         if (iommu->irq) {
1003                 free_irq(iommu->irq, iommu);
1004                 irq_set_handler_data(iommu->irq, NULL);
1005                 dmar_free_hwirq(iommu->irq);
1006         }
1007
1008         if (iommu->qi) {
1009                 free_page((unsigned long)iommu->qi->desc);
1010                 kfree(iommu->qi->desc_status);
1011                 kfree(iommu->qi);
1012         }
1013
1014         if (iommu->reg)
1015                 unmap_iommu(iommu);
1016
1017         kfree(iommu);
1018 }
1019
1020 /*
1021  * Reclaim all the submitted descriptors which have completed their work.
1022  */
1023 static inline void reclaim_free_desc(struct q_inval *qi)
1024 {
1025         while (qi->desc_status[qi->free_tail] == QI_DONE ||
1026                qi->desc_status[qi->free_tail] == QI_ABORT) {
1027                 qi->desc_status[qi->free_tail] = QI_FREE;
1028                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
1029                 qi->free_cnt++;
1030         }
1031 }
1032
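/*
 * Check the fault status register for queued-invalidation errors.  If an
 * Invalidation Queue Error points at the caller's descriptor, it is
 * overwritten with the wait descriptor and -EINVAL is returned; on an
 * Invalidation Time-out Error the affected wait descriptors are marked
 * aborted and -EAGAIN tells the caller to resubmit; an Invalidation
 * Completion Error is simply cleared.
 */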
1033 static int qi_check_fault(struct intel_iommu *iommu, int index)
1034 {
1035         u32 fault;
1036         int head, tail;
1037         struct q_inval *qi = iommu->qi;
1038         int wait_index = (index + 1) % QI_LENGTH;
1039
1040         if (qi->desc_status[wait_index] == QI_ABORT)
1041                 return -EAGAIN;
1042
1043         fault = readl(iommu->reg + DMAR_FSTS_REG);
1044
1045         /*
1046          * If IQE happens, the head points to the descriptor associated
1047          * with the error. No new descriptors are fetched until the IQE
1048          * is cleared.
1049          */
1050         if (fault & DMA_FSTS_IQE) {
1051                 head = readl(iommu->reg + DMAR_IQH_REG);
1052                 if ((head >> DMAR_IQ_SHIFT) == index) {
1053                         pr_err("VT-d detected invalid descriptor: "
1054                                 "low=%llx, high=%llx\n",
1055                                 (unsigned long long)qi->desc[index].low,
1056                                 (unsigned long long)qi->desc[index].high);
1057                         memcpy(&qi->desc[index], &qi->desc[wait_index],
1058                                         sizeof(struct qi_desc));
1059                         __iommu_flush_cache(iommu, &qi->desc[index],
1060                                         sizeof(struct qi_desc));
1061                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
1062                         return -EINVAL;
1063                 }
1064         }
1065
1066         /*
1067          * If ITE happens, all pending wait_desc commands are aborted.
1068          * No new descriptors are fetched until the ITE is cleared.
1069          */
1070         if (fault & DMA_FSTS_ITE) {
1071                 head = readl(iommu->reg + DMAR_IQH_REG);
1072                 head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
1073                 head |= 1;
1074                 tail = readl(iommu->reg + DMAR_IQT_REG);
1075                 tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
1076
1077                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
1078
1079                 do {
1080                         if (qi->desc_status[head] == QI_IN_USE)
1081                                 qi->desc_status[head] = QI_ABORT;
1082                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
1083                 } while (head != tail);
1084
1085                 if (qi->desc_status[wait_index] == QI_ABORT)
1086                         return -EAGAIN;
1087         }
1088
1089         if (fault & DMA_FSTS_ICE)
1090                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
1091
1092         return 0;
1093 }
1094
1095 /*
1096  * Submit the queued invalidation descriptor to the remapping
1097  * hardware unit and wait for its completion.
1098  */
1099 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
1100 {
1101         int rc;
1102         struct q_inval *qi = iommu->qi;
1103         struct qi_desc *hw, wait_desc;
1104         int wait_index, index;
1105         unsigned long flags;
1106
1107         if (!qi)
1108                 return 0;
1109
1110         hw = qi->desc;
1111
1112 restart:
1113         rc = 0;
1114
1115         raw_spin_lock_irqsave(&qi->q_lock, flags);
1116         while (qi->free_cnt < 3) {
1117                 raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1118                 cpu_relax();
1119                 raw_spin_lock_irqsave(&qi->q_lock, flags);
1120         }
1121
1122         index = qi->free_head;
1123         wait_index = (index + 1) % QI_LENGTH;
1124
1125         qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
1126
1127         hw[index] = *desc;
1128
1129         wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
1130                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
1131         wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
1132
1133         hw[wait_index] = wait_desc;
1134
1135         __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
1136         __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
1137
1138         qi->free_head = (qi->free_head + 2) % QI_LENGTH;
1139         qi->free_cnt -= 2;
1140
1141         /*
1142          * update the HW tail register indicating the presence of
1143          * new descriptors.
1144          */
1145         writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
1146
1147         while (qi->desc_status[wait_index] != QI_DONE) {
1148                 /*
1149                  * We leave the interrupts disabled to prevent the interrupt
1150                  * context from queueing another cmd while a cmd is already submitted
1151                  * and waiting for completion on this cpu. This is to avoid
1152                  * a deadlock where the interrupt context can wait indefinitely
1153                  * for free slots in the queue.
1154                  */
1155                 rc = qi_check_fault(iommu, index);
1156                 if (rc)
1157                         break;
1158
1159                 raw_spin_unlock(&qi->q_lock);
1160                 cpu_relax();
1161                 raw_spin_lock(&qi->q_lock);
1162         }
1163
1164         qi->desc_status[index] = QI_DONE;
1165
1166         reclaim_free_desc(qi);
1167         raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1168
1169         if (rc == -EAGAIN)
1170                 goto restart;
1171
1172         return rc;
1173 }
1174
1175 /*
1176  * Flush the global interrupt entry cache.
1177  */
1178 void qi_global_iec(struct intel_iommu *iommu)
1179 {
1180         struct qi_desc desc;
1181
1182         desc.low = QI_IEC_TYPE;
1183         desc.high = 0;
1184
1185         /* should never fail */
1186         qi_submit_sync(&desc, iommu);
1187 }
1188
1189 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
1190                       u64 type)
1191 {
1192         struct qi_desc desc;
1193
1194         desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
1195                         | QI_CC_GRAN(type) | QI_CC_TYPE;
1196         desc.high = 0;
1197
1198         qi_submit_sync(&desc, iommu);
1199 }
1200
1201 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
1202                     unsigned int size_order, u64 type)
1203 {
1204         u8 dw = 0, dr = 0;
1205
1206         struct qi_desc desc;
1207         int ih = 0;
1208
1209         if (cap_write_drain(iommu->cap))
1210                 dw = 1;
1211
1212         if (cap_read_drain(iommu->cap))
1213                 dr = 1;
1214
1215         desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
1216                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
1217         desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
1218                 | QI_IOTLB_AM(size_order);
1219
1220         qi_submit_sync(&desc, iommu);
1221 }
1222
1223 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
1224                         u64 addr, unsigned mask)
1225 {
1226         struct qi_desc desc;
1227
1228         if (mask) {
1229                 BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
1230                 addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
1231                 desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
1232         } else
1233                 desc.high = QI_DEV_IOTLB_ADDR(addr);
1234
1235         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
1236                 qdep = 0;
1237
1238         desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
1239                    QI_DIOTLB_TYPE;
1240
1241         qi_submit_sync(&desc, iommu);
1242 }
1243
1244 /*
1245  * Disable Queued Invalidation interface.
1246  */
1247 void dmar_disable_qi(struct intel_iommu *iommu)
1248 {
1249         unsigned long flags;
1250         u32 sts;
1251         cycles_t start_time = get_cycles();
1252
1253         if (!ecap_qis(iommu->ecap))
1254                 return;
1255
1256         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1257
1258         sts =  dmar_readq(iommu->reg + DMAR_GSTS_REG);
1259         if (!(sts & DMA_GSTS_QIES))
1260                 goto end;
1261
1262         /*
1263          * Give a chance to HW to complete the pending invalidation requests.
1264          */
1265         while ((readl(iommu->reg + DMAR_IQT_REG) !=
1266                 readl(iommu->reg + DMAR_IQH_REG)) &&
1267                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
1268                 cpu_relax();
1269
1270         iommu->gcmd &= ~DMA_GCMD_QIE;
1271         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1272
1273         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
1274                       !(sts & DMA_GSTS_QIES), sts);
1275 end:
1276         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1277 }
1278
1279 /*
1280  * Enable queued invalidation.
1281  */
1282 static void __dmar_enable_qi(struct intel_iommu *iommu)
1283 {
1284         u32 sts;
1285         unsigned long flags;
1286         struct q_inval *qi = iommu->qi;
1287
1288         qi->free_head = qi->free_tail = 0;
1289         qi->free_cnt = QI_LENGTH;
1290
1291         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1292
1293         /* write zero to the tail reg */
1294         writel(0, iommu->reg + DMAR_IQT_REG);
1295
1296         dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
1297
1298         iommu->gcmd |= DMA_GCMD_QIE;
1299         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1300
1301         /* Make sure hardware complete it */
1302         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1303
1304         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1305 }
1306
1307 /*
1308  * Enable Queued Invalidation interface. This is a must to support
1309  * interrupt-remapping. Also used by DMA-remapping, which replaces
1310  * register based IOTLB invalidation.
1311  */
1312 int dmar_enable_qi(struct intel_iommu *iommu)
1313 {
1314         struct q_inval *qi;
1315         struct page *desc_page;
1316
1317         if (!ecap_qis(iommu->ecap))
1318                 return -ENOENT;
1319
1320         /*
1321          * queued invalidation is already setup and enabled.
1322          */
1323         if (iommu->qi)
1324                 return 0;
1325
1326         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1327         if (!iommu->qi)
1328                 return -ENOMEM;
1329
1330         qi = iommu->qi;
1331
1332
1333         desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0);
1334         if (!desc_page) {
1335                 kfree(qi);
1336                 iommu->qi = NULL;
1337                 return -ENOMEM;
1338         }
1339
1340         qi->desc = page_address(desc_page);
1341
1342         qi->desc_status = kzalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
1343         if (!qi->desc_status) {
1344                 free_page((unsigned long) qi->desc);
1345                 kfree(qi);
1346                 iommu->qi = NULL;
1347                 return -ENOMEM;
1348         }
1349
1350         raw_spin_lock_init(&qi->q_lock);
1351
1352         __dmar_enable_qi(iommu);
1353
1354         return 0;
1355 }
1356
1357 /* iommu interrupt handling. Most of it is MSI-like. */
1358
1359 enum faulttype {
1360         DMA_REMAP,
1361         INTR_REMAP,
1362         UNKNOWN,
1363 };
1364
1365 static const char *dma_remap_fault_reasons[] =
1366 {
1367         "Software",
1368         "Present bit in root entry is clear",
1369         "Present bit in context entry is clear",
1370         "Invalid context entry",
1371         "Access beyond MGAW",
1372         "PTE Write access is not set",
1373         "PTE Read access is not set",
1374         "Next page table ptr is invalid",
1375         "Root table address invalid",
1376         "Context table ptr is invalid",
1377         "non-zero reserved fields in RTP",
1378         "non-zero reserved fields in CTP",
1379         "non-zero reserved fields in PTE",
1380         "PCE for translation request specifies blocking",
1381 };
1382
1383 static const char *irq_remap_fault_reasons[] =
1384 {
1385         "Detected reserved fields in the decoded interrupt-remapped request",
1386         "Interrupt index exceeded the interrupt-remapping table size",
1387         "Present field in the IRTE entry is clear",
1388         "Error accessing interrupt-remapping table pointed by IRTA_REG",
1389         "Detected reserved fields in the IRTE entry",
1390         "Blocked a compatibility format interrupt request",
1391         "Blocked an interrupt request due to source-id verification failure",
1392 };
1393
1394 static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1395 {
1396         if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1397                                         ARRAY_SIZE(irq_remap_fault_reasons))) {
1398                 *fault_type = INTR_REMAP;
1399                 return irq_remap_fault_reasons[fault_reason - 0x20];
1400         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1401                 *fault_type = DMA_REMAP;
1402                 return dma_remap_fault_reasons[fault_reason];
1403         } else {
1404                 *fault_type = UNKNOWN;
1405                 return "Unknown";
1406         }
1407 }
1408
1409 void dmar_msi_unmask(struct irq_data *data)
1410 {
1411         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1412         unsigned long flag;
1413
1414         /* unmask it */
1415         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1416         writel(0, iommu->reg + DMAR_FECTL_REG);
1417         /* Read a reg to force flush the post write */
1418         readl(iommu->reg + DMAR_FECTL_REG);
1419         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1420 }
1421
1422 void dmar_msi_mask(struct irq_data *data)
1423 {
1424         unsigned long flag;
1425         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1426
1427         /* mask it */
1428         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1429         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1430         /* Read a reg to force flush the post write */
1431         readl(iommu->reg + DMAR_FECTL_REG);
1432         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1433 }
1434
1435 void dmar_msi_write(int irq, struct msi_msg *msg)
1436 {
1437         struct intel_iommu *iommu = irq_get_handler_data(irq);
1438         unsigned long flag;
1439
1440         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1441         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1442         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1443         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1444         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1445 }
1446
1447 void dmar_msi_read(int irq, struct msi_msg *msg)
1448 {
1449         struct intel_iommu *iommu = irq_get_handler_data(irq);
1450         unsigned long flag;
1451
1452         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1453         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1454         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1455         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1456         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1457 }
1458
1459 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1460                 u8 fault_reason, u16 source_id, unsigned long long addr)
1461 {
1462         const char *reason;
1463         int fault_type;
1464
1465         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1466
1467         if (fault_type == INTR_REMAP)
1468                 pr_err("INTR-REMAP: Request device [[%02x:%02x.%d] "
1469                        "fault index %llx\n"
1470                         "INTR-REMAP:[fault reason %02d] %s\n",
1471                         (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1472                         PCI_FUNC(source_id & 0xFF), addr >> 48,
1473                         fault_reason, reason);
1474         else
1475                 pr_err("DMAR:[%s] Request device [%02x:%02x.%d] "
1476                        "fault addr %llx\n"
1477                        "DMAR:[fault reason %02d] %s\n",
1478                        (type ? "DMA Read" : "DMA Write"),
1479                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1480                        PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1481         return 0;
1482 }
1483
1484 #define PRIMARY_FAULT_REG_LEN (16)
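/*
 * Primary fault interrupt handler: walk the fault recording registers,
 * report and clear each pending fault, then clear the overflow/pending
 * bits in the fault status register.
 */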
1485 irqreturn_t dmar_fault(int irq, void *dev_id)
1486 {
1487         struct intel_iommu *iommu = dev_id;
1488         int reg, fault_index;
1489         u32 fault_status;
1490         unsigned long flag;
1491
1492         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1493         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1494         if (fault_status)
1495                 pr_err("DRHD: handling fault status reg %x\n", fault_status);
1496
1497         /* TBD: ignore advanced fault log currently */
1498         if (!(fault_status & DMA_FSTS_PPF))
1499                 goto unlock_exit;
1500
1501         fault_index = dma_fsts_fault_record_index(fault_status);
1502         reg = cap_fault_reg_offset(iommu->cap);
1503         while (1) {
1504                 u8 fault_reason;
1505                 u16 source_id;
1506                 u64 guest_addr;
1507                 int type;
1508                 u32 data;
1509
1510                 /* highest 32 bits */
1511                 data = readl(iommu->reg + reg +
1512                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1513                 if (!(data & DMA_FRCD_F))
1514                         break;
1515
1516                 fault_reason = dma_frcd_fault_reason(data);
1517                 type = dma_frcd_type(data);
1518
1519                 data = readl(iommu->reg + reg +
1520                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1521                 source_id = dma_frcd_source_id(data);
1522
1523                 guest_addr = dmar_readq(iommu->reg + reg +
1524                                 fault_index * PRIMARY_FAULT_REG_LEN);
1525                 guest_addr = dma_frcd_page_addr(guest_addr);
1526                 /* clear the fault */
1527                 writel(DMA_FRCD_F, iommu->reg + reg +
1528                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1529
1530                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1531
1532                 dmar_fault_do_one(iommu, type, fault_reason,
1533                                 source_id, guest_addr);
1534
1535                 fault_index++;
1536                 if (fault_index >= cap_num_fault_regs(iommu->cap))
1537                         fault_index = 0;
1538                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1539         }
1540
1541         writel(DMA_FSTS_PFO | DMA_FSTS_PPF, iommu->reg + DMAR_FSTS_REG);
1542
1543 unlock_exit:
1544         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1545         return IRQ_HANDLED;
1546 }
1547
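/*
 * Allocate and wire up the fault-reporting interrupt for an IOMMU:
 * allocate a DMAR hwirq, program it as the unit's fault-event MSI and
 * install dmar_fault() as the handler.
 */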
1548 int dmar_set_interrupt(struct intel_iommu *iommu)
1549 {
1550         int irq, ret;
1551
1552         /*
1553          * Check if the fault interrupt is already initialized.
1554          */
1555         if (iommu->irq)
1556                 return 0;
1557
1558         irq = dmar_alloc_hwirq();
1559         if (irq <= 0) {
1560                 pr_err("IOMMU: no free vectors\n");
1561                 return -EINVAL;
1562         }
1563
1564         irq_set_handler_data(irq, iommu);
1565         iommu->irq = irq;
1566
1567         ret = arch_setup_dmar_msi(irq);
1568         if (ret) {
1569                 irq_set_handler_data(irq, NULL);
1570                 iommu->irq = 0;
1571                 dmar_free_hwirq(irq);
1572                 return ret;
1573         }
1574
1575         ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
1576         if (ret)
1577                 pr_err("IOMMU: can't request irq\n");
1578         return ret;
1579 }
1580
1581 int __init enable_drhd_fault_handling(void)
1582 {
1583         struct dmar_drhd_unit *drhd;
1584         struct intel_iommu *iommu;
1585
1586         /*
1587          * Enable fault control interrupt.
1588          */
1589         for_each_iommu(iommu, drhd) {
1590                 u32 fault_status;
1591                 int ret = dmar_set_interrupt(iommu);
1592
1593                 if (ret) {
1594                         pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n",
1595                                (unsigned long long)drhd->reg_base_addr, ret);
1596                         return -1;
1597                 }
1598
1599                 /*
1600                  * Clear any previous faults.
1601                  */
1602                 dmar_fault(iommu->irq, iommu);
1603                 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1604                 writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1605         }
1606
1607         return 0;
1608 }
1609
1610 /*
1611  * Re-enable Queued Invalidation interface.
1612  */
1613 int dmar_reenable_qi(struct intel_iommu *iommu)
1614 {
1615         if (!ecap_qis(iommu->ecap))
1616                 return -ENOENT;
1617
1618         if (!iommu->qi)
1619                 return -ENOENT;
1620
1621         /*
1622          * First disable queued invalidation.
1623          */
1624         dmar_disable_qi(iommu);
1625         /*
1626          * Then enable queued invalidation again. Since there is no pending
1627          * invalidation requests now, it's safe to re-enable queued
1628          * invalidation.
1629          */
1630         __dmar_enable_qi(iommu);
1631
1632         return 0;
1633 }
1634
1635 /*
1636  * Check interrupt remapping support in DMAR table description.
1637  */
1638 int __init dmar_ir_support(void)
1639 {
1640         struct acpi_table_dmar *dmar;
1641         dmar = (struct acpi_table_dmar *)dmar_tbl;
1642         if (!dmar)
1643                 return 0;
1644         return dmar->flags & 0x1;
1645 }
1646
1647 static int __init dmar_free_unused_resources(void)
1648 {
1649         struct dmar_drhd_unit *dmaru, *dmaru_n;
1650
1651         /* DMAR units are in use */
1652         if (irq_remapping_enabled || intel_iommu_enabled)
1653                 return 0;
1654
1655         if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
1656                 bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);
1657
1658         down_write(&dmar_global_lock);
1659         list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
1660                 list_del(&dmaru->list);
1661                 dmar_free_drhd(dmaru);
1662         }
1663         up_write(&dmar_global_lock);
1664
1665         return 0;
1666 }
1667
1668 late_initcall(dmar_free_unused_resources);
1669 IOMMU_INIT_POST(detect_intel_iommu);