/*
 * Kernel-based Virtual Machine - device assignment support
 *
 * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/fs.h>
#include "irq.h"
#include "assigned-dev.h"
#include <trace/events/kvm.h>

struct kvm_assigned_dev_kernel {
	struct kvm_irq_ack_notifier ack_notifier;
	struct list_head list;
	int assigned_dev_id;
	int host_segnr;
	int host_busnr;
	int host_devfn;
	unsigned int entries_nr;
	int host_irq;
	bool host_irq_disabled;
	bool pci_2_3;
	struct msix_entry *host_msix_entries;
	int guest_irq;
	struct msix_entry *guest_msix_entries;
	unsigned long irq_requested_type;
	int irq_source_id;
	int flags;
	struct pci_dev *dev;
	struct kvm *kvm;
	spinlock_t intx_lock;
	spinlock_t intx_mask_lock;
	char irq_name[32];
	struct pci_saved_state *pci_saved_state;
};

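/*
 * Look up an assigned device by its user-visible id on the per-VM list.
 * Returns NULL if no device with that id has been assigned.
 */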
static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
						      int assigned_dev_id)
{
	struct kvm_assigned_dev_kernel *match;

	list_for_each_entry(match, head, list) {
		if (match->assigned_dev_id == assigned_dev_id)
			return match;
	}
	return NULL;
}

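/*
 * Map a host IRQ number back to its index in the device's MSI-X entry
 * table.  Returns -1 (with a warning) if the vector is unknown.
 */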
static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
				    *assigned_dev, int irq)
{
	int i, index;
	struct msix_entry *host_msix_entries;

	host_msix_entries = assigned_dev->host_msix_entries;

	index = -1;
	for (i = 0; i < assigned_dev->entries_nr; i++)
		if (irq == host_msix_entries[i].vector) {
			index = i;
			break;
		}
	if (index < 0)
		printk(KERN_WARNING "Failed to find corresponding MSI-X entry!\n");

	return index;
}

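/*
 * Hard irq handler for INTx on devices that support PCI 2.3 masking.  The
 * line may be shared, so only claim the interrupt (and wake the threaded
 * handler) if this device actually asserted it; the check also masks the
 * interrupt at the device until the guest acks it.
 */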
static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int ret;

	spin_lock(&assigned_dev->intx_lock);
	if (pci_check_and_mask_intx(assigned_dev->dev)) {
		assigned_dev->host_irq_disabled = true;
		ret = IRQ_WAKE_THREAD;
	} else
		ret = IRQ_NONE;
	spin_unlock(&assigned_dev->intx_lock);

	return ret;
}

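/*
 * Inject the interrupt into the guest, unless userspace has masked INTx
 * delivery for this device via KVM_ASSIGN_SET_INTX_MASK.
 */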
static void
kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
				 int vector)
{
	if (unlikely(assigned_dev->irq_requested_type &
		     KVM_DEV_IRQ_GUEST_INTX)) {
		spin_lock(&assigned_dev->intx_mask_lock);
		if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
			kvm_set_irq(assigned_dev->kvm,
				    assigned_dev->irq_source_id, vector, 1,
				    false);
		spin_unlock(&assigned_dev->intx_mask_lock);
	} else
		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
			    vector, 1, false);
}

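/*
 * Threaded INTx handler.  Without PCI 2.3 device-level masking we have to
 * keep the host line disabled until the guest acks the interrupt, since
 * the device keeps the line asserted until its driver services it.
 */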
static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;

	if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
		spin_lock_irq(&assigned_dev->intx_lock);
		disable_irq_nosync(irq);
		assigned_dev->host_irq_disabled = true;
		spin_unlock_irq(&assigned_dev->intx_lock);
	}

	kvm_assigned_dev_raise_guest_irq(assigned_dev,
					 assigned_dev->guest_irq);

	return IRQ_HANDLED;
}

/*
 * Deliver an IRQ in an atomic context if we can, or return a failure so
 * that the caller can retry in a process context.
 * Return value:
 *  -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
 *  Other values - No need to retry.
 */
static int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq,
				int level)
{
	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
	struct kvm_kernel_irq_routing_entry *e;
	int ret = -EINVAL;
	int idx;

	trace_kvm_set_irq(irq, level, irq_source_id);

	/*
	 * Injection into either PIC or IOAPIC might need to scan all CPUs,
	 * which would need to be retried from thread context; when the same
	 * GSI is connected to both PIC and IOAPIC, we'd have to report a
	 * partial failure here.
	 * Since there's no easy way to do this, we only support injecting MSI,
	 * which is limited to a 1:1 GSI mapping.
	 */
	idx = srcu_read_lock(&kvm->irq_srcu);
	if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
		e = &entries[0];
		ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id,
						irq, level);
	}
	srcu_read_unlock(&kvm->irq_srcu, idx);
	return ret;
}

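/*
 * MSI handlers: try to inject from hard irq context and fall back to the
 * irq thread only if the atomic injection path would block.
 */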
static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int ret = kvm_set_irq_inatomic(assigned_dev->kvm,
				       assigned_dev->irq_source_id,
				       assigned_dev->guest_irq, 1);
	return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
}

static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;

	kvm_assigned_dev_raise_guest_irq(assigned_dev,
					 assigned_dev->guest_irq);

	return IRQ_HANDLED;
}

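/*
 * MSI-X handlers: same split as MSI, but the guest vector is looked up
 * from the entry that matches the host IRQ.
 */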
static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int index = find_index_from_host_irq(assigned_dev, irq);
	u32 vector;
	int ret = 0;

	if (index >= 0) {
		vector = assigned_dev->guest_msix_entries[index].vector;
		ret = kvm_set_irq_inatomic(assigned_dev->kvm,
					   assigned_dev->irq_source_id,
					   vector, 1);
	}

	return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
}

static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int index = find_index_from_host_irq(assigned_dev, irq);
	u32 vector;

	if (index >= 0) {
		vector = assigned_dev->guest_msix_entries[index].vector;
		kvm_assigned_dev_raise_guest_irq(assigned_dev, vector);
	}

	return IRQ_HANDLED;
}

/* Ack the irq line for an assigned device */
static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
{
	struct kvm_assigned_dev_kernel *dev =
		container_of(kian, struct kvm_assigned_dev_kernel,
			     ack_notifier);

	kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false);

	spin_lock(&dev->intx_mask_lock);

	if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) {
		bool reassert = false;

		spin_lock_irq(&dev->intx_lock);
		/*
		 * The guest IRQ may be shared so this ack can come from an
		 * IRQ for another guest device.
		 */
		if (dev->host_irq_disabled) {
			if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3))
				enable_irq(dev->host_irq);
			else if (!pci_check_and_unmask_intx(dev->dev))
				reassert = true;
			dev->host_irq_disabled = reassert;
		}
		spin_unlock_irq(&dev->intx_lock);

		if (reassert)
			kvm_set_irq(dev->kvm, dev->irq_source_id,
				    dev->guest_irq, 1, false);
	}

	spin_unlock(&dev->intx_mask_lock);
}

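/*
 * Tear down the guest side of an IRQ assignment: drop the ack notifier,
 * deassert the interrupt and release the IRQ source id.
 */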
static void deassign_guest_irq(struct kvm *kvm,
			       struct kvm_assigned_dev_kernel *assigned_dev)
{
	if (assigned_dev->ack_notifier.gsi != -1)
		kvm_unregister_irq_ack_notifier(kvm,
						&assigned_dev->ack_notifier);

	kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
		    assigned_dev->guest_irq, 0, false);

	if (assigned_dev->irq_source_id != -1)
		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
	assigned_dev->irq_source_id = -1;
	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
}

/* This function implicitly holds kvm->lock due to cancel_work_sync(). */
static void deassign_host_irq(struct kvm *kvm,
			      struct kvm_assigned_dev_kernel *assigned_dev)
{
	/*
	 * We disable the irq here to prevent further events.
	 *
	 * Note that this may result in a nested disable if the interrupt
	 * type is INTx, but that's OK since we are going to free it anyway.
	 *
	 * If this function is called as part of VM destruction, make sure
	 * the kvm state is still valid at this point, since we may also
	 * have to wait on a currently running IRQ handler.
	 */
	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
		int i;
		for (i = 0; i < assigned_dev->entries_nr; i++)
			disable_irq(assigned_dev->host_msix_entries[i].vector);

		for (i = 0; i < assigned_dev->entries_nr; i++)
			free_irq(assigned_dev->host_msix_entries[i].vector,
				 assigned_dev);

		assigned_dev->entries_nr = 0;
		kfree(assigned_dev->host_msix_entries);
		kfree(assigned_dev->guest_msix_entries);
		pci_disable_msix(assigned_dev->dev);
	} else {
		/* Deal with MSI and INTx */
		if ((assigned_dev->irq_requested_type &
		     KVM_DEV_IRQ_HOST_INTX) &&
		    (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
			spin_lock_irq(&assigned_dev->intx_lock);
			pci_intx(assigned_dev->dev, false);
			spin_unlock_irq(&assigned_dev->intx_lock);
			synchronize_irq(assigned_dev->host_irq);
		} else
			disable_irq(assigned_dev->host_irq);

		free_irq(assigned_dev->host_irq, assigned_dev);

		if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
			pci_disable_msi(assigned_dev->dev);
	}

	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
}

static int kvm_deassign_irq(struct kvm *kvm,
			    struct kvm_assigned_dev_kernel *assigned_dev,
			    unsigned long irq_requested_type)
{
	unsigned long guest_irq_type, host_irq_type;

	if (!irqchip_in_kernel(kvm))
		return -EINVAL;
	/* no irq assignment to deassign */
	if (!assigned_dev->irq_requested_type)
		return -ENXIO;

	host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
	guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;

	if (host_irq_type)
		deassign_host_irq(kvm, assigned_dev);
	if (guest_irq_type)
		deassign_guest_irq(kvm, assigned_dev);

	return 0;
}

static void kvm_free_assigned_irq(struct kvm *kvm,
				  struct kvm_assigned_dev_kernel *assigned_dev)
{
	kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
}

static void kvm_free_assigned_device(struct kvm *kvm,
				     struct kvm_assigned_dev_kernel
				     *assigned_dev)
{
	kvm_free_assigned_irq(kvm, assigned_dev);

	pci_reset_function(assigned_dev->dev);
	if (pci_load_and_free_saved_state(assigned_dev->dev,
					  &assigned_dev->pci_saved_state))
		printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
		       __func__, dev_name(&assigned_dev->dev->dev));
	else
		pci_restore_state(assigned_dev->dev);

	pci_clear_dev_assigned(assigned_dev->dev);

	pci_release_regions(assigned_dev->dev);
	pci_disable_device(assigned_dev->dev);
	pci_dev_put(assigned_dev->dev);

	list_del(&assigned_dev->list);
	kfree(assigned_dev);
}

void kvm_free_all_assigned_devices(struct kvm *kvm)
{
	struct kvm_assigned_dev_kernel *assigned_dev, *tmp;

	list_for_each_entry_safe(assigned_dev, tmp,
				 &kvm->arch.assigned_dev_head, list) {
		kvm_free_assigned_device(kvm, assigned_dev);
	}
}

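/*
 * Set up the host side of an INTx assignment using a threaded interrupt
 * handler.
 */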
static int assigned_device_enable_host_intx(struct kvm *kvm,
					    struct kvm_assigned_dev_kernel *dev)
{
	irq_handler_t irq_handler;
	unsigned long flags;

	dev->host_irq = dev->dev->irq;

	/*
	 * We can only share the IRQ line with other host devices if we are
	 * able to disable the IRQ source at device-level - independently of
	 * the guest driver. Otherwise host devices may suffer from unbounded
	 * IRQ latencies when the guest keeps the line asserted.
	 */
	if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
		irq_handler = kvm_assigned_dev_intx;
		flags = IRQF_SHARED;
	} else {
		irq_handler = NULL;
		flags = IRQF_ONESHOT;
	}
	if (request_threaded_irq(dev->host_irq, irq_handler,
				 kvm_assigned_dev_thread_intx, flags,
				 dev->irq_name, dev))
		return -EIO;

	if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
		spin_lock_irq(&dev->intx_lock);
		pci_intx(dev->dev, true);
		spin_unlock_irq(&dev->intx_lock);
	}
	return 0;
}

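/*
 * Set up the host side of an MSI assignment: enable MSI on the device if
 * necessary and request a threaded handler for the resulting vector.
 */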
static int assigned_device_enable_host_msi(struct kvm *kvm,
					   struct kvm_assigned_dev_kernel *dev)
{
	int r;

	if (!dev->dev->msi_enabled) {
		r = pci_enable_msi(dev->dev);
		if (r)
			return r;
	}

	dev->host_irq = dev->dev->irq;
	if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi,
				 kvm_assigned_dev_thread_msi, 0,
				 dev->irq_name, dev)) {
		pci_disable_msi(dev->dev);
		return -EIO;
	}

	return 0;
}

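/*
 * Set up the host side of an MSI-X assignment: enable exactly the number
 * of vectors userspace configured and request a threaded handler for each.
 */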
static int assigned_device_enable_host_msix(struct kvm *kvm,
					    struct kvm_assigned_dev_kernel *dev)
{
	int i, r = -EINVAL;

	/*
	 * host_msix_entries and guest_msix_entries should have been
	 * initialized.
	 */
	if (dev->entries_nr == 0)
		return r;

	r = pci_enable_msix_exact(dev->dev,
				  dev->host_msix_entries, dev->entries_nr);
	if (r)
		return r;

	for (i = 0; i < dev->entries_nr; i++) {
		r = request_threaded_irq(dev->host_msix_entries[i].vector,
					 kvm_assigned_dev_msix,
					 kvm_assigned_dev_thread_msix,
					 0, dev->irq_name, dev);
		if (r)
			goto err;
	}

	return 0;
err:
	for (i -= 1; i >= 0; i--)
		free_irq(dev->host_msix_entries[i].vector, dev);
	pci_disable_msix(dev->dev);
	return r;
}

static int assigned_device_enable_guest_intx(struct kvm *kvm,
				struct kvm_assigned_dev_kernel *dev,
				struct kvm_assigned_irq *irq)
{
	dev->guest_irq = irq->guest_irq;
	dev->ack_notifier.gsi = irq->guest_irq;
	return 0;
}

static int assigned_device_enable_guest_msi(struct kvm *kvm,
			struct kvm_assigned_dev_kernel *dev,
			struct kvm_assigned_irq *irq)
{
	dev->guest_irq = irq->guest_irq;
	dev->ack_notifier.gsi = -1;
	return 0;
}

static int assigned_device_enable_guest_msix(struct kvm *kvm,
			struct kvm_assigned_dev_kernel *dev,
			struct kvm_assigned_irq *irq)
{
	dev->guest_irq = irq->guest_irq;
	dev->ack_notifier.gsi = -1;
	return 0;
}

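/*
 * Wire up the host side of an assignment for exactly one interrupt type
 * (INTx, MSI or MSI-X).  Fails with -EEXIST if a host IRQ is already
 * assigned.
 */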
static int assign_host_irq(struct kvm *kvm,
			   struct kvm_assigned_dev_kernel *dev,
			   __u32 host_irq_type)
{
	int r = -EEXIST;

	if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
		return r;

	snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
		 pci_name(dev->dev));

	switch (host_irq_type) {
	case KVM_DEV_IRQ_HOST_INTX:
		r = assigned_device_enable_host_intx(kvm, dev);
		break;
	case KVM_DEV_IRQ_HOST_MSI:
		r = assigned_device_enable_host_msi(kvm, dev);
		break;
	case KVM_DEV_IRQ_HOST_MSIX:
		r = assigned_device_enable_host_msix(kvm, dev);
		break;
	default:
		r = -EINVAL;
	}
	dev->host_irq_disabled = false;

	if (!r)
		dev->irq_requested_type |= host_irq_type;

	return r;
}

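/*
 * Wire up the guest side of an assignment: allocate an IRQ source id,
 * record the guest GSI and register the ack notifier where one is needed
 * (INTx only).
 */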
static int assign_guest_irq(struct kvm *kvm,
			    struct kvm_assigned_dev_kernel *dev,
			    struct kvm_assigned_irq *irq,
			    unsigned long guest_irq_type)
{
	int id;
	int r = -EEXIST;

	if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
		return r;

	id = kvm_request_irq_source_id(kvm);
	if (id < 0)
		return id;

	dev->irq_source_id = id;

	switch (guest_irq_type) {
	case KVM_DEV_IRQ_GUEST_INTX:
		r = assigned_device_enable_guest_intx(kvm, dev, irq);
		break;
	case KVM_DEV_IRQ_GUEST_MSI:
		r = assigned_device_enable_guest_msi(kvm, dev, irq);
		break;
	case KVM_DEV_IRQ_GUEST_MSIX:
		r = assigned_device_enable_guest_msix(kvm, dev, irq);
		break;
	default:
		r = -EINVAL;
	}

	if (!r) {
		dev->irq_requested_type |= guest_irq_type;
		if (dev->ack_notifier.gsi != -1)
			kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
	} else {
		kvm_free_irq_source_id(kvm, dev->irq_source_id);
		dev->irq_source_id = -1;
	}

	return r;
}

/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
				   struct kvm_assigned_irq *assigned_irq)
{
	int r = -EINVAL;
	struct kvm_assigned_dev_kernel *match;
	unsigned long host_irq_type, guest_irq_type;

	if (!irqchip_in_kernel(kvm))
		return r;

	mutex_lock(&kvm->lock);
	r = -ENODEV;
	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_irq->assigned_dev_id);
	if (!match)
		goto out;

	host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
	guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);

	r = -EINVAL;
	/* can only assign one type at a time */
	if (hweight_long(host_irq_type) > 1)
		goto out;
	if (hweight_long(guest_irq_type) > 1)
		goto out;
	if (host_irq_type == 0 && guest_irq_type == 0)
		goto out;

	r = 0;
	if (host_irq_type)
		r = assign_host_irq(kvm, match, host_irq_type);
	if (r)
		goto out;

	if (guest_irq_type)
		r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
out:
	mutex_unlock(&kvm->lock);
	return r;
}

static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
					 struct kvm_assigned_irq
					 *assigned_irq)
{
	int r = -ENODEV;
	struct kvm_assigned_dev_kernel *match;
	unsigned long irq_type;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_irq->assigned_dev_id);
	if (!match)
		goto out;

	irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK |
					  KVM_DEV_IRQ_GUEST_MASK);
	r = kvm_deassign_irq(kvm, match, irq_type);
out:
	mutex_unlock(&kvm->lock);
	return r;
}

/*
 * We want to test whether the caller has been granted permissions to
 * use this device.  To be able to configure and control the device,
 * the user needs access to PCI configuration space and BAR resources.
 * These are accessed through PCI sysfs.  PCI config space is often
 * passed to the process calling this ioctl via a file descriptor, so we
 * can't rely on access to that file.  We can check for permissions
 * on each of the BAR resource files, which is a pretty clear
 * indicator that the user has been granted access to the device.
 */
static int probe_sysfs_permissions(struct pci_dev *dev)
{
#ifdef CONFIG_SYSFS
	int i;
	bool bar_found = false;

	for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
		char *kpath, *syspath;
		struct path path;
		struct inode *inode;
		int r;

		if (!pci_resource_len(dev, i))
			continue;

		kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
		if (!kpath)
			return -ENOMEM;

		/* Per sysfs-rules, sysfs is always at /sys */
		syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
		kfree(kpath);
		if (!syspath)
			return -ENOMEM;

		r = kern_path(syspath, LOOKUP_FOLLOW, &path);
		kfree(syspath);
		if (r)
			return r;

		inode = d_backing_inode(path.dentry);

		r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
		path_put(&path);
		if (r)
			return r;

		bar_found = true;
	}

	/* If no resources, probably something special */
	if (!bar_found)
		return -EPERM;

	return 0;
#else
	return -EINVAL; /* No way to control the device without sysfs */
#endif
}

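/*
 * KVM_ASSIGN_PCI_DEVICE: take ownership of a host PCI device and attach
 * it to the VM's IOMMU domain.  The device is reset and its config space
 * saved so it can be restored on deassignment.
 */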
static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
				      struct kvm_assigned_pci_dev *assigned_dev)
{
	int r = 0, idx;
	struct kvm_assigned_dev_kernel *match;
	struct pci_dev *dev;

	if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	idx = srcu_read_lock(&kvm->srcu);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_dev->assigned_dev_id);
	if (match) {
		/* device already assigned */
		r = -EEXIST;
		goto out;
	}

	match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
	if (match == NULL) {
		printk(KERN_INFO "%s: Couldn't allocate memory\n",
		       __func__);
		r = -ENOMEM;
		goto out;
	}
	dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
					  assigned_dev->busnr,
					  assigned_dev->devfn);
	if (!dev) {
		printk(KERN_INFO "%s: host device not found\n", __func__);
		r = -EINVAL;
		goto out_free;
	}

	/* Don't allow bridges to be assigned */
	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
		r = -EPERM;
		goto out_put;
	}

	r = probe_sysfs_permissions(dev);
	if (r)
		goto out_put;

	if (pci_enable_device(dev)) {
		printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
		r = -EBUSY;
		goto out_put;
	}
	r = pci_request_regions(dev, "kvm_assigned_device");
	if (r) {
		printk(KERN_INFO "%s: Could not get access to device regions\n",
		       __func__);
		goto out_disable;
	}

	pci_reset_function(dev);
	pci_save_state(dev);
	match->pci_saved_state = pci_store_saved_state(dev);
	if (!match->pci_saved_state)
		printk(KERN_DEBUG "%s: Couldn't store %s saved state\n",
		       __func__, dev_name(&dev->dev));

	if (!pci_intx_mask_supported(dev))
		assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3;

	match->assigned_dev_id = assigned_dev->assigned_dev_id;
	match->host_segnr = assigned_dev->segnr;
	match->host_busnr = assigned_dev->busnr;
	match->host_devfn = assigned_dev->devfn;
	match->flags = assigned_dev->flags;
	match->dev = dev;
	spin_lock_init(&match->intx_lock);
	spin_lock_init(&match->intx_mask_lock);
	match->irq_source_id = -1;
	match->kvm = kvm;
	match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;

	list_add(&match->list, &kvm->arch.assigned_dev_head);

	if (!kvm->arch.iommu_domain) {
		r = kvm_iommu_map_guest(kvm);
		if (r)
			goto out_list_del;
	}
	r = kvm_assign_device(kvm, match->dev);
	if (r)
		goto out_list_del;

out:
	srcu_read_unlock(&kvm->srcu, idx);
	mutex_unlock(&kvm->lock);
	return r;
out_list_del:
	if (pci_load_and_free_saved_state(dev, &match->pci_saved_state))
		printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
		       __func__, dev_name(&dev->dev));
	list_del(&match->list);
	pci_release_regions(dev);
out_disable:
	pci_disable_device(dev);
out_put:
	pci_dev_put(dev);
out_free:
	kfree(match);
	srcu_read_unlock(&kvm->srcu, idx);
	mutex_unlock(&kvm->lock);
	return r;
}

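/*
 * KVM_DEASSIGN_PCI_DEVICE: detach the device from the IOMMU domain and
 * release all resources taken at assignment time.
 */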
static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
		struct kvm_assigned_pci_dev *assigned_dev)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *match;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_dev->assigned_dev_id);
	if (!match) {
		printk(KERN_INFO "%s: device hasn't been assigned before, "
		  "so it cannot be deassigned\n", __func__);
		r = -EINVAL;
		goto out;
	}

	kvm_deassign_device(kvm, match->dev);

	kvm_free_assigned_device(kvm, match);

out:
	mutex_unlock(&kvm->lock);
	return r;
}

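/*
 * KVM_ASSIGN_SET_MSIX_NR: fix the number of MSI-X entries for a device.
 * This can only be done once and must happen before the entries are
 * filled in and the host IRQs are assigned.
 */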
static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
				    struct kvm_assigned_msix_nr *entry_nr)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *adev;

	mutex_lock(&kvm->lock);

	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      entry_nr->assigned_dev_id);
	if (!adev) {
		r = -EINVAL;
		goto msix_nr_out;
	}

	if (adev->entries_nr == 0) {
		adev->entries_nr = entry_nr->entry_nr;
		if (adev->entries_nr == 0 ||
		    adev->entries_nr > KVM_MAX_MSIX_PER_DEV) {
			r = -EINVAL;
			goto msix_nr_out;
		}

		adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
						entry_nr->entry_nr,
						GFP_KERNEL);
		if (!adev->host_msix_entries) {
			r = -ENOMEM;
			goto msix_nr_out;
		}
		adev->guest_msix_entries =
			kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr,
				GFP_KERNEL);
		if (!adev->guest_msix_entries) {
			kfree(adev->host_msix_entries);
			r = -ENOMEM;
			goto msix_nr_out;
		}
	} else /* Not allowed to set the MSI-X entry number twice */
		r = -EINVAL;
msix_nr_out:
	mutex_unlock(&kvm->lock);
	return r;
}

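/*
 * KVM_ASSIGN_SET_MSIX_ENTRY: route one MSI-X table entry to a guest GSI.
 * Reuses the slot if the entry was set before, otherwise takes the first
 * free one.
 */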
static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
				       struct kvm_assigned_msix_entry *entry)
{
	int r = 0, i;
	struct kvm_assigned_dev_kernel *adev;

	mutex_lock(&kvm->lock);

	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      entry->assigned_dev_id);

	if (!adev) {
		r = -EINVAL;
		goto msix_entry_out;
	}

	for (i = 0; i < adev->entries_nr; i++)
		if (adev->guest_msix_entries[i].vector == 0 ||
		    adev->guest_msix_entries[i].entry == entry->entry) {
			adev->guest_msix_entries[i].entry = entry->entry;
			adev->guest_msix_entries[i].vector = entry->gsi;
			adev->host_msix_entries[i].entry = entry->entry;
			break;
		}
	if (i == adev->entries_nr) {
		r = -ENOSPC;
		goto msix_entry_out;
	}

msix_entry_out:
	mutex_unlock(&kvm->lock);

	return r;
}

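/*
 * KVM_ASSIGN_SET_INTX_MASK: let userspace mask or unmask INTx delivery
 * for an assigned device.  Masking only suppresses injection into the
 * guest; the host line is masked on demand when an interrupt arrives.
 */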
static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
		struct kvm_assigned_pci_dev *assigned_dev)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *match;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_dev->assigned_dev_id);
	if (!match) {
		r = -ENODEV;
		goto out;
	}

	spin_lock(&match->intx_mask_lock);

	match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX;
	match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX;

	if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
		if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
			kvm_set_irq(match->kvm, match->irq_source_id,
				    match->guest_irq, 0, false);
			/*
			 * Masking at hardware-level is performed on demand,
			 * i.e. when an IRQ actually arrives at the host.
			 */
		} else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
			/*
			 * Unmask the IRQ line if required. Unmasking at
			 * device level will be performed by user space.
			 */
			spin_lock_irq(&match->intx_lock);
			if (match->host_irq_disabled) {
				enable_irq(match->host_irq);
				match->host_irq_disabled = false;
			}
			spin_unlock_irq(&match->intx_lock);
		}
	}

	spin_unlock(&match->intx_mask_lock);

out:
	mutex_unlock(&kvm->lock);
	return r;
}

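/*
 * Dispatcher for the (legacy) device assignment ioctls.  A typical
 * userspace sequence - sketched here for illustration only, see
 * Documentation/virtual/kvm/api.txt for the authoritative description -
 * looks like:
 *
 *	struct kvm_assigned_pci_dev dev = {
 *		.assigned_dev_id = id,
 *		.segnr = segnr, .busnr = busnr, .devfn = devfn,
 *		.flags = KVM_DEV_ASSIGN_ENABLE_IOMMU,
 *	};
 *	struct kvm_assigned_irq irq = {
 *		.assigned_dev_id = id,
 *		.guest_irq = gsi,
 *		.flags = KVM_DEV_IRQ_HOST_INTX | KVM_DEV_IRQ_GUEST_INTX,
 *	};
 *
 *	ioctl(vm_fd, KVM_ASSIGN_PCI_DEVICE, &dev);
 *	ioctl(vm_fd, KVM_ASSIGN_DEV_IRQ, &irq);
 *	...
 *	ioctl(vm_fd, KVM_DEASSIGN_DEV_IRQ, &irq);
 *	ioctl(vm_fd, KVM_DEASSIGN_PCI_DEVICE, &dev);
 */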
long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
				  unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	int r;

	switch (ioctl) {
	case KVM_ASSIGN_PCI_DEVICE: {
		struct kvm_assigned_pci_dev assigned_dev;

		r = -EFAULT;
		if (copy_from_user(&assigned_dev, argp, sizeof(assigned_dev)))
			goto out;
		r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
		if (r)
			goto out;
		break;
	}
	case KVM_ASSIGN_IRQ: {
		r = -EOPNOTSUPP;
		break;
	}
	case KVM_ASSIGN_DEV_IRQ: {
		struct kvm_assigned_irq assigned_irq;

		r = -EFAULT;
		if (copy_from_user(&assigned_irq, argp, sizeof(assigned_irq)))
			goto out;
		r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
		if (r)
			goto out;
		break;
	}
	case KVM_DEASSIGN_DEV_IRQ: {
		struct kvm_assigned_irq assigned_irq;

		r = -EFAULT;
		if (copy_from_user(&assigned_irq, argp, sizeof(assigned_irq)))
			goto out;
		r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
		if (r)
			goto out;
		break;
	}
	case KVM_DEASSIGN_PCI_DEVICE: {
		struct kvm_assigned_pci_dev assigned_dev;

		r = -EFAULT;
		if (copy_from_user(&assigned_dev, argp, sizeof(assigned_dev)))
			goto out;
		r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
		if (r)
			goto out;
		break;
	}
	case KVM_ASSIGN_SET_MSIX_NR: {
		struct kvm_assigned_msix_nr entry_nr;

		r = -EFAULT;
		if (copy_from_user(&entry_nr, argp, sizeof(entry_nr)))
			goto out;
		r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
		if (r)
			goto out;
		break;
	}
	case KVM_ASSIGN_SET_MSIX_ENTRY: {
		struct kvm_assigned_msix_entry entry;

		r = -EFAULT;
		if (copy_from_user(&entry, argp, sizeof(entry)))
			goto out;
		r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
		if (r)
			goto out;
		break;
	}
	case KVM_ASSIGN_SET_INTX_MASK: {
		struct kvm_assigned_pci_dev assigned_dev;

		r = -EFAULT;
		if (copy_from_user(&assigned_dev, argp, sizeof(assigned_dev)))
			goto out;
		r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
		break;
	}
	default:
		r = -ENOTTY;
		break;
	}
out:
	return r;
}