virt/kvm/eventfd.c
/*
 * kvm eventfd support - use eventfd objects to signal various KVM events
 *
 * Copyright 2009 Novell.  All Rights Reserved.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Author:
 *      Gregory Haskins <ghaskins@novell.com>
 *
 * This file is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/kvm_irqfd.h>
#include <linux/workqueue.h>
#include <linux/syscalls.h>
#include <linux/wait.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/list.h>
#include <linux/eventfd.h>
#include <linux/kernel.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/seqlock.h>
#include <linux/irqbypass.h>
#include <trace/events/kvm.h>

#include <kvm/iodev.h>

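/*
 * Illustrative only (not part of this file): a minimal userspace sketch of
 * wiring an eventfd to a guest GSI with the KVM_IRQFD ioctl.  "vm_fd" is
 * assumed to come from KVM_CREATE_VM; error handling is omitted and the
 * snippet needs <sys/eventfd.h>, <sys/ioctl.h> and <linux/kvm.h>.  Writing
 * to the eventfd then injects the interrupt; issuing the ioctl again with
 * KVM_IRQFD_FLAG_DEASSIGN set in .flags tears the binding down.
 *
 *	int efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
 *	struct kvm_irqfd irqfd = { .fd = efd, .gsi = 5 };
 *	uint64_t one = 1;
 *
 *	ioctl(vm_fd, KVM_IRQFD, &irqfd);
 *	write(efd, &one, sizeof(one));
 */
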
#ifdef CONFIG_HAVE_KVM_IRQFD


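/*
 * Workqueue handler: actually inject the interrupt for this irqfd.  A plain
 * irqfd is treated as edge-triggered and gets a full pulse (assert then
 * de-assert); a resampler irqfd is level-triggered, so it is only asserted
 * here and is de-asserted from irqfd_resampler_ack() once the guest EOIs.
 */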
static void
irqfd_inject(struct work_struct *work)
{
        struct kvm_kernel_irqfd *irqfd =
                container_of(work, struct kvm_kernel_irqfd, inject);
        struct kvm *kvm = irqfd->kvm;

        if (!irqfd->resampler) {
                kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1,
                                false);
                kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0,
                                false);
        } else
                kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
                            irqfd->gsi, 1, false);
}

/*
 * Since resampler irqfds share an IRQ source ID, we de-assert once
 * then notify all of the resampler irqfds using this GSI.  We can't
 * do multiple de-asserts or we risk racing with incoming re-asserts.
 */
static void
irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
{
        struct kvm_kernel_irqfd_resampler *resampler;
        struct kvm *kvm;
        struct kvm_kernel_irqfd *irqfd;
        int idx;

        resampler = container_of(kian,
                        struct kvm_kernel_irqfd_resampler, notifier);
        kvm = resampler->kvm;

        kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
                    resampler->notifier.gsi, 0, false);

        idx = srcu_read_lock(&kvm->irq_srcu);

        list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
                eventfd_signal(irqfd->resamplefd, 1);

        srcu_read_unlock(&kvm->irq_srcu, idx);
}

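/*
 * Detach an irqfd from its resampler.  The last irqfd using a given GSI also
 * tears the resampler down: the ack notifier is unregistered, the line is
 * left de-asserted and the shared structure is freed.
 */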
static void
irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd)
{
        struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler;
        struct kvm *kvm = resampler->kvm;

        mutex_lock(&kvm->irqfds.resampler_lock);

        list_del_rcu(&irqfd->resampler_link);
        synchronize_srcu(&kvm->irq_srcu);

        if (list_empty(&resampler->list)) {
                list_del(&resampler->link);
                kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier);
                kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
                            resampler->notifier.gsi, 0, false);
                kfree(resampler);
        }

        mutex_unlock(&kvm->irqfds.resampler_lock);
}

/*
 * Race-free decouple logic (ordering is critical)
 */
static void
irqfd_shutdown(struct work_struct *work)
{
        struct kvm_kernel_irqfd *irqfd =
                container_of(work, struct kvm_kernel_irqfd, shutdown);
        u64 cnt;

        /*
         * Synchronize with the wait-queue and unhook ourselves to prevent
         * further events.
         */
        eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);

        /*
         * We know no new events will be scheduled at this point, so block
         * until all previously outstanding events have completed
         */
        flush_work(&irqfd->inject);

        if (irqfd->resampler) {
                irqfd_resampler_shutdown(irqfd);
                eventfd_ctx_put(irqfd->resamplefd);
        }

        /*
         * It is now safe to release the object's resources
         */
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
        irq_bypass_unregister_consumer(&irqfd->consumer);
#endif
        eventfd_ctx_put(irqfd->eventfd);
        kfree(irqfd);
}


/* assumes kvm->irqfds.lock is held */
static bool
irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
{
        return !list_empty(&irqfd->list);
}

/*
 * Mark the irqfd as inactive and schedule it for removal
 *
 * assumes kvm->irqfds.lock is held
 */
static void
irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
{
        BUG_ON(!irqfd_is_active(irqfd));

        list_del_init(&irqfd->list);

        schedule_work(&irqfd->shutdown);
}

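/*
 * Architectures that can complete an interrupt injection without sleeping
 * override this; the weak default simply reports -EWOULDBLOCK so that
 * irqfd_wakeup() falls back to deferring the injection to a work item.
 */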
int __attribute__((weak)) kvm_arch_set_irq_inatomic(
                                struct kvm_kernel_irq_routing_entry *irq,
                                struct kvm *kvm, int irq_source_id,
                                int level,
                                bool line_status)
{
        return -EWOULDBLOCK;
}

/*
 * Called with wqh->lock held and interrupts disabled
 */
static int
irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
        struct kvm_kernel_irqfd *irqfd =
                container_of(wait, struct kvm_kernel_irqfd, wait);
        unsigned long flags = (unsigned long)key;
        struct kvm_kernel_irq_routing_entry irq;
        struct kvm *kvm = irqfd->kvm;
        unsigned seq;
        int idx;

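        /*
         * POLLIN: the eventfd was signalled.  Snapshot the cached routing
         * entry under the seqcount retry loop (we are in a wait-queue
         * callback with interrupts off and cannot sleep, while irqfd_update()
         * may rewrite the entry concurrently) and try to inject directly,
         * deferring to the work item if the arch cannot do it atomically.
         */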
        if (flags & POLLIN) {
                idx = srcu_read_lock(&kvm->irq_srcu);
                do {
                        seq = read_seqcount_begin(&irqfd->irq_entry_sc);
                        irq = irqfd->irq_entry;
                } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
                /* An event has been signaled, inject an interrupt */
                if (kvm_arch_set_irq_inatomic(&irq, kvm,
                                              KVM_USERSPACE_IRQ_SOURCE_ID, 1,
                                              false) == -EWOULDBLOCK)
                        schedule_work(&irqfd->inject);
                srcu_read_unlock(&kvm->irq_srcu, idx);
        }

        if (flags & POLLHUP) {
                /* The eventfd is closing, detach from KVM */
                unsigned long flags;

                spin_lock_irqsave(&kvm->irqfds.lock, flags);

                /*
                 * We must check if someone deactivated the irqfd before
                 * we could acquire the irqfds.lock since the item is
                 * deactivated from the KVM side before it is unhooked from
                 * the wait-queue.  If it is already deactivated, we can
                 * simply return knowing the other side will clean up for us.
                 * We cannot race against the irqfd going away since the
                 * other side is required to acquire wqh->lock, which we hold.
                 */
                if (irqfd_is_active(irqfd))
                        irqfd_deactivate(irqfd);

                spin_unlock_irqrestore(&kvm->irqfds.lock, flags);
        }

        return 0;
}

static void
irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
                        poll_table *pt)
{
        struct kvm_kernel_irqfd *irqfd =
                container_of(pt, struct kvm_kernel_irqfd, pt);
        add_wait_queue(wqh, &irqfd->wait);
}

/* Must be called under irqfds.lock */
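/*
 * Refresh the routing entry cached in irqfd->irq_entry.  Only a GSI that
 * maps to exactly one entry can be injected from atomic context; anything
 * else is recorded as type 0, which forces irqfd_wakeup() onto the
 * work-item path.
 */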
static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
{
        struct kvm_kernel_irq_routing_entry *e;
        struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
        int n_entries;

        n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);

        write_seqcount_begin(&irqfd->irq_entry_sc);

        e = entries;
        if (n_entries == 1)
                irqfd->irq_entry = *e;
        else
                irqfd->irq_entry.type = 0;

        write_seqcount_end(&irqfd->irq_entry_sc);
}

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
void __attribute__((weak)) kvm_arch_irq_bypass_stop(
                                struct irq_bypass_consumer *cons)
{
}

void __attribute__((weak)) kvm_arch_irq_bypass_start(
                                struct irq_bypass_consumer *cons)
{
}

int  __attribute__((weak)) kvm_arch_update_irqfd_routing(
                                struct kvm *kvm, unsigned int host_irq,
                                uint32_t guest_irq, bool set)
{
        return 0;
}
#endif

static int
kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
{
        struct kvm_kernel_irqfd *irqfd, *tmp;
        struct fd f;
        struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
        int ret;
        unsigned int events;
        int idx;

        if (!kvm_arch_intc_initialized(kvm))
                return -EAGAIN;

        irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
        if (!irqfd)
                return -ENOMEM;

        irqfd->kvm = kvm;
        irqfd->gsi = args->gsi;
        INIT_LIST_HEAD(&irqfd->list);
        INIT_WORK(&irqfd->inject, irqfd_inject);
        INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
        seqcount_init(&irqfd->irq_entry_sc);

        f = fdget(args->fd);
        if (!f.file) {
                ret = -EBADF;
                goto out;
        }

        eventfd = eventfd_ctx_fileget(f.file);
        if (IS_ERR(eventfd)) {
                ret = PTR_ERR(eventfd);
                goto fail;
        }

        irqfd->eventfd = eventfd;

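        /*
         * Resampler irqfds share one resampler (and hence one irq ack
         * notifier) per GSI; each irqfd still gets its own resamplefd,
         * which is signalled when the guest acknowledges the interrupt.
         */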
        if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
                struct kvm_kernel_irqfd_resampler *resampler;

                resamplefd = eventfd_ctx_fdget(args->resamplefd);
                if (IS_ERR(resamplefd)) {
                        ret = PTR_ERR(resamplefd);
                        goto fail;
                }

                irqfd->resamplefd = resamplefd;
                INIT_LIST_HEAD(&irqfd->resampler_link);

                mutex_lock(&kvm->irqfds.resampler_lock);

                list_for_each_entry(resampler,
                                    &kvm->irqfds.resampler_list, link) {
                        if (resampler->notifier.gsi == irqfd->gsi) {
                                irqfd->resampler = resampler;
                                break;
                        }
                }

                if (!irqfd->resampler) {
                        resampler = kzalloc(sizeof(*resampler), GFP_KERNEL);
                        if (!resampler) {
                                ret = -ENOMEM;
                                mutex_unlock(&kvm->irqfds.resampler_lock);
                                goto fail;
                        }

                        resampler->kvm = kvm;
                        INIT_LIST_HEAD(&resampler->list);
                        resampler->notifier.gsi = irqfd->gsi;
                        resampler->notifier.irq_acked = irqfd_resampler_ack;
                        INIT_LIST_HEAD(&resampler->link);

                        list_add(&resampler->link, &kvm->irqfds.resampler_list);
                        kvm_register_irq_ack_notifier(kvm,
                                                      &resampler->notifier);
                        irqfd->resampler = resampler;
                }

                list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
                synchronize_srcu(&kvm->irq_srcu);

                mutex_unlock(&kvm->irqfds.resampler_lock);
        }

        /*
         * Install our own custom wake-up handling so we are notified via
         * a callback whenever someone signals the underlying eventfd
         */
        init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
        init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);

        spin_lock_irq(&kvm->irqfds.lock);

        ret = 0;
        list_for_each_entry(tmp, &kvm->irqfds.items, list) {
                if (irqfd->eventfd != tmp->eventfd)
                        continue;
                /* This fd is used for another irq already. */
                ret = -EBUSY;
                spin_unlock_irq(&kvm->irqfds.lock);
                goto fail;
        }

        idx = srcu_read_lock(&kvm->irq_srcu);
        irqfd_update(kvm, irqfd);
        srcu_read_unlock(&kvm->irq_srcu, idx);

        list_add_tail(&irqfd->list, &kvm->irqfds.items);

        spin_unlock_irq(&kvm->irqfds.lock);

        /*
         * Check if there was an event already pending on the eventfd
         * before we registered, and trigger it as if we didn't miss it.
         */
        events = f.file->f_op->poll(f.file, &irqfd->pt);

        if (events & POLLIN)
                schedule_work(&irqfd->inject);

        /*
         * The file could not be dropped any earlier: until the irqfd is
         * fully initialized we might race against the POLLHUP.
         */
        fdput(f);
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
        if (kvm_arch_has_irq_bypass()) {
                irqfd->consumer.token = (void *)irqfd->eventfd;
                irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
                irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
                irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
                irqfd->consumer.start = kvm_arch_irq_bypass_start;
                ret = irq_bypass_register_consumer(&irqfd->consumer);
                if (ret)
                        pr_info("irq bypass consumer (token %p) registration fails: %d\n",
                                irqfd->consumer.token, ret);
        }
#endif

        return 0;

fail:
        if (irqfd->resampler)
                irqfd_resampler_shutdown(irqfd);

        if (resamplefd && !IS_ERR(resamplefd))
                eventfd_ctx_put(resamplefd);

        if (eventfd && !IS_ERR(eventfd))
                eventfd_ctx_put(eventfd);

        fdput(f);

out:
        kfree(irqfd);
        return ret;
}

bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
        struct kvm_irq_ack_notifier *kian;
        int gsi, idx;

        idx = srcu_read_lock(&kvm->irq_srcu);
        gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
        if (gsi != -1)
                hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
                                         link)
                        if (kian->gsi == gsi) {
                                srcu_read_unlock(&kvm->irq_srcu, idx);
                                return true;
                        }

        srcu_read_unlock(&kvm->irq_srcu, idx);

        return false;
}
EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);

void kvm_notify_acked_gsi(struct kvm *kvm, int gsi)
{
        struct kvm_irq_ack_notifier *kian;

        hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
                                 link)
                if (kian->gsi == gsi)
                        kian->irq_acked(kian);
}

void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
        int gsi, idx;

        trace_kvm_ack_irq(irqchip, pin);

        idx = srcu_read_lock(&kvm->irq_srcu);
        gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
        if (gsi != -1)
                kvm_notify_acked_gsi(kvm, gsi);
        srcu_read_unlock(&kvm->irq_srcu, idx);
}

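/*
 * Ack notifiers are walked under kvm->irq_srcu (see kvm_notify_acked_gsi()
 * above), which is why unregistration below must synchronize_srcu() before
 * the caller is allowed to free the notifier.
 */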
void kvm_register_irq_ack_notifier(struct kvm *kvm,
                                   struct kvm_irq_ack_notifier *kian)
{
        mutex_lock(&kvm->irq_lock);
        hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
        mutex_unlock(&kvm->irq_lock);
        kvm_vcpu_request_scan_ioapic(kvm);
}

void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
                                    struct kvm_irq_ack_notifier *kian)
{
        mutex_lock(&kvm->irq_lock);
        hlist_del_init_rcu(&kian->link);
        mutex_unlock(&kvm->irq_lock);
        synchronize_srcu(&kvm->irq_srcu);
        kvm_vcpu_request_scan_ioapic(kvm);
}
#endif

void
kvm_eventfd_init(struct kvm *kvm)
{
#ifdef CONFIG_HAVE_KVM_IRQFD
        spin_lock_init(&kvm->irqfds.lock);
        INIT_LIST_HEAD(&kvm->irqfds.items);
        INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
        mutex_init(&kvm->irqfds.resampler_lock);
#endif
        INIT_LIST_HEAD(&kvm->ioeventfds);
}

#ifdef CONFIG_HAVE_KVM_IRQFD
/*
 * Shut down any irqfds that match fd+gsi
 */
static int
kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
{
        struct kvm_kernel_irqfd *irqfd, *tmp;
        struct eventfd_ctx *eventfd;

        eventfd = eventfd_ctx_fdget(args->fd);
        if (IS_ERR(eventfd))
                return PTR_ERR(eventfd);

        spin_lock_irq(&kvm->irqfds.lock);

        list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
                if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
                        /*
                         * This clearing of irq_entry.type is needed for when
                         * another thread calls kvm_irq_routing_update before
                         * we flush the workqueue below (we synchronize with
                         * kvm_irq_routing_update using irqfds.lock).
                         */
                        write_seqcount_begin(&irqfd->irq_entry_sc);
                        irqfd->irq_entry.type = 0;
                        write_seqcount_end(&irqfd->irq_entry_sc);
                        irqfd_deactivate(irqfd);
                }
        }

        spin_unlock_irq(&kvm->irqfds.lock);
        eventfd_ctx_put(eventfd);

        /*
         * Block until we know all outstanding shutdown jobs have completed
         * so that we guarantee there will not be any more interrupts on this
         * gsi once this deassign function returns.
         */
        flush_work(&irqfd->shutdown);

        return 0;
}

int
kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
{
        if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
                return -EINVAL;

        if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
                return kvm_irqfd_deassign(kvm, args);

        return kvm_irqfd_assign(kvm, args);
}

/*
 * This function is called as the kvm VM fd is being released. Shut down any
 * irqfds that remain open.
 */
void
kvm_irqfd_release(struct kvm *kvm)
{
        struct kvm_kernel_irqfd *irqfd, *tmp;

        spin_lock_irq(&kvm->irqfds.lock);

        list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
                irqfd_deactivate(irqfd);

        spin_unlock_irq(&kvm->irqfds.lock);

        /*
         * Block until we know all outstanding shutdown jobs have completed
         * since we do not take a kvm* reference.
         */
        flush_work(&irqfd->shutdown);

}

/*
 * Take note of a change in irq routing.
 * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
 */
void kvm_irq_routing_update(struct kvm *kvm)
{
        struct kvm_kernel_irqfd *irqfd;

        spin_lock_irq(&kvm->irqfds.lock);

        list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
                irqfd_update(kvm, irqfd);

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
                if (irqfd->producer) {
                        int ret = kvm_arch_update_irqfd_routing(
                                        irqfd->kvm, irqfd->producer->irq,
                                        irqfd->gsi, 1);
                        WARN_ON(ret);
                }
#endif
        }

        spin_unlock_irq(&kvm->irqfds.lock);
}

void kvm_irqfd_exit(void)
{
}
#endif

/*
 * --------------------------------------------------------------------
 * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
 *
 * userspace can register a PIO/MMIO address with an eventfd for receiving
 * notification when the memory has been touched.
 * --------------------------------------------------------------------
 */

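/*
 * Illustrative only (not part of this file): a minimal userspace sketch of
 * registering a 4-byte MMIO doorbell write with the KVM_IOEVENTFD ioctl.
 * "vm_fd" is assumed to come from KVM_CREATE_VM, the doorbell address and
 * match value are made up for the example, and error handling is omitted.
 * Once registered, a guest write of 0x1 to that address signals the eventfd
 * instead of exiting to userspace.
 *
 *	int efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
 *	struct kvm_ioeventfd ioev = {
 *		.addr      = 0xfe000000,
 *		.len       = 4,
 *		.fd        = efd,
 *		.flags     = KVM_IOEVENTFD_FLAG_DATAMATCH,
 *		.datamatch = 0x1,
 *	};
 *
 *	ioctl(vm_fd, KVM_IOEVENTFD, &ioev);
 */
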
struct _ioeventfd {
        struct list_head     list;
        u64                  addr;
        int                  length;
        struct eventfd_ctx  *eventfd;
        u64                  datamatch;
        struct kvm_io_device dev;
        u8                   bus_idx;
        bool                 wildcard;
};

static inline struct _ioeventfd *
to_ioeventfd(struct kvm_io_device *dev)
{
        return container_of(dev, struct _ioeventfd, dev);
}

static void
ioeventfd_release(struct _ioeventfd *p)
{
        eventfd_ctx_put(p->eventfd);
        list_del(&p->list);
        kfree(p);
}

static bool
ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
{
        u64 _val;

        if (addr != p->addr)
                /* address must be precise for a hit */
                return false;

        if (!p->length)
                /* length = 0 means only look at the address, so always a hit */
                return true;

        if (len != p->length)
                /* address-range must be precise for a hit */
                return false;

        if (p->wildcard)
                /* all else equal, wildcard is always a hit */
                return true;

        /* otherwise, we have to actually compare the data */

        BUG_ON(!IS_ALIGNED((unsigned long)val, len));

        switch (len) {
        case 1:
                _val = *(u8 *)val;
                break;
        case 2:
                _val = *(u16 *)val;
                break;
        case 4:
                _val = *(u32 *)val;
                break;
        case 8:
                _val = *(u64 *)val;
                break;
        default:
                return false;
        }

        return _val == p->datamatch;
}

/* MMIO/PIO writes trigger an event if the addr/val match */
static int
ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
                int len, const void *val)
{
        struct _ioeventfd *p = to_ioeventfd(this);

        if (!ioeventfd_in_range(p, addr, len, val))
                return -EOPNOTSUPP;

        eventfd_signal(p->eventfd, 1);
        return 0;
}

/*
 * This function is called as KVM is completely shutting down.  We do not
 * need to worry about locking; just nuke anything we have as quickly as
 * possible.
 */
static void
ioeventfd_destructor(struct kvm_io_device *this)
{
        struct _ioeventfd *p = to_ioeventfd(this);

        ioeventfd_release(p);
}

static const struct kvm_io_device_ops ioeventfd_ops = {
        .write      = ioeventfd_write,
        .destructor = ioeventfd_destructor,
};

/* assumes kvm->slots_lock held */
static bool
ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
{
        struct _ioeventfd *_p;

        list_for_each_entry(_p, &kvm->ioeventfds, list)
                if (_p->bus_idx == p->bus_idx &&
                    _p->addr == p->addr &&
                    (!_p->length || !p->length ||
                     (_p->length == p->length &&
                      (_p->wildcard || p->wildcard ||
                       _p->datamatch == p->datamatch))))
                        return true;

        return false;
}

static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
{
        if (flags & KVM_IOEVENTFD_FLAG_PIO)
                return KVM_PIO_BUS;
        if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY)
                return KVM_VIRTIO_CCW_NOTIFY_BUS;
        return KVM_MMIO_BUS;
}

static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
                                enum kvm_bus bus_idx,
                                struct kvm_ioeventfd *args)
{

        struct eventfd_ctx *eventfd;
        struct _ioeventfd *p;
        int ret;

        eventfd = eventfd_ctx_fdget(args->fd);
        if (IS_ERR(eventfd))
                return PTR_ERR(eventfd);

        p = kzalloc(sizeof(*p), GFP_KERNEL);
        if (!p) {
                ret = -ENOMEM;
                goto fail;
        }

        INIT_LIST_HEAD(&p->list);
        p->addr    = args->addr;
        p->bus_idx = bus_idx;
        p->length  = args->len;
        p->eventfd = eventfd;

        /* The datamatch feature is optional, otherwise this is a wildcard */
        if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
                p->datamatch = args->datamatch;
        else
                p->wildcard = true;

        mutex_lock(&kvm->slots_lock);

        /* Verify that there isn't a match already */
        if (ioeventfd_check_collision(kvm, p)) {
                ret = -EEXIST;
                goto unlock_fail;
        }

        kvm_iodevice_init(&p->dev, &ioeventfd_ops);

        ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
                                      &p->dev);
        if (ret < 0)
                goto unlock_fail;

        kvm->buses[bus_idx]->ioeventfd_count++;
        list_add_tail(&p->list, &kvm->ioeventfds);

        mutex_unlock(&kvm->slots_lock);

        return 0;

unlock_fail:
        mutex_unlock(&kvm->slots_lock);

fail:
        kfree(p);
        eventfd_ctx_put(eventfd);

        return ret;
}

static int
kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
                           struct kvm_ioeventfd *args)
{
        struct _ioeventfd        *p, *tmp;
        struct eventfd_ctx       *eventfd;
        int                       ret = -ENOENT;

        eventfd = eventfd_ctx_fdget(args->fd);
        if (IS_ERR(eventfd))
                return PTR_ERR(eventfd);

        mutex_lock(&kvm->slots_lock);

        list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
                bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);

                if (p->bus_idx != bus_idx ||
                    p->eventfd != eventfd  ||
                    p->addr != args->addr  ||
                    p->length != args->len ||
                    p->wildcard != wildcard)
                        continue;

                if (!p->wildcard && p->datamatch != args->datamatch)
                        continue;

                kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
                kvm->buses[bus_idx]->ioeventfd_count--;
                ioeventfd_release(p);
                ret = 0;
                break;
        }

        mutex_unlock(&kvm->slots_lock);

        eventfd_ctx_put(eventfd);

        return ret;
}

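/*
 * A zero-length MMIO ioeventfd is also registered on KVM_FAST_MMIO_BUS (see
 * kvm_assign_ioeventfd() below), so deassign has to drop that shadow
 * registration as well.
 */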
static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
        enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags);
        int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);

        if (!args->len && bus_idx == KVM_MMIO_BUS)
                kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);

        return ret;
}

static int
kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
        enum kvm_bus              bus_idx;
        int ret;

        bus_idx = ioeventfd_bus_from_flags(args->flags);
        /* must be natural-word sized, or 0 to ignore length */
        switch (args->len) {
        case 0:
        case 1:
        case 2:
        case 4:
        case 8:
                break;
        default:
                return -EINVAL;
        }

        /* check for range overflow */
        if (args->addr + args->len < args->addr)
                return -EINVAL;

        /* check for extra flags that we don't understand */
        if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
                return -EINVAL;

        /* ioeventfd with no length can't be combined with DATAMATCH */
        if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH))
                return -EINVAL;

        ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
        if (ret)
                goto fail;

        /* When length is ignored, MMIO is also put on a separate bus, for
         * faster lookups.
         */
        if (!args->len && bus_idx == KVM_MMIO_BUS) {
                ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
                if (ret < 0)
                        goto fast_fail;
        }

        return 0;

fast_fail:
        kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
fail:
        return ret;
}

int
kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
        if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
                return kvm_deassign_ioeventfd(kvm, args);

        return kvm_assign_ioeventfd(kvm, args);
}