1 /*
2  * VMware VMCI Driver
3  *
4  * Copyright (C) 2012 VMware, Inc. All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License as published by the
8  * Free Software Foundation version 2 and no later version.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13  * for more details.
14  */
15
16 #include <linux/vmw_vmci_defs.h>
17 #include <linux/vmw_vmci_api.h>
18 #include <linux/highmem.h>
19 #include <linux/kernel.h>
20 #include <linux/mm.h>
21 #include <linux/module.h>
22 #include <linux/mutex.h>
23 #include <linux/pagemap.h>
24 #include <linux/sched.h>
25 #include <linux/slab.h>
26 #include <linux/socket.h>
27 #include <linux/wait.h>
28
29 #include "vmci_handle_array.h"
30 #include "vmci_queue_pair.h"
31 #include "vmci_datagram.h"
32 #include "vmci_resource.h"
33 #include "vmci_context.h"
34 #include "vmci_driver.h"
35 #include "vmci_event.h"
36 #include "vmci_route.h"
37
38 /*
39  * In the following, we will distinguish between two kinds of VMX processes -
40  * the ones with versions lower than VMCI_VERSION_NOVMVM that use specialized
41  * VMCI page files in the VMX to support VM to VM communication, and the
42  * newer ones that use the guest memory directly. In the following, we will
43  * refer to the older VMX versions as old-style VMX'en, and the newer ones as
44  * new-style VMX'en.
45  *
46  * The state transition diagram is as follows (the VMCIQPB_ prefix has been
47  * removed for readability) - see below for more details on the transitions:
48  *
49  *            --------------  NEW  -------------
50  *            |                                |
51  *           \_/                              \_/
52  *     CREATED_NO_MEM <-----------------> CREATED_MEM
53  *            |    |                           |
54  *            |    o-----------------------o   |
55  *            |                            |   |
56  *           \_/                          \_/ \_/
57  *     ATTACHED_NO_MEM <----------------> ATTACHED_MEM
58  *            |                            |   |
59  *            |     o----------------------o   |
60  *            |     |                          |
61  *           \_/   \_/                        \_/
62  *     SHUTDOWN_NO_MEM <----------------> SHUTDOWN_MEM
63  *            |                                |
64  *            |                                |
65  *            -------------> gone <-------------
66  *
67  * In more detail. When a VMCI queue pair is first created, it will be in the
68  * VMCIQPB_NEW state. It will then move into one of the following states:
69  *
70  * - VMCIQPB_CREATED_NO_MEM: this state indicates that either:
71  *
72  *     - the create was performed by a host endpoint, in which case there is
73  *       no backing memory yet.
74  *
75  *     - the create was initiated by an old-style VMX, that uses
76  *       vmci_qp_broker_set_page_store to specify the UVAs of the queue pair at
77  *       a later point in time. This state can be distinguished from the one
78  *       above by the context ID of the creator. A host side is not allowed to
79  *       attach until the page store has been set.
80  *
81  * - VMCIQPB_CREATED_MEM: this state is the result when the queue pair
82  *     is created by a VMX using the queue pair device backend that
83  *     sets the UVAs of the queue pair immediately and stores the
84  *     information for later attachers. At this point, it is ready for
85  *     the host side to attach to it.
86  *
87  * Once the queue pair is in one of the created states (with the exception of
88  * the case mentioned for older VMX'en above), it is possible to attach to the
89  * queue pair. Again, two new states are possible:
90  *
91  * - VMCIQPB_ATTACHED_MEM: this state can be reached through the following
92  *   paths:
93  *
94  *     - from VMCIQPB_CREATED_NO_MEM when a new-style VMX allocates a queue
95  *       pair, and attaches to a queue pair previously created by the host side.
96  *
97  *     - from VMCIQPB_CREATED_MEM when the host side attaches to a queue pair
98  *       already created by a guest.
99  *
100  *     - from VMCIQPB_ATTACHED_NO_MEM, when an old-style VMX calls
101  *       vmci_qp_broker_set_page_store (see below).
102  *
103  * - VMCIQPB_ATTACHED_NO_MEM: If the queue pair already was in the
104  *     VMCIQPB_CREATED_NO_MEM due to a host side create, an old-style VMX will
105  *     bring the queue pair into this state. Once vmci_qp_broker_set_page_store
106  * is called to register the user memory, the VMCIQPB_ATTACHED_MEM state
107  *     will be entered.
108  *
109  * From the attached queue pair, the queue pair can enter the shutdown states
110  * when either side of the queue pair detaches. If the guest side detaches
111  * first, the queue pair will enter the VMCIQPB_SHUTDOWN_NO_MEM state, where
112  * the content of the queue pair will no longer be available. If the host
113  * side detaches first, the queue pair will either enter the
114  * VMCIQPB_SHUTDOWN_MEM, if the guest memory is currently mapped, or
115  * VMCIQPB_SHUTDOWN_NO_MEM, if the guest memory is not mapped
116  * (e.g., the host detaches while a guest is stunned).
117  *
118  * New-style VMX'en will also unmap guest memory, if the guest is
119  * quiesced, e.g., during a snapshot operation. In that case, the guest
120  * memory will no longer be available, and the queue pair will transition from
121  * *_MEM state to a *_NO_MEM state. The VMX may later map the memory once more,
122  * in which case the queue pair will transition from the *_NO_MEM state at that
123  * point back to the *_MEM state. Note that the *_NO_MEM state may have changed,
124  * since the peer may have either attached or detached in the meantime. The
125  * values are laid out such that ++ on a state will move from a *_NO_MEM to a
126  * *_MEM state, and vice versa.
127  */
128
129 /*
130  * VMCIMemcpy{To,From}QueueFunc() prototypes.  Functions of these
131  * types are passed around to enqueue and dequeue routines.  Note that
132  * often the functions passed are simply wrappers around memcpy
133  * itself.
134  *
135  * Note: In order for the memcpy typedefs to be compatible with the VMKernel,
136  * there's an unused last parameter for the hosted side.  In
137  * ESX, that parameter holds a buffer type.
138  */
139 typedef int vmci_memcpy_to_queue_func(struct vmci_queue *queue,
140                                       u64 queue_offset, const void *src,
141                                       size_t src_offset, size_t size);
142 typedef int vmci_memcpy_from_queue_func(void *dest, size_t dest_offset,
143                                         const struct vmci_queue *queue,
144                                         u64 queue_offset, size_t size);
145
146 /* The Kernel specific component of the struct vmci_queue structure. */
147 struct vmci_queue_kern_if {
148         struct page **page;
149         struct page **header_page;
150         void *va;
151         struct mutex __mutex;   /* Protects the queue. */
152         struct mutex *mutex;    /* Shared by producer and consumer queues. */
153         bool host;
154         size_t num_pages;
155         bool mapped;
156 };
157
158 /*
159  * This structure is opaque to the clients.
160  */
161 struct vmci_qp {
162         struct vmci_handle handle;
163         struct vmci_queue *produce_q;
164         struct vmci_queue *consume_q;
165         u64 produce_q_size;
166         u64 consume_q_size;
167         u32 peer;
168         u32 flags;
169         u32 priv_flags;
170         bool guest_endpoint;
171         unsigned int blocked;
172         unsigned int generation;
173         wait_queue_head_t event;
174 };
175
176 enum qp_broker_state {
177         VMCIQPB_NEW,
178         VMCIQPB_CREATED_NO_MEM,
179         VMCIQPB_CREATED_MEM,
180         VMCIQPB_ATTACHED_NO_MEM,
181         VMCIQPB_ATTACHED_MEM,
182         VMCIQPB_SHUTDOWN_NO_MEM,
183         VMCIQPB_SHUTDOWN_MEM,
184         VMCIQPB_GONE
185 };
186
187 #define QPBROKERSTATE_HAS_MEM(_qpb) (_qpb->state == VMCIQPB_CREATED_MEM || \
188                                      _qpb->state == VMCIQPB_ATTACHED_MEM || \
189                                      _qpb->state == VMCIQPB_SHUTDOWN_MEM)
190
191 /*
192  * In the queue pair broker, we always use the guest's point of view for
193  * the produce and consume queue values and references, e.g., the
194  * produce queue size stored is the guest's produce queue size. The
195  * host endpoint will need to swap these around. The only exception is
196  * the local queue pairs on the host, in which case the host endpoint
197  * that creates the queue pair will have the right orientation, and
198  * the attaching host endpoint will need to swap.
199  */
200 struct qp_entry {
201         struct list_head list_item;
202         struct vmci_handle handle;
203         u32 peer;
204         u32 flags;
205         u64 produce_size;
206         u64 consume_size;
207         u32 ref_count;
208 };
209
210 struct qp_broker_entry {
211         struct vmci_resource resource;
212         struct qp_entry qp;
213         u32 create_id;
214         u32 attach_id;
215         enum qp_broker_state state;
216         bool require_trusted_attach;
217         bool created_by_trusted;
218         bool vmci_page_files;   /* Created by VMX using VMCI page files */
219         struct vmci_queue *produce_q;
220         struct vmci_queue *consume_q;
221         struct vmci_queue_header saved_produce_q;
222         struct vmci_queue_header saved_consume_q;
223         vmci_event_release_cb wakeup_cb;
224         void *client_data;
225         void *local_mem;        /* Kernel memory for local queue pair */
226 };
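/*
 * Editorial sketch, not part of the original driver (hence guarded
 * out): the state comment at the top of this file notes that the enum
 * values are laid out so that "++" moves a *_NO_MEM state to its *_MEM
 * counterpart and "--" moves back.  A hypothetical helper relying on
 * that layout, valid only for the three paired state pairs:
 */
#if 0
static void qp_broker_entry_set_mem(struct qp_broker_entry *entry,
				    bool mem_available)
{
	if (mem_available && !QPBROKERSTATE_HAS_MEM(entry))
		entry->state++;	/* e.g. ATTACHED_NO_MEM -> ATTACHED_MEM */
	else if (!mem_available && QPBROKERSTATE_HAS_MEM(entry))
		entry->state--;	/* e.g. ATTACHED_MEM -> ATTACHED_NO_MEM */
}
#endif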
227
228 struct qp_guest_endpoint {
229         struct vmci_resource resource;
230         struct qp_entry qp;
231         u64 num_ppns;
232         void *produce_q;
233         void *consume_q;
234         struct PPNSet ppn_set;
235 };
236
237 struct qp_list {
238         struct list_head head;
239         struct mutex mutex;     /* Protect queue list. */
240 };
241
242 static struct qp_list qp_broker_list = {
243         .head = LIST_HEAD_INIT(qp_broker_list.head),
244         .mutex = __MUTEX_INITIALIZER(qp_broker_list.mutex),
245 };
246
247 static struct qp_list qp_guest_endpoints = {
248         .head = LIST_HEAD_INIT(qp_guest_endpoints.head),
249         .mutex = __MUTEX_INITIALIZER(qp_guest_endpoints.mutex),
250 };
251
252 #define INVALID_VMCI_GUEST_MEM_ID  0
253 #define QPE_NUM_PAGES(_QPE) ((u32) \
254                              (DIV_ROUND_UP(_QPE.produce_size, PAGE_SIZE) + \
255                               DIV_ROUND_UP(_QPE.consume_size, PAGE_SIZE) + 2))
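/*
 * Worked example: for an entry with a 64 KiB produce queue and a
 * 32 KiB consume queue on a system with 4 KiB pages, QPE_NUM_PAGES
 * yields 16 + 8 + 2 = 26; the "+ 2" accounts for the two queue header
 * pages.
 */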
256
257
258 /*
259  * Frees kernel VA space for a given queue and its queue header, and
260  * frees physical data pages.
261  */
262 static void qp_free_queue(void *q, u64 size)
263 {
264         struct vmci_queue *queue = q;
265
266         if (queue) {
267                 u64 i = DIV_ROUND_UP(size, PAGE_SIZE);
268
269                 if (queue->kernel_if->mapped) {
270                         vunmap(queue->kernel_if->va);
271                         queue->kernel_if->va = NULL;
272                 }
273
274                 while (i)
275                         __free_page(queue->kernel_if->page[--i]);
276
277                 vfree(queue->q_header);
278         }
279 }
280
281 /*
282  * Allocates kernel VA space of specified size, plus space for the
283  * queue structure/kernel interface and the queue header.  Allocates
284  * physical pages for the queue data pages.
285  *
286  * PAGE m:      struct vmci_queue_header (struct vmci_queue->q_header)
287  * PAGE m+1:    struct vmci_queue
288  * PAGE m+1+q:  struct vmci_queue_kern_if (struct vmci_queue->kernel_if)
289  * PAGE n-size: Data pages (struct vmci_queue->kernel_if->page[])
290  */
291 static void *qp_alloc_queue(u64 size, u32 flags)
292 {
293         u64 i;
294         struct vmci_queue *queue;
295         struct vmci_queue_header *q_header;
296         const u64 num_data_pages = DIV_ROUND_UP(size, PAGE_SIZE);
297         const uint queue_size =
298             PAGE_SIZE +
299             sizeof(*queue) + sizeof(*(queue->kernel_if)) +
300             num_data_pages * sizeof(*(queue->kernel_if->page));
301
302         q_header = vmalloc(queue_size);
303         if (!q_header)
304                 return NULL;
305
306         queue = (void *)q_header + PAGE_SIZE;
307         queue->q_header = q_header;
308         queue->saved_header = NULL;
309         queue->kernel_if = (struct vmci_queue_kern_if *)(queue + 1);
310         queue->kernel_if->header_page = NULL;   /* Unused in guest. */
311         queue->kernel_if->page = (struct page **)(queue->kernel_if + 1);
312         queue->kernel_if->host = false;
313         queue->kernel_if->va = NULL;
314         queue->kernel_if->mapped = false;
315
316         for (i = 0; i < num_data_pages; i++) {
317                 queue->kernel_if->page[i] = alloc_pages(GFP_KERNEL, 0);
318                 if (!queue->kernel_if->page[i])
319                         goto fail;
320         }
321
322         if (vmci_qp_pinned(flags)) {
323                 queue->kernel_if->va =
324                     vmap(queue->kernel_if->page, num_data_pages, VM_MAP,
325                          PAGE_KERNEL);
326                 if (!queue->kernel_if->va)
327                         goto fail;
328
329                 queue->kernel_if->mapped = true;
330         }
331
332         return (void *)queue;
333
334  fail:
335         qp_free_queue(queue, i * PAGE_SIZE);
336         return NULL;
337 }
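/*
 * Editorial sketch, not part of the original driver (hence guarded
 * out): a minimal allocate/free cycle for a guest queue with a
 * two-page data area and no flags set, so the data pages are not
 * vmap()ed up front.  Real callers size the queue from the endpoint's
 * requested produce/consume sizes.
 */
#if 0
static int qp_alloc_queue_example(void)
{
	void *q = qp_alloc_queue(2 * PAGE_SIZE, 0);

	if (!q)
		return VMCI_ERROR_NO_MEM;

	/* ... enqueue/dequeue through the memcpy wrappers below ... */

	qp_free_queue(q, 2 * PAGE_SIZE);
	return VMCI_SUCCESS;
}
#endif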
338
339 /*
340  * Copies from a given buffer or iovector to a VMCI Queue.  Uses
341  * kmap()/kunmap() to dynamically map/unmap required portions of the queue
342  * by traversing the offset -> page translation structure for the queue.
343  * Assumes that offset + size does not wrap around in the queue.
344  */
345 static int __qp_memcpy_to_queue(struct vmci_queue *queue,
346                                 u64 queue_offset,
347                                 const void *src,
348                                 size_t size,
349                                 bool is_iovec)
350 {
351         struct vmci_queue_kern_if *kernel_if = queue->kernel_if;
352         size_t bytes_copied = 0;
353
354         while (bytes_copied < size) {
355                 u64 page_index = (queue_offset + bytes_copied) / PAGE_SIZE;
356                 size_t page_offset =
357                     (queue_offset + bytes_copied) & (PAGE_SIZE - 1);
358                 void *va;
359                 size_t to_copy;
360
361                 if (!kernel_if->mapped)
362                         va = kmap(kernel_if->page[page_index]);
363                 else
364                         va = (void *)((u8 *)kernel_if->va +
365                                       (page_index * PAGE_SIZE));
366
367                 if (size - bytes_copied > PAGE_SIZE - page_offset)
368                         /* Enough payload to fill up from this page. */
369                         to_copy = PAGE_SIZE - page_offset;
370                 else
371                         to_copy = size - bytes_copied;
372
373                 if (is_iovec) {
374                         struct iovec *iov = (struct iovec *)src;
375                         int err;
376
377                         /* The iovec will track bytes_copied internally. */
378                         err = memcpy_fromiovec((u8 *)va + page_offset,
379                                                iov, to_copy);
380                         if (err != 0) {
381                                 kunmap(kernel_if->page[page_index]);
382                                 return VMCI_ERROR_INVALID_ARGS;
383                         }
384                 } else {
385                         memcpy((u8 *)va + page_offset,
386                                (u8 *)src + bytes_copied, to_copy);
387                 }
388
389                 bytes_copied += to_copy;
390                 if (!kernel_if->mapped)
391                         kunmap(kernel_if->page[page_index]);
392         }
393
394         return VMCI_SUCCESS;
395 }
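/*
 * Worked example of the offset arithmetic above: with a 4096-byte
 * PAGE_SIZE, queue_offset 6000 and bytes_copied 0 give page_index 1
 * and page_offset 1904, so at most 4096 - 1904 = 2192 bytes are copied
 * before the loop advances to page 2.
 */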
396
397 /*
398  * Copies to a given buffer or iovector from a VMCI Queue.  Uses
399  * kmap()/kunmap() to dynamically map/unmap required portions of the queue
400  * by traversing the offset -> page translation structure for the queue.
401  * Assumes that offset + size does not wrap around in the queue.
402  */
403 static int __qp_memcpy_from_queue(void *dest,
404                                   const struct vmci_queue *queue,
405                                   u64 queue_offset,
406                                   size_t size,
407                                   bool is_iovec)
408 {
409         struct vmci_queue_kern_if *kernel_if = queue->kernel_if;
410         size_t bytes_copied = 0;
411
412         while (bytes_copied < size) {
413                 u64 page_index = (queue_offset + bytes_copied) / PAGE_SIZE;
414                 size_t page_offset =
415                     (queue_offset + bytes_copied) & (PAGE_SIZE - 1);
416                 void *va;
417                 size_t to_copy;
418
419                 if (!kernel_if->mapped)
420                         va = kmap(kernel_if->page[page_index]);
421                 else
422                         va = (void *)((u8 *)kernel_if->va +
423                                       (page_index * PAGE_SIZE));
424
425                 if (size - bytes_copied > PAGE_SIZE - page_offset)
426                         /* Enough payload to fill up this page. */
427                         to_copy = PAGE_SIZE - page_offset;
428                 else
429                         to_copy = size - bytes_copied;
430
431                 if (is_iovec) {
432                         struct iovec *iov = (struct iovec *)dest;
433                         int err;
434
435                         /* The iovec will track bytes_copied internally. */
436                         err = memcpy_toiovec(iov, (u8 *)va + page_offset,
437                                              to_copy);
438                         if (err != 0) {
439                                 kunmap(kernel_if->page[page_index]);
440                                 return VMCI_ERROR_INVALID_ARGS;
441                         }
442                 } else {
443                         memcpy((u8 *)dest + bytes_copied,
444                                (u8 *)va + page_offset, to_copy);
445                 }
446
447                 bytes_copied += to_copy;
448                 if (!kernel_if->mapped)
449                         kunmap(kernel_if->page[page_index]);
450         }
451
452         return VMCI_SUCCESS;
453 }
454
455 /*
456  * Allocates two lists of PPNs --- one for the pages in the produce queue,
457  * and the other for the pages in the consume queue. Initializes the lists
458  * of PPNs with the page frame numbers of the KVA for the two queues (and
459  * the queue headers).
460  */
461 static int qp_alloc_ppn_set(void *prod_q,
462                             u64 num_produce_pages,
463                             void *cons_q,
464                             u64 num_consume_pages, struct PPNSet *ppn_set)
465 {
466         u32 *produce_ppns;
467         u32 *consume_ppns;
468         struct vmci_queue *produce_q = prod_q;
469         struct vmci_queue *consume_q = cons_q;
470         u64 i;
471
472         if (!produce_q || !num_produce_pages || !consume_q ||
473             !num_consume_pages || !ppn_set)
474                 return VMCI_ERROR_INVALID_ARGS;
475
476         if (ppn_set->initialized)
477                 return VMCI_ERROR_ALREADY_EXISTS;
478
479         produce_ppns =
480             kmalloc(num_produce_pages * sizeof(*produce_ppns), GFP_KERNEL);
481         if (!produce_ppns)
482                 return VMCI_ERROR_NO_MEM;
483
484         consume_ppns =
485             kmalloc(num_consume_pages * sizeof(*consume_ppns), GFP_KERNEL);
486         if (!consume_ppns) {
487                 kfree(produce_ppns);
488                 return VMCI_ERROR_NO_MEM;
489         }
490
491         produce_ppns[0] = page_to_pfn(vmalloc_to_page(produce_q->q_header));
492         for (i = 1; i < num_produce_pages; i++) {
493                 unsigned long pfn;
494
495                 produce_ppns[i] =
496                     page_to_pfn(produce_q->kernel_if->page[i - 1]);
497                 pfn = produce_ppns[i];
498
499                 /* Fail allocation if PFN isn't supported by hypervisor. */
500                 if (sizeof(pfn) > sizeof(*produce_ppns)
501                     && pfn != produce_ppns[i])
502                         goto ppn_error;
503         }
504
505         consume_ppns[0] = page_to_pfn(vmalloc_to_page(consume_q->q_header));
506         for (i = 1; i < num_consume_pages; i++) {
507                 unsigned long pfn;
508
509                 consume_ppns[i] =
510                     page_to_pfn(consume_q->kernel_if->page[i - 1]);
511                 pfn = consume_ppns[i];
512
513                 /* Fail allocation if PFN isn't supported by hypervisor. */
514                 if (sizeof(pfn) > sizeof(*consume_ppns)
515                     && pfn != consume_ppns[i])
516                         goto ppn_error;
517         }
518
519         ppn_set->num_produce_pages = num_produce_pages;
520         ppn_set->num_consume_pages = num_consume_pages;
521         ppn_set->produce_ppns = produce_ppns;
522         ppn_set->consume_ppns = consume_ppns;
523         ppn_set->initialized = true;
524         return VMCI_SUCCESS;
525
526  ppn_error:
527         kfree(produce_ppns);
528         kfree(consume_ppns);
529         return VMCI_ERROR_INVALID_ARGS;
530 }
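/*
 * Note on the truncation checks above: the PPN arrays hold u32 values,
 * while page_to_pfn() returns an unsigned long.  On 64-bit hosts a PFN
 * that does not fit in 32 bits would be silently truncated by the
 * assignment, so the comparison detects that case and fails the
 * allocation.
 */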
531
532 /*
533  * Frees the two lists of PPNs for a queue pair.
534  */
535 static void qp_free_ppn_set(struct PPNSet *ppn_set)
536 {
537         if (ppn_set->initialized) {
538                 /* Do not call these functions on NULL inputs. */
539                 kfree(ppn_set->produce_ppns);
540                 kfree(ppn_set->consume_ppns);
541         }
542         memset(ppn_set, 0, sizeof(*ppn_set));
543 }
544
545 /*
546  * Populates the list of PPNs in the hypercall structure with the PPNs
547  * of the produce queue and the consume queue.
548  */
549 static int qp_populate_ppn_set(u8 *call_buf, const struct PPNSet *ppn_set)
550 {
551         memcpy(call_buf, ppn_set->produce_ppns,
552                ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns));
553         memcpy(call_buf +
554                ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns),
555                ppn_set->consume_ppns,
556                ppn_set->num_consume_pages * sizeof(*ppn_set->consume_ppns));
557
558         return VMCI_SUCCESS;
559 }
560
561 static int qp_memcpy_to_queue(struct vmci_queue *queue,
562                               u64 queue_offset,
563                               const void *src, size_t src_offset, size_t size)
564 {
565         return __qp_memcpy_to_queue(queue, queue_offset,
566                                     (u8 *)src + src_offset, size, false);
567 }
568
569 static int qp_memcpy_from_queue(void *dest,
570                                 size_t dest_offset,
571                                 const struct vmci_queue *queue,
572                                 u64 queue_offset, size_t size)
573 {
574         return __qp_memcpy_from_queue((u8 *)dest + dest_offset,
575                                       queue, queue_offset, size, false);
576 }
577
578 /*
579  * Copies from a given iovec to a VMCI Queue.
580  */
581 static int qp_memcpy_to_queue_iov(struct vmci_queue *queue,
582                                   u64 queue_offset,
583                                   const void *src,
584                                   size_t src_offset, size_t size)
585 {
586
587         /*
588          * We ignore src_offset because src is really a struct iovec * and will
589          * maintain offset internally.
590          */
591         return __qp_memcpy_to_queue(queue, queue_offset, src, size, true);
592 }
593
594 /*
595  * Copies to a given iovec from a VMCI Queue.
596  */
597 static int qp_memcpy_from_queue_iov(void *dest,
598                                     size_t dest_offset,
599                                     const struct vmci_queue *queue,
600                                     u64 queue_offset, size_t size)
601 {
602         /*
603          * We ignore dest_offset because dest is really a struct iovec * and
604          * will maintain offset internally.
605          */
606         return __qp_memcpy_from_queue(dest, queue, queue_offset, size, true);
607 }
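/*
 * Editorial sketch, not part of the original driver (hence guarded
 * out): the four wrappers above match the
 * vmci_memcpy_{to,from}_queue_func typedefs, so an enqueue path can be
 * parameterized over the plain-buffer and iovec variants.  The helper
 * below is hypothetical.
 */
#if 0
static int qp_enqueue_example(struct vmci_queue *produce_q,
			      const void *buf, size_t size, bool is_iovec)
{
	vmci_memcpy_to_queue_func *copy =
	    is_iovec ? qp_memcpy_to_queue_iov : qp_memcpy_to_queue;

	return copy(produce_q, 0, buf, 0, size);
}
#endif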
608
609 /*
610  * Allocates kernel VA space of specified size plus space for the queue
611  * and kernel interface.  This is different from the guest queue allocator,
612  * because we do not allocate our own queue header/data pages here but
613  * share those of the guest.
614  */
615 static struct vmci_queue *qp_host_alloc_queue(u64 size)
616 {
617         struct vmci_queue *queue;
618         const size_t num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1;
619         const size_t queue_size = sizeof(*queue) + sizeof(*(queue->kernel_if));
620         const size_t queue_page_size =
621             num_pages * sizeof(*queue->kernel_if->page);
622
623         queue = kzalloc(queue_size + queue_page_size, GFP_KERNEL);
624         if (queue) {
625                 queue->q_header = NULL;
626                 queue->saved_header = NULL;
627                 queue->kernel_if =
628                     (struct vmci_queue_kern_if *)((u8 *)queue +
629                                                   sizeof(*queue));
630                 queue->kernel_if->host = true;
631                 queue->kernel_if->mutex = NULL;
632                 queue->kernel_if->num_pages = num_pages;
633                 queue->kernel_if->header_page =
634                     (struct page **)((u8 *)queue + queue_size);
635                 queue->kernel_if->page = &queue->kernel_if->header_page[1];
636                 queue->kernel_if->va = NULL;
637                 queue->kernel_if->mapped = false;
638         }
639
640         return queue;
641 }
642
643 /*
644  * Frees kernel memory for a given queue (header plus translation
645  * structure).
646  */
647 static void qp_host_free_queue(struct vmci_queue *queue, u64 queue_size)
648 {
649         kfree(queue);
650 }
651
652 /*
653  * Initialize the mutex for the pair of queues.  This mutex is used to
654  * protect the q_header and the buffer from changing out from under any
655  * users of either queue.  Of course, it's only any good if the mutexes
656  * are actually acquired.  Queue structure must lie on non-paged memory
657  * or we cannot guarantee access to the mutex.
658  */
659 static void qp_init_queue_mutex(struct vmci_queue *produce_q,
660                                 struct vmci_queue *consume_q)
661 {
662         /*
663          * Only the host queue has shared state - the guest queues do not
664          * need to synchronize access using a queue mutex.
665          */
666
667         if (produce_q->kernel_if->host) {
668                 produce_q->kernel_if->mutex = &produce_q->kernel_if->__mutex;
669                 consume_q->kernel_if->mutex = &produce_q->kernel_if->__mutex;
670                 mutex_init(produce_q->kernel_if->mutex);
671         }
672 }
673
674 /*
675  * Cleans up the mutex for the pair of queues.
676  */
677 static void qp_cleanup_queue_mutex(struct vmci_queue *produce_q,
678                                    struct vmci_queue *consume_q)
679 {
680         if (produce_q->kernel_if->host) {
681                 produce_q->kernel_if->mutex = NULL;
682                 consume_q->kernel_if->mutex = NULL;
683         }
684 }
685
686 /*
687  * Acquire the mutex for the queue.  Note that the produce_q and
688  * the consume_q share a mutex.  So, only one of the two need to
689  * be passed in to this routine.  Either will work just fine.
690  */
691 static void qp_acquire_queue_mutex(struct vmci_queue *queue)
692 {
693         if (queue->kernel_if->host)
694                 mutex_lock(queue->kernel_if->mutex);
695 }
696
697 /*
698  * Release the mutex for the queue.  Note that the produce_q and
699  * the consume_q share a mutex.  So, only one of the two need to
700  * be passed in to this routine.  Either will work just fine.
701  */
702 static void qp_release_queue_mutex(struct vmci_queue *queue)
703 {
704         if (queue->kernel_if->host)
705                 mutex_unlock(queue->kernel_if->mutex);
706 }
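/*
 * Editorial sketch, not part of the original driver (hence guarded
 * out): callers bracket host-side header accesses with the helpers
 * above.  Because the produce and consume queues share one mutex,
 * locking either queue protects both.
 */
#if 0
	qp_acquire_queue_mutex(produce_q);
	/* ... read or update the produce/consume queue headers ... */
	qp_release_queue_mutex(produce_q);
#endif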
707
708 /*
709  * Helper function to release pages previously obtained using
710  * get_user_pages().
711  */
712 static void qp_release_pages(struct page **pages,
713                              u64 num_pages, bool dirty)
714 {
715         int i;
716
717         for (i = 0; i < num_pages; i++) {
718                 if (dirty)
719                         set_page_dirty(pages[i]);
720
721                 page_cache_release(pages[i]);
722                 pages[i] = NULL;
723         }
724 }
725
726 /*
727  * Lock the user pages referenced by the {produce,consume}Buffer
728  * struct into memory and populate the {produce,consume}Pages
729  * arrays in the attach structure with them.
730  */
731 static int qp_host_get_user_memory(u64 produce_uva,
732                                    u64 consume_uva,
733                                    struct vmci_queue *produce_q,
734                                    struct vmci_queue *consume_q)
735 {
736         int retval;
737         int err = VMCI_SUCCESS;
738
739         down_write(&current->mm->mmap_sem);
740         retval = get_user_pages(current,
741                                 current->mm,
742                                 (uintptr_t) produce_uva,
743                                 produce_q->kernel_if->num_pages,
744                                 1, 0, produce_q->kernel_if->header_page, NULL);
745         if (retval < produce_q->kernel_if->num_pages) {
746                 pr_warn("get_user_pages(produce) failed (retval=%d)", retval);
747                 qp_release_pages(produce_q->kernel_if->header_page, retval,
748                                  false);
749                 err = VMCI_ERROR_NO_MEM;
750                 goto out;
751         }
752
753         retval = get_user_pages(current,
754                                 current->mm,
755                                 (uintptr_t) consume_uva,
756                                 consume_q->kernel_if->num_pages,
757                                 1, 0, consume_q->kernel_if->header_page, NULL);
758         if (retval < consume_q->kernel_if->num_pages) {
759                 pr_warn("get_user_pages(consume) failed (retval=%d)", retval);
760                 qp_release_pages(consume_q->kernel_if->header_page, retval,
761                                  false);
762                 qp_release_pages(produce_q->kernel_if->header_page,
763                                  produce_q->kernel_if->num_pages, false);
764                 err = VMCI_ERROR_NO_MEM;
765         }
766
767  out:
768         up_write(&current->mm->mmap_sem);
769
770         return err;
771 }
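/*
 * Note on the failure paths above: get_user_pages() may pin fewer
 * pages than requested.  On a short (non-negative) return, exactly
 * 'retval' pages were pinned, and that is the count handed back to
 * qp_release_pages() before reporting VMCI_ERROR_NO_MEM.
 */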
772
773 /*
774  * Registers the specification of the user pages used for backing a queue
775  * pair. Enough information to map in pages is stored in the OS specific
776  * part of the struct vmci_queue structure.
777  */
778 static int qp_host_register_user_memory(struct vmci_qp_page_store *page_store,
779                                         struct vmci_queue *produce_q,
780                                         struct vmci_queue *consume_q)
781 {
782         u64 produce_uva;
783         u64 consume_uva;
784
785         /*
786          * The new style and the old style mapping only differs in
787          * that we either get a single or two UVAs, so we split the
788          * single UVA range at the appropriate spot.
789          */
790         produce_uva = page_store->pages;
791         consume_uva = page_store->pages +
792             produce_q->kernel_if->num_pages * PAGE_SIZE;
793         return qp_host_get_user_memory(produce_uva, consume_uva, produce_q,
794                                        consume_q);
795 }
796
797 /*
798  * Releases and removes the references to user pages stored in the attach
799  * struct.  Pages are released from the page cache and may become
800  * swappable again.
801  */
802 static void qp_host_unregister_user_memory(struct vmci_queue *produce_q,
803                                            struct vmci_queue *consume_q)
804 {
805         qp_release_pages(produce_q->kernel_if->header_page,
806                          produce_q->kernel_if->num_pages, true);
807         memset(produce_q->kernel_if->header_page, 0,
808                sizeof(*produce_q->kernel_if->header_page) *
809                produce_q->kernel_if->num_pages);
810         qp_release_pages(consume_q->kernel_if->header_page,
811                          consume_q->kernel_if->num_pages, true);
812         memset(consume_q->kernel_if->header_page, 0,
813                sizeof(*consume_q->kernel_if->header_page) *
814                consume_q->kernel_if->num_pages);
815 }
816
817 /*
818  * Once qp_host_register_user_memory has been performed on a
819  * queue, the queue pair headers can be mapped into the
820  * kernel. Once mapped, they must be unmapped with
821  * qp_host_unmap_queues prior to calling
822  * qp_host_unregister_user_memory.
823  * Pages are pinned.
824  */
825 static int qp_host_map_queues(struct vmci_queue *produce_q,
826                               struct vmci_queue *consume_q)
827 {
828         int result;
829
830         if (!produce_q->q_header || !consume_q->q_header) {
831                 struct page *headers[2];
832
833                 if (produce_q->q_header != consume_q->q_header)
834                         return VMCI_ERROR_QUEUEPAIR_MISMATCH;
835
836                 if (produce_q->kernel_if->header_page == NULL ||
837                     *produce_q->kernel_if->header_page == NULL)
838                         return VMCI_ERROR_UNAVAILABLE;
839
840                 headers[0] = *produce_q->kernel_if->header_page;
841                 headers[1] = *consume_q->kernel_if->header_page;
842
843                 produce_q->q_header = vmap(headers, 2, VM_MAP, PAGE_KERNEL);
844                 if (produce_q->q_header != NULL) {
845                         consume_q->q_header =
846                             (struct vmci_queue_header *)((u8 *)
847                                                          produce_q->q_header +
848                                                          PAGE_SIZE);
849                         result = VMCI_SUCCESS;
850                 } else {
851                         pr_warn("vmap failed\n");
852                         result = VMCI_ERROR_NO_MEM;
853                 }
854         } else {
855                 result = VMCI_SUCCESS;
856         }
857
858         return result;
859 }
860
861 /*
862  * Unmaps previously mapped queue pair headers from the kernel.
863  * Pages are unpinned.
864  */
865 static int qp_host_unmap_queues(u32 gid,
866                                 struct vmci_queue *produce_q,
867                                 struct vmci_queue *consume_q)
868 {
869         if (produce_q->q_header) {
870                 if (produce_q->q_header < consume_q->q_header)
871                         vunmap(produce_q->q_header);
872                 else
873                         vunmap(consume_q->q_header);
874
875                 produce_q->q_header = NULL;
876                 consume_q->q_header = NULL;
877         }
878
879         return VMCI_SUCCESS;
880 }
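/*
 * Note on the vunmap() above: qp_host_map_queues() maps both headers
 * with a single two-page vmap(), so the lower of the two header
 * addresses is the start of that mapping and is the one that must be
 * passed to vunmap().
 */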
881
882 /*
883  * Finds the entry in the list corresponding to a given handle. Assumes
884  * that the list is locked.
885  */
886 static struct qp_entry *qp_list_find(struct qp_list *qp_list,
887                                      struct vmci_handle handle)
888 {
889         struct qp_entry *entry;
890
891         if (vmci_handle_is_invalid(handle))
892                 return NULL;
893
894         list_for_each_entry(entry, &qp_list->head, list_item) {
895                 if (vmci_handle_is_equal(entry->handle, handle))
896                         return entry;
897         }
898
899         return NULL;
900 }
901
902 /*
903  * Finds the entry in the list corresponding to a given handle.
904  */
905 static struct qp_guest_endpoint *
906 qp_guest_handle_to_entry(struct vmci_handle handle)
907 {
908         struct qp_guest_endpoint *entry;
909         struct qp_entry *qp = qp_list_find(&qp_guest_endpoints, handle);
910
911         entry = qp ? container_of(
912                 qp, struct qp_guest_endpoint, qp) : NULL;
913         return entry;
914 }
915
916 /*
917  * Finds the entry in the list corresponding to a given handle.
918  */
919 static struct qp_broker_entry *
920 qp_broker_handle_to_entry(struct vmci_handle handle)
921 {
922         struct qp_broker_entry *entry;
923         struct qp_entry *qp = qp_list_find(&qp_broker_list, handle);
924
925         entry = qp ? container_of(
926                 qp, struct qp_broker_entry, qp) : NULL;
927         return entry;
928 }
929
930 /*
931  * Dispatches a queue pair event message directly into the local event
932  * queue.
933  */
934 static int qp_notify_peer_local(bool attach, struct vmci_handle handle)
935 {
936         u32 context_id = vmci_get_context_id();
937         struct vmci_event_qp ev;
938
939         ev.msg.hdr.dst = vmci_make_handle(context_id, VMCI_EVENT_HANDLER);
940         ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
941                                           VMCI_CONTEXT_RESOURCE_ID);
942         ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
943         ev.msg.event_data.event =
944             attach ? VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH;
945         ev.payload.peer_id = context_id;
946         ev.payload.handle = handle;
947
948         return vmci_event_dispatch(&ev.msg.hdr);
949 }
950
951 /*
952  * Allocates and initializes a qp_guest_endpoint structure.
953  * Allocates a queue_pair rid (and handle) iff the given entry has
954  * an invalid handle.  0 through VMCI_RESERVED_RESOURCE_ID_MAX
955  * are reserved handles.  Assumes that the QP list mutex is held
956  * by the caller.
957  */
958 static struct qp_guest_endpoint *
959 qp_guest_endpoint_create(struct vmci_handle handle,
960                          u32 peer,
961                          u32 flags,
962                          u64 produce_size,
963                          u64 consume_size,
964                          void *produce_q,
965                          void *consume_q)
966 {
967         int result;
968         struct qp_guest_endpoint *entry;
969         /* One page each for the queue headers. */
970         const u64 num_ppns = DIV_ROUND_UP(produce_size, PAGE_SIZE) +
971             DIV_ROUND_UP(consume_size, PAGE_SIZE) + 2;
972
973         if (vmci_handle_is_invalid(handle)) {
974                 u32 context_id = vmci_get_context_id();
975
976                 handle = vmci_make_handle(context_id, VMCI_INVALID_ID);
977         }
978
979         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
980         if (entry) {
981                 entry->qp.peer = peer;
982                 entry->qp.flags = flags;
983                 entry->qp.produce_size = produce_size;
984                 entry->qp.consume_size = consume_size;
985                 entry->qp.ref_count = 0;
986                 entry->num_ppns = num_ppns;
987                 entry->produce_q = produce_q;
988                 entry->consume_q = consume_q;
989                 INIT_LIST_HEAD(&entry->qp.list_item);
990
991                 /* Add resource obj */
992                 result = vmci_resource_add(&entry->resource,
993                                            VMCI_RESOURCE_TYPE_QPAIR_GUEST,
994                                            handle);
995                 entry->qp.handle = vmci_resource_handle(&entry->resource);
996                 if ((result != VMCI_SUCCESS) ||
997                     qp_list_find(&qp_guest_endpoints, entry->qp.handle)) {
998                         pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d",
999                                 handle.context, handle.resource, result);
1000                         kfree(entry);
1001                         entry = NULL;
1002                 }
1003         }
1004         return entry;
1005 }
1006
1007 /*
1008  * Frees a qp_guest_endpoint structure.
1009  */
1010 static void qp_guest_endpoint_destroy(struct qp_guest_endpoint *entry)
1011 {
1012         qp_free_ppn_set(&entry->ppn_set);
1013         qp_cleanup_queue_mutex(entry->produce_q, entry->consume_q);
1014         qp_free_queue(entry->produce_q, entry->qp.produce_size);
1015         qp_free_queue(entry->consume_q, entry->qp.consume_size);
1016         /* Unlink from resource hash table and free callback */
1017         vmci_resource_remove(&entry->resource);
1018
1019         kfree(entry);
1020 }
1021
1022 /*
1023  * Helper to make a queue_pairAlloc hypercall when the driver is
1024  * supporting a guest device.
1025  */
1026 static int qp_alloc_hypercall(const struct qp_guest_endpoint *entry)
1027 {
1028         struct vmci_qp_alloc_msg *alloc_msg;
1029         size_t msg_size;
1030         int result;
1031
1032         if (!entry || entry->num_ppns <= 2)
1033                 return VMCI_ERROR_INVALID_ARGS;
1034
1035         msg_size = sizeof(*alloc_msg) +
1036             (size_t) entry->num_ppns * sizeof(u32);
1037         alloc_msg = kmalloc(msg_size, GFP_KERNEL);
1038         if (!alloc_msg)
1039                 return VMCI_ERROR_NO_MEM;
1040
1041         alloc_msg->hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
1042                                               VMCI_QUEUEPAIR_ALLOC);
1043         alloc_msg->hdr.src = VMCI_ANON_SRC_HANDLE;
1044         alloc_msg->hdr.payload_size = msg_size - VMCI_DG_HEADERSIZE;
1045         alloc_msg->handle = entry->qp.handle;
1046         alloc_msg->peer = entry->qp.peer;
1047         alloc_msg->flags = entry->qp.flags;
1048         alloc_msg->produce_size = entry->qp.produce_size;
1049         alloc_msg->consume_size = entry->qp.consume_size;
1050         alloc_msg->num_ppns = entry->num_ppns;
1051
1052         result = qp_populate_ppn_set((u8 *)alloc_msg + sizeof(*alloc_msg),
1053                                      &entry->ppn_set);
1054         if (result == VMCI_SUCCESS)
1055                 result = vmci_send_datagram(&alloc_msg->hdr);
1056
1057         kfree(alloc_msg);
1058
1059         return result;
1060 }
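/*
 * Worked example of the sizing above: a queue pair with one produce
 * data page and one consume data page has num_ppns = 4 (two data pages
 * plus the two header pages), so msg_size is sizeof(*alloc_msg) +
 * 4 * sizeof(u32), with the PPN array written by qp_populate_ppn_set()
 * directly after the fixed-size message header.
 */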
1061
1062 /*
1063  * Helper to make a queue_pairDetach hypercall when the driver is
1064  * supporting a guest device.
1065  */
1066 static int qp_detatch_hypercall(struct vmci_handle handle)
1067 {
1068         struct vmci_qp_detach_msg detach_msg;
1069
1070         detach_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
1071                                               VMCI_QUEUEPAIR_DETACH);
1072         detach_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
1073         detach_msg.hdr.payload_size = sizeof(handle);
1074         detach_msg.handle = handle;
1075
1076         return vmci_send_datagram(&detach_msg.hdr);
1077 }
1078
1079 /*
1080  * Adds the given entry to the list. Assumes that the list is locked.
1081  */
1082 static void qp_list_add_entry(struct qp_list *qp_list, struct qp_entry *entry)
1083 {
1084         if (entry)
1085                 list_add(&entry->list_item, &qp_list->head);
1086 }
1087
1088 /*
1089  * Removes the given entry from the list. Assumes that the list is locked.
1090  */
1091 static void qp_list_remove_entry(struct qp_list *qp_list,
1092                                  struct qp_entry *entry)
1093 {
1094         if (entry)
1095                 list_del(&entry->list_item);
1096 }
1097
1098 /*
1099  * Helper for VMCI queue_pair detach interface. Frees the physical
1100  * pages for the queue pair.
1101  */
1102 static int qp_detatch_guest_work(struct vmci_handle handle)
1103 {
1104         int result;
1105         struct qp_guest_endpoint *entry;
1106         u32 ref_count = ~0;     /* To avoid compiler warning below */
1107
1108         mutex_lock(&qp_guest_endpoints.mutex);
1109
1110         entry = qp_guest_handle_to_entry(handle);
1111         if (!entry) {
1112                 mutex_unlock(&qp_guest_endpoints.mutex);
1113                 return VMCI_ERROR_NOT_FOUND;
1114         }
1115
1116         if (entry->qp.flags & VMCI_QPFLAG_LOCAL) {
1117                 result = VMCI_SUCCESS;
1118
1119                 if (entry->qp.ref_count > 1) {
1120                         result = qp_notify_peer_local(false, handle);
1121                         /*
1122                          * We can fail to notify a local queuepair
1123                          * because we can't allocate.  We still want
1124                          * to release the entry if that happens, so
1125                          * don't bail out yet.
1126                          */
1127                 }
1128         } else {
1129                 result = qp_detatch_hypercall(handle);
1130                 if (result < VMCI_SUCCESS) {
1131                         /*
1132                          * We failed to notify a non-local queuepair.
1133                          * That other queuepair might still be
1134                          * accessing the shared memory, so don't
1135                          * release the entry yet.  It will get cleaned
1136                          * up by vmci_qp_guest_endpoints_exit() if necessary
1137                          * (assuming we are going away, otherwise why
1138                          * did this fail?).
1139                          */
1140
1141                         mutex_unlock(&qp_guest_endpoints.mutex);
1142                         return result;
1143                 }
1144         }
1145
1146         /*
1147          * If we get here, then at worst we failed to notify a local queuepair;
1148          * in all other cases we succeeded.  Release the entry if required.
1149          */
1150
1151         entry->qp.ref_count--;
1152         if (entry->qp.ref_count == 0)
1153                 qp_list_remove_entry(&qp_guest_endpoints, &entry->qp);
1154
1155         /* If we didn't remove the entry, this could change once we unlock. */
1156         if (entry)
1157                 ref_count = entry->qp.ref_count;
1158
1159         mutex_unlock(&qp_guest_endpoints.mutex);
1160
1161         if (ref_count == 0)
1162                 qp_guest_endpoint_destroy(entry);
1163
1164         return result;
1165 }
1166
1167 /*
1168  * This function handles the actual allocation of a VMCI queue
1169  * pair guest endpoint. Allocates physical pages for the queue
1170  * pair. It makes OS dependent calls through generic wrappers.
1171  */
1172 static int qp_alloc_guest_work(struct vmci_handle *handle,
1173                                struct vmci_queue **produce_q,
1174                                u64 produce_size,
1175                                struct vmci_queue **consume_q,
1176                                u64 consume_size,
1177                                u32 peer,
1178                                u32 flags,
1179                                u32 priv_flags)
1180 {
1181         const u64 num_produce_pages =
1182             DIV_ROUND_UP(produce_size, PAGE_SIZE) + 1;
1183         const u64 num_consume_pages =
1184             DIV_ROUND_UP(consume_size, PAGE_SIZE) + 1;
1185         void *my_produce_q = NULL;
1186         void *my_consume_q = NULL;
1187         int result;
1188         struct qp_guest_endpoint *queue_pair_entry = NULL;
1189
1190         if (priv_flags != VMCI_NO_PRIVILEGE_FLAGS)
1191                 return VMCI_ERROR_NO_ACCESS;
1192
1193         mutex_lock(&qp_guest_endpoints.mutex);
1194
1195         queue_pair_entry = qp_guest_handle_to_entry(*handle);
1196         if (queue_pair_entry) {
1197                 if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) {
1198                         /* Local attach case. */
1199                         if (queue_pair_entry->qp.ref_count > 1) {
1200                                 pr_devel("Error attempting to attach more than once\n");
1201                                 result = VMCI_ERROR_UNAVAILABLE;
1202                                 goto error_keep_entry;
1203                         }
1204
1205                         if (queue_pair_entry->qp.produce_size != consume_size ||
1206                             queue_pair_entry->qp.consume_size !=
1207                             produce_size ||
1208                             queue_pair_entry->qp.flags !=
1209                             (flags & ~VMCI_QPFLAG_ATTACH_ONLY)) {
1210                                 pr_devel("Error mismatched queue pair in local attach\n");
1211                                 result = VMCI_ERROR_QUEUEPAIR_MISMATCH;
1212                                 goto error_keep_entry;
1213                         }
1214
1215                         /*
1216                          * Do a local attach.  We swap the consume and
1217                          * produce queues for the attacher and deliver
1218                          * an attach event.
1219                          */
1220                         result = qp_notify_peer_local(true, *handle);
1221                         if (result < VMCI_SUCCESS)
1222                                 goto error_keep_entry;
1223
1224                         my_produce_q = queue_pair_entry->consume_q;
1225                         my_consume_q = queue_pair_entry->produce_q;
1226                         goto out;
1227                 }
1228
1229                 result = VMCI_ERROR_ALREADY_EXISTS;
1230                 goto error_keep_entry;
1231         }
1232
1233         my_produce_q = qp_alloc_queue(produce_size, flags);
1234         if (!my_produce_q) {
1235                 pr_warn("Error allocating pages for produce queue\n");
1236                 result = VMCI_ERROR_NO_MEM;
1237                 goto error;
1238         }
1239
1240         my_consume_q = qp_alloc_queue(consume_size, flags);
1241         if (!my_consume_q) {
1242                 pr_warn("Error allocating pages for consume queue\n");
1243                 result = VMCI_ERROR_NO_MEM;
1244                 goto error;
1245         }
1246
1247         queue_pair_entry = qp_guest_endpoint_create(*handle, peer, flags,
1248                                                     produce_size, consume_size,
1249                                                     my_produce_q, my_consume_q);
1250         if (!queue_pair_entry) {
1251                 pr_warn("Error allocating memory in %s\n", __func__);
1252                 result = VMCI_ERROR_NO_MEM;
1253                 goto error;
1254         }
1255
1256         result = qp_alloc_ppn_set(my_produce_q, num_produce_pages, my_consume_q,
1257                                   num_consume_pages,
1258                                   &queue_pair_entry->ppn_set);
1259         if (result < VMCI_SUCCESS) {
1260                 pr_warn("qp_alloc_ppn_set failed\n");
1261                 goto error;
1262         }
1263
1264         /*
1265          * It's only necessary to notify the host if this queue pair will be
1266          * attached to from another context.
1267          */
1268         if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) {
1269                 /* Local create case. */
1270                 u32 context_id = vmci_get_context_id();
1271
1272                 /*
1273                  * Enforce similar checks on local queue pairs as we
1274                  * do for regular ones.  The handle's context must
1275                  * match the creator or attacher context id (here they
1276                  * are both the current context id) and the
1277                  * attach-only flag cannot exist during create.  We
1278                  * also ensure specified peer is this context or an
1279                  * invalid one.
1280                  */
1281                 if (queue_pair_entry->qp.handle.context != context_id ||
1282                     (queue_pair_entry->qp.peer != VMCI_INVALID_ID &&
1283                      queue_pair_entry->qp.peer != context_id)) {
1284                         result = VMCI_ERROR_NO_ACCESS;
1285                         goto error;
1286                 }
1287
1288                 if (queue_pair_entry->qp.flags & VMCI_QPFLAG_ATTACH_ONLY) {
1289                         result = VMCI_ERROR_NOT_FOUND;
1290                         goto error;
1291                 }
1292         } else {
1293                 result = qp_alloc_hypercall(queue_pair_entry);
1294                 if (result < VMCI_SUCCESS) {
1295                         pr_warn("qp_alloc_hypercall result = %d\n", result);
1296                         goto error;
1297                 }
1298         }
1299
1300         qp_init_queue_mutex((struct vmci_queue *)my_produce_q,
1301                             (struct vmci_queue *)my_consume_q);
1302
1303         qp_list_add_entry(&qp_guest_endpoints, &queue_pair_entry->qp);
1304
1305  out:
1306         queue_pair_entry->qp.ref_count++;
1307         *handle = queue_pair_entry->qp.handle;
1308         *produce_q = (struct vmci_queue *)my_produce_q;
1309         *consume_q = (struct vmci_queue *)my_consume_q;
1310
1311         /*
1312          * We should initialize the queue pair header pages on a local
1313          * queue pair create.  For non-local queue pairs, the
1314          * hypervisor initializes the header pages in the create step.
1315          */
1316         if ((queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) &&
1317             queue_pair_entry->qp.ref_count == 1) {
1318                 vmci_q_header_init((*produce_q)->q_header, *handle);
1319                 vmci_q_header_init((*consume_q)->q_header, *handle);
1320         }
1321
1322         mutex_unlock(&qp_guest_endpoints.mutex);
1323
1324         return VMCI_SUCCESS;
1325
1326  error:
1327         mutex_unlock(&qp_guest_endpoints.mutex);
1328         if (queue_pair_entry) {
1329                 /* The queues will be freed inside the destroy routine. */
1330                 qp_guest_endpoint_destroy(queue_pair_entry);
1331         } else {
1332                 qp_free_queue(my_produce_q, produce_size);
1333                 qp_free_queue(my_consume_q, consume_size);
1334         }
1335         return result;
1336
1337  error_keep_entry:
1338         /* This path should only be used when an existing entry was found. */
1339         mutex_unlock(&qp_guest_endpoints.mutex);
1340         return result;
1341 }
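/*
 * Editorial sketch, not part of the original driver (hence guarded
 * out): a guest-side create/detach round trip built from the helpers
 * above.  Passing VMCI_INVALID_HANDLE asks qp_guest_endpoint_create()
 * to allocate a fresh handle; the sizes and flags are arbitrary
 * example values.
 */
#if 0
static int qp_guest_roundtrip_example(void)
{
	struct vmci_handle handle = VMCI_INVALID_HANDLE;
	struct vmci_queue *produce_q;
	struct vmci_queue *consume_q;
	int result;

	result = qp_alloc_guest_work(&handle, &produce_q, PAGE_SIZE,
				     &consume_q, PAGE_SIZE, VMCI_INVALID_ID,
				     0, VMCI_NO_PRIVILEGE_FLAGS);
	if (result < VMCI_SUCCESS)
		return result;

	return qp_detatch_guest_work(handle);
}
#endif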
1342
1343 /*
1344  * The first endpoint issuing a queue pair allocation will create the state
1345  * of the queue pair in the queue pair broker.
1346  *
1347  * If the creator is a guest, it will associate a VMX virtual address range
1348  * with the queue pair as specified by the page_store. For compatibility with
1349  * older VMX'en, which used a separate step to set the VMX virtual
1350  * address range, the virtual address range can be registered later using
1351  * vmci_qp_broker_set_page_store. In that case, a page_store of NULL should be
1352  * used.
1353  *
1354  * If the creator is the host, a page_store of NULL should be used as well,
1355  * since the host is not able to supply a page store for the queue pair.
1356  *
1357  * For older VMX and host callers, the queue pair will be created in the
1358  * VMCIQPB_CREATED_NO_MEM state, and for current VMX callers, it will be
1359  * created in VMCOQPB_CREATED_MEM state.
1360  */
1361 static int qp_broker_create(struct vmci_handle handle,
1362                             u32 peer,
1363                             u32 flags,
1364                             u32 priv_flags,
1365                             u64 produce_size,
1366                             u64 consume_size,
1367                             struct vmci_qp_page_store *page_store,
1368                             struct vmci_ctx *context,
1369                             vmci_event_release_cb wakeup_cb,
1370                             void *client_data, struct qp_broker_entry **ent)
1371 {
1372         struct qp_broker_entry *entry = NULL;
1373         const u32 context_id = vmci_ctx_get_id(context);
1374         bool is_local = flags & VMCI_QPFLAG_LOCAL;
1375         int result;
1376         u64 guest_produce_size;
1377         u64 guest_consume_size;
1378
1379         /* Do not create if the caller asked not to. */
1380         if (flags & VMCI_QPFLAG_ATTACH_ONLY)
1381                 return VMCI_ERROR_NOT_FOUND;
1382
1383         /*
1384          * Creator's context ID should match handle's context ID or the creator
1385          * must allow the context in handle's context ID as the "peer".
1386          */
1387         if (handle.context != context_id && handle.context != peer)
1388                 return VMCI_ERROR_NO_ACCESS;
1389
1390         if (VMCI_CONTEXT_IS_VM(context_id) && VMCI_CONTEXT_IS_VM(peer))
1391                 return VMCI_ERROR_DST_UNREACHABLE;
1392
1393         /*
1394          * Creator's context ID for local queue pairs should match the
1395          * peer, if a peer is specified.
1396          */
1397         if (is_local && peer != VMCI_INVALID_ID && context_id != peer)
1398                 return VMCI_ERROR_NO_ACCESS;
1399
1400         entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
1401         if (!entry)
1402                 return VMCI_ERROR_NO_MEM;
1403
1404         if (context_id == VMCI_HOST_CONTEXT_ID && !is_local) {
1405                 /*
1406                  * The queue pair broker entry stores values from the guest
1407                  * point of view, so a creating host side endpoint should swap
1408                  * produce and consume values -- unless it is a local queue
1409                  * pair, in which case no swapping is necessary, since the local
1410                  * attacher will swap queues.
1411                  */
1412
1413                 guest_produce_size = consume_size;
1414                 guest_consume_size = produce_size;
1415         } else {
1416                 guest_produce_size = produce_size;
1417                 guest_consume_size = consume_size;
1418         }
1419
1420         entry->qp.handle = handle;
1421         entry->qp.peer = peer;
1422         entry->qp.flags = flags;
1423         entry->qp.produce_size = guest_produce_size;
1424         entry->qp.consume_size = guest_consume_size;
1425         entry->qp.ref_count = 1;
1426         entry->create_id = context_id;
1427         entry->attach_id = VMCI_INVALID_ID;
1428         entry->state = VMCIQPB_NEW;
1429         entry->require_trusted_attach =
1430             !!(context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED);
1431         entry->created_by_trusted =
1432             !!(priv_flags & VMCI_PRIVILEGE_FLAG_TRUSTED);
1433         entry->vmci_page_files = false;
1434         entry->wakeup_cb = wakeup_cb;
1435         entry->client_data = client_data;
1436         entry->produce_q = qp_host_alloc_queue(guest_produce_size);
1437         if (entry->produce_q == NULL) {
1438                 result = VMCI_ERROR_NO_MEM;
1439                 goto error;
1440         }
1441         entry->consume_q = qp_host_alloc_queue(guest_consume_size);
1442         if (entry->consume_q == NULL) {
1443                 result = VMCI_ERROR_NO_MEM;
1444                 goto error;
1445         }
1446
1447         qp_init_queue_mutex(entry->produce_q, entry->consume_q);
1448
1449         INIT_LIST_HEAD(&entry->qp.list_item);
1450
1451         if (is_local) {
1452                 u8 *tmp;
1453
1454                 entry->local_mem = kcalloc(QPE_NUM_PAGES(entry->qp),
1455                                            PAGE_SIZE, GFP_KERNEL);
1456                 if (entry->local_mem == NULL) {
1457                         result = VMCI_ERROR_NO_MEM;
1458                         goto error;
1459                 }
1460                 entry->state = VMCIQPB_CREATED_MEM;
1461                 entry->produce_q->q_header = entry->local_mem;
1462                 tmp = (u8 *)entry->local_mem + PAGE_SIZE *
1463                     (DIV_ROUND_UP(entry->qp.produce_size, PAGE_SIZE) + 1);
1464                 entry->consume_q->q_header = (struct vmci_queue_header *)tmp;
1465         } else if (page_store) {
1466                 /*
1467                  * The VMX already initialized the queue pair headers, so no
1468                  * need for the kernel side to do that.
1469                  */
1470                 result = qp_host_register_user_memory(page_store,
1471                                                       entry->produce_q,
1472                                                       entry->consume_q);
1473                 if (result < VMCI_SUCCESS)
1474                         goto error;
1475
1476                 entry->state = VMCIQPB_CREATED_MEM;
1477         } else {
1478                 /*
1479                  * A create without a page_store may be either a host
1480                  * side create (in which case we are waiting for the
1481                  * guest side to supply the memory) or an old style
1482                  * queue pair create (in which case we will expect a
1483                  * set page store call as the next step).
1484                  */
1485                 entry->state = VMCIQPB_CREATED_NO_MEM;
1486         }
1487
1488         qp_list_add_entry(&qp_broker_list, &entry->qp);
1489         if (ent != NULL)
1490                 *ent = entry;
1491
1492         /* Add to resource obj */
1493         result = vmci_resource_add(&entry->resource,
1494                                    VMCI_RESOURCE_TYPE_QPAIR_HOST,
1495                                    handle);
1496         if (result != VMCI_SUCCESS) {
1497                 pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d\n",
1498                         handle.context, handle.resource, result);
1499                 goto error;
1500         }
1501
1502         entry->qp.handle = vmci_resource_handle(&entry->resource);
1503         if (is_local) {
1504                 vmci_q_header_init(entry->produce_q->q_header,
1505                                    entry->qp.handle);
1506                 vmci_q_header_init(entry->consume_q->q_header,
1507                                    entry->qp.handle);
1508         }
1509
1510         vmci_ctx_qp_create(context, entry->qp.handle);
1511
1512         return VMCI_SUCCESS;
1513
1514  error:
1515         if (entry != NULL) {
1516                 qp_host_free_queue(entry->produce_q, guest_produce_size);
1517                 qp_host_free_queue(entry->consume_q, guest_consume_size);
1518                 kfree(entry);
1519         }
1520
1521         return result;
1522 }
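
/*
 * Worked example (illustrative only): if the host creates a queue
 * pair with produce_size = 4096 and consume_size = 65536, the broker
 * entry stores the guest's view of the pair, i.e.
 * entry->qp.produce_size = 65536 and entry->qp.consume_size = 4096,
 * since the host's produce queue is the guest's consume queue and
 * vice versa. A guest creator's sizes are stored unswapped.
 */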
1523
1524 /*
1525  * Enqueues an event datagram to notify the peer VM attached to
1526  * the given queue pair handle about attach/detach event by the
1527  * given VM.  Returns the payload size of the datagram enqueued on
1528  * success, error code otherwise.
1529  */
1530 static int qp_notify_peer(bool attach,
1531                           struct vmci_handle handle,
1532                           u32 my_id,
1533                           u32 peer_id)
1534 {
1535         int rv;
1536         struct vmci_event_qp ev;
1537
1538         if (vmci_handle_is_invalid(handle) || my_id == VMCI_INVALID_ID ||
1539             peer_id == VMCI_INVALID_ID)
1540                 return VMCI_ERROR_INVALID_ARGS;
1541
1542         /*
1543  * In vmci_ctx_enqueue_datagram() we enforce an upper limit on the
1544  * number of pending events from the hypervisor to a given VM;
1545  * otherwise a rogue VM could do an arbitrary number of attach
1546  * and detach operations, causing memory pressure in the host
1547  * kernel.
1548          */
1549
1550         ev.msg.hdr.dst = vmci_make_handle(peer_id, VMCI_EVENT_HANDLER);
1551         ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
1552                                           VMCI_CONTEXT_RESOURCE_ID);
1553         ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
1554         ev.msg.event_data.event = attach ?
1555             VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH;
1556         ev.payload.handle = handle;
1557         ev.payload.peer_id = my_id;
1558
1559         rv = vmci_datagram_dispatch(VMCI_HYPERVISOR_CONTEXT_ID,
1560                                     &ev.msg.hdr, false);
1561         if (rv < VMCI_SUCCESS)
1562                 pr_warn("Failed to enqueue queue_pair %s event datagram for context (ID=0x%x)\n",
1563                         attach ? "ATTACH" : "DETACH", peer_id);
1564
1565         return rv;
1566 }
1567
1568 /*
1569  * The second endpoint issuing a queue pair allocation will attach to
1570  * the queue pair registered with the queue pair broker.
1571  *
1572  * If the attacher is a guest, it will associate a VMX virtual address
1573  * range with the queue pair as specified by the page_store. At this
1574  * point, the already attached host endpoint may start using the queue
1575  * pair, and an attach event is sent to it. For compatibility with
1576  * older VMX'en, which used a separate step to set the VMX virtual
1577  * address range, the virtual address range can be registered later
1578  * using vmci_qp_broker_set_page_store. In that case, a page_store of
1579  * NULL should be used, and the attach event will be generated once
1580  * the actual page store has been set.
1581  *
1582  * If the attacher is the host, a page_store of NULL should be used as
1583  * well, since the page store information is already set by the guest.
1584  *
1585  * For new VMX and host callers, the queue pair will be moved to the
1586  * VMCIQPB_ATTACHED_MEM state, and for older VMX callers, it will be
1587  * moved to the VMCIQPB_ATTACHED_NO_MEM state.
1588  */
1589 static int qp_broker_attach(struct qp_broker_entry *entry,
1590                             u32 peer,
1591                             u32 flags,
1592                             u32 priv_flags,
1593                             u64 produce_size,
1594                             u64 consume_size,
1595                             struct vmci_qp_page_store *page_store,
1596                             struct vmci_ctx *context,
1597                             vmci_event_release_cb wakeup_cb,
1598                             void *client_data,
1599                             struct qp_broker_entry **ent)
1600 {
1601         const u32 context_id = vmci_ctx_get_id(context);
1602         bool is_local = flags & VMCI_QPFLAG_LOCAL;
1603         int result;
1604
1605         if (entry->state != VMCIQPB_CREATED_NO_MEM &&
1606             entry->state != VMCIQPB_CREATED_MEM)
1607                 return VMCI_ERROR_UNAVAILABLE;
1608
1609         if (is_local) {
1610                 if (!(entry->qp.flags & VMCI_QPFLAG_LOCAL) ||
1611                     context_id != entry->create_id) {
1612                         return VMCI_ERROR_INVALID_ARGS;
1613                 }
1614         } else if (context_id == entry->create_id ||
1615                    context_id == entry->attach_id) {
1616                 return VMCI_ERROR_ALREADY_EXISTS;
1617         }
1618
1619         if (VMCI_CONTEXT_IS_VM(context_id) &&
1620             VMCI_CONTEXT_IS_VM(entry->create_id))
1621                 return VMCI_ERROR_DST_UNREACHABLE;
1622
1623         /*
1624          * If we are attaching from a restricted context then the queuepair
1625          * must have been created by a trusted endpoint.
1626          */
1627         if ((context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED) &&
1628             !entry->created_by_trusted)
1629                 return VMCI_ERROR_NO_ACCESS;
1630
1631         /*
1632          * If we are attaching to a queuepair that was created by a restricted
1633          * context then we must be trusted.
1634          */
1635         if (entry->require_trusted_attach &&
1636             (!(priv_flags & VMCI_PRIVILEGE_FLAG_TRUSTED)))
1637                 return VMCI_ERROR_NO_ACCESS;
1638
1639         /*
1640          * If the creator specifies VMCI_INVALID_ID in "peer" field, access
1641          * control check is not performed.
1642          */
1643         if (entry->qp.peer != VMCI_INVALID_ID && entry->qp.peer != context_id)
1644                 return VMCI_ERROR_NO_ACCESS;
1645
1646         if (entry->create_id == VMCI_HOST_CONTEXT_ID) {
1647                 /*
1648                  * Do not attach if the caller doesn't support Host Queue Pairs
1649                  * and a host created this queue pair.
1650                  */
1651
1652                 if (!vmci_ctx_supports_host_qp(context))
1653                         return VMCI_ERROR_INVALID_RESOURCE;
1654
1655         } else if (context_id == VMCI_HOST_CONTEXT_ID) {
1656                 struct vmci_ctx *create_context;
1657                 bool supports_host_qp;
1658
1659                 /*
1660                  * Do not attach a host to a user created queue pair if that
1661                  * user doesn't support host queue pair end points.
1662                  */
1663
1664                 create_context = vmci_ctx_get(entry->create_id);
1665                 supports_host_qp = vmci_ctx_supports_host_qp(create_context);
1666                 vmci_ctx_put(create_context);
1667
1668                 if (!supports_host_qp)
1669                         return VMCI_ERROR_INVALID_RESOURCE;
1670         }
1671
1672         if ((entry->qp.flags & ~VMCI_QP_ASYMM) != (flags & ~VMCI_QP_ASYMM_PEER))
1673                 return VMCI_ERROR_QUEUEPAIR_MISMATCH;
1674
1675         if (context_id != VMCI_HOST_CONTEXT_ID) {
1676                 /*
1677                  * The queue pair broker entry stores values from the guest
1678                  * point of view, so an attaching guest should match the values
1679                  * stored in the entry.
1680                  */
1681
1682                 if (entry->qp.produce_size != produce_size ||
1683                     entry->qp.consume_size != consume_size) {
1684                         return VMCI_ERROR_QUEUEPAIR_MISMATCH;
1685                 }
1686         } else if (entry->qp.produce_size != consume_size ||
1687                    entry->qp.consume_size != produce_size) {
1688                 return VMCI_ERROR_QUEUEPAIR_MISMATCH;
1689         }
1690
1691         if (context_id != VMCI_HOST_CONTEXT_ID) {
1692                 /*
1693                  * If a guest attached to a queue pair, it will supply
1694                  * the backing memory.  If this is a pre NOVMVM vmx,
1695                  * the backing memory will be supplied by calling
1696                  * vmci_qp_broker_set_page_store() following the
1697                  * return of the vmci_qp_broker_alloc() call. If it is
1698                  * a vmx of version NOVMVM or later, the page store
1699                  * must be supplied as part of the
1700  * vmci_qp_broker_alloc call.  Under all circumstances,
1701  * the initially created queue pair must not have any
1702  * memory associated with it already.
1703                  */
1704
1705                 if (entry->state != VMCIQPB_CREATED_NO_MEM)
1706                         return VMCI_ERROR_INVALID_ARGS;
1707
1708                 if (page_store != NULL) {
1709                         /*
1710                          * Patch up host state to point to guest
1711                          * supplied memory. The VMX already
1712                          * initialized the queue pair headers, so no
1713                          * need for the kernel side to do that.
1714                          */
1715
1716                         result = qp_host_register_user_memory(page_store,
1717                                                               entry->produce_q,
1718                                                               entry->consume_q);
1719                         if (result < VMCI_SUCCESS)
1720                                 return result;
1721
1722                         /*
1723                          * Preemptively load in the headers if non-blocking to
1724                          * prevent blocking later.
1725                          */
1726                         if (entry->qp.flags & VMCI_QPFLAG_NONBLOCK) {
1727                                 result = qp_host_map_queues(entry->produce_q,
1728                                                             entry->consume_q);
1729                                 if (result < VMCI_SUCCESS) {
1730                                         qp_host_unregister_user_memory(
1731                                                 entry->produce_q,
1732                                                 entry->consume_q);
1733                                         return result;
1734                                 }
1735                         }
1736
1737                         entry->state = VMCIQPB_ATTACHED_MEM;
1738                 } else {
1739                         entry->state = VMCIQPB_ATTACHED_NO_MEM;
1740                 }
1741         } else if (entry->state == VMCIQPB_CREATED_NO_MEM) {
1742                 /*
1743                  * The host side is attempting to attach to a queue
1744                  * pair that doesn't have any memory associated with
1745                  * it. This must be a pre NOVMVM vmx that hasn't set
1746                  * the page store information yet, or a quiesced VM.
1747                  */
1748
1749                 return VMCI_ERROR_UNAVAILABLE;
1750         } else {
1751                 /*
1752                  * For non-blocking queue pairs, we cannot rely on
1753                  * enqueue/dequeue to map in the pages on the
1754                  * host-side, since it may block, so we make an
1755                  * attempt here.
1756                  */
1757
1758                 if (flags & VMCI_QPFLAG_NONBLOCK) {
1759                         result =
1760                             qp_host_map_queues(entry->produce_q,
1761                                                entry->consume_q);
1762                         if (result < VMCI_SUCCESS)
1763                                 return result;
1764
1765                         entry->qp.flags |= flags &
1766                             (VMCI_QPFLAG_NONBLOCK | VMCI_QPFLAG_PINNED);
1767                 }
1768
1769                 /* The host side has successfully attached to a queue pair. */
1770                 entry->state = VMCIQPB_ATTACHED_MEM;
1771         }
1772
1773         if (entry->state == VMCIQPB_ATTACHED_MEM) {
1774                 result =
1775                     qp_notify_peer(true, entry->qp.handle, context_id,
1776                                    entry->create_id);
1777                 if (result < VMCI_SUCCESS)
1778                         pr_warn("Failed to notify peer (ID=0x%x) of attach to queue pair (handle=0x%x:0x%x)\n",
1779                                 entry->create_id, entry->qp.handle.context,
1780                                 entry->qp.handle.resource);
1781         }
1782
1783         entry->attach_id = context_id;
1784         entry->qp.ref_count++;
1785         if (wakeup_cb) {
1786                 entry->wakeup_cb = wakeup_cb;
1787                 entry->client_data = client_data;
1788         }
1789
1790         /*
1791          * When attaching to local queue pairs, the context already has
1792          * an entry tracking the queue pair, so don't add another one.
1793          */
1794         if (!is_local)
1795                 vmci_ctx_qp_create(context, entry->qp.handle);
1796
1797         if (ent != NULL)
1798                 *ent = entry;
1799
1800         return VMCI_SUCCESS;
1801 }
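
/*
 * Compatibility notes (illustrative sketch, not a definitive
 * statement of the ABI): in qp_broker_attach() above, the check
 *
 *	(entry->qp.flags & ~VMCI_QP_ASYMM) != (flags & ~VMCI_QP_ASYMM_PEER)
 *
 * masks out the creator's VMCI_QP_ASYMM bits and the attacher's
 * VMCI_QP_ASYMM_PEER bits before comparing, so an asymmetric creator
 * can be matched by a peer attacher; any other flag mismatch (e.g.
 * only one side requesting VMCI_QPFLAG_NONBLOCK) fails with
 * VMCI_ERROR_QUEUEPAIR_MISMATCH. Similarly, a host attacher's
 * produce/consume sizes are compared swapped against the stored
 * guest view of the sizes.
 */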
1802
1803 /*
1804  * Queue pair allocation for use when setting up queue pair endpoints
1805  * on the host.
1806  */
1807 static int qp_broker_alloc(struct vmci_handle handle,
1808                            u32 peer,
1809                            u32 flags,
1810                            u32 priv_flags,
1811                            u64 produce_size,
1812                            u64 consume_size,
1813                            struct vmci_qp_page_store *page_store,
1814                            struct vmci_ctx *context,
1815                            vmci_event_release_cb wakeup_cb,
1816                            void *client_data,
1817                            struct qp_broker_entry **ent,
1818                            bool *swap)
1819 {
1820         const u32 context_id = vmci_ctx_get_id(context);
1821         bool create;
1822         struct qp_broker_entry *entry = NULL;
1823         bool is_local = flags & VMCI_QPFLAG_LOCAL;
1824         int result;
1825
1826         if (vmci_handle_is_invalid(handle) ||
1827             (flags & ~VMCI_QP_ALL_FLAGS) || is_local ||
1828             !(produce_size || consume_size) ||
1829             !context || context_id == VMCI_INVALID_ID ||
1830             handle.context == VMCI_INVALID_ID) {
1831                 return VMCI_ERROR_INVALID_ARGS;
1832         }
1833
1834         if (page_store && !VMCI_QP_PAGESTORE_IS_WELLFORMED(page_store))
1835                 return VMCI_ERROR_INVALID_ARGS;
1836
1837         /*
1838          * In the initial argument check, we ensure that non-vmkernel hosts
1839          * are not allowed to create local queue pairs.
1840          */
1841
1842         mutex_lock(&qp_broker_list.mutex);
1843
1844         if (!is_local && vmci_ctx_qp_exists(context, handle)) {
1845                 pr_devel("Context (ID=0x%x) already attached to queue pair (handle=0x%x:0x%x)\n",
1846                          context_id, handle.context, handle.resource);
1847                 mutex_unlock(&qp_broker_list.mutex);
1848                 return VMCI_ERROR_ALREADY_EXISTS;
1849         }
1850
1851         if (handle.resource != VMCI_INVALID_ID)
1852                 entry = qp_broker_handle_to_entry(handle);
1853
1854         if (!entry) {
1855                 create = true;
1856                 result =
1857                     qp_broker_create(handle, peer, flags, priv_flags,
1858                                      produce_size, consume_size, page_store,
1859                                      context, wakeup_cb, client_data, ent);
1860         } else {
1861                 create = false;
1862                 result =
1863                     qp_broker_attach(entry, peer, flags, priv_flags,
1864                                      produce_size, consume_size, page_store,
1865                                      context, wakeup_cb, client_data, ent);
1866         }
1867
1868         mutex_unlock(&qp_broker_list.mutex);
1869
1870         if (swap)
1871                 *swap = (context_id == VMCI_HOST_CONTEXT_ID) &&
1872                     !(create && is_local);
1873
1874         return result;
1875 }
1876
1877 /*
1878  * This function implements the kernel API for allocating a queue
1879  * pair.
1880  */
1881 static int qp_alloc_host_work(struct vmci_handle *handle,
1882                               struct vmci_queue **produce_q,
1883                               u64 produce_size,
1884                               struct vmci_queue **consume_q,
1885                               u64 consume_size,
1886                               u32 peer,
1887                               u32 flags,
1888                               u32 priv_flags,
1889                               vmci_event_release_cb wakeup_cb,
1890                               void *client_data)
1891 {
1892         struct vmci_handle new_handle;
1893         struct vmci_ctx *context;
1894         struct qp_broker_entry *entry;
1895         int result;
1896         bool swap;
1897
1898         if (vmci_handle_is_invalid(*handle)) {
1899                 new_handle = vmci_make_handle(
1900                         VMCI_HOST_CONTEXT_ID, VMCI_INVALID_ID);
1901         } else
1902                 new_handle = *handle;
1903
1904         context = vmci_ctx_get(VMCI_HOST_CONTEXT_ID);
1905         entry = NULL;
1906         result =
1907             qp_broker_alloc(new_handle, peer, flags, priv_flags,
1908                             produce_size, consume_size, NULL, context,
1909                             wakeup_cb, client_data, &entry, &swap);
1910         if (result == VMCI_SUCCESS) {
1911                 if (swap) {
1912                         /*
1913                          * If this is a local queue pair, the attacher
1914                          * will swap around produce and consume
1915                          * queues.
1916                          */
1917
1918                         *produce_q = entry->consume_q;
1919                         *consume_q = entry->produce_q;
1920                 } else {
1921                         *produce_q = entry->produce_q;
1922                         *consume_q = entry->consume_q;
1923                 }
1924
1925                 *handle = vmci_resource_handle(&entry->resource);
1926         } else {
1927                 *handle = VMCI_INVALID_HANDLE;
1928                 pr_devel("queue pair broker failed to alloc (result=%d)\n",
1929                          result);
1930         }
1931         vmci_ctx_put(context);
1932         return result;
1933 }
1934
1935 /*
1936  * Allocates a VMCI queue_pair. Only checks validity of input
1937  * arguments. The real work is done in the host or guest
1938  * specific function.
1939  */
1940 int vmci_qp_alloc(struct vmci_handle *handle,
1941                   struct vmci_queue **produce_q,
1942                   u64 produce_size,
1943                   struct vmci_queue **consume_q,
1944                   u64 consume_size,
1945                   u32 peer,
1946                   u32 flags,
1947                   u32 priv_flags,
1948                   bool guest_endpoint,
1949                   vmci_event_release_cb wakeup_cb,
1950                   void *client_data)
1951 {
1952         if (!handle || !produce_q || !consume_q ||
1953             (!produce_size && !consume_size) || (flags & ~VMCI_QP_ALL_FLAGS))
1954                 return VMCI_ERROR_INVALID_ARGS;
1955
1956         if (guest_endpoint) {
1957                 return qp_alloc_guest_work(handle, produce_q,
1958                                            produce_size, consume_q,
1959                                            consume_size, peer,
1960                                            flags, priv_flags);
1961         } else {
1962                 return qp_alloc_host_work(handle, produce_q,
1963                                           produce_size, consume_q,
1964                                           consume_size, peer, flags,
1965                                           priv_flags, wakeup_cb, client_data);
1966         }
1967 }
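
/*
 * Usage sketch (illustrative only; peer_cid, my_wakeup_cb and
 * my_client_data are made-up names): allocating a host side endpoint
 * with one page of queue space in each direction.
 *
 *	struct vmci_handle handle = VMCI_INVALID_HANDLE;
 *	struct vmci_queue *produce_q;
 *	struct vmci_queue *consume_q;
 *	int result;
 *
 *	result = vmci_qp_alloc(&handle, &produce_q, PAGE_SIZE,
 *			       &consume_q, PAGE_SIZE, peer_cid,
 *			       0, VMCI_NO_PRIVILEGE_FLAGS,
 *			       false, my_wakeup_cb, my_client_data);
 *	if (result < VMCI_SUCCESS)
 *		return result;
 */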
1968
1969 /*
1970  * This function implements the host kernel API for detaching from
1971  * a queue pair.
1972  */
1973 static int qp_detatch_host_work(struct vmci_handle handle)
1974 {
1975         int result;
1976         struct vmci_ctx *context;
1977
1978         context = vmci_ctx_get(VMCI_HOST_CONTEXT_ID);
1979
1980         result = vmci_qp_broker_detach(handle, context);
1981
1982         vmci_ctx_put(context);
1983         return result;
1984 }
1985
1986 /*
1987  * Detaches from a VMCI queue_pair. Only checks validity of input argument.
1988  * Real work is done in the host or guest specific function.
1989  */
1990 static int qp_detatch(struct vmci_handle handle, bool guest_endpoint)
1991 {
1992         if (vmci_handle_is_invalid(handle))
1993                 return VMCI_ERROR_INVALID_ARGS;
1994
1995         if (guest_endpoint)
1996                 return qp_detatch_guest_work(handle);
1997         else
1998                 return qp_detatch_host_work(handle);
1999 }
2000
2001 /*
2002  * Returns the entry from the head of the list. Assumes that the list is
2003  * locked.
2004  */
2005 static struct qp_entry *qp_list_get_head(struct qp_list *qp_list)
2006 {
2007         if (!list_empty(&qp_list->head)) {
2008                 struct qp_entry *entry =
2009                     list_first_entry(&qp_list->head, struct qp_entry,
2010                                      list_item);
2011                 return entry;
2012         }
2013
2014         return NULL;
2015 }
2016
2017 void vmci_qp_broker_exit(void)
2018 {
2019         struct qp_entry *entry;
2020         struct qp_broker_entry *be;
2021
2022         mutex_lock(&qp_broker_list.mutex);
2023
2024         while ((entry = qp_list_get_head(&qp_broker_list))) {
2025                 be = (struct qp_broker_entry *)entry;
2026
2027                 qp_list_remove_entry(&qp_broker_list, entry);
2028                 kfree(be);
2029         }
2030
2031         mutex_unlock(&qp_broker_list.mutex);
2032 }
2033
2034 /*
2035  * Requests that a queue pair be allocated with the VMCI queue
2036  * pair broker. Allocates a queue pair entry if one does not
2037  * exist. Attaches to one if it exists, and retrieves the page
2038  * files backing that queue_pair.  Assumes that the queue pair
2039  * files backing that queue_pair.  Takes the queue pair broker
2040  * lock internally, so the caller must not hold it.
2041 int vmci_qp_broker_alloc(struct vmci_handle handle,
2042                          u32 peer,
2043                          u32 flags,
2044                          u32 priv_flags,
2045                          u64 produce_size,
2046                          u64 consume_size,
2047                          struct vmci_qp_page_store *page_store,
2048                          struct vmci_ctx *context)
2049 {
2050         return qp_broker_alloc(handle, peer, flags, priv_flags,
2051                                produce_size, consume_size,
2052                                page_store, context, NULL, NULL, NULL, NULL);
2053 }
2054
2055 /*
2056  * VMX'en with versions lower than VMCI_VERSION_NOVMVM use a separate
2057  * step to add the UVAs of the VMX mapping of the queue pair. This function
2058  * provides backwards compatibility with such VMX'en, and takes care of
2059  * registering the page store for a queue pair previously allocated by the
2060  * VMX during create or attach. This function will move the queue pair state
2061  * either from VMCIQPB_CREATED_NO_MEM to VMCIQPB_CREATED_MEM or from
2062  * VMCIQPB_ATTACHED_NO_MEM to VMCIQPB_ATTACHED_MEM. If moving to the
2063  * attached state with memory, the queue pair is ready to be used by the
2064  * host peer, and an attached event will be generated.
2065  *
2066  * Assumes that the queue pair broker lock is held.
2067  *
2068  * This function is only used by the hosted platform, since there is no
2069  * issue with backwards compatibility for vmkernel.
2070  */
2071 int vmci_qp_broker_set_page_store(struct vmci_handle handle,
2072                                   u64 produce_uva,
2073                                   u64 consume_uva,
2074                                   struct vmci_ctx *context)
2075 {
2076         struct qp_broker_entry *entry;
2077         int result;
2078         const u32 context_id = vmci_ctx_get_id(context);
2079
2080         if (vmci_handle_is_invalid(handle) || !context ||
2081             context_id == VMCI_INVALID_ID)
2082                 return VMCI_ERROR_INVALID_ARGS;
2083
2084         /*
2085          * We only support guest to host queue pairs, so the VMX must
2086          * supply UVAs for the mapped page files.
2087          */
2088
2089         if (produce_uva == 0 || consume_uva == 0)
2090                 return VMCI_ERROR_INVALID_ARGS;
2091
2092         mutex_lock(&qp_broker_list.mutex);
2093
2094         if (!vmci_ctx_qp_exists(context, handle)) {
2095                 pr_warn("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
2096                         context_id, handle.context, handle.resource);
2097                 result = VMCI_ERROR_NOT_FOUND;
2098                 goto out;
2099         }
2100
2101         entry = qp_broker_handle_to_entry(handle);
2102         if (!entry) {
2103                 result = VMCI_ERROR_NOT_FOUND;
2104                 goto out;
2105         }
2106
2107         /*
2108          * If I'm the owner then I can set the page store.
2109          *
2110          * Or, if a host created the queue_pair and I'm the attached peer
2111          * then I can set the page store.
2112          */
2113         if (entry->create_id != context_id &&
2114             (entry->create_id != VMCI_HOST_CONTEXT_ID ||
2115              entry->attach_id != context_id)) {
2116                 result = VMCI_ERROR_QUEUEPAIR_NOTOWNER;
2117                 goto out;
2118         }
2119
2120         if (entry->state != VMCIQPB_CREATED_NO_MEM &&
2121             entry->state != VMCIQPB_ATTACHED_NO_MEM) {
2122                 result = VMCI_ERROR_UNAVAILABLE;
2123                 goto out;
2124         }
2125
2126         result = qp_host_get_user_memory(produce_uva, consume_uva,
2127                                          entry->produce_q, entry->consume_q);
2128         if (result < VMCI_SUCCESS)
2129                 goto out;
2130
2131         result = qp_host_map_queues(entry->produce_q, entry->consume_q);
2132         if (result < VMCI_SUCCESS) {
2133                 qp_host_unregister_user_memory(entry->produce_q,
2134                                                entry->consume_q);
2135                 goto out;
2136         }
2137
2138         if (entry->state == VMCIQPB_CREATED_NO_MEM)
2139                 entry->state = VMCIQPB_CREATED_MEM;
2140         else
2141                 entry->state = VMCIQPB_ATTACHED_MEM;
2142
2143         entry->vmci_page_files = true;
2144
2145         if (entry->state == VMCIQPB_ATTACHED_MEM) {
2146                 result =
2147                     qp_notify_peer(true, handle, context_id, entry->create_id);
2148                 if (result < VMCI_SUCCESS) {
2149                         pr_warn("Failed to notify peer (ID=0x%x) of attach to queue pair (handle=0x%x:0x%x)\n",
2150                                 entry->create_id, entry->qp.handle.context,
2151                                 entry->qp.handle.resource);
2152                 }
2153         }
2154
2155         result = VMCI_SUCCESS;
2156  out:
2157         mutex_unlock(&qp_broker_list.mutex);
2158         return result;
2159 }
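
/*
 * Old-style flow sketch (illustrative only): an old-style VMX first
 * allocates the pair without backing memory and registers the UVAs in
 * a second step, which also fires the deferred attach event if a peer
 * is already attached.
 *
 *	vmci_qp_broker_alloc(handle, peer, flags, priv_flags,
 *			     produce_size, consume_size,
 *			     NULL, context);                 state: *_NO_MEM
 *	vmci_qp_broker_set_page_store(handle, produce_uva,
 *				      consume_uva, context); state: *_MEM
 */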
2160
2161 /*
2162  * Resets saved queue headers for the given QP broker
2163  * entry. Should be used when guest memory becomes available
2164  * again, or the guest detaches.
2165  */
2166 static void qp_reset_saved_headers(struct qp_broker_entry *entry)
2167 {
2168         entry->produce_q->saved_header = NULL;
2169         entry->consume_q->saved_header = NULL;
2170 }
2171
2172 /*
2173  * The main entry point for detaching from a queue pair registered with the
2174  * queue pair broker. If more than one endpoint is attached to the queue
2175  * pair, the first endpoint will mainly decrement a reference count and
2176  * generate a notification to its peer. The last endpoint will clean up
2177  * the queue pair state registered with the broker.
2178  *
2179  * When a guest endpoint detaches, it will unmap and unregister the guest
2180  * memory backing the queue pair. If the host is still attached, it will
2181  * no longer be able to access the queue pair content.
2182  *
2183  * If the queue pair is already in a state where there is no memory
2184  * registered for the queue pair (any *_NO_MEM state), it will transition to
2185  * the VMCIQPB_SHUTDOWN_NO_MEM state. This will also happen if a guest
2186  * endpoint is the first of two endpoints to detach. If the host endpoint is
2187  * the first out of two to detach, the queue pair will move to the
2188  * VMCIQPB_SHUTDOWN_MEM state.
2189  */
2190 int vmci_qp_broker_detach(struct vmci_handle handle, struct vmci_ctx *context)
2191 {
2192         struct qp_broker_entry *entry;
2193         const u32 context_id = vmci_ctx_get_id(context);
2194         u32 peer_id;
2195         bool is_local = false;
2196         int result;
2197
2198         if (vmci_handle_is_invalid(handle) || !context ||
2199             context_id == VMCI_INVALID_ID) {
2200                 return VMCI_ERROR_INVALID_ARGS;
2201         }
2202
2203         mutex_lock(&qp_broker_list.mutex);
2204
2205         if (!vmci_ctx_qp_exists(context, handle)) {
2206                 pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
2207                          context_id, handle.context, handle.resource);
2208                 result = VMCI_ERROR_NOT_FOUND;
2209                 goto out;
2210         }
2211
2212         entry = qp_broker_handle_to_entry(handle);
2213         if (!entry) {
2214                 pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n",
2215                          context_id, handle.context, handle.resource);
2216                 result = VMCI_ERROR_NOT_FOUND;
2217                 goto out;
2218         }
2219
2220         if (context_id != entry->create_id && context_id != entry->attach_id) {
2221                 result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
2222                 goto out;
2223         }
2224
2225         if (context_id == entry->create_id) {
2226                 peer_id = entry->attach_id;
2227                 entry->create_id = VMCI_INVALID_ID;
2228         } else {
2229                 peer_id = entry->create_id;
2230                 entry->attach_id = VMCI_INVALID_ID;
2231         }
2232         entry->qp.ref_count--;
2233
2234         is_local = entry->qp.flags & VMCI_QPFLAG_LOCAL;
2235
2236         if (context_id != VMCI_HOST_CONTEXT_ID) {
2237                 bool headers_mapped;
2238
2239                 /*
2240                  * Pre NOVMVM vmx'en may detach from a queue pair
2241                  * before setting the page store, and in that case
2242                  * there is no user memory to detach from. Also, more
2243                  * recent VMX'en may detach from a queue pair in the
2244                  * quiesced state.
2245                  */
2246
2247                 qp_acquire_queue_mutex(entry->produce_q);
2248                 headers_mapped = entry->produce_q->q_header ||
2249                     entry->consume_q->q_header;
2250                 if (QPBROKERSTATE_HAS_MEM(entry)) {
2251                         result =
2252                             qp_host_unmap_queues(INVALID_VMCI_GUEST_MEM_ID,
2253                                                  entry->produce_q,
2254                                                  entry->consume_q);
2255                         if (result < VMCI_SUCCESS)
2256                                 pr_warn("Failed to unmap queue headers for queue pair (handle=0x%x:0x%x,result=%d)\n",
2257                                         handle.context, handle.resource,
2258                                         result);
2259
2260                         /*
2261                          * Identical in both the vmci_page_files and the
2262                          * non-vmci_page_files case, so unregister the
2263                          * user memory unconditionally.
2264                          */
2265                         qp_host_unregister_user_memory(entry->produce_q,
2266                                                        entry->consume_q);
2268
2269                 }
2270
2271                 if (!headers_mapped)
2272                         qp_reset_saved_headers(entry);
2273
2274                 qp_release_queue_mutex(entry->produce_q);
2275
2276                 if (!headers_mapped && entry->wakeup_cb)
2277                         entry->wakeup_cb(entry->client_data);
2278
2279         } else {
2280                 if (entry->wakeup_cb) {
2281                         entry->wakeup_cb = NULL;
2282                         entry->client_data = NULL;
2283                 }
2284         }
2285
2286         if (entry->qp.ref_count == 0) {
2287                 qp_list_remove_entry(&qp_broker_list, &entry->qp);
2288
2289                 if (is_local)
2290                         kfree(entry->local_mem);
2291
2292                 qp_cleanup_queue_mutex(entry->produce_q, entry->consume_q);
2293                 qp_host_free_queue(entry->produce_q, entry->qp.produce_size);
2294                 qp_host_free_queue(entry->consume_q, entry->qp.consume_size);
2295                 /* Unlink from resource hash table and free callback */
2296                 vmci_resource_remove(&entry->resource);
2297
2298                 kfree(entry);
2299
2300                 vmci_ctx_qp_destroy(context, handle);
2301         } else {
2302                 qp_notify_peer(false, handle, context_id, peer_id);
2303                 if (context_id == VMCI_HOST_CONTEXT_ID &&
2304                     QPBROKERSTATE_HAS_MEM(entry)) {
2305                         entry->state = VMCIQPB_SHUTDOWN_MEM;
2306                 } else {
2307                         entry->state = VMCIQPB_SHUTDOWN_NO_MEM;
2308                 }
2309
2310                 if (!is_local)
2311                         vmci_ctx_qp_destroy(context, handle);
2312
2313         }
2314         result = VMCI_SUCCESS;
2315  out:
2316         mutex_unlock(&qp_broker_list.mutex);
2317         return result;
2318 }
2319
2320 /*
2321  * Establishes the necessary mappings for a queue pair given a
2322  * reference to the queue pair guest memory. This is usually
2323  * called when a guest is unquiesced and the VMX is allowed to
2324  * map guest memory once again.
2325  */
2326 int vmci_qp_broker_map(struct vmci_handle handle,
2327                        struct vmci_ctx *context,
2328                        u64 guest_mem)
2329 {
2330         struct qp_broker_entry *entry;
2331         const u32 context_id = vmci_ctx_get_id(context);
2332         bool is_local = false;
2333         int result;
2334
2335         if (vmci_handle_is_invalid(handle) || !context ||
2336             context_id == VMCI_INVALID_ID)
2337                 return VMCI_ERROR_INVALID_ARGS;
2338
2339         mutex_lock(&qp_broker_list.mutex);
2340
2341         if (!vmci_ctx_qp_exists(context, handle)) {
2342                 pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
2343                          context_id, handle.context, handle.resource);
2344                 result = VMCI_ERROR_NOT_FOUND;
2345                 goto out;
2346         }
2347
2348         entry = qp_broker_handle_to_entry(handle);
2349         if (!entry) {
2350                 pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n",
2351                          context_id, handle.context, handle.resource);
2352                 result = VMCI_ERROR_NOT_FOUND;
2353                 goto out;
2354         }
2355
2356         if (context_id != entry->create_id && context_id != entry->attach_id) {
2357                 result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
2358                 goto out;
2359         }
2360
2361         is_local = entry->qp.flags & VMCI_QPFLAG_LOCAL;
2362         result = VMCI_SUCCESS;
2363
2364         if (context_id != VMCI_HOST_CONTEXT_ID) {
2365                 struct vmci_qp_page_store page_store;
2366
2367                 page_store.pages = guest_mem;
2368                 page_store.len = QPE_NUM_PAGES(entry->qp);
2369
2370                 qp_acquire_queue_mutex(entry->produce_q);
2371                 qp_reset_saved_headers(entry);
2372                 result =
2373                     qp_host_register_user_memory(&page_store,
2374                                                  entry->produce_q,
2375                                                  entry->consume_q);
2376                 qp_release_queue_mutex(entry->produce_q);
2377                 if (result == VMCI_SUCCESS) {
2378                         /* Move state from *_NO_MEM to *_MEM */
2379
2380                         entry->state++;
2381
2382                         if (entry->wakeup_cb)
2383                                 entry->wakeup_cb(entry->client_data);
2384                 }
2385         }
2386
2387  out:
2388         mutex_unlock(&qp_broker_list.mutex);
2389         return result;
2390 }
2391
2392 /*
2393  * Saves a snapshot of the queue headers for the given QP broker
2394  * entry. Should be used when guest memory is unmapped.
2395  * Results:
2396  * VMCI_SUCCESS on success, appropriate error code if guest memory
2397  * can't be accessed.
2398  */
2399 static int qp_save_headers(struct qp_broker_entry *entry)
2400 {
2401         int result;
2402
2403         if (entry->produce_q->saved_header != NULL &&
2404             entry->consume_q->saved_header != NULL) {
2405                 /*
2406                  *  If the headers have already been saved, we don't need to do
2407                  *  it again, and we don't want to map in the headers
2408                  *  unnecessarily.
2409                  */
2410
2411                 return VMCI_SUCCESS;
2412         }
2413
2414         if (!entry->produce_q->q_header ||
2415             !entry->consume_q->q_header) {
2416                 result = qp_host_map_queues(entry->produce_q, entry->consume_q);
2417                 if (result < VMCI_SUCCESS)
2418                         return result;
2419         }
2420
2421         memcpy(&entry->saved_produce_q, entry->produce_q->q_header,
2422                sizeof(entry->saved_produce_q));
2423         entry->produce_q->saved_header = &entry->saved_produce_q;
2424         memcpy(&entry->saved_consume_q, entry->consume_q->q_header,
2425                sizeof(entry->saved_consume_q));
2426         entry->consume_q->saved_header = &entry->saved_consume_q;
2427
2428         return VMCI_SUCCESS;
2429 }
2430
2431 /*
2432  * Removes all references to the guest memory of a given queue pair, and
2433  * will move the queue pair from state *_MEM to *_NO_MEM. It is usually
2434  * called when a VM is being quiesced and access to guest memory should be
2435  * avoided.
2436  */
2437 int vmci_qp_broker_unmap(struct vmci_handle handle,
2438                          struct vmci_ctx *context,
2439                          u32 gid)
2440 {
2441         struct qp_broker_entry *entry;
2442         const u32 context_id = vmci_ctx_get_id(context);
2443         bool is_local = false;
2444         int result;
2445
2446         if (vmci_handle_is_invalid(handle) || !context ||
2447             context_id == VMCI_INVALID_ID)
2448                 return VMCI_ERROR_INVALID_ARGS;
2449
2450         mutex_lock(&qp_broker_list.mutex);
2451
2452         if (!vmci_ctx_qp_exists(context, handle)) {
2453                 pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
2454                          context_id, handle.context, handle.resource);
2455                 result = VMCI_ERROR_NOT_FOUND;
2456                 goto out;
2457         }
2458
2459         entry = qp_broker_handle_to_entry(handle);
2460         if (!entry) {
2461                 pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n",
2462                          context_id, handle.context, handle.resource);
2463                 result = VMCI_ERROR_NOT_FOUND;
2464                 goto out;
2465         }
2466
2467         if (context_id != entry->create_id && context_id != entry->attach_id) {
2468                 result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
2469                 goto out;
2470         }
2471
2472         is_local = entry->qp.flags & VMCI_QPFLAG_LOCAL;
2473
2474         if (context_id != VMCI_HOST_CONTEXT_ID) {
2475                 qp_acquire_queue_mutex(entry->produce_q);
2476                 result = qp_save_headers(entry);
2477                 if (result < VMCI_SUCCESS)
2478                         pr_warn("Failed to save queue headers for queue pair (handle=0x%x:0x%x,result=%d)\n",
2479                                 handle.context, handle.resource, result);
2480
2481                 qp_host_unmap_queues(gid, entry->produce_q, entry->consume_q);
2482
2483                 /*
2484                  * On hosted, when we unmap queue pairs, the VMX will also
2485                  * unmap the guest memory, so we invalidate the previously
2486                  * registered memory. If the queue pair is mapped again at a
2487                  * later point in time, we will need to reregister the user
2488                  * memory with a possibly new user VA.
2489                  */
2490                 qp_host_unregister_user_memory(entry->produce_q,
2491                                                entry->consume_q);
2492
2493                 /*
2494                  * Move state from *_MEM to *_NO_MEM.
2495                  */
2496                 entry->state--;
2497
2498                 qp_release_queue_mutex(entry->produce_q);
2499         }
2500
2501         result = VMCI_SUCCESS;
2502
2503  out:
2504         mutex_unlock(&qp_broker_list.mutex);
2505         return result;
2506 }
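
/*
 * Quiesce sketch (illustrative only): on quiesce the VMX unmaps guest
 * memory, so the broker saves the queue headers and drops its
 * references; on resume the memory is registered again.
 *
 *	vmci_qp_broker_unmap(handle, context, gid);      *_MEM -> *_NO_MEM
 *	... VM quiesced, only saved headers are available ...
 *	vmci_qp_broker_map(handle, context, guest_mem);  *_NO_MEM -> *_MEM
 */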
2507
2508 /*
2509  * Destroys all guest queue pair endpoints. If active guest queue
2510  * pairs still exist, hypercalls to attempt detach from these
2511  * queue pairs will be made. Any failure to detach is silently
2512  * ignored.
2513  */
2514 void vmci_qp_guest_endpoints_exit(void)
2515 {
2516         struct qp_entry *entry;
2517         struct qp_guest_endpoint *ep;
2518
2519         mutex_lock(&qp_guest_endpoints.mutex);
2520
2521         while ((entry = qp_list_get_head(&qp_guest_endpoints))) {
2522                 ep = (struct qp_guest_endpoint *)entry;
2523
2524                 /* Don't make a hypercall for local queue_pairs. */
2525                 if (!(entry->flags & VMCI_QPFLAG_LOCAL))
2526                         qp_detatch_hypercall(entry->handle);
2527
2528                 /* We cannot fail the exit, so let's reset ref_count. */
2529                 entry->ref_count = 0;
2530                 qp_list_remove_entry(&qp_guest_endpoints, entry);
2531
2532                 qp_guest_endpoint_destroy(ep);
2533         }
2534
2535         mutex_unlock(&qp_guest_endpoints.mutex);
2536 }
2537
2538 /*
2539  * Helper routine that will lock the queue pair before subsequent
2540  * operations.
2541  * Note: Non-blocking on the host side is currently only implemented in ESX.
2542  * Since non-blocking isn't yet implemented on the host personality we
2543  * have no reason to acquire a spin lock.  So to avoid the use of an
2544  * unnecessary lock only acquire the mutex if we can block.
2545  * Note: It is assumed that QPFLAG_PINNED implies QPFLAG_NONBLOCK.  Therefore
2546  * we can use the same locking function for access to both the queue
2547  * and the queue headers as it is the same logic.  Assert this behavior.
2548  */
2549 static void qp_lock(const struct vmci_qp *qpair)
2550 {
2551         if (vmci_can_block(qpair->flags))
2552                 qp_acquire_queue_mutex(qpair->produce_q);
2553 }
2554
2555 /*
2556  * Helper routine that unlocks the queue pair after calling
2557  * qp_lock.  Respects non-blocking and pinning flags.
2558  */
2559 static void qp_unlock(const struct vmci_qp *qpair)
2560 {
2561         if (vmci_can_block(qpair->flags))
2562                 qp_release_queue_mutex(qpair->produce_q);
2563 }
2564
2565 /*
2566  * The queue headers may not be mapped at all times. If a queue is
2567  * currently not mapped, an attempt will be made to map it.
2568  */
2569 static int qp_map_queue_headers(struct vmci_queue *produce_q,
2570                                 struct vmci_queue *consume_q,
2571                                 bool can_block)
2572 {
2573         int result;
2574
2575         if (!produce_q->q_header || !consume_q->q_header) {
2576                 if (can_block)
2577                         result = qp_host_map_queues(produce_q, consume_q);
2578                 else
2579                         result = VMCI_ERROR_QUEUEPAIR_NOT_READY;
2580
2581                 if (result < VMCI_SUCCESS)
2582                         return (produce_q->saved_header &&
2583                                 consume_q->saved_header) ?
2584                             VMCI_ERROR_QUEUEPAIR_NOT_READY :
2585                             VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
2586         }
2587
2588         return VMCI_SUCCESS;
2589 }
2590
2591 /*
2592  * Helper routine that will retrieve the produce and consume
2593  * headers of a given queue pair. If the guest memory of the
2594  * queue pair is currently not available, the saved queue headers
2595  * will be returned, if these are available.
2596  */
2597 static int qp_get_queue_headers(const struct vmci_qp *qpair,
2598                                 struct vmci_queue_header **produce_q_header,
2599                                 struct vmci_queue_header **consume_q_header)
2600 {
2601         int result;
2602
2603         result = qp_map_queue_headers(qpair->produce_q, qpair->consume_q,
2604                                       vmci_can_block(qpair->flags));
2605         if (result == VMCI_SUCCESS) {
2606                 *produce_q_header = qpair->produce_q->q_header;
2607                 *consume_q_header = qpair->consume_q->q_header;
2608         } else if (qpair->produce_q->saved_header &&
2609                    qpair->consume_q->saved_header) {
2610                 *produce_q_header = qpair->produce_q->saved_header;
2611                 *consume_q_header = qpair->consume_q->saved_header;
2612                 result = VMCI_SUCCESS;
2613         }
2614
2615         return result;
2616 }
2617
2618 /*
2619  * Callback from VMCI queue pair broker indicating that a queue
2620  * pair that was previously not ready, now either is ready or
2621  * gone forever.
2622  */
2623 static int qp_wakeup_cb(void *client_data)
2624 {
2625         struct vmci_qp *qpair = (struct vmci_qp *)client_data;
2626
2627         qp_lock(qpair);
2628         while (qpair->blocked > 0) {
2629                 qpair->blocked--;
2630                 qpair->generation++;
2631                 wake_up(&qpair->event);
2632         }
2633         qp_unlock(qpair);
2634
2635         return VMCI_SUCCESS;
2636 }
2637
2638 /*
2639  * Makes the calling thread wait for the queue pair to become
2640  * ready for host side access.  Returns true when thread is
2641  * woken up after queue pair state change, false otherwise.
2642  */
2643 static bool qp_wait_for_ready_queue(struct vmci_qp *qpair)
2644 {
2645         unsigned int generation;
2646
2647         if (qpair->flags & VMCI_QPFLAG_NONBLOCK)
2648                 return false;
2649
2650         qpair->blocked++;
2651         generation = qpair->generation;
2652         qp_unlock(qpair);
2653         wait_event(qpair->event, generation != qpair->generation);
2654         qp_lock(qpair);
2655
2656         return true;
2657 }
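
/*
 * Caller pattern sketch (illustrative only; some_queue_op is a
 * made-up placeholder): blocking callers retry an operation until the
 * queue pair becomes ready, while non-blocking callers bail out since
 * qp_wait_for_ready_queue() refuses to wait for them.
 *
 *	qp_lock(qpair);
 *	do {
 *		result = some_queue_op(qpair);
 *		if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
 *		    !qp_wait_for_ready_queue(qpair))
 *			result = VMCI_ERROR_WOULD_BLOCK;
 *	} while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
 *	qp_unlock(qpair);
 */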
2658
2659 /*
2660  * Enqueues a given buffer to the produce queue using the provided
2661  * function. As many bytes as possible (space available in the queue)
2662  * are enqueued.  Assumes the queue->mutex has been acquired.  Returns
2663  * VMCI_ERROR_QUEUEPAIR_NOSPACE if no space was available to enqueue
2664  * data, VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the
2665  * queue (as defined by the queue size), VMCI_ERROR_INVALID_ARGS, if
2666  * an error occurred when accessing the buffer,
2667  * VMCI_ERROR_QUEUEPAIR_NOTATTACHED, if the queue pair pages aren't
2668  * available.  Otherwise, the number of bytes written to the queue is
2669  * returned.  Updates the tail pointer of the produce queue.
2670  */
2671 static ssize_t qp_enqueue_locked(struct vmci_queue *produce_q,
2672                                  struct vmci_queue *consume_q,
2673                                  const u64 produce_q_size,
2674                                  const void *buf,
2675                                  size_t buf_size,
2676                                  vmci_memcpy_to_queue_func memcpy_to_queue,
2677                                  bool can_block)
2678 {
2679         s64 free_space;
2680         u64 tail;
2681         size_t written;
2682         ssize_t result;
2683
2684         result = qp_map_queue_headers(produce_q, consume_q, can_block);
2685         if (unlikely(result != VMCI_SUCCESS))
2686                 return result;
2687
2688         free_space = vmci_q_header_free_space(produce_q->q_header,
2689                                               consume_q->q_header,
2690                                               produce_q_size);
2691         if (free_space == 0)
2692                 return VMCI_ERROR_QUEUEPAIR_NOSPACE;
2693
2694         if (free_space < VMCI_SUCCESS)
2695                 return (ssize_t) free_space;
2696
2697         written = (size_t) (free_space > buf_size ? buf_size : free_space);
2698         tail = vmci_q_header_producer_tail(produce_q->q_header);
2699         if (likely(tail + written < produce_q_size)) {
2700                 result = memcpy_to_queue(produce_q, tail, buf, 0, written);
2701         } else {
2702                 /* Tail pointer wraps around. */
2703
2704                 const size_t tmp = (size_t) (produce_q_size - tail);
2705
2706                 result = memcpy_to_queue(produce_q, tail, buf, 0, tmp);
2707                 if (result >= VMCI_SUCCESS)
2708                         result = memcpy_to_queue(produce_q, 0, buf, tmp,
2709                                                  written - tmp);
2710         }
2711
2712         if (result < VMCI_SUCCESS)
2713                 return result;
2714
2715         vmci_q_header_add_producer_tail(produce_q->q_header, written,
2716                                         produce_q_size);
2717         return written;
2718 }
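
/*
 * Worked example of the wrap-around path above (illustrative only; the
 * numbers are made up): with produce_q_size == 8, tail == 6 and
 * written == 5, the copy is split in two:
 *
 *	tmp = produce_q_size - tail;			// 2 bytes, slots 6..7
 *	memcpy_to_queue(produce_q, 6, buf, 0, 2);
 *	memcpy_to_queue(produce_q, 0, buf, 2, 3);	// remaining 3 bytes
 *
 * after which vmci_q_header_add_producer_tail() advances the tail to
 * (6 + 5) mod 8 == 3.
 */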
2719
2720 /*
2721  * Dequeues data (if available) from the given consume queue. Writes data
2722  * to the user provided buffer using the provided function.
2723  * Assumes the queue->mutex has been acquired.
2724  * Results:
2725  * VMCI_ERROR_QUEUEPAIR_NODATA if no data was available to dequeue.
2726  * VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the queue
2727  * (as defined by the queue size).
2728  * VMCI_ERROR_INVALID_ARGS, if an error occurred when accessing the buffer.
2729  * Otherwise the number of bytes dequeued is returned.
2730  * Side effects:
2731  * Updates the head pointer of the consume queue.
2732  */
2733 static ssize_t qp_dequeue_locked(struct vmci_queue *produce_q,
2734                                  struct vmci_queue *consume_q,
2735                                  const u64 consume_q_size,
2736                                  void *buf,
2737                                  size_t buf_size,
2738                                  vmci_memcpy_from_queue_func memcpy_from_queue,
2739                                  bool update_consumer,
2740                                  bool can_block)
2741 {
2742         s64 buf_ready;
2743         u64 head;
2744         size_t read;
2745         ssize_t result;
2746
2747         result = qp_map_queue_headers(produce_q, consume_q, can_block);
2748         if (unlikely(result != VMCI_SUCCESS))
2749                 return result;
2750
2751         buf_ready = vmci_q_header_buf_ready(consume_q->q_header,
2752                                             produce_q->q_header,
2753                                             consume_q_size);
2754         if (buf_ready == 0)
2755                 return VMCI_ERROR_QUEUEPAIR_NODATA;
2756
2757         if (buf_ready < VMCI_SUCCESS)
2758                 return (ssize_t) buf_ready;
2759
2760         read = (size_t) (buf_ready > buf_size ? buf_size : buf_ready);
2761         head = vmci_q_header_consumer_head(produce_q->q_header);
2762         if (likely(head + read < consume_q_size)) {
2763                 result = memcpy_from_queue(buf, 0, consume_q, head, read);
2764         } else {
2765                 /* Head pointer wraps around. */
2766
2767                 const size_t tmp = (size_t) (consume_q_size - head);
2768
2769                 result = memcpy_from_queue(buf, 0, consume_q, head, tmp);
2770                 if (result >= VMCI_SUCCESS)
2771                         result = memcpy_from_queue(buf, tmp, consume_q, 0,
2772                                                    read - tmp);
2773
2774         }
2775
2776         if (result < VMCI_SUCCESS)
2777                 return result;
2778
2779         if (update_consumer)
2780                 vmci_q_header_add_consumer_head(produce_q->q_header,
2781                                                 read, consume_q_size);
2782
2783         return read;
2784 }
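
/*
 * How the two header helpers used above relate (a sketch of the ring
 * arithmetic, assuming the vmci_q_header_* helpers from
 * <linux/vmw_vmci_defs.h>): one slot is sacrificed so that head == tail
 * unambiguously means "empty".  For a queue of size S holding U used
 * bytes,
 *
 *	free_space = S - U - 1;
 *	buf_ready  = S - free_space - 1;	// == U
 *
 * e.g. with S == 8, tail == 3 and head == 6: U == (3 - 6) mod 8 == 5,
 * so free_space == 2 and buf_ready == 5.
 */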
2785
2786 /*
2787  * vmci_qpair_alloc() - Allocates a queue pair.
2788  * @qpair:      Pointer for the new vmci_qp struct.
2789  * @handle:     Handle to track the resource.
2790  * @produce_qsize:      Desired size of the producer queue.
2791  * @consume_qsize:      Desired size of the consumer queue.
2792  * @peer:       ContextID of the peer.
2793  * @flags:      VMCI flags.
2794  * @priv_flags: VMCI privilege flags.
2795  *
2796  * This is the client interface for allocating the memory for a
2797  * vmci_qp structure and then attaching to the underlying
2798  * queue.  If an error occurs allocating the memory for the
2799  * vmci_qp structure no attempt is made to attach.  If an
2800  * error occurs attaching, then the structure is freed.
2801  */
2802 int vmci_qpair_alloc(struct vmci_qp **qpair,
2803                      struct vmci_handle *handle,
2804                      u64 produce_qsize,
2805                      u64 consume_qsize,
2806                      u32 peer,
2807                      u32 flags,
2808                      u32 priv_flags)
2809 {
2810         struct vmci_qp *my_qpair;
2811         int retval;
2812         struct vmci_handle src = VMCI_INVALID_HANDLE;
2813         struct vmci_handle dst = vmci_make_handle(peer, VMCI_INVALID_ID);
2814         enum vmci_route route;
2815         vmci_event_release_cb wakeup_cb;
2816         void *client_data;
2817
2818         /*
2819          * Restrict the size of a queuepair.  The device already
2820          * enforces a limit on the total amount of memory that can be
2821          * allocated to queuepairs for a guest.  However, we try to
2822          * allocate this memory before we make the queuepair
2823          * allocation hypercall.  On Linux, we allocate each page
2824          * separately, which means rather than fail, the guest will
2825          * thrash while it tries to allocate, and will become
2826          * increasingly unresponsive to the point where it appears to
2827          * be hung.  So we place a limit on the size of an individual
2828          * queuepair here, and leave the device to enforce the
2829          * restriction on total queuepair memory.  (Note that this
2830          * doesn't prevent all cases; a user with only this much
2831          * physical memory could still get into trouble.)  The error
2832          * used by the device is NO_RESOURCES, so use that here too.
2833          */
2834
2835         if (produce_qsize + consume_qsize < max(produce_qsize, consume_qsize) ||
2836             produce_qsize + consume_qsize > VMCI_MAX_GUEST_QP_MEMORY)
2837                 return VMCI_ERROR_NO_RESOURCES;
2838
2839         retval = vmci_route(&src, &dst, false, &route);
2840         if (retval < VMCI_SUCCESS)
2841                 route = vmci_guest_code_active() ?
2842                     VMCI_ROUTE_AS_GUEST : VMCI_ROUTE_AS_HOST;
2843
2844         /* If NONBLOCK or PINNED is set, we better be the guest personality. */
2845         if ((!vmci_can_block(flags) || vmci_qp_pinned(flags)) &&
2846             VMCI_ROUTE_AS_GUEST != route) {
2847                 pr_devel("Not guest personality w/ NONBLOCK OR PINNED set");
2848                 return VMCI_ERROR_INVALID_ARGS;
2849         }
2850
2851         /*
2852          * Limit the size of pinned QPs and check sanity.
2853          *
2854  * Pinning pages implies non-blocking mode.  Mutexes aren't acquired
2855  * in the qpair code when the NONBLOCK flag is set, and they likewise
2856  * must not be acquired when the PINNED flag is set.  Since pinning
2857  * pages implies we want speed, it makes no sense not to have NONBLOCK
2858  * set if PINNED is set.  Hence enforce this implication.
2859          */
2860         if (vmci_qp_pinned(flags)) {
2861                 if (vmci_can_block(flags)) {
2862                         pr_err("Attempted to enable pinning w/o non-blocking");
2863                         return VMCI_ERROR_INVALID_ARGS;
2864                 }
2865
2866                 if (produce_qsize + consume_qsize > VMCI_MAX_PINNED_QP_MEMORY)
2867                         return VMCI_ERROR_NO_RESOURCES;
2868         }
2869
2870         my_qpair = kzalloc(sizeof(*my_qpair), GFP_KERNEL);
2871         if (!my_qpair)
2872                 return VMCI_ERROR_NO_MEM;
2873
2874         my_qpair->produce_q_size = produce_qsize;
2875         my_qpair->consume_q_size = consume_qsize;
2876         my_qpair->peer = peer;
2877         my_qpair->flags = flags;
2878         my_qpair->priv_flags = priv_flags;
2879
2880         wakeup_cb = NULL;
2881         client_data = NULL;
2882
2883         if (VMCI_ROUTE_AS_HOST == route) {
2884                 my_qpair->guest_endpoint = false;
2885                 if (!(flags & VMCI_QPFLAG_LOCAL)) {
2886                         my_qpair->blocked = 0;
2887                         my_qpair->generation = 0;
2888                         init_waitqueue_head(&my_qpair->event);
2889                         wakeup_cb = qp_wakeup_cb;
2890                         client_data = (void *)my_qpair;
2891                 }
2892         } else {
2893                 my_qpair->guest_endpoint = true;
2894         }
2895
2896         retval = vmci_qp_alloc(handle,
2897                                &my_qpair->produce_q,
2898                                my_qpair->produce_q_size,
2899                                &my_qpair->consume_q,
2900                                my_qpair->consume_q_size,
2901                                my_qpair->peer,
2902                                my_qpair->flags,
2903                                my_qpair->priv_flags,
2904                                my_qpair->guest_endpoint,
2905                                wakeup_cb, client_data);
2906
2907         if (retval < VMCI_SUCCESS) {
2908                 kfree(my_qpair);
2909                 return retval;
2910         }
2911
2912         *qpair = my_qpair;
2913         my_qpair->handle = *handle;
2914
2915         return retval;
2916 }
2917 EXPORT_SYMBOL_GPL(vmci_qpair_alloc);
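
/*
 * Example usage (an illustrative sketch only; error handling is
 * trimmed, and the peer context ID and queue sizes below are made-up
 * placeholders):
 *
 *	struct vmci_qp *qpair;
 *	struct vmci_handle handle = VMCI_INVALID_HANDLE;
 *	int rv;
 *
 *	rv = vmci_qpair_alloc(&qpair, &handle, PAGE_SIZE, PAGE_SIZE,
 *			      peer_cid, 0, VMCI_NO_PRIVILEGE_FLAGS);
 *	if (rv < VMCI_SUCCESS)
 *		return rv;
 */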
2918
2919 /*
2920  * vmci_qpair_detach() - Detaches the client from a queue pair.
2921  * @qpair:      Reference of a pointer to the qpair struct.
2922  *
2923  * This is the client interface for detaching from a VMCIQPair.
2924  * Note that this routine will free the memory allocated for the
2925  * vmci_qp structure too.
2926  */
2927 int vmci_qpair_detach(struct vmci_qp **qpair)
2928 {
2929         int result;
2930         struct vmci_qp *old_qpair;
2931
2932         if (!qpair || !(*qpair))
2933                 return VMCI_ERROR_INVALID_ARGS;
2934
2935         old_qpair = *qpair;
2936         result = qp_detatch(old_qpair->handle, old_qpair->guest_endpoint);
2937
2938         /*
2939          * The guest can fail to detach for a number of reasons, and
2940  * if it does so, it will clean up the entry (if there is one).
2941  * The host can fail too, but it won't clean up the entry
2942  * immediately; it will do that later when the context is
2943          * freed.  Either way, we need to release the qpair struct
2944          * here; there isn't much the caller can do, and we don't want
2945          * to leak.
2946          */
2947
2948         memset(old_qpair, 0, sizeof(*old_qpair));
2949         old_qpair->handle = VMCI_INVALID_HANDLE;
2950         old_qpair->peer = VMCI_INVALID_ID;
2951         kfree(old_qpair);
2952         *qpair = NULL;
2953
2954         return result;
2955 }
2956 EXPORT_SYMBOL_GPL(vmci_qpair_detach);
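
/*
 * The matching teardown for the allocation example above would simply
 * be (illustrative):
 *
 *	rv = vmci_qpair_detach(&qpair);	// frees the struct, NULLs qpair
 */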
2957
2958 /*
2959  * vmci_qpair_get_produce_indexes() - Retrieves the indexes of the producer.
2960  * @qpair:      Pointer to the queue pair struct.
2961  * @producer_tail:      Reference used for storing producer tail index.
2962  * @consumer_head:      Reference used for storing the consumer head index.
2963  *
2964  * This is the client interface for getting the current indexes of the
2965  * QPair from the point of the view of the caller as the producer.
2966  */
2967 int vmci_qpair_get_produce_indexes(const struct vmci_qp *qpair,
2968                                    u64 *producer_tail,
2969                                    u64 *consumer_head)
2970 {
2971         struct vmci_queue_header *produce_q_header;
2972         struct vmci_queue_header *consume_q_header;
2973         int result;
2974
2975         if (!qpair)
2976                 return VMCI_ERROR_INVALID_ARGS;
2977
2978         qp_lock(qpair);
2979         result =
2980             qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
2981         if (result == VMCI_SUCCESS)
2982                 vmci_q_header_get_pointers(produce_q_header, consume_q_header,
2983                                            producer_tail, consumer_head);
2984         qp_unlock(qpair);
2985
2986         if (result == VMCI_SUCCESS &&
2987             ((producer_tail && *producer_tail >= qpair->produce_q_size) ||
2988              (consumer_head && *consumer_head >= qpair->produce_q_size)))
2989                 return VMCI_ERROR_INVALID_SIZE;
2990
2991         return result;
2992 }
2993 EXPORT_SYMBOL_GPL(vmci_qpair_get_produce_indexes);
2994
2995 /*
2996  * vmci_qpair_get_consume_indexes() - Retrieves the indexes of the consumer.
2997  * @qpair:      Pointer to the queue pair struct.
2998  * @consumer_tail:      Reference used for storing consumer tail index.
2999  * @producer_head:      Reference used for storing the producer head index.
3000  *
3001  * This is the client interface for getting the current indexes of the
3002  * QPair from the point of view of the caller as the consumer.
3003  */
3004 int vmci_qpair_get_consume_indexes(const struct vmci_qp *qpair,
3005                                    u64 *consumer_tail,
3006                                    u64 *producer_head)
3007 {
3008         struct vmci_queue_header *produce_q_header;
3009         struct vmci_queue_header *consume_q_header;
3010         int result;
3011
3012         if (!qpair)
3013                 return VMCI_ERROR_INVALID_ARGS;
3014
3015         qp_lock(qpair);
3016         result =
3017             qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
3018         if (result == VMCI_SUCCESS)
3019                 vmci_q_header_get_pointers(consume_q_header, produce_q_header,
3020                                            consumer_tail, producer_head);
3021         qp_unlock(qpair);
3022
3023         if (result == VMCI_SUCCESS &&
3024             ((consumer_tail && *consumer_tail >= qpair->consume_q_size) ||
3025              (producer_head && *producer_head >= qpair->consume_q_size)))
3026                 return VMCI_ERROR_INVALID_SIZE;
3027
3028         return result;
3029 }
3030 EXPORT_SYMBOL_GPL(vmci_qpair_get_consume_indexes);
3031
3032 /*
3033  * vmci_qpair_produce_free_space() - Retrieves free space in producer queue.
3034  * @qpair:      Pointer to the queue pair struct.
3035  *
3036  * This is the client interface for getting the amount of free
3037  * space in the QPair from the point of the view of the caller as
3038  * the producer which is the common case.  Returns < 0 if err, else
3039  * available bytes into which data can be enqueued if > 0.
3040  */
3041 s64 vmci_qpair_produce_free_space(const struct vmci_qp *qpair)
3042 {
3043         struct vmci_queue_header *produce_q_header;
3044         struct vmci_queue_header *consume_q_header;
3045         s64 result;
3046
3047         if (!qpair)
3048                 return VMCI_ERROR_INVALID_ARGS;
3049
3050         qp_lock(qpair);
3051         result =
3052             qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
3053         if (result == VMCI_SUCCESS)
3054                 result = vmci_q_header_free_space(produce_q_header,
3055                                                   consume_q_header,
3056                                                   qpair->produce_q_size);
3057         else
3058                 result = 0;
3059
3060         qp_unlock(qpair);
3061
3062         return result;
3063 }
3064 EXPORT_SYMBOL_GPL(vmci_qpair_produce_free_space);
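
/*
 * Example (sketch; data/len are the caller's): a producer that wants to
 * avoid partial writes can check free space first.  The check races
 * benignly with the consumer: since only the consumer moves the head,
 * free space can only grow in between:
 *
 *	if (vmci_qpair_produce_free_space(qpair) >= (s64)len)
 *		sent = vmci_qpair_enqueue(qpair, data, len, 0);
 */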
3065
3066 /*
3067  * vmci_qpair_consume_free_space() - Retrieves free space in consumer queue.
3068  * @qpair:      Pointer to the queue pair struct.
3069  *
3070  * This is the client interface for getting the amount of free
3071  * space in the QPair from the point of view of the caller as
3072  * the consumer, which is not the common case.  Returns an error (< 0)
3073  * on failure, else the number of bytes available for enqueueing data.
3074  */
3075 s64 vmci_qpair_consume_free_space(const struct vmci_qp *qpair)
3076 {
3077         struct vmci_queue_header *produce_q_header;
3078         struct vmci_queue_header *consume_q_header;
3079         s64 result;
3080
3081         if (!qpair)
3082                 return VMCI_ERROR_INVALID_ARGS;
3083
3084         qp_lock(qpair);
3085         result =
3086             qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
3087         if (result == VMCI_SUCCESS)
3088                 result = vmci_q_header_free_space(consume_q_header,
3089                                                   produce_q_header,
3090                                                   qpair->consume_q_size);
3091         else
3092                 result = 0;
3093
3094         qp_unlock(qpair);
3095
3096         return result;
3097 }
3098 EXPORT_SYMBOL_GPL(vmci_qpair_consume_free_space);
3099
3100 /*
3101  * vmci_qpair_produce_buf_ready() - Gets bytes ready to read from
3102  * producer queue.
3103  * @qpair:      Pointer to the queue pair struct.
3104  *
3105  * This is the client interface for getting the amount of
3106  * enqueued data in the QPair from the point of view of the
3107  * caller as the producer, which is not the common case.  Returns an
3108  * error (< 0) on failure, else the number of bytes that may be read.
3109  */
3110 s64 vmci_qpair_produce_buf_ready(const struct vmci_qp *qpair)
3111 {
3112         struct vmci_queue_header *produce_q_header;
3113         struct vmci_queue_header *consume_q_header;
3114         s64 result;
3115
3116         if (!qpair)
3117                 return VMCI_ERROR_INVALID_ARGS;
3118
3119         qp_lock(qpair);
3120         result =
3121             qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
3122         if (result == VMCI_SUCCESS)
3123                 result = vmci_q_header_buf_ready(produce_q_header,
3124                                                  consume_q_header,
3125                                                  qpair->produce_q_size);
3126         else
3127                 result = 0;
3128
3129         qp_unlock(qpair);
3130
3131         return result;
3132 }
3133 EXPORT_SYMBOL_GPL(vmci_qpair_produce_buf_ready);
3134
3135 /*
3136  * vmci_qpair_consume_buf_ready() - Gets bytes ready to read from
3137  * consumer queue.
3138  * @qpair:      Pointer to the queue pair struct.
3139  *
3140  * This is the client interface for getting the amount of
3141  * enqueued data in the QPair from the point of view of the
3142  * caller as the consumer, which is the normal case.  Returns an
3143  * error (< 0) on failure, else the number of bytes that may be read.
3144  */
3145 s64 vmci_qpair_consume_buf_ready(const struct vmci_qp *qpair)
3146 {
3147         struct vmci_queue_header *produce_q_header;
3148         struct vmci_queue_header *consume_q_header;
3149         s64 result;
3150
3151         if (!qpair)
3152                 return VMCI_ERROR_INVALID_ARGS;
3153
3154         qp_lock(qpair);
3155         result =
3156             qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
3157         if (result == VMCI_SUCCESS)
3158                 result = vmci_q_header_buf_ready(consume_q_header,
3159                                                  produce_q_header,
3160                                                  qpair->consume_q_size);
3161         else
3162                 result = 0;
3163
3164         qp_unlock(qpair);
3165
3166         return result;
3167 }
3168 EXPORT_SYMBOL_GPL(vmci_qpair_consume_buf_ready);
3169
3170 /*
3171  * vmci_qpair_enqueue() - Throw data on the queue.
3172  * @qpair:      Pointer to the queue pair struct.
3173  * @buf:        Pointer to buffer containing data
3174  * @buf_size:   Length of buffer.
3175  * @buf_type:   Buffer type (Unused).
3176  *
3177  * This is the client interface for enqueueing data into the queue.
3178  * Returns number of bytes enqueued or < 0 on error.
3179  */
3180 ssize_t vmci_qpair_enqueue(struct vmci_qp *qpair,
3181                            const void *buf,
3182                            size_t buf_size,
3183                            int buf_type)
3184 {
3185         ssize_t result;
3186
3187         if (!qpair || !buf)
3188                 return VMCI_ERROR_INVALID_ARGS;
3189
3190         qp_lock(qpair);
3191
3192         do {
3193                 result = qp_enqueue_locked(qpair->produce_q,
3194                                            qpair->consume_q,
3195                                            qpair->produce_q_size,
3196                                            buf, buf_size,
3197                                            qp_memcpy_to_queue,
3198                                            vmci_can_block(qpair->flags));
3199
3200                 if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3201                     !qp_wait_for_ready_queue(qpair))
3202                         result = VMCI_ERROR_WOULD_BLOCK;
3203
3204         } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3205
3206         qp_unlock(qpair);
3207
3208         return result;
3209 }
3210 EXPORT_SYMBOL_GPL(vmci_qpair_enqueue);
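
/*
 * Example producer call (illustrative sketch; a real client would
 * typically wait for a peer notification rather than spin on NOSPACE):
 *
 *	sent = vmci_qpair_enqueue(qpair, data, len, 0);
 *	// sent == VMCI_ERROR_QUEUEPAIR_NOSPACE: queue full, retry later
 *	// sent < VMCI_SUCCESS otherwise: hard error
 *	// sent >= 0: that many bytes were written (may be < len)
 */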
3211
3212 /*
3213  * vmci_qpair_dequeue() - Get data from the queue.
3214  * @qpair:      Pointer to the queue pair struct.
3215  * @buf:        Pointer to buffer for the data
3216  * @buf_size:   Length of buffer.
3217  * @buf_type:   Buffer type (Unused).
3218  *
3219  * This is the client interface for dequeueing data from the queue.
3220  * Returns number of bytes dequeued or < 0 on error.
3221  */
3222 ssize_t vmci_qpair_dequeue(struct vmci_qp *qpair,
3223                            void *buf,
3224                            size_t buf_size,
3225                            int buf_type)
3226 {
3227         ssize_t result;
3228
3229         if (!qpair || !buf)
3230                 return VMCI_ERROR_INVALID_ARGS;
3231
3232         qp_lock(qpair);
3233
3234         do {
3235                 result = qp_dequeue_locked(qpair->produce_q,
3236                                            qpair->consume_q,
3237                                            qpair->consume_q_size,
3238                                            buf, buf_size,
3239                                            qp_memcpy_from_queue, true,
3240                                            vmci_can_block(qpair->flags));
3241
3242                 if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3243                     !qp_wait_for_ready_queue(qpair))
3244                         result = VMCI_ERROR_WOULD_BLOCK;
3245
3246         } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3247
3248         qp_unlock(qpair);
3249
3250         return result;
3251 }
3252 EXPORT_SYMBOL_GPL(vmci_qpair_dequeue);
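
/*
 * Example consumer call (illustrative sketch): short reads are normal,
 * so callers accumulate until a full message has arrived:
 *
 *	got = vmci_qpair_dequeue(qpair, buf, sizeof(buf), 0);
 *	// got == VMCI_ERROR_QUEUEPAIR_NODATA: nothing to read yet
 *	// got >= 0: that many bytes were copied and the head advanced
 */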
3253
3254 /*
3255  * vmci_qpair_peek() - Peek at the data in the queue.
3256  * @qpair:      Pointer to the queue pair struct.
3257  * @buf:        Pointer to buffer for the data
3258  * @buf_size:   Length of buffer.
3259  * @buf_type:   Buffer type (Unused on Linux).
3260  *
3261  * This is the client interface for peeking into a queue.  (I.e.,
3262  * copy data from the queue without updating the head pointer.)
3263  * Returns the number of bytes peeked or < 0 on error.
3264  */
3265 ssize_t vmci_qpair_peek(struct vmci_qp *qpair,
3266                         void *buf,
3267                         size_t buf_size,
3268                         int buf_type)
3269 {
3270         ssize_t result;
3271
3272         if (!qpair || !buf)
3273                 return VMCI_ERROR_INVALID_ARGS;
3274
3275         qp_lock(qpair);
3276
3277         do {
3278                 result = qp_dequeue_locked(qpair->produce_q,
3279                                            qpair->consume_q,
3280                                            qpair->consume_q_size,
3281                                            buf, buf_size,
3282                                            qp_memcpy_from_queue, false,
3283                                            vmci_can_block(qpair->flags));
3284
3285                 if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3286                     !qp_wait_for_ready_queue(qpair))
3287                         result = VMCI_ERROR_WOULD_BLOCK;
3288
3289         } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3290
3291         qp_unlock(qpair);
3292
3293         return result;
3294 }
3295 EXPORT_SYMBOL_GPL(vmci_qpair_peek);
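
/*
 * A common peek pattern (sketch; hdr and msg are hypothetical objects
 * of the caller's protocol): inspect a fixed-size header without
 * consuming it, and dequeue only once the full payload has arrived:
 *
 *	if (vmci_qpair_peek(qpair, &hdr, sizeof(hdr), 0) == sizeof(hdr) &&
 *	    vmci_qpair_consume_buf_ready(qpair) >= sizeof(hdr) + hdr.len)
 *		vmci_qpair_dequeue(qpair, &msg, sizeof(hdr) + hdr.len, 0);
 */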
3296
3297 /*
3298  * vmci_qpair_enquev() - Throw data on the queue using iov.
3299  * @qpair:      Pointer to the queue pair struct.
3300  * @iov:        Pointer to buffer containing data
3301  * @iov_size:   Length of buffer.
3302  * @buf_type:   Buffer type (Unused).
3303  *
3304  * This is the client interface for enqueueing data into the queue.
3305  * This function uses IO vectors to handle the work. Returns number
3306  * of bytes enqueued or < 0 on error.
3307  */
3308 ssize_t vmci_qpair_enquev(struct vmci_qp *qpair,
3309                           void *iov,
3310                           size_t iov_size,
3311                           int buf_type)
3312 {
3313         ssize_t result;
3314
3315         if (!qpair || !iov)
3316                 return VMCI_ERROR_INVALID_ARGS;
3317
3318         qp_lock(qpair);
3319
3320         do {
3321                 result = qp_enqueue_locked(qpair->produce_q,
3322                                            qpair->consume_q,
3323                                            qpair->produce_q_size,
3324                                            iov, iov_size,
3325                                            qp_memcpy_to_queue_iov,
3326                                            vmci_can_block(qpair->flags));
3327
3328                 if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3329                     !qp_wait_for_ready_queue(qpair))
3330                         result = VMCI_ERROR_WOULD_BLOCK;
3331
3332         } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3333
3334         qp_unlock(qpair);
3335
3336         return result;
3337 }
3338 EXPORT_SYMBOL_GPL(vmci_qpair_enquev);
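
/*
 * Example iov enqueue (illustrative sketch; the iov argument is assumed
 * to point at a struct iovec array, which qp_memcpy_to_queue_iov walks
 * internally while keeping its own offset):
 *
 *	struct iovec vec[2] = {
 *		{ .iov_base = hdr,  .iov_len = hdr_len },	// made up
 *		{ .iov_base = body, .iov_len = body_len },	// made up
 *	};
 *
 *	sent = vmci_qpair_enquev(qpair, vec, hdr_len + body_len, 0);
 */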
3339
3340 /*
3341  * vmci_qpair_dequev() - Get data from the queue using iov.
3342  * @qpair:      Pointer to the queue pair struct.
3343  * @iov:        Pointer to buffer for the data
3344  * @iov_size:   Length of buffer.
3345  * @buf_type:   Buffer type (Unused).
3346  *
3347  * This is the client interface for dequeueing data from the queue.
3348  * This function uses IO vectors to handle the work. Returns number
3349  * of bytes dequeued or < 0 on error.
3350  */
3351 ssize_t vmci_qpair_dequev(struct vmci_qp *qpair,
3352                           void *iov,
3353                           size_t iov_size,
3354                           int buf_type)
3355 {
3356         ssize_t result;
3357
3358         if (!qpair || !iov)
3359                 return VMCI_ERROR_INVALID_ARGS;
3360
3361         qp_lock(qpair);
3362
3363         do {
3364                 result = qp_dequeue_locked(qpair->produce_q,
3365                                            qpair->consume_q,
3366                                            qpair->consume_q_size,
3367                                            iov, iov_size,
3368                                            qp_memcpy_from_queue_iov,
3369                                            true, vmci_can_block(qpair->flags));
3370
3371                 if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3372                     !qp_wait_for_ready_queue(qpair))
3373                         result = VMCI_ERROR_WOULD_BLOCK;
3374
3375         } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3376
3377         qp_unlock(qpair);
3378
3379         return result;
3380 }
3381 EXPORT_SYMBOL_GPL(vmci_qpair_dequev);
3382
3383 /*
3384  * vmci_qpair_peekv() - Peek at the data in the queue using iov.
3385  * @qpair:      Pointer to the queue pair struct.
3386  * @iov:        Pointer to buffer for the data
3387  * @iov_size:   Length of buffer.
3388  * @buf_type:   Buffer type (Unused on Linux).
3389  *
3390  * This is the client interface for peeking into a queue.  (I.e.,
3391  * copy data from the queue without updating the head pointer.)
3392  * This function uses IO vectors to handle the work. Returns number
3393  * of bytes peeked or < 0 on error.
3394  */
3395 ssize_t vmci_qpair_peekv(struct vmci_qp *qpair,
3396                          void *iov,
3397                          size_t iov_size,
3398                          int buf_type)
3399 {
3400         ssize_t result;
3401
3402         if (!qpair || !iov)
3403                 return VMCI_ERROR_INVALID_ARGS;
3404
3405         qp_lock(qpair);
3406
3407         do {
3408                 result = qp_dequeue_locked(qpair->produce_q,
3409                                            qpair->consume_q,
3410                                            qpair->consume_q_size,
3411                                            iov, iov_size,
3412                                            qp_memcpy_from_queue_iov,
3413                                            false, vmci_can_block(qpair->flags));
3414
3415                 if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3416                     !qp_wait_for_ready_queue(qpair))
3417                         result = VMCI_ERROR_WOULD_BLOCK;
3418
3419         } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3420
3421         qp_unlock(qpair);
3422         return result;
3423 }
3424 EXPORT_SYMBOL_GPL(vmci_qpair_peekv);