hv: run non-blocking message handlers in the dispatch tasklet
[cascardo/linux.git] drivers/hv/channel_mgmt.c
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/delay.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/hyperv.h>

#include "hyperv_vmbus.h"

struct vmbus_rescind_work {
        struct work_struct work;
        struct vmbus_channel *channel;
};

/**
 * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
 * @icmsghdrp: Pointer to msg header structure
 * @negop: Pointer to negotiate message structure
 * @buf: Raw buffer channel data
 * @fw_version: The framework version we can support
 * @srv_version: The service version we can support
 *
 * @icmsghdrp is of type &struct icmsg_hdr.
 * @negop is of type &struct icmsg_negotiate.
 * Set up and fill in default negotiate response message.
 *
 * The fw_version specifies the framework version that
 * we can support and srv_version specifies the service
 * version we can support.
 *
 * Mainly used by Hyper-V drivers.
 */
bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
                                struct icmsg_negotiate *negop, u8 *buf,
                                int fw_version, int srv_version)
{
        int icframe_major, icframe_minor;
        int icmsg_major, icmsg_minor;
        int fw_major, fw_minor;
        int srv_major, srv_minor;
        int i;
        bool found_match = false;

        icmsghdrp->icmsgsize = 0x10;
        fw_major = (fw_version >> 16);
        fw_minor = (fw_version & 0xFFFF);

        srv_major = (srv_version >> 16);
        srv_minor = (srv_version & 0xFFFF);

        negop = (struct icmsg_negotiate *)&buf[
                sizeof(struct vmbuspipe_hdr) +
                sizeof(struct icmsg_hdr)];

        icframe_major = negop->icframe_vercnt;
        icframe_minor = 0;

        icmsg_major = negop->icmsg_vercnt;
        icmsg_minor = 0;

        /*
         * Select the framework version number we will
         * support.
         */

        for (i = 0; i < negop->icframe_vercnt; i++) {
                if ((negop->icversion_data[i].major == fw_major) &&
                   (negop->icversion_data[i].minor == fw_minor)) {
                        icframe_major = negop->icversion_data[i].major;
                        icframe_minor = negop->icversion_data[i].minor;
                        found_match = true;
                }
        }

        if (!found_match)
                goto fw_error;

        found_match = false;

        for (i = negop->icframe_vercnt;
                 (i < negop->icframe_vercnt + negop->icmsg_vercnt); i++) {
                if ((negop->icversion_data[i].major == srv_major) &&
                   (negop->icversion_data[i].minor == srv_minor)) {
                        icmsg_major = negop->icversion_data[i].major;
                        icmsg_minor = negop->icversion_data[i].minor;
                        found_match = true;
                }
        }

        /*
         * Respond with the framework and service
         * version numbers we can support.
         */

fw_error:
        if (!found_match) {
                negop->icframe_vercnt = 0;
                negop->icmsg_vercnt = 0;
        } else {
                negop->icframe_vercnt = 1;
                negop->icmsg_vercnt = 1;
        }

        negop->icversion_data[0].major = icframe_major;
        negop->icversion_data[0].minor = icframe_minor;
        negop->icversion_data[1].major = icmsg_major;
        negop->icversion_data[1].minor = icmsg_minor;
        return found_match;
}
EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
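
/*
 * Illustrative sketch only (not part of this file): how an IC utility
 * driver typically calls vmbus_prep_negotiate_resp() from its channel
 * callback after receiving a negotiate request. The example_* names and
 * version values are assumptions; callers pass their own framework and
 * service versions encoded as (major << 16 | minor). Passing NULL for
 * negop is harmless since the function recomputes it from the buffer.
 */
#define EXAMPLE_FW_VERSION      ((3 << 16) | 0) /* framework 3.0, assumed */
#define EXAMPLE_SRV_VERSION     ((3 << 16) | 0) /* service 3.0, assumed */

static void example_handle_negotiate(u8 *buf)
{
        struct icmsg_hdr *icmsghdrp;

        /* The IC message header follows the VM bus pipe header. */
        icmsghdrp = (struct icmsg_hdr *)&buf[sizeof(struct vmbuspipe_hdr)];
        if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE)
                vmbus_prep_negotiate_resp(icmsghdrp, NULL, buf,
                                          EXAMPLE_FW_VERSION,
                                          EXAMPLE_SRV_VERSION);
}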

static void vmbus_sc_creation_cb(struct work_struct *work)
{
        struct vmbus_channel *newchannel = container_of(work,
                                                        struct vmbus_channel,
                                                        work);
        struct vmbus_channel *primary_channel = newchannel->primary_channel;

        /*
         * On entry sc_creation_callback has already been verified to
         * be non-NULL.
         */
        primary_channel->sc_creation_callback(newchannel);
}

/*
 * alloc_channel - Allocate and initialize a vmbus channel object
 */
static struct vmbus_channel *alloc_channel(void)
{
        static atomic_t chan_num = ATOMIC_INIT(0);
        struct vmbus_channel *channel;

        channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
        if (!channel)
                return NULL;

        channel->id = atomic_inc_return(&chan_num);
        spin_lock_init(&channel->inbound_lock);
        spin_lock_init(&channel->lock);

        INIT_LIST_HEAD(&channel->sc_list);
        INIT_LIST_HEAD(&channel->percpu_list);

        channel->controlwq = alloc_workqueue("hv_vmbus_ctl/%d", WQ_MEM_RECLAIM,
                                             1, channel->id);
        if (!channel->controlwq) {
                kfree(channel);
                return NULL;
        }

        return channel;
}

/*
 * release_channel - Release the vmbus channel object itself
 */
static void release_channel(struct work_struct *work)
{
        struct vmbus_channel *channel = container_of(work,
                                                     struct vmbus_channel,
                                                     work);

        destroy_workqueue(channel->controlwq);

        kfree(channel);
}

/*
 * free_channel - Release the resources used by the vmbus channel object
 */
static void free_channel(struct vmbus_channel *channel)
{
        /*
         * We have to release the channel's workqueue/thread in the vmbus
         * connection's workqueue/thread context, i.e., we can't destroy
         * ourselves.
         */
        INIT_WORK(&channel->work, release_channel);
        queue_work(vmbus_connection.work_queue, &channel->work);
}

static void process_rescind_fn(struct work_struct *work)
{
        struct vmbus_rescind_work *rc_work;
        struct vmbus_channel *channel;
        struct device *dev;

        rc_work = container_of(work, struct vmbus_rescind_work, work);
        channel = rc_work->channel;

        /*
         * We have already acquired a reference on the channel
         * and so it cannot vanish underneath us.
         * It is possible (while very unlikely) that we may
         * get here while the processing of the initial offer
         * is still not complete. Deal with this situation by
         * just waiting until the channel is in the correct state.
         */

        while (channel->work.func != release_channel)
                msleep(1000);

        if (channel->device_obj) {
                dev = get_device(&channel->device_obj->device);
                if (dev) {
                        vmbus_device_unregister(channel->device_obj);
                        put_device(dev);
                }
        } else {
                hv_process_channel_removal(channel,
                                           channel->offermsg.child_relid);
        }
        kfree(work);
}

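/*
 * Note: percpu_channel_enq()/percpu_channel_deq() below always run on
 * the channel's target CPU, either directly or via
 * smp_call_function_single() from their callers, which is why the
 * per-cpu channel list can be updated without taking a lock.
 */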
static void percpu_channel_enq(void *arg)
{
        struct vmbus_channel *channel = arg;
        int cpu = smp_processor_id();

        list_add_tail(&channel->percpu_list, &hv_context.percpu_list[cpu]);
}

static void percpu_channel_deq(void *arg)
{
        struct vmbus_channel *channel = arg;

        list_del(&channel->percpu_list);
}

void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
{
        struct vmbus_channel_relid_released msg;
        unsigned long flags;
        struct vmbus_channel *primary_channel;

        memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
        msg.child_relid = relid;
        msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
        vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released));

        if (channel == NULL)
                return;

        if (channel->target_cpu != get_cpu()) {
                put_cpu();
                smp_call_function_single(channel->target_cpu,
                                         percpu_channel_deq, channel, true);
        } else {
                percpu_channel_deq(channel);
                put_cpu();
        }

        if (channel->primary_channel == NULL) {
                spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
                list_del(&channel->listentry);
                spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
        } else {
                primary_channel = channel->primary_channel;
                spin_lock_irqsave(&primary_channel->lock, flags);
                list_del(&channel->sc_list);
                spin_unlock_irqrestore(&primary_channel->lock, flags);
        }
        free_channel(channel);
}

void vmbus_free_channels(void)
{
        struct vmbus_channel *channel;

        list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
                vmbus_device_unregister(channel->device_obj);
                free_channel(channel);
        }
}

static void vmbus_do_device_register(struct work_struct *work)
{
        struct hv_device *device_obj;
        int ret;
        unsigned long flags;
        struct vmbus_channel *newchannel = container_of(work,
                                                     struct vmbus_channel,
                                                     work);

        ret = vmbus_device_register(newchannel->device_obj);
        if (ret != 0) {
                pr_err("unable to add child device object (relid %d)\n",
                        newchannel->offermsg.child_relid);
                spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
                list_del(&newchannel->listentry);
                device_obj = newchannel->device_obj;
                newchannel->device_obj = NULL;
                spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);

                if (newchannel->target_cpu != get_cpu()) {
                        put_cpu();
                        smp_call_function_single(newchannel->target_cpu,
                                         percpu_channel_deq, newchannel, true);
                } else {
                        percpu_channel_deq(newchannel);
                        put_cpu();
                }

                kfree(device_obj);
                if (!newchannel->rescind) {
                        free_channel(newchannel);
                        return;
                }
        }
        /*
         * The next state for this channel is to be freed.
         */
        INIT_WORK(&newchannel->work, release_channel);
}

/*
 * vmbus_process_offer - Process the offer by creating a channel/device
 * associated with this offer
 */
static void vmbus_process_offer(struct vmbus_channel *newchannel)
{
        struct vmbus_channel *channel;
        bool fnew = true;
        bool enq = false;
        unsigned long flags;

        /* Make sure this is a new offer */
        spin_lock_irqsave(&vmbus_connection.channel_lock, flags);

        list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
                if (!uuid_le_cmp(channel->offermsg.offer.if_type,
                        newchannel->offermsg.offer.if_type) &&
                        !uuid_le_cmp(channel->offermsg.offer.if_instance,
                                newchannel->offermsg.offer.if_instance)) {
                        fnew = false;
                        break;
                }
        }

        if (fnew) {
                list_add_tail(&newchannel->listentry,
                              &vmbus_connection.chn_list);
                enq = true;
        }

        spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);

        if (enq) {
                if (newchannel->target_cpu != get_cpu()) {
                        put_cpu();
                        smp_call_function_single(newchannel->target_cpu,
                                                 percpu_channel_enq,
                                                 newchannel, true);
                } else {
                        percpu_channel_enq(newchannel);
                        put_cpu();
                }
        }
        if (!fnew) {
                /*
                 * Check to see if this is a sub-channel.
                 */
                if (newchannel->offermsg.offer.sub_channel_index != 0) {
                        /*
                         * Process the sub-channel.
                         */
                        newchannel->primary_channel = channel;
                        spin_lock_irqsave(&channel->lock, flags);
                        list_add_tail(&newchannel->sc_list, &channel->sc_list);
                        spin_unlock_irqrestore(&channel->lock, flags);

                        if (newchannel->target_cpu != get_cpu()) {
                                put_cpu();
                                smp_call_function_single(newchannel->target_cpu,
                                                         percpu_channel_enq,
                                                         newchannel, true);
                        } else {
                                percpu_channel_enq(newchannel);
                                put_cpu();
                        }

                        newchannel->state = CHANNEL_OPEN_STATE;
                        channel->num_sc++;
                        if (channel->sc_creation_callback != NULL) {
                                /*
                                 * We need to invoke the sub-channel creation
                                 * callback; invoke this in a separate work
                                 * context since we are currently running on
                                 * the global work context in which we handle
                                 * messages from the host.
                                 */
                                INIT_WORK(&newchannel->work,
                                          vmbus_sc_creation_cb);
                                queue_work(newchannel->controlwq,
                                           &newchannel->work);
                        }

                        return;
                }

                goto err_free_chan;
        }

        /*
         * This state is used to indicate a successful open
         * so that when we do close the channel normally, we
         * can cleanup properly
         */
        newchannel->state = CHANNEL_OPEN_STATE;

        /*
         * Start the process of binding this offer to the driver
         * We need to set the DeviceObject field before calling
         * vmbus_child_dev_add()
         */
        newchannel->device_obj = vmbus_device_create(
                &newchannel->offermsg.offer.if_type,
                &newchannel->offermsg.offer.if_instance,
                newchannel);
        if (!newchannel->device_obj)
                goto err_deq_chan;

        /*
         * Add the new device to the bus. This will kick off device-driver
         * binding which eventually invokes the device driver's AddDevice()
         * method.
         * Invoke this call on the per-channel work context.
         * Until we return from this function, rescind offer message
         * cannot be processed as we are running on the global message
         * handling work.
         */
        INIT_WORK(&newchannel->work, vmbus_do_device_register);
        queue_work(newchannel->controlwq, &newchannel->work);
        return;

err_deq_chan:
        spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
        list_del(&newchannel->listentry);
        spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);

        if (newchannel->target_cpu != get_cpu()) {
                put_cpu();
                smp_call_function_single(newchannel->target_cpu,
                                         percpu_channel_deq, newchannel, true);
        } else {
                percpu_channel_deq(newchannel);
                put_cpu();
        }

err_free_chan:
        free_channel(newchannel);
}

enum {
        IDE = 0,
        SCSI,
        NIC,
        ND,
        MAX_PERF_CHN,
};

/*
 * This is an array of device_ids (device types) that are performance critical.
 * We attempt to distribute the interrupt load for these devices across
 * all available CPUs.
 */
static const struct hv_vmbus_device_id hp_devs[] = {
        /* IDE */
        { HV_IDE_GUID, },
        /* Storage - SCSI */
        { HV_SCSI_GUID, },
        /* Network */
        { HV_NIC_GUID, },
        /* NetworkDirect Guest RDMA */
        { HV_ND_GUID, },
};

/*
 * We use this state to statically distribute the channel interrupt load.
 */
static u32 next_vp;

/*
 * Starting with Win8, we can statically distribute the incoming
 * channel interrupt load by binding a channel to a VCPU. We
 * implement here a simple round robin scheme for distributing
 * the interrupt load.
 * We will bind channels that are not performance critical to cpu 0 and
 * performance critical channels (IDE, SCSI and Network) will be uniformly
 * distributed across all available CPUs.
 */
static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_guid)
{
        u32 cur_cpu;
        int i;
        bool perf_chn = false;
        u32 max_cpus = num_online_cpus();

        for (i = IDE; i < MAX_PERF_CHN; i++) {
                if (!memcmp(type_guid->b, hp_devs[i].guid,
                                 sizeof(uuid_le))) {
                        perf_chn = true;
                        break;
                }
        }
        if ((vmbus_proto_version == VERSION_WS2008) ||
            (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) {
                /*
                 * Prior to win8, all channel interrupts are
                 * delivered on cpu 0.
                 * Also if the channel is not a performance critical
                 * channel, bind it to cpu 0.
                 */
                channel->target_cpu = 0;
                channel->target_vp = 0;
                return;
        }
        cur_cpu = (++next_vp % max_cpus);
        channel->target_cpu = cur_cpu;
        channel->target_vp = hv_context.vp_index[cur_cpu];
}
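
/*
 * Worked example of the round robin above (an illustration, not code
 * from this file): with 4 online CPUs and next_vp starting at 0,
 * successive performance-critical channels are bound to CPUs
 * 1, 2, 3, 0, 1, ... while every non-performance-critical channel
 * stays on CPU 0.
 */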

/*
 * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
 */
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_offer_channel *offer;
        struct vmbus_channel *newchannel;

        offer = (struct vmbus_channel_offer_channel *)hdr;

        /* Allocate the channel object and save this offer. */
        newchannel = alloc_channel();
        if (!newchannel) {
                pr_err("Unable to allocate channel object\n");
                return;
        }

        /*
         * By default we setup state to enable batched
         * reading. A specific service can choose to
         * disable this prior to opening the channel.
         */
        newchannel->batched_reading = true;

        /*
         * Setup state for signalling the host.
         */
        newchannel->sig_event = (struct hv_input_signal_event *)
                                (ALIGN((unsigned long)
                                &newchannel->sig_buf,
                                HV_HYPERCALL_PARAM_ALIGN));

        newchannel->sig_event->connectionid.asu32 = 0;
        newchannel->sig_event->connectionid.u.id = VMBUS_EVENT_CONNECTION_ID;
        newchannel->sig_event->flag_number = 0;
        newchannel->sig_event->rsvdz = 0;

        if (vmbus_proto_version != VERSION_WS2008) {
                newchannel->is_dedicated_interrupt =
                                (offer->is_dedicated_interrupt != 0);
                newchannel->sig_event->connectionid.u.id =
                                offer->connection_id;
        }

        init_vp_index(newchannel, &offer->offer.if_type);

        memcpy(&newchannel->offermsg, offer,
               sizeof(struct vmbus_channel_offer_channel));
        newchannel->monitor_grp = (u8)offer->monitorid / 32;
        newchannel->monitor_bit = (u8)offer->monitorid % 32;

        vmbus_process_offer(newchannel);
}

/*
 * vmbus_onoffer_rescind - Rescind offer handler.
 *
 * We queue a work item to process this offer synchronously.
 */
static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_rescind_offer *rescind;
        struct vmbus_channel *channel;
        struct vmbus_rescind_work *rc_work;

        rescind = (struct vmbus_channel_rescind_offer *)hdr;
        channel = relid2channel(rescind->child_relid, true);

        if (channel == NULL) {
                hv_process_channel_removal(NULL, rescind->child_relid);
                return;
        }

        /*
         * We have acquired a reference on the channel and have posted
         * the rescind state. Perform further cleanup in a work context
         * that is different from the global work context in which
         * we process messages from the host (we are currently executing
         * on that global context).
         */
        rc_work = kzalloc(sizeof(struct vmbus_rescind_work), GFP_KERNEL);
        if (!rc_work) {
                pr_err("Unable to allocate memory for rescind processing\n");
                return;
        }
        rc_work->channel = channel;
        INIT_WORK(&rc_work->work, process_rescind_fn);
        schedule_work(&rc_work->work);
}

/*
 * vmbus_onoffers_delivered -
 * This is invoked when all offers have been delivered.
 *
 * Nothing to do here.
 */
static void vmbus_onoffers_delivered(
                        struct vmbus_channel_message_header *hdr)
{
}

/*
 * vmbus_onopen_result - Open result handler.
 *
 * This is invoked when we receive a response to our channel open request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_open_result *result;
        struct vmbus_channel_msginfo *msginfo;
        struct vmbus_channel_message_header *requestheader;
        struct vmbus_channel_open_channel *openmsg;
        unsigned long flags;

        result = (struct vmbus_channel_open_result *)hdr;

        /*
         * Find the open msg, copy the result and signal/unblock the wait event
         */
        spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

        list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
                                msglistentry) {
                requestheader =
                        (struct vmbus_channel_message_header *)msginfo->msg;

                if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
                        openmsg =
                        (struct vmbus_channel_open_channel *)msginfo->msg;
                        if (openmsg->child_relid == result->child_relid &&
                            openmsg->openid == result->openid) {
                                memcpy(&msginfo->response.open_result,
                                       result,
                                       sizeof(
                                        struct vmbus_channel_open_result));
                                complete(&msginfo->waitevent);
                                break;
                        }
                }
        }
        spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_created - GPADL created handler.
 *
 * This is invoked when we receive a response to our gpadl create request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_gpadl_created *gpadlcreated;
        struct vmbus_channel_msginfo *msginfo;
        struct vmbus_channel_message_header *requestheader;
        struct vmbus_channel_gpadl_header *gpadlheader;
        unsigned long flags;

        gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;

        /*
         * Find the establish msg, copy the result and signal/unblock the wait
         * event
         */
        spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

        list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
                                msglistentry) {
                requestheader =
                        (struct vmbus_channel_message_header *)msginfo->msg;

                if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
                        gpadlheader =
                        (struct vmbus_channel_gpadl_header *)requestheader;

                        if ((gpadlcreated->child_relid ==
                             gpadlheader->child_relid) &&
                            (gpadlcreated->gpadl == gpadlheader->gpadl)) {
                                memcpy(&msginfo->response.gpadl_created,
                                       gpadlcreated,
                                       sizeof(
                                        struct vmbus_channel_gpadl_created));
                                complete(&msginfo->waitevent);
                                break;
                        }
                }
        }
        spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_torndown - GPADL torndown handler.
 *
 * This is invoked when we receive a response to our gpadl teardown request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_torndown(
                        struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_gpadl_torndown *gpadl_torndown;
        struct vmbus_channel_msginfo *msginfo;
        struct vmbus_channel_message_header *requestheader;
        struct vmbus_channel_gpadl_teardown *gpadl_teardown;
        unsigned long flags;

        gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;

        /*
         * Find the teardown msg, copy the result and signal/unblock the wait
         * event
         */
        spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

        list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
                                msglistentry) {
                requestheader =
                        (struct vmbus_channel_message_header *)msginfo->msg;

                if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
                        gpadl_teardown =
                        (struct vmbus_channel_gpadl_teardown *)requestheader;

                        if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
                                memcpy(&msginfo->response.gpadl_torndown,
                                       gpadl_torndown,
                                       sizeof(
                                        struct vmbus_channel_gpadl_torndown));
                                complete(&msginfo->waitevent);
                                break;
                        }
                }
        }
        spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_onversion_response - Version response handler.
 *
 * This is invoked when we receive a response to our initiate contact request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onversion_response(
                struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_msginfo *msginfo;
        struct vmbus_channel_message_header *requestheader;
        struct vmbus_channel_version_response *version_response;
        unsigned long flags;

        version_response = (struct vmbus_channel_version_response *)hdr;
        spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

        list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
                                msglistentry) {
                requestheader =
                        (struct vmbus_channel_message_header *)msginfo->msg;

                if (requestheader->msgtype ==
                    CHANNELMSG_INITIATE_CONTACT) {
                        memcpy(&msginfo->response.version_response,
                              version_response,
                              sizeof(struct vmbus_channel_version_response));
                        complete(&msginfo->waitevent);
                }
        }
        spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/* Channel message dispatch table */
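/*
 * The middle field flags handlers that never block. Per the change this
 * file reflects ("run non-blocking message handlers in the dispatch
 * tasklet"), handlers marked 1 can be invoked directly from the message
 * dispatch tasklet, while the remaining (potentially blocking) handlers
 * are deferred to a work queue.
 */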
struct vmbus_channel_message_table_entry
        channel_message_table[CHANNELMSG_COUNT] = {
        {CHANNELMSG_INVALID,                    0, NULL},
        {CHANNELMSG_OFFERCHANNEL,               0, vmbus_onoffer},
        {CHANNELMSG_RESCIND_CHANNELOFFER,       0, vmbus_onoffer_rescind},
        {CHANNELMSG_REQUESTOFFERS,              0, NULL},
        {CHANNELMSG_ALLOFFERS_DELIVERED,        1, vmbus_onoffers_delivered},
        {CHANNELMSG_OPENCHANNEL,                0, NULL},
        {CHANNELMSG_OPENCHANNEL_RESULT,         1, vmbus_onopen_result},
        {CHANNELMSG_CLOSECHANNEL,               0, NULL},
        {CHANNELMSG_GPADL_HEADER,               0, NULL},
        {CHANNELMSG_GPADL_BODY,                 0, NULL},
        {CHANNELMSG_GPADL_CREATED,              1, vmbus_ongpadl_created},
        {CHANNELMSG_GPADL_TEARDOWN,             0, NULL},
        {CHANNELMSG_GPADL_TORNDOWN,             1, vmbus_ongpadl_torndown},
        {CHANNELMSG_RELID_RELEASED,             0, NULL},
        {CHANNELMSG_INITIATE_CONTACT,           0, NULL},
        {CHANNELMSG_VERSION_RESPONSE,           1, vmbus_onversion_response},
        {CHANNELMSG_UNLOAD,                     0, NULL},
};

/*
 * vmbus_onmessage - Handler for channel protocol messages.
 *
 * This is invoked in the vmbus worker thread context.
 */
void vmbus_onmessage(void *context)
{
        struct hv_message *msg = context;
        struct vmbus_channel_message_header *hdr;
        int size;

        hdr = (struct vmbus_channel_message_header *)msg->u.payload;
        size = msg->header.payload_size;

        if (hdr->msgtype >= CHANNELMSG_COUNT) {
                pr_err("Received invalid channel message type %d size %d\n",
                           hdr->msgtype, size);
                print_hex_dump_bytes("", DUMP_PREFIX_NONE,
                                     (unsigned char *)msg->u.payload, size);
                return;
        }

        if (channel_message_table[hdr->msgtype].message_handler)
                channel_message_table[hdr->msgtype].message_handler(hdr);
        else
                pr_err("Unhandled channel message type %d\n", hdr->msgtype);
}

/*
 * vmbus_request_offers - Send a request to get all our pending offers.
 */
int vmbus_request_offers(void)
{
        struct vmbus_channel_message_header *msg;
        struct vmbus_channel_msginfo *msginfo;
        int ret;

        msginfo = kmalloc(sizeof(*msginfo) +
                          sizeof(struct vmbus_channel_message_header),
                          GFP_KERNEL);
        if (!msginfo)
                return -ENOMEM;

        msg = (struct vmbus_channel_message_header *)msginfo->msg;

        msg->msgtype = CHANNELMSG_REQUESTOFFERS;

        ret = vmbus_post_msg(msg,
                               sizeof(struct vmbus_channel_message_header));
        if (ret != 0) {
                pr_err("Unable to request offers - %d\n", ret);
                goto cleanup;
        }

cleanup:
        kfree(msginfo);

        return ret;
}

/*
 * Retrieve the (sub) channel on which to send an outgoing request.
 * When a primary channel has multiple sub-channels, we try to
 * distribute the load equally amongst all available channels.
 */
struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
{
        struct list_head *cur, *tmp;
        int cur_cpu;
        struct vmbus_channel *cur_channel;
        struct vmbus_channel *outgoing_channel = primary;
        int next_channel;
        int i = 1;

        if (list_empty(&primary->sc_list))
                return outgoing_channel;

        next_channel = primary->next_oc++;

        if (next_channel > (primary->num_sc)) {
                primary->next_oc = 0;
                return outgoing_channel;
        }

        cur_cpu = hv_context.vp_index[get_cpu()];
        put_cpu();
        list_for_each_safe(cur, tmp, &primary->sc_list) {
                cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
                if (cur_channel->state != CHANNEL_OPENED_STATE)
                        continue;

                if (cur_channel->target_vp == cur_cpu)
                        return cur_channel;

                if (i == next_channel)
                        return cur_channel;

                i++;
        }

        return outgoing_channel;
}
EXPORT_SYMBOL_GPL(vmbus_get_outgoing_channel);
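
/*
 * Illustrative sketch only (not part of this file): a driver that has
 * opened sub-channels would typically pick the channel for each request
 * as below. example_send() and its parameters are hypothetical.
 */
static int example_send(struct vmbus_channel *primary, void *pkt, u32 len,
                        u64 requestid)
{
        struct vmbus_channel *outgoing;

        /* Prefer a channel bound to the current VCPU, else round robin. */
        outgoing = vmbus_get_outgoing_channel(primary);
        return vmbus_sendpacket(outgoing, pkt, len, requestid,
                                VM_PKT_DATA_INBAND, 0);
}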

static void invoke_sc_cb(struct vmbus_channel *primary_channel)
{
        struct list_head *cur, *tmp;
        struct vmbus_channel *cur_channel;

        if (primary_channel->sc_creation_callback == NULL)
                return;

        list_for_each_safe(cur, tmp, &primary_channel->sc_list) {
                cur_channel = list_entry(cur, struct vmbus_channel, sc_list);

                primary_channel->sc_creation_callback(cur_channel);
        }
}

void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
                                void (*sc_cr_cb)(struct vmbus_channel *new_sc))
{
        primary_channel->sc_creation_callback = sc_cr_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);
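
/*
 * Illustrative sketch only (not part of this file): how a driver might
 * wire up sub-channel creation. example_sc_open() and
 * example_init_subchannels() are hypothetical; real drivers request
 * sub-channels through their own device-specific protocol.
 */
static void example_sc_open(struct vmbus_channel *new_sc)
{
        /* Typically vmbus_open(new_sc, ...) plus per-channel setup. */
}

static void example_init_subchannels(struct vmbus_channel *primary)
{
        /* Register the callback before sub-channel offers can arrive. */
        vmbus_set_sc_create_callback(primary, example_sc_open);

        /*
         * vmbus_are_subchannels_present() re-invokes the callback for
         * every sub-channel already on sc_list, so offers that raced
         * with registration are not lost.
         */
        if (vmbus_are_subchannels_present(primary))
                pr_info("sub-channels already offered\n");
}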

bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
{
        bool ret;

        ret = !list_empty(&primary->sc_list);

        if (ret) {
                /*
                 * Invoke the callback on sub-channel creation.
                 * This will present a uniform interface to the
                 * clients.
                 */
                invoke_sc_cb(primary);
        }

        return ret;
}
EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);