hv: run non-blocking message handlers in the dispatch tasklet
[cascardo/linux.git] drivers/hv/channel_mgmt.c
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/delay.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/hyperv.h>

#include "hyperv_vmbus.h"

struct vmbus_rescind_work {
        struct work_struct work;
        struct vmbus_channel *channel;
};

/**
 * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
 * @icmsghdrp: Pointer to msg header structure
 * @negop: Pointer to negotiate message structure
 * @buf: Raw buffer channel data
 * @fw_version: The framework version we can support
 * @srv_version: The service version we can support
 *
 * @icmsghdrp is of type &struct icmsg_hdr.
 * @negop is of type &struct icmsg_negotiate.
 * Set up and fill in default negotiate response message.
 *
 * The fw_version specifies the framework version that
 * we can support and srv_version specifies the service
 * version we can support.
 *
 * Mainly used by Hyper-V drivers.
 */
bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
                                struct icmsg_negotiate *negop, u8 *buf,
                                int fw_version, int srv_version)
{
        int icframe_major, icframe_minor;
        int icmsg_major, icmsg_minor;
        int fw_major, fw_minor;
        int srv_major, srv_minor;
        int i;
        bool found_match = false;

        icmsghdrp->icmsgsize = 0x10;
        fw_major = (fw_version >> 16);
        fw_minor = (fw_version & 0xFFFF);

        srv_major = (srv_version >> 16);
        srv_minor = (srv_version & 0xFFFF);

        negop = (struct icmsg_negotiate *)&buf[
                sizeof(struct vmbuspipe_hdr) +
                sizeof(struct icmsg_hdr)];

        icframe_major = negop->icframe_vercnt;
        icframe_minor = 0;

        icmsg_major = negop->icmsg_vercnt;
        icmsg_minor = 0;

        /*
         * Select the framework version number we will
         * support.
         */

        for (i = 0; i < negop->icframe_vercnt; i++) {
                if ((negop->icversion_data[i].major == fw_major) &&
                   (negop->icversion_data[i].minor == fw_minor)) {
                        icframe_major = negop->icversion_data[i].major;
                        icframe_minor = negop->icversion_data[i].minor;
                        found_match = true;
                }
        }

        if (!found_match)
                goto fw_error;

        found_match = false;

        for (i = negop->icframe_vercnt;
                 (i < negop->icframe_vercnt + negop->icmsg_vercnt); i++) {
                if ((negop->icversion_data[i].major == srv_major) &&
                   (negop->icversion_data[i].minor == srv_minor)) {
                        icmsg_major = negop->icversion_data[i].major;
                        icmsg_minor = negop->icversion_data[i].minor;
                        found_match = true;
                }
        }

        /*
         * Respond with the framework and service
         * version numbers we can support.
         */

fw_error:
        if (!found_match) {
                negop->icframe_vercnt = 0;
                negop->icmsg_vercnt = 0;
        } else {
                negop->icframe_vercnt = 1;
                negop->icmsg_vercnt = 1;
        }

        negop->icversion_data[0].major = icframe_major;
        negop->icversion_data[0].minor = icframe_minor;
        negop->icversion_data[1].major = icmsg_major;
        negop->icversion_data[1].minor = icmsg_minor;
        return found_match;
}
EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
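
/*
 * Illustrative sketch only (not part of this file): how an IC utility
 * driver typically calls vmbus_prep_negotiate_resp() from its channel
 * callback after receiving a negotiate request. The example_* names and
 * version values are assumptions; callers pass their own framework and
 * service versions encoded as (major << 16 | minor). Passing NULL for
 * negop is harmless since the function recomputes it from the buffer.
 */
#define EXAMPLE_FW_VERSION      ((3 << 16) | 0) /* framework 3.0, assumed */
#define EXAMPLE_SRV_VERSION     ((3 << 16) | 0) /* service 3.0, assumed */

static void example_handle_negotiate(u8 *buf)
{
        struct icmsg_hdr *icmsghdrp;

        /* The IC message header follows the VM bus pipe header. */
        icmsghdrp = (struct icmsg_hdr *)&buf[sizeof(struct vmbuspipe_hdr)];
        if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE)
                vmbus_prep_negotiate_resp(icmsghdrp, NULL, buf,
                                          EXAMPLE_FW_VERSION,
                                          EXAMPLE_SRV_VERSION);
}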

static void vmbus_sc_creation_cb(struct work_struct *work)
{
        struct vmbus_channel *newchannel = container_of(work,
                                                        struct vmbus_channel,
                                                        work);
        struct vmbus_channel *primary_channel = newchannel->primary_channel;

        /*
         * On entry sc_creation_callback has already been verified to
         * be non-NULL.
         */
        primary_channel->sc_creation_callback(newchannel);
}

/*
 * alloc_channel - Allocate and initialize a vmbus channel object
 */
static struct vmbus_channel *alloc_channel(void)
{
        static atomic_t chan_num = ATOMIC_INIT(0);
        struct vmbus_channel *channel;

        channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
        if (!channel)
                return NULL;

        channel->id = atomic_inc_return(&chan_num);
        spin_lock_init(&channel->inbound_lock);
        spin_lock_init(&channel->lock);

        INIT_LIST_HEAD(&channel->sc_list);
        INIT_LIST_HEAD(&channel->percpu_list);

        channel->controlwq = alloc_workqueue("hv_vmbus_ctl/%d", WQ_MEM_RECLAIM,
                                             1, channel->id);
        if (!channel->controlwq) {
                kfree(channel);
                return NULL;
        }

        return channel;
}

/*
 * release_channel - Release the vmbus channel object itself
 */
static void release_channel(struct work_struct *work)
{
        struct vmbus_channel *channel = container_of(work,
                                                     struct vmbus_channel,
                                                     work);

        destroy_workqueue(channel->controlwq);

        kfree(channel);
}

/*
 * free_channel - Release the resources used by the vmbus channel object
 */
static void free_channel(struct vmbus_channel *channel)
{
        /*
         * We have to release the channel's workqueue/thread in the vmbus
         * connection's workqueue/thread context, i.e., we can't destroy
         * ourselves.
         */
        INIT_WORK(&channel->work, release_channel);
        queue_work(vmbus_connection.work_queue, &channel->work);
}

static void process_rescind_fn(struct work_struct *work)
{
        struct vmbus_rescind_work *rc_work;
        struct vmbus_channel *channel;
        struct device *dev;

        rc_work = container_of(work, struct vmbus_rescind_work, work);
        channel = rc_work->channel;

        /*
         * We have already acquired a reference on the channel
         * and so it cannot vanish underneath us.
         * It is possible (while very unlikely) that we may
         * get here while the processing of the initial offer
         * is still not complete. Deal with this situation by
         * just waiting until the channel is in the correct state.
         */

        while (channel->work.func != release_channel)
                msleep(1000);

        if (channel->device_obj) {
                dev = get_device(&channel->device_obj->device);
                if (dev) {
                        vmbus_device_unregister(channel->device_obj);
                        put_device(dev);
                }
        } else {
                hv_process_channel_removal(channel,
                                           channel->offermsg.child_relid);
        }
        kfree(work);
}

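/*
 * Note: percpu_channel_enq()/percpu_channel_deq() below always run on
 * the channel's target CPU, either directly or via
 * smp_call_function_single() from their callers, which is why the
 * per-cpu channel list can be updated without taking a lock.
 */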
static void percpu_channel_enq(void *arg)
{
        struct vmbus_channel *channel = arg;
        int cpu = smp_processor_id();

        list_add_tail(&channel->percpu_list, &hv_context.percpu_list[cpu]);
}

static void percpu_channel_deq(void *arg)
{
        struct vmbus_channel *channel = arg;

        list_del(&channel->percpu_list);
}

void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
{
        struct vmbus_channel_relid_released msg;
        unsigned long flags;
        struct vmbus_channel *primary_channel;

        memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
        msg.child_relid = relid;
        msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
        vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released));

        if (channel == NULL)
                return;

        if (channel->target_cpu != get_cpu()) {
                put_cpu();
                smp_call_function_single(channel->target_cpu,
                                         percpu_channel_deq, channel, true);
        } else {
                percpu_channel_deq(channel);
                put_cpu();
        }

        if (channel->primary_channel == NULL) {
                spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
                list_del(&channel->listentry);
                spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
        } else {
                primary_channel = channel->primary_channel;
                spin_lock_irqsave(&primary_channel->lock, flags);
                list_del(&channel->sc_list);
                spin_unlock_irqrestore(&primary_channel->lock, flags);
        }
        free_channel(channel);
}

void vmbus_free_channels(void)
{
        struct vmbus_channel *channel;

        list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
                vmbus_device_unregister(channel->device_obj);
                free_channel(channel);
        }
}

static void vmbus_do_device_register(struct work_struct *work)
{
        struct hv_device *device_obj;
        int ret;
        unsigned long flags;
        struct vmbus_channel *newchannel = container_of(work,
                                                     struct vmbus_channel,
                                                     work);

        ret = vmbus_device_register(newchannel->device_obj);
        if (ret != 0) {
                pr_err("unable to add child device object (relid %d)\n",
                        newchannel->offermsg.child_relid);
                spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
                list_del(&newchannel->listentry);
                device_obj = newchannel->device_obj;
                newchannel->device_obj = NULL;
                spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);

                if (newchannel->target_cpu != get_cpu()) {
                        put_cpu();
                        smp_call_function_single(newchannel->target_cpu,
                                         percpu_channel_deq, newchannel, true);
                } else {
                        percpu_channel_deq(newchannel);
                        put_cpu();
                }

                kfree(device_obj);
                if (!newchannel->rescind) {
                        free_channel(newchannel);
                        return;
                }
        }
        /*
         * The next state for this channel is to be freed.
         */
        INIT_WORK(&newchannel->work, release_channel);
}

/*
 * vmbus_process_offer - Process the offer by creating a channel/device
 * associated with this offer
 */
static void vmbus_process_offer(struct vmbus_channel *newchannel)
{
        struct vmbus_channel *channel;
        bool fnew = true;
        bool enq = false;
        unsigned long flags;

        /* Make sure this is a new offer */
        spin_lock_irqsave(&vmbus_connection.channel_lock, flags);

        list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
                if (!uuid_le_cmp(channel->offermsg.offer.if_type,
                        newchannel->offermsg.offer.if_type) &&
                        !uuid_le_cmp(channel->offermsg.offer.if_instance,
                                newchannel->offermsg.offer.if_instance)) {
                        fnew = false;
                        break;
                }
        }

        if (fnew) {
                list_add_tail(&newchannel->listentry,
                              &vmbus_connection.chn_list);
                enq = true;
        }

        spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);

        if (enq) {
                if (newchannel->target_cpu != get_cpu()) {
                        put_cpu();
                        smp_call_function_single(newchannel->target_cpu,
                                                 percpu_channel_enq,
                                                 newchannel, true);
                } else {
                        percpu_channel_enq(newchannel);
                        put_cpu();
                }
        }
        if (!fnew) {
                /*
                 * Check to see if this is a sub-channel.
                 */
                if (newchannel->offermsg.offer.sub_channel_index != 0) {
                        /*
                         * Process the sub-channel.
                         */
                        newchannel->primary_channel = channel;
                        spin_lock_irqsave(&channel->lock, flags);
                        list_add_tail(&newchannel->sc_list, &channel->sc_list);
                        spin_unlock_irqrestore(&channel->lock, flags);

                        if (newchannel->target_cpu != get_cpu()) {
                                put_cpu();
                                smp_call_function_single(newchannel->target_cpu,
                                                         percpu_channel_enq,
                                                         newchannel, true);
                        } else {
                                percpu_channel_enq(newchannel);
                                put_cpu();
                        }

                        newchannel->state = CHANNEL_OPEN_STATE;
                        channel->num_sc++;
                        if (channel->sc_creation_callback != NULL) {
                                /*
                                 * We need to invoke the sub-channel creation
                                 * callback; invoke this in a separate work
                                 * context since we are currently running on
                                 * the global work context in which we handle
                                 * messages from the host.
                                 */
                                INIT_WORK(&newchannel->work,
                                          vmbus_sc_creation_cb);
                                queue_work(newchannel->controlwq,
                                           &newchannel->work);
                        }

                        return;
                }

                goto err_free_chan;
        }

        /*
         * This state is used to indicate a successful open
         * so that when we do close the channel normally, we
         * can cleanup properly
         */
        newchannel->state = CHANNEL_OPEN_STATE;

        /*
         * Start the process of binding this offer to the driver
         * We need to set the DeviceObject field before calling
         * vmbus_child_dev_add()
         */
        newchannel->device_obj = vmbus_device_create(
                &newchannel->offermsg.offer.if_type,
                &newchannel->offermsg.offer.if_instance,
                newchannel);
        if (!newchannel->device_obj)
                goto err_deq_chan;

        /*
         * Add the new device to the bus. This will kick off device-driver
         * binding which eventually invokes the device driver's AddDevice()
         * method.
         * Invoke this call on the per-channel work context.
         * Until we return from this function, rescind offer message
         * cannot be processed as we are running on the global message
         * handling work.
         */
        INIT_WORK(&newchannel->work, vmbus_do_device_register);
        queue_work(newchannel->controlwq, &newchannel->work);
        return;

err_deq_chan:
        spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
        list_del(&newchannel->listentry);
        spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);

        if (newchannel->target_cpu != get_cpu()) {
                put_cpu();
                smp_call_function_single(newchannel->target_cpu,
                                         percpu_channel_deq, newchannel, true);
        } else {
                percpu_channel_deq(newchannel);
                put_cpu();
        }

err_free_chan:
        free_channel(newchannel);
}

enum {
        IDE = 0,
        SCSI,
        NIC,
        ND,
        MAX_PERF_CHN,
};

/*
 * This is an array of device_ids (device types) that are performance critical.
 * We attempt to distribute the interrupt load for these devices across
 * all available CPUs.
 */
static const struct hv_vmbus_device_id hp_devs[] = {
        /* IDE */
        { HV_IDE_GUID, },
        /* Storage - SCSI */
        { HV_SCSI_GUID, },
        /* Network */
        { HV_NIC_GUID, },
        /* NetworkDirect Guest RDMA */
        { HV_ND_GUID, },
};

/*
 * We use this state to statically distribute the channel interrupt load.
 */
static u32 next_vp;

/*
 * Starting with Win8, we can statically distribute the incoming
 * channel interrupt load by binding a channel to a VCPU. We
 * implement here a simple round robin scheme for distributing
 * the interrupt load.
 * We will bind channels that are not performance critical to cpu 0 and
 * performance critical channels (IDE, SCSI and Network) will be uniformly
 * distributed across all available CPUs.
 */
static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_guid)
{
        u32 cur_cpu;
        int i;
        bool perf_chn = false;
        u32 max_cpus = num_online_cpus();

        for (i = IDE; i < MAX_PERF_CHN; i++) {
                if (!memcmp(type_guid->b, hp_devs[i].guid,
                                 sizeof(uuid_le))) {
                        perf_chn = true;
                        break;
                }
        }
        if ((vmbus_proto_version == VERSION_WS2008) ||
            (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) {
                /*
                 * Prior to win8, all channel interrupts are
                 * delivered on cpu 0.
                 * Also if the channel is not a performance critical
                 * channel, bind it to cpu 0.
                 */
                channel->target_cpu = 0;
                channel->target_vp = 0;
                return;
        }
        cur_cpu = (++next_vp % max_cpus);
        channel->target_cpu = cur_cpu;
        channel->target_vp = hv_context.vp_index[cur_cpu];
}
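
/*
 * Worked example of the round robin above (an illustration, not code
 * from this file): with 4 online CPUs and next_vp starting at 0,
 * successive performance-critical channels are bound to CPUs
 * 1, 2, 3, 0, 1, ... while every non-performance-critical channel
 * stays on CPU 0.
 */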

/*
 * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
 */
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_offer_channel *offer;
        struct vmbus_channel *newchannel;

        offer = (struct vmbus_channel_offer_channel *)hdr;

        /* Allocate the channel object and save this offer. */
        newchannel = alloc_channel();
        if (!newchannel) {
                pr_err("Unable to allocate channel object\n");
                return;
        }

        /*
         * By default we setup state to enable batched
         * reading. A specific service can choose to
         * disable this prior to opening the channel.
         */
        newchannel->batched_reading = true;

        /*
         * Setup state for signalling the host.
         */
        newchannel->sig_event = (struct hv_input_signal_event *)
                                (ALIGN((unsigned long)
                                &newchannel->sig_buf,
                                HV_HYPERCALL_PARAM_ALIGN));

        newchannel->sig_event->connectionid.asu32 = 0;
        newchannel->sig_event->connectionid.u.id = VMBUS_EVENT_CONNECTION_ID;
        newchannel->sig_event->flag_number = 0;
        newchannel->sig_event->rsvdz = 0;

        if (vmbus_proto_version != VERSION_WS2008) {
                newchannel->is_dedicated_interrupt =
                                (offer->is_dedicated_interrupt != 0);
                newchannel->sig_event->connectionid.u.id =
                                offer->connection_id;
        }

        init_vp_index(newchannel, &offer->offer.if_type);

        memcpy(&newchannel->offermsg, offer,
               sizeof(struct vmbus_channel_offer_channel));
        newchannel->monitor_grp = (u8)offer->monitorid / 32;
        newchannel->monitor_bit = (u8)offer->monitorid % 32;

        vmbus_process_offer(newchannel);
}

/*
 * vmbus_onoffer_rescind - Rescind offer handler.
 *
 * We queue a work item to process this offer synchronously.
 */
static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_rescind_offer *rescind;
        struct vmbus_channel *channel;
        struct vmbus_rescind_work *rc_work;

        rescind = (struct vmbus_channel_rescind_offer *)hdr;
        channel = relid2channel(rescind->child_relid, true);

        if (channel == NULL) {
                hv_process_channel_removal(NULL, rescind->child_relid);
                return;
        }

        /*
         * We have acquired a reference on the channel and have posted
         * the rescind state. Perform further cleanup in a work context
         * that is different from the global work context in which
         * we process messages from the host (we are currently executing
         * on that global context).
         */
        rc_work = kzalloc(sizeof(struct vmbus_rescind_work), GFP_KERNEL);
        if (!rc_work) {
                pr_err("Unable to allocate memory for rescind processing\n");
                return;
        }
        rc_work->channel = channel;
        INIT_WORK(&rc_work->work, process_rescind_fn);
        schedule_work(&rc_work->work);
}

/*
 * vmbus_onoffers_delivered -
 * This is invoked when all offers have been delivered.
 *
 * Nothing to do here.
 */
static void vmbus_onoffers_delivered(
                        struct vmbus_channel_message_header *hdr)
{
}

/*
 * vmbus_onopen_result - Open result handler.
 *
 * This is invoked when we receive a response to our channel open request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_open_result *result;
        struct vmbus_channel_msginfo *msginfo;
        struct vmbus_channel_message_header *requestheader;
        struct vmbus_channel_open_channel *openmsg;
        unsigned long flags;

        result = (struct vmbus_channel_open_result *)hdr;

        /*
         * Find the open msg, copy the result and signal/unblock the wait event
         */
        spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

        list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
                                msglistentry) {
                requestheader =
                        (struct vmbus_channel_message_header *)msginfo->msg;

                if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
                        openmsg =
                        (struct vmbus_channel_open_channel *)msginfo->msg;
                        if (openmsg->child_relid == result->child_relid &&
                            openmsg->openid == result->openid) {
                                memcpy(&msginfo->response.open_result,
                                       result,
                                       sizeof(
                                        struct vmbus_channel_open_result));
                                complete(&msginfo->waitevent);
                                break;
                        }
                }
        }
        spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_created - GPADL created handler.
 *
 * This is invoked when we receive a response to our gpadl create request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_gpadl_created *gpadlcreated;
        struct vmbus_channel_msginfo *msginfo;
        struct vmbus_channel_message_header *requestheader;
        struct vmbus_channel_gpadl_header *gpadlheader;
        unsigned long flags;

        gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;

        /*
         * Find the establish msg, copy the result and signal/unblock the wait
         * event
         */
        spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

        list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
                                msglistentry) {
                requestheader =
                        (struct vmbus_channel_message_header *)msginfo->msg;

                if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
                        gpadlheader =
                        (struct vmbus_channel_gpadl_header *)requestheader;

                        if ((gpadlcreated->child_relid ==
                             gpadlheader->child_relid) &&
                            (gpadlcreated->gpadl == gpadlheader->gpadl)) {
                                memcpy(&msginfo->response.gpadl_created,
                                       gpadlcreated,
                                       sizeof(
                                        struct vmbus_channel_gpadl_created));
                                complete(&msginfo->waitevent);
                                break;
                        }
                }
        }
        spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_torndown - GPADL torndown handler.
 *
 * This is invoked when we receive a response to our gpadl teardown request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_torndown(
                        struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_gpadl_torndown *gpadl_torndown;
        struct vmbus_channel_msginfo *msginfo;
        struct vmbus_channel_message_header *requestheader;
        struct vmbus_channel_gpadl_teardown *gpadl_teardown;
        unsigned long flags;

        gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;

        /*
         * Find the teardown msg, copy the result and signal/unblock the wait
         * event
         */
        spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

        list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
                                msglistentry) {
                requestheader =
                        (struct vmbus_channel_message_header *)msginfo->msg;

                if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
                        gpadl_teardown =
                        (struct vmbus_channel_gpadl_teardown *)requestheader;

                        if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
                                memcpy(&msginfo->response.gpadl_torndown,
                                       gpadl_torndown,
                                       sizeof(
                                        struct vmbus_channel_gpadl_torndown));
                                complete(&msginfo->waitevent);
                                break;
                        }
                }
        }
        spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_onversion_response - Version response handler.
 *
 * This is invoked when we receive a response to our initiate contact request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onversion_response(
                struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_msginfo *msginfo;
        struct vmbus_channel_message_header *requestheader;
        struct vmbus_channel_version_response *version_response;
        unsigned long flags;

        version_response = (struct vmbus_channel_version_response *)hdr;
        spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

        list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
                                msglistentry) {
                requestheader =
                        (struct vmbus_channel_message_header *)msginfo->msg;

                if (requestheader->msgtype ==
                    CHANNELMSG_INITIATE_CONTACT) {
                        memcpy(&msginfo->response.version_response,
                              version_response,
                              sizeof(struct vmbus_channel_version_response));
                        complete(&msginfo->waitevent);
                }
        }
        spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/* Channel message dispatch table */
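/*
 * The middle field flags handlers that never block. Per the change this
 * file reflects ("run non-blocking message handlers in the dispatch
 * tasklet"), handlers marked 1 can be invoked directly from the message
 * dispatch tasklet, while the remaining (potentially blocking) handlers
 * are deferred to a work queue.
 */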
struct vmbus_channel_message_table_entry
        channel_message_table[CHANNELMSG_COUNT] = {
        {CHANNELMSG_INVALID,                    0, NULL},
        {CHANNELMSG_OFFERCHANNEL,               0, vmbus_onoffer},
        {CHANNELMSG_RESCIND_CHANNELOFFER,       0, vmbus_onoffer_rescind},
        {CHANNELMSG_REQUESTOFFERS,              0, NULL},
        {CHANNELMSG_ALLOFFERS_DELIVERED,        1, vmbus_onoffers_delivered},
        {CHANNELMSG_OPENCHANNEL,                0, NULL},
        {CHANNELMSG_OPENCHANNEL_RESULT,         1, vmbus_onopen_result},
        {CHANNELMSG_CLOSECHANNEL,               0, NULL},
        {CHANNELMSG_GPADL_HEADER,               0, NULL},
        {CHANNELMSG_GPADL_BODY,                 0, NULL},
        {CHANNELMSG_GPADL_CREATED,              1, vmbus_ongpadl_created},
        {CHANNELMSG_GPADL_TEARDOWN,             0, NULL},
        {CHANNELMSG_GPADL_TORNDOWN,             1, vmbus_ongpadl_torndown},
        {CHANNELMSG_RELID_RELEASED,             0, NULL},
        {CHANNELMSG_INITIATE_CONTACT,           0, NULL},
        {CHANNELMSG_VERSION_RESPONSE,           1, vmbus_onversion_response},
        {CHANNELMSG_UNLOAD,                     0, NULL},
};

/*
 * vmbus_onmessage - Handler for channel protocol messages.
 *
 * This is invoked in the vmbus worker thread context.
 */
void vmbus_onmessage(void *context)
{
        struct hv_message *msg = context;
        struct vmbus_channel_message_header *hdr;
        int size;

        hdr = (struct vmbus_channel_message_header *)msg->u.payload;
        size = msg->header.payload_size;

        if (hdr->msgtype >= CHANNELMSG_COUNT) {
                pr_err("Received invalid channel message type %d size %d\n",
                           hdr->msgtype, size);
                print_hex_dump_bytes("", DUMP_PREFIX_NONE,
                                     (unsigned char *)msg->u.payload, size);
                return;
        }

        if (channel_message_table[hdr->msgtype].message_handler)
                channel_message_table[hdr->msgtype].message_handler(hdr);
        else
                pr_err("Unhandled channel message type %d\n", hdr->msgtype);
}

/*
 * vmbus_request_offers - Send a request to get all our pending offers.
 */
int vmbus_request_offers(void)
{
        struct vmbus_channel_message_header *msg;
        struct vmbus_channel_msginfo *msginfo;
        int ret;

        msginfo = kmalloc(sizeof(*msginfo) +
                          sizeof(struct vmbus_channel_message_header),
                          GFP_KERNEL);
        if (!msginfo)
                return -ENOMEM;

        msg = (struct vmbus_channel_message_header *)msginfo->msg;

        msg->msgtype = CHANNELMSG_REQUESTOFFERS;

        ret = vmbus_post_msg(msg,
                               sizeof(struct vmbus_channel_message_header));
        if (ret != 0) {
                pr_err("Unable to request offers - %d\n", ret);
                goto cleanup;
        }

cleanup:
        kfree(msginfo);

        return ret;
}

/*
 * Retrieve the (sub) channel on which to send an outgoing request.
 * When a primary channel has multiple sub-channels, we try to
 * distribute the load equally amongst all available channels.
 */
struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
{
        struct list_head *cur, *tmp;
        int cur_cpu;
        struct vmbus_channel *cur_channel;
        struct vmbus_channel *outgoing_channel = primary;
        int next_channel;
        int i = 1;

        if (list_empty(&primary->sc_list))
                return outgoing_channel;

        next_channel = primary->next_oc++;

        if (next_channel > (primary->num_sc)) {
                primary->next_oc = 0;
                return outgoing_channel;
        }

        cur_cpu = hv_context.vp_index[get_cpu()];
        put_cpu();
        list_for_each_safe(cur, tmp, &primary->sc_list) {
                cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
                if (cur_channel->state != CHANNEL_OPENED_STATE)
                        continue;

                if (cur_channel->target_vp == cur_cpu)
                        return cur_channel;

                if (i == next_channel)
                        return cur_channel;

                i++;
        }

        return outgoing_channel;
}
EXPORT_SYMBOL_GPL(vmbus_get_outgoing_channel);
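
/*
 * Illustrative sketch only (not part of this file): a driver that has
 * opened sub-channels would typically pick the channel for each request
 * as below. example_send() and its parameters are hypothetical.
 */
static int example_send(struct vmbus_channel *primary, void *pkt, u32 len,
                        u64 requestid)
{
        struct vmbus_channel *outgoing;

        /* Prefer a channel bound to the current VCPU, else round robin. */
        outgoing = vmbus_get_outgoing_channel(primary);
        return vmbus_sendpacket(outgoing, pkt, len, requestid,
                                VM_PKT_DATA_INBAND, 0);
}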

static void invoke_sc_cb(struct vmbus_channel *primary_channel)
{
        struct list_head *cur, *tmp;
        struct vmbus_channel *cur_channel;

        if (primary_channel->sc_creation_callback == NULL)
                return;

        list_for_each_safe(cur, tmp, &primary_channel->sc_list) {
                cur_channel = list_entry(cur, struct vmbus_channel, sc_list);

                primary_channel->sc_creation_callback(cur_channel);
        }
}

void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
                                void (*sc_cr_cb)(struct vmbus_channel *new_sc))
{
        primary_channel->sc_creation_callback = sc_cr_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);
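
/*
 * Illustrative sketch only (not part of this file): how a driver might
 * wire up sub-channel creation. example_sc_open() and
 * example_init_subchannels() are hypothetical; real drivers request
 * sub-channels through their own device-specific protocol.
 */
static void example_sc_open(struct vmbus_channel *new_sc)
{
        /* Typically vmbus_open(new_sc, ...) plus per-channel setup. */
}

static void example_init_subchannels(struct vmbus_channel *primary)
{
        /* Register the callback before sub-channel offers can arrive. */
        vmbus_set_sc_create_callback(primary, example_sc_open);

        /*
         * vmbus_are_subchannels_present() re-invokes the callback for
         * every sub-channel already on sc_list, so offers that raced
         * with registration are not lost.
         */
        if (vmbus_are_subchannels_present(primary))
                pr_info("sub-channels already offered\n");
}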

bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
{
        bool ret;

        ret = !list_empty(&primary->sc_list);

        if (ret) {
                /*
                 * Invoke the callback on sub-channel creation.
                 * This will present a uniform interface to the
                 * clients.
                 */
                invoke_sc_cb(primary);
        }

        return ret;
}
EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);