drivers/hv/channel_mgmt.c

   1 /*
   2  * Copyright (c) 2009, Microsoft Corporation.
   3  *
   4  * This program is free software; you can redistribute it and/or modify it
   5  * under the terms and conditions of the GNU General Public License,
   6  * version 2, as published by the Free Software Foundation.
   7  *
   8  * This program is distributed in the hope it will be useful, but WITHOUT
   9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  11  * more details.
  12  *
  13  * You should have received a copy of the GNU General Public License along with
  14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
  15  * Place - Suite 330, Boston, MA 02111-1307 USA.
  16  *
  17  * Authors:
  18  *   Haiyang Zhang <haiyangz@microsoft.com>
  19  *   Hank Janssen  <hjanssen@microsoft.com>
  20  */
  21 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  22
  23 #include <linux/kernel.h>
  24 #include <linux/interrupt.h>
  25 #include <linux/sched.h>
  26 #include <linux/wait.h>
  27 #include <linux/mm.h>
  28 #include <linux/slab.h>
  29 #include <linux/list.h>
  30 #include <linux/module.h>
  31 #include <linux/completion.h>
  32 #include <linux/delay.h>
  33 #include <linux/hyperv.h>
  34
  35 #include "hyperv_vmbus.h"
  36
  37 static void init_vp_index(struct vmbus_channel *channel, u16 dev_type);
  38
  39 static const struct vmbus_device vmbus_devs[] = {
  40         /* IDE */
  41         { .dev_type = HV_IDE,
  42           HV_IDE_GUID,
  43           .perf_device = true,
  44         },
  45
  46         /* SCSI */
  47         { .dev_type = HV_SCSI,
  48           HV_SCSI_GUID,
  49           .perf_device = true,
  50         },
  51
  52         /* Fibre Channel */
  53         { .dev_type = HV_FC,
  54           HV_SYNTHFC_GUID,
  55           .perf_device = true,
  56         },
  57
  58         /* Synthetic NIC */
  59         { .dev_type = HV_NIC,
  60           HV_NIC_GUID,
  61           .perf_device = true,
  62         },
  63
  64         /* Network Direct */
  65         { .dev_type = HV_ND,
  66           HV_ND_GUID,
  67           .perf_device = true,
  68         },
  69
  70         /* PCIE */
  71         { .dev_type = HV_PCIE,
  72           HV_PCIE_GUID,
  73           .perf_device = true,
  74         },
  75
  76         /* Synthetic Frame Buffer */
  77         { .dev_type = HV_FB,
  78           HV_SYNTHVID_GUID,
  79           .perf_device = false,
  80         },
  81
  82         /* Synthetic Keyboard */
  83         { .dev_type = HV_KBD,
  84           HV_KBD_GUID,
  85           .perf_device = false,
  86         },
  87
  88         /* Synthetic MOUSE */
  89         { .dev_type = HV_MOUSE,
  90           HV_MOUSE_GUID,
  91           .perf_device = false,
  92         },
  93
  94         /* KVP */
  95         { .dev_type = HV_KVP,
  96           HV_KVP_GUID,
  97           .perf_device = false,
  98         },
  99
 100         /* Time Synch */
 101         { .dev_type = HV_TS,
 102           HV_TS_GUID,
 103           .perf_device = false,
 104         },
 105
 106         /* Heartbeat */
 107         { .dev_type = HV_HB,
 108           HV_HEART_BEAT_GUID,
 109           .perf_device = false,
 110         },
 111
 112         /* Shutdown */
 113         { .dev_type = HV_SHUTDOWN,
 114           HV_SHUTDOWN_GUID,
 115           .perf_device = false,
 116         },
 117
 118         /* File copy */
 119         { .dev_type = HV_FCOPY,
 120           HV_FCOPY_GUID,
 121           .perf_device = false,
 122         },
 123
 124         /* Backup */
 125         { .dev_type = HV_BACKUP,
 126           HV_VSS_GUID,
 127           .perf_device = false,
 128         },
 129
 130         /* Dynamic Memory */
 131         { .dev_type = HV_DM,
 132           HV_DM_GUID,
 133           .perf_device = false,
 134         },
 135
 136         /* Unknown GUID */
 137         { .dev_type = HV_UNKOWN,
 138           .perf_device = false,
 139         },
 140 };
 141
 142 static const struct {
 143         uuid_le guid;
 144 } vmbus_unsupported_devs[] = {
 145         { HV_AVMA1_GUID },
 146         { HV_AVMA2_GUID },
 147         { HV_RDV_GUID   },
 148 };
 149
 150 static bool is_unsupported_vmbus_devs(const uuid_le *guid)
 151 {
 152         int i;
 153
 154         for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
 155                 if (!uuid_le_cmp(*guid, vmbus_unsupported_devs[i].guid))
 156                         return true;
 157         return false;
 158 }
 159
 160 static u16 hv_get_dev_type(const struct vmbus_channel *channel)
 161 {
 162         const uuid_le *guid = &channel->offermsg.offer.if_type;
 163         u16 i;
 164
 165         if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
 166                 return HV_UNKOWN;
 167
 168         for (i = HV_IDE; i < HV_UNKOWN; i++) {
 169                 if (!uuid_le_cmp(*guid, vmbus_devs[i].guid))
 170                         return i;
 171         }
 172         pr_info("Unknown GUID: %pUl\n", guid);
 173         return i;
 174 }
 175
 176 /**
 177  * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
 178  * @icmsghdrp: Pointer to msg header structure
 179  * @icmsg_negotiate: Pointer to negotiate message structure
 180  * @buf: Raw buffer channel data
 181  *
 182  * @icmsghdrp is of type &struct icmsg_hdr.
 183  * @negop is of type &struct icmsg_negotiate.
 184  * Set up and fill in default negotiate response message.
 185  *
 186  * The fw_version specifies the  framework version that
 187  * we can support and srv_version specifies the service
 188  * version we can support.
 189  *
 190  * Mainly used by Hyper-V drivers.
 191  */
 192 bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
 193                                 struct icmsg_negotiate *negop, u8 *buf,
 194                                 int fw_version, int srv_version)
 195 {
 196         int icframe_major, icframe_minor;
 197         int icmsg_major, icmsg_minor;
 198         int fw_major, fw_minor;
 199         int srv_major, srv_minor;
 200         int i;
 201         bool found_match = false;
 202
 203         icmsghdrp->icmsgsize = 0x10;
 204         fw_major = (fw_version >> 16);
 205         fw_minor = (fw_version & 0xFFFF);
 206
 207         srv_major = (srv_version >> 16);
 208         srv_minor = (srv_version & 0xFFFF);
 209
 210         negop = (struct icmsg_negotiate *)&buf[
 211                 sizeof(struct vmbuspipe_hdr) +
 212                 sizeof(struct icmsg_hdr)];
 213
 214         icframe_major = negop->icframe_vercnt;
 215         icframe_minor = 0;
 216
 217         icmsg_major = negop->icmsg_vercnt;
 218         icmsg_minor = 0;
 219
 220         /*
 221          * Select the framework version number we will
 222          * support.
 223          */
 224
 225         for (i = 0; i < negop->icframe_vercnt; i++) {
 226                 if ((negop->icversion_data[i].major == fw_major) &&
 227                    (negop->icversion_data[i].minor == fw_minor)) {
 228                         icframe_major = negop->icversion_data[i].major;
 229                         icframe_minor = negop->icversion_data[i].minor;
 230                         found_match = true;
 231                 }
 232         }
 233
 234         if (!found_match)
 235                 goto fw_error;
 236
 237         found_match = false;
 238
 239         for (i = negop->icframe_vercnt;
 240                  (i < negop->icframe_vercnt + negop->icmsg_vercnt); i++) {
 241                 if ((negop->icversion_data[i].major == srv_major) &&
 242                    (negop->icversion_data[i].minor == srv_minor)) {
 243                         icmsg_major = negop->icversion_data[i].major;
 244                         icmsg_minor = negop->icversion_data[i].minor;
 245                         found_match = true;
 246                 }
 247         }
 248
 249         /*
 250          * Respond with the framework and service
 251          * version numbers we can support.
 252          */
 253
 254 fw_error:
 255         if (!found_match) {
 256                 negop->icframe_vercnt = 0;
 257                 negop->icmsg_vercnt = 0;
 258         } else {
 259                 negop->icframe_vercnt = 1;
 260                 negop->icmsg_vercnt = 1;
 261         }
 262
 263         negop->icversion_data[0].major = icframe_major;
 264         negop->icversion_data[0].minor = icframe_minor;
 265         negop->icversion_data[1].major = icmsg_major;
 266         negop->icversion_data[1].minor = icmsg_minor;
 267         return found_match;
 268 }
 269
 270 EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
 271
 272 /*
 273  * alloc_channel - Allocate and initialize a vmbus channel object
 274  */
 275 static struct vmbus_channel *alloc_channel(void)
 276 {
 277         struct vmbus_channel *channel;
 278
 279         channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
 280         if (!channel)
 281                 return NULL;
 282
 283         channel->acquire_ring_lock = true;
 284         spin_lock_init(&channel->inbound_lock);
 285         spin_lock_init(&channel->lock);
 286
 287         INIT_LIST_HEAD(&channel->sc_list);
 288         INIT_LIST_HEAD(&channel->percpu_list);
 289
 290         return channel;
 291 }
 292
 293 /*
 294  * free_channel - Release the resources used by the vmbus channel object
 295  */
 296 static void free_channel(struct vmbus_channel *channel)
 297 {
 298         kfree(channel);
 299 }
 300
 301 static void percpu_channel_enq(void *arg)
 302 {
 303         struct vmbus_channel *channel = arg;
 304         int cpu = smp_processor_id();
 305
 306         list_add_tail(&channel->percpu_list, &hv_context.percpu_list[cpu]);
 307 }
 308
 309 static void percpu_channel_deq(void *arg)
 310 {
 311         struct vmbus_channel *channel = arg;
 312
 313         list_del(&channel->percpu_list);
 314 }
 315
 316
 317 static void vmbus_release_relid(u32 relid)
 318 {
 319         struct vmbus_channel_relid_released msg;
 320
 321         memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
 322         msg.child_relid = relid;
 323         msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
 324         vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released));
 325 }
 326
 327 void hv_event_tasklet_disable(struct vmbus_channel *channel)
 328 {
 329         struct tasklet_struct *tasklet;
 330         tasklet = hv_context.event_dpc[channel->target_cpu];
 331         tasklet_disable(tasklet);
 332 }
 333
 334 void hv_event_tasklet_enable(struct vmbus_channel *channel)
 335 {
 336         struct tasklet_struct *tasklet;
 337         tasklet = hv_context.event_dpc[channel->target_cpu];
 338         tasklet_enable(tasklet);
 339
 340         /* In case there is any pending event */
 341         tasklet_schedule(tasklet);
 342 }
 343
 344 void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
 345 {
 346         unsigned long flags;
 347         struct vmbus_channel *primary_channel;
 348
 349         BUG_ON(!channel->rescind);
 350         BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
 351
 352         hv_event_tasklet_disable(channel);
 353         if (channel->target_cpu != get_cpu()) {
 354                 put_cpu();
 355                 smp_call_function_single(channel->target_cpu,
 356                                          percpu_channel_deq, channel, true);
 357         } else {
 358                 percpu_channel_deq(channel);
 359                 put_cpu();
 360         }
 361         hv_event_tasklet_enable(channel);
 362
 363         if (channel->primary_channel == NULL) {
 364                 list_del(&channel->listentry);
 365
 366                 primary_channel = channel;
 367         } else {
 368                 primary_channel = channel->primary_channel;
 369                 spin_lock_irqsave(&primary_channel->lock, flags);
 370                 list_del(&channel->sc_list);
 371                 primary_channel->num_sc--;
 372                 spin_unlock_irqrestore(&primary_channel->lock, flags);
 373         }
 374
 375         /*
 376          * We need to free the bit for init_vp_index() to work in the case
 377          * of sub-channel, when we reload drivers like hv_netvsc.
 378          */
 379         if (channel->affinity_policy == HV_LOCALIZED)
 380                 cpumask_clear_cpu(channel->target_cpu,
 381                                   &primary_channel->alloced_cpus_in_node);
 382
 383         vmbus_release_relid(relid);
 384
 385         free_channel(channel);
 386 }
 387
 388 void vmbus_free_channels(void)
 389 {
 390         struct vmbus_channel *channel, *tmp;
 391
 392         list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
 393                 listentry) {
 394                 /* hv_process_channel_removal() needs this */
 395                 channel->rescind = true;
 396
 397                 vmbus_device_unregister(channel->device_obj);
 398         }
 399 }
 400
 401 /*
 402  * vmbus_process_offer - Process the offer by creating a channel/device
 403  * associated with this offer
 404  */
 405 static void vmbus_process_offer(struct vmbus_channel *newchannel)
 406 {
 407         struct vmbus_channel *channel;
 408         bool fnew = true;
 409         unsigned long flags;
 410         u16 dev_type;
 411         int ret;
 412
 413         /* Make sure this is a new offer */
 414         mutex_lock(&vmbus_connection.channel_mutex);
 415
 416         list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
 417                 if (!uuid_le_cmp(channel->offermsg.offer.if_type,
 418                         newchannel->offermsg.offer.if_type) &&
 419                         !uuid_le_cmp(channel->offermsg.offer.if_instance,
 420                                 newchannel->offermsg.offer.if_instance)) {
 421                         fnew = false;
 422                         break;
 423                 }
 424         }
 425
 426         if (fnew)
 427                 list_add_tail(&newchannel->listentry,
 428                               &vmbus_connection.chn_list);
 429
 430         mutex_unlock(&vmbus_connection.channel_mutex);
 431
 432         if (!fnew) {
 433                 /*
 434                  * Check to see if this is a sub-channel.
 435                  */
 436                 if (newchannel->offermsg.offer.sub_channel_index != 0) {
 437                         /*
 438                          * Process the sub-channel.
 439                          */
 440                         newchannel->primary_channel = channel;
 441                         spin_lock_irqsave(&channel->lock, flags);
 442                         list_add_tail(&newchannel->sc_list, &channel->sc_list);
 443                         channel->num_sc++;
 444                         spin_unlock_irqrestore(&channel->lock, flags);
 445                 } else
 446                         goto err_free_chan;
 447         }
 448
 449         dev_type = hv_get_dev_type(newchannel);
 450         if (dev_type == HV_NIC)
 451                 set_channel_signal_state(newchannel, HV_SIGNAL_POLICY_EXPLICIT);
 452
 453         init_vp_index(newchannel, dev_type);
 454
 455         hv_event_tasklet_disable(newchannel);
 456         if (newchannel->target_cpu != get_cpu()) {
 457                 put_cpu();
 458                 smp_call_function_single(newchannel->target_cpu,
 459                                          percpu_channel_enq,
 460                                          newchannel, true);
 461         } else {
 462                 percpu_channel_enq(newchannel);
 463                 put_cpu();
 464         }
 465         hv_event_tasklet_enable(newchannel);
 466
 467         /*
 468          * This state is used to indicate a successful open
 469          * so that when we do close the channel normally, we
 470          * can cleanup properly
 471          */
 472         newchannel->state = CHANNEL_OPEN_STATE;
 473
 474         if (!fnew) {
 475                 if (channel->sc_creation_callback != NULL)
 476                         channel->sc_creation_callback(newchannel);
 477                 return;
 478         }
 479
 480         /*
 481          * Start the process of binding this offer to the driver
 482          * We need to set the DeviceObject field before calling
 483          * vmbus_child_dev_add()
 484          */
 485         newchannel->device_obj = vmbus_device_create(
 486                 &newchannel->offermsg.offer.if_type,
 487                 &newchannel->offermsg.offer.if_instance,
 488                 newchannel);
 489         if (!newchannel->device_obj)
 490                 goto err_deq_chan;
 491
 492         newchannel->device_obj->device_id = dev_type;
 493         /*
 494          * Add the new device to the bus. This will kick off device-driver
 495          * binding which eventually invokes the device driver's AddDevice()
 496          * method.
 497          */
 498         mutex_lock(&vmbus_connection.channel_mutex);
 499         ret = vmbus_device_register(newchannel->device_obj);
 500         mutex_unlock(&vmbus_connection.channel_mutex);
 501
 502         if (ret != 0) {
 503                 pr_err("unable to add child device object (relid %d)\n",
 504                         newchannel->offermsg.child_relid);
 505                 kfree(newchannel->device_obj);
 506                 goto err_deq_chan;
 507         }
 508         return;
 509
 510 err_deq_chan:
 511         mutex_lock(&vmbus_connection.channel_mutex);
 512         list_del(&newchannel->listentry);
 513         mutex_unlock(&vmbus_connection.channel_mutex);
 514
 515         hv_event_tasklet_disable(newchannel);
 516         if (newchannel->target_cpu != get_cpu()) {
 517                 put_cpu();
 518                 smp_call_function_single(newchannel->target_cpu,
 519                                          percpu_channel_deq, newchannel, true);
 520         } else {
 521                 percpu_channel_deq(newchannel);
 522                 put_cpu();
 523         }
 524         hv_event_tasklet_enable(newchannel);
 525
 526         vmbus_release_relid(newchannel->offermsg.child_relid);
 527
 528 err_free_chan:
 529         free_channel(newchannel);
 530 }
 531
 532 /*
 533  * We use this state to statically distribute the channel interrupt load.
 534  */
 535 static int next_numa_node_id;
 536
 537 /*
 538  * Starting with Win8, we can statically distribute the incoming
 539  * channel interrupt load by binding a channel to VCPU.
 540  * We do this in a hierarchical fashion:
 541  * First distribute the primary channels across available NUMA nodes
 542  * and then distribute the subchannels amongst the CPUs in the NUMA
 543  * node assigned to the primary channel.
 544  *
 545  * For pre-win8 hosts or non-performance critical channels we assign the
 546  * first CPU in the first NUMA node.
 547  */
 548 static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
 549 {
 550         u32 cur_cpu;
 551         bool perf_chn = vmbus_devs[dev_type].perf_device;
 552         struct vmbus_channel *primary = channel->primary_channel;
 553         int next_node;
 554         struct cpumask available_mask;
 555         struct cpumask *alloced_mask;
 556
 557         if ((vmbus_proto_version == VERSION_WS2008) ||
 558             (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) {
 559                 /*
 560                  * Prior to win8, all channel interrupts are
 561                  * delivered on cpu 0.
 562                  * Also if the channel is not a performance critical
 563                  * channel, bind it to cpu 0.
 564                  */
 565                 channel->numa_node = 0;
 566                 channel->target_cpu = 0;
 567                 channel->target_vp = hv_context.vp_index[0];
 568                 return;
 569         }
 570
 571         /*
 572          * Based on the channel affinity policy, we will assign the NUMA
 573          * nodes.
 574          */
 575
 576         if ((channel->affinity_policy == HV_BALANCED) || (!primary)) {
 577                 while (true) {
 578                         next_node = next_numa_node_id++;
 579                         if (next_node == nr_node_ids) {
 580                                 next_node = next_numa_node_id = 0;
 581                                 continue;
 582                         }
 583                         if (cpumask_empty(cpumask_of_node(next_node)))
 584                                 continue;
 585                         break;
 586                 }
 587                 channel->numa_node = next_node;
 588                 primary = channel;
 589         }
 590         alloced_mask = &hv_context.hv_numa_map[primary->numa_node];
 591
 592         if (cpumask_weight(alloced_mask) ==
 593             cpumask_weight(cpumask_of_node(primary->numa_node))) {
 594                 /*
 595                  * We have cycled through all the CPUs in the node;
 596                  * reset the alloced map.
 597                  */
 598                 cpumask_clear(alloced_mask);
 599         }
 600
 601         cpumask_xor(&available_mask, alloced_mask,
 602                     cpumask_of_node(primary->numa_node));
 603
 604         cur_cpu = -1;
 605
 606         if (primary->affinity_policy == HV_LOCALIZED) {
 607                 /*
 608                  * Normally Hyper-V host doesn't create more subchannels
 609                  * than there are VCPUs on the node but it is possible when not
 610                  * all present VCPUs on the node are initialized by guest.
 611                  * Clear the alloced_cpus_in_node to start over.
 612                  */
 613                 if (cpumask_equal(&primary->alloced_cpus_in_node,
 614                                   cpumask_of_node(primary->numa_node)))
 615                         cpumask_clear(&primary->alloced_cpus_in_node);
 616         }
 617
 618         while (true) {
 619                 cur_cpu = cpumask_next(cur_cpu, &available_mask);
 620                 if (cur_cpu >= nr_cpu_ids) {
 621                         cur_cpu = -1;
 622                         cpumask_copy(&available_mask,
 623                                      cpumask_of_node(primary->numa_node));
 624                         continue;
 625                 }
 626
 627                 if (primary->affinity_policy == HV_LOCALIZED) {
 628                         /*
 629                          * NOTE: in the case of sub-channel, we clear the
 630                          * sub-channel related bit(s) in
 631                          * primary->alloced_cpus_in_node in
 632                          * hv_process_channel_removal(), so when we
 633                          * reload drivers like hv_netvsc in SMP guest, here
 634                          * we're able to re-allocate
 635                          * bit from primary->alloced_cpus_in_node.
 636                          */
 637                         if (!cpumask_test_cpu(cur_cpu,
 638                                               &primary->alloced_cpus_in_node)) {
 639                                 cpumask_set_cpu(cur_cpu,
 640                                                 &primary->alloced_cpus_in_node);
 641                                 cpumask_set_cpu(cur_cpu, alloced_mask);
 642                                 break;
 643                         }
 644                 } else {
 645                         cpumask_set_cpu(cur_cpu, alloced_mask);
 646                         break;
 647                 }
 648         }
 649
 650         channel->target_cpu = cur_cpu;
 651         channel->target_vp = hv_context.vp_index[cur_cpu];
 652 }
 653
 654 static void vmbus_wait_for_unload(void)
 655 {
 656         int cpu;
 657         void *page_addr;
 658         struct hv_message *msg;
 659         struct vmbus_channel_message_header *hdr;
 660         u32 message_type;
 661
 662         /*
 663          * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was
 664          * used for initial contact or to CPU0 depending on host version. When
 665          * we're crashing on a different CPU let's hope that IRQ handler on
 666          * the cpu which receives CHANNELMSG_UNLOAD_RESPONSE is still
 667          * functional and vmbus_unload_response() will complete
 668          * vmbus_connection.unload_event. If not, the last thing we can do is
 669          * read message pages for all CPUs directly.
 670          */
 671         while (1) {
 672                 if (completion_done(&vmbus_connection.unload_event))
 673                         break;
 674
 675                 for_each_online_cpu(cpu) {
 676                         page_addr = hv_context.synic_message_page[cpu];
 677                         msg = (struct hv_message *)page_addr +
 678                                 VMBUS_MESSAGE_SINT;
 679
 680                         message_type = READ_ONCE(msg->header.message_type);
 681                         if (message_type == HVMSG_NONE)
 682                                 continue;
 683
 684                         hdr = (struct vmbus_channel_message_header *)
 685                                 msg->u.payload;
 686
 687                         if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
 688                                 complete(&vmbus_connection.unload_event);
 689
 690                         vmbus_signal_eom(msg, message_type);
 691                 }
 692
 693                 mdelay(10);
 694         }
 695
 696         /*
 697          * We're crashing and already got the UNLOAD_RESPONSE, cleanup all
 698          * maybe-pending messages on all CPUs to be able to receive new
 699          * messages after we reconnect.
 700          */
 701         for_each_online_cpu(cpu) {
 702                 page_addr = hv_context.synic_message_page[cpu];
 703                 msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
 704                 msg->header.message_type = HVMSG_NONE;
 705         }
 706 }
 707
 708 /*
 709  * vmbus_unload_response - Handler for the unload response.
 710  */
 711 static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
 712 {
 713         /*
 714          * This is a global event; just wakeup the waiting thread.
 715          * Once we successfully unload, we can cleanup the monitor state.
 716          */
 717         complete(&vmbus_connection.unload_event);
 718 }
 719
 720 void vmbus_initiate_unload(bool crash)
 721 {
 722         struct vmbus_channel_message_header hdr;
 723
 724         /* Pre-Win2012R2 hosts don't support reconnect */
 725         if (vmbus_proto_version < VERSION_WIN8_1)
 726                 return;
 727
 728         init_completion(&vmbus_connection.unload_event);
 729         memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
 730         hdr.msgtype = CHANNELMSG_UNLOAD;
 731         vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header));
 732
 733         /*
 734          * vmbus_initiate_unload() is also called on crash and the crash can be
 735          * happening in an interrupt context, where scheduling is impossible.
 736          */
 737         if (!crash)
 738                 wait_for_completion(&vmbus_connection.unload_event);
 739         else
 740                 vmbus_wait_for_unload();
 741 }
 742
 743 /*
 744  * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
 745  *
 746  */
 747 static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
 748 {
 749         struct vmbus_channel_offer_channel *offer;
 750         struct vmbus_channel *newchannel;
 751
 752         offer = (struct vmbus_channel_offer_channel *)hdr;
 753
 754         /* Allocate the channel object and save this offer. */
 755         newchannel = alloc_channel();
 756         if (!newchannel) {
 757                 pr_err("Unable to allocate channel object\n");
 758                 return;
 759         }
 760
 761         /*
 762          * By default we setup state to enable batched
 763          * reading. A specific service can choose to
 764          * disable this prior to opening the channel.
 765          */
 766         newchannel->batched_reading = true;
 767
 768         /*
 769          * Setup state for signalling the host.
 770          */
 771         newchannel->sig_event = (struct hv_input_signal_event *)
 772                                 (ALIGN((unsigned long)
 773                                 &newchannel->sig_buf,
 774                                 HV_HYPERCALL_PARAM_ALIGN));
 775
 776         newchannel->sig_event->connectionid.asu32 = 0;
 777         newchannel->sig_event->connectionid.u.id = VMBUS_EVENT_CONNECTION_ID;
 778         newchannel->sig_event->flag_number = 0;
 779         newchannel->sig_event->rsvdz = 0;
 780
 781         if (vmbus_proto_version != VERSION_WS2008) {
 782                 newchannel->is_dedicated_interrupt =
 783                                 (offer->is_dedicated_interrupt != 0);
 784                 newchannel->sig_event->connectionid.u.id =
 785                                 offer->connection_id;
 786         }
 787
 788         memcpy(&newchannel->offermsg, offer,
 789                sizeof(struct vmbus_channel_offer_channel));
 790         newchannel->monitor_grp = (u8)offer->monitorid / 32;
 791         newchannel->monitor_bit = (u8)offer->monitorid % 32;
 792
 793         vmbus_process_offer(newchannel);
 794 }
 795
 796 /*
 797  * vmbus_onoffer_rescind - Rescind offer handler.
 798  *
 799  * We queue a work item to process this offer synchronously
 800  */
 801 static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
 802 {
 803         struct vmbus_channel_rescind_offer *rescind;
 804         struct vmbus_channel *channel;
 805         unsigned long flags;
 806         struct device *dev;
 807
 808         rescind = (struct vmbus_channel_rescind_offer *)hdr;
 809
 810         mutex_lock(&vmbus_connection.channel_mutex);
 811         channel = relid2channel(rescind->child_relid);
 812
 813         if (channel == NULL) {
 814                 /*
 815                  * This is very impossible, because in
 816                  * vmbus_process_offer(), we have already invoked
 817                  * vmbus_release_relid() on error.
 818                  */
 819                 goto out;
 820         }
 821
 822         spin_lock_irqsave(&channel->lock, flags);
 823         channel->rescind = true;
 824         spin_unlock_irqrestore(&channel->lock, flags);
 825
 826         if (channel->device_obj) {
 827                 if (channel->chn_rescind_callback) {
 828                         channel->chn_rescind_callback(channel);
 829                         goto out;
 830                 }
 831                 /*
 832                  * We will have to unregister this device from the
 833                  * driver core.
 834                  */
 835                 dev = get_device(&channel->device_obj->device);
 836                 if (dev) {
 837                         vmbus_device_unregister(channel->device_obj);
 838                         put_device(dev);
 839                 }
 840         } else {
 841                 hv_process_channel_removal(channel,
 842                         channel->offermsg.child_relid);
 843         }
 844
 845 out:
 846         mutex_unlock(&vmbus_connection.channel_mutex);
 847 }
 848
 849 void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
 850 {
 851         mutex_lock(&vmbus_connection.channel_mutex);
 852
 853         BUG_ON(!is_hvsock_channel(channel));
 854
 855         channel->rescind = true;
 856         vmbus_device_unregister(channel->device_obj);
 857
 858         mutex_unlock(&vmbus_connection.channel_mutex);
 859 }
 860 EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);
 861
 862
 863 /*
 864  * vmbus_onoffers_delivered -
 865  * This is invoked when all offers have been delivered.
 866  *
 867  * Nothing to do here.
 868  */
 869 static void vmbus_onoffers_delivered(
 870                         struct vmbus_channel_message_header *hdr)
 871 {
 872 }
 873
 874 /*
 875  * vmbus_onopen_result - Open result handler.
 876  *
 877  * This is invoked when we received a response to our channel open request.
 878  * Find the matching request, copy the response and signal the requesting
 879  * thread.
 880  */
 881 static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
 882 {
 883         struct vmbus_channel_open_result *result;
 884         struct vmbus_channel_msginfo *msginfo;
 885         struct vmbus_channel_message_header *requestheader;
 886         struct vmbus_channel_open_channel *openmsg;
 887         unsigned long flags;
 888
 889         result = (struct vmbus_channel_open_result *)hdr;
 890
 891         /*
 892          * Find the open msg, copy the result and signal/unblock the wait event
 893          */
 894         spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
 895
 896         list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
 897                                 msglistentry) {
 898                 requestheader =
 899                         (struct vmbus_channel_message_header *)msginfo->msg;
 900
 901                 if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
 902                         openmsg =
 903                         (struct vmbus_channel_open_channel *)msginfo->msg;
 904                         if (openmsg->child_relid == result->child_relid &&
 905                             openmsg->openid == result->openid) {
 906                                 memcpy(&msginfo->response.open_result,
 907                                        result,
 908                                        sizeof(
 909                                         struct vmbus_channel_open_result));
 910                                 complete(&msginfo->waitevent);
 911                                 break;
 912                         }
 913                 }
 914         }
 915         spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
 916 }
 917
 918 /*
 919  * vmbus_ongpadl_created - GPADL created handler.
 920  *
 921  * This is invoked when we received a response to our gpadl create request.
 922  * Find the matching request, copy the response and signal the requesting
 923  * thread.
 924  */
 925 static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
 926 {
 927         struct vmbus_channel_gpadl_created *gpadlcreated;
 928         struct vmbus_channel_msginfo *msginfo;
 929         struct vmbus_channel_message_header *requestheader;
 930         struct vmbus_channel_gpadl_header *gpadlheader;
 931         unsigned long flags;
 932
 933         gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;
 934
 935         /*
 936          * Find the establish msg, copy the result and signal/unblock the wait
 937          * event
 938          */
 939         spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
 940
 941         list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
 942                                 msglistentry) {
 943                 requestheader =
 944                         (struct vmbus_channel_message_header *)msginfo->msg;
 945
 946                 if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
 947                         gpadlheader =
 948                         (struct vmbus_channel_gpadl_header *)requestheader;
 949
 950                         if ((gpadlcreated->child_relid ==
 951                              gpadlheader->child_relid) &&
 952                             (gpadlcreated->gpadl == gpadlheader->gpadl)) {
 953                                 memcpy(&msginfo->response.gpadl_created,
 954                                        gpadlcreated,
 955                                        sizeof(
 956                                         struct vmbus_channel_gpadl_created));
 957                                 complete(&msginfo->waitevent);
 958                                 break;
 959                         }
 960                 }
 961         }
 962         spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
 963 }
 964
 965 /*
 966  * vmbus_ongpadl_torndown - GPADL torndown handler.
 967  *
 968  * This is invoked when we received a response to our gpadl teardown request.
 969  * Find the matching request, copy the response and signal the requesting
 970  * thread.
 971  */
 972 static void vmbus_ongpadl_torndown(
 973                         struct vmbus_channel_message_header *hdr)
 974 {
 975         struct vmbus_channel_gpadl_torndown *gpadl_torndown;
 976         struct vmbus_channel_msginfo *msginfo;
 977         struct vmbus_channel_message_header *requestheader;
 978         struct vmbus_channel_gpadl_teardown *gpadl_teardown;
 979         unsigned long flags;
 980
 981         gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;
 982
 983         /*
 984          * Find the open msg, copy the result and signal/unblock the wait event
 985          */
 986         spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
 987
 988         list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
 989                                 msglistentry) {
 990                 requestheader =
 991                         (struct vmbus_channel_message_header *)msginfo->msg;
 992
 993                 if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
 994                         gpadl_teardown =
 995                         (struct vmbus_channel_gpadl_teardown *)requestheader;
 996
 997                         if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
 998                                 memcpy(&msginfo->response.gpadl_torndown,
 999                                        gpadl_torndown,
1000                                        sizeof(
1001                                         struct vmbus_channel_gpadl_torndown));
1002                                 complete(&msginfo->waitevent);
1003                                 break;
1004                         }
1005                 }
1006         }
1007         spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
1008 }
1009
1010 /*
1011  * vmbus_onversion_response - Version response handler
1012  *
1013  * This is invoked when we received a response to our initiate contact request.
1014  * Find the matching request, copy the response and signal the requesting
1015  * thread.
1016  */
1017 static void vmbus_onversion_response(
1018                 struct vmbus_channel_message_header *hdr)
1019 {
1020         struct vmbus_channel_msginfo *msginfo;
1021         struct vmbus_channel_message_header *requestheader;
1022         struct vmbus_channel_version_response *version_response;
1023         unsigned long flags;
1024
1025         version_response = (struct vmbus_channel_version_response *)hdr;
1026         spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
1027
1028         list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
1029                                 msglistentry) {
1030                 requestheader =
1031                         (struct vmbus_channel_message_header *)msginfo->msg;
1032
1033                 if (requestheader->msgtype ==
1034                     CHANNELMSG_INITIATE_CONTACT) {
1035                         memcpy(&msginfo->response.version_response,
1036                               version_response,
1037                               sizeof(struct vmbus_channel_version_response));
1038                         complete(&msginfo->waitevent);
1039                 }
1040         }
1041         spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
1042 }
1043
1044 /* Channel message dispatch table */
1045 struct vmbus_channel_message_table_entry
1046         channel_message_table[CHANNELMSG_COUNT] = {
1047         {CHANNELMSG_INVALID,                    0, NULL},
1048         {CHANNELMSG_OFFERCHANNEL,               0, vmbus_onoffer},
1049         {CHANNELMSG_RESCIND_CHANNELOFFER,       0, vmbus_onoffer_rescind},
1050         {CHANNELMSG_REQUESTOFFERS,              0, NULL},
1051         {CHANNELMSG_ALLOFFERS_DELIVERED,        1, vmbus_onoffers_delivered},
1052         {CHANNELMSG_OPENCHANNEL,                0, NULL},
1053         {CHANNELMSG_OPENCHANNEL_RESULT,         1, vmbus_onopen_result},
1054         {CHANNELMSG_CLOSECHANNEL,               0, NULL},
1055         {CHANNELMSG_GPADL_HEADER,               0, NULL},
1056         {CHANNELMSG_GPADL_BODY,                 0, NULL},
1057         {CHANNELMSG_GPADL_CREATED,              1, vmbus_ongpadl_created},
1058         {CHANNELMSG_GPADL_TEARDOWN,             0, NULL},
1059         {CHANNELMSG_GPADL_TORNDOWN,             1, vmbus_ongpadl_torndown},
1060         {CHANNELMSG_RELID_RELEASED,             0, NULL},
1061         {CHANNELMSG_INITIATE_CONTACT,           0, NULL},
1062         {CHANNELMSG_VERSION_RESPONSE,           1, vmbus_onversion_response},
1063         {CHANNELMSG_UNLOAD,                     0, NULL},
1064         {CHANNELMSG_UNLOAD_RESPONSE,            1, vmbus_unload_response},
1065         {CHANNELMSG_18,                         0, NULL},
1066         {CHANNELMSG_19,                         0, NULL},
1067         {CHANNELMSG_20,                         0, NULL},
1068         {CHANNELMSG_TL_CONNECT_REQUEST,         0, NULL},
1069 };
1070
1071 /*
1072  * vmbus_onmessage - Handler for channel protocol messages.
1073  *
1074  * This is invoked in the vmbus worker thread context.
1075  */
1076 void vmbus_onmessage(void *context)
1077 {
1078         struct hv_message *msg = context;
1079         struct vmbus_channel_message_header *hdr;
1080         int size;
1081
1082         hdr = (struct vmbus_channel_message_header *)msg->u.payload;
1083         size = msg->header.payload_size;
1084
1085         if (hdr->msgtype >= CHANNELMSG_COUNT) {
1086                 pr_err("Received invalid channel message type %d size %d\n",
1087                            hdr->msgtype, size);
1088                 print_hex_dump_bytes("", DUMP_PREFIX_NONE,
1089                                      (unsigned char *)msg->u.payload, size);
1090                 return;
1091         }
1092
1093         if (channel_message_table[hdr->msgtype].message_handler)
1094                 channel_message_table[hdr->msgtype].message_handler(hdr);
1095         else
1096                 pr_err("Unhandled channel message type %d\n", hdr->msgtype);
1097 }
1098
1099 /*
1100  * vmbus_request_offers - Send a request to get all our pending offers.
1101  */
1102 int vmbus_request_offers(void)
1103 {
1104         struct vmbus_channel_message_header *msg;
1105         struct vmbus_channel_msginfo *msginfo;
1106         int ret;
1107
1108         msginfo = kmalloc(sizeof(*msginfo) +
1109                           sizeof(struct vmbus_channel_message_header),
1110                           GFP_KERNEL);
1111         if (!msginfo)
1112                 return -ENOMEM;
1113
1114         msg = (struct vmbus_channel_message_header *)msginfo->msg;
1115
1116         msg->msgtype = CHANNELMSG_REQUESTOFFERS;
1117
1118
1119         ret = vmbus_post_msg(msg,
1120                                sizeof(struct vmbus_channel_message_header));
1121         if (ret != 0) {
1122                 pr_err("Unable to request offers - %d\n", ret);
1123
1124                 goto cleanup;
1125         }
1126
1127 cleanup:
1128         kfree(msginfo);
1129
1130         return ret;
1131 }
1132
1133 /*
1134  * Retrieve the (sub) channel on which to send an outgoing request.
1135  * When a primary channel has multiple sub-channels, we try to
1136  * distribute the load equally amongst all available channels.
1137  */
1138 struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
1139 {
1140         struct list_head *cur, *tmp;
1141         int cur_cpu;
1142         struct vmbus_channel *cur_channel;
1143         struct vmbus_channel *outgoing_channel = primary;
1144         int next_channel;
1145         int i = 1;
1146
1147         if (list_empty(&primary->sc_list))
1148                 return outgoing_channel;
1149
1150         next_channel = primary->next_oc++;
1151
1152         if (next_channel > (primary->num_sc)) {
1153                 primary->next_oc = 0;
1154                 return outgoing_channel;
1155         }
1156
1157         cur_cpu = hv_context.vp_index[get_cpu()];
1158         put_cpu();
1159         list_for_each_safe(cur, tmp, &primary->sc_list) {
1160                 cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
1161                 if (cur_channel->state != CHANNEL_OPENED_STATE)
1162                         continue;
1163
1164                 if (cur_channel->target_vp == cur_cpu)
1165                         return cur_channel;
1166
1167                 if (i == next_channel)
1168                         return cur_channel;
1169
1170                 i++;
1171         }
1172
1173         return outgoing_channel;
1174 }
1175 EXPORT_SYMBOL_GPL(vmbus_get_outgoing_channel);
1176
1177 static void invoke_sc_cb(struct vmbus_channel *primary_channel)
1178 {
1179         struct list_head *cur, *tmp;
1180         struct vmbus_channel *cur_channel;
1181
1182         if (primary_channel->sc_creation_callback == NULL)
1183                 return;
1184
1185         list_for_each_safe(cur, tmp, &primary_channel->sc_list) {
1186                 cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
1187
1188                 primary_channel->sc_creation_callback(cur_channel);
1189         }
1190 }
1191
1192 void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
1193                                 void (*sc_cr_cb)(struct vmbus_channel *new_sc))
1194 {
1195         primary_channel->sc_creation_callback = sc_cr_cb;
1196 }
1197 EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);
1198
1199 bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
1200 {
1201         bool ret;
1202
1203         ret = !list_empty(&primary->sc_list);
1204
1205         if (ret) {
1206                 /*
1207                  * Invoke the callback on sub-channel creation.
1208                  * This will present a uniform interface to the
1209                  * clients.
1210                  */
1211                 invoke_sc_cb(primary);
1212         }
1213
1214         return ret;
1215 }
1216 EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);
1217
1218 void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel,
1219                 void (*chn_rescind_cb)(struct vmbus_channel *))
1220 {
1221         channel->chn_rescind_callback = chn_rescind_cb;
1222 }
1223 EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback);