hv_netvsc: Eliminate completion_func from struct hv_netvsc_packet
[cascardo/linux.git] / drivers / net / hyperv / netvsc_drv.c
index 409b48e..7f4f6c3 100644 (file)
@@ -42,6 +42,7 @@
 
 
 #define RING_SIZE_MIN 64
+#define LINKCHANGE_INT (2 * HZ)
 static int ring_size = 128;
 module_param(ring_size, int, S_IRUGO);
 MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
@@ -272,17 +273,10 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
                skb_set_hash(skb, hash, PKT_HASH_TYPE_L3);
        }
 
-       return q_idx;
-}
-
-void netvsc_xmit_completion(void *context)
-{
-       struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
-       struct sk_buff *skb = (struct sk_buff *)
-               (unsigned long)packet->send_completion_tid;
+       if (!nvsc_dev->chn_table[q_idx])
+               q_idx = 0;
 
-       if (skb)
-               dev_kfree_skb_any(skb);
+       return q_idx;
 }
 
 static u32 fill_pg_buf(struct page *page, u32 offset, u32 len,
@@ -320,9 +314,10 @@ static u32 fill_pg_buf(struct page *page, u32 offset, u32 len,
 }
 
 static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb,
-                          struct hv_netvsc_packet *packet)
+                          struct hv_netvsc_packet *packet,
+                          struct hv_page_buffer **page_buf)
 {
-       struct hv_page_buffer *pb = packet->page_buf;
+       struct hv_page_buffer *pb = *page_buf;
        u32 slots_used = 0;
        char *data = skb->data;
        int frags = skb_shinfo(skb)->nr_frags;
@@ -432,8 +427,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
        u32 net_trans_info;
        u32 hash;
        u32 skb_length;
-       u32 pkt_sz;
        struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
+       struct hv_page_buffer *pb = page_buf;
        struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats);
 
        /* We will atmost need two pages to describe the rndis
@@ -460,42 +455,38 @@ check_size:
                goto check_size;
        }
 
-       pkt_sz = sizeof(struct hv_netvsc_packet) + RNDIS_AND_PPI_SIZE;
-
-       ret = skb_cow_head(skb, pkt_sz);
+       /*
+        * Place the rndis header in the skb head room; skb->cb
+        * is used to hold the hv_netvsc_packet structure
+        * itself.
+        */
+       ret = skb_cow_head(skb, RNDIS_AND_PPI_SIZE);
        if (ret) {
                netdev_err(net, "unable to alloc hv_netvsc_packet\n");
                ret = -ENOMEM;
                goto drop;
        }
-       /* Use the headroom for building up the packet */
-       packet = (struct hv_netvsc_packet *)skb->head;
+       /* Use the skb control buffer for building up the packet */
+       BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) >
+                       FIELD_SIZEOF(struct sk_buff, cb));
+       packet = (struct hv_netvsc_packet *)skb->cb;
 
        packet->status = 0;
        packet->xmit_more = skb->xmit_more;
 
        packet->vlan_tci = skb->vlan_tci;
-       packet->page_buf = page_buf;
 
        packet->q_idx = skb_get_queue_mapping(skb);
 
-       packet->is_data_pkt = true;
        packet->total_data_buflen = skb->len;
 
-       packet->rndis_msg = (struct rndis_message *)((unsigned long)packet +
-                               sizeof(struct hv_netvsc_packet));
+       rndis_msg = (struct rndis_message *)skb->head;
 
-       memset(packet->rndis_msg, 0, RNDIS_AND_PPI_SIZE);
-
-       /* Set the completion routine */
-       packet->send_completion = netvsc_xmit_completion;
-       packet->send_completion_ctx = packet;
-       packet->send_completion_tid = (unsigned long)skb;
+       memset(rndis_msg, 0, RNDIS_AND_PPI_SIZE);
 
        isvlan = packet->vlan_tci & VLAN_TAG_PRESENT;
 
        /* Add the rndis header */
-       rndis_msg = packet->rndis_msg;
        rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET;
        rndis_msg->msg_len = packet->total_data_buflen;
        rndis_pkt = &rndis_msg->msg.pkt;
@@ -617,9 +608,10 @@ do_send:
        rndis_msg->msg_len += rndis_msg_size;
        packet->total_data_buflen = rndis_msg->msg_len;
        packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
-                                              skb, packet);
+                                              skb, packet, &pb);
 
-       ret = netvsc_send(net_device_ctx->device_ctx, packet);
+       ret = netvsc_send(net_device_ctx->device_ctx, packet,
+                         rndis_msg, &pb, skb);
 
 drop:
        if (ret == 0) {
@@ -647,37 +639,33 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj,
        struct net_device *net;
        struct net_device_context *ndev_ctx;
        struct netvsc_device *net_device;
-       struct rndis_device *rdev;
+       struct netvsc_reconfig *event;
+       unsigned long flags;
 
-       net_device = hv_get_drvdata(device_obj);
-       rdev = net_device->extension;
-
-       switch (indicate->status) {
-       case RNDIS_STATUS_MEDIA_CONNECT:
-               rdev->link_state = false;
-               break;
-       case RNDIS_STATUS_MEDIA_DISCONNECT:
-               rdev->link_state = true;
-               break;
-       case RNDIS_STATUS_NETWORK_CHANGE:
-               rdev->link_change = true;
-               break;
-       default:
+       /* Handle link change statuses only */
+       if (indicate->status != RNDIS_STATUS_NETWORK_CHANGE &&
+           indicate->status != RNDIS_STATUS_MEDIA_CONNECT &&
+           indicate->status != RNDIS_STATUS_MEDIA_DISCONNECT)
                return;
-       }
 
+       net_device = hv_get_drvdata(device_obj);
        net = net_device->ndev;
 
        if (!net || net->reg_state != NETREG_REGISTERED)
                return;
 
        ndev_ctx = netdev_priv(net);
-       if (!rdev->link_state) {
-               schedule_delayed_work(&ndev_ctx->dwork, 0);
-               schedule_delayed_work(&ndev_ctx->dwork, msecs_to_jiffies(20));
-       } else {
-               schedule_delayed_work(&ndev_ctx->dwork, 0);
-       }
+
+       event = kzalloc(sizeof(*event), GFP_ATOMIC);
+       if (!event)
+               return;
+       event->event = indicate->status;
+
+       spin_lock_irqsave(&ndev_ctx->lock, flags);
+       list_add_tail(&event->list, &ndev_ctx->reconfig_events);
+       spin_unlock_irqrestore(&ndev_ctx->lock, flags);
+
+       schedule_delayed_work(&ndev_ctx->dwork, 0);
 }
 
 /*
@@ -686,7 +674,9 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj,
  */
 int netvsc_recv_callback(struct hv_device *device_obj,
                                struct hv_netvsc_packet *packet,
-                               struct ndis_tcp_ip_checksum_info *csum_info)
+                               void **data,
+                               struct ndis_tcp_ip_checksum_info *csum_info,
+                               struct vmbus_channel *channel)
 {
        struct net_device *net;
        struct net_device_context *net_device_ctx;
@@ -713,7 +703,7 @@ int netvsc_recv_callback(struct hv_device *device_obj,
         * Copy to skb. This copy is needed here since the memory pointed by
         * hv_netvsc_packet cannot be deallocated
         */
-       memcpy(skb_put(skb, packet->total_data_buflen), packet->data,
+       memcpy(skb_put(skb, packet->total_data_buflen), *data,
                packet->total_data_buflen);
 
        skb->protocol = eth_type_trans(skb, net);
@@ -732,7 +722,7 @@ int netvsc_recv_callback(struct hv_device *device_obj,
                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
                                       packet->vlan_tci);
 
-       skb_record_rx_queue(skb, packet->channel->
+       skb_record_rx_queue(skb, channel->
                            offermsg.offer.sub_channel_index);
 
        u64_stats_update_begin(&rx_stats->syncp);
@@ -1009,12 +999,9 @@ static const struct net_device_ops device_ops = {
 };
 
 /*
- * Send GARP packet to network peers after migrations.
- * After Quick Migration, the network is not immediately operational in the
- * current context when receiving RNDIS_STATUS_MEDIA_CONNECT event. So, add
- * another netif_notify_peers() into a delayed work, otherwise GARP packet
- * will not be sent after quick migration, and cause network disconnection.
- * Also, we update the carrier status here.
+ * Handle link status changes. For RNDIS_STATUS_NETWORK_CHANGE, emulate a link
+ * down/up sequence. For RNDIS_STATUS_MEDIA_CONNECT when the carrier is already
+ * present, send a GARP packet to network peers with netdev_notify_peers().
  */
 static void netvsc_link_change(struct work_struct *w)
 {
@@ -1022,36 +1009,89 @@ static void netvsc_link_change(struct work_struct *w)
        struct net_device *net;
        struct netvsc_device *net_device;
        struct rndis_device *rdev;
-       bool notify, refresh = false;
-       char *argv[] = { "/etc/init.d/network", "restart", NULL };
-       char *envp[] = { "HOME=/", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
-
-       rtnl_lock();
+       struct netvsc_reconfig *event = NULL;
+       bool notify = false, reschedule = false;
+       unsigned long flags, next_reconfig, delay;
 
        ndev_ctx = container_of(w, struct net_device_context, dwork.work);
        net_device = hv_get_drvdata(ndev_ctx->device_ctx);
        rdev = net_device->extension;
        net = net_device->ndev;
 
-       if (rdev->link_state) {
-               netif_carrier_off(net);
-               notify = false;
-       } else {
-               netif_carrier_on(net);
-               notify = true;
-               if (rdev->link_change) {
-                       rdev->link_change = false;
-                       refresh = true;
+       next_reconfig = ndev_ctx->last_reconfig + LINKCHANGE_INT;
+       if (time_is_after_jiffies(next_reconfig)) {
+               /* link_watch only sends one notification with current state
+                * per second, so avoid doing reconfig more frequently. Clamp
+                * the delay to LINKCHANGE_INT to handle jiffies wrap-around.
+                */
+               delay = next_reconfig - jiffies;
+               delay = delay < LINKCHANGE_INT ? delay : LINKCHANGE_INT;
+               schedule_delayed_work(&ndev_ctx->dwork, delay);
+               return;
+       }
+       ndev_ctx->last_reconfig = jiffies;
+
+       spin_lock_irqsave(&ndev_ctx->lock, flags);
+       if (!list_empty(&ndev_ctx->reconfig_events)) {
+               event = list_first_entry(&ndev_ctx->reconfig_events,
+                                        struct netvsc_reconfig, list);
+               list_del(&event->list);
+               reschedule = !list_empty(&ndev_ctx->reconfig_events);
+       }
+       spin_unlock_irqrestore(&ndev_ctx->lock, flags);
+
+       if (!event)
+               return;
+
+       rtnl_lock();
+
+       switch (event->event) {
+               /* Only the following events are possible due to the check in
+                * netvsc_linkstatus_callback()
+                */
+       case RNDIS_STATUS_MEDIA_CONNECT:
+               if (rdev->link_state) {
+                       rdev->link_state = false;
+                       netif_carrier_on(net);
+                       netif_tx_wake_all_queues(net);
+               } else {
+                       notify = true;
                }
+               kfree(event);
+               break;
+       case RNDIS_STATUS_MEDIA_DISCONNECT:
+               if (!rdev->link_state) {
+                       rdev->link_state = true;
+                       netif_carrier_off(net);
+                       netif_tx_stop_all_queues(net);
+               }
+               kfree(event);
+               break;
+       case RNDIS_STATUS_NETWORK_CHANGE:
+               /* Only makes sense if carrier is present */
+               if (!rdev->link_state) {
+                       rdev->link_state = true;
+                       netif_carrier_off(net);
+                       netif_tx_stop_all_queues(net);
+                       event->event = RNDIS_STATUS_MEDIA_CONNECT;
+                       spin_lock_irqsave(&ndev_ctx->lock, flags);
+                       list_add_tail(&event->list, &ndev_ctx->reconfig_events);
+                       spin_unlock_irqrestore(&ndev_ctx->lock, flags);
+                       reschedule = true;
+               }
+               break;
        }
 
        rtnl_unlock();
 
-       if (refresh)
-               call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
-
        if (notify)
                netdev_notify_peers(net);
+
+       /* link_watch only sends one notification with current state per
+        * second, so handle the next reconfig event in LINKCHANGE_INT (2 s).
+        */
+       if (reschedule)
+               schedule_delayed_work(&ndev_ctx->dwork, LINKCHANGE_INT);
 }
 
 static void netvsc_free_netdev(struct net_device *netdev)
@@ -1071,16 +1111,12 @@ static int netvsc_probe(struct hv_device *dev,
        struct netvsc_device_info device_info;
        struct netvsc_device *nvdev;
        int ret;
-       u32 max_needed_headroom;
 
        net = alloc_etherdev_mq(sizeof(struct net_device_context),
                                num_online_cpus());
        if (!net)
                return -ENOMEM;
 
-       max_needed_headroom = sizeof(struct hv_netvsc_packet) +
-                             RNDIS_AND_PPI_SIZE;
-
        netif_carrier_off(net);
 
        net_device_ctx = netdev_priv(net);
@@ -1106,6 +1142,9 @@ static int netvsc_probe(struct hv_device *dev,
        INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
        INIT_WORK(&net_device_ctx->work, do_set_multicast);
 
+       spin_lock_init(&net_device_ctx->lock);
+       INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
+
        net->netdev_ops = &device_ops;
 
        net->hw_features = NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM |
@@ -1116,13 +1155,6 @@ static int netvsc_probe(struct hv_device *dev,
        net->ethtool_ops = &ethtool_ops;
        SET_NETDEV_DEV(net, &dev->device);
 
-       /*
-        * Request additional head room in the skb.
-        * We will use this space to build the rndis
-        * heaser and other state we need to maintain.
-        */
-       net->needed_headroom = max_needed_headroom;
-
        /* Notify the netvsc driver of the new device */
        memset(&device_info, 0, sizeof(device_info));
        device_info.ring_size = ring_size;
@@ -1145,8 +1177,6 @@ static int netvsc_probe(struct hv_device *dev,
                pr_err("Unable to register netdev.\n");
                rndis_filter_device_remove(dev);
                netvsc_free_netdev(net);
-       } else {
-               schedule_delayed_work(&net_device_ctx->dwork, 0);
        }
 
        return ret;