Merge tag 'virtio-next-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 9fbdfcd..cdc7c90 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -124,12 +124,14 @@ struct virtnet_info {
        /* Lock for config space updates */
        struct mutex config_lock;
 
+       /* Page_frag for GFP_KERNEL packet buffer allocation when we run
+        * low on memory.
+        */
+       struct page_frag alloc_frag;
+
        /* Is the affinity hint set for the virtqueues? */
        bool affinity_hint_set;
 
-       /* Per-cpu variable to show the mapping from CPU to virtqueue */
-       int __percpu *vq_index;
-
        /* CPU hot plug notifier */
        struct notifier_block nb;
 };
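
Note: the new alloc_frag lets the driver carve MAX_PACKET_LEN-sized receive
buffers out of a shared page instead of dedicating a whole page per buffer.
A minimal sketch of the carving pattern this field enables (carve_frag_buf
is a hypothetical name; skb_page_frag_refill() and the page_frag fields are
the real APIs used later in this patch):

        /* Carve one sz-byte buffer from a page_frag.  Each carved buffer
         * takes its own page reference and is released later with
         * put_page(virt_to_head_page(buf)).
         */
        static char *carve_frag_buf(struct page_frag *f, unsigned int sz,
                                    gfp_t gfp)
        {
                char *buf;

                if (!skb_page_frag_refill(sz, f, gfp))
                        return NULL;            /* could not get a page */

                buf = (char *)page_address(f->page) + f->offset;
                get_page(f->page);              /* one ref per buffer */
                f->offset += sz;
                return buf;
        }
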
@@ -217,33 +219,18 @@ static void skb_xmit_done(struct virtqueue *vq)
        netif_wake_subqueue(vi->dev, vq2txq(vq));
 }
 
-static void set_skb_frag(struct sk_buff *skb, struct page *page,
-                        unsigned int offset, unsigned int *len)
-{
-       int size = min((unsigned)PAGE_SIZE - offset, *len);
-       int i = skb_shinfo(skb)->nr_frags;
-
-       __skb_fill_page_desc(skb, i, page, offset, size);
-
-       skb->data_len += size;
-       skb->len += size;
-       skb->truesize += PAGE_SIZE;
-       skb_shinfo(skb)->nr_frags++;
-       skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
-       *len -= size;
-}
-
 /* Called from bottom half context */
 static struct sk_buff *page_to_skb(struct receive_queue *rq,
-                                  struct page *page, unsigned int len)
+                                  struct page *page, unsigned int offset,
+                                  unsigned int len, unsigned int truesize)
 {
        struct virtnet_info *vi = rq->vq->vdev->priv;
        struct sk_buff *skb;
        struct skb_vnet_hdr *hdr;
-       unsigned int copy, hdr_len, offset;
+       unsigned int copy, hdr_len, hdr_padded_len;
        char *p;
 
-       p = page_address(page);
+       p = page_address(page) + offset;
 
        /* copy small packet so we can reuse these pages for small data */
        skb = netdev_alloc_skb_ip_align(vi->dev, GOOD_COPY_LEN);
@@ -254,16 +241,17 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq,
 
        if (vi->mergeable_rx_bufs) {
                hdr_len = sizeof hdr->mhdr;
-               offset = hdr_len;
+               hdr_padded_len = sizeof hdr->mhdr;
        } else {
                hdr_len = sizeof hdr->hdr;
-               offset = sizeof(struct padded_vnet_hdr);
+               hdr_padded_len = sizeof(struct padded_vnet_hdr);
        }
 
        memcpy(hdr, p, hdr_len);
 
        len -= hdr_len;
-       p += offset;
+       offset += hdr_padded_len;
+       p += hdr_padded_len;
 
        copy = len;
        if (copy > skb_tailroom(skb))
@@ -273,6 +261,14 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq,
        len -= copy;
        offset += copy;
 
+       if (vi->mergeable_rx_bufs) {
+               if (len)
+                       skb_add_rx_frag(skb, 0, page, offset, len, truesize);
+               else
+                       put_page(page);
+               return skb;
+       }
+
        /*
         * Verify that we can indeed put this data into a skb.
         * This is here to handle cases when the device erroneously
@@ -284,9 +280,12 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq,
                dev_kfree_skb(skb);
                return NULL;
        }
-
+       BUG_ON(offset >= PAGE_SIZE);
        while (len) {
-               set_skb_frag(skb, page, offset, &len);
+               unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
+               skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
+                               frag_size, truesize);
+               len -= frag_size;
                page = (struct page *)page->private;
                offset = 0;
        }
@@ -297,33 +296,59 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq,
        return skb;
 }
 
-static int receive_mergeable(struct receive_queue *rq, struct sk_buff *skb)
+static int receive_mergeable(struct receive_queue *rq, struct sk_buff *head_skb)
 {
-       struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
+       struct skb_vnet_hdr *hdr = skb_vnet_hdr(head_skb);
+       struct sk_buff *curr_skb = head_skb;
+       char *buf;
        struct page *page;
-       int num_buf, i, len;
+       int num_buf, len, offset;
 
        num_buf = hdr->mhdr.num_buffers;
        while (--num_buf) {
-               i = skb_shinfo(skb)->nr_frags;
-               if (i >= MAX_SKB_FRAGS) {
-                       pr_debug("%s: packet too long\n", skb->dev->name);
-                       skb->dev->stats.rx_length_errors++;
-                       return -EINVAL;
-               }
-               page = virtqueue_get_buf(rq->vq, &len);
-               if (!page) {
+               int num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
+               buf = virtqueue_get_buf(rq->vq, &len);
+               if (unlikely(!buf)) {
                        pr_debug("%s: rx error: %d buffers missing\n",
-                                skb->dev->name, hdr->mhdr.num_buffers);
-                       skb->dev->stats.rx_length_errors++;
+                                head_skb->dev->name, hdr->mhdr.num_buffers);
+                       head_skb->dev->stats.rx_length_errors++;
                        return -EINVAL;
                }
-
-               if (len > PAGE_SIZE)
-                       len = PAGE_SIZE;
-
-               set_skb_frag(skb, page, 0, &len);
-
+               if (unlikely(len > MAX_PACKET_LEN)) {
+                       pr_debug("%s: rx error: merge buffer too long\n",
+                                head_skb->dev->name);
+                       len = MAX_PACKET_LEN;
+               }
+               if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
+                       struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC);
+                       if (unlikely(!nskb)) {
+                               head_skb->dev->stats.rx_dropped++;
+                               return -ENOMEM;
+                       }
+                       if (curr_skb == head_skb)
+                               skb_shinfo(curr_skb)->frag_list = nskb;
+                       else
+                               curr_skb->next = nskb;
+                       curr_skb = nskb;
+                       head_skb->truesize += nskb->truesize;
+                       num_skb_frags = 0;
+               }
+               if (curr_skb != head_skb) {
+                       head_skb->data_len += len;
+                       head_skb->len += len;
+                       head_skb->truesize += MAX_PACKET_LEN;
+               }
+               page = virt_to_head_page(buf);
+               offset = buf - (char *)page_address(page);
+               if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
+                       put_page(page);
+                       skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
+                                            len, MAX_PACKET_LEN);
+               } else {
+                       skb_add_rx_frag(curr_skb, num_skb_frags, page,
+                                       offset, len,
+                                       MAX_PACKET_LEN);
+               }
                --rq->num;
        }
        return 0;
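
Note: receive_mergeable() now makes a three-way placement decision per merge
buffer: extend the previous fragment when the new buffer is physically
contiguous with it, otherwise append a new fragment, and once MAX_SKB_FRAGS
is reached, chain a fresh zero-byte skb onto head_skb's frag_list and keep
filling that.  In outline (chain_new_skb() is a hypothetical condensation of
the alloc_skb/frag_list block above; the rest are real helpers):

        if (num_skb_frags == MAX_SKB_FRAGS)
                curr_skb = chain_new_skb(head_skb, curr_skb);  /* frag_list */

        if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
                /* Contiguous with the last frag: grow it in place and
                 * drop the page reference we would otherwise keep. */
                put_page(page);
                skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
                                     len, MAX_PACKET_LEN);
        } else {
                skb_add_rx_frag(curr_skb, num_skb_frags, page, offset,
                                len, MAX_PACKET_LEN);
        }
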
@@ -341,8 +366,10 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
        if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
                pr_debug("%s: short packet %i\n", dev->name, len);
                dev->stats.rx_length_errors++;
-               if (vi->mergeable_rx_bufs || vi->big_packets)
+               if (vi->big_packets)
                        give_pages(rq, buf);
+               else if (vi->mergeable_rx_bufs)
+                       put_page(virt_to_head_page(buf));
                else
                        dev_kfree_skb(buf);
                return;
@@ -352,19 +379,28 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
                skb = buf;
                len -= sizeof(struct virtio_net_hdr);
                skb_trim(skb, len);
+       } else if (vi->mergeable_rx_bufs) {
+               struct page *page = virt_to_head_page(buf);
+               skb = page_to_skb(rq, page,
+                                 (char *)buf - (char *)page_address(page),
+                                 len, MAX_PACKET_LEN);
+               if (unlikely(!skb)) {
+                       dev->stats.rx_dropped++;
+                       put_page(page);
+                       return;
+               }
+               if (receive_mergeable(rq, skb)) {
+                       dev_kfree_skb(skb);
+                       return;
+               }
        } else {
                page = buf;
-               skb = page_to_skb(rq, page, len);
+               skb = page_to_skb(rq, page, 0, len, PAGE_SIZE);
                if (unlikely(!skb)) {
                        dev->stats.rx_dropped++;
                        give_pages(rq, page);
                        return;
                }
-               if (vi->mergeable_rx_bufs)
-                       if (receive_mergeable(rq, skb)) {
-                               dev_kfree_skb(skb);
-                               return;
-                       }
        }
 
        hdr = skb_vnet_hdr(skb);
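
Note: with mergeable buffers the token handed to the virtqueue is now a raw
pointer into a (possibly compound) page, not a struct page *.  Consumers
therefore first recover the owning head page and the offset within it — the
idiom used in both receive_buf() and receive_mergeable() (real helpers):

        struct page *page = virt_to_head_page(buf);
        unsigned int offset = buf - (char *)page_address(page);

        /* 'page' carries the reference count; 'offset' locates the data,
         * so page_to_skb()/skb_add_rx_frag() can attach it zero-copy. */
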
@@ -501,18 +537,28 @@ static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp)
 
 static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
 {
-       struct page *page;
+       struct virtnet_info *vi = rq->vq->vdev->priv;
+       char *buf = NULL;
        int err;
 
-       page = get_a_page(rq, gfp);
-       if (!page)
+       if (gfp & __GFP_WAIT) {
+               if (skb_page_frag_refill(MAX_PACKET_LEN, &vi->alloc_frag,
+                                        gfp)) {
+                       buf = (char *)page_address(vi->alloc_frag.page) +
+                             vi->alloc_frag.offset;
+                       get_page(vi->alloc_frag.page);
+                       vi->alloc_frag.offset += MAX_PACKET_LEN;
+               }
+       } else {
+               buf = netdev_alloc_frag(MAX_PACKET_LEN);
+       }
+       if (!buf)
                return -ENOMEM;
 
-       sg_init_one(rq->sg, page_address(page), PAGE_SIZE);
-
-       err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, page, gfp);
+       sg_init_one(rq->sg, buf, MAX_PACKET_LEN);
+       err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp);
        if (err < 0)
-               give_pages(rq, page);
+               put_page(virt_to_head_page(buf));
 
        return err;
 }
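
Note: this establishes the ownership rule for mergeable buffers: each posted
buffer holds exactly one reference on its head page (get_page() on the
GFP_KERNEL path, taken implicitly by netdev_alloc_frag() otherwise), and
every exit path must drop it.  A sketch of the lifecycle, assuming a
hypothetical allocation helper:

        buf = alloc_mergeable_buf(rq, gfp);             /* page ref: +1 */
        if (!buf)
                return -ENOMEM;

        sg_init_one(rq->sg, buf, MAX_PACKET_LEN);
        err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp);
        if (err < 0)
                put_page(virt_to_head_page(buf));       /* never posted: -1 */

        /* Buffers the device does hand back are released in receive_buf()
         * or free_unused_bufs(), again via put_page(virt_to_head_page()). */
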
@@ -545,7 +591,8 @@ static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp)
        } while (rq->vq->num_free);
        if (unlikely(rq->num > rq->max))
                rq->max = rq->num;
-       virtqueue_kick(rq->vq);
+       if (unlikely(!virtqueue_kick(rq->vq)))
+               return false;
        return !oom;
 }
 
@@ -751,7 +798,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
        err = xmit_skb(sq, skb);
 
        /* This should not happen! */
-       if (unlikely(err)) {
+       if (unlikely(err) || unlikely(!virtqueue_kick(sq->vq))) {
                dev->stats.tx_fifo_errors++;
                if (net_ratelimit())
                        dev_warn(&dev->dev,
@@ -760,7 +807,6 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
                kfree_skb(skb);
                return NETDEV_TX_OK;
        }
-       virtqueue_kick(sq->vq);
 
        /* Don't wait up for transmitted skbs to be freed. */
        skb_orphan(skb);
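
Note: virtqueue_kick() now returns bool — false only when the virtqueue is
broken (e.g. surprise removal) — so callers no longer kick unconditionally
and instead fold a failed kick into their error handling: try_fill_recv()
returns false, and start_xmit() treats it like a failed add.  The contract,
roughly (illustrative fragment):

        /*
         * bool virtqueue_kick(struct virtqueue *vq):
         *   true  - notification sent, or legitimately suppressed by the
         *           device (event-index/interrupt suppression);
         *   false - the virtqueue is broken: stop using it.
         */
        if (unlikely(!virtqueue_kick(vq)))
                goto drop;      /* fold into the existing error leg */
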
@@ -819,12 +865,14 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
        BUG_ON(virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC)
               < 0);
 
-       virtqueue_kick(vi->cvq);
+       if (unlikely(!virtqueue_kick(vi->cvq)))
+               return status == VIRTIO_NET_OK;
 
        /* Spin for a response; the kick causes an ioport write, trapping
         * into the hypervisor, so the request should be handled immediately.
         */
-       while (!virtqueue_get_buf(vi->cvq, &tmp))
+       while (!virtqueue_get_buf(vi->cvq, &tmp) &&
+              !virtqueue_is_broken(vi->cvq))
                cpu_relax();
 
        return status == VIRTIO_NET_OK;
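
Note: the companion virtqueue_is_broken() keeps the busy-wait above from
spinning forever if the device dies mid-command: the ring is marked broken,
the loop exits, and status (initialized to ~0 earlier in
virtnet_send_command()) compares unequal to VIRTIO_NET_OK, reporting
failure.  The generic shape of the pattern (illustrative skeleton, not new
code; the reply fills 'status' through the in-direction sg):

        if (unlikely(!virtqueue_kick(vq)))
                return false;                   /* device already gone */
        while (!virtqueue_get_buf(vq, &len) && !virtqueue_is_broken(vq))
                cpu_relax();                    /* bounded spin */
        return status == VIRTIO_NET_OK;
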
@@ -852,8 +900,13 @@ static int virtnet_set_mac_address(struct net_device *dev, void *p)
                        return -EINVAL;
                }
        } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
-               vdev->config->set(vdev, offsetof(struct virtio_net_config, mac),
-                                 addr->sa_data, dev->addr_len);
+               unsigned int i;
+
+               /* Naturally, this has an atomicity problem. */
+               for (i = 0; i < dev->addr_len; i++)
+                       virtio_cwrite8(vdev,
+                                      offsetof(struct virtio_net_config, mac) +
+                                      i, addr->sa_data[i]);
        }
 
        eth_commit_mac_addr_change(dev, p);
@@ -1065,7 +1118,6 @@ static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
 static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu)
 {
        int i;
-       int cpu;
 
        if (vi->affinity_hint_set) {
                for (i = 0; i < vi->max_queue_pairs; i++) {
@@ -1075,16 +1127,6 @@ static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu)
 
                vi->affinity_hint_set = false;
        }
-
-       i = 0;
-       for_each_online_cpu(cpu) {
-               if (cpu == hcpu) {
-                       *per_cpu_ptr(vi->vq_index, cpu) = -1;
-               } else {
-                       *per_cpu_ptr(vi->vq_index, cpu) =
-                               ++i % vi->curr_queue_pairs;
-               }
-       }
 }
 
 static void virtnet_set_affinity(struct virtnet_info *vi)
@@ -1106,7 +1148,7 @@ static void virtnet_set_affinity(struct virtnet_info *vi)
        for_each_online_cpu(cpu) {
                virtqueue_set_affinity(vi->rq[i].vq, cpu);
                virtqueue_set_affinity(vi->sq[i].vq, cpu);
-               *per_cpu_ptr(vi->vq_index, cpu) = i;
+               netif_set_xps_queue(vi->dev, cpumask_of(cpu), i);
                i++;
        }
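
Note: rather than maintaining a per-cpu vq_index by hand and resolving it in
a driver-private ndo_select_queue, the driver now publishes the same
CPU-to-queue mapping through XPS; the core's netdev_pick_tx() consults it on
the fast path, which is why virtnet_select_queue and vq_index disappear
elsewhere in this patch.  The call has this meaning (real API):

        /* "CPUs in 'mask' should transmit on tx queue 'i'": the core
         * records this mapping internally and uses it when picking a
         * queue, replacing the driver-private lookup. */
        netif_set_xps_queue(vi->dev, cpumask_of(cpu), i);
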
 
@@ -1118,11 +1160,6 @@ static int virtnet_cpu_callback(struct notifier_block *nfb,
 {
        struct virtnet_info *vi = container_of(nfb, struct virtnet_info, nb);
 
-       mutex_lock(&vi->config_lock);
-
-       if (!vi->config_enable)
-               goto done;
-
        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_ONLINE:
        case CPU_DOWN_FAILED:
@@ -1136,8 +1173,6 @@ static int virtnet_cpu_callback(struct notifier_block *nfb,
                break;
        }
 
-done:
-       mutex_unlock(&vi->config_lock);
        return NOTIFY_OK;
 }
 
@@ -1227,28 +1262,6 @@ static int virtnet_change_mtu(struct net_device *dev, int new_mtu)
        return 0;
 }
 
-/* To avoid contending a lock hold by a vcpu who would exit to host, select the
- * txq based on the processor id.
- */
-static u16 virtnet_select_queue(struct net_device *dev, struct sk_buff *skb)
-{
-       int txq;
-       struct virtnet_info *vi = netdev_priv(dev);
-
-       if (skb_rx_queue_recorded(skb)) {
-               txq = skb_get_rx_queue(skb);
-       } else {
-               txq = *__this_cpu_ptr(vi->vq_index);
-               if (txq == -1)
-                       txq = 0;
-       }
-
-       while (unlikely(txq >= dev->real_num_tx_queues))
-               txq -= dev->real_num_tx_queues;
-
-       return txq;
-}
-
 static const struct net_device_ops virtnet_netdev = {
        .ndo_open            = virtnet_open,
        .ndo_stop            = virtnet_close,
@@ -1260,7 +1273,6 @@ static const struct net_device_ops virtnet_netdev = {
        .ndo_get_stats64     = virtnet_stats,
        .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
-       .ndo_select_queue     = virtnet_select_queue,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller = virtnet_netpoll,
 #endif
@@ -1276,9 +1288,8 @@ static void virtnet_config_changed_work(struct work_struct *work)
        if (!vi->config_enable)
                goto done;
 
-       if (virtio_config_val(vi->vdev, VIRTIO_NET_F_STATUS,
-                             offsetof(struct virtio_net_config, status),
-                             &v) < 0)
+       if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS,
+                                struct virtio_net_config, status, &v) < 0)
                goto done;
 
        if (v & VIRTIO_NET_S_ANNOUNCE) {
@@ -1343,8 +1354,10 @@ static void free_unused_bufs(struct virtnet_info *vi)
                struct virtqueue *vq = vi->rq[i].vq;
 
                while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
-                       if (vi->mergeable_rx_bufs || vi->big_packets)
+                       if (vi->big_packets)
                                give_pages(&vi->rq[i], buf);
+                       else if (vi->mergeable_rx_bufs)
+                               put_page(virt_to_head_page(buf));
                        else
                                dev_kfree_skb(buf);
                        --vi->rq[i].num;
@@ -1500,9 +1513,9 @@ static int virtnet_probe(struct virtio_device *vdev)
        u16 max_queue_pairs;
 
        /* Find if host supports multiqueue virtio_net device */
-       err = virtio_config_val(vdev, VIRTIO_NET_F_MQ,
-                               offsetof(struct virtio_net_config,
-                               max_virtqueue_pairs), &max_queue_pairs);
+       err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ,
+                                  struct virtio_net_config,
+                                  max_virtqueue_pairs, &max_queue_pairs);
 
        /* We need at least 2 queues */
        if (err || max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
@@ -1554,9 +1567,11 @@ static int virtnet_probe(struct virtio_device *vdev)
        dev->vlan_features = dev->features;
 
        /* Configuration may specify what MAC to use.  Otherwise random. */
-       if (virtio_config_val_len(vdev, VIRTIO_NET_F_MAC,
-                                 offsetof(struct virtio_net_config, mac),
-                                 dev->dev_addr, dev->addr_len) < 0)
+       if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC))
+               virtio_cread_bytes(vdev,
+                                  offsetof(struct virtio_net_config, mac),
+                                  dev->dev_addr, dev->addr_len);
+       else
                eth_hw_addr_random(dev);
 
        /* Set up our device-specific information */
@@ -1569,9 +1584,12 @@ static int virtnet_probe(struct virtio_device *vdev)
        if (vi->stats == NULL)
                goto free;
 
-       vi->vq_index = alloc_percpu(int);
-       if (vi->vq_index == NULL)
-               goto free_stats;
+       for_each_possible_cpu(i) {
+               struct virtnet_stats *virtnet_stats;
+               virtnet_stats = per_cpu_ptr(vi->stats, i);
+               u64_stats_init(&virtnet_stats->tx_syncp);
+               u64_stats_init(&virtnet_stats->rx_syncp);
+       }
 
        mutex_init(&vi->config_lock);
        vi->config_enable = true;
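
Note: u64_stats_init() seeds the per-cpu seqcounts so that lockdep's
seqcount validation does not trip on 32-bit SMP, where 64-bit counters need
the begin/retry protocol.  The syncps initialized here are then used in the
usual writer/reader pairing (pattern as elsewhere in this driver; variable
declarations elided):

        /* writer, e.g. the tx path: */
        u64_stats_update_begin(&stats->tx_syncp);
        stats->tx_bytes += skb->len;
        stats->tx_packets++;
        u64_stats_update_end(&stats->tx_syncp);

        /* reader, e.g. virtnet_stats(): */
        do {
                start = u64_stats_fetch_begin_bh(&stats->tx_syncp);
                tpackets = stats->tx_packets;
                tbytes = stats->tx_bytes;
        } while (u64_stats_fetch_retry_bh(&stats->tx_syncp, start));
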
@@ -1599,7 +1617,7 @@ static int virtnet_probe(struct virtio_device *vdev)
        /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
        err = init_vqs(vi);
        if (err)
-               goto free_index;
+               goto free_stats;
 
        netif_set_real_num_tx_queues(dev, 1);
        netif_set_real_num_rx_queues(dev, 1);
@@ -1650,8 +1668,8 @@ free_recv_bufs:
 free_vqs:
        cancel_delayed_work_sync(&vi->refill);
        virtnet_del_vqs(vi);
-free_index:
-       free_percpu(vi->vq_index);
+       if (vi->alloc_frag.page)
+               put_page(vi->alloc_frag.page);
 free_stats:
        free_percpu(vi->stats);
 free:
@@ -1685,20 +1703,23 @@ static void virtnet_remove(struct virtio_device *vdev)
        unregister_netdev(vi->dev);
 
        remove_vq_common(vi);
+       if (vi->alloc_frag.page)
+               put_page(vi->alloc_frag.page);
 
        flush_work(&vi->config_work);
 
-       free_percpu(vi->vq_index);
        free_percpu(vi->stats);
        free_netdev(vi->dev);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int virtnet_freeze(struct virtio_device *vdev)
 {
        struct virtnet_info *vi = vdev->priv;
        int i;
 
+       unregister_hotcpu_notifier(&vi->nb);
+
        /* Prevent config work handler from accessing the device */
        mutex_lock(&vi->config_lock);
        vi->config_enable = false;
@@ -1747,6 +1768,10 @@ static int virtnet_restore(struct virtio_device *vdev)
        virtnet_set_queues(vi, vi->curr_queue_pairs);
        rtnl_unlock();
 
+       err = register_hotcpu_notifier(&vi->nb);
+       if (err)
+               return err;
+
        return 0;
 }
 #endif
@@ -1778,7 +1803,7 @@ static struct virtio_driver virtio_net_driver = {
        .probe =        virtnet_probe,
        .remove =       virtnet_remove,
        .config_changed = virtnet_config_changed,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
        .freeze =       virtnet_freeze,
        .restore =      virtnet_restore,
 #endif