Merge tag 'virtio-next-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[cascardo/linux.git] / drivers / net / virtio_net.c
index 057ea13..cdc7c90 100644 (file)
@@ -124,12 +124,14 @@ struct virtnet_info {
        /* Lock for config space updates */
        struct mutex config_lock;
 
+       /* Page_frag for GFP_KERNEL packet buffer allocation when we run
+        * low on memory.
+        */
+       struct page_frag alloc_frag;
+
        /* Does the affinity hint is set for virtqueues? */
        bool affinity_hint_set;
 
-       /* Per-cpu variable to show the mapping from CPU to virtqueue */
-       int __percpu *vq_index;
-
        /* CPU hot plug notifier */
        struct notifier_block nb;
 };
@@ -217,33 +219,18 @@ static void skb_xmit_done(struct virtqueue *vq)
        netif_wake_subqueue(vi->dev, vq2txq(vq));
 }
 
-static void set_skb_frag(struct sk_buff *skb, struct page *page,
-                        unsigned int offset, unsigned int *len)
-{
-       int size = min((unsigned)PAGE_SIZE - offset, *len);
-       int i = skb_shinfo(skb)->nr_frags;
-
-       __skb_fill_page_desc(skb, i, page, offset, size);
-
-       skb->data_len += size;
-       skb->len += size;
-       skb->truesize += PAGE_SIZE;
-       skb_shinfo(skb)->nr_frags++;
-       skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
-       *len -= size;
-}
-
 /* Called from bottom half context */
 static struct sk_buff *page_to_skb(struct receive_queue *rq,
-                                  struct page *page, unsigned int len)
+                                  struct page *page, unsigned int offset,
+                                  unsigned int len, unsigned int truesize)
 {
        struct virtnet_info *vi = rq->vq->vdev->priv;
        struct sk_buff *skb;
        struct skb_vnet_hdr *hdr;
-       unsigned int copy, hdr_len, offset;
+       unsigned int copy, hdr_len, hdr_padded_len;
        char *p;
 
-       p = page_address(page);
+       p = page_address(page) + offset;
 
        /* copy small packet so we can reuse these pages for small data */
        skb = netdev_alloc_skb_ip_align(vi->dev, GOOD_COPY_LEN);
@@ -254,16 +241,17 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq,
 
        if (vi->mergeable_rx_bufs) {
                hdr_len = sizeof hdr->mhdr;
-               offset = hdr_len;
+               hdr_padded_len = sizeof hdr->mhdr;
        } else {
                hdr_len = sizeof hdr->hdr;
-               offset = sizeof(struct padded_vnet_hdr);
+               hdr_padded_len = sizeof(struct padded_vnet_hdr);
        }
 
        memcpy(hdr, p, hdr_len);
 
        len -= hdr_len;
-       p += offset;
+       offset += hdr_padded_len;
+       p += hdr_padded_len;
 
        copy = len;
        if (copy > skb_tailroom(skb))
@@ -273,6 +261,14 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq,
        len -= copy;
        offset += copy;
 
+       if (vi->mergeable_rx_bufs) {
+               if (len)
+                       skb_add_rx_frag(skb, 0, page, offset, len, truesize);
+               else
+                       put_page(page);
+               return skb;
+       }
+
        /*
         * Verify that we can indeed put this data into a skb.
         * This is here to handle cases when the device erroneously
@@ -284,9 +280,12 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq,
                dev_kfree_skb(skb);
                return NULL;
        }
-
+       BUG_ON(offset >= PAGE_SIZE);
        while (len) {
-               set_skb_frag(skb, page, offset, &len);
+               unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
+               skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
+                               frag_size, truesize);
+               len -= frag_size;
                page = (struct page *)page->private;
                offset = 0;
        }
@@ -297,33 +296,59 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq,
        return skb;
 }
 
-static int receive_mergeable(struct receive_queue *rq, struct sk_buff *skb)
+static int receive_mergeable(struct receive_queue *rq, struct sk_buff *head_skb)
 {
-       struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
+       struct skb_vnet_hdr *hdr = skb_vnet_hdr(head_skb);
+       struct sk_buff *curr_skb = head_skb;
+       char *buf;
        struct page *page;
-       int num_buf, i, len;
+       int num_buf, len, offset;
 
        num_buf = hdr->mhdr.num_buffers;
        while (--num_buf) {
-               i = skb_shinfo(skb)->nr_frags;
-               if (i >= MAX_SKB_FRAGS) {
-                       pr_debug("%s: packet too long\n", skb->dev->name);
-                       skb->dev->stats.rx_length_errors++;
-                       return -EINVAL;
-               }
-               page = virtqueue_get_buf(rq->vq, &len);
-               if (!page) {
+               int num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
+               buf = virtqueue_get_buf(rq->vq, &len);
+               if (unlikely(!buf)) {
                        pr_debug("%s: rx error: %d buffers missing\n",
-                                skb->dev->name, hdr->mhdr.num_buffers);
-                       skb->dev->stats.rx_length_errors++;
+                                head_skb->dev->name, hdr->mhdr.num_buffers);
+                       head_skb->dev->stats.rx_length_errors++;
                        return -EINVAL;
                }
-
-               if (len > PAGE_SIZE)
-                       len = PAGE_SIZE;
-
-               set_skb_frag(skb, page, 0, &len);
-
+               if (unlikely(len > MAX_PACKET_LEN)) {
+                       pr_debug("%s: rx error: merge buffer too long\n",
+                                head_skb->dev->name);
+                       len = MAX_PACKET_LEN;
+               }
+               if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
+                       struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC);
+                       if (unlikely(!nskb)) {
+                               head_skb->dev->stats.rx_dropped++;
+                               return -ENOMEM;
+                       }
+                       if (curr_skb == head_skb)
+                               skb_shinfo(curr_skb)->frag_list = nskb;
+                       else
+                               curr_skb->next = nskb;
+                       curr_skb = nskb;
+                       head_skb->truesize += nskb->truesize;
+                       num_skb_frags = 0;
+               }
+               if (curr_skb != head_skb) {
+                       head_skb->data_len += len;
+                       head_skb->len += len;
+                       head_skb->truesize += MAX_PACKET_LEN;
+               }
+               page = virt_to_head_page(buf);
+               offset = buf - (char *)page_address(page);
+               if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
+                       put_page(page);
+                       skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
+                                            len, MAX_PACKET_LEN);
+               } else {
+                       skb_add_rx_frag(curr_skb, num_skb_frags, page,
+                                       offset, len,
+                                       MAX_PACKET_LEN);
+               }
                --rq->num;
        }
        return 0;
@@ -341,8 +366,10 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
        if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
                pr_debug("%s: short packet %i\n", dev->name, len);
                dev->stats.rx_length_errors++;
-               if (vi->mergeable_rx_bufs || vi->big_packets)
+               if (vi->big_packets)
                        give_pages(rq, buf);
+               else if (vi->mergeable_rx_bufs)
+                       put_page(virt_to_head_page(buf));
                else
                        dev_kfree_skb(buf);
                return;
@@ -352,19 +379,28 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
                skb = buf;
                len -= sizeof(struct virtio_net_hdr);
                skb_trim(skb, len);
+       } else if (vi->mergeable_rx_bufs) {
+               struct page *page = virt_to_head_page(buf);
+               skb = page_to_skb(rq, page,
+                                 (char *)buf - (char *)page_address(page),
+                                 len, MAX_PACKET_LEN);
+               if (unlikely(!skb)) {
+                       dev->stats.rx_dropped++;
+                       put_page(page);
+                       return;
+               }
+               if (receive_mergeable(rq, skb)) {
+                       dev_kfree_skb(skb);
+                       return;
+               }
        } else {
                page = buf;
-               skb = page_to_skb(rq, page, len);
+               skb = page_to_skb(rq, page, 0, len, PAGE_SIZE);
                if (unlikely(!skb)) {
                        dev->stats.rx_dropped++;
                        give_pages(rq, page);
                        return;
                }
-               if (vi->mergeable_rx_bufs)
-                       if (receive_mergeable(rq, skb)) {
-                               dev_kfree_skb(skb);
-                               return;
-                       }
        }
 
        hdr = skb_vnet_hdr(skb);
@@ -501,18 +537,28 @@ static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp)
 
 static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
 {
-       struct page *page;
+       struct virtnet_info *vi = rq->vq->vdev->priv;
+       char *buf = NULL;
        int err;
 
-       page = get_a_page(rq, gfp);
-       if (!page)
+       if (gfp & __GFP_WAIT) {
+               if (skb_page_frag_refill(MAX_PACKET_LEN, &vi->alloc_frag,
+                                        gfp)) {
+                       buf = (char *)page_address(vi->alloc_frag.page) +
+                             vi->alloc_frag.offset;
+                       get_page(vi->alloc_frag.page);
+                       vi->alloc_frag.offset += MAX_PACKET_LEN;
+               }
+       } else {
+               buf = netdev_alloc_frag(MAX_PACKET_LEN);
+       }
+       if (!buf)
                return -ENOMEM;
 
-       sg_init_one(rq->sg, page_address(page), PAGE_SIZE);
-
-       err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, page, gfp);
+       sg_init_one(rq->sg, buf, MAX_PACKET_LEN);
+       err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp);
        if (err < 0)
-               give_pages(rq, page);
+               put_page(virt_to_head_page(buf));
 
        return err;
 }
@@ -945,7 +991,9 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
                return -EINVAL;
        } else {
                vi->curr_queue_pairs = queue_pairs;
-               schedule_delayed_work(&vi->refill, 0);
+               /* virtnet_open() will refill when device is going to up. */
+               if (dev->flags & IFF_UP)
+                       schedule_delayed_work(&vi->refill, 0);
        }
 
        return 0;
@@ -1070,7 +1118,6 @@ static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
 static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu)
 {
        int i;
-       int cpu;
 
        if (vi->affinity_hint_set) {
                for (i = 0; i < vi->max_queue_pairs; i++) {
@@ -1080,16 +1127,6 @@ static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu)
 
                vi->affinity_hint_set = false;
        }
-
-       i = 0;
-       for_each_online_cpu(cpu) {
-               if (cpu == hcpu) {
-                       *per_cpu_ptr(vi->vq_index, cpu) = -1;
-               } else {
-                       *per_cpu_ptr(vi->vq_index, cpu) =
-                               ++i % vi->curr_queue_pairs;
-               }
-       }
 }
 
 static void virtnet_set_affinity(struct virtnet_info *vi)
@@ -1111,7 +1148,7 @@ static void virtnet_set_affinity(struct virtnet_info *vi)
        for_each_online_cpu(cpu) {
                virtqueue_set_affinity(vi->rq[i].vq, cpu);
                virtqueue_set_affinity(vi->sq[i].vq, cpu);
-               *per_cpu_ptr(vi->vq_index, cpu) = i;
+               netif_set_xps_queue(vi->dev, cpumask_of(cpu), i);
                i++;
        }
 
@@ -1135,6 +1172,7 @@ static int virtnet_cpu_callback(struct notifier_block *nfb,
        default:
                break;
        }
+
        return NOTIFY_OK;
 }
 
@@ -1224,28 +1262,6 @@ static int virtnet_change_mtu(struct net_device *dev, int new_mtu)
        return 0;
 }
 
-/* To avoid contending a lock hold by a vcpu who would exit to host, select the
- * txq based on the processor id.
- */
-static u16 virtnet_select_queue(struct net_device *dev, struct sk_buff *skb)
-{
-       int txq;
-       struct virtnet_info *vi = netdev_priv(dev);
-
-       if (skb_rx_queue_recorded(skb)) {
-               txq = skb_get_rx_queue(skb);
-       } else {
-               txq = *__this_cpu_ptr(vi->vq_index);
-               if (txq == -1)
-                       txq = 0;
-       }
-
-       while (unlikely(txq >= dev->real_num_tx_queues))
-               txq -= dev->real_num_tx_queues;
-
-       return txq;
-}
-
 static const struct net_device_ops virtnet_netdev = {
        .ndo_open            = virtnet_open,
        .ndo_stop            = virtnet_close,
@@ -1257,7 +1273,6 @@ static const struct net_device_ops virtnet_netdev = {
        .ndo_get_stats64     = virtnet_stats,
        .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
-       .ndo_select_queue     = virtnet_select_queue,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller = virtnet_netpoll,
 #endif
@@ -1339,8 +1354,10 @@ static void free_unused_bufs(struct virtnet_info *vi)
                struct virtqueue *vq = vi->rq[i].vq;
 
                while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
-                       if (vi->mergeable_rx_bufs || vi->big_packets)
+                       if (vi->big_packets)
                                give_pages(&vi->rq[i], buf);
+                       else if (vi->mergeable_rx_bufs)
+                               put_page(virt_to_head_page(buf));
                        else
                                dev_kfree_skb(buf);
                        --vi->rq[i].num;
@@ -1567,9 +1584,12 @@ static int virtnet_probe(struct virtio_device *vdev)
        if (vi->stats == NULL)
                goto free;
 
-       vi->vq_index = alloc_percpu(int);
-       if (vi->vq_index == NULL)
-               goto free_stats;
+       for_each_possible_cpu(i) {
+               struct virtnet_stats *virtnet_stats;
+               virtnet_stats = per_cpu_ptr(vi->stats, i);
+               u64_stats_init(&virtnet_stats->tx_syncp);
+               u64_stats_init(&virtnet_stats->rx_syncp);
+       }
 
        mutex_init(&vi->config_lock);
        vi->config_enable = true;
@@ -1597,7 +1617,7 @@ static int virtnet_probe(struct virtio_device *vdev)
        /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
        err = init_vqs(vi);
        if (err)
-               goto free_index;
+               goto free_stats;
 
        netif_set_real_num_tx_queues(dev, 1);
        netif_set_real_num_rx_queues(dev, 1);
@@ -1648,8 +1668,8 @@ free_recv_bufs:
 free_vqs:
        cancel_delayed_work_sync(&vi->refill);
        virtnet_del_vqs(vi);
-free_index:
-       free_percpu(vi->vq_index);
+       if (vi->alloc_frag.page)
+               put_page(vi->alloc_frag.page);
 free_stats:
        free_percpu(vi->stats);
 free:
@@ -1683,10 +1703,11 @@ static void virtnet_remove(struct virtio_device *vdev)
        unregister_netdev(vi->dev);
 
        remove_vq_common(vi);
+       if (vi->alloc_frag.page)
+               put_page(vi->alloc_frag.page);
 
        flush_work(&vi->config_work);
 
-       free_percpu(vi->vq_index);
        free_percpu(vi->stats);
        free_netdev(vi->dev);
 }
@@ -1697,6 +1718,8 @@ static int virtnet_freeze(struct virtio_device *vdev)
        struct virtnet_info *vi = vdev->priv;
        int i;
 
+       unregister_hotcpu_notifier(&vi->nb);
+
        /* Prevent config work handler from accessing the device */
        mutex_lock(&vi->config_lock);
        vi->config_enable = false;
@@ -1741,7 +1764,13 @@ static int virtnet_restore(struct virtio_device *vdev)
        vi->config_enable = true;
        mutex_unlock(&vi->config_lock);
 
+       rtnl_lock();
        virtnet_set_queues(vi, vi->curr_queue_pairs);
+       rtnl_unlock();
+
+       err = register_hotcpu_notifier(&vi->nb);
+       if (err)
+               return err;
 
        return 0;
 }