net-timestamp: no-payload option in txtimestamp test
[cascardo/linux.git] / net / core / dev.c
index 683d493..1d564d6 100644 (file)
@@ -371,9 +371,10 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
 static inline struct list_head *ptype_head(const struct packet_type *pt)
 {
        if (pt->type == htons(ETH_P_ALL))
-               return &ptype_all;
+               return pt->dev ? &pt->dev->ptype_all : &ptype_all;
        else
-               return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
+               return pt->dev ? &pt->dev->ptype_specific :
+                                &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
 }
 
 /**
@@ -1734,6 +1735,23 @@ static inline int deliver_skb(struct sk_buff *skb,
        return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
 
+static inline void deliver_ptype_list_skb(struct sk_buff *skb,
+                                         struct packet_type **pt,
+                                         struct net_device *dev, __be16 type,
+                                         struct list_head *ptype_list)
+{
+       struct packet_type *ptype, *pt_prev = *pt;
+
+       list_for_each_entry_rcu(ptype, ptype_list, list) {
+               if (ptype->type != type)
+                       continue;
+               if (pt_prev)
+                       deliver_skb(skb, pt_prev, dev);
+               pt_prev = ptype;
+       }
+       *pt = pt_prev;
+}
+
 static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
 {
        if (!ptype->af_packet_priv || !skb->sk)
@@ -1757,45 +1775,54 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
        struct packet_type *ptype;
        struct sk_buff *skb2 = NULL;
        struct packet_type *pt_prev = NULL;
+       struct list_head *ptype_list = &ptype_all;
 
        rcu_read_lock();
-       list_for_each_entry_rcu(ptype, &ptype_all, list) {
+again:
+       list_for_each_entry_rcu(ptype, ptype_list, list) {
                /* Never send packets back to the socket
                 * they originated from - MvS (miquels@drinkel.ow.org)
                 */
-               if ((ptype->dev == dev || !ptype->dev) &&
-                   (!skb_loop_sk(ptype, skb))) {
-                       if (pt_prev) {
-                               deliver_skb(skb2, pt_prev, skb->dev);
-                               pt_prev = ptype;
-                               continue;
-                       }
+               if (skb_loop_sk(ptype, skb))
+                       continue;
 
-                       skb2 = skb_clone(skb, GFP_ATOMIC);
-                       if (!skb2)
-                               break;
+               if (pt_prev) {
+                       deliver_skb(skb2, pt_prev, skb->dev);
+                       pt_prev = ptype;
+                       continue;
+               }
 
-                       net_timestamp_set(skb2);
+               /* need to clone skb, done only once */
+               skb2 = skb_clone(skb, GFP_ATOMIC);
+               if (!skb2)
+                       goto out_unlock;
 
-                       /* skb->nh should be correctly
-                          set by sender, so that the second statement is
-                          just protection against buggy protocols.
-                        */
-                       skb_reset_mac_header(skb2);
-
-                       if (skb_network_header(skb2) < skb2->data ||
-                           skb_network_header(skb2) > skb_tail_pointer(skb2)) {
-                               net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
-                                                    ntohs(skb2->protocol),
-                                                    dev->name);
-                               skb_reset_network_header(skb2);
-                       }
+               net_timestamp_set(skb2);
 
-                       skb2->transport_header = skb2->network_header;
-                       skb2->pkt_type = PACKET_OUTGOING;
-                       pt_prev = ptype;
+               /* skb->nh should already be set correctly
+                * by the sender, so the header bounds check below
+                * is just protection against buggy protocols.
+                */
+               skb_reset_mac_header(skb2);
+
+               if (skb_network_header(skb2) < skb2->data ||
+                   skb_network_header(skb2) > skb_tail_pointer(skb2)) {
+                       net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
+                                            ntohs(skb2->protocol),
+                                            dev->name);
+                       skb_reset_network_header(skb2);
                }
+
+               skb2->transport_header = skb2->network_header;
+               skb2->pkt_type = PACKET_OUTGOING;
+               pt_prev = ptype;
+       }
+
+       if (ptype_list == &ptype_all) {
+               ptype_list = &dev->ptype_all;
+               goto again;
        }
+out_unlock:
        if (pt_prev)
                pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
        rcu_read_unlock();
@@ -2578,7 +2605,7 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
        if (skb->encapsulation)
                features &= dev->hw_enc_features;
 
-       if (!vlan_tx_tag_present(skb)) {
+       if (!skb_vlan_tag_present(skb)) {
                if (unlikely(protocol == htons(ETH_P_8021Q) ||
                             protocol == htons(ETH_P_8021AD))) {
                        struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
@@ -2617,7 +2644,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev,
        unsigned int len;
        int rc;
 
-       if (!list_empty(&ptype_all))
+       if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all))
                dev_queue_xmit_nit(skb, dev);
 
        len = skb->len;
@@ -2659,7 +2686,7 @@ out:
 static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb,
                                          netdev_features_t features)
 {
-       if (vlan_tx_tag_present(skb) &&
+       if (skb_vlan_tag_present(skb) &&
            !vlan_hw_offload_capable(features, skb->vlan_proto))
                skb = __vlan_hwaccel_push_inside(skb);
        return skb;
@@ -3615,7 +3642,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
        struct packet_type *ptype, *pt_prev;
        rx_handler_func_t *rx_handler;
        struct net_device *orig_dev;
-       struct net_device *null_or_dev;
        bool deliver_exact = false;
        int ret = NET_RX_DROP;
        __be16 type;
@@ -3658,11 +3684,15 @@ another_round:
                goto skip_taps;
 
        list_for_each_entry_rcu(ptype, &ptype_all, list) {
-               if (!ptype->dev || ptype->dev == skb->dev) {
-                       if (pt_prev)
-                               ret = deliver_skb(skb, pt_prev, orig_dev);
-                       pt_prev = ptype;
-               }
+               if (pt_prev)
+                       ret = deliver_skb(skb, pt_prev, orig_dev);
+               pt_prev = ptype;
+       }
+
+       list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) {
+               if (pt_prev)
+                       ret = deliver_skb(skb, pt_prev, orig_dev);
+               pt_prev = ptype;
        }
 
 skip_taps:
@@ -3676,7 +3706,7 @@ ncls:
        if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
                goto drop;
 
-       if (vlan_tx_tag_present(skb)) {
+       if (skb_vlan_tag_present(skb)) {
                if (pt_prev) {
                        ret = deliver_skb(skb, pt_prev, orig_dev);
                        pt_prev = NULL;
@@ -3708,8 +3738,8 @@ ncls:
                }
        }
 
-       if (unlikely(vlan_tx_tag_present(skb))) {
-               if (vlan_tx_tag_get_id(skb))
+       if (unlikely(skb_vlan_tag_present(skb))) {
+               if (skb_vlan_tag_get_id(skb))
                        skb->pkt_type = PACKET_OTHERHOST;
                /* Note: we might in the future use prio bits
                 * and set skb->priority like in vlan_do_receive()
@@ -3718,19 +3748,21 @@ ncls:
                skb->vlan_tci = 0;
        }
 
+       type = skb->protocol;
+
        /* deliver only exact match when indicated */
-       null_or_dev = deliver_exact ? skb->dev : NULL;
+       if (likely(!deliver_exact)) {
+               deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
+                                      &ptype_base[ntohs(type) &
+                                                  PTYPE_HASH_MASK]);
+       }
 
-       type = skb->protocol;
-       list_for_each_entry_rcu(ptype,
-                       &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
-               if (ptype->type == type &&
-                   (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
-                    ptype->dev == orig_dev)) {
-                       if (pt_prev)
-                               ret = deliver_skb(skb, pt_prev, orig_dev);
-                       pt_prev = ptype;
-               }
+       deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
+                              &orig_dev->ptype_specific);
+
+       if (unlikely(skb->dev != orig_dev)) {
+               deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
+                                      &skb->dev->ptype_specific);
        }
 
        if (pt_prev) {
@@ -6172,13 +6204,16 @@ static int netif_alloc_rx_queues(struct net_device *dev)
 {
        unsigned int i, count = dev->num_rx_queues;
        struct netdev_rx_queue *rx;
+       size_t sz = count * sizeof(*rx);
 
        BUG_ON(count < 1);
 
-       rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
-       if (!rx)
-               return -ENOMEM;
-
+       rx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+       if (!rx) {
+               rx = vzalloc(sz);
+               if (!rx)
+                       return -ENOMEM;
+       }
        dev->_rx = rx;
 
        for (i = 0; i < count; i++)
@@ -6576,6 +6611,8 @@ void netdev_run_todo(void)
 
                /* paranoia */
                BUG_ON(netdev_refcnt_read(dev));
+               BUG_ON(!list_empty(&dev->ptype_all));
+               BUG_ON(!list_empty(&dev->ptype_specific));
                WARN_ON(rcu_access_pointer(dev->ip_ptr));
                WARN_ON(rcu_access_pointer(dev->ip6_ptr));
                WARN_ON(dev->dn_ptr);
@@ -6758,6 +6795,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
        INIT_LIST_HEAD(&dev->adj_list.lower);
        INIT_LIST_HEAD(&dev->all_adj_list.upper);
        INIT_LIST_HEAD(&dev->all_adj_list.lower);
+       INIT_LIST_HEAD(&dev->ptype_all);
+       INIT_LIST_HEAD(&dev->ptype_specific);
        dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
        setup(dev);
 
@@ -6808,7 +6847,7 @@ void free_netdev(struct net_device *dev)
 
        netif_free_tx_queues(dev);
 #ifdef CONFIG_SYSFS
-       kfree(dev->_rx);
+       kvfree(dev->_rx);
 #endif
 
        kfree(rcu_dereference_protected(dev->ingress_queue, 1));
@@ -7072,10 +7111,20 @@ static int dev_cpu_callback(struct notifier_block *nfb,
                oldsd->output_queue = NULL;
                oldsd->output_queue_tailp = &oldsd->output_queue;
        }
-       /* Append NAPI poll list from offline CPU. */
-       if (!list_empty(&oldsd->poll_list)) {
-               list_splice_init(&oldsd->poll_list, &sd->poll_list);
-               raise_softirq_irqoff(NET_RX_SOFTIRQ);
+       /* Append NAPI poll list from offline CPU, with one exception:
+        * process_backlog() must be called by the CPU owning the per-CPU
+        * backlog. We properly handle process_queue & input_pkt_queue later.
+        */
+       while (!list_empty(&oldsd->poll_list)) {
+               struct napi_struct *napi = list_first_entry(&oldsd->poll_list,
+                                                           struct napi_struct,
+                                                           poll_list);
+
+               list_del_init(&napi->poll_list);
+               if (napi->poll == process_backlog)
+                       napi->state = 0;
+               else
+                       ____napi_schedule(sd, napi);
        }
 
        raise_softirq_irqoff(NET_TX_SOFTIRQ);
@@ -7086,7 +7135,7 @@ static int dev_cpu_callback(struct notifier_block *nfb,
                netif_rx_internal(skb);
                input_queue_head_incr(oldsd);
        }
-       while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
+       while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
                netif_rx_internal(skb);
                input_queue_head_incr(oldsd);
        }