#include <linux/hashtable.h>
#include <linux/vmalloc.h>
#include <linux/if_macvlan.h>
+#include <linux/errqueue.h>
#include "net-sysfs.h"
static struct list_head offload_base __read_mostly;
static int netif_rx_internal(struct sk_buff *skb);
+static int call_netdevice_notifiers_info(unsigned long val,
+ struct net_device *dev,
+ struct netdev_notifier_info *info);
/*
* The @dev_base_head list is protected by @dev_base_lock and the rtnl
*/
int dev_change_name(struct net_device *dev, const char *newname)
{
+ unsigned char old_assign_type;
char oldname[IFNAMSIZ];
int err = 0;
int ret;
return err;
}
+ if (oldname[0] && !strchr(oldname, '%'))
+ netdev_info(dev, "renamed from %s\n", oldname);
+
+ old_assign_type = dev->name_assign_type;
+ dev->name_assign_type = NET_NAME_RENAMED;
+
rollback:
ret = device_rename(&dev->dev, dev->name);
if (ret) {
memcpy(dev->name, oldname, IFNAMSIZ);
+ dev->name_assign_type = old_assign_type;
write_seqcount_end(&devnet_rename_seq);
return ret;
}
write_seqcount_begin(&devnet_rename_seq);
memcpy(dev->name, oldname, IFNAMSIZ);
memcpy(oldname, newname, IFNAMSIZ);
+ dev->name_assign_type = old_assign_type;
+ old_assign_type = NET_NAME_RENAMED;
goto rollback;
} else {
pr_err("%s: name change rollback failed: %d\n",
void netdev_state_change(struct net_device *dev)
{
if (dev->flags & IFF_UP) {
- call_netdevice_notifiers(NETDEV_CHANGE, dev);
+ struct netdev_notifier_change_info change_info;
+
+ change_info.flags_changed = 0;
+ call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
+ &change_info.info);
rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
}
}
* If we don't do this there is a chance ndo_poll_controller
* or ndo_poll may be running while we open the device
*/
- netpoll_rx_disable(dev);
+ netpoll_poll_disable(dev);
ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
ret = notifier_to_errno(ret);
if (!ret && ops->ndo_open)
ret = ops->ndo_open(dev);
- netpoll_rx_enable(dev);
+ netpoll_poll_enable(dev);
if (ret)
clear_bit(__LINK_STATE_START, &dev->state);
might_sleep();
list_for_each_entry(dev, head, close_list) {
+ /* Temporarily disable netpoll until the interface is down */
+ netpoll_poll_disable(dev);
+
call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
clear_bit(__LINK_STATE_START, &dev->state);
* dev->stop() will invoke napi_disable() on all of it's
* napi_struct instances on this device.
*/
- smp_mb__after_clear_bit(); /* Commit netif_running(). */
+ smp_mb__after_atomic(); /* Commit netif_running(). */
}
dev_deactivate_many(head);
ops->ndo_stop(dev);
dev->flags &= ~IFF_UP;
+ netpoll_poll_enable(dev);
}
return 0;
int retval;
LIST_HEAD(single);
- /* Temporarily disable netpoll until the interface is down */
- netpoll_rx_disable(dev);
-
list_add(&dev->close_list, &single);
retval = __dev_close_many(&single);
list_del(&single);
- netpoll_rx_enable(dev);
return retval;
}
if (dev->flags & IFF_UP) {
LIST_HEAD(single);
- /* Block netpoll rx while the interface is going down */
- netpoll_rx_disable(dev);
-
list_add(&dev->close_list, &single);
dev_close_many(&single);
list_del(&single);
-
- netpoll_rx_enable(dev);
}
return 0;
}
__net_timestamp(SKB); \
} \
-static inline bool is_skb_forwardable(struct net_device *dev,
- struct sk_buff *skb)
+bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb)
{
unsigned int len;
return false;
}
+EXPORT_SYMBOL_GPL(is_skb_forwardable);
+
+int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
+{
+ if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+ if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
+ atomic_long_inc(&dev->rx_dropped);
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
+ }
+
+ if (unlikely(!is_skb_forwardable(dev, skb))) {
+ atomic_long_inc(&dev->rx_dropped);
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
+
+ skb_scrub_packet(skb, true);
+ skb->protocol = eth_type_trans(skb, dev);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__dev_forward_skb);
/**
* dev_forward_skb - loopback an skb to another netif
*/
int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
{
- if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
- if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
- atomic_long_inc(&dev->rx_dropped);
- kfree_skb(skb);
- return NET_RX_DROP;
- }
- }
-
- if (unlikely(!is_skb_forwardable(dev, skb))) {
- atomic_long_inc(&dev->rx_dropped);
- kfree_skb(skb);
- return NET_RX_DROP;
- }
-
- skb_scrub_packet(skb, true);
- skb->protocol = eth_type_trans(skb, dev);
-
- return netif_rx_internal(skb);
+ return __dev_forward_skb(dev, skb) ?: netif_rx_internal(skb);
}
EXPORT_SYMBOL_GPL(dev_forward_skb);
}
EXPORT_SYMBOL(skb_checksum_help);
-__be16 skb_network_protocol(struct sk_buff *skb)
+__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
{
+ unsigned int vlan_depth = skb->mac_len;
__be16 type = skb->protocol;
- int vlan_depth = ETH_HLEN;
/* Tunnel gso handlers can set protocol to ethernet. */
if (type == htons(ETH_P_TEB)) {
type = eth->h_proto;
}
- while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
- struct vlan_hdr *vh;
+ /* if skb->protocol is 802.1Q/AD then the header should already be
+ * present at mac_len - VLAN_HLEN (if mac_len > 0), or at
+ * ETH_HLEN otherwise
+ */
+ if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
+ if (vlan_depth) {
+ if (WARN_ON(vlan_depth < VLAN_HLEN))
+ return 0;
+ vlan_depth -= VLAN_HLEN;
+ } else {
+ vlan_depth = ETH_HLEN;
+ }
+ do {
+ struct vlan_hdr *vh;
- if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
- return 0;
+ if (unlikely(!pskb_may_pull(skb,
+ vlan_depth + VLAN_HLEN)))
+ return 0;
- vh = (struct vlan_hdr *)(skb->data + vlan_depth);
- type = vh->h_vlan_encapsulated_proto;
- vlan_depth += VLAN_HLEN;
+ vh = (struct vlan_hdr *)(skb->data + vlan_depth);
+ type = vh->h_vlan_encapsulated_proto;
+ vlan_depth += VLAN_HLEN;
+ } while (type == htons(ETH_P_8021Q) ||
+ type == htons(ETH_P_8021AD));
}
+ *depth = vlan_depth;
+
return type;
}
{
struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
struct packet_offload *ptype;
- __be16 type = skb_network_protocol(skb);
+ int vlan_depth = skb->mac_len;
+ __be16 type = skb_network_protocol(skb, &vlan_depth);
if (unlikely(!type))
return ERR_PTR(-EINVAL);
- __skb_pull(skb, skb->mac_len);
+ __skb_pull(skb, vlan_depth);
rcu_read_lock();
list_for_each_entry_rcu(ptype, &offload_base, list) {
skb_warn_bad_offload(skb);
- if (skb_header_cloned(skb) &&
- (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+ err = skb_cow_head(skb, 0);
+ if (err < 0)
return ERR_PTR(err);
}
* 2. No high memory really exists on this machine.
*/
-static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb)
+static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
int i;
return 0;
}
+/* If MPLS offload request, verify we are testing hardware MPLS features
+ * instead of standard features for the netdev.
+ */
+#ifdef CONFIG_NET_MPLS_GSO
+static netdev_features_t net_mpls_features(struct sk_buff *skb,
+ netdev_features_t features,
+ __be16 type)
+{
+ if (type == htons(ETH_P_MPLS_UC) || type == htons(ETH_P_MPLS_MC))
+ features &= skb->dev->mpls_features;
+
+ return features;
+}
+#else
+static netdev_features_t net_mpls_features(struct sk_buff *skb,
+ netdev_features_t features,
+ __be16 type)
+{
+ return features;
+}
+#endif
+
static netdev_features_t harmonize_features(struct sk_buff *skb,
- const struct net_device *dev,
- netdev_features_t features)
+ netdev_features_t features)
{
+ int tmp;
+ __be16 type;
+
+ type = skb_network_protocol(skb, &tmp);
+ features = net_mpls_features(skb, features, type);
+
if (skb->ip_summed != CHECKSUM_NONE &&
- !can_checksum_protocol(features, skb_network_protocol(skb))) {
+ !can_checksum_protocol(features, type)) {
features &= ~NETIF_F_ALL_CSUM;
- } else if (illegal_highdma(dev, skb)) {
+ } else if (illegal_highdma(skb->dev, skb)) {
features &= ~NETIF_F_SG;
}
return features;
}
-netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
- const struct net_device *dev)
+netdev_features_t netif_skb_features(struct sk_buff *skb)
{
__be16 protocol = skb->protocol;
- netdev_features_t features = dev->features;
+ netdev_features_t features = skb->dev->features;
- if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs)
+ if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
features &= ~NETIF_F_GSO_MASK;
if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
protocol = veh->h_vlan_encapsulated_proto;
} else if (!vlan_tx_tag_present(skb)) {
- return harmonize_features(skb, dev, features);
+ return harmonize_features(skb, features);
}
- features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
- NETIF_F_HW_VLAN_STAG_TX);
+ features = netdev_intersect_features(features,
+ skb->dev->vlan_features |
+ NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_STAG_TX);
if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
- features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
- NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
- NETIF_F_HW_VLAN_STAG_TX;
+ features = netdev_intersect_features(features,
+ NETIF_F_SG |
+ NETIF_F_HIGHDMA |
+ NETIF_F_FRAGLIST |
+ NETIF_F_GEN_CSUM |
+ NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_STAG_TX);
- return harmonize_features(skb, dev, features);
+ return harmonize_features(skb, features);
}
-EXPORT_SYMBOL(netif_skb_dev_features);
+EXPORT_SYMBOL(netif_skb_features);
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq)
/*
* Heuristic to force contended enqueues to serialize on a
* separate lock before trying to get qdisc main lock.
- * This permits __QDISC_STATE_RUNNING owner to get the lock more often
- * and dequeue packets faster.
+ * This permits __QDISC___STATE_RUNNING owner to get the lock more
+ * often and dequeue packets faster.
*/
contended = qdisc_is_running(q);
if (unlikely(contended))
skb_reset_mac_header(skb);
+ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
+ __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED);
+
/* Disable soft irqs for various locks below. Also
* stops preemption for RCU.
*/
rc = -ENETDOWN;
rcu_read_unlock_bh();
+ atomic_long_inc(&dev->tx_dropped);
kfree_skb(skb);
return rc;
out:
flow_table = rcu_dereference(rxqueue->rps_flow_table);
if (!flow_table)
goto out;
- flow_id = skb->rxhash & flow_table->mask;
+ flow_id = skb_get_hash(skb) & flow_table->mask;
rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
rxq_index, flow_id);
if (rc < 0)
struct rps_sock_flow_table *sock_flow_table;
int cpu = -1;
u16 tcpu;
+ u32 hash;
if (skb_rx_queue_recorded(skb)) {
u16 index = skb_get_rx_queue(skb);
}
skb_reset_network_header(skb);
- if (!skb_get_hash(skb))
+ hash = skb_get_hash(skb);
+ if (!hash)
goto done;
flow_table = rcu_dereference(rxqueue->rps_flow_table);
u16 next_cpu;
struct rps_dev_flow *rflow;
- rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
+ rflow = &flow_table->flows[hash & flow_table->mask];
tcpu = rflow->cpu;
- next_cpu = sock_flow_table->ents[skb->rxhash &
- sock_flow_table->mask];
+ next_cpu = sock_flow_table->ents[hash & sock_flow_table->mask];
/*
* If the desired CPU (where last recvmsg was done) is
}
if (map) {
- tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
+ tcpu = map->cpus[((u64) hash * map->len) >> 32];
if (cpu_online(tcpu)) {
cpu = tcpu;
{
int ret;
- /* if netpoll wants it, pretend we never saw it */
- if (netpoll_rx(skb))
- return NET_RX_DROP;
-
net_timestamp_check(netdev_tstamp_prequeue, skb);
trace_netif_rx(skb);
root_lock = qdisc_lock(q);
if (spin_trylock(root_lock)) {
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(__QDISC_STATE_SCHED,
&q->state);
qdisc_run(q);
&q->state)) {
__netif_reschedule(q);
} else {
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(__QDISC_STATE_SCHED,
&q->state);
}
* @rx_handler: receive handler to register
* @rx_handler_data: data pointer that is used by rx handler
*
- * Register a receive hander for a device. This handler will then be
+ * Register a receive handler for a device. This handler will then be
* called from __netif_receive_skb. A negative errno code is returned
* on a failure.
*
static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
{
switch (skb->protocol) {
- case __constant_htons(ETH_P_ARP):
- case __constant_htons(ETH_P_IP):
- case __constant_htons(ETH_P_IPV6):
- case __constant_htons(ETH_P_8021Q):
- case __constant_htons(ETH_P_8021AD):
+ case htons(ETH_P_ARP):
+ case htons(ETH_P_IP):
+ case htons(ETH_P_IPV6):
+ case htons(ETH_P_8021Q):
+ case htons(ETH_P_8021AD):
return true;
default:
return false;
trace_netif_receive_skb(skb);
- /* if we've gotten here through NAPI, check netpoll */
- if (netpoll_receive_skb(skb))
- goto out;
-
orig_dev = skb->dev;
skb_reset_network_header(skb);
if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
- skb = vlan_untag(skb);
+ skb = skb_vlan_untag(skb);
if (unlikely(!skb))
goto unlock;
}
unlock:
rcu_read_unlock();
-out:
return ret;
}
diffs |= p->vlan_tci ^ skb->vlan_tci;
if (maclen == ETH_HLEN)
diffs |= compare_ether_header(skb_mac_header(p),
- skb_gro_mac_header(skb));
+ skb_mac_header(skb));
else if (!diffs)
diffs = memcmp(skb_mac_header(p),
- skb_gro_mac_header(skb),
+ skb_mac_header(skb),
maclen);
NAPI_GRO_CB(p)->same_flow = !diffs;
}
}
}
+static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
+{
+ struct skb_shared_info *pinfo = skb_shinfo(skb);
+
+ BUG_ON(skb->end - skb->tail < grow);
+
+ memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
+
+ skb->data_len -= grow;
+ skb->tail += grow;
+
+ pinfo->frags[0].page_offset += grow;
+ skb_frag_size_sub(&pinfo->frags[0], grow);
+
+ if (unlikely(!skb_frag_size(&pinfo->frags[0]))) {
+ skb_frag_unref(skb, 0);
+ memmove(pinfo->frags, pinfo->frags + 1,
+ --pinfo->nr_frags * sizeof(pinfo->frags[0]));
+ }
+}
+
static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
struct sk_buff **pp = NULL;
struct list_head *head = &offload_base;
int same_flow;
enum gro_result ret;
+ int grow;
- if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
+ if (!(skb->dev->features & NETIF_F_GRO))
goto normal;
if (skb_is_gso(skb) || skb_has_frag_list(skb))
goto normal;
- skb_gro_reset_offset(skb);
gro_list_prepare(napi, skb);
NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */
}
NAPI_GRO_CB(skb)->count = 1;
NAPI_GRO_CB(skb)->age = jiffies;
+ NAPI_GRO_CB(skb)->last = skb;
skb_shinfo(skb)->gso_size = skb_gro_len(skb);
skb->next = napi->gro_list;
napi->gro_list = skb;
ret = GRO_HELD;
pull:
- if (skb_headlen(skb) < skb_gro_offset(skb)) {
- int grow = skb_gro_offset(skb) - skb_headlen(skb);
-
- BUG_ON(skb->end - skb->tail < grow);
-
- memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
-
- skb->tail += grow;
- skb->data_len -= grow;
-
- skb_shinfo(skb)->frags[0].page_offset += grow;
- skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow);
-
- if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) {
- skb_frag_unref(skb, 0);
- memmove(skb_shinfo(skb)->frags,
- skb_shinfo(skb)->frags + 1,
- --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
- }
- }
-
+ grow = skb_gro_offset(skb) - skb_headlen(skb);
+ if (grow > 0)
+ gro_pull_from_frag0(skb, grow);
ok:
return ret;
{
trace_napi_gro_receive_entry(skb);
+ skb_gro_reset_offset(skb);
+
return napi_skb_finish(dev_gro_receive(napi, skb), skb);
}
EXPORT_SYMBOL(napi_gro_receive);
skb->vlan_tci = 0;
skb->dev = napi->dev;
skb->skb_iif = 0;
+ skb->encapsulation = 0;
+ skb_shinfo(skb)->gso_type = 0;
+ skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
napi->skb = skb;
}
}
EXPORT_SYMBOL(napi_get_frags);
-static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
- gro_result_t ret)
+static gro_result_t napi_frags_finish(struct napi_struct *napi,
+ struct sk_buff *skb,
+ gro_result_t ret)
{
switch (ret) {
case GRO_NORMAL:
- if (netif_receive_skb_internal(skb))
+ case GRO_HELD:
+ __skb_push(skb, ETH_HLEN);
+ skb->protocol = eth_type_trans(skb, skb->dev);
+ if (ret == GRO_NORMAL && netif_receive_skb_internal(skb))
ret = GRO_DROP;
break;
napi_reuse_skb(napi, skb);
break;
- case GRO_HELD:
case GRO_MERGED:
break;
}
return ret;
}
+/* Upper GRO stack assumes network header starts at gro_offset=0
+ * Drivers could call both napi_gro_frags() and napi_gro_receive()
+ * We copy ethernet header into skb->data to have a common layout.
+ */
static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
{
struct sk_buff *skb = napi->skb;
+ const struct ethhdr *eth;
+ unsigned int hlen = sizeof(*eth);
napi->skb = NULL;
- if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) {
- napi_reuse_skb(napi, skb);
- return NULL;
+ skb_reset_mac_header(skb);
+ skb_gro_reset_offset(skb);
+
+ eth = skb_gro_header_fast(skb, 0);
+ if (unlikely(skb_gro_header_hard(skb, hlen))) {
+ eth = skb_gro_header_slow(skb, hlen, 0);
+ if (unlikely(!eth)) {
+ napi_reuse_skb(napi, skb);
+ return NULL;
+ }
+ } else {
+ gro_pull_from_frag0(skb, hlen);
+ NAPI_GRO_CB(skb)->frag0 += hlen;
+ NAPI_GRO_CB(skb)->frag0_len -= hlen;
}
- skb->protocol = eth_type_trans(skb, skb->dev);
+ __skb_pull(skb, hlen);
+
+ /*
+ * This works because the only protocols we care about don't require
+ * special handling.
+ * We'll fix it up properly in napi_frags_finish()
+ */
+ skb->protocol = eth->h_proto;
return skb;
}
struct softnet_data *next = remsd->rps_ipi_next;
if (cpu_online(remsd->cpu))
- __smp_call_function_single(remsd->cpu,
- &remsd->csd, 0);
+ smp_call_function_single_async(remsd->cpu,
+ &remsd->csd);
remsd = next;
}
} else
#endif
napi->weight = weight_p;
local_irq_disable();
- while (work < quota) {
+ while (1) {
struct sk_buff *skb;
- unsigned int qlen;
while ((skb = __skb_dequeue(&sd->process_queue))) {
local_irq_enable();
}
rps_lock(sd);
- qlen = skb_queue_len(&sd->input_pkt_queue);
- if (qlen)
- skb_queue_splice_tail_init(&sd->input_pkt_queue,
- &sd->process_queue);
-
- if (qlen < quota - work) {
+ if (skb_queue_empty(&sd->input_pkt_queue)) {
/*
* Inline a custom version of __napi_complete().
* only current cpu owns and manipulates this napi,
- * and NAPI_STATE_SCHED is the only possible flag set on backlog.
- * we can use a plain write instead of clear_bit(),
+ * and NAPI_STATE_SCHED is the only possible flag set
+ * on backlog.
+ * We can use a plain write instead of clear_bit(),
* and we dont need an smp_mb() memory barrier.
*/
list_del(&napi->poll_list);
napi->state = 0;
+ rps_unlock(sd);
- quota = work + qlen;
+ break;
}
+
+ skb_queue_splice_tail_init(&sd->input_pkt_queue,
+ &sd->process_queue);
rps_unlock(sd);
}
local_irq_enable();
BUG_ON(n->gro_list);
list_del(&n->poll_list);
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(NAPI_STATE_SCHED, &n->state);
}
EXPORT_SYMBOL(__napi_complete);
}
EXPORT_SYMBOL(netdev_adjacent_get_private);
+/**
+ * netdev_upper_get_next_dev_rcu - Get the next dev from upper list
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next device from the dev's upper list, starting from iter
+ * position. The caller must hold RCU read lock.
+ */
+struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
+ struct list_head **iter)
+{
+ struct netdev_adjacent *upper;
+
+ WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
+
+ upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
+
+ if (&upper->list == &dev->adj_list.upper)
+ return NULL;
+
+ *iter = &upper->list;
+
+ return upper->dev;
+}
+EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
+
/**
* netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list
* @dev: device
if (&lower->list == &dev->adj_list.lower)
return NULL;
- if (iter)
- *iter = lower->list.next;
+ *iter = lower->list.next;
return lower->private;
}
if (&lower->list == &dev->adj_list.lower)
return NULL;
- if (iter)
- *iter = &lower->list;
+ *iter = &lower->list;
return lower->private;
}
EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
+/**
+ * netdev_lower_get_next - Get the next device from the lower neighbour
+ * list
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next netdev_adjacent from the dev's lower neighbour
+ * list, starting from iter position. The caller must hold RTNL lock or
+ * its own locking that guarantees that the neighbour lower
+ * list will remain unchainged.
+ */
+void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
+{
+ struct netdev_adjacent *lower;
+
+ lower = list_entry((*iter)->next, struct netdev_adjacent, list);
+
+ if (&lower->list == &dev->adj_list.lower)
+ return NULL;
+
+ *iter = &lower->list;
+
+ return lower->dev;
+}
+EXPORT_SYMBOL(netdev_lower_get_next);
+
/**
* netdev_lower_get_first_private_rcu - Get the first ->private from the
* lower neighbour list, RCU
sysfs_remove_link(&(dev->dev.kobj), linkname);
}
-#define netdev_adjacent_is_neigh_list(dev, dev_list) \
- (dev_list == &dev->adj_list.upper || \
- dev_list == &dev->adj_list.lower)
+static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev,
+ struct net_device *adj_dev,
+ struct list_head *dev_list)
+{
+ return (dev_list == &dev->adj_list.upper ||
+ dev_list == &dev->adj_list.lower) &&
+ net_eq(dev_net(dev), dev_net(adj_dev));
+}
static int __netdev_adjacent_dev_insert(struct net_device *dev,
struct net_device *adj_dev,
pr_debug("dev_hold for %s, because of link added from %s to %s\n",
adj_dev->name, dev->name, adj_dev->name);
- if (netdev_adjacent_is_neigh_list(dev, dev_list)) {
+ if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) {
ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list);
if (ret)
goto free_adj;
return 0;
remove_symlinks:
- if (netdev_adjacent_is_neigh_list(dev, dev_list))
+ if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
free_adj:
kfree(adj);
if (adj->master)
sysfs_remove_link(&(dev->dev.kobj), "master");
- if (netdev_adjacent_is_neigh_list(dev, dev_list))
+ if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
list_del_rcu(&adj->list);
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
+void netdev_adjacent_add_links(struct net_device *dev)
+{
+ struct netdev_adjacent *iter;
+
+ struct net *net = dev_net(dev);
+
+ list_for_each_entry(iter, &dev->adj_list.upper, list) {
+ if (!net_eq(net,dev_net(iter->dev)))
+ continue;
+ netdev_adjacent_sysfs_add(iter->dev, dev,
+ &iter->dev->adj_list.lower);
+ netdev_adjacent_sysfs_add(dev, iter->dev,
+ &dev->adj_list.upper);
+ }
+
+ list_for_each_entry(iter, &dev->adj_list.lower, list) {
+ if (!net_eq(net,dev_net(iter->dev)))
+ continue;
+ netdev_adjacent_sysfs_add(iter->dev, dev,
+ &iter->dev->adj_list.upper);
+ netdev_adjacent_sysfs_add(dev, iter->dev,
+ &dev->adj_list.lower);
+ }
+}
+
+void netdev_adjacent_del_links(struct net_device *dev)
+{
+ struct netdev_adjacent *iter;
+
+ struct net *net = dev_net(dev);
+
+ list_for_each_entry(iter, &dev->adj_list.upper, list) {
+ if (!net_eq(net,dev_net(iter->dev)))
+ continue;
+ netdev_adjacent_sysfs_del(iter->dev, dev->name,
+ &iter->dev->adj_list.lower);
+ netdev_adjacent_sysfs_del(dev, iter->dev->name,
+ &dev->adj_list.upper);
+ }
+
+ list_for_each_entry(iter, &dev->adj_list.lower, list) {
+ if (!net_eq(net,dev_net(iter->dev)))
+ continue;
+ netdev_adjacent_sysfs_del(iter->dev, dev->name,
+ &iter->dev->adj_list.upper);
+ netdev_adjacent_sysfs_del(dev, iter->dev->name,
+ &dev->adj_list.lower);
+ }
+}
+
void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
{
struct netdev_adjacent *iter;
+ struct net *net = dev_net(dev);
+
list_for_each_entry(iter, &dev->adj_list.upper, list) {
+ if (!net_eq(net,dev_net(iter->dev)))
+ continue;
netdev_adjacent_sysfs_del(iter->dev, oldname,
&iter->dev->adj_list.lower);
netdev_adjacent_sysfs_add(iter->dev, dev,
}
list_for_each_entry(iter, &dev->adj_list.lower, list) {
+ if (!net_eq(net,dev_net(iter->dev)))
+ continue;
netdev_adjacent_sysfs_del(iter->dev, oldname,
&iter->dev->adj_list.upper);
netdev_adjacent_sysfs_add(iter->dev, dev,
}
EXPORT_SYMBOL(netdev_lower_dev_get_private);
+
+int dev_get_nest_level(struct net_device *dev,
+ bool (*type_check)(struct net_device *dev))
+{
+ struct net_device *lower = NULL;
+ struct list_head *iter;
+ int max_nest = -1;
+ int nest;
+
+ ASSERT_RTNL();
+
+ netdev_for_each_lower_dev(dev, lower, iter) {
+ nest = dev_get_nest_level(lower, type_check);
+ if (max_nest < nest)
+ max_nest = nest;
+ }
+
+ if (type_check(dev))
+ max_nest++;
+
+ return max_nest;
+}
+EXPORT_SYMBOL(dev_get_nest_level);
+
static void dev_change_rx_flags(struct net_device *dev, int flags)
{
const struct net_device_ops *ops = dev->netdev_ops;
*/
ret = 0;
- if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
+ if ((old_flags ^ flags) & IFF_UP)
ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
- if (!ret)
- dev_set_rx_mode(dev);
- }
-
if ((flags ^ dev->gflags) & IFF_PROMISC) {
int inc = (flags & IFF_PROMISC) ? 1 : -1;
unsigned int old_flags = dev->flags;
/* Delayed registration/unregisteration */
static LIST_HEAD(net_todo_list);
-static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
+DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
static void net_set_todo(struct net_device *dev)
{
*/
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
- if (!dev->rtnl_link_ops ||
- dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
- rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
-
/*
* Flush the unicast and multicast chains
*/
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
+ if (!dev->rtnl_link_ops ||
+ dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+ rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
+
/* Notifier chain MUST detach us all upper devices. */
WARN_ON(netdev_has_any_upper_dev(dev));
}
}
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ if (dev->netdev_ops->ndo_busy_poll)
+ features |= NETIF_F_BUSY_POLL;
+ else
+#endif
+ features &= ~NETIF_F_BUSY_POLL;
+
return features;
}
static void netif_free_tx_queues(struct net_device *dev)
{
- if (is_vmalloc_addr(dev->_tx))
- vfree(dev->_tx);
- else
- kfree(dev->_tx);
+ kvfree(dev->_tx);
}
static int netif_alloc_netdev_queues(struct net_device *dev)
netdev_stats_to_stats64(storage, &dev->stats);
}
storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
+ storage->tx_dropped += atomic_long_read(&dev->tx_dropped);
return storage;
}
EXPORT_SYMBOL(dev_get_stats);
{
char *addr = (char *)dev - dev->padded;
- if (is_vmalloc_addr(addr))
- vfree(addr);
- else
- kfree(addr);
+ kvfree(addr);
}
/**
* alloc_netdev_mqs - allocate network device
- * @sizeof_priv: size of private data to allocate space for
- * @name: device name format string
- * @setup: callback to initialize device
- * @txqs: the number of TX subqueues to allocate
- * @rxqs: the number of RX subqueues to allocate
+ * @sizeof_priv: size of private data to allocate space for
+ * @name: device name format string
+ * @name_assign_type: origin of device name
+ * @setup: callback to initialize device
+ * @txqs: the number of TX subqueues to allocate
+ * @rxqs: the number of RX subqueues to allocate
*
* Allocates a struct net_device with private data area for driver use
* and performs basic initialization. Also allocates subqueue structs
* for each queue on the device.
*/
struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
+ unsigned char name_assign_type,
void (*setup)(struct net_device *),
unsigned int txqs, unsigned int rxqs)
{
#endif
strcpy(dev->name, name);
+ dev->name_assign_type = name_assign_type;
dev->group = INIT_NETDEV_GROUP;
if (!dev->ethtool_ops)
dev->ethtool_ops = &default_ethtool_ops;
free_pcpu:
free_percpu(dev->pcpu_refcnt);
- netif_free_tx_queues(dev);
-#ifdef CONFIG_SYSFS
- kfree(dev->_rx);
-#endif
-
free_dev:
netdev_freemem(dev);
return NULL;
/**
* unregister_netdevice_many - unregister many devices
* @head: list of devices
+ *
+ * Note: As most callers use a stack allocated list_head,
+ * we force a list_del() to make sure stack wont be corrupted later.
*/
void unregister_netdevice_many(struct list_head *head)
{
rollback_registered_many(head);
list_for_each_entry(dev, head, unreg_list)
net_set_todo(dev);
+ list_del(head);
}
}
EXPORT_SYMBOL(unregister_netdevice_many);
/* Send a netdev-removed uevent to the old namespace */
kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE);
+ netdev_adjacent_del_links(dev);
/* Actually switch the network namespace */
dev_net_set(dev, net);
/* Send a netdev-add uevent to the new namespace */
kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
+ netdev_adjacent_add_links(dev);
/* Fixup kobjects */
err = device_rename(&dev->dev, dev->name);
if (dev && dev->dev.parent) {
r = dev_printk_emit(level[1] - '0',
dev->dev.parent,
- "%s %s %s: %pV",
+ "%s %s %s%s: %pV",
dev_driver_string(dev->dev.parent),
dev_name(dev->dev.parent),
- netdev_name(dev), vaf);
+ netdev_name(dev), netdev_reg_state(dev),
+ vaf);
} else if (dev) {
- r = printk("%s%s: %pV", level, netdev_name(dev), vaf);
+ r = printk("%s%s%s: %pV", level, netdev_name(dev),
+ netdev_reg_state(dev), vaf);
} else {
r = printk("%s(NULL net_device): %pV", level, vaf);
}
rtnl_lock_unregistering(net_list);
list_for_each_entry(net, net_list, exit_list) {
for_each_netdev_reverse(net, dev) {
- if (dev->rtnl_link_ops)
+ if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink)
dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
else
unregister_netdevice_queue(dev, &dev_kill_list);
}
}
unregister_netdevice_many(&dev_kill_list);
- list_del(&dev_kill_list);
rtnl_unlock();
}