/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn,
				   __be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
			 IP_TNL_HASH_BITS);
}

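/* Each tunnel caches its most recent output route in t->dst_cache.
 * Writers below serialize on t->dst_lock; readers use RCU.  Routes
 * flagged DST_NOCACHE must not be kept, so they are stored as NULL.
 */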
static inline void __tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
{
	struct dst_entry *old_dst;

	if (dst && (dst->flags & DST_NOCACHE))
		dst = NULL;

	spin_lock_bh(&t->dst_lock);
	old_dst = rcu_dereference_raw(t->dst_cache);
	rcu_assign_pointer(t->dst_cache, dst);
	dst_release(old_dst);
	spin_unlock_bh(&t->dst_lock);
}

static inline void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
{
	__tunnel_dst_set(t, dst);
}

static inline void tunnel_dst_reset(struct ip_tunnel *t)
{
	tunnel_dst_set(t, NULL);
}

static inline struct dst_entry *tunnel_dst_get(struct ip_tunnel *t)
{
	struct dst_entry *dst;

	rcu_read_lock();
	dst = rcu_dereference(t->dst_cache);
	if (dst)
		dst_hold(dst);
	rcu_read_unlock();
	return dst;
}

static struct dst_entry *tunnel_dst_check(struct ip_tunnel *t, u32 cookie)
{
	struct dst_entry *dst = tunnel_dst_get(t);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		tunnel_dst_reset(t);
		/* Drop the reference taken by tunnel_dst_get() so the
		 * stale entry is not leaked.
		 */
		dst_release(dst);
		return NULL;
	}

	return dst;
}

/* Often-modified stats are per-cpu; others are shared (netdev->stats) */
struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
						struct rtnl_link_stats64 *tot)
{
	int i;

	for_each_possible_cpu(i) {
		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
		unsigned int start;

		do {
			start = u64_stats_fetch_begin_bh(&tstats->syncp);
			rx_packets = tstats->rx_packets;
			tx_packets = tstats->tx_packets;
			rx_bytes = tstats->rx_bytes;
			tx_bytes = tstats->tx_bytes;
		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));

		tot->rx_packets += rx_packets;
		tot->tx_packets += tx_packets;
		tot->rx_bytes   += rx_bytes;
		tot->tx_bytes   += tx_bytes;
	}

	tot->multicast = dev->stats.multicast;

	tot->rx_crc_errors = dev->stats.rx_crc_errors;
	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
	tot->rx_length_errors = dev->stats.rx_length_errors;
	tot->rx_frame_errors = dev->stats.rx_frame_errors;
	tot->rx_errors = dev->stats.rx_errors;

	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
	tot->tx_dropped = dev->stats.tx_dropped;
	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
	tot->tx_errors = dev->stats.tx_errors;

	tot->collisions = dev->stats.collisions;

	return tot;
}
EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);

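/* Match a packet's key flags/value against a tunnel's configuration:
 * a keyed tunnel accepts only packets carrying the same key, and a
 * keyless tunnel accepts only keyless packets.
 */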
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}

/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if no key is present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched against a configured keyless tunnel,
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for the incoming packet.
*/
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(itn, key, remote);
	head = &itn->tunnels[hash];

	/* Pass 1: exact local, remote and key. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	/* Pass 2: any local address, matching remote. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* Pass 3: wildcard remote; local unicast or multicast match. */
	hash = ip_tunnel_hash(itn, key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr &&
		     (local != t->parms.iph.daddr ||
		      !ipv4_is_multicast(local))) ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	/* Pass 4: key-only match. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

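/* Select the hash bucket for a tunnel's parameters.  Only a unicast
 * destination participates in the hash; multicast and wildcard
 * destinations fall into the remote == 0 bucket, mirroring the
 * wildcard pass in ip_tunnel_lookup() above.
 */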
static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	h = ip_tunnel_hash(itn, parms->i_key, remote);
	return &itn->tunnels[h];
}

static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}

static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    key == t->parms.i_key &&
		    link == t->parms.link &&
		    type == t->dev->type)
			break;
	}

	return t;
}

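/* Allocate and register the netdevice backing a tunnel.  Without an
 * explicit name, the rtnl_link kind plus a "%d" template is used and
 * the core picks the first free index (e.g. "gre1").
 */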
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

static inline void init_tunnel_flow(struct flowi4 *fl4,
				    int proto,
				    __be32 daddr, __be32 saddr,
				    __be32 key, __u8 tos, int oif)
{
	memset(fl4, 0, sizeof(*fl4));
	fl4->flowi4_oif = oif;
	fl4->daddr = daddr;
	fl4->saddr = saddr;
	fl4->flowi4_tos = tos;
	fl4->flowi4_proto = proto;
	fl4->fl4_gre_key = key;
}

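/* Probe the route toward the tunnel endpoint to guess a reasonable MTU
 * and needed_headroom for the tunnel device, caching the dst along the
 * way.  Returns the MTU the device should use.
 */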
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			tunnel_dst_set(tunnel, dst_clone(&rt->dst));
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < 68)
		mtu = 68;

	return mtu;
}

static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;

	BUG_ON(!itn->fb_tunnel_dev);
	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return NULL;

	dev->mtu = ip_tunnel_bind_dev(dev);

	nt = netdev_priv(dev);
	ip_tunnel_add(itn, nt);
	return nt;
}

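/* Receive path shared by IP-in-IP style tunnels.  The caller has
 * already matched the tunnel; here we validate checksum and sequence
 * expectations against the tunnel's configured i_flags, fold in ECN,
 * bump per-cpu stats and hand the packet to GRO.
 */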
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_tstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);

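/* Check the packet against the path MTU toward the tunnel endpoint and
 * propagate the result to the sender: ICMP_FRAG_NEEDED for DF-marked
 * IPv4, ICMPV6_PKT_TOOBIG for IPv6.  Returns -E2BIG if the packet
 * cannot be sent, 0 otherwise.
 */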
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}

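/* Common transmit path: resolve the outer destination (fixed for
 * point-to-point tunnels, derived from the inner packet for NBMA ones),
 * look up or reuse the cached route, enforce PMTU, then build and send
 * the encapsulated packet via iptunnel_xmit().
 */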
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, const u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt = NULL;	/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;
	bool connected = true;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	if (connected)
		rt = (struct rtable *)tunnel_dst_check(tunnel, 0);

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			tunnel_dst_set(tunnel, dst_clone(&rt->dst));
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len;
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		dev->stats.tx_dropped++;
		dev_kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	dev_kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);

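/* Apply new parameters to an existing tunnel.  The tunnel is unhashed,
 * updated and rehashed because changing the addresses or the key can
 * move it to a different hash bucket; the cached route is dropped since
 * it may no longer be valid.
 */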
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	/* The endpoint may have changed; the old cached route is stale. */
	tunnel_dst_reset(t);
	netdev_state_change(dev);
}

int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == itn->fb_tunnel_dev)
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags&TUNNEL_KEY))
			p->i_key = 0;
		if (!(p->o_flags&TUNNEL_KEY))
			p->o_key = 0;

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (!t && (cmd == SIOCADDTUNNEL))
			t = ip_tunnel_create(net, itn, p);

		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);

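/* The upper bound keeps the outer datagram (payload plus link-layer and
 * tunnel headers) within the 64K IPv4 total-length limit; 0xFFF8 is that
 * limit rounded down to a multiple of 8 for fragment alignment.
 */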
int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	if (new_mtu < 68 ||
	    new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(dev->tstats);
	free_netdev(dev);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

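/* Set up the per-netns state: the hash table and, when @ops is given,
 * the fallback device.  A protocol module typically calls this from its
 * pernet init handler, e.g. (sketch modeled on the ipip driver --
 * ipip_net_id and ipip_link_ops are that driver's symbols, not ours):
 *
 *	static int __net_init ipip_init_net(struct net *net)
 *	{
 *		return ip_tunnel_init_net(net, ipip_net_id,
 *					  &ipip_link_ops, "tunl0");
 *	}
 */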
int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops) {
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_RET(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}

void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list, ops);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);

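/* rtnl_link newlink handler shared by ip_tunnel-based drivers: reject
 * duplicate parameters, register the device, then bind and hash it.
 */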
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (ip_tunnel_find(itn, p, dev->type))
		return -EEXIST;

	nt->net = net;
	nt->parms = *p;
	err = register_netdevice(dev);
	if (err)
		goto out;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	ip_tunnel_add(itn, nt);

out:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

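/* ndo_init handler: allocate per-cpu stats and GRO cells, and fill in
 * the invariant fields of the outer IPv4 header (version, ihl).
 */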
int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int i, err;

	dev->destructor	= ip_tunnel_dev_free;
	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	for_each_possible_cpu(i) {
		struct pcpu_tstats *ipt_stats;
		ipt_stats = per_cpu_ptr(dev->tstats, i);
		u64_stats_init(&ipt_stats->syncp);
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version		= 4;
	iph->ihl		= 5;

	tunnel->dst_cache = NULL;
	spin_lock_init(&tunnel->dst_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	/* fb_tunnel_dev will be unregistered in the net-exit call. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(netdev_priv(dev));

	tunnel_dst_reset(tunnel);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do the least required initialization; the rest is done in the tunnel_init call */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");