/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif
66 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
68 return hash_32((__force u32)key ^ (__force u32)remote,
72 static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
73 struct dst_entry *dst, __be32 saddr)
75 struct dst_entry *old_dst;
78 old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
83 static noinline void tunnel_dst_set(struct ip_tunnel *t,
84 struct dst_entry *dst, __be32 saddr)
86 __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
89 static void tunnel_dst_reset(struct ip_tunnel *t)
91 tunnel_dst_set(t, NULL, 0);
94 void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
98 for_each_possible_cpu(i)
99 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
101 EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
103 static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
104 u32 cookie, __be32 *saddr)
106 struct ip_tunnel_dst *idst;
107 struct dst_entry *dst;
110 idst = raw_cpu_ptr(t->dst_cache);
111 dst = rcu_dereference(idst->dst);
112 if (dst && !atomic_inc_not_zero(&dst->__refcnt))
115 if (!dst->obsolete || dst->ops->check(dst, cookie)) {
116 *saddr = idst->saddr;
124 return (struct rtable *)dst;
127 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
128 __be16 flags, __be32 key)
130 if (p->i_flags & TUNNEL_KEY) {
131 if (flags & TUNNEL_KEY)
132 return key == p->i_key;
134 /* key expected, none present */
137 return !(flags & TUNNEL_KEY);
/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require exact key match i.e. if a key is present in packet
   it will match only tunnel with the same key; if it is not present,
   it will match only keyless tunnel.

   All keysless packets, if not matched configured keyless tunnels
   will match fallback tunnel.
   Given src, dst and key, find appropriate for input tunnel.
*/
151 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
152 int link, __be16 flags,
153 __be32 remote, __be32 local,
157 struct ip_tunnel *t, *cand = NULL;
158 struct hlist_head *head;
160 hash = ip_tunnel_hash(key, remote);
161 head = &itn->tunnels[hash];
163 hlist_for_each_entry_rcu(t, head, hash_node) {
164 if (local != t->parms.iph.saddr ||
165 remote != t->parms.iph.daddr ||
166 !(t->dev->flags & IFF_UP))
169 if (!ip_tunnel_key_match(&t->parms, flags, key))
172 if (t->parms.link == link)
178 hlist_for_each_entry_rcu(t, head, hash_node) {
179 if (remote != t->parms.iph.daddr ||
180 t->parms.iph.saddr != 0 ||
181 !(t->dev->flags & IFF_UP))
184 if (!ip_tunnel_key_match(&t->parms, flags, key))
187 if (t->parms.link == link)
193 hash = ip_tunnel_hash(key, 0);
194 head = &itn->tunnels[hash];
196 hlist_for_each_entry_rcu(t, head, hash_node) {
197 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
198 (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
201 if (!(t->dev->flags & IFF_UP))
204 if (!ip_tunnel_key_match(&t->parms, flags, key))
207 if (t->parms.link == link)
213 if (flags & TUNNEL_NO_KEY)
214 goto skip_key_lookup;
216 hlist_for_each_entry_rcu(t, head, hash_node) {
217 if (t->parms.i_key != key ||
218 t->parms.iph.saddr != 0 ||
219 t->parms.iph.daddr != 0 ||
220 !(t->dev->flags & IFF_UP))
223 if (t->parms.link == link)
233 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
234 return netdev_priv(itn->fb_tunnel_dev);
239 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
241 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
242 struct ip_tunnel_parm *parms)
246 __be32 i_key = parms->i_key;
248 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
249 remote = parms->iph.daddr;
253 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
256 h = ip_tunnel_hash(i_key, remote);
257 return &itn->tunnels[h];
260 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
262 struct hlist_head *head = ip_bucket(itn, &t->parms);
264 hlist_add_head_rcu(&t->hash_node, head);
267 static void ip_tunnel_del(struct ip_tunnel *t)
269 hlist_del_init_rcu(&t->hash_node);
272 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
273 struct ip_tunnel_parm *parms,
276 __be32 remote = parms->iph.daddr;
277 __be32 local = parms->iph.saddr;
278 __be32 key = parms->i_key;
279 __be16 flags = parms->i_flags;
280 int link = parms->link;
281 struct ip_tunnel *t = NULL;
282 struct hlist_head *head = ip_bucket(itn, parms);
284 hlist_for_each_entry_rcu(t, head, hash_node) {
285 if (local == t->parms.iph.saddr &&
286 remote == t->parms.iph.daddr &&
287 link == t->parms.link &&
288 type == t->dev->type &&
289 ip_tunnel_key_match(&t->parms, flags, key))
295 static struct net_device *__ip_tunnel_create(struct net *net,
296 const struct rtnl_link_ops *ops,
297 struct ip_tunnel_parm *parms)
300 struct ip_tunnel *tunnel;
301 struct net_device *dev;
305 strlcpy(name, parms->name, IFNAMSIZ);
307 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
311 strlcpy(name, ops->kind, IFNAMSIZ);
312 strncat(name, "%d", 2);
316 dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
321 dev_net_set(dev, net);
323 dev->rtnl_link_ops = ops;
325 tunnel = netdev_priv(dev);
326 tunnel->parms = *parms;
329 err = register_netdevice(dev);
341 static inline void init_tunnel_flow(struct flowi4 *fl4,
343 __be32 daddr, __be32 saddr,
344 __be32 key, __u8 tos, int oif)
346 memset(fl4, 0, sizeof(*fl4));
347 fl4->flowi4_oif = oif;
350 fl4->flowi4_tos = tos;
351 fl4->flowi4_proto = proto;
352 fl4->fl4_gre_key = key;
355 static int ip_tunnel_bind_dev(struct net_device *dev)
357 struct net_device *tdev = NULL;
358 struct ip_tunnel *tunnel = netdev_priv(dev);
359 const struct iphdr *iph;
360 int hlen = LL_MAX_HEADER;
361 int mtu = ETH_DATA_LEN;
362 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
364 iph = &tunnel->parms.iph;
366 /* Guess output device to choose reasonable mtu and needed_headroom */
371 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
372 iph->saddr, tunnel->parms.o_key,
373 RT_TOS(iph->tos), tunnel->parms.link);
374 rt = ip_route_output_key(tunnel->net, &fl4);
378 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
381 if (dev->type != ARPHRD_ETHER)
382 dev->flags |= IFF_POINTOPOINT;
385 if (!tdev && tunnel->parms.link)
386 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
389 hlen = tdev->hard_header_len + tdev->needed_headroom;
392 dev->iflink = tunnel->parms.link;
394 dev->needed_headroom = t_hlen + hlen;
395 mtu -= (dev->hard_header_len + t_hlen);
403 static struct ip_tunnel *ip_tunnel_create(struct net *net,
404 struct ip_tunnel_net *itn,
405 struct ip_tunnel_parm *parms)
407 struct ip_tunnel *nt;
408 struct net_device *dev;
410 BUG_ON(!itn->fb_tunnel_dev);
411 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
413 return ERR_CAST(dev);
415 dev->mtu = ip_tunnel_bind_dev(dev);
417 nt = netdev_priv(dev);
418 ip_tunnel_add(itn, nt);
422 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
423 const struct tnl_ptk_info *tpi, bool log_ecn_error)
425 struct pcpu_sw_netstats *tstats;
426 const struct iphdr *iph = ip_hdr(skb);
429 #ifdef CONFIG_NET_IPGRE_BROADCAST
430 if (ipv4_is_multicast(iph->daddr)) {
431 tunnel->dev->stats.multicast++;
432 skb->pkt_type = PACKET_BROADCAST;
436 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
437 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
438 tunnel->dev->stats.rx_crc_errors++;
439 tunnel->dev->stats.rx_errors++;
443 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
444 if (!(tpi->flags&TUNNEL_SEQ) ||
445 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
446 tunnel->dev->stats.rx_fifo_errors++;
447 tunnel->dev->stats.rx_errors++;
450 tunnel->i_seqno = ntohl(tpi->seq) + 1;
453 skb_reset_network_header(skb);
455 err = IP_ECN_decapsulate(iph, skb);
458 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
459 &iph->saddr, iph->tos);
461 ++tunnel->dev->stats.rx_frame_errors;
462 ++tunnel->dev->stats.rx_errors;
467 tstats = this_cpu_ptr(tunnel->dev->tstats);
468 u64_stats_update_begin(&tstats->syncp);
469 tstats->rx_packets++;
470 tstats->rx_bytes += skb->len;
471 u64_stats_update_end(&tstats->syncp);
473 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
475 if (tunnel->dev->type == ARPHRD_ETHER) {
476 skb->protocol = eth_type_trans(skb, tunnel->dev);
477 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
479 skb->dev = tunnel->dev;
482 gro_cells_receive(&tunnel->gro_cells, skb);
489 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
491 static int ip_encap_hlen(struct ip_tunnel_encap *e)
493 const struct ip_tunnel_encap_ops *ops;
496 if (e->type == TUNNEL_ENCAP_NONE)
499 if (e->type >= MAX_IPTUN_ENCAP_OPS)
503 ops = rcu_dereference(iptun_encaps[e->type]);
504 if (likely(ops && ops->encap_hlen))
505 hlen = ops->encap_hlen(e);
511 const struct ip_tunnel_encap_ops __rcu *
512 iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;
514 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
517 if (num >= MAX_IPTUN_ENCAP_OPS)
520 return !cmpxchg((const struct ip_tunnel_encap_ops **)
524 EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
526 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
531 if (num >= MAX_IPTUN_ENCAP_OPS)
534 ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
536 ops, NULL) == ops) ? 0 : -1;
542 EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
544 int ip_tunnel_encap_setup(struct ip_tunnel *t,
545 struct ip_tunnel_encap *ipencap)
549 memset(&t->encap, 0, sizeof(t->encap));
551 hlen = ip_encap_hlen(ipencap);
555 t->encap.type = ipencap->type;
556 t->encap.sport = ipencap->sport;
557 t->encap.dport = ipencap->dport;
558 t->encap.flags = ipencap->flags;
560 t->encap_hlen = hlen;
561 t->hlen = t->encap_hlen + t->tun_hlen;
565 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
567 int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
568 u8 *protocol, struct flowi4 *fl4)
570 const struct ip_tunnel_encap_ops *ops;
573 if (t->encap.type == TUNNEL_ENCAP_NONE)
577 ops = rcu_dereference(iptun_encaps[t->encap.type]);
578 if (likely(ops && ops->build_header))
579 ret = ops->build_header(skb, &t->encap, protocol, fl4);
584 EXPORT_SYMBOL(ip_tunnel_encap);
586 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
587 struct rtable *rt, __be16 df)
589 struct ip_tunnel *tunnel = netdev_priv(dev);
590 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
594 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
595 - sizeof(struct iphdr) - tunnel->hlen;
597 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
600 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
602 if (skb->protocol == htons(ETH_P_IP)) {
603 if (!skb_is_gso(skb) &&
604 (df & htons(IP_DF)) && mtu < pkt_size) {
605 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
606 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
610 #if IS_ENABLED(CONFIG_IPV6)
611 else if (skb->protocol == htons(ETH_P_IPV6)) {
612 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
614 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
615 mtu >= IPV6_MIN_MTU) {
616 if ((tunnel->parms.iph.daddr &&
617 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
618 rt6->rt6i_dst.plen == 128) {
619 rt6->rt6i_flags |= RTF_MODIFIED;
620 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
624 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
626 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
634 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
635 const struct iphdr *tnl_params, u8 protocol)
637 struct ip_tunnel *tunnel = netdev_priv(dev);
638 const struct iphdr *inner_iph;
642 struct rtable *rt; /* Route to the other host */
643 unsigned int max_headroom; /* The extra header space needed */
648 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
649 connected = (tunnel->parms.iph.daddr != 0);
651 dst = tnl_params->daddr;
655 if (skb_dst(skb) == NULL) {
656 dev->stats.tx_fifo_errors++;
660 if (skb->protocol == htons(ETH_P_IP)) {
661 rt = skb_rtable(skb);
662 dst = rt_nexthop(rt, inner_iph->daddr);
664 #if IS_ENABLED(CONFIG_IPV6)
665 else if (skb->protocol == htons(ETH_P_IPV6)) {
666 const struct in6_addr *addr6;
667 struct neighbour *neigh;
668 bool do_tx_error_icmp;
671 neigh = dst_neigh_lookup(skb_dst(skb),
672 &ipv6_hdr(skb)->daddr);
676 addr6 = (const struct in6_addr *)&neigh->primary_key;
677 addr_type = ipv6_addr_type(addr6);
679 if (addr_type == IPV6_ADDR_ANY) {
680 addr6 = &ipv6_hdr(skb)->daddr;
681 addr_type = ipv6_addr_type(addr6);
684 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
685 do_tx_error_icmp = true;
687 do_tx_error_icmp = false;
688 dst = addr6->s6_addr32[3];
690 neigh_release(neigh);
691 if (do_tx_error_icmp)
701 tos = tnl_params->tos;
704 if (skb->protocol == htons(ETH_P_IP)) {
705 tos = inner_iph->tos;
707 } else if (skb->protocol == htons(ETH_P_IPV6)) {
708 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
713 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
714 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
716 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
719 rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;
722 rt = ip_route_output_key(tunnel->net, &fl4);
725 dev->stats.tx_carrier_errors++;
729 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
732 if (rt->dst.dev == dev) {
734 dev->stats.collisions++;
738 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
743 if (tunnel->err_count > 0) {
744 if (time_before(jiffies,
745 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
748 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
749 dst_link_failure(skb);
751 tunnel->err_count = 0;
754 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
755 ttl = tnl_params->ttl;
757 if (skb->protocol == htons(ETH_P_IP))
758 ttl = inner_iph->ttl;
759 #if IS_ENABLED(CONFIG_IPV6)
760 else if (skb->protocol == htons(ETH_P_IPV6))
761 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
764 ttl = ip4_dst_hoplimit(&rt->dst);
767 df = tnl_params->frag_off;
768 if (skb->protocol == htons(ETH_P_IP))
769 df |= (inner_iph->frag_off&htons(IP_DF));
771 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
772 + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
773 if (max_headroom > dev->needed_headroom)
774 dev->needed_headroom = max_headroom;
776 if (skb_cow_head(skb, dev->needed_headroom)) {
778 dev->stats.tx_dropped++;
783 err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
784 tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
785 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
789 #if IS_ENABLED(CONFIG_IPV6)
791 dst_link_failure(skb);
794 dev->stats.tx_errors++;
797 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
799 static void ip_tunnel_update(struct ip_tunnel_net *itn,
801 struct net_device *dev,
802 struct ip_tunnel_parm *p,
806 t->parms.iph.saddr = p->iph.saddr;
807 t->parms.iph.daddr = p->iph.daddr;
808 t->parms.i_key = p->i_key;
809 t->parms.o_key = p->o_key;
810 if (dev->type != ARPHRD_ETHER) {
811 memcpy(dev->dev_addr, &p->iph.saddr, 4);
812 memcpy(dev->broadcast, &p->iph.daddr, 4);
814 ip_tunnel_add(itn, t);
816 t->parms.iph.ttl = p->iph.ttl;
817 t->parms.iph.tos = p->iph.tos;
818 t->parms.iph.frag_off = p->iph.frag_off;
820 if (t->parms.link != p->link) {
823 t->parms.link = p->link;
824 mtu = ip_tunnel_bind_dev(dev);
828 ip_tunnel_dst_reset_all(t);
829 netdev_state_change(dev);
832 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
835 struct ip_tunnel *t = netdev_priv(dev);
836 struct net *net = t->net;
837 struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
839 BUG_ON(!itn->fb_tunnel_dev);
842 if (dev == itn->fb_tunnel_dev) {
843 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
845 t = netdev_priv(dev);
847 memcpy(p, &t->parms, sizeof(*p));
853 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
856 p->iph.frag_off |= htons(IP_DF);
857 if (!(p->i_flags & VTI_ISVTI)) {
858 if (!(p->i_flags & TUNNEL_KEY))
860 if (!(p->o_flags & TUNNEL_KEY))
864 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
866 if (cmd == SIOCADDTUNNEL) {
868 t = ip_tunnel_create(net, itn, p);
869 err = PTR_ERR_OR_ZERO(t);
876 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
883 unsigned int nflags = 0;
885 if (ipv4_is_multicast(p->iph.daddr))
886 nflags = IFF_BROADCAST;
887 else if (p->iph.daddr)
888 nflags = IFF_POINTOPOINT;
890 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
895 t = netdev_priv(dev);
901 ip_tunnel_update(itn, t, dev, p, true);
909 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
912 if (dev == itn->fb_tunnel_dev) {
914 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
918 if (t == netdev_priv(itn->fb_tunnel_dev))
922 unregister_netdevice(dev);
933 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
935 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
937 struct ip_tunnel *tunnel = netdev_priv(dev);
938 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
941 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
946 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
948 static void ip_tunnel_dev_free(struct net_device *dev)
950 struct ip_tunnel *tunnel = netdev_priv(dev);
952 gro_cells_destroy(&tunnel->gro_cells);
953 free_percpu(tunnel->dst_cache);
954 free_percpu(dev->tstats);
958 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
960 struct ip_tunnel *tunnel = netdev_priv(dev);
961 struct ip_tunnel_net *itn;
963 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
965 if (itn->fb_tunnel_dev != dev) {
966 ip_tunnel_del(netdev_priv(dev));
967 unregister_netdevice_queue(dev, head);
970 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
972 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
973 struct rtnl_link_ops *ops, char *devname)
975 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
976 struct ip_tunnel_parm parms;
979 for (i = 0; i < IP_TNL_HASH_SIZE; i++)
980 INIT_HLIST_HEAD(&itn->tunnels[i]);
983 itn->fb_tunnel_dev = NULL;
987 memset(&parms, 0, sizeof(parms));
989 strlcpy(parms.name, devname, IFNAMSIZ);
992 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
993 /* FB netdevice is special: we have one, and only one per netns.
994 * Allowing to move it to another netns is clearly unsafe.
996 if (!IS_ERR(itn->fb_tunnel_dev)) {
997 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
998 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
999 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1003 return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1005 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1007 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
1008 struct rtnl_link_ops *ops)
1010 struct net *net = dev_net(itn->fb_tunnel_dev);
1011 struct net_device *dev, *aux;
1014 for_each_netdev_safe(net, dev, aux)
1015 if (dev->rtnl_link_ops == ops)
1016 unregister_netdevice_queue(dev, head);
1018 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1019 struct ip_tunnel *t;
1020 struct hlist_node *n;
1021 struct hlist_head *thead = &itn->tunnels[h];
1023 hlist_for_each_entry_safe(t, n, thead, hash_node)
1024 /* If dev is in the same netns, it has already
1025 * been added to the list by the previous loop.
1027 if (!net_eq(dev_net(t->dev), net))
1028 unregister_netdevice_queue(t->dev, head);
1032 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
1037 ip_tunnel_destroy(itn, &list, ops);
1038 unregister_netdevice_many(&list);
1041 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
1043 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1044 struct ip_tunnel_parm *p)
1046 struct ip_tunnel *nt;
1047 struct net *net = dev_net(dev);
1048 struct ip_tunnel_net *itn;
1052 nt = netdev_priv(dev);
1053 itn = net_generic(net, nt->ip_tnl_net_id);
1055 if (ip_tunnel_find(itn, p, dev->type))
1060 err = register_netdevice(dev);
1064 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1065 eth_hw_addr_random(dev);
1067 mtu = ip_tunnel_bind_dev(dev);
1071 ip_tunnel_add(itn, nt);
1076 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1078 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1079 struct ip_tunnel_parm *p)
1081 struct ip_tunnel *t;
1082 struct ip_tunnel *tunnel = netdev_priv(dev);
1083 struct net *net = tunnel->net;
1084 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1086 if (dev == itn->fb_tunnel_dev)
1089 t = ip_tunnel_find(itn, p, dev->type);
1097 if (dev->type != ARPHRD_ETHER) {
1098 unsigned int nflags = 0;
1100 if (ipv4_is_multicast(p->iph.daddr))
1101 nflags = IFF_BROADCAST;
1102 else if (p->iph.daddr)
1103 nflags = IFF_POINTOPOINT;
1105 if ((dev->flags ^ nflags) &
1106 (IFF_POINTOPOINT | IFF_BROADCAST))
1111 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1114 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1116 int ip_tunnel_init(struct net_device *dev)
1118 struct ip_tunnel *tunnel = netdev_priv(dev);
1119 struct iphdr *iph = &tunnel->parms.iph;
1122 dev->destructor = ip_tunnel_dev_free;
1123 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1127 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1128 if (!tunnel->dst_cache) {
1129 free_percpu(dev->tstats);
1133 err = gro_cells_init(&tunnel->gro_cells, dev);
1135 free_percpu(tunnel->dst_cache);
1136 free_percpu(dev->tstats);
1141 tunnel->net = dev_net(dev);
1142 strcpy(tunnel->parms.name, dev->name);
1148 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1150 void ip_tunnel_uninit(struct net_device *dev)
1152 struct ip_tunnel *tunnel = netdev_priv(dev);
1153 struct net *net = tunnel->net;
1154 struct ip_tunnel_net *itn;
1156 itn = net_generic(net, tunnel->ip_tnl_net_id);
1157 /* fb_tunnel_dev will be unregisted in net-exit call. */
1158 if (itn->fb_tunnel_dev != dev)
1159 ip_tunnel_del(netdev_priv(dev));
1161 ip_tunnel_dst_reset_all(tunnel);
1163 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1165 /* Do least required initialization, rest of init is done in tunnel_init call */
1166 void ip_tunnel_setup(struct net_device *dev, int net_id)
1168 struct ip_tunnel *tunnel = netdev_priv(dev);
1169 tunnel->ip_tnl_net_id = net_id;
1171 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1173 MODULE_LICENSE("GPL");