Merge tag 'renesas-soc-fixes-for-v4.5' of git://git.kernel.org/pub/scm/linux/kernel...
[cascardo/linux.git] / net / ipv4 / ip_tunnel.c
1 /*
2  * Copyright (c) 2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/init.h>
34 #include <linux/in6.h>
35 #include <linux/inetdevice.h>
36 #include <linux/igmp.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/etherdevice.h>
39 #include <linux/if_ether.h>
40 #include <linux/if_vlan.h>
41 #include <linux/rculist.h>
42 #include <linux/err.h>
43
44 #include <net/sock.h>
45 #include <net/ip.h>
46 #include <net/icmp.h>
47 #include <net/protocol.h>
48 #include <net/ip_tunnels.h>
49 #include <net/arp.h>
50 #include <net/checksum.h>
51 #include <net/dsfield.h>
52 #include <net/inet_ecn.h>
53 #include <net/xfrm.h>
54 #include <net/net_namespace.h>
55 #include <net/netns/generic.h>
56 #include <net/rtnetlink.h>
57 #include <net/udp.h>
58
59 #if IS_ENABLED(CONFIG_IPV6)
60 #include <net/ipv6.h>
61 #include <net/ip6_fib.h>
62 #include <net/ip6_route.h>
63 #endif
64
65 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
66 {
67         return hash_32((__force u32)key ^ (__force u32)remote,
68                          IP_TNL_HASH_BITS);
69 }
70
/* Publish @dst as the cached route in @idst and drop the previous entry.
 *
 * A reference is taken on @dst before it is atomically swapped in with
 * xchg(), then the reference held on the old entry is released.  @dst
 * may be NULL: that invalidates the cache slot (see tunnel_dst_reset()).
 *
 * NOTE(review): idst->saddr is stored after the new dst is published,
 * so a concurrent reader (tunnel_rtable_get()) could briefly pair the
 * new dst with a stale saddr — presumably harmless, but confirm.
 */
static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
			     struct dst_entry *dst, __be32 saddr)
{
	struct dst_entry *old_dst;

	dst_clone(dst);
	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
	dst_release(old_dst);
	idst->saddr = saddr;
}
81
/* Store (@dst, @saddr) into the current CPU's cache slot for tunnel @t. */
static noinline void tunnel_dst_set(struct ip_tunnel *t,
			   struct dst_entry *dst, __be32 saddr)
{
	__tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
}
87
/* Invalidate the cached route on the local CPU only; for a full flush
 * across all CPUs use ip_tunnel_dst_reset_all().
 */
static void tunnel_dst_reset(struct ip_tunnel *t)
{
	tunnel_dst_set(t, NULL, 0);
}
92
93 void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
94 {
95         int i;
96
97         for_each_possible_cpu(i)
98                 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
99 }
100 EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
101
/* Fetch the per-cpu cached route for tunnel @t, if still valid.
 *
 * The cached dst is read under RCU and pinned with
 * atomic_inc_not_zero(): a refcount of zero means the dst is already
 * being freed, so that is treated as a cache miss.  An obsolete dst is
 * revalidated through its ->check() hook; if revalidation fails, the
 * local CPU's cache entry is reset and NULL is returned.  On a hit,
 * *saddr is set to the source address stored with the cache entry.
 *
 * Returns a referenced rtable (caller must ip_rt_put()) or NULL.
 */
static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
					u32 cookie, __be32 *saddr)
{
	struct ip_tunnel_dst *idst;
	struct dst_entry *dst;

	rcu_read_lock();
	idst = raw_cpu_ptr(t->dst_cache);
	dst = rcu_dereference(idst->dst);
	if (dst && !atomic_inc_not_zero(&dst->__refcnt))
		dst = NULL;
	if (dst) {
		if (!dst->obsolete || dst->ops->check(dst, cookie)) {
			*saddr = idst->saddr;
		} else {
			tunnel_dst_reset(t);
			dst_release(dst);
			dst = NULL;
		}
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}
125
126 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
127                                 __be16 flags, __be32 key)
128 {
129         if (p->i_flags & TUNNEL_KEY) {
130                 if (flags & TUNNEL_KEY)
131                         return key == p->i_key;
132                 else
133                         /* key expected, none present */
134                         return false;
135         } else
136                 return !(flags & TUNNEL_KEY);
137 }
138
/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if they do not match a configured keyless tunnel,
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for input.
*/
/* Find the best tunnel for an incoming packet, from most to least
 * specific.  Within each pass, a tunnel on the matching @link wins
 * immediately; otherwise the first match is remembered in @cand and
 * used only if no later, more specific pass produces a link match.
 * Must be called under RCU (the hash lists and collect_md_tun are
 * read with the _rcu accessors).
 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	/* Pass 1: exact (local, remote) address match. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	/* Pass 2: remote matches, tunnel has a wildcard local address. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* Re-hash with a wildcard remote for the remaining passes. */
	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	/* Pass 3: tunnel bound to our local address with no remote, or a
	 * multicast destination matching the tunnel's daddr.
	 */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	/* Pass 4: key-only tunnels (both addresses wildcarded). */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	/* A collect_md tunnel (if registered) takes anything left over. */
	t = rcu_dereference(itn->collect_md_tun);
	if (t)
		return t;

	/* Last resort: the netns fallback device, if it is up. */
	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
242
243 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
244                                     struct ip_tunnel_parm *parms)
245 {
246         unsigned int h;
247         __be32 remote;
248         __be32 i_key = parms->i_key;
249
250         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
251                 remote = parms->iph.daddr;
252         else
253                 remote = 0;
254
255         if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
256                 i_key = 0;
257
258         h = ip_tunnel_hash(i_key, remote);
259         return &itn->tunnels[h];
260 }
261
262 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
263 {
264         struct hlist_head *head = ip_bucket(itn, &t->parms);
265
266         if (t->collect_md)
267                 rcu_assign_pointer(itn->collect_md_tun, t);
268         hlist_add_head_rcu(&t->hash_node, head);
269 }
270
/* Unlink tunnel @t from the netns hash table, clearing the collect_md
 * shortcut first so lookups stop returning it.
 */
static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, NULL);
	hlist_del_init_rcu(&t->hash_node);
}
277
278 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
279                                         struct ip_tunnel_parm *parms,
280                                         int type)
281 {
282         __be32 remote = parms->iph.daddr;
283         __be32 local = parms->iph.saddr;
284         __be32 key = parms->i_key;
285         __be16 flags = parms->i_flags;
286         int link = parms->link;
287         struct ip_tunnel *t = NULL;
288         struct hlist_head *head = ip_bucket(itn, parms);
289
290         hlist_for_each_entry_rcu(t, head, hash_node) {
291                 if (local == t->parms.iph.saddr &&
292                     remote == t->parms.iph.daddr &&
293                     link == t->parms.link &&
294                     type == t->dev->type &&
295                     ip_tunnel_key_match(&t->parms, flags, key))
296                         break;
297         }
298         return t;
299 }
300
301 static struct net_device *__ip_tunnel_create(struct net *net,
302                                              const struct rtnl_link_ops *ops,
303                                              struct ip_tunnel_parm *parms)
304 {
305         int err;
306         struct ip_tunnel *tunnel;
307         struct net_device *dev;
308         char name[IFNAMSIZ];
309
310         if (parms->name[0])
311                 strlcpy(name, parms->name, IFNAMSIZ);
312         else {
313                 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
314                         err = -E2BIG;
315                         goto failed;
316                 }
317                 strlcpy(name, ops->kind, IFNAMSIZ);
318                 strncat(name, "%d", 2);
319         }
320
321         ASSERT_RTNL();
322         dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
323         if (!dev) {
324                 err = -ENOMEM;
325                 goto failed;
326         }
327         dev_net_set(dev, net);
328
329         dev->rtnl_link_ops = ops;
330
331         tunnel = netdev_priv(dev);
332         tunnel->parms = *parms;
333         tunnel->net = net;
334
335         err = register_netdevice(dev);
336         if (err)
337                 goto failed_free;
338
339         return dev;
340
341 failed_free:
342         free_netdev(dev);
343 failed:
344         return ERR_PTR(err);
345 }
346
347 static inline void init_tunnel_flow(struct flowi4 *fl4,
348                                     int proto,
349                                     __be32 daddr, __be32 saddr,
350                                     __be32 key, __u8 tos, int oif)
351 {
352         memset(fl4, 0, sizeof(*fl4));
353         fl4->flowi4_oif = oif;
354         fl4->daddr = daddr;
355         fl4->saddr = saddr;
356         fl4->flowi4_tos = tos;
357         fl4->flowi4_proto = proto;
358         fl4->fl4_gre_key = key;
359 }
360
/* Probe the underlying device a tunnel will transmit through, derive
 * dev->needed_headroom from it, and return a suitable MTU for the
 * tunnel device.
 *
 * For point-to-point tunnels the output device is found by routing to
 * the configured destination; otherwise the device bound via
 * parms.link (if any) is used.  When neither is available, the
 * conservative LL_MAX_HEADER / ETH_DATA_LEN defaults apply.
 */
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			/* Warm the route cache while we hold the route. */
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	/* No route found (or no daddr): fall back to the bound device. */
	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	/* 68 is the historical IPv4 minimum MTU (RFC 791). */
	if (mtu < 68)
		mtu = 68;

	return mtu;
}
407
408 static struct ip_tunnel *ip_tunnel_create(struct net *net,
409                                           struct ip_tunnel_net *itn,
410                                           struct ip_tunnel_parm *parms)
411 {
412         struct ip_tunnel *nt;
413         struct net_device *dev;
414
415         BUG_ON(!itn->fb_tunnel_dev);
416         dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
417         if (IS_ERR(dev))
418                 return ERR_CAST(dev);
419
420         dev->mtu = ip_tunnel_bind_dev(dev);
421
422         nt = netdev_priv(dev);
423         ip_tunnel_add(itn, nt);
424         return nt;
425 }
426
/* Common receive path for IPv4 tunnels, called by protocol handlers
 * after the outer header has been parsed into @tpi.
 *
 * Validates the checksum/sequence flags against the tunnel's
 * configuration, decapsulates ECN, updates per-cpu stats, scrubs the
 * skb when crossing network namespaces and hands the packet to the
 * tunnel's GRO cell.  Always consumes @skb; returns 0.
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
		  bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* Drop if the packet's TUNNEL_CSUM flag disagrees with the
	 * tunnel's configured expectation, in either direction.
	 */
	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	/* Enforce in-order delivery when TUNNEL_SEQ is configured; the
	 * signed (s32) difference copes with sequence-number wraparound.
	 */
	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	/* Fold the outer ECN bits into the inner header; err > 1 means
	 * the packet must be dropped.
	 */
	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	/* Scrub state (marks, etc.) only when changing netns. */
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	/* Attach collected metadata (collect_md mode) if provided. */
	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
499
500 static int ip_encap_hlen(struct ip_tunnel_encap *e)
501 {
502         const struct ip_tunnel_encap_ops *ops;
503         int hlen = -EINVAL;
504
505         if (e->type == TUNNEL_ENCAP_NONE)
506                 return 0;
507
508         if (e->type >= MAX_IPTUN_ENCAP_OPS)
509                 return -EINVAL;
510
511         rcu_read_lock();
512         ops = rcu_dereference(iptun_encaps[e->type]);
513         if (likely(ops && ops->encap_hlen))
514                 hlen = ops->encap_hlen(e);
515         rcu_read_unlock();
516
517         return hlen;
518 }
519
/* Registry of encapsulation handlers, indexed by encap type.  Entries
 * are installed/removed via ip_tunnel_encap_add_ops()/del_ops() and
 * dereferenced under RCU (see ip_encap_hlen() and ip_tunnel_encap()).
 */
const struct ip_tunnel_encap_ops __rcu *
		iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;
522
523 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
524                             unsigned int num)
525 {
526         if (num >= MAX_IPTUN_ENCAP_OPS)
527                 return -ERANGE;
528
529         return !cmpxchg((const struct ip_tunnel_encap_ops **)
530                         &iptun_encaps[num],
531                         NULL, ops) ? 0 : -1;
532 }
533 EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
534
535 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
536                             unsigned int num)
537 {
538         int ret;
539
540         if (num >= MAX_IPTUN_ENCAP_OPS)
541                 return -ERANGE;
542
543         ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
544                        &iptun_encaps[num],
545                        ops, NULL) == ops) ? 0 : -1;
546
547         synchronize_net();
548
549         return ret;
550 }
551 EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
552
553 int ip_tunnel_encap_setup(struct ip_tunnel *t,
554                           struct ip_tunnel_encap *ipencap)
555 {
556         int hlen;
557
558         memset(&t->encap, 0, sizeof(t->encap));
559
560         hlen = ip_encap_hlen(ipencap);
561         if (hlen < 0)
562                 return hlen;
563
564         t->encap.type = ipencap->type;
565         t->encap.sport = ipencap->sport;
566         t->encap.dport = ipencap->dport;
567         t->encap.flags = ipencap->flags;
568
569         t->encap_hlen = hlen;
570         t->hlen = t->encap_hlen + t->tun_hlen;
571
572         return 0;
573 }
574 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
575
576 int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
577                     u8 *protocol, struct flowi4 *fl4)
578 {
579         const struct ip_tunnel_encap_ops *ops;
580         int ret = -EINVAL;
581
582         if (t->encap.type == TUNNEL_ENCAP_NONE)
583                 return 0;
584
585         if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
586                 return -EINVAL;
587
588         rcu_read_lock();
589         ops = rcu_dereference(iptun_encaps[t->encap.type]);
590         if (likely(ops && ops->build_header))
591                 ret = ops->build_header(skb, &t->encap, protocol, fl4);
592         rcu_read_unlock();
593
594         return ret;
595 }
596 EXPORT_SYMBOL(ip_tunnel_encap);
597
/* Propagate the tunnel path MTU to the inner flow and signal too-big
 * packets back to the sender (ICMP fragmentation-needed for IPv4 with
 * DF set, ICMPV6_PKT_TOOBIG for IPv6).
 *
 * Returns 0 if the packet may proceed, or -E2BIG when the caller must
 * drop it (an ICMP error has already been emitted).
 */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df,
			    const struct iphdr *inner_iph)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	/* With DF, the effective MTU is the route MTU minus all tunnel
	 * overhead; without it, fall back to the skb's own dst (or the
	 * device MTU).
	 */
	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		/* Only non-GSO DF packets larger than the MTU get an
		 * ICMP frag-needed back.
		 */
		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off & htons(IP_DF)) &&
		    mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		/* Record the reduced MTU on host routes (plen == 128) or
		 * point-to-point tunnels so later lookups see it.
		 */
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
647
/* Common transmit path for IPv4 tunnels.
 *
 * Resolves the outer destination (including the NBMA case where it is
 * derived from the inner packet), builds the flow key, applies any
 * configured encapsulation, routes the packet (using the per-cpu route
 * cache for "connected" tunnels with a fixed daddr), enforces PMTU,
 * fills in TOS/TTL/DF, and hands off to iptunnel_xmit().  Consumes
 * @skb on every path.
 */
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	bool connected;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	/* "connected" tunnels (fixed daddr) may use the route cache. */
	connected = (tunnel->parms.iph.daddr != 0);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (!skb_dst(skb)) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		/* Derive the outer destination from the inner packet. */
		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (!neigh)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			/* Only IPv4-compatible IPv6 addresses embed a
			 * usable IPv4 destination (last 32 bits).
			 */
			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		/* Destination varies per packet: route cache unusable. */
		connected = false;
	}

	/* Low TOS bit set means "inherit TOS from the inner packet". */
	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	/* Try the per-cpu route cache first for connected tunnels. */
	rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
	}

	/* Routing back to ourselves would loop forever. */
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	/* Throttled link-failure feedback after recent ICMP errors. */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	/* TTL 0 means "inherit from inner packet" (or route default). */
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	/* Copy the inner DF bit to the outer header for IPv4 payloads. */
	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
809
/* Apply new parameters @p to an existing tunnel @t.
 *
 * The tunnel is unlinked from the hash table before the fields that
 * determine its bucket/identity (addresses and keys) change, then
 * re-linked.  If the bound link changed, the tunnel is re-bound to the
 * underlying device (optionally updating the MTU), and the per-cpu
 * route cache is flushed in any case.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(itn, t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		/* Non-Ethernet tunnels expose the endpoints as the
		 * device hardware/broadcast addresses.
		 */
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	/* Cached routes may point at the old endpoints: flush them. */
	ip_tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}
842
/* ip_tunnel_ioctl - handle the legacy SIOC*TUNNEL ioctls.
 * @dev: device the ioctl was issued on; may be the per-netns fallback
 *	 device, which acts as a control interface for the tunnel type
 * @p:	 tunnel parameters copied in from userspace; for SIOCGETTUNNEL
 *	 the result is written back into it
 * @cmd: SIOCGETTUNNEL, SIOCADDTUNNEL, SIOCCHGTUNNEL or SIOCDELTUNNEL
 *
 * Returns 0 on success or a negative errno.
 * NOTE(review): appears to rely on RTNL serialization by the caller —
 * confirm against the ioctl entry paths.
 */
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			/* Query via the fallback device: look up the tunnel
			 * matching @p, defaulting to the fallback device's
			 * own parameters when none exists.
			 */
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		/* When a fixed TTL is configured, force DF on the outer
		 * header.
		 */
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			/* Non-VTI tunnels only carry a key when TUNNEL_KEY
			 * is set; VTI keeps the key fields as-is.
			 */
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t) {
				/* @p matches an existing tunnel: only valid
				 * if it is the device being changed itself.
				 */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				/* Changing daddr may change the link-layer
				 * flavour (broadcast vs point-to-point);
				 * that cannot be done on a live netdev.
				 */
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			/* Delete-by-parms through the fallback device; the
			 * fallback device itself must never be deleted.
			 */
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
945
946 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
947 {
948         struct ip_tunnel *tunnel = netdev_priv(dev);
949         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
950         int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
951
952         if (new_mtu < 68)
953                 return -EINVAL;
954
955         if (new_mtu > max_mtu) {
956                 if (strict)
957                         return -EINVAL;
958
959                 new_mtu = max_mtu;
960         }
961
962         dev->mtu = new_mtu;
963         return 0;
964 }
965 EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
966
967 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
968 {
969         return __ip_tunnel_change_mtu(dev, new_mtu, true);
970 }
971 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
972
973 static void ip_tunnel_dev_free(struct net_device *dev)
974 {
975         struct ip_tunnel *tunnel = netdev_priv(dev);
976
977         gro_cells_destroy(&tunnel->gro_cells);
978         free_percpu(tunnel->dst_cache);
979         free_percpu(dev->tstats);
980         free_netdev(dev);
981 }
982
983 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
984 {
985         struct ip_tunnel *tunnel = netdev_priv(dev);
986         struct ip_tunnel_net *itn;
987
988         itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
989
990         if (itn->fb_tunnel_dev != dev) {
991                 ip_tunnel_del(itn, netdev_priv(dev));
992                 unregister_netdevice_queue(dev, head);
993         }
994 }
995 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
996
997 struct net *ip_tunnel_get_link_net(const struct net_device *dev)
998 {
999         struct ip_tunnel *tunnel = netdev_priv(dev);
1000
1001         return tunnel->net;
1002 }
1003 EXPORT_SYMBOL(ip_tunnel_get_link_net);
1004
1005 int ip_tunnel_get_iflink(const struct net_device *dev)
1006 {
1007         struct ip_tunnel *tunnel = netdev_priv(dev);
1008
1009         return tunnel->parms.link;
1010 }
1011 EXPORT_SYMBOL(ip_tunnel_get_iflink);
1012
/* ip_tunnel_init_net - per-netns setup for one tunnel type.
 * @net: network namespace being initialised
 * @ip_tnl_net_id: pernet id locating this type's ip_tunnel_net
 * @ops: rtnl_link_ops of the tunnel type, or NULL when the type has no
 *	 fallback device (then only the hash table is initialised)
 * @devname: name for the per-netns fallback device (e.g. "tunl0")
 *
 * Returns 0 on success or a negative errno if the fallback device
 * could not be created.
 */
int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
				  struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops) {
		/* No fallback device for this tunnel type. */
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	/* Converts an ERR_PTR left in fb_tunnel_dev into an errno. */
	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1047
/* Queue every device belonging to this tunnel type/netns on @head for
 * batched unregistration.  Two passes are needed: the netdev list walk
 * catches all devices with matching rtnl_link_ops that live in @itn's
 * netns, while the hash-table walk additionally catches tunnels whose
 * netdev was moved into a different netns.
 * NOTE(review): appears to require RTNL held by the caller (see
 * ip_tunnel_delete_net) — confirm.
 */
static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}
1072
1073 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
1074 {
1075         LIST_HEAD(list);
1076
1077         rtnl_lock();
1078         ip_tunnel_destroy(itn, &list, ops);
1079         unregister_netdevice_many(&list);
1080         rtnl_unlock();
1081 }
1082 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
1083
/* ip_tunnel_newlink - shared rtnl newlink handler for ip tunnel drivers.
 * @dev: the new (not yet registered) tunnel netdev
 * @tb:  netlink attributes from the request
 * @p:   parsed tunnel parameters
 *
 * Rejects duplicates, registers the netdev and inserts the tunnel into
 * the per-netns hash table.  Returns 0 or a negative errno.
 */
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (nt->collect_md) {
		/* Only one metadata-collecting tunnel per netns. */
		if (rtnl_dereference(itn->collect_md_tun))
			return -EEXIST;
	} else {
		/* Reject a tunnel with identical parameters. */
		if (ip_tunnel_find(itn, p, dev->type))
			return -EEXIST;
	}

	nt->net = net;
	nt->parms = *p;
	err = register_netdevice(dev);
	if (err)
		goto out;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	/* No explicit MTU requested: use the one derived from the
	 * underlying route/device.
	 */
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	ip_tunnel_add(itn, nt);
out:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1122
1123 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1124                          struct ip_tunnel_parm *p)
1125 {
1126         struct ip_tunnel *t;
1127         struct ip_tunnel *tunnel = netdev_priv(dev);
1128         struct net *net = tunnel->net;
1129         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1130
1131         if (dev == itn->fb_tunnel_dev)
1132                 return -EINVAL;
1133
1134         t = ip_tunnel_find(itn, p, dev->type);
1135
1136         if (t) {
1137                 if (t->dev != dev)
1138                         return -EEXIST;
1139         } else {
1140                 t = tunnel;
1141
1142                 if (dev->type != ARPHRD_ETHER) {
1143                         unsigned int nflags = 0;
1144
1145                         if (ipv4_is_multicast(p->iph.daddr))
1146                                 nflags = IFF_BROADCAST;
1147                         else if (p->iph.daddr)
1148                                 nflags = IFF_POINTOPOINT;
1149
1150                         if ((dev->flags ^ nflags) &
1151                             (IFF_POINTOPOINT | IFF_BROADCAST))
1152                                 return -EINVAL;
1153                 }
1154         }
1155
1156         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1157         return 0;
1158 }
1159 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1160
/* ip_tunnel_init - common ndo_init for ip tunnel devices.
 * Allocates per-cpu stats and the per-cpu dst cache, initialises GRO
 * cells, and fills in the invariant fields of the tunnel's IPv4 header
 * template.  On failure, everything allocated here is released before
 * returning the errno.
 */
int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	/* free_netdev() will invoke this destructor to undo the
	 * allocations below once init has succeeded.
	 */
	dev->destructor = ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
	if (!tunnel->dst_cache) {
		free_percpu(dev->tstats);
		return -ENOMEM;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		free_percpu(tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	/* Fixed parts of the outer IPv4 header: version 4, no options. */
	iph->version		= 4;
	iph->ihl		= 5;

	if (tunnel->collect_md) {
		/* Metadata (external mode) tunnels stay in their netns
		 * and keep the dst attached to transmitted skbs.
		 */
		dev->features |= NETIF_F_NETNS_LOCAL;
		netif_keep_dst(dev);
	}
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);
1198
1199 void ip_tunnel_uninit(struct net_device *dev)
1200 {
1201         struct ip_tunnel *tunnel = netdev_priv(dev);
1202         struct net *net = tunnel->net;
1203         struct ip_tunnel_net *itn;
1204
1205         itn = net_generic(net, tunnel->ip_tnl_net_id);
1206         /* fb_tunnel_dev will be unregisted in net-exit call. */
1207         if (itn->fb_tunnel_dev != dev)
1208                 ip_tunnel_del(itn, netdev_priv(dev));
1209
1210         ip_tunnel_dst_reset_all(tunnel);
1211 }
1212 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1213
1214 /* Do least required initialization, rest of init is done in tunnel_init call */
1215 void ip_tunnel_setup(struct net_device *dev, int net_id)
1216 {
1217         struct ip_tunnel *tunnel = netdev_priv(dev);
1218         tunnel->ip_tnl_net_id = net_id;
1219 }
1220 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1221
1222 MODULE_LICENSE("GPL");