ip_tunnel: Add sanity checks to ip_tunnel_encap_add_ops()
[cascardo/linux.git] / net / ipv4 / ip_tunnel.c
1 /*
2  * Copyright (c) 2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43 #include <linux/err.h>
44
45 #include <net/sock.h>
46 #include <net/ip.h>
47 #include <net/icmp.h>
48 #include <net/protocol.h>
49 #include <net/ip_tunnels.h>
50 #include <net/arp.h>
51 #include <net/checksum.h>
52 #include <net/dsfield.h>
53 #include <net/inet_ecn.h>
54 #include <net/xfrm.h>
55 #include <net/net_namespace.h>
56 #include <net/netns/generic.h>
57 #include <net/rtnetlink.h>
58 #include <net/udp.h>
59
60 #if IS_ENABLED(CONFIG_IPV6)
61 #include <net/ipv6.h>
62 #include <net/ip6_fib.h>
63 #include <net/ip6_route.h>
64 #endif
65
66 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
67 {
68         return hash_32((__force u32)key ^ (__force u32)remote,
69                          IP_TNL_HASH_BITS);
70 }
71
/* Install @dst (with a fresh reference) into cache slot @idst and record
 * the source address it was looked up with.  The xchg() swaps the
 * pointer atomically with respect to concurrent readers; the previous
 * entry's reference is dropped afterwards.
 * NOTE(review): the saddr store is not ordered against the xchg(), so a
 * reader may briefly see a new dst paired with a stale saddr — confirm
 * callers tolerate this.
 */
static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
			     struct dst_entry *dst, __be32 saddr)
{
	struct dst_entry *old_dst;

	/* Take our own reference before publishing the pointer. */
	dst_clone(dst);
	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
	dst_release(old_dst);
	idst->saddr = saddr;
}
82
/* Update the dst cache slot belonging to the current CPU for tunnel @t. */
static noinline void tunnel_dst_set(struct ip_tunnel *t,
			   struct dst_entry *dst, __be32 saddr)
{
	__tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
}
88
/* Invalidate the current CPU's cached route for tunnel @t. */
static void tunnel_dst_reset(struct ip_tunnel *t)
{
	tunnel_dst_set(t, NULL, 0);
}
93
94 void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
95 {
96         int i;
97
98         for_each_possible_cpu(i)
99                 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
100 }
101 EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
102
/* Fetch the current CPU's cached route for tunnel @t, if still valid.
 *
 * On success the returned rtable carries a reference taken here and
 * *saddr is set to the source address the route was cached with.
 * Returns NULL when the slot is empty, the dst could not be pinned
 * (refcount already zero), or the dst failed its validity check — in
 * the latter case the CPU's cache slot is also reset.
 */
static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
					u32 cookie, __be32 *saddr)
{
	struct ip_tunnel_dst *idst;
	struct dst_entry *dst;

	rcu_read_lock();
	idst = raw_cpu_ptr(t->dst_cache);
	dst = rcu_dereference(idst->dst);
	/* Pin the entry; a zero refcount means it is being torn down. */
	if (dst && !atomic_inc_not_zero(&dst->__refcnt))
		dst = NULL;
	if (dst) {
		if (!dst->obsolete || dst->ops->check(dst, cookie)) {
			*saddr = idst->saddr;
		} else {
			/* Stale route: clear the slot and drop our ref. */
			tunnel_dst_reset(t);
			dst_release(dst);
			dst = NULL;
		}
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}
126
127 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
128                                 __be16 flags, __be32 key)
129 {
130         if (p->i_flags & TUNNEL_KEY) {
131                 if (flags & TUNNEL_KEY)
132                         return key == p->i_key;
133                 else
134                         /* key expected, none present */
135                         return false;
136         } else
137                 return !(flags & TUNNEL_KEY);
138 }
139
/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matching any configured keyless tunnel,
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for input.
*/
/* Find the tunnel that should receive a packet with the given remote,
 * local address and key, arriving on @link.
 *
 * Candidates are scanned from most to least specific:
 *   1. exact remote + local match;
 *   2. remote match with wildcard (zero) tunnel source;
 *   3. local-only match (tunnel remote zero), or multicast local addr;
 *   4. key-only match on fully wildcard tunnels (skipped for
 *      TUNNEL_NO_KEY packets).
 * In every pass a tunnel bound to the packet's link wins immediately;
 * otherwise the first up candidate on another link is remembered.  If
 * nothing matched, the namespace fallback device is returned when it is
 * up, else NULL.
 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	/* Pass 1: fully specified tunnels. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	/* Pass 2: remote matches, tunnel has no fixed source. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* Remaining passes look in the wildcard-remote bucket. */
	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	/* Pass 3: local address matches the tunnel source (no remote
	 * configured), or the packet arrived on a multicast address the
	 * tunnel has as its destination.
	 */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	/* Pass 4: key-only match on fully wildcard tunnels. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
240
241 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
242                                     struct ip_tunnel_parm *parms)
243 {
244         unsigned int h;
245         __be32 remote;
246         __be32 i_key = parms->i_key;
247
248         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
249                 remote = parms->iph.daddr;
250         else
251                 remote = 0;
252
253         if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
254                 i_key = 0;
255
256         h = ip_tunnel_hash(i_key, remote);
257         return &itn->tunnels[h];
258 }
259
260 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
261 {
262         struct hlist_head *head = ip_bucket(itn, &t->parms);
263
264         hlist_add_head_rcu(&t->hash_node, head);
265 }
266
/* Remove tunnel @t from its hash bucket (RCU-safe for readers). */
static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}
271
272 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
273                                         struct ip_tunnel_parm *parms,
274                                         int type)
275 {
276         __be32 remote = parms->iph.daddr;
277         __be32 local = parms->iph.saddr;
278         __be32 key = parms->i_key;
279         __be16 flags = parms->i_flags;
280         int link = parms->link;
281         struct ip_tunnel *t = NULL;
282         struct hlist_head *head = ip_bucket(itn, parms);
283
284         hlist_for_each_entry_rcu(t, head, hash_node) {
285                 if (local == t->parms.iph.saddr &&
286                     remote == t->parms.iph.daddr &&
287                     link == t->parms.link &&
288                     type == t->dev->type &&
289                     ip_tunnel_key_match(&t->parms, flags, key))
290                         break;
291         }
292         return t;
293 }
294
/* Allocate, initialize and register a tunnel net_device in @net.
 *
 * The device is named after parms->name if set; otherwise the template
 * "<kind>%d" is used and the core expands %d to a free index.  The
 * length check guarantees the two-byte "%d" suffix plus the NUL that
 * strlcpy already wrote still fit in IFNAMSIZ.  Must run under RTNL.
 *
 * Returns the registered device or an ERR_PTR() on failure.
 */
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}
340
341 static inline void init_tunnel_flow(struct flowi4 *fl4,
342                                     int proto,
343                                     __be32 daddr, __be32 saddr,
344                                     __be32 key, __u8 tos, int oif)
345 {
346         memset(fl4, 0, sizeof(*fl4));
347         fl4->flowi4_oif = oif;
348         fl4->daddr = daddr;
349         fl4->saddr = saddr;
350         fl4->flowi4_tos = tos;
351         fl4->flowi4_proto = proto;
352         fl4->fl4_gre_key = key;
353 }
354
/* Determine the lower device the tunnel will transmit through and
 * derive headroom and MTU defaults from it.
 *
 * For tunnels with a fixed destination the route is looked up (priming
 * the per-CPU dst cache); otherwise parms.link is used when set.
 * Updates dev->needed_headroom, dev->iflink and possibly dev->flags as
 * side effects.  Returns the MTU the tunnel device should use, never
 * below 68 (the IPv4 minimum).
 */
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			/* Cache the route while we hold it. */
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	/* Room for the lower device's headers plus our own. */
	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < 68)
		mtu = 68;

	return mtu;
}
402
403 static struct ip_tunnel *ip_tunnel_create(struct net *net,
404                                           struct ip_tunnel_net *itn,
405                                           struct ip_tunnel_parm *parms)
406 {
407         struct ip_tunnel *nt;
408         struct net_device *dev;
409
410         BUG_ON(!itn->fb_tunnel_dev);
411         dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
412         if (IS_ERR(dev))
413                 return ERR_CAST(dev);
414
415         dev->mtu = ip_tunnel_bind_dev(dev);
416
417         nt = netdev_priv(dev);
418         ip_tunnel_add(itn, nt);
419         return nt;
420 }
421
/* Common receive path for IP tunnels: validate the parsed outer header
 * in @tpi against the tunnel's configuration, decapsulate ECN, update
 * statistics and hand the inner packet to the stack via GRO cells.
 * Always consumes @skb and returns 0.
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* Checksum presence must agree with the tunnel's TUNNEL_CSUM
	 * setting in both directions.
	 */
	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	/* Drop out-of-order packets when sequencing is enabled; the s32
	 * cast keeps the comparison correct across wraparound.
	 */
	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	/* Propagate outer ECN bits inward; err > 1 means the packet
	 * must be dropped.
	 */
	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	/* Scrub packet state when crossing a netns boundary. */
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
490
491 static int ip_encap_hlen(struct ip_tunnel_encap *e)
492 {
493         const struct ip_tunnel_encap_ops *ops;
494         int hlen = -EINVAL;
495
496         if (e->type == TUNNEL_ENCAP_NONE)
497                 return 0;
498
499         if (e->type >= MAX_IPTUN_ENCAP_OPS)
500                 return -EINVAL;
501
502         rcu_read_lock();
503         ops = rcu_dereference(iptun_encaps[e->type]);
504         if (likely(ops && ops->encap_hlen))
505                 hlen = ops->encap_hlen(e);
506         rcu_read_unlock();
507
508         return hlen;
509 }
510
/* RCU-protected table of registered tunnel encapsulation handlers,
 * indexed by encap type.
 */
const struct ip_tunnel_encap_ops __rcu *
		iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;

/* Register @ops as the handler for encap type @num.
 * Returns 0 on success, -ERANGE for an out-of-range type, or -1 when
 * the slot is already occupied (cmpxchg only succeeds on NULL).
 */
int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	return !cmpxchg((const struct ip_tunnel_encap_ops **)
			&iptun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
525
/* Unregister @ops from encap slot @num.  The slot is cleared only if
 * it still holds @ops (-1 otherwise, -ERANGE for an invalid type).
 * synchronize_net() guarantees no RCU reader still uses the ops when
 * this returns, so the caller may free them.
 */
int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	int ret;

	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
		       &iptun_encaps[num],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
543
544 int ip_tunnel_encap_setup(struct ip_tunnel *t,
545                           struct ip_tunnel_encap *ipencap)
546 {
547         int hlen;
548
549         memset(&t->encap, 0, sizeof(t->encap));
550
551         hlen = ip_encap_hlen(ipencap);
552         if (hlen < 0)
553                 return hlen;
554
555         t->encap.type = ipencap->type;
556         t->encap.sport = ipencap->sport;
557         t->encap.dport = ipencap->dport;
558         t->encap.flags = ipencap->flags;
559
560         t->encap_hlen = hlen;
561         t->hlen = t->encap_hlen + t->tun_hlen;
562
563         return 0;
564 }
565 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
566
/* Let the configured encapsulation handler (if any) build its header
 * on @skb, possibly updating *protocol and @fl4.  Returns 0 when no
 * encap is configured, the handler's result otherwise, or -EINVAL when
 * no usable handler is registered for the type.
 */
int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
		    u8 *protocol, struct flowi4 *fl4)
{
	const struct ip_tunnel_encap_ops *ops;
	int ret = -EINVAL;

	if (t->encap.type == TUNNEL_ENCAP_NONE)
		return 0;

	rcu_read_lock();
	ops = rcu_dereference(iptun_encaps[t->encap.type]);
	if (likely(ops && ops->build_header))
		ret = ops->build_header(skb, &t->encap, protocol, fl4);
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap);
585
/* Check @skb against the path MTU of the tunnel route @rt, propagate
 * the learned MTU to the inner flow's dst, and send the appropriate
 * "too big" error (ICMP frag-needed or ICMPv6 packet-too-big) back to
 * the sender when the packet does not fit.  @df is the outer header's
 * frag_off.  Returns 0 if the packet may proceed, -E2BIG if rejected.
 */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	/* With DF set the tunnel path MTU governs; otherwise use the
	 * inner dst's MTU (or the device MTU as a fallback).
	 */
	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		/* Only unfragmentable (DF), non-GSO packets trigger an
		 * ICMP error.
		 */
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		/* Record the smaller MTU on the IPv6 route, but only
		 * for host routes or tunnels with a fixed unicast
		 * destination.
		 */
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
633
/* Common transmit path for IP tunnels: resolve the outer destination,
 * route the encapsulated packet, enforce PMTU, and emit it with the
 * outer header described by @tnl_params/@protocol.  Consumes @skb in
 * all cases (transmitted or dropped with stats updated).
 */
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;
	bool connected;			/* fixed daddr => dst cache usable */

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel: derive the outer destination from the
		 * inner packet's next hop.
		 */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			/* Only IPv4-compatible IPv6 destinations can be
			 * mapped to an IPv4 outer address.
			 */
			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	/* Low bit of tos means "inherit from the inner packet"; an
	 * inherited tos makes the flow per-packet, so the cached route
	 * cannot be used.
	 */
	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	/* Give the encapsulation layer a chance to adjust protocol/ports
	 * before routing.
	 */
	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
	}

	/* Routing back out of ourselves would loop forever. */
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	/* Recent ICMP errors from the far end: report link failure to
	 * the sender for a while instead of transmitting blindly.
	 */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		/* ttl 0 means inherit from the inner packet. */
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
798
/* Apply new parameters @p to an existing tunnel @t.  The tunnel is
 * unhashed and re-added because saddr/daddr/key determine its bucket.
 * Re-binds the underlying device when the link changed (optionally
 * updating the MTU), flushes the per-CPU dst cache, and notifies
 * userspace of the device change.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		/* Non-Ethernet tunnels expose the endpoints as the
		 * device hardware/broadcast addresses.
		 */
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	ip_tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}
831
/* Generic SIOC{GET,ADD,CHG,DEL}TUNNEL ioctl handler shared by the IP
 * tunnel drivers.  @p is the user-supplied parameter block, updated in
 * place for GET.  Returns 0 or a negative errno.
 */
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		/* On the fallback device, look up by the given params;
		 * on a real tunnel, report that tunnel itself.
		 */
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		/* A fixed TTL requires DF: PMTU discovery must work. */
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			/* Keys are meaningless without TUNNEL_KEY. */
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* The new params must not collide with
				 * a different existing tunnel.
				 */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				/* Changing between p-t-p, broadcast and
				 * plain modes is not allowed here.
				 */
				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			/* The fallback tunnel itself cannot be deleted. */
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
934
935 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
936 {
937         struct ip_tunnel *tunnel = netdev_priv(dev);
938         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
939
940         if (new_mtu < 68 ||
941             new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
942                 return -EINVAL;
943         dev->mtu = new_mtu;
944         return 0;
945 }
946 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
947
948 static void ip_tunnel_dev_free(struct net_device *dev)
949 {
950         struct ip_tunnel *tunnel = netdev_priv(dev);
951
952         gro_cells_destroy(&tunnel->gro_cells);
953         free_percpu(tunnel->dst_cache);
954         free_percpu(dev->tstats);
955         free_netdev(dev);
956 }
957
958 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
959 {
960         struct ip_tunnel *tunnel = netdev_priv(dev);
961         struct ip_tunnel_net *itn;
962
963         itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
964
965         if (itn->fb_tunnel_dev != dev) {
966                 ip_tunnel_del(netdev_priv(dev));
967                 unregister_netdevice_queue(dev, head);
968         }
969 }
970 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
971
972 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
973                                   struct rtnl_link_ops *ops, char *devname)
974 {
975         struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
976         struct ip_tunnel_parm parms;
977         unsigned int i;
978
979         for (i = 0; i < IP_TNL_HASH_SIZE; i++)
980                 INIT_HLIST_HEAD(&itn->tunnels[i]);
981
982         if (!ops) {
983                 itn->fb_tunnel_dev = NULL;
984                 return 0;
985         }
986
987         memset(&parms, 0, sizeof(parms));
988         if (devname)
989                 strlcpy(parms.name, devname, IFNAMSIZ);
990
991         rtnl_lock();
992         itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
993         /* FB netdevice is special: we have one, and only one per netns.
994          * Allowing to move it to another netns is clearly unsafe.
995          */
996         if (!IS_ERR(itn->fb_tunnel_dev)) {
997                 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
998                 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
999                 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1000         }
1001         rtnl_unlock();
1002
1003         return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1004 }
1005 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1006
1007 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
1008                               struct rtnl_link_ops *ops)
1009 {
1010         struct net *net = dev_net(itn->fb_tunnel_dev);
1011         struct net_device *dev, *aux;
1012         int h;
1013
1014         for_each_netdev_safe(net, dev, aux)
1015                 if (dev->rtnl_link_ops == ops)
1016                         unregister_netdevice_queue(dev, head);
1017
1018         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1019                 struct ip_tunnel *t;
1020                 struct hlist_node *n;
1021                 struct hlist_head *thead = &itn->tunnels[h];
1022
1023                 hlist_for_each_entry_safe(t, n, thead, hash_node)
1024                         /* If dev is in the same netns, it has already
1025                          * been added to the list by the previous loop.
1026                          */
1027                         if (!net_eq(dev_net(t->dev), net))
1028                                 unregister_netdevice_queue(t->dev, head);
1029         }
1030 }
1031
1032 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
1033 {
1034         LIST_HEAD(list);
1035
1036         rtnl_lock();
1037         ip_tunnel_destroy(itn, &list, ops);
1038         unregister_netdevice_many(&list);
1039         rtnl_unlock();
1040 }
1041 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
1042
1043 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1044                       struct ip_tunnel_parm *p)
1045 {
1046         struct ip_tunnel *nt;
1047         struct net *net = dev_net(dev);
1048         struct ip_tunnel_net *itn;
1049         int mtu;
1050         int err;
1051
1052         nt = netdev_priv(dev);
1053         itn = net_generic(net, nt->ip_tnl_net_id);
1054
1055         if (ip_tunnel_find(itn, p, dev->type))
1056                 return -EEXIST;
1057
1058         nt->net = net;
1059         nt->parms = *p;
1060         err = register_netdevice(dev);
1061         if (err)
1062                 goto out;
1063
1064         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1065                 eth_hw_addr_random(dev);
1066
1067         mtu = ip_tunnel_bind_dev(dev);
1068         if (!tb[IFLA_MTU])
1069                 dev->mtu = mtu;
1070
1071         ip_tunnel_add(itn, nt);
1072
1073 out:
1074         return err;
1075 }
1076 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1077
1078 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1079                          struct ip_tunnel_parm *p)
1080 {
1081         struct ip_tunnel *t;
1082         struct ip_tunnel *tunnel = netdev_priv(dev);
1083         struct net *net = tunnel->net;
1084         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1085
1086         if (dev == itn->fb_tunnel_dev)
1087                 return -EINVAL;
1088
1089         t = ip_tunnel_find(itn, p, dev->type);
1090
1091         if (t) {
1092                 if (t->dev != dev)
1093                         return -EEXIST;
1094         } else {
1095                 t = tunnel;
1096
1097                 if (dev->type != ARPHRD_ETHER) {
1098                         unsigned int nflags = 0;
1099
1100                         if (ipv4_is_multicast(p->iph.daddr))
1101                                 nflags = IFF_BROADCAST;
1102                         else if (p->iph.daddr)
1103                                 nflags = IFF_POINTOPOINT;
1104
1105                         if ((dev->flags ^ nflags) &
1106                             (IFF_POINTOPOINT | IFF_BROADCAST))
1107                                 return -EINVAL;
1108                 }
1109         }
1110
1111         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1112         return 0;
1113 }
1114 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1115
1116 int ip_tunnel_init(struct net_device *dev)
1117 {
1118         struct ip_tunnel *tunnel = netdev_priv(dev);
1119         struct iphdr *iph = &tunnel->parms.iph;
1120         int err;
1121
1122         dev->destructor = ip_tunnel_dev_free;
1123         dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1124         if (!dev->tstats)
1125                 return -ENOMEM;
1126
1127         tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1128         if (!tunnel->dst_cache) {
1129                 free_percpu(dev->tstats);
1130                 return -ENOMEM;
1131         }
1132
1133         err = gro_cells_init(&tunnel->gro_cells, dev);
1134         if (err) {
1135                 free_percpu(tunnel->dst_cache);
1136                 free_percpu(dev->tstats);
1137                 return err;
1138         }
1139
1140         tunnel->dev = dev;
1141         tunnel->net = dev_net(dev);
1142         strcpy(tunnel->parms.name, dev->name);
1143         iph->version            = 4;
1144         iph->ihl                = 5;
1145
1146         return 0;
1147 }
1148 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1149
1150 void ip_tunnel_uninit(struct net_device *dev)
1151 {
1152         struct ip_tunnel *tunnel = netdev_priv(dev);
1153         struct net *net = tunnel->net;
1154         struct ip_tunnel_net *itn;
1155
1156         itn = net_generic(net, tunnel->ip_tnl_net_id);
1157         /* fb_tunnel_dev will be unregisted in net-exit call. */
1158         if (itn->fb_tunnel_dev != dev)
1159                 ip_tunnel_del(netdev_priv(dev));
1160
1161         ip_tunnel_dst_reset_all(tunnel);
1162 }
1163 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1164
/* Do least required initialization, rest of init is done in tunnel_init call */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	/* Record the net_generic() id so later callbacks (init/uninit/
	 * dellink) can locate this tunnel type's struct ip_tunnel_net.
	 */
	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1172
1173 MODULE_LICENSE("GPL");