Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
[cascardo/linux.git] / net / ipv4 / ip_tunnel.c
1 /*
2  * Copyright (c) 2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43 #include <linux/err.h>
44
45 #include <net/sock.h>
46 #include <net/ip.h>
47 #include <net/icmp.h>
48 #include <net/protocol.h>
49 #include <net/ip_tunnels.h>
50 #include <net/arp.h>
51 #include <net/checksum.h>
52 #include <net/dsfield.h>
53 #include <net/inet_ecn.h>
54 #include <net/xfrm.h>
55 #include <net/net_namespace.h>
56 #include <net/netns/generic.h>
57 #include <net/rtnetlink.h>
58
59 #if IS_ENABLED(CONFIG_IPV6)
60 #include <net/ipv6.h>
61 #include <net/ip6_fib.h>
62 #include <net/ip6_route.h>
63 #endif
64
65 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
66 {
67         return hash_32((__force u32)key ^ (__force u32)remote,
68                          IP_TNL_HASH_BITS);
69 }
70
71 static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
72                              struct dst_entry *dst)
73 {
74         struct dst_entry *old_dst;
75
76         dst_clone(dst);
77         old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
78         dst_release(old_dst);
79 }
80
81 static void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
82 {
83         __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst);
84 }
85
86 static void tunnel_dst_reset(struct ip_tunnel *t)
87 {
88         tunnel_dst_set(t, NULL);
89 }
90
91 void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
92 {
93         int i;
94
95         for_each_possible_cpu(i)
96                 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
97 }
98 EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
99
100 static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
101 {
102         struct dst_entry *dst;
103
104         rcu_read_lock();
105         dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
106         if (dst && !atomic_inc_not_zero(&dst->__refcnt))
107                 dst = NULL;
108         if (dst) {
109                 if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
110                         tunnel_dst_reset(t);
111                         dst_release(dst);
112                         dst = NULL;
113                 }
114         }
115         rcu_read_unlock();
116         return (struct rtable *)dst;
117 }
118
119 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
120                                 __be16 flags, __be32 key)
121 {
122         if (p->i_flags & TUNNEL_KEY) {
123                 if (flags & TUNNEL_KEY)
124                         return key == p->i_key;
125                 else
126                         /* key expected, none present */
127                         return false;
128         } else
129                 return !(flags & TUNNEL_KEY);
130 }
131
132 /* Fallback tunnel: no source, no destination, no key, no options
133
134    Tunnel hash table:
135    We require exact key match i.e. if a key is present in packet
136    it will match only tunnel with the same key; if it is not present,
137    it will match only keyless tunnel.
138
139    All keysless packets, if not matched configured keyless tunnels
140    will match fallback tunnel.
141    Given src, dst and key, find appropriate for input tunnel.
142 */
143 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
144                                    int link, __be16 flags,
145                                    __be32 remote, __be32 local,
146                                    __be32 key)
147 {
148         unsigned int hash;
149         struct ip_tunnel *t, *cand = NULL;
150         struct hlist_head *head;
151
152         hash = ip_tunnel_hash(key, remote);
153         head = &itn->tunnels[hash];
154
155         hlist_for_each_entry_rcu(t, head, hash_node) {
156                 if (local != t->parms.iph.saddr ||
157                     remote != t->parms.iph.daddr ||
158                     !(t->dev->flags & IFF_UP))
159                         continue;
160
161                 if (!ip_tunnel_key_match(&t->parms, flags, key))
162                         continue;
163
164                 if (t->parms.link == link)
165                         return t;
166                 else
167                         cand = t;
168         }
169
170         hlist_for_each_entry_rcu(t, head, hash_node) {
171                 if (remote != t->parms.iph.daddr ||
172                     !(t->dev->flags & IFF_UP))
173                         continue;
174
175                 if (!ip_tunnel_key_match(&t->parms, flags, key))
176                         continue;
177
178                 if (t->parms.link == link)
179                         return t;
180                 else if (!cand)
181                         cand = t;
182         }
183
184         hash = ip_tunnel_hash(key, 0);
185         head = &itn->tunnels[hash];
186
187         hlist_for_each_entry_rcu(t, head, hash_node) {
188                 if ((local != t->parms.iph.saddr &&
189                      (local != t->parms.iph.daddr ||
190                       !ipv4_is_multicast(local))) ||
191                     !(t->dev->flags & IFF_UP))
192                         continue;
193
194                 if (!ip_tunnel_key_match(&t->parms, flags, key))
195                         continue;
196
197                 if (t->parms.link == link)
198                         return t;
199                 else if (!cand)
200                         cand = t;
201         }
202
203         if (flags & TUNNEL_NO_KEY)
204                 goto skip_key_lookup;
205
206         hlist_for_each_entry_rcu(t, head, hash_node) {
207                 if (t->parms.i_key != key ||
208                     !(t->dev->flags & IFF_UP))
209                         continue;
210
211                 if (t->parms.link == link)
212                         return t;
213                 else if (!cand)
214                         cand = t;
215         }
216
217 skip_key_lookup:
218         if (cand)
219                 return cand;
220
221         if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
222                 return netdev_priv(itn->fb_tunnel_dev);
223
224
225         return NULL;
226 }
227 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
228
229 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
230                                     struct ip_tunnel_parm *parms)
231 {
232         unsigned int h;
233         __be32 remote;
234         __be32 i_key = parms->i_key;
235
236         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
237                 remote = parms->iph.daddr;
238         else
239                 remote = 0;
240
241         if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
242                 i_key = 0;
243
244         h = ip_tunnel_hash(i_key, remote);
245         return &itn->tunnels[h];
246 }
247
248 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
249 {
250         struct hlist_head *head = ip_bucket(itn, &t->parms);
251
252         hlist_add_head_rcu(&t->hash_node, head);
253 }
254
/* Unlink tunnel @t from the tunnel hash table. The _init variant is used,
 * so the node is left in a re-initialised state after removal.
 */
static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}
259
260 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
261                                         struct ip_tunnel_parm *parms,
262                                         int type)
263 {
264         __be32 remote = parms->iph.daddr;
265         __be32 local = parms->iph.saddr;
266         __be32 key = parms->i_key;
267         __be16 flags = parms->i_flags;
268         int link = parms->link;
269         struct ip_tunnel *t = NULL;
270         struct hlist_head *head = ip_bucket(itn, parms);
271
272         hlist_for_each_entry_rcu(t, head, hash_node) {
273                 if (local == t->parms.iph.saddr &&
274                     remote == t->parms.iph.daddr &&
275                     link == t->parms.link &&
276                     type == t->dev->type &&
277                     ip_tunnel_key_match(&t->parms, flags, key))
278                         break;
279         }
280         return t;
281 }
282
283 static struct net_device *__ip_tunnel_create(struct net *net,
284                                              const struct rtnl_link_ops *ops,
285                                              struct ip_tunnel_parm *parms)
286 {
287         int err;
288         struct ip_tunnel *tunnel;
289         struct net_device *dev;
290         char name[IFNAMSIZ];
291
292         if (parms->name[0])
293                 strlcpy(name, parms->name, IFNAMSIZ);
294         else {
295                 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
296                         err = -E2BIG;
297                         goto failed;
298                 }
299                 strlcpy(name, ops->kind, IFNAMSIZ);
300                 strncat(name, "%d", 2);
301         }
302
303         ASSERT_RTNL();
304         dev = alloc_netdev(ops->priv_size, name, ops->setup);
305         if (!dev) {
306                 err = -ENOMEM;
307                 goto failed;
308         }
309         dev_net_set(dev, net);
310
311         dev->rtnl_link_ops = ops;
312
313         tunnel = netdev_priv(dev);
314         tunnel->parms = *parms;
315         tunnel->net = net;
316
317         err = register_netdevice(dev);
318         if (err)
319                 goto failed_free;
320
321         return dev;
322
323 failed_free:
324         free_netdev(dev);
325 failed:
326         return ERR_PTR(err);
327 }
328
329 static inline void init_tunnel_flow(struct flowi4 *fl4,
330                                     int proto,
331                                     __be32 daddr, __be32 saddr,
332                                     __be32 key, __u8 tos, int oif)
333 {
334         memset(fl4, 0, sizeof(*fl4));
335         fl4->flowi4_oif = oif;
336         fl4->daddr = daddr;
337         fl4->saddr = saddr;
338         fl4->flowi4_tos = tos;
339         fl4->flowi4_proto = proto;
340         fl4->fl4_gre_key = key;
341 }
342
343 static int ip_tunnel_bind_dev(struct net_device *dev)
344 {
345         struct net_device *tdev = NULL;
346         struct ip_tunnel *tunnel = netdev_priv(dev);
347         const struct iphdr *iph;
348         int hlen = LL_MAX_HEADER;
349         int mtu = ETH_DATA_LEN;
350         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
351
352         iph = &tunnel->parms.iph;
353
354         /* Guess output device to choose reasonable mtu and needed_headroom */
355         if (iph->daddr) {
356                 struct flowi4 fl4;
357                 struct rtable *rt;
358
359                 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
360                                  iph->saddr, tunnel->parms.o_key,
361                                  RT_TOS(iph->tos), tunnel->parms.link);
362                 rt = ip_route_output_key(tunnel->net, &fl4);
363
364                 if (!IS_ERR(rt)) {
365                         tdev = rt->dst.dev;
366                         tunnel_dst_set(tunnel, &rt->dst);
367                         ip_rt_put(rt);
368                 }
369                 if (dev->type != ARPHRD_ETHER)
370                         dev->flags |= IFF_POINTOPOINT;
371         }
372
373         if (!tdev && tunnel->parms.link)
374                 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
375
376         if (tdev) {
377                 hlen = tdev->hard_header_len + tdev->needed_headroom;
378                 mtu = tdev->mtu;
379         }
380         dev->iflink = tunnel->parms.link;
381
382         dev->needed_headroom = t_hlen + hlen;
383         mtu -= (dev->hard_header_len + t_hlen);
384
385         if (mtu < 68)
386                 mtu = 68;
387
388         return mtu;
389 }
390
391 static struct ip_tunnel *ip_tunnel_create(struct net *net,
392                                           struct ip_tunnel_net *itn,
393                                           struct ip_tunnel_parm *parms)
394 {
395         struct ip_tunnel *nt;
396         struct net_device *dev;
397
398         BUG_ON(!itn->fb_tunnel_dev);
399         dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
400         if (IS_ERR(dev))
401                 return ERR_CAST(dev);
402
403         dev->mtu = ip_tunnel_bind_dev(dev);
404
405         nt = netdev_priv(dev);
406         ip_tunnel_add(itn, nt);
407         return nt;
408 }
409
/* Common receive path for a decapsulated tunnel packet.
 *
 * @tunnel:        tunnel the packet was matched to
 * @skb:           the packet; ip_hdr(skb) is read as the outer IPv4 header
 * @tpi:           parsed tunnel header information (flags, sequence number)
 * @log_ecn_error: log a rate-limited message on ECN decapsulation errors
 *
 * The packet is dropped (and the matching error counters bumped) when its
 * checksum flag disagrees with the tunnel configuration, when the tunnel
 * requires sequence numbers and the packet has none or is older than
 * expected, or when IP_ECN_decapsulate() returns a value greater than 1.
 * Otherwise the per-CPU rx statistics are updated and the packet is
 * handed to the tunnel's GRO cells.
 *
 * Always returns 0; the skb is either freed here or passed to
 * gro_cells_receive().
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* The packet's checksum flag must agree with the tunnel's setting. */
	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		/* Signed difference so that sequence wrap-around is handled. */
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	/* iph still refers to the outer header obtained above. */
	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	/* Second argument is true when the tunnel's netns differs from the
	 * netns of its device.
	 */
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
478
/* Propagate the path MTU of the outer route @rt to the inner packet's dst
 * and decide whether @skb fits.
 *
 * @dev: tunnel device
 * @skb: packet about to be encapsulated
 * @rt:  route chosen for the outer packet
 * @df:  frag_off value from the tunnel parameters (non-zero selects the
 *       outer route's MTU minus the tunnel overhead)
 *
 * Returns 0 if transmission may proceed, or -E2BIG after an ICMP
 * "fragmentation needed" / ICMPv6 "packet too big" error has been sent
 * for a non-GSO packet that exceeds the computed MTU.
 */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	/* Inner packet size: skb length without tunnel and link headers. */
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			/* Clear the IP control block before icmp_send(). */
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		/* Record the smaller MTU on the inner IPv6 route, but only for
		 * tunnels with a unicast remote address or for host (/128)
		 * routes.
		 */
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
526
/* Common transmit path: encapsulate @skb in an outer IPv4 header and send
 * it through the tunnel device @dev.
 *
 * @skb:        packet to send; its inner network header must be set
 * @dev:        tunnel device
 * @tnl_params: outer header template (daddr, saddr, tos, ttl, frag_off)
 * @protocol:   IP protocol number for the outer header
 *
 * On the error paths visible here the skb is freed and a tx error counter
 * is incremented. On success the skb is handed to iptunnel_xmit()
 * (presumably consumed there - not visible from this file).
 */
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, const u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;
	bool connected;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	/* "connected" means the outer route does not depend on the inner
	 * packet, so the per-CPU cached route may be used.
	 */
	connected = (tunnel->parms.iph.daddr != 0);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			/* Outer destination is the inner route's next hop. */
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			/* Unspecified neighbour address: fall back to the
			 * packet's own destination.
			 */
			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			/* Only IPv4-compatible addresses carry a usable IPv4
			 * destination (their last 32 bits).
			 */
			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			/* addr6 may point into neigh; release only after use. */
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		/* Low bit set: take the TOS from the inner packet. */
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			tunnel_dst_set(tunnel, &rt->dst);
	}

	/* The outer route points back at the tunnel device itself. */
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	/* While errors recorded within the last IPTUNNEL_ERR_TIMEO are
	 * outstanding, signal a link failure for this packet; the packet is
	 * still transmitted below.
	 */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		/* No TTL configured: inherit from the inner header, or use
		 * the route's hop limit for other protocols.
		 */
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	/* Grow the device's needed_headroom if this route needs more. */
	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len;
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
688
/* Apply new parameters @p to the existing tunnel @t.
 *
 * @itn:     per-netns tunnel table the tunnel is hashed in
 * @t:       tunnel to modify
 * @dev:     net_device whose addresses/MTU are updated
 * @p:       new parameters
 * @set_mtu: when the link changes, also store the recomputed MTU in @dev
 *
 * Addresses and keys determine the hash bucket, so the tunnel is unhashed
 * before they are changed and rehashed afterwards. All per-CPU cached
 * routes are dropped and a state-change notification is sent for @dev.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	/* Non-Ethernet tunnel devices carry the IPv4 endpoints as their
	 * device and broadcast addresses.
	 */
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	ip_tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}
721
722 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
723 {
724         int err = 0;
725         struct ip_tunnel *t = netdev_priv(dev);
726         struct net *net = t->net;
727         struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
728
729         BUG_ON(!itn->fb_tunnel_dev);
730         switch (cmd) {
731         case SIOCGETTUNNEL:
732                 if (dev == itn->fb_tunnel_dev) {
733                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
734                         if (t == NULL)
735                                 t = netdev_priv(dev);
736                 }
737                 memcpy(p, &t->parms, sizeof(*p));
738                 break;
739
740         case SIOCADDTUNNEL:
741         case SIOCCHGTUNNEL:
742                 err = -EPERM;
743                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
744                         goto done;
745                 if (p->iph.ttl)
746                         p->iph.frag_off |= htons(IP_DF);
747                 if (!(p->i_flags & VTI_ISVTI)) {
748                         if (!(p->i_flags & TUNNEL_KEY))
749                                 p->i_key = 0;
750                         if (!(p->o_flags & TUNNEL_KEY))
751                                 p->o_key = 0;
752                 }
753
754                 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
755
756                 if (!t && (cmd == SIOCADDTUNNEL)) {
757                         t = ip_tunnel_create(net, itn, p);
758                         err = PTR_ERR_OR_ZERO(t);
759                         break;
760                 }
761                 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
762                         if (t != NULL) {
763                                 if (t->dev != dev) {
764                                         err = -EEXIST;
765                                         break;
766                                 }
767                         } else {
768                                 unsigned int nflags = 0;
769
770                                 if (ipv4_is_multicast(p->iph.daddr))
771                                         nflags = IFF_BROADCAST;
772                                 else if (p->iph.daddr)
773                                         nflags = IFF_POINTOPOINT;
774
775                                 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
776                                         err = -EINVAL;
777                                         break;
778                                 }
779
780                                 t = netdev_priv(dev);
781                         }
782                 }
783
784                 if (t) {
785                         err = 0;
786                         ip_tunnel_update(itn, t, dev, p, true);
787                 } else {
788                         err = -ENOENT;
789                 }
790                 break;
791
792         case SIOCDELTUNNEL:
793                 err = -EPERM;
794                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
795                         goto done;
796
797                 if (dev == itn->fb_tunnel_dev) {
798                         err = -ENOENT;
799                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
800                         if (t == NULL)
801                                 goto done;
802                         err = -EPERM;
803                         if (t == netdev_priv(itn->fb_tunnel_dev))
804                                 goto done;
805                         dev = t->dev;
806                 }
807                 unregister_netdevice(dev);
808                 err = 0;
809                 break;
810
811         default:
812                 err = -EINVAL;
813         }
814
815 done:
816         return err;
817 }
818 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
819
820 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
821 {
822         struct ip_tunnel *tunnel = netdev_priv(dev);
823         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
824
825         if (new_mtu < 68 ||
826             new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
827                 return -EINVAL;
828         dev->mtu = new_mtu;
829         return 0;
830 }
831 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
832
/* Release everything owned by tunnel device @dev: its GRO cells, the
 * per-CPU route cache, the per-CPU statistics and finally the net_device
 * itself (which also holds the tunnel's private data, so it goes last).
 */
static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(tunnel->dst_cache);
	free_percpu(dev->tstats);
	free_netdev(dev);
}
842
843 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
844 {
845         struct ip_tunnel *tunnel = netdev_priv(dev);
846         struct ip_tunnel_net *itn;
847
848         itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
849
850         if (itn->fb_tunnel_dev != dev) {
851                 ip_tunnel_del(netdev_priv(dev));
852                 unregister_netdevice_queue(dev, head);
853         }
854 }
855 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
856
857 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
858                                   struct rtnl_link_ops *ops, char *devname)
859 {
860         struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
861         struct ip_tunnel_parm parms;
862         unsigned int i;
863
864         for (i = 0; i < IP_TNL_HASH_SIZE; i++)
865                 INIT_HLIST_HEAD(&itn->tunnels[i]);
866
867         if (!ops) {
868                 itn->fb_tunnel_dev = NULL;
869                 return 0;
870         }
871
872         memset(&parms, 0, sizeof(parms));
873         if (devname)
874                 strlcpy(parms.name, devname, IFNAMSIZ);
875
876         rtnl_lock();
877         itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
878         /* FB netdevice is special: we have one, and only one per netns.
879          * Allowing to move it to another netns is clearly unsafe.
880          */
881         if (!IS_ERR(itn->fb_tunnel_dev)) {
882                 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
883                 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
884                 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
885         }
886         rtnl_unlock();
887
888         return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
889 }
890 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
891
892 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
893                               struct rtnl_link_ops *ops)
894 {
895         struct net *net = dev_net(itn->fb_tunnel_dev);
896         struct net_device *dev, *aux;
897         int h;
898
899         for_each_netdev_safe(net, dev, aux)
900                 if (dev->rtnl_link_ops == ops)
901                         unregister_netdevice_queue(dev, head);
902
903         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
904                 struct ip_tunnel *t;
905                 struct hlist_node *n;
906                 struct hlist_head *thead = &itn->tunnels[h];
907
908                 hlist_for_each_entry_safe(t, n, thead, hash_node)
909                         /* If dev is in the same netns, it has already
910                          * been added to the list by the previous loop.
911                          */
912                         if (!net_eq(dev_net(t->dev), net))
913                                 unregister_netdevice_queue(t->dev, head);
914         }
915 }
916
917 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
918 {
919         LIST_HEAD(list);
920
921         rtnl_lock();
922         ip_tunnel_destroy(itn, &list, ops);
923         unregister_netdevice_many(&list);
924         rtnl_unlock();
925 }
926 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
927
928 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
929                       struct ip_tunnel_parm *p)
930 {
931         struct ip_tunnel *nt;
932         struct net *net = dev_net(dev);
933         struct ip_tunnel_net *itn;
934         int mtu;
935         int err;
936
937         nt = netdev_priv(dev);
938         itn = net_generic(net, nt->ip_tnl_net_id);
939
940         if (ip_tunnel_find(itn, p, dev->type))
941                 return -EEXIST;
942
943         nt->net = net;
944         nt->parms = *p;
945         err = register_netdevice(dev);
946         if (err)
947                 goto out;
948
949         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
950                 eth_hw_addr_random(dev);
951
952         mtu = ip_tunnel_bind_dev(dev);
953         if (!tb[IFLA_MTU])
954                 dev->mtu = mtu;
955
956         ip_tunnel_add(itn, nt);
957
958 out:
959         return err;
960 }
961 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
962
963 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
964                          struct ip_tunnel_parm *p)
965 {
966         struct ip_tunnel *t;
967         struct ip_tunnel *tunnel = netdev_priv(dev);
968         struct net *net = tunnel->net;
969         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
970
971         if (dev == itn->fb_tunnel_dev)
972                 return -EINVAL;
973
974         t = ip_tunnel_find(itn, p, dev->type);
975
976         if (t) {
977                 if (t->dev != dev)
978                         return -EEXIST;
979         } else {
980                 t = tunnel;
981
982                 if (dev->type != ARPHRD_ETHER) {
983                         unsigned int nflags = 0;
984
985                         if (ipv4_is_multicast(p->iph.daddr))
986                                 nflags = IFF_BROADCAST;
987                         else if (p->iph.daddr)
988                                 nflags = IFF_POINTOPOINT;
989
990                         if ((dev->flags ^ nflags) &
991                             (IFF_POINTOPOINT | IFF_BROADCAST))
992                                 return -EINVAL;
993                 }
994         }
995
996         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
997         return 0;
998 }
999 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1000
1001 int ip_tunnel_init(struct net_device *dev)
1002 {
1003         struct ip_tunnel *tunnel = netdev_priv(dev);
1004         struct iphdr *iph = &tunnel->parms.iph;
1005         int err;
1006
1007         dev->destructor = ip_tunnel_dev_free;
1008         dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1009         if (!dev->tstats)
1010                 return -ENOMEM;
1011
1012         tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1013         if (!tunnel->dst_cache) {
1014                 free_percpu(dev->tstats);
1015                 return -ENOMEM;
1016         }
1017
1018         err = gro_cells_init(&tunnel->gro_cells, dev);
1019         if (err) {
1020                 free_percpu(tunnel->dst_cache);
1021                 free_percpu(dev->tstats);
1022                 return err;
1023         }
1024
1025         tunnel->dev = dev;
1026         tunnel->net = dev_net(dev);
1027         strcpy(tunnel->parms.name, dev->name);
1028         iph->version            = 4;
1029         iph->ihl                = 5;
1030
1031         return 0;
1032 }
1033 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1034
1035 void ip_tunnel_uninit(struct net_device *dev)
1036 {
1037         struct ip_tunnel *tunnel = netdev_priv(dev);
1038         struct net *net = tunnel->net;
1039         struct ip_tunnel_net *itn;
1040
1041         itn = net_generic(net, tunnel->ip_tnl_net_id);
1042         /* fb_tunnel_dev will be unregisted in net-exit call. */
1043         if (itn->fb_tunnel_dev != dev)
1044                 ip_tunnel_del(netdev_priv(dev));
1045
1046         ip_tunnel_dst_reset_all(tunnel);
1047 }
1048 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1049
1050 /* Do least required initialization, rest of init is done in tunnel_init call */
1051 void ip_tunnel_setup(struct net_device *dev, int net_id)
1052 {
1053         struct ip_tunnel *tunnel = netdev_priv(dev);
1054         tunnel->ip_tnl_net_id = net_id;
1055 }
1056 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1057
1058 MODULE_LICENSE("GPL");