Merge remote-tracking branches 'spi/fix/qup' and 'spi/fix/topcliff-pch' into spi...
[cascardo/linux.git] / net / ipv4 / ip_tunnel.c
1 /*
2  * Copyright (c) 2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43 #include <linux/err.h>
44
45 #include <net/sock.h>
46 #include <net/ip.h>
47 #include <net/icmp.h>
48 #include <net/protocol.h>
49 #include <net/ip_tunnels.h>
50 #include <net/arp.h>
51 #include <net/checksum.h>
52 #include <net/dsfield.h>
53 #include <net/inet_ecn.h>
54 #include <net/xfrm.h>
55 #include <net/net_namespace.h>
56 #include <net/netns/generic.h>
57 #include <net/rtnetlink.h>
58
59 #if IS_ENABLED(CONFIG_IPV6)
60 #include <net/ipv6.h>
61 #include <net/ip6_fib.h>
62 #include <net/ip6_route.h>
63 #endif
64
65 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
66 {
67         return hash_32((__force u32)key ^ (__force u32)remote,
68                          IP_TNL_HASH_BITS);
69 }
70
/* Replace the cached output route in @idst with @dst.
 *
 * DST_NOCACHE routes must not be kept past their current use, so they
 * are stored as NULL; otherwise a reference is taken before publishing.
 * xchg() makes the pointer swap atomic with respect to concurrent
 * updaters, and the previous entry's reference is released afterwards.
 */
static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
			     struct dst_entry *dst)
{
	struct dst_entry *old_dst;

	if (dst) {
		if (dst->flags & DST_NOCACHE)
			dst = NULL;
		else
			dst_clone(dst);
	}
	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
	dst_release(old_dst);
}
85
/* Cache @dst as this CPU's output route for tunnel @t. */
static void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
{
	__tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst);
}
90
/* Drop this CPU's cached route for tunnel @t. */
static void tunnel_dst_reset(struct ip_tunnel *t)
{
	tunnel_dst_set(t, NULL);
}
95
/* Invalidate the cached route on every possible CPU, e.g. after the
 * tunnel's parameters changed and all cached routes may be stale.
 */
void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
{
	int i;

	for_each_possible_cpu(i)
		__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
}
EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
104
/* Return this CPU's cached route for tunnel @t with a reference held,
 * or NULL if nothing usable is cached.
 *
 * An obsolete entry is revalidated through its ->check() method with
 * @cookie; on failure the per-CPU cache slot is cleared and NULL is
 * returned so the caller performs a fresh route lookup.
 * NOTE(review): dst_hold() is taken while only under rcu_read_lock();
 * this assumes a dst published in the cache cannot reach refcount zero
 * before the hold — confirm against dst lifetime rules.
 */
static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
{
	struct dst_entry *dst;

	rcu_read_lock();
	dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
	if (dst) {
		if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
			rcu_read_unlock();
			tunnel_dst_reset(t);
			return NULL;
		}
		dst_hold(dst);
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}
122
123 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
124                                 __be16 flags, __be32 key)
125 {
126         if (p->i_flags & TUNNEL_KEY) {
127                 if (flags & TUNNEL_KEY)
128                         return key == p->i_key;
129                 else
130                         /* key expected, none present */
131                         return false;
132         } else
133                 return !(flags & TUNNEL_KEY);
134 }
135
/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched against configured keyless tunnels,
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for an input packet.
*/
147 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
148                                    int link, __be16 flags,
149                                    __be32 remote, __be32 local,
150                                    __be32 key)
151 {
152         unsigned int hash;
153         struct ip_tunnel *t, *cand = NULL;
154         struct hlist_head *head;
155
156         hash = ip_tunnel_hash(key, remote);
157         head = &itn->tunnels[hash];
158
159         hlist_for_each_entry_rcu(t, head, hash_node) {
160                 if (local != t->parms.iph.saddr ||
161                     remote != t->parms.iph.daddr ||
162                     !(t->dev->flags & IFF_UP))
163                         continue;
164
165                 if (!ip_tunnel_key_match(&t->parms, flags, key))
166                         continue;
167
168                 if (t->parms.link == link)
169                         return t;
170                 else
171                         cand = t;
172         }
173
174         hlist_for_each_entry_rcu(t, head, hash_node) {
175                 if (remote != t->parms.iph.daddr ||
176                     !(t->dev->flags & IFF_UP))
177                         continue;
178
179                 if (!ip_tunnel_key_match(&t->parms, flags, key))
180                         continue;
181
182                 if (t->parms.link == link)
183                         return t;
184                 else if (!cand)
185                         cand = t;
186         }
187
188         hash = ip_tunnel_hash(key, 0);
189         head = &itn->tunnels[hash];
190
191         hlist_for_each_entry_rcu(t, head, hash_node) {
192                 if ((local != t->parms.iph.saddr &&
193                      (local != t->parms.iph.daddr ||
194                       !ipv4_is_multicast(local))) ||
195                     !(t->dev->flags & IFF_UP))
196                         continue;
197
198                 if (!ip_tunnel_key_match(&t->parms, flags, key))
199                         continue;
200
201                 if (t->parms.link == link)
202                         return t;
203                 else if (!cand)
204                         cand = t;
205         }
206
207         if (flags & TUNNEL_NO_KEY)
208                 goto skip_key_lookup;
209
210         hlist_for_each_entry_rcu(t, head, hash_node) {
211                 if (t->parms.i_key != key ||
212                     !(t->dev->flags & IFF_UP))
213                         continue;
214
215                 if (t->parms.link == link)
216                         return t;
217                 else if (!cand)
218                         cand = t;
219         }
220
221 skip_key_lookup:
222         if (cand)
223                 return cand;
224
225         if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
226                 return netdev_priv(itn->fb_tunnel_dev);
227
228
229         return NULL;
230 }
231 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
232
233 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
234                                     struct ip_tunnel_parm *parms)
235 {
236         unsigned int h;
237         __be32 remote;
238         __be32 i_key = parms->i_key;
239
240         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
241                 remote = parms->iph.daddr;
242         else
243                 remote = 0;
244
245         if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
246                 i_key = 0;
247
248         h = ip_tunnel_hash(i_key, remote);
249         return &itn->tunnels[h];
250 }
251
/* Insert @t into its hash bucket; caller holds RTNL, readers use RCU. */
static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	hlist_add_head_rcu(&t->hash_node, head);
}
258
/* Unlink @t from the hash table; concurrent RCU readers may still see it. */
static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}
263
264 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
265                                         struct ip_tunnel_parm *parms,
266                                         int type)
267 {
268         __be32 remote = parms->iph.daddr;
269         __be32 local = parms->iph.saddr;
270         __be32 key = parms->i_key;
271         int link = parms->link;
272         struct ip_tunnel *t = NULL;
273         struct hlist_head *head = ip_bucket(itn, parms);
274
275         hlist_for_each_entry_rcu(t, head, hash_node) {
276                 if (local == t->parms.iph.saddr &&
277                     remote == t->parms.iph.daddr &&
278                     key == t->parms.i_key &&
279                     link == t->parms.link &&
280                     type == t->dev->type)
281                         break;
282         }
283         return t;
284 }
285
/* Allocate and register a tunnel netdevice in @net.
 *
 * The device name comes from @parms->name if set; otherwise ops->kind
 * plus a "%d" suffix for automatic numbering (hence the IFNAMSIZ - 3
 * length check: two characters for "%d" plus the terminating NUL).
 * Returns the device or an ERR_PTR(); caller must hold RTNL.
 */
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	/* Seed the device's private area with the requested parameters. */
	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}
331
332 static inline void init_tunnel_flow(struct flowi4 *fl4,
333                                     int proto,
334                                     __be32 daddr, __be32 saddr,
335                                     __be32 key, __u8 tos, int oif)
336 {
337         memset(fl4, 0, sizeof(*fl4));
338         fl4->flowi4_oif = oif;
339         fl4->daddr = daddr;
340         fl4->saddr = saddr;
341         fl4->flowi4_tos = tos;
342         fl4->flowi4_proto = proto;
343         fl4->fl4_gre_key = key;
344 }
345
/* Bind the tunnel to an underlying output device and derive the MTU and
 * needed_headroom from it.  Returns the suggested device MTU, clamped to
 * at least 68 (the minimum IPv4 MTU).
 */
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			/* Prime the per-CPU route cache while we hold the route. */
			tunnel_dst_set(tunnel, &rt->dst);
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	/* No route found: fall back to the explicitly bound link, if any. */
	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < 68)
		mtu = 68;

	return mtu;
}
393
394 static struct ip_tunnel *ip_tunnel_create(struct net *net,
395                                           struct ip_tunnel_net *itn,
396                                           struct ip_tunnel_parm *parms)
397 {
398         struct ip_tunnel *nt, *fbt;
399         struct net_device *dev;
400
401         BUG_ON(!itn->fb_tunnel_dev);
402         fbt = netdev_priv(itn->fb_tunnel_dev);
403         dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
404         if (IS_ERR(dev))
405                 return ERR_CAST(dev);
406
407         dev->mtu = ip_tunnel_bind_dev(dev);
408
409         nt = netdev_priv(dev);
410         ip_tunnel_add(itn, nt);
411         return nt;
412 }
413
/* Common receive path for decapsulated tunnel packets.
 *
 * Validates the checksum/sequence expectations carried in @tpi against
 * the tunnel configuration, decapsulates ECN, updates per-CPU stats and
 * hands the skb to GRO.  Always consumes the skb and returns 0.
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* Checksum presence must agree with the tunnel's TUNNEL_CSUM flag. */
	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	/* With TUNNEL_SEQ, drop packets without or with stale sequence numbers. */
	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	/* err > 1 means the inner frame must be dropped (CE on non-ECT). */
	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
482
/* Check the packet against the tunnel path MTU and propagate PMTU
 * information to the inner flow.  Returns 0 if the packet may be sent,
 * or -E2BIG after emitting ICMP/ICMPv6 "fragmentation needed"/"packet
 * too big" feedback to the sender.
 */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	/* With DF set, the usable MTU is the outer route's MTU minus all
	 * encapsulation overhead; otherwise the inner dst (or device) MTU.
	 */
	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		/* Clamp the cached IPv6 route's MTU (host routes or
		 * non-multicast tunnels) so later lookups see the new PMTU.
		 */
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
530
/* Common tunnel transmit path.
 *
 * Resolves the outer destination (for NBMA tunnels it is derived from
 * the inner packet's routing/neighbour state), picks an outer route —
 * using the per-CPU route cache when the tunnel is "connected", i.e.
 * fully determined by its static parameters — enforces PMTU, then
 * encapsulates and transmits.  Consumes the skb in all cases.
 */
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, const u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;
	bool connected;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			/* Only v4-compatible IPv6 addresses carry a usable
			 * IPv4 next hop in their last 32 bits.
			 */
			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	/* A set LSB in the configured TOS means "inherit from inner packet". */
	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			tunnel_dst_set(tunnel, &rt->dst);
	}

	/* Routing back out of the tunnel device would loop forever. */
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	/* Recent ICMP errors on this tunnel: signal link failure upstream
	 * for a while instead of blindly retransmitting.
	 */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	/* TTL 0 in the config means "inherit from the inner packet". */
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len;
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
691
/* Apply the new parameters @p to tunnel @t.
 *
 * The tunnel is unhashed and rehashed because new addresses or keys may
 * place it in a different bucket.  For non-Ethernet tunnels the endpoint
 * addresses are mirrored into dev_addr/broadcast.  If the underlying
 * link changed, the device is re-bound and (when @set_mtu) its MTU is
 * refreshed.  All per-CPU cached routes are invalidated at the end.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	ip_tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}
724
/* Common SIOC{GET,ADD,CHG,DEL}TUNNEL ioctl handler for tunnel drivers.
 *
 * GET: on the fallback device, resolve the tunnel described by @p
 *      (falling back to the device itself); copy its parameters out.
 * ADD/CHG: requires CAP_NET_ADMIN.  Creates the tunnel if absent (ADD)
 *      or updates it in place (CHG), rejecting changes that would alias
 *      another device or alter its broadcast/point-to-point nature.
 * DEL: requires CAP_NET_ADMIN.  The fallback device itself can never
 *      be deleted, only tunnels resolved through it.
 * Returns 0 or a negative errno.
 */
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		/* A fixed TTL implies DF so path MTU discovery can work. */
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags&TUNNEL_KEY))
			p->i_key = 0;
		if (!(p->o_flags&TUNNEL_KEY))
			p->o_key = 0;

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (!t && (cmd == SIOCADDTUNNEL)) {
			t = ip_tunnel_create(net, itn, p);
			if (IS_ERR(t)) {
				err = PTR_ERR(t);
				break;
			}
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* @p already describes a different device. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				/* The new addresses must not change the
				 * device's broadcast/point-to-point nature.
				 */
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			/* Never delete the fallback device itself. */
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
822
823 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
824 {
825         struct ip_tunnel *tunnel = netdev_priv(dev);
826         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
827
828         if (new_mtu < 68 ||
829             new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
830                 return -EINVAL;
831         dev->mtu = new_mtu;
832         return 0;
833 }
834 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
835
/* Device destructor: release per-device state, then the device itself. */
static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(tunnel->dst_cache);
	free_percpu(dev->tstats);
	free_netdev(dev);
}
845
/* rtnl dellink handler: unhash the tunnel and queue its device for
 * unregistration.  The per-netns fallback device is deliberately left
 * alone; it is only torn down with its netns.
 */
void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
859
/* Per-netns setup for a tunnel protocol: initialize the hash table and
 * create the protocol's fallback device.
 *
 * A NULL @ops means the protocol provides no fallback device in this
 * netns; only the hash table is initialized.  Returns 0 or a negative
 * errno from fallback-device creation.
 */
int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
				  struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops) {
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
894
895 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
896                               struct rtnl_link_ops *ops)
897 {
898         struct net *net = dev_net(itn->fb_tunnel_dev);
899         struct net_device *dev, *aux;
900         int h;
901
902         for_each_netdev_safe(net, dev, aux)
903                 if (dev->rtnl_link_ops == ops)
904                         unregister_netdevice_queue(dev, head);
905
906         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
907                 struct ip_tunnel *t;
908                 struct hlist_node *n;
909                 struct hlist_head *thead = &itn->tunnels[h];
910
911                 hlist_for_each_entry_safe(t, n, thead, hash_node)
912                         /* If dev is in the same netns, it has already
913                          * been added to the list by the previous loop.
914                          */
915                         if (!net_eq(dev_net(t->dev), net))
916                                 unregister_netdevice_queue(t->dev, head);
917         }
918 }
919
920 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
921 {
922         LIST_HEAD(list);
923
924         rtnl_lock();
925         ip_tunnel_destroy(itn, &list, ops);
926         unregister_netdevice_many(&list);
927         rtnl_unlock();
928 }
929 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
930
931 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
932                       struct ip_tunnel_parm *p)
933 {
934         struct ip_tunnel *nt;
935         struct net *net = dev_net(dev);
936         struct ip_tunnel_net *itn;
937         int mtu;
938         int err;
939
940         nt = netdev_priv(dev);
941         itn = net_generic(net, nt->ip_tnl_net_id);
942
943         if (ip_tunnel_find(itn, p, dev->type))
944                 return -EEXIST;
945
946         nt->net = net;
947         nt->parms = *p;
948         err = register_netdevice(dev);
949         if (err)
950                 goto out;
951
952         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
953                 eth_hw_addr_random(dev);
954
955         mtu = ip_tunnel_bind_dev(dev);
956         if (!tb[IFLA_MTU])
957                 dev->mtu = mtu;
958
959         ip_tunnel_add(itn, nt);
960
961 out:
962         return err;
963 }
964 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
965
966 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
967                          struct ip_tunnel_parm *p)
968 {
969         struct ip_tunnel *t;
970         struct ip_tunnel *tunnel = netdev_priv(dev);
971         struct net *net = tunnel->net;
972         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
973
974         if (dev == itn->fb_tunnel_dev)
975                 return -EINVAL;
976
977         t = ip_tunnel_find(itn, p, dev->type);
978
979         if (t) {
980                 if (t->dev != dev)
981                         return -EEXIST;
982         } else {
983                 t = tunnel;
984
985                 if (dev->type != ARPHRD_ETHER) {
986                         unsigned int nflags = 0;
987
988                         if (ipv4_is_multicast(p->iph.daddr))
989                                 nflags = IFF_BROADCAST;
990                         else if (p->iph.daddr)
991                                 nflags = IFF_POINTOPOINT;
992
993                         if ((dev->flags ^ nflags) &
994                             (IFF_POINTOPOINT | IFF_BROADCAST))
995                                 return -EINVAL;
996                 }
997         }
998
999         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1000         return 0;
1001 }
1002 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1003
1004 int ip_tunnel_init(struct net_device *dev)
1005 {
1006         struct ip_tunnel *tunnel = netdev_priv(dev);
1007         struct iphdr *iph = &tunnel->parms.iph;
1008         int err;
1009
1010         dev->destructor = ip_tunnel_dev_free;
1011         dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1012         if (!dev->tstats)
1013                 return -ENOMEM;
1014
1015         tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1016         if (!tunnel->dst_cache) {
1017                 free_percpu(dev->tstats);
1018                 return -ENOMEM;
1019         }
1020
1021         err = gro_cells_init(&tunnel->gro_cells, dev);
1022         if (err) {
1023                 free_percpu(tunnel->dst_cache);
1024                 free_percpu(dev->tstats);
1025                 return err;
1026         }
1027
1028         tunnel->dev = dev;
1029         tunnel->net = dev_net(dev);
1030         strcpy(tunnel->parms.name, dev->name);
1031         iph->version            = 4;
1032         iph->ihl                = 5;
1033
1034         return 0;
1035 }
1036 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1037
1038 void ip_tunnel_uninit(struct net_device *dev)
1039 {
1040         struct ip_tunnel *tunnel = netdev_priv(dev);
1041         struct net *net = tunnel->net;
1042         struct ip_tunnel_net *itn;
1043
1044         itn = net_generic(net, tunnel->ip_tnl_net_id);
1045         /* fb_tunnel_dev will be unregisted in net-exit call. */
1046         if (itn->fb_tunnel_dev != dev)
1047                 ip_tunnel_del(netdev_priv(dev));
1048
1049         ip_tunnel_dst_reset_all(tunnel);
1050 }
1051 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1052
1053 /* Do least required initialization, rest of init is done in tunnel_init call */
1054 void ip_tunnel_setup(struct net_device *dev, int net_id)
1055 {
1056         struct ip_tunnel *tunnel = netdev_priv(dev);
1057         tunnel->ip_tnl_net_id = net_id;
1058 }
1059 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1060
1061 MODULE_LICENSE("GPL");