iwlwifi: remove iwl_ht_params.smps_mode
[cascardo/linux.git] / net / ipv4 / ip_tunnel.c
1 /*
2  * Copyright (c) 2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/init.h>
34 #include <linux/in6.h>
35 #include <linux/inetdevice.h>
36 #include <linux/igmp.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/etherdevice.h>
39 #include <linux/if_ether.h>
40 #include <linux/if_vlan.h>
41 #include <linux/rculist.h>
42 #include <linux/err.h>
43
44 #include <net/sock.h>
45 #include <net/ip.h>
46 #include <net/icmp.h>
47 #include <net/protocol.h>
48 #include <net/ip_tunnels.h>
49 #include <net/arp.h>
50 #include <net/checksum.h>
51 #include <net/dsfield.h>
52 #include <net/inet_ecn.h>
53 #include <net/xfrm.h>
54 #include <net/net_namespace.h>
55 #include <net/netns/generic.h>
56 #include <net/rtnetlink.h>
57 #include <net/udp.h>
58
59 #if IS_ENABLED(CONFIG_IPV6)
60 #include <net/ipv6.h>
61 #include <net/ip6_fib.h>
62 #include <net/ip6_route.h>
63 #endif
64
65 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
66 {
67         return hash_32((__force u32)key ^ (__force u32)remote,
68                          IP_TNL_HASH_BITS);
69 }
70
71 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
72                                 __be16 flags, __be32 key)
73 {
74         if (p->i_flags & TUNNEL_KEY) {
75                 if (flags & TUNNEL_KEY)
76                         return key == p->i_key;
77                 else
78                         /* key expected, none present */
79                         return false;
80         } else
81                 return !(flags & TUNNEL_KEY);
82 }
83
84 /* Fallback tunnel: no source, no destination, no key, no options
85
86    Tunnel hash table:
87    We require exact key match i.e. if a key is present in packet
88    it will match only tunnel with the same key; if it is not present,
89    it will match only keyless tunnel.
90
91    All keysless packets, if not matched configured keyless tunnels
92    will match fallback tunnel.
93    Given src, dst and key, find appropriate for input tunnel.
94 */
95 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
96                                    int link, __be16 flags,
97                                    __be32 remote, __be32 local,
98                                    __be32 key)
99 {
100         unsigned int hash;
101         struct ip_tunnel *t, *cand = NULL;
102         struct hlist_head *head;
103
104         hash = ip_tunnel_hash(key, remote);
105         head = &itn->tunnels[hash];
106
107         hlist_for_each_entry_rcu(t, head, hash_node) {
108                 if (local != t->parms.iph.saddr ||
109                     remote != t->parms.iph.daddr ||
110                     !(t->dev->flags & IFF_UP))
111                         continue;
112
113                 if (!ip_tunnel_key_match(&t->parms, flags, key))
114                         continue;
115
116                 if (t->parms.link == link)
117                         return t;
118                 else
119                         cand = t;
120         }
121
122         hlist_for_each_entry_rcu(t, head, hash_node) {
123                 if (remote != t->parms.iph.daddr ||
124                     t->parms.iph.saddr != 0 ||
125                     !(t->dev->flags & IFF_UP))
126                         continue;
127
128                 if (!ip_tunnel_key_match(&t->parms, flags, key))
129                         continue;
130
131                 if (t->parms.link == link)
132                         return t;
133                 else if (!cand)
134                         cand = t;
135         }
136
137         hash = ip_tunnel_hash(key, 0);
138         head = &itn->tunnels[hash];
139
140         hlist_for_each_entry_rcu(t, head, hash_node) {
141                 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
142                     (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
143                         continue;
144
145                 if (!(t->dev->flags & IFF_UP))
146                         continue;
147
148                 if (!ip_tunnel_key_match(&t->parms, flags, key))
149                         continue;
150
151                 if (t->parms.link == link)
152                         return t;
153                 else if (!cand)
154                         cand = t;
155         }
156
157         if (flags & TUNNEL_NO_KEY)
158                 goto skip_key_lookup;
159
160         hlist_for_each_entry_rcu(t, head, hash_node) {
161                 if (t->parms.i_key != key ||
162                     t->parms.iph.saddr != 0 ||
163                     t->parms.iph.daddr != 0 ||
164                     !(t->dev->flags & IFF_UP))
165                         continue;
166
167                 if (t->parms.link == link)
168                         return t;
169                 else if (!cand)
170                         cand = t;
171         }
172
173 skip_key_lookup:
174         if (cand)
175                 return cand;
176
177         t = rcu_dereference(itn->collect_md_tun);
178         if (t)
179                 return t;
180
181         if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
182                 return netdev_priv(itn->fb_tunnel_dev);
183
184         return NULL;
185 }
186 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
187
188 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
189                                     struct ip_tunnel_parm *parms)
190 {
191         unsigned int h;
192         __be32 remote;
193         __be32 i_key = parms->i_key;
194
195         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
196                 remote = parms->iph.daddr;
197         else
198                 remote = 0;
199
200         if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
201                 i_key = 0;
202
203         h = ip_tunnel_hash(i_key, remote);
204         return &itn->tunnels[h];
205 }
206
207 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
208 {
209         struct hlist_head *head = ip_bucket(itn, &t->parms);
210
211         if (t->collect_md)
212                 rcu_assign_pointer(itn->collect_md_tun, t);
213         hlist_add_head_rcu(&t->hash_node, head);
214 }
215
216 static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
217 {
218         if (t->collect_md)
219                 rcu_assign_pointer(itn->collect_md_tun, NULL);
220         hlist_del_init_rcu(&t->hash_node);
221 }
222
223 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
224                                         struct ip_tunnel_parm *parms,
225                                         int type)
226 {
227         __be32 remote = parms->iph.daddr;
228         __be32 local = parms->iph.saddr;
229         __be32 key = parms->i_key;
230         __be16 flags = parms->i_flags;
231         int link = parms->link;
232         struct ip_tunnel *t = NULL;
233         struct hlist_head *head = ip_bucket(itn, parms);
234
235         hlist_for_each_entry_rcu(t, head, hash_node) {
236                 if (local == t->parms.iph.saddr &&
237                     remote == t->parms.iph.daddr &&
238                     link == t->parms.link &&
239                     type == t->dev->type &&
240                     ip_tunnel_key_match(&t->parms, flags, key))
241                         break;
242         }
243         return t;
244 }
245
246 static struct net_device *__ip_tunnel_create(struct net *net,
247                                              const struct rtnl_link_ops *ops,
248                                              struct ip_tunnel_parm *parms)
249 {
250         int err;
251         struct ip_tunnel *tunnel;
252         struct net_device *dev;
253         char name[IFNAMSIZ];
254
255         if (parms->name[0])
256                 strlcpy(name, parms->name, IFNAMSIZ);
257         else {
258                 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
259                         err = -E2BIG;
260                         goto failed;
261                 }
262                 strlcpy(name, ops->kind, IFNAMSIZ);
263                 strncat(name, "%d", 2);
264         }
265
266         ASSERT_RTNL();
267         dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
268         if (!dev) {
269                 err = -ENOMEM;
270                 goto failed;
271         }
272         dev_net_set(dev, net);
273
274         dev->rtnl_link_ops = ops;
275
276         tunnel = netdev_priv(dev);
277         tunnel->parms = *parms;
278         tunnel->net = net;
279
280         err = register_netdevice(dev);
281         if (err)
282                 goto failed_free;
283
284         return dev;
285
286 failed_free:
287         free_netdev(dev);
288 failed:
289         return ERR_PTR(err);
290 }
291
292 static inline void init_tunnel_flow(struct flowi4 *fl4,
293                                     int proto,
294                                     __be32 daddr, __be32 saddr,
295                                     __be32 key, __u8 tos, int oif)
296 {
297         memset(fl4, 0, sizeof(*fl4));
298         fl4->flowi4_oif = oif;
299         fl4->daddr = daddr;
300         fl4->saddr = saddr;
301         fl4->flowi4_tos = tos;
302         fl4->flowi4_proto = proto;
303         fl4->fl4_gre_key = key;
304 }
305
306 static int ip_tunnel_bind_dev(struct net_device *dev)
307 {
308         struct net_device *tdev = NULL;
309         struct ip_tunnel *tunnel = netdev_priv(dev);
310         const struct iphdr *iph;
311         int hlen = LL_MAX_HEADER;
312         int mtu = ETH_DATA_LEN;
313         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
314
315         iph = &tunnel->parms.iph;
316
317         /* Guess output device to choose reasonable mtu and needed_headroom */
318         if (iph->daddr) {
319                 struct flowi4 fl4;
320                 struct rtable *rt;
321
322                 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
323                                  iph->saddr, tunnel->parms.o_key,
324                                  RT_TOS(iph->tos), tunnel->parms.link);
325                 rt = ip_route_output_key(tunnel->net, &fl4);
326
327                 if (!IS_ERR(rt)) {
328                         tdev = rt->dst.dev;
329                         dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
330                                           fl4.saddr);
331                         ip_rt_put(rt);
332                 }
333                 if (dev->type != ARPHRD_ETHER)
334                         dev->flags |= IFF_POINTOPOINT;
335         }
336
337         if (!tdev && tunnel->parms.link)
338                 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
339
340         if (tdev) {
341                 hlen = tdev->hard_header_len + tdev->needed_headroom;
342                 mtu = tdev->mtu;
343         }
344
345         dev->needed_headroom = t_hlen + hlen;
346         mtu -= (dev->hard_header_len + t_hlen);
347
348         if (mtu < 68)
349                 mtu = 68;
350
351         return mtu;
352 }
353
354 static struct ip_tunnel *ip_tunnel_create(struct net *net,
355                                           struct ip_tunnel_net *itn,
356                                           struct ip_tunnel_parm *parms)
357 {
358         struct ip_tunnel *nt;
359         struct net_device *dev;
360
361         BUG_ON(!itn->fb_tunnel_dev);
362         dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
363         if (IS_ERR(dev))
364                 return ERR_CAST(dev);
365
366         dev->mtu = ip_tunnel_bind_dev(dev);
367
368         nt = netdev_priv(dev);
369         ip_tunnel_add(itn, nt);
370         return nt;
371 }
372
373 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
374                   const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
375                   bool log_ecn_error)
376 {
377         struct pcpu_sw_netstats *tstats;
378         const struct iphdr *iph = ip_hdr(skb);
379         int err;
380
381 #ifdef CONFIG_NET_IPGRE_BROADCAST
382         if (ipv4_is_multicast(iph->daddr)) {
383                 tunnel->dev->stats.multicast++;
384                 skb->pkt_type = PACKET_BROADCAST;
385         }
386 #endif
387
388         if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
389              ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
390                 tunnel->dev->stats.rx_crc_errors++;
391                 tunnel->dev->stats.rx_errors++;
392                 goto drop;
393         }
394
395         if (tunnel->parms.i_flags&TUNNEL_SEQ) {
396                 if (!(tpi->flags&TUNNEL_SEQ) ||
397                     (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
398                         tunnel->dev->stats.rx_fifo_errors++;
399                         tunnel->dev->stats.rx_errors++;
400                         goto drop;
401                 }
402                 tunnel->i_seqno = ntohl(tpi->seq) + 1;
403         }
404
405         skb_reset_network_header(skb);
406
407         err = IP_ECN_decapsulate(iph, skb);
408         if (unlikely(err)) {
409                 if (log_ecn_error)
410                         net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
411                                         &iph->saddr, iph->tos);
412                 if (err > 1) {
413                         ++tunnel->dev->stats.rx_frame_errors;
414                         ++tunnel->dev->stats.rx_errors;
415                         goto drop;
416                 }
417         }
418
419         tstats = this_cpu_ptr(tunnel->dev->tstats);
420         u64_stats_update_begin(&tstats->syncp);
421         tstats->rx_packets++;
422         tstats->rx_bytes += skb->len;
423         u64_stats_update_end(&tstats->syncp);
424
425         skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
426
427         if (tunnel->dev->type == ARPHRD_ETHER) {
428                 skb->protocol = eth_type_trans(skb, tunnel->dev);
429                 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
430         } else {
431                 skb->dev = tunnel->dev;
432         }
433
434         if (tun_dst)
435                 skb_dst_set(skb, (struct dst_entry *)tun_dst);
436
437         gro_cells_receive(&tunnel->gro_cells, skb);
438         return 0;
439
440 drop:
441         kfree_skb(skb);
442         return 0;
443 }
444 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
445
446 static int ip_encap_hlen(struct ip_tunnel_encap *e)
447 {
448         const struct ip_tunnel_encap_ops *ops;
449         int hlen = -EINVAL;
450
451         if (e->type == TUNNEL_ENCAP_NONE)
452                 return 0;
453
454         if (e->type >= MAX_IPTUN_ENCAP_OPS)
455                 return -EINVAL;
456
457         rcu_read_lock();
458         ops = rcu_dereference(iptun_encaps[e->type]);
459         if (likely(ops && ops->encap_hlen))
460                 hlen = ops->encap_hlen(e);
461         rcu_read_unlock();
462
463         return hlen;
464 }
465
466 const struct ip_tunnel_encap_ops __rcu *
467                 iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;
468
469 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
470                             unsigned int num)
471 {
472         if (num >= MAX_IPTUN_ENCAP_OPS)
473                 return -ERANGE;
474
475         return !cmpxchg((const struct ip_tunnel_encap_ops **)
476                         &iptun_encaps[num],
477                         NULL, ops) ? 0 : -1;
478 }
479 EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
480
481 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
482                             unsigned int num)
483 {
484         int ret;
485
486         if (num >= MAX_IPTUN_ENCAP_OPS)
487                 return -ERANGE;
488
489         ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
490                        &iptun_encaps[num],
491                        ops, NULL) == ops) ? 0 : -1;
492
493         synchronize_net();
494
495         return ret;
496 }
497 EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
498
499 int ip_tunnel_encap_setup(struct ip_tunnel *t,
500                           struct ip_tunnel_encap *ipencap)
501 {
502         int hlen;
503
504         memset(&t->encap, 0, sizeof(t->encap));
505
506         hlen = ip_encap_hlen(ipencap);
507         if (hlen < 0)
508                 return hlen;
509
510         t->encap.type = ipencap->type;
511         t->encap.sport = ipencap->sport;
512         t->encap.dport = ipencap->dport;
513         t->encap.flags = ipencap->flags;
514
515         t->encap_hlen = hlen;
516         t->hlen = t->encap_hlen + t->tun_hlen;
517
518         return 0;
519 }
520 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
521
522 int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
523                     u8 *protocol, struct flowi4 *fl4)
524 {
525         const struct ip_tunnel_encap_ops *ops;
526         int ret = -EINVAL;
527
528         if (t->encap.type == TUNNEL_ENCAP_NONE)
529                 return 0;
530
531         if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
532                 return -EINVAL;
533
534         rcu_read_lock();
535         ops = rcu_dereference(iptun_encaps[t->encap.type]);
536         if (likely(ops && ops->build_header))
537                 ret = ops->build_header(skb, &t->encap, protocol, fl4);
538         rcu_read_unlock();
539
540         return ret;
541 }
542 EXPORT_SYMBOL(ip_tunnel_encap);
543
544 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
545                             struct rtable *rt, __be16 df,
546                             const struct iphdr *inner_iph)
547 {
548         struct ip_tunnel *tunnel = netdev_priv(dev);
549         int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
550         int mtu;
551
552         if (df)
553                 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
554                                         - sizeof(struct iphdr) - tunnel->hlen;
555         else
556                 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
557
558         if (skb_dst(skb))
559                 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
560
561         if (skb->protocol == htons(ETH_P_IP)) {
562                 if (!skb_is_gso(skb) &&
563                     (inner_iph->frag_off & htons(IP_DF)) &&
564                     mtu < pkt_size) {
565                         memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
566                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
567                         return -E2BIG;
568                 }
569         }
570 #if IS_ENABLED(CONFIG_IPV6)
571         else if (skb->protocol == htons(ETH_P_IPV6)) {
572                 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
573
574                 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
575                            mtu >= IPV6_MIN_MTU) {
576                         if ((tunnel->parms.iph.daddr &&
577                             !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
578                             rt6->rt6i_dst.plen == 128) {
579                                 rt6->rt6i_flags |= RTF_MODIFIED;
580                                 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
581                         }
582                 }
583
584                 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
585                                         mtu < pkt_size) {
586                         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
587                         return -E2BIG;
588                 }
589         }
590 #endif
591         return 0;
592 }
593
594 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
595                     const struct iphdr *tnl_params, u8 protocol)
596 {
597         struct ip_tunnel *tunnel = netdev_priv(dev);
598         const struct iphdr *inner_iph;
599         struct flowi4 fl4;
600         u8     tos, ttl;
601         __be16 df;
602         struct rtable *rt;              /* Route to the other host */
603         unsigned int max_headroom;      /* The extra header space needed */
604         __be32 dst;
605         bool connected;
606
607         inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
608         connected = (tunnel->parms.iph.daddr != 0);
609
610         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
611
612         dst = tnl_params->daddr;
613         if (dst == 0) {
614                 /* NBMA tunnel */
615
616                 if (!skb_dst(skb)) {
617                         dev->stats.tx_fifo_errors++;
618                         goto tx_error;
619                 }
620
621                 if (skb->protocol == htons(ETH_P_IP)) {
622                         rt = skb_rtable(skb);
623                         dst = rt_nexthop(rt, inner_iph->daddr);
624                 }
625 #if IS_ENABLED(CONFIG_IPV6)
626                 else if (skb->protocol == htons(ETH_P_IPV6)) {
627                         const struct in6_addr *addr6;
628                         struct neighbour *neigh;
629                         bool do_tx_error_icmp;
630                         int addr_type;
631
632                         neigh = dst_neigh_lookup(skb_dst(skb),
633                                                  &ipv6_hdr(skb)->daddr);
634                         if (!neigh)
635                                 goto tx_error;
636
637                         addr6 = (const struct in6_addr *)&neigh->primary_key;
638                         addr_type = ipv6_addr_type(addr6);
639
640                         if (addr_type == IPV6_ADDR_ANY) {
641                                 addr6 = &ipv6_hdr(skb)->daddr;
642                                 addr_type = ipv6_addr_type(addr6);
643                         }
644
645                         if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
646                                 do_tx_error_icmp = true;
647                         else {
648                                 do_tx_error_icmp = false;
649                                 dst = addr6->s6_addr32[3];
650                         }
651                         neigh_release(neigh);
652                         if (do_tx_error_icmp)
653                                 goto tx_error_icmp;
654                 }
655 #endif
656                 else
657                         goto tx_error;
658
659                 connected = false;
660         }
661
662         tos = tnl_params->tos;
663         if (tos & 0x1) {
664                 tos &= ~0x1;
665                 if (skb->protocol == htons(ETH_P_IP)) {
666                         tos = inner_iph->tos;
667                         connected = false;
668                 } else if (skb->protocol == htons(ETH_P_IPV6)) {
669                         tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
670                         connected = false;
671                 }
672         }
673
674         init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
675                          tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
676
677         if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
678                 goto tx_error;
679
680         rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
681                          NULL;
682
683         if (!rt) {
684                 rt = ip_route_output_key(tunnel->net, &fl4);
685
686                 if (IS_ERR(rt)) {
687                         dev->stats.tx_carrier_errors++;
688                         goto tx_error;
689                 }
690                 if (connected)
691                         dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
692                                           fl4.saddr);
693         }
694
695         if (rt->dst.dev == dev) {
696                 ip_rt_put(rt);
697                 dev->stats.collisions++;
698                 goto tx_error;
699         }
700
701         if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
702                 ip_rt_put(rt);
703                 goto tx_error;
704         }
705
706         if (tunnel->err_count > 0) {
707                 if (time_before(jiffies,
708                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
709                         tunnel->err_count--;
710
711                         dst_link_failure(skb);
712                 } else
713                         tunnel->err_count = 0;
714         }
715
716         tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
717         ttl = tnl_params->ttl;
718         if (ttl == 0) {
719                 if (skb->protocol == htons(ETH_P_IP))
720                         ttl = inner_iph->ttl;
721 #if IS_ENABLED(CONFIG_IPV6)
722                 else if (skb->protocol == htons(ETH_P_IPV6))
723                         ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
724 #endif
725                 else
726                         ttl = ip4_dst_hoplimit(&rt->dst);
727         }
728
729         df = tnl_params->frag_off;
730         if (skb->protocol == htons(ETH_P_IP))
731                 df |= (inner_iph->frag_off&htons(IP_DF));
732
733         max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
734                         + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
735         if (max_headroom > dev->needed_headroom)
736                 dev->needed_headroom = max_headroom;
737
738         if (skb_cow_head(skb, dev->needed_headroom)) {
739                 ip_rt_put(rt);
740                 dev->stats.tx_dropped++;
741                 kfree_skb(skb);
742                 return;
743         }
744
745         iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
746                       df, !net_eq(tunnel->net, dev_net(dev)));
747         return;
748
749 #if IS_ENABLED(CONFIG_IPV6)
750 tx_error_icmp:
751         dst_link_failure(skb);
752 #endif
753 tx_error:
754         dev->stats.tx_errors++;
755         kfree_skb(skb);
756 }
757 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
758
759 static void ip_tunnel_update(struct ip_tunnel_net *itn,
760                              struct ip_tunnel *t,
761                              struct net_device *dev,
762                              struct ip_tunnel_parm *p,
763                              bool set_mtu)
764 {
765         ip_tunnel_del(itn, t);
766         t->parms.iph.saddr = p->iph.saddr;
767         t->parms.iph.daddr = p->iph.daddr;
768         t->parms.i_key = p->i_key;
769         t->parms.o_key = p->o_key;
770         if (dev->type != ARPHRD_ETHER) {
771                 memcpy(dev->dev_addr, &p->iph.saddr, 4);
772                 memcpy(dev->broadcast, &p->iph.daddr, 4);
773         }
774         ip_tunnel_add(itn, t);
775
776         t->parms.iph.ttl = p->iph.ttl;
777         t->parms.iph.tos = p->iph.tos;
778         t->parms.iph.frag_off = p->iph.frag_off;
779
780         if (t->parms.link != p->link) {
781                 int mtu;
782
783                 t->parms.link = p->link;
784                 mtu = ip_tunnel_bind_dev(dev);
785                 if (set_mtu)
786                         dev->mtu = mtu;
787         }
788         dst_cache_reset(&t->dst_cache);
789         netdev_state_change(dev);
790 }
791
792 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
793 {
794         int err = 0;
795         struct ip_tunnel *t = netdev_priv(dev);
796         struct net *net = t->net;
797         struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
798
799         BUG_ON(!itn->fb_tunnel_dev);
800         switch (cmd) {
801         case SIOCGETTUNNEL:
802                 if (dev == itn->fb_tunnel_dev) {
803                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
804                         if (!t)
805                                 t = netdev_priv(dev);
806                 }
807                 memcpy(p, &t->parms, sizeof(*p));
808                 break;
809
810         case SIOCADDTUNNEL:
811         case SIOCCHGTUNNEL:
812                 err = -EPERM;
813                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
814                         goto done;
815                 if (p->iph.ttl)
816                         p->iph.frag_off |= htons(IP_DF);
817                 if (!(p->i_flags & VTI_ISVTI)) {
818                         if (!(p->i_flags & TUNNEL_KEY))
819                                 p->i_key = 0;
820                         if (!(p->o_flags & TUNNEL_KEY))
821                                 p->o_key = 0;
822                 }
823
824                 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
825
826                 if (cmd == SIOCADDTUNNEL) {
827                         if (!t) {
828                                 t = ip_tunnel_create(net, itn, p);
829                                 err = PTR_ERR_OR_ZERO(t);
830                                 break;
831                         }
832
833                         err = -EEXIST;
834                         break;
835                 }
836                 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
837                         if (t) {
838                                 if (t->dev != dev) {
839                                         err = -EEXIST;
840                                         break;
841                                 }
842                         } else {
843                                 unsigned int nflags = 0;
844
845                                 if (ipv4_is_multicast(p->iph.daddr))
846                                         nflags = IFF_BROADCAST;
847                                 else if (p->iph.daddr)
848                                         nflags = IFF_POINTOPOINT;
849
850                                 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
851                                         err = -EINVAL;
852                                         break;
853                                 }
854
855                                 t = netdev_priv(dev);
856                         }
857                 }
858
859                 if (t) {
860                         err = 0;
861                         ip_tunnel_update(itn, t, dev, p, true);
862                 } else {
863                         err = -ENOENT;
864                 }
865                 break;
866
867         case SIOCDELTUNNEL:
868                 err = -EPERM;
869                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
870                         goto done;
871
872                 if (dev == itn->fb_tunnel_dev) {
873                         err = -ENOENT;
874                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
875                         if (!t)
876                                 goto done;
877                         err = -EPERM;
878                         if (t == netdev_priv(itn->fb_tunnel_dev))
879                                 goto done;
880                         dev = t->dev;
881                 }
882                 unregister_netdevice(dev);
883                 err = 0;
884                 break;
885
886         default:
887                 err = -EINVAL;
888         }
889
890 done:
891         return err;
892 }
893 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
894
895 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
896 {
897         struct ip_tunnel *tunnel = netdev_priv(dev);
898         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
899         int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
900
901         if (new_mtu < 68)
902                 return -EINVAL;
903
904         if (new_mtu > max_mtu) {
905                 if (strict)
906                         return -EINVAL;
907
908                 new_mtu = max_mtu;
909         }
910
911         dev->mtu = new_mtu;
912         return 0;
913 }
914 EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
915
916 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
917 {
918         return __ip_tunnel_change_mtu(dev, new_mtu, true);
919 }
920 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
921
922 static void ip_tunnel_dev_free(struct net_device *dev)
923 {
924         struct ip_tunnel *tunnel = netdev_priv(dev);
925
926         gro_cells_destroy(&tunnel->gro_cells);
927         dst_cache_destroy(&tunnel->dst_cache);
928         free_percpu(dev->tstats);
929         free_netdev(dev);
930 }
931
932 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
933 {
934         struct ip_tunnel *tunnel = netdev_priv(dev);
935         struct ip_tunnel_net *itn;
936
937         itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
938
939         if (itn->fb_tunnel_dev != dev) {
940                 ip_tunnel_del(itn, netdev_priv(dev));
941                 unregister_netdevice_queue(dev, head);
942         }
943 }
944 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
945
946 struct net *ip_tunnel_get_link_net(const struct net_device *dev)
947 {
948         struct ip_tunnel *tunnel = netdev_priv(dev);
949
950         return tunnel->net;
951 }
952 EXPORT_SYMBOL(ip_tunnel_get_link_net);
953
954 int ip_tunnel_get_iflink(const struct net_device *dev)
955 {
956         struct ip_tunnel *tunnel = netdev_priv(dev);
957
958         return tunnel->parms.link;
959 }
960 EXPORT_SYMBOL(ip_tunnel_get_iflink);
961
962 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
963                                   struct rtnl_link_ops *ops, char *devname)
964 {
965         struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
966         struct ip_tunnel_parm parms;
967         unsigned int i;
968
969         for (i = 0; i < IP_TNL_HASH_SIZE; i++)
970                 INIT_HLIST_HEAD(&itn->tunnels[i]);
971
972         if (!ops) {
973                 itn->fb_tunnel_dev = NULL;
974                 return 0;
975         }
976
977         memset(&parms, 0, sizeof(parms));
978         if (devname)
979                 strlcpy(parms.name, devname, IFNAMSIZ);
980
981         rtnl_lock();
982         itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
983         /* FB netdevice is special: we have one, and only one per netns.
984          * Allowing to move it to another netns is clearly unsafe.
985          */
986         if (!IS_ERR(itn->fb_tunnel_dev)) {
987                 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
988                 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
989                 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
990         }
991         rtnl_unlock();
992
993         return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
994 }
995 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
996
997 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
998                               struct rtnl_link_ops *ops)
999 {
1000         struct net *net = dev_net(itn->fb_tunnel_dev);
1001         struct net_device *dev, *aux;
1002         int h;
1003
1004         for_each_netdev_safe(net, dev, aux)
1005                 if (dev->rtnl_link_ops == ops)
1006                         unregister_netdevice_queue(dev, head);
1007
1008         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1009                 struct ip_tunnel *t;
1010                 struct hlist_node *n;
1011                 struct hlist_head *thead = &itn->tunnels[h];
1012
1013                 hlist_for_each_entry_safe(t, n, thead, hash_node)
1014                         /* If dev is in the same netns, it has already
1015                          * been added to the list by the previous loop.
1016                          */
1017                         if (!net_eq(dev_net(t->dev), net))
1018                                 unregister_netdevice_queue(t->dev, head);
1019         }
1020 }
1021
1022 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
1023 {
1024         LIST_HEAD(list);
1025
1026         rtnl_lock();
1027         ip_tunnel_destroy(itn, &list, ops);
1028         unregister_netdevice_many(&list);
1029         rtnl_unlock();
1030 }
1031 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
1032
1033 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1034                       struct ip_tunnel_parm *p)
1035 {
1036         struct ip_tunnel *nt;
1037         struct net *net = dev_net(dev);
1038         struct ip_tunnel_net *itn;
1039         int mtu;
1040         int err;
1041
1042         nt = netdev_priv(dev);
1043         itn = net_generic(net, nt->ip_tnl_net_id);
1044
1045         if (nt->collect_md) {
1046                 if (rtnl_dereference(itn->collect_md_tun))
1047                         return -EEXIST;
1048         } else {
1049                 if (ip_tunnel_find(itn, p, dev->type))
1050                         return -EEXIST;
1051         }
1052
1053         nt->net = net;
1054         nt->parms = *p;
1055         err = register_netdevice(dev);
1056         if (err)
1057                 goto out;
1058
1059         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1060                 eth_hw_addr_random(dev);
1061
1062         mtu = ip_tunnel_bind_dev(dev);
1063         if (!tb[IFLA_MTU])
1064                 dev->mtu = mtu;
1065
1066         ip_tunnel_add(itn, nt);
1067 out:
1068         return err;
1069 }
1070 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1071
1072 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1073                          struct ip_tunnel_parm *p)
1074 {
1075         struct ip_tunnel *t;
1076         struct ip_tunnel *tunnel = netdev_priv(dev);
1077         struct net *net = tunnel->net;
1078         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1079
1080         if (dev == itn->fb_tunnel_dev)
1081                 return -EINVAL;
1082
1083         t = ip_tunnel_find(itn, p, dev->type);
1084
1085         if (t) {
1086                 if (t->dev != dev)
1087                         return -EEXIST;
1088         } else {
1089                 t = tunnel;
1090
1091                 if (dev->type != ARPHRD_ETHER) {
1092                         unsigned int nflags = 0;
1093
1094                         if (ipv4_is_multicast(p->iph.daddr))
1095                                 nflags = IFF_BROADCAST;
1096                         else if (p->iph.daddr)
1097                                 nflags = IFF_POINTOPOINT;
1098
1099                         if ((dev->flags ^ nflags) &
1100                             (IFF_POINTOPOINT | IFF_BROADCAST))
1101                                 return -EINVAL;
1102                 }
1103         }
1104
1105         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1106         return 0;
1107 }
1108 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1109
1110 int ip_tunnel_init(struct net_device *dev)
1111 {
1112         struct ip_tunnel *tunnel = netdev_priv(dev);
1113         struct iphdr *iph = &tunnel->parms.iph;
1114         int err;
1115
1116         dev->destructor = ip_tunnel_dev_free;
1117         dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1118         if (!dev->tstats)
1119                 return -ENOMEM;
1120
1121         err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1122         if (err) {
1123                 free_percpu(dev->tstats);
1124                 return err;
1125         }
1126
1127         err = gro_cells_init(&tunnel->gro_cells, dev);
1128         if (err) {
1129                 dst_cache_destroy(&tunnel->dst_cache);
1130                 free_percpu(dev->tstats);
1131                 return err;
1132         }
1133
1134         tunnel->dev = dev;
1135         tunnel->net = dev_net(dev);
1136         strcpy(tunnel->parms.name, dev->name);
1137         iph->version            = 4;
1138         iph->ihl                = 5;
1139
1140         if (tunnel->collect_md) {
1141                 dev->features |= NETIF_F_NETNS_LOCAL;
1142                 netif_keep_dst(dev);
1143         }
1144         return 0;
1145 }
1146 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1147
1148 void ip_tunnel_uninit(struct net_device *dev)
1149 {
1150         struct ip_tunnel *tunnel = netdev_priv(dev);
1151         struct net *net = tunnel->net;
1152         struct ip_tunnel_net *itn;
1153
1154         itn = net_generic(net, tunnel->ip_tnl_net_id);
1155         /* fb_tunnel_dev will be unregisted in net-exit call. */
1156         if (itn->fb_tunnel_dev != dev)
1157                 ip_tunnel_del(itn, netdev_priv(dev));
1158
1159         dst_cache_reset(&tunnel->dst_cache);
1160 }
1161 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1162
1163 /* Do least required initialization, rest of init is done in tunnel_init call */
1164 void ip_tunnel_setup(struct net_device *dev, int net_id)
1165 {
1166         struct ip_tunnel *tunnel = netdev_priv(dev);
1167         tunnel->ip_tnl_net_id = net_id;
1168 }
1169 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1170
1171 MODULE_LICENSE("GPL");