Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
[cascardo/linux.git] / net / ipv4 / ip_tunnel.c
1 /*
2  * Copyright (c) 2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43
44 #include <net/sock.h>
45 #include <net/ip.h>
46 #include <net/icmp.h>
47 #include <net/protocol.h>
48 #include <net/ip_tunnels.h>
49 #include <net/arp.h>
50 #include <net/checksum.h>
51 #include <net/dsfield.h>
52 #include <net/inet_ecn.h>
53 #include <net/xfrm.h>
54 #include <net/net_namespace.h>
55 #include <net/netns/generic.h>
56 #include <net/rtnetlink.h>
57
58 #if IS_ENABLED(CONFIG_IPV6)
59 #include <net/ipv6.h>
60 #include <net/ip6_fib.h>
61 #include <net/ip6_route.h>
62 #endif
63
64 static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn,
65                                    __be32 key, __be32 remote)
66 {
67         return hash_32((__force u32)key ^ (__force u32)remote,
68                          IP_TNL_HASH_BITS);
69 }
70
71 /* Often modified stats are per cpu, other are shared (netdev->stats) */
72 struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
73                                                 struct rtnl_link_stats64 *tot)
74 {
75         int i;
76
77         for_each_possible_cpu(i) {
78                 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
79                 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
80                 unsigned int start;
81
82                 do {
83                         start = u64_stats_fetch_begin_bh(&tstats->syncp);
84                         rx_packets = tstats->rx_packets;
85                         tx_packets = tstats->tx_packets;
86                         rx_bytes = tstats->rx_bytes;
87                         tx_bytes = tstats->tx_bytes;
88                 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
89
90                 tot->rx_packets += rx_packets;
91                 tot->tx_packets += tx_packets;
92                 tot->rx_bytes   += rx_bytes;
93                 tot->tx_bytes   += tx_bytes;
94         }
95
96         tot->multicast = dev->stats.multicast;
97
98         tot->rx_crc_errors = dev->stats.rx_crc_errors;
99         tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
100         tot->rx_length_errors = dev->stats.rx_length_errors;
101         tot->rx_frame_errors = dev->stats.rx_frame_errors;
102         tot->rx_errors = dev->stats.rx_errors;
103
104         tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
105         tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
106         tot->tx_dropped = dev->stats.tx_dropped;
107         tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
108         tot->tx_errors = dev->stats.tx_errors;
109
110         tot->collisions  = dev->stats.collisions;
111
112         return tot;
113 }
114 EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
115
116 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
117                                 __be16 flags, __be32 key)
118 {
119         if (p->i_flags & TUNNEL_KEY) {
120                 if (flags & TUNNEL_KEY)
121                         return key == p->i_key;
122                 else
123                         /* key expected, none present */
124                         return false;
125         } else
126                 return !(flags & TUNNEL_KEY);
127 }
128
129 /* Fallback tunnel: no source, no destination, no key, no options
130
131    Tunnel hash table:
132    We require exact key match i.e. if a key is present in packet
133    it will match only tunnel with the same key; if it is not present,
134    it will match only keyless tunnel.
135
136    All keyless packets, if not matched by a configured keyless tunnel,
137    will match the fallback tunnel.
138    Given src, dst and key, find appropriate for input tunnel.
139 */
140 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
141                                    int link, __be16 flags,
142                                    __be32 remote, __be32 local,
143                                    __be32 key)
144 {
145         unsigned int hash;
146         struct ip_tunnel *t, *cand = NULL;
147         struct hlist_head *head;
148
149         hash = ip_tunnel_hash(itn, key, remote);
150         head = &itn->tunnels[hash];
151
152         hlist_for_each_entry_rcu(t, head, hash_node) {
153                 if (local != t->parms.iph.saddr ||
154                     remote != t->parms.iph.daddr ||
155                     !(t->dev->flags & IFF_UP))
156                         continue;
157
158                 if (!ip_tunnel_key_match(&t->parms, flags, key))
159                         continue;
160
161                 if (t->parms.link == link)
162                         return t;
163                 else
164                         cand = t;
165         }
166
167         hlist_for_each_entry_rcu(t, head, hash_node) {
168                 if (remote != t->parms.iph.daddr ||
169                     !(t->dev->flags & IFF_UP))
170                         continue;
171
172                 if (!ip_tunnel_key_match(&t->parms, flags, key))
173                         continue;
174
175                 if (t->parms.link == link)
176                         return t;
177                 else if (!cand)
178                         cand = t;
179         }
180
181         hash = ip_tunnel_hash(itn, key, 0);
182         head = &itn->tunnels[hash];
183
184         hlist_for_each_entry_rcu(t, head, hash_node) {
185                 if ((local != t->parms.iph.saddr &&
186                      (local != t->parms.iph.daddr ||
187                       !ipv4_is_multicast(local))) ||
188                     !(t->dev->flags & IFF_UP))
189                         continue;
190
191                 if (!ip_tunnel_key_match(&t->parms, flags, key))
192                         continue;
193
194                 if (t->parms.link == link)
195                         return t;
196                 else if (!cand)
197                         cand = t;
198         }
199
200         if (flags & TUNNEL_NO_KEY)
201                 goto skip_key_lookup;
202
203         hlist_for_each_entry_rcu(t, head, hash_node) {
204                 if (t->parms.i_key != key ||
205                     !(t->dev->flags & IFF_UP))
206                         continue;
207
208                 if (t->parms.link == link)
209                         return t;
210                 else if (!cand)
211                         cand = t;
212         }
213
214 skip_key_lookup:
215         if (cand)
216                 return cand;
217
218         if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
219                 return netdev_priv(itn->fb_tunnel_dev);
220
221
222         return NULL;
223 }
224 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
225
226 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
227                                     struct ip_tunnel_parm *parms)
228 {
229         unsigned int h;
230         __be32 remote;
231
232         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
233                 remote = parms->iph.daddr;
234         else
235                 remote = 0;
236
237         h = ip_tunnel_hash(itn, parms->i_key, remote);
238         return &itn->tunnels[h];
239 }
240
241 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
242 {
243         struct hlist_head *head = ip_bucket(itn, &t->parms);
244
245         hlist_add_head_rcu(&t->hash_node, head);
246 }
247
248 static void ip_tunnel_del(struct ip_tunnel *t)
249 {
250         hlist_del_init_rcu(&t->hash_node);
251 }
252
253 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
254                                         struct ip_tunnel_parm *parms,
255                                         int type)
256 {
257         __be32 remote = parms->iph.daddr;
258         __be32 local = parms->iph.saddr;
259         __be32 key = parms->i_key;
260         int link = parms->link;
261         struct ip_tunnel *t = NULL;
262         struct hlist_head *head = ip_bucket(itn, parms);
263
264         hlist_for_each_entry_rcu(t, head, hash_node) {
265                 if (local == t->parms.iph.saddr &&
266                     remote == t->parms.iph.daddr &&
267                     key == t->parms.i_key &&
268                     link == t->parms.link &&
269                     type == t->dev->type)
270                         break;
271         }
272         return t;
273 }
274
275 static struct net_device *__ip_tunnel_create(struct net *net,
276                                              const struct rtnl_link_ops *ops,
277                                              struct ip_tunnel_parm *parms)
278 {
279         int err;
280         struct ip_tunnel *tunnel;
281         struct net_device *dev;
282         char name[IFNAMSIZ];
283
284         if (parms->name[0])
285                 strlcpy(name, parms->name, IFNAMSIZ);
286         else {
287                 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
288                         err = -E2BIG;
289                         goto failed;
290                 }
291                 strlcpy(name, ops->kind, IFNAMSIZ);
292                 strncat(name, "%d", 2);
293         }
294
295         ASSERT_RTNL();
296         dev = alloc_netdev(ops->priv_size, name, ops->setup);
297         if (!dev) {
298                 err = -ENOMEM;
299                 goto failed;
300         }
301         dev_net_set(dev, net);
302
303         dev->rtnl_link_ops = ops;
304
305         tunnel = netdev_priv(dev);
306         tunnel->parms = *parms;
307         tunnel->net = net;
308
309         err = register_netdevice(dev);
310         if (err)
311                 goto failed_free;
312
313         return dev;
314
315 failed_free:
316         free_netdev(dev);
317 failed:
318         return ERR_PTR(err);
319 }
320
321 static inline struct rtable *ip_route_output_tunnel(struct net *net,
322                                                     struct flowi4 *fl4,
323                                                     int proto,
324                                                     __be32 daddr, __be32 saddr,
325                                                     __be32 key, __u8 tos, int oif)
326 {
327         memset(fl4, 0, sizeof(*fl4));
328         fl4->flowi4_oif = oif;
329         fl4->daddr = daddr;
330         fl4->saddr = saddr;
331         fl4->flowi4_tos = tos;
332         fl4->flowi4_proto = proto;
333         fl4->fl4_gre_key = key;
334         return ip_route_output_key(net, fl4);
335 }
336
337 static int ip_tunnel_bind_dev(struct net_device *dev)
338 {
339         struct net_device *tdev = NULL;
340         struct ip_tunnel *tunnel = netdev_priv(dev);
341         const struct iphdr *iph;
342         int hlen = LL_MAX_HEADER;
343         int mtu = ETH_DATA_LEN;
344         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
345
346         iph = &tunnel->parms.iph;
347
348         /* Guess output device to choose reasonable mtu and needed_headroom */
349         if (iph->daddr) {
350                 struct flowi4 fl4;
351                 struct rtable *rt;
352
353                 rt = ip_route_output_tunnel(tunnel->net, &fl4,
354                                             tunnel->parms.iph.protocol,
355                                             iph->daddr, iph->saddr,
356                                             tunnel->parms.o_key,
357                                             RT_TOS(iph->tos),
358                                             tunnel->parms.link);
359                 if (!IS_ERR(rt)) {
360                         tdev = rt->dst.dev;
361                         ip_rt_put(rt);
362                 }
363                 if (dev->type != ARPHRD_ETHER)
364                         dev->flags |= IFF_POINTOPOINT;
365         }
366
367         if (!tdev && tunnel->parms.link)
368                 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
369
370         if (tdev) {
371                 hlen = tdev->hard_header_len + tdev->needed_headroom;
372                 mtu = tdev->mtu;
373         }
374         dev->iflink = tunnel->parms.link;
375
376         dev->needed_headroom = t_hlen + hlen;
377         mtu -= (dev->hard_header_len + t_hlen);
378
379         if (mtu < 68)
380                 mtu = 68;
381
382         return mtu;
383 }
384
385 static struct ip_tunnel *ip_tunnel_create(struct net *net,
386                                           struct ip_tunnel_net *itn,
387                                           struct ip_tunnel_parm *parms)
388 {
389         struct ip_tunnel *nt, *fbt;
390         struct net_device *dev;
391
392         BUG_ON(!itn->fb_tunnel_dev);
393         fbt = netdev_priv(itn->fb_tunnel_dev);
394         dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
395         if (IS_ERR(dev))
396                 return NULL;
397
398         dev->mtu = ip_tunnel_bind_dev(dev);
399
400         nt = netdev_priv(dev);
401         ip_tunnel_add(itn, nt);
402         return nt;
403 }
404
405 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
406                   const struct tnl_ptk_info *tpi, bool log_ecn_error)
407 {
408         struct pcpu_tstats *tstats;
409         const struct iphdr *iph = ip_hdr(skb);
410         int err;
411
412 #ifdef CONFIG_NET_IPGRE_BROADCAST
413         if (ipv4_is_multicast(iph->daddr)) {
414                 /* Looped back packet, drop it! */
415                 if (rt_is_output_route(skb_rtable(skb)))
416                         goto drop;
417                 tunnel->dev->stats.multicast++;
418                 skb->pkt_type = PACKET_BROADCAST;
419         }
420 #endif
421
422         if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
423              ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
424                 tunnel->dev->stats.rx_crc_errors++;
425                 tunnel->dev->stats.rx_errors++;
426                 goto drop;
427         }
428
429         if (tunnel->parms.i_flags&TUNNEL_SEQ) {
430                 if (!(tpi->flags&TUNNEL_SEQ) ||
431                     (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
432                         tunnel->dev->stats.rx_fifo_errors++;
433                         tunnel->dev->stats.rx_errors++;
434                         goto drop;
435                 }
436                 tunnel->i_seqno = ntohl(tpi->seq) + 1;
437         }
438
439         err = IP_ECN_decapsulate(iph, skb);
440         if (unlikely(err)) {
441                 if (log_ecn_error)
442                         net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
443                                         &iph->saddr, iph->tos);
444                 if (err > 1) {
445                         ++tunnel->dev->stats.rx_frame_errors;
446                         ++tunnel->dev->stats.rx_errors;
447                         goto drop;
448                 }
449         }
450
451         tstats = this_cpu_ptr(tunnel->dev->tstats);
452         u64_stats_update_begin(&tstats->syncp);
453         tstats->rx_packets++;
454         tstats->rx_bytes += skb->len;
455         u64_stats_update_end(&tstats->syncp);
456
457         skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
458
459         if (tunnel->dev->type == ARPHRD_ETHER) {
460                 skb->protocol = eth_type_trans(skb, tunnel->dev);
461                 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
462         } else {
463                 skb->dev = tunnel->dev;
464         }
465
466         gro_cells_receive(&tunnel->gro_cells, skb);
467         return 0;
468
469 drop:
470         kfree_skb(skb);
471         return 0;
472 }
473 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
474
475 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
476                             struct rtable *rt, __be16 df)
477 {
478         struct ip_tunnel *tunnel = netdev_priv(dev);
479         int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
480         int mtu;
481
482         if (df)
483                 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
484                                         - sizeof(struct iphdr) - tunnel->hlen;
485         else
486                 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
487
488         if (skb_dst(skb))
489                 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
490
491         if (skb->protocol == htons(ETH_P_IP)) {
492                 if (!skb_is_gso(skb) &&
493                     (df & htons(IP_DF)) && mtu < pkt_size) {
494                         memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
495                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
496                         return -E2BIG;
497                 }
498         }
499 #if IS_ENABLED(CONFIG_IPV6)
500         else if (skb->protocol == htons(ETH_P_IPV6)) {
501                 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
502
503                 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
504                            mtu >= IPV6_MIN_MTU) {
505                         if ((tunnel->parms.iph.daddr &&
506                             !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
507                             rt6->rt6i_dst.plen == 128) {
508                                 rt6->rt6i_flags |= RTF_MODIFIED;
509                                 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
510                         }
511                 }
512
513                 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
514                                         mtu < pkt_size) {
515                         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
516                         return -E2BIG;
517                 }
518         }
519 #endif
520         return 0;
521 }
522
523 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
524                     const struct iphdr *tnl_params, const u8 protocol)
525 {
526         struct ip_tunnel *tunnel = netdev_priv(dev);
527         const struct iphdr *inner_iph;
528         struct flowi4 fl4;
529         u8     tos, ttl;
530         __be16 df;
531         struct rtable *rt;              /* Route to the other host */
532         unsigned int max_headroom;      /* The extra header space needed */
533         __be32 dst;
534         int err;
535
536         inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
537
538         dst = tnl_params->daddr;
539         if (dst == 0) {
540                 /* NBMA tunnel */
541
542                 if (skb_dst(skb) == NULL) {
543                         dev->stats.tx_fifo_errors++;
544                         goto tx_error;
545                 }
546
547                 if (skb->protocol == htons(ETH_P_IP)) {
548                         rt = skb_rtable(skb);
549                         dst = rt_nexthop(rt, inner_iph->daddr);
550                 }
551 #if IS_ENABLED(CONFIG_IPV6)
552                 else if (skb->protocol == htons(ETH_P_IPV6)) {
553                         const struct in6_addr *addr6;
554                         struct neighbour *neigh;
555                         bool do_tx_error_icmp;
556                         int addr_type;
557
558                         neigh = dst_neigh_lookup(skb_dst(skb),
559                                                  &ipv6_hdr(skb)->daddr);
560                         if (neigh == NULL)
561                                 goto tx_error;
562
563                         addr6 = (const struct in6_addr *)&neigh->primary_key;
564                         addr_type = ipv6_addr_type(addr6);
565
566                         if (addr_type == IPV6_ADDR_ANY) {
567                                 addr6 = &ipv6_hdr(skb)->daddr;
568                                 addr_type = ipv6_addr_type(addr6);
569                         }
570
571                         if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
572                                 do_tx_error_icmp = true;
573                         else {
574                                 do_tx_error_icmp = false;
575                                 dst = addr6->s6_addr32[3];
576                         }
577                         neigh_release(neigh);
578                         if (do_tx_error_icmp)
579                                 goto tx_error_icmp;
580                 }
581 #endif
582                 else
583                         goto tx_error;
584         }
585
586         tos = tnl_params->tos;
587         if (tos & 0x1) {
588                 tos &= ~0x1;
589                 if (skb->protocol == htons(ETH_P_IP))
590                         tos = inner_iph->tos;
591                 else if (skb->protocol == htons(ETH_P_IPV6))
592                         tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
593         }
594
595         rt = ip_route_output_tunnel(tunnel->net, &fl4,
596                                     protocol,
597                                     dst, tnl_params->saddr,
598                                     tunnel->parms.o_key,
599                                     RT_TOS(tos),
600                                     tunnel->parms.link);
601         if (IS_ERR(rt)) {
602                 dev->stats.tx_carrier_errors++;
603                 goto tx_error;
604         }
605         if (rt->dst.dev == dev) {
606                 ip_rt_put(rt);
607                 dev->stats.collisions++;
608                 goto tx_error;
609         }
610
611         if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
612                 ip_rt_put(rt);
613                 goto tx_error;
614         }
615
616         if (tunnel->err_count > 0) {
617                 if (time_before(jiffies,
618                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
619                         tunnel->err_count--;
620
621                         dst_link_failure(skb);
622                 } else
623                         tunnel->err_count = 0;
624         }
625
626         tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
627         ttl = tnl_params->ttl;
628         if (ttl == 0) {
629                 if (skb->protocol == htons(ETH_P_IP))
630                         ttl = inner_iph->ttl;
631 #if IS_ENABLED(CONFIG_IPV6)
632                 else if (skb->protocol == htons(ETH_P_IPV6))
633                         ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
634 #endif
635                 else
636                         ttl = ip4_dst_hoplimit(&rt->dst);
637         }
638
639         df = tnl_params->frag_off;
640         if (skb->protocol == htons(ETH_P_IP))
641                 df |= (inner_iph->frag_off&htons(IP_DF));
642
643         max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
644                         + rt->dst.header_len;
645         if (max_headroom > dev->needed_headroom)
646                 dev->needed_headroom = max_headroom;
647
648         if (skb_cow_head(skb, dev->needed_headroom)) {
649                 dev->stats.tx_dropped++;
650                 dev_kfree_skb(skb);
651                 return;
652         }
653
654         err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
655                             tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
656         iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
657
658         return;
659
660 #if IS_ENABLED(CONFIG_IPV6)
661 tx_error_icmp:
662         dst_link_failure(skb);
663 #endif
664 tx_error:
665         dev->stats.tx_errors++;
666         dev_kfree_skb(skb);
667 }
668 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
669
670 static void ip_tunnel_update(struct ip_tunnel_net *itn,
671                              struct ip_tunnel *t,
672                              struct net_device *dev,
673                              struct ip_tunnel_parm *p,
674                              bool set_mtu)
675 {
676         ip_tunnel_del(t);
677         t->parms.iph.saddr = p->iph.saddr;
678         t->parms.iph.daddr = p->iph.daddr;
679         t->parms.i_key = p->i_key;
680         t->parms.o_key = p->o_key;
681         if (dev->type != ARPHRD_ETHER) {
682                 memcpy(dev->dev_addr, &p->iph.saddr, 4);
683                 memcpy(dev->broadcast, &p->iph.daddr, 4);
684         }
685         ip_tunnel_add(itn, t);
686
687         t->parms.iph.ttl = p->iph.ttl;
688         t->parms.iph.tos = p->iph.tos;
689         t->parms.iph.frag_off = p->iph.frag_off;
690
691         if (t->parms.link != p->link) {
692                 int mtu;
693
694                 t->parms.link = p->link;
695                 mtu = ip_tunnel_bind_dev(dev);
696                 if (set_mtu)
697                         dev->mtu = mtu;
698         }
699         netdev_state_change(dev);
700 }
701
702 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
703 {
704         int err = 0;
705         struct ip_tunnel *t;
706         struct net *net = dev_net(dev);
707         struct ip_tunnel *tunnel = netdev_priv(dev);
708         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
709
710         BUG_ON(!itn->fb_tunnel_dev);
711         switch (cmd) {
712         case SIOCGETTUNNEL:
713                 t = NULL;
714                 if (dev == itn->fb_tunnel_dev)
715                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
716                 if (t == NULL)
717                         t = netdev_priv(dev);
718                 memcpy(p, &t->parms, sizeof(*p));
719                 break;
720
721         case SIOCADDTUNNEL:
722         case SIOCCHGTUNNEL:
723                 err = -EPERM;
724                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
725                         goto done;
726                 if (p->iph.ttl)
727                         p->iph.frag_off |= htons(IP_DF);
728                 if (!(p->i_flags&TUNNEL_KEY))
729                         p->i_key = 0;
730                 if (!(p->o_flags&TUNNEL_KEY))
731                         p->o_key = 0;
732
733                 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
734
735                 if (!t && (cmd == SIOCADDTUNNEL))
736                         t = ip_tunnel_create(net, itn, p);
737
738                 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
739                         if (t != NULL) {
740                                 if (t->dev != dev) {
741                                         err = -EEXIST;
742                                         break;
743                                 }
744                         } else {
745                                 unsigned int nflags = 0;
746
747                                 if (ipv4_is_multicast(p->iph.daddr))
748                                         nflags = IFF_BROADCAST;
749                                 else if (p->iph.daddr)
750                                         nflags = IFF_POINTOPOINT;
751
752                                 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
753                                         err = -EINVAL;
754                                         break;
755                                 }
756
757                                 t = netdev_priv(dev);
758                         }
759                 }
760
761                 if (t) {
762                         err = 0;
763                         ip_tunnel_update(itn, t, dev, p, true);
764                 } else
765                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
766                 break;
767
768         case SIOCDELTUNNEL:
769                 err = -EPERM;
770                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
771                         goto done;
772
773                 if (dev == itn->fb_tunnel_dev) {
774                         err = -ENOENT;
775                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
776                         if (t == NULL)
777                                 goto done;
778                         err = -EPERM;
779                         if (t == netdev_priv(itn->fb_tunnel_dev))
780                                 goto done;
781                         dev = t->dev;
782                 }
783                 unregister_netdevice(dev);
784                 err = 0;
785                 break;
786
787         default:
788                 err = -EINVAL;
789         }
790
791 done:
792         return err;
793 }
794 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
795
796 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
797 {
798         struct ip_tunnel *tunnel = netdev_priv(dev);
799         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
800
801         if (new_mtu < 68 ||
802             new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
803                 return -EINVAL;
804         dev->mtu = new_mtu;
805         return 0;
806 }
807 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
808
809 static void ip_tunnel_dev_free(struct net_device *dev)
810 {
811         struct ip_tunnel *tunnel = netdev_priv(dev);
812
813         gro_cells_destroy(&tunnel->gro_cells);
814         free_percpu(dev->tstats);
815         free_netdev(dev);
816 }
817
818 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
819 {
820         struct ip_tunnel *tunnel = netdev_priv(dev);
821         struct ip_tunnel_net *itn;
822
823         itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
824
825         if (itn->fb_tunnel_dev != dev) {
826                 ip_tunnel_del(netdev_priv(dev));
827                 unregister_netdevice_queue(dev, head);
828         }
829 }
830 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
831
832 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
833                                   struct rtnl_link_ops *ops, char *devname)
834 {
835         struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
836         struct ip_tunnel_parm parms;
837         unsigned int i;
838
839         for (i = 0; i < IP_TNL_HASH_SIZE; i++)
840                 INIT_HLIST_HEAD(&itn->tunnels[i]);
841
842         if (!ops) {
843                 itn->fb_tunnel_dev = NULL;
844                 return 0;
845         }
846
847         memset(&parms, 0, sizeof(parms));
848         if (devname)
849                 strlcpy(parms.name, devname, IFNAMSIZ);
850
851         rtnl_lock();
852         itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
853         /* FB netdevice is special: we have one, and only one per netns.
854          * Allowing to move it to another netns is clearly unsafe.
855          */
856         if (!IS_ERR(itn->fb_tunnel_dev)) {
857                 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
858                 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
859         }
860         rtnl_unlock();
861
862         return PTR_RET(itn->fb_tunnel_dev);
863 }
864 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
865
866 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
867                               struct rtnl_link_ops *ops)
868 {
869         struct net *net = dev_net(itn->fb_tunnel_dev);
870         struct net_device *dev, *aux;
871         int h;
872
873         for_each_netdev_safe(net, dev, aux)
874                 if (dev->rtnl_link_ops == ops)
875                         unregister_netdevice_queue(dev, head);
876
877         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
878                 struct ip_tunnel *t;
879                 struct hlist_node *n;
880                 struct hlist_head *thead = &itn->tunnels[h];
881
882                 hlist_for_each_entry_safe(t, n, thead, hash_node)
883                         /* If dev is in the same netns, it has already
884                          * been added to the list by the previous loop.
885                          */
886                         if (!net_eq(dev_net(t->dev), net))
887                                 unregister_netdevice_queue(t->dev, head);
888         }
889 }
890
891 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
892 {
893         LIST_HEAD(list);
894
895         rtnl_lock();
896         ip_tunnel_destroy(itn, &list, ops);
897         unregister_netdevice_many(&list);
898         rtnl_unlock();
899 }
900 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
901
902 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
903                       struct ip_tunnel_parm *p)
904 {
905         struct ip_tunnel *nt;
906         struct net *net = dev_net(dev);
907         struct ip_tunnel_net *itn;
908         int mtu;
909         int err;
910
911         nt = netdev_priv(dev);
912         itn = net_generic(net, nt->ip_tnl_net_id);
913
914         if (ip_tunnel_find(itn, p, dev->type))
915                 return -EEXIST;
916
917         nt->net = net;
918         nt->parms = *p;
919         err = register_netdevice(dev);
920         if (err)
921                 goto out;
922
923         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
924                 eth_hw_addr_random(dev);
925
926         mtu = ip_tunnel_bind_dev(dev);
927         if (!tb[IFLA_MTU])
928                 dev->mtu = mtu;
929
930         ip_tunnel_add(itn, nt);
931
932 out:
933         return err;
934 }
935 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
936
937 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
938                          struct ip_tunnel_parm *p)
939 {
940         struct ip_tunnel *t;
941         struct ip_tunnel *tunnel = netdev_priv(dev);
942         struct net *net = tunnel->net;
943         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
944
945         if (dev == itn->fb_tunnel_dev)
946                 return -EINVAL;
947
948         t = ip_tunnel_find(itn, p, dev->type);
949
950         if (t) {
951                 if (t->dev != dev)
952                         return -EEXIST;
953         } else {
954                 t = tunnel;
955
956                 if (dev->type != ARPHRD_ETHER) {
957                         unsigned int nflags = 0;
958
959                         if (ipv4_is_multicast(p->iph.daddr))
960                                 nflags = IFF_BROADCAST;
961                         else if (p->iph.daddr)
962                                 nflags = IFF_POINTOPOINT;
963
964                         if ((dev->flags ^ nflags) &
965                             (IFF_POINTOPOINT | IFF_BROADCAST))
966                                 return -EINVAL;
967                 }
968         }
969
970         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
971         return 0;
972 }
973 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
974
975 int ip_tunnel_init(struct net_device *dev)
976 {
977         struct ip_tunnel *tunnel = netdev_priv(dev);
978         struct iphdr *iph = &tunnel->parms.iph;
979         int i, err;
980
981         dev->destructor = ip_tunnel_dev_free;
982         dev->tstats = alloc_percpu(struct pcpu_tstats);
983         if (!dev->tstats)
984                 return -ENOMEM;
985
986         for_each_possible_cpu(i) {
987                 struct pcpu_tstats *ipt_stats;
988                 ipt_stats = per_cpu_ptr(dev->tstats, i);
989                 u64_stats_init(&ipt_stats->syncp);
990         }
991
992         err = gro_cells_init(&tunnel->gro_cells, dev);
993         if (err) {
994                 free_percpu(dev->tstats);
995                 return err;
996         }
997
998         tunnel->dev = dev;
999         tunnel->net = dev_net(dev);
1000         strcpy(tunnel->parms.name, dev->name);
1001         iph->version            = 4;
1002         iph->ihl                = 5;
1003
1004         return 0;
1005 }
1006 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1007
1008 void ip_tunnel_uninit(struct net_device *dev)
1009 {
1010         struct ip_tunnel *tunnel = netdev_priv(dev);
1011         struct net *net = tunnel->net;
1012         struct ip_tunnel_net *itn;
1013
1014         itn = net_generic(net, tunnel->ip_tnl_net_id);
1015         /* fb_tunnel_dev will be unregisted in net-exit call. */
1016         if (itn->fb_tunnel_dev != dev)
1017                 ip_tunnel_del(netdev_priv(dev));
1018 }
1019 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1020
1021 /* Do least required initialization, rest of init is done in tunnel_init call */
1022 void ip_tunnel_setup(struct net_device *dev, int net_id)
1023 {
1024         struct ip_tunnel *tunnel = netdev_priv(dev);
1025         tunnel->ip_tnl_net_id = net_id;
1026 }
1027 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1028
/* Declare the module's license string as "GPL". */
MODULE_LICENSE("GPL");