spi: Do not require a completion
[cascardo/linux.git] / net / ipv6 / ip6_output.c
1 /*
2  *      IPv6 output functions
3  *      Linux INET6 implementation
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      Based on linux/net/ipv4/ip_output.c
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  *
15  *      Changes:
16  *      A.N.Kuznetsov   :       arithmetic in fragmentation.
17  *                              extension headers are implemented.
18  *                              route changes now work.
19  *                              ip6_forward does not confuse sniffers.
20  *                              etc.
21  *
22  *      H. von Brand    :       Added missing #include <linux/string.h>
23  *      Imran Patel     :       frag id should be in NBO
24  *      Kazunori MIYAZAWA @USAGI
25  *                      :       add ip6_append_data and related functions
26  *                              for datagram xmit
27  */
28
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
41
42 #include <linux/netfilter.h>
43 #include <linux/netfilter_ipv6.h>
44
45 #include <net/sock.h>
46 #include <net/snmp.h>
47
48 #include <net/ipv6.h>
49 #include <net/ndisc.h>
50 #include <net/protocol.h>
51 #include <net/ip6_route.h>
52 #include <net/addrconf.h>
53 #include <net/rawv6.h>
54 #include <net/icmp.h>
55 #include <net/xfrm.h>
56 #include <net/checksum.h>
57 #include <linux/mroute6.h>
58
59 static int ip6_finish_output2(struct sk_buff *skb)
60 {
61         struct dst_entry *dst = skb_dst(skb);
62         struct net_device *dev = dst->dev;
63         struct neighbour *neigh;
64         struct in6_addr *nexthop;
65         int ret;
66
67         skb->protocol = htons(ETH_P_IPV6);
68         skb->dev = dev;
69
70         if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
71                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
72
73                 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
74                     ((mroute6_socket(dev_net(dev), skb) &&
75                      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
76                      ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
77                                          &ipv6_hdr(skb)->saddr))) {
78                         struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
79
80                         /* Do not check for IFF_ALLMULTI; multicast routing
81                            is not supported in any case.
82                          */
83                         if (newskb)
84                                 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
85                                         newskb, NULL, newskb->dev,
86                                         dev_loopback_xmit);
87
88                         if (ipv6_hdr(skb)->hop_limit == 0) {
89                                 IP6_INC_STATS(dev_net(dev), idev,
90                                               IPSTATS_MIB_OUTDISCARDS);
91                                 kfree_skb(skb);
92                                 return 0;
93                         }
94                 }
95
96                 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
97                                 skb->len);
98
99                 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
100                     IPV6_ADDR_SCOPE_NODELOCAL &&
101                     !(dev->flags & IFF_LOOPBACK)) {
102                         kfree_skb(skb);
103                         return 0;
104                 }
105         }
106
107         rcu_read_lock_bh();
108         nexthop = rt6_nexthop((struct rt6_info *)dst);
109         neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
110         if (unlikely(!neigh))
111                 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
112         if (!IS_ERR(neigh)) {
113                 ret = dst_neigh_output(dst, neigh, skb);
114                 rcu_read_unlock_bh();
115                 return ret;
116         }
117         rcu_read_unlock_bh();
118
119         IP6_INC_STATS(dev_net(dst->dev),
120                       ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
121         kfree_skb(skb);
122         return -EINVAL;
123 }
124
125 static int ip6_finish_output(struct sk_buff *skb)
126 {
127         if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
128             dst_allfrag(skb_dst(skb)) ||
129             (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
130                 return ip6_fragment(skb, ip6_finish_output2);
131         else
132                 return ip6_finish_output2(skb);
133 }
134
135 int ip6_output(struct sk_buff *skb)
136 {
137         struct net_device *dev = skb_dst(skb)->dev;
138         struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
139         if (unlikely(idev->cnf.disable_ipv6)) {
140                 IP6_INC_STATS(dev_net(dev), idev,
141                               IPSTATS_MIB_OUTDISCARDS);
142                 kfree_skb(skb);
143                 return 0;
144         }
145
146         return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
147                             ip6_finish_output,
148                             !(IP6CB(skb)->flags & IP6SKB_REROUTED));
149 }
150
151 /*
152  *      xmit an sk_buff (used by TCP, SCTP and DCCP)
153  */
154
155 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
156              struct ipv6_txoptions *opt, int tclass)
157 {
158         struct net *net = sock_net(sk);
159         struct ipv6_pinfo *np = inet6_sk(sk);
160         struct in6_addr *first_hop = &fl6->daddr;
161         struct dst_entry *dst = skb_dst(skb);
162         struct ipv6hdr *hdr;
163         u8  proto = fl6->flowi6_proto;
164         int seg_len = skb->len;
165         int hlimit = -1;
166         u32 mtu;
167
168         if (opt) {
169                 unsigned int head_room;
170
171                 /* First: exthdrs may take lots of space (~8K for now)
172                    MAX_HEADER is not enough.
173                  */
174                 head_room = opt->opt_nflen + opt->opt_flen;
175                 seg_len += head_room;
176                 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
177
178                 if (skb_headroom(skb) < head_room) {
179                         struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
180                         if (skb2 == NULL) {
181                                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
182                                               IPSTATS_MIB_OUTDISCARDS);
183                                 kfree_skb(skb);
184                                 return -ENOBUFS;
185                         }
186                         consume_skb(skb);
187                         skb = skb2;
188                         skb_set_owner_w(skb, sk);
189                 }
190                 if (opt->opt_flen)
191                         ipv6_push_frag_opts(skb, opt, &proto);
192                 if (opt->opt_nflen)
193                         ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
194         }
195
196         skb_push(skb, sizeof(struct ipv6hdr));
197         skb_reset_network_header(skb);
198         hdr = ipv6_hdr(skb);
199
200         /*
201          *      Fill in the IPv6 header
202          */
203         if (np)
204                 hlimit = np->hop_limit;
205         if (hlimit < 0)
206                 hlimit = ip6_dst_hoplimit(dst);
207
208         ip6_flow_hdr(hdr, tclass, fl6->flowlabel);
209
210         hdr->payload_len = htons(seg_len);
211         hdr->nexthdr = proto;
212         hdr->hop_limit = hlimit;
213
214         hdr->saddr = fl6->saddr;
215         hdr->daddr = *first_hop;
216
217         skb->protocol = htons(ETH_P_IPV6);
218         skb->priority = sk->sk_priority;
219         skb->mark = sk->sk_mark;
220
221         mtu = dst_mtu(dst);
222         if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
223                 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
224                               IPSTATS_MIB_OUT, skb->len);
225                 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
226                                dst->dev, dst_output);
227         }
228
229         skb->dev = dst->dev;
230         ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
231         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
232         kfree_skb(skb);
233         return -EMSGSIZE;
234 }
235
236 EXPORT_SYMBOL(ip6_xmit);
237
238 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
239 {
240         struct ip6_ra_chain *ra;
241         struct sock *last = NULL;
242
243         read_lock(&ip6_ra_lock);
244         for (ra = ip6_ra_chain; ra; ra = ra->next) {
245                 struct sock *sk = ra->sk;
246                 if (sk && ra->sel == sel &&
247                     (!sk->sk_bound_dev_if ||
248                      sk->sk_bound_dev_if == skb->dev->ifindex)) {
249                         if (last) {
250                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
251                                 if (skb2)
252                                         rawv6_rcv(last, skb2);
253                         }
254                         last = sk;
255                 }
256         }
257
258         if (last) {
259                 rawv6_rcv(last, skb);
260                 read_unlock(&ip6_ra_lock);
261                 return 1;
262         }
263         read_unlock(&ip6_ra_lock);
264         return 0;
265 }
266
267 static int ip6_forward_proxy_check(struct sk_buff *skb)
268 {
269         struct ipv6hdr *hdr = ipv6_hdr(skb);
270         u8 nexthdr = hdr->nexthdr;
271         __be16 frag_off;
272         int offset;
273
274         if (ipv6_ext_hdr(nexthdr)) {
275                 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
276                 if (offset < 0)
277                         return 0;
278         } else
279                 offset = sizeof(struct ipv6hdr);
280
281         if (nexthdr == IPPROTO_ICMPV6) {
282                 struct icmp6hdr *icmp6;
283
284                 if (!pskb_may_pull(skb, (skb_network_header(skb) +
285                                          offset + 1 - skb->data)))
286                         return 0;
287
288                 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
289
290                 switch (icmp6->icmp6_type) {
291                 case NDISC_ROUTER_SOLICITATION:
292                 case NDISC_ROUTER_ADVERTISEMENT:
293                 case NDISC_NEIGHBOUR_SOLICITATION:
294                 case NDISC_NEIGHBOUR_ADVERTISEMENT:
295                 case NDISC_REDIRECT:
296                         /* For reaction involving unicast neighbor discovery
297                          * message destined to the proxied address, pass it to
298                          * input function.
299                          */
300                         return 1;
301                 default:
302                         break;
303                 }
304         }
305
306         /*
307          * The proxying router can't forward traffic sent to a link-local
308          * address, so signal the sender and discard the packet. This
309          * behavior is clarified by the MIPv6 specification.
310          */
311         if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
312                 dst_link_failure(skb);
313                 return -1;
314         }
315
316         return 0;
317 }
318
/* Continuation run after the FORWARD hook: send the packet out via its dst. */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	int rc = dst_output(skb);

	return rc;
}
323
324 static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
325 {
326         unsigned int mtu;
327         struct inet6_dev *idev;
328
329         if (dst_metric_locked(dst, RTAX_MTU)) {
330                 mtu = dst_metric_raw(dst, RTAX_MTU);
331                 if (mtu)
332                         return mtu;
333         }
334
335         mtu = IPV6_MIN_MTU;
336         rcu_read_lock();
337         idev = __in6_dev_get(dst->dev);
338         if (idev)
339                 mtu = idev->cnf.mtu6;
340         rcu_read_unlock();
341
342         return mtu;
343 }
344
345 int ip6_forward(struct sk_buff *skb)
346 {
347         struct dst_entry *dst = skb_dst(skb);
348         struct ipv6hdr *hdr = ipv6_hdr(skb);
349         struct inet6_skb_parm *opt = IP6CB(skb);
350         struct net *net = dev_net(dst->dev);
351         u32 mtu;
352
353         if (net->ipv6.devconf_all->forwarding == 0)
354                 goto error;
355
356         if (skb_warn_if_lro(skb))
357                 goto drop;
358
359         if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
360                 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
361                                  IPSTATS_MIB_INDISCARDS);
362                 goto drop;
363         }
364
365         if (skb->pkt_type != PACKET_HOST)
366                 goto drop;
367
368         skb_forward_csum(skb);
369
370         /*
371          *      We DO NOT make any processing on
372          *      RA packets, pushing them to user level AS IS
373          *      without ane WARRANTY that application will be able
374          *      to interpret them. The reason is that we
375          *      cannot make anything clever here.
376          *
377          *      We are not end-node, so that if packet contains
378          *      AH/ESP, we cannot make anything.
379          *      Defragmentation also would be mistake, RA packets
380          *      cannot be fragmented, because there is no warranty
381          *      that different fragments will go along one path. --ANK
382          */
383         if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
384                 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
385                         return 0;
386         }
387
388         /*
389          *      check and decrement ttl
390          */
391         if (hdr->hop_limit <= 1) {
392                 /* Force OUTPUT device used as source address */
393                 skb->dev = dst->dev;
394                 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
395                 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
396                                  IPSTATS_MIB_INHDRERRORS);
397
398                 kfree_skb(skb);
399                 return -ETIMEDOUT;
400         }
401
402         /* XXX: idev->cnf.proxy_ndp? */
403         if (net->ipv6.devconf_all->proxy_ndp &&
404             pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
405                 int proxied = ip6_forward_proxy_check(skb);
406                 if (proxied > 0)
407                         return ip6_input(skb);
408                 else if (proxied < 0) {
409                         IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
410                                          IPSTATS_MIB_INDISCARDS);
411                         goto drop;
412                 }
413         }
414
415         if (!xfrm6_route_forward(skb)) {
416                 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
417                                  IPSTATS_MIB_INDISCARDS);
418                 goto drop;
419         }
420         dst = skb_dst(skb);
421
422         /* IPv6 specs say nothing about it, but it is clear that we cannot
423            send redirects to source routed frames.
424            We don't send redirects to frames decapsulated from IPsec.
425          */
426         if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
427                 struct in6_addr *target = NULL;
428                 struct inet_peer *peer;
429                 struct rt6_info *rt;
430
431                 /*
432                  *      incoming and outgoing devices are the same
433                  *      send a redirect.
434                  */
435
436                 rt = (struct rt6_info *) dst;
437                 if (rt->rt6i_flags & RTF_GATEWAY)
438                         target = &rt->rt6i_gateway;
439                 else
440                         target = &hdr->daddr;
441
442                 peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
443
444                 /* Limit redirects both by destination (here)
445                    and by source (inside ndisc_send_redirect)
446                  */
447                 if (inet_peer_xrlim_allow(peer, 1*HZ))
448                         ndisc_send_redirect(skb, target);
449                 if (peer)
450                         inet_putpeer(peer);
451         } else {
452                 int addrtype = ipv6_addr_type(&hdr->saddr);
453
454                 /* This check is security critical. */
455                 if (addrtype == IPV6_ADDR_ANY ||
456                     addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
457                         goto error;
458                 if (addrtype & IPV6_ADDR_LINKLOCAL) {
459                         icmpv6_send(skb, ICMPV6_DEST_UNREACH,
460                                     ICMPV6_NOT_NEIGHBOUR, 0);
461                         goto error;
462                 }
463         }
464
465         mtu = ip6_dst_mtu_forward(dst);
466         if (mtu < IPV6_MIN_MTU)
467                 mtu = IPV6_MIN_MTU;
468
469         if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
470             (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
471                 /* Again, force OUTPUT device used as source address */
472                 skb->dev = dst->dev;
473                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
474                 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
475                                  IPSTATS_MIB_INTOOBIGERRORS);
476                 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
477                                  IPSTATS_MIB_FRAGFAILS);
478                 kfree_skb(skb);
479                 return -EMSGSIZE;
480         }
481
482         if (skb_cow(skb, dst->dev->hard_header_len)) {
483                 IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
484                                  IPSTATS_MIB_OUTDISCARDS);
485                 goto drop;
486         }
487
488         hdr = ipv6_hdr(skb);
489
490         /* Mangling hops number delayed to point after skb COW */
491
492         hdr->hop_limit--;
493
494         IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
495         IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
496         return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
497                        ip6_forward_finish);
498
499 error:
500         IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
501 drop:
502         kfree_skb(skb);
503         return -EINVAL;
504 }
505
506 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
507 {
508         to->pkt_type = from->pkt_type;
509         to->priority = from->priority;
510         to->protocol = from->protocol;
511         skb_dst_drop(to);
512         skb_dst_set(to, dst_clone(skb_dst(from)));
513         to->dev = from->dev;
514         to->mark = from->mark;
515
516 #ifdef CONFIG_NET_SCHED
517         to->tc_index = from->tc_index;
518 #endif
519         nf_copy(to, from);
520 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
521         to->nf_trace = from->nf_trace;
522 #endif
523         skb_copy_secmark(to, from);
524 }
525
526 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
527 {
528         struct sk_buff *frag;
529         struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
530         struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
531         struct ipv6hdr *tmp_hdr;
532         struct frag_hdr *fh;
533         unsigned int mtu, hlen, left, len;
534         int hroom, troom;
535         __be32 frag_id = 0;
536         int ptr, offset = 0, err=0;
537         u8 *prevhdr, nexthdr = 0;
538         struct net *net = dev_net(skb_dst(skb)->dev);
539
540         hlen = ip6_find_1stfragopt(skb, &prevhdr);
541         nexthdr = *prevhdr;
542
543         mtu = ip6_skb_dst_mtu(skb);
544
545         /* We must not fragment if the socket is set to force MTU discovery
546          * or if the skb it not generated by a local socket.
547          */
548         if (unlikely(!skb->local_df && skb->len > mtu) ||
549                      (IP6CB(skb)->frag_max_size &&
550                       IP6CB(skb)->frag_max_size > mtu)) {
551                 if (skb->sk && dst_allfrag(skb_dst(skb)))
552                         sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
553
554                 skb->dev = skb_dst(skb)->dev;
555                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
556                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
557                               IPSTATS_MIB_FRAGFAILS);
558                 kfree_skb(skb);
559                 return -EMSGSIZE;
560         }
561
562         if (np && np->frag_size < mtu) {
563                 if (np->frag_size)
564                         mtu = np->frag_size;
565         }
566         mtu -= hlen + sizeof(struct frag_hdr);
567
568         if (skb_has_frag_list(skb)) {
569                 int first_len = skb_pagelen(skb);
570                 struct sk_buff *frag2;
571
572                 if (first_len - hlen > mtu ||
573                     ((first_len - hlen) & 7) ||
574                     skb_cloned(skb))
575                         goto slow_path;
576
577                 skb_walk_frags(skb, frag) {
578                         /* Correct geometry. */
579                         if (frag->len > mtu ||
580                             ((frag->len & 7) && frag->next) ||
581                             skb_headroom(frag) < hlen)
582                                 goto slow_path_clean;
583
584                         /* Partially cloned skb? */
585                         if (skb_shared(frag))
586                                 goto slow_path_clean;
587
588                         BUG_ON(frag->sk);
589                         if (skb->sk) {
590                                 frag->sk = skb->sk;
591                                 frag->destructor = sock_wfree;
592                         }
593                         skb->truesize -= frag->truesize;
594                 }
595
596                 err = 0;
597                 offset = 0;
598                 frag = skb_shinfo(skb)->frag_list;
599                 skb_frag_list_init(skb);
600                 /* BUILD HEADER */
601
602                 *prevhdr = NEXTHDR_FRAGMENT;
603                 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
604                 if (!tmp_hdr) {
605                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
606                                       IPSTATS_MIB_FRAGFAILS);
607                         return -ENOMEM;
608                 }
609
610                 __skb_pull(skb, hlen);
611                 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
612                 __skb_push(skb, hlen);
613                 skb_reset_network_header(skb);
614                 memcpy(skb_network_header(skb), tmp_hdr, hlen);
615
616                 ipv6_select_ident(fh, rt);
617                 fh->nexthdr = nexthdr;
618                 fh->reserved = 0;
619                 fh->frag_off = htons(IP6_MF);
620                 frag_id = fh->identification;
621
622                 first_len = skb_pagelen(skb);
623                 skb->data_len = first_len - skb_headlen(skb);
624                 skb->len = first_len;
625                 ipv6_hdr(skb)->payload_len = htons(first_len -
626                                                    sizeof(struct ipv6hdr));
627
628                 dst_hold(&rt->dst);
629
630                 for (;;) {
631                         /* Prepare header of the next frame,
632                          * before previous one went down. */
633                         if (frag) {
634                                 frag->ip_summed = CHECKSUM_NONE;
635                                 skb_reset_transport_header(frag);
636                                 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
637                                 __skb_push(frag, hlen);
638                                 skb_reset_network_header(frag);
639                                 memcpy(skb_network_header(frag), tmp_hdr,
640                                        hlen);
641                                 offset += skb->len - hlen - sizeof(struct frag_hdr);
642                                 fh->nexthdr = nexthdr;
643                                 fh->reserved = 0;
644                                 fh->frag_off = htons(offset);
645                                 if (frag->next != NULL)
646                                         fh->frag_off |= htons(IP6_MF);
647                                 fh->identification = frag_id;
648                                 ipv6_hdr(frag)->payload_len =
649                                                 htons(frag->len -
650                                                       sizeof(struct ipv6hdr));
651                                 ip6_copy_metadata(frag, skb);
652                         }
653
654                         err = output(skb);
655                         if(!err)
656                                 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
657                                               IPSTATS_MIB_FRAGCREATES);
658
659                         if (err || !frag)
660                                 break;
661
662                         skb = frag;
663                         frag = skb->next;
664                         skb->next = NULL;
665                 }
666
667                 kfree(tmp_hdr);
668
669                 if (err == 0) {
670                         IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
671                                       IPSTATS_MIB_FRAGOKS);
672                         ip6_rt_put(rt);
673                         return 0;
674                 }
675
676                 while (frag) {
677                         skb = frag->next;
678                         kfree_skb(frag);
679                         frag = skb;
680                 }
681
682                 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
683                               IPSTATS_MIB_FRAGFAILS);
684                 ip6_rt_put(rt);
685                 return err;
686
687 slow_path_clean:
688                 skb_walk_frags(skb, frag2) {
689                         if (frag2 == frag)
690                                 break;
691                         frag2->sk = NULL;
692                         frag2->destructor = NULL;
693                         skb->truesize += frag2->truesize;
694                 }
695         }
696
697 slow_path:
698         if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
699             skb_checksum_help(skb))
700                 goto fail;
701
702         left = skb->len - hlen;         /* Space per frame */
703         ptr = hlen;                     /* Where to start from */
704
705         /*
706          *      Fragment the datagram.
707          */
708
709         *prevhdr = NEXTHDR_FRAGMENT;
710         hroom = LL_RESERVED_SPACE(rt->dst.dev);
711         troom = rt->dst.dev->needed_tailroom;
712
713         /*
714          *      Keep copying data until we run out.
715          */
716         while(left > 0) {
717                 len = left;
718                 /* IF: it doesn't fit, use 'mtu' - the data space left */
719                 if (len > mtu)
720                         len = mtu;
721                 /* IF: we are not sending up to and including the packet end
722                    then align the next start on an eight byte boundary */
723                 if (len < left) {
724                         len &= ~7;
725                 }
726                 /*
727                  *      Allocate buffer.
728                  */
729
730                 if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
731                                       hroom + troom, GFP_ATOMIC)) == NULL) {
732                         NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
733                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
734                                       IPSTATS_MIB_FRAGFAILS);
735                         err = -ENOMEM;
736                         goto fail;
737                 }
738
739                 /*
740                  *      Set up data on packet
741                  */
742
743                 ip6_copy_metadata(frag, skb);
744                 skb_reserve(frag, hroom);
745                 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
746                 skb_reset_network_header(frag);
747                 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
748                 frag->transport_header = (frag->network_header + hlen +
749                                           sizeof(struct frag_hdr));
750
751                 /*
752                  *      Charge the memory for the fragment to any owner
753                  *      it might possess
754                  */
755                 if (skb->sk)
756                         skb_set_owner_w(frag, skb->sk);
757
758                 /*
759                  *      Copy the packet header into the new buffer.
760                  */
761                 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
762
763                 /*
764                  *      Build fragment header.
765                  */
766                 fh->nexthdr = nexthdr;
767                 fh->reserved = 0;
768                 if (!frag_id) {
769                         ipv6_select_ident(fh, rt);
770                         frag_id = fh->identification;
771                 } else
772                         fh->identification = frag_id;
773
774                 /*
775                  *      Copy a block of the IP datagram.
776                  */
777                 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
778                         BUG();
779                 left -= len;
780
781                 fh->frag_off = htons(offset);
782                 if (left > 0)
783                         fh->frag_off |= htons(IP6_MF);
784                 ipv6_hdr(frag)->payload_len = htons(frag->len -
785                                                     sizeof(struct ipv6hdr));
786
787                 ptr += len;
788                 offset += len;
789
790                 /*
791                  *      Put this fragment into the sending queue.
792                  */
793                 err = output(frag);
794                 if (err)
795                         goto fail;
796
797                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
798                               IPSTATS_MIB_FRAGCREATES);
799         }
800         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
801                       IPSTATS_MIB_FRAGOKS);
802         consume_skb(skb);
803         return err;
804
805 fail:
806         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
807                       IPSTATS_MIB_FRAGFAILS);
808         kfree_skb(skb);
809         return err;
810 }
811
/*
 * Tell whether a cached route key fails to match a flow address.
 *
 * Returns non-zero only when BOTH of the following hold:
 *   - @rt_key is not a /128 entry whose address equals @fl_addr, and
 *   - @addr_cache is NULL or differs from @fl_addr.
 * Returns zero as soon as either the /128 key or the cached address
 * matches @fl_addr.
 */
812 static inline int ip6_rt_check(const struct rt6key *rt_key,
813                                const struct in6_addr *fl_addr,
814                                const struct in6_addr *addr_cache)
815 {
816         return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
817                 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
818 }
819
/*
 * Validate a cached @dst for use with the flow @fl6.
 *
 * A NULL @dst is passed through unchanged. Otherwise @dst is returned
 * as-is when it is still usable; when it is not, the reference is
 * dropped with dst_release() and NULL is returned. The entry is rejected
 * when it is not an AF_INET6 dst, when ip6_rt_check() reports a mismatch
 * for the destination (and, with CONFIG_IPV6_SUBTREES, for the source),
 * or when fl6->flowi6_oif is set and differs from the dst's device.
 */
820 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
821                                           struct dst_entry *dst,
822                                           const struct flowi6 *fl6)
823 {
824         struct ipv6_pinfo *np = inet6_sk(sk);
825         struct rt6_info *rt;
826
827         if (!dst)
828                 goto out;
829
            /* A dst of another address family cannot be used: drop it. */
830         if (dst->ops->family != AF_INET6) {
831                 dst_release(dst);
832                 return NULL;
833         }
834
835         rt = (struct rt6_info *)dst;
836         /* Yes, checking route validity in not connected
837          * case is not very simple. Take into account,
838          * that we do not support routing by source, TOS,
839          * and MSG_DONTROUTE            --ANK (980726)
840          *
841          * 1. ip6_rt_check(): If route was host route,
842          *    check that cached destination is current.
843          *    If it is network route, we still may
844          *    check its validity using saved pointer
845          *    to the last used address: daddr_cache.
846          *    We do not want to save whole address now,
847          *    (because main consumer of this service
848          *    is tcp, which has not this problem),
849          *    so that the last trick works only on connected
850          *    sockets.
851          * 2. oif also should be the same.
852          */
            /* Any mismatch drops the reference and reports NULL to the caller. */
853         if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
854 #ifdef CONFIG_IPV6_SUBTREES
855             ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
856 #endif
857             (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
858                 dst_release(dst);
859                 dst = NULL;
860         }
861
862 out:
863         return dst;
864 }
865
/*
 * Common tail of ip6_dst_lookup(), ip6_dst_lookup_flow() and
 * ip6_sk_dst_lookup_flow().
 *
 * When *@dst is NULL a route is looked up with ip6_route_output();
 * otherwise the entry supplied by the caller is used. If fl6->saddr is
 * the unspecified address, a source address is selected with
 * ip6_route_get_saddr() and written into @fl6.
 *
 * Returns 0 with *@dst set on success. On failure the dst reference is
 * released, *@dst is set to NULL and the error code is returned;
 * -ENETUNREACH is additionally counted as IPSTATS_MIB_OUTNOROUTES.
 */
866 static int ip6_dst_lookup_tail(struct sock *sk,
867                                struct dst_entry **dst, struct flowi6 *fl6)
868 {
            /*
             * NOTE(review): sk is passed to sock_net() unconditionally here,
             * yet it is tested for NULL further down when reading srcprefs.
             * Confirm whether any caller can pass a NULL sk.
             */
869         struct net *net = sock_net(sk);
870 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
871         struct neighbour *n;
872         struct rt6_info *rt;
873 #endif
874         int err;
875
876         if (*dst == NULL)
877                 *dst = ip6_route_output(net, sk, fl6);
878
            /* The returned dst carries its own error code. */
879         if ((err = (*dst)->error))
880                 goto out_err_release;
881
882         if (ipv6_addr_any(&fl6->saddr)) {
                    /*
                     * Block-local rt; with CONFIG_IPV6_OPTIMISTIC_DAD it shadows
                     * the function-level variable of the same name.
                     */
883                 struct rt6_info *rt = (struct rt6_info *) *dst;
884                 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
885                                           sk ? inet6_sk(sk)->srcprefs : 0,
886                                           &fl6->saddr);
887                 if (err)
888                         goto out_err_release;
889         }
890
891 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
892         /*
893          * Here if the dst entry we've looked up
894          * has a neighbour entry that is in the INCOMPLETE
895          * state and the src address from the flow is
896          * marked as OPTIMISTIC, we release the found
897          * dst entry and replace it instead with the
898          * dst entry of the nexthop router
899          */
900         rt = (struct rt6_info *) *dst;
901         rcu_read_lock_bh();
902         n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
            /*
             * err is only a flag here: it is set when a neighbour entry exists
             * for the next hop and none of its NUD_VALID state bits is set.
             */
903         err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
904         rcu_read_unlock_bh();
905
906         if (err) {
907                 struct inet6_ifaddr *ifp;
908                 struct flowi6 fl_gw6;
909                 int redirect;
910
911                 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
912                                       (*dst)->dev, 1);
913
                    /* Sample the flag, then drop the ifaddr reference right away. */
914                 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
915                 if (ifp)
916                         in6_ifa_put(ifp);
917
918                 if (redirect) {
919                         /*
920                          * We need to get the dst entry for the
921                          * default router instead
922                          */
923                         dst_release(*dst);
                            /* Same flow, but with the destination zeroed out. */
924                         memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
925                         memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
926                         *dst = ip6_route_output(net, sk, &fl_gw6);
927                         if ((err = (*dst)->error))
928                                 goto out_err_release;
929                 }
                    /* Without a redirect the flag value in err is simply discarded. */
930         }
931 #endif
932
933         return 0;
934
935 out_err_release:
936         if (err == -ENETUNREACH)
937                 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
938         dst_release(*dst);
939         *dst = NULL;
940         return err;
941 }
942
943 /**
944  *      ip6_dst_lookup - perform route lookup on flow
945  *      @sk: socket which provides route info
946  *      @dst: pointer to dst_entry * for result
947  *      @fl6: flow to lookup
948  *
949  *      This function performs a route lookup on the given flow.
950  *
951  *      It returns zero on success, or a standard errno code on error.
952  */
953 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
954 {
            /*
             * Whatever *dst held on entry is overwritten, so
             * ip6_dst_lookup_tail() always does a new route lookup.
             */
955         *dst = NULL;
956         return ip6_dst_lookup_tail(sk, dst, fl6);
957 }
958 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
959
960 /**
961  *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
962  *      @sk: socket which provides route info
963  *      @fl6: flow to lookup
964  *      @final_dst: final destination address for ipsec lookup
965  *
966  *      This function performs a route lookup on the given flow.
967  *
968  *      It returns a valid dst pointer on success, or a pointer encoded
969  *      error code.
970  */
971 struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
972                                       const struct in6_addr *final_dst)
973 {
974         struct dst_entry *dst = NULL;
975         int err;
976
            /* dst starts out NULL, so the tail performs the route lookup. */
977         err = ip6_dst_lookup_tail(sk, &dst, fl6);
978         if (err)
979                 return ERR_PTR(err);
            /*
             * The route was looked up with the original fl6->daddr; a non-NULL
             * final_dst replaces it in the flow before the xfrm lookup.
             */
980         if (final_dst)
981                 fl6->daddr = *final_dst;
982
            /* The result of xfrm_lookup() is handed back to the caller as is. */
983         return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
984 }
985 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
986
987 /**
988  *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
989  *      @sk: socket which provides the dst cache and route info
990  *      @fl6: flow to lookup
991  *      @final_dst: final destination address for ipsec lookup
992  *
993  *      This function performs a route lookup on the given flow with the
994  *      possibility of using the cached route in the socket if it is valid.
995  *      It will take the socket dst lock when operating on the dst cache.
996  *      As a result, this function can only be used in process context.
997  *
998  *      It returns a valid dst pointer on success, or a pointer encoded
999  *      error code.
1000  */
1001 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1002                                          const struct in6_addr *final_dst)
1003 {
             /* Fetch the socket's cached dst, checked against the stored cookie. */
1004         struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1005         int err;
1006
             /*
              * Check the cached entry against this flow; a NULL result makes
              * ip6_dst_lookup_tail() do a route lookup instead.
              */
1007         dst = ip6_sk_dst_check(sk, dst, fl6);
1008
1009         err = ip6_dst_lookup_tail(sk, &dst, fl6);
1010         if (err)
1011                 return ERR_PTR(err);
             /* A non-NULL final_dst replaces fl6->daddr before the xfrm lookup. */
1012         if (final_dst)
1013                 fl6->daddr = *final_dst;
1014
1015         return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1016 }
1017 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1018
/*
 * Append data to the socket write queue as one large skb for UDP
 * fragmentation offload.
 *
 * @sk:            socket whose sk_write_queue is used
 * @getfrag:       copy callback, forwarded to skb_append_datato_frags()
 * @from:          opaque source cookie for @getfrag
 * @length:        bytes to append, @transhdrlen included
 * @hh_len:        headroom reserved for the hardware header
 * @fragheaderlen: length of the network headers placed in front of the
 *                 transport header
 * @transhdrlen:   transport header length
 * @mtu:           MTU used to derive gso_size
 * @flags:         MSG_* flags; only MSG_DONTWAIT is looked at here
 * @rt:            route passed to ipv6_select_ident()
 *
 * If the write queue is empty a new skb holding only the headers is
 * allocated and queued. If the tail skb is already GSO, the GSO setup is
 * skipped and the data is appended directly.
 *
 * Returns the error from sock_alloc_send_skb() if allocation fails,
 * otherwise the result of skb_append_datato_frags().
 */
1019 static inline int ip6_ufo_append_data(struct sock *sk,
1020                         int getfrag(void *from, char *to, int offset, int len,
1021                         int odd, struct sk_buff *skb),
1022                         void *from, int length, int hh_len, int fragheaderlen,
1023                         int transhdrlen, int mtu,unsigned int flags,
1024                         struct rt6_info *rt)
1025
1026 {
1027         struct sk_buff *skb;
1028         struct frag_hdr fhdr;
1029         int err;
1030
1031         /* There is support for UDP large send offload by network
1032          * device, so create one single skb packet containing complete
1033          * udp datagram
1034          */
1035         if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
                     /*
                      * NOTE(review): the extra 20 bytes in the allocation size
                      * are not explained anywhere in this function.
                      */
1036                 skb = sock_alloc_send_skb(sk,
1037                         hh_len + fragheaderlen + transhdrlen + 20,
1038                         (flags & MSG_DONTWAIT), &err);
1039                 if (skb == NULL)
1040                         return err;
1041
1042                 /* reserve space for Hardware header */
1043                 skb_reserve(skb, hh_len);
1044
1045                 /* create space for UDP/IP header */
1046                 skb_put(skb,fragheaderlen + transhdrlen);
1047
1048                 /* initialize network header pointer */
1049                 skb_reset_network_header(skb);
1050
1051                 /* initialize protocol header pointer */
1052                 skb->transport_header = skb->network_header + fragheaderlen;
1053
1054                 skb->protocol = htons(ETH_P_IPV6);
1055                 skb->csum = 0;
1056
1057                 __skb_queue_tail(&sk->sk_write_queue, skb);
1058         } else if (skb_is_gso(skb)) {
                     /* Tail skb is already set up for GSO: only append data. */
1059                 goto append;
1060         }
1061
             /* Reached for a new skb and for an existing tail that is not GSO yet. */
1062         skb->ip_summed = CHECKSUM_PARTIAL;
1063         /* Specify the length of each IPv6 datagram fragment.
1064          * It has to be a multiple of 8.
1065          */
1066         skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1067                                      sizeof(struct frag_hdr)) & ~7;
1068         skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
             /* fhdr is a scratch header used only to obtain an identification. */
1069         ipv6_select_ident(&fhdr, rt);
1070         skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1071
1072 append:
             /* The transport header is not part of the data that gets copied. */
1073         return skb_append_datato_frags(sk, skb, getfrag, from,
1074                                        (length - transhdrlen));
1075 }
1076
/*
 * Duplicate an option header with kmemdup() using allocation flags @gfp.
 * The number of bytes copied is (src->hdrlen + 1) * 8.
 * Returns NULL when @src is NULL; otherwise returns whatever kmemdup()
 * returns.
 */
1077 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1078                                                gfp_t gfp)
1079 {
1080         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1081 }
1082
/*
 * Duplicate a routing header with kmemdup() using allocation flags @gfp.
 * The number of bytes copied is (src->hdrlen + 1) * 8.
 * Returns NULL when @src is NULL; otherwise returns whatever kmemdup()
 * returns.
 */
1083 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1084                                                 gfp_t gfp)
1085 {
1086         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1087 }
1088
/*
 * Adjust *@mtu and *@maxfraglen before ip6_append_data() allocates a
 * new skb.
 *
 * Nothing is changed when the route has DST_XFRM_TUNNEL set. Otherwise:
 *   - @skb == NULL (first fragment): rt->dst.header_len is subtracted
 *     from *@mtu;
 *   - @skb != NULL: *@mtu is capped at the device MTU when @pmtuprobe is
 *     true, or at dst_mtu(rt->dst.path) when it is false.
 * *@maxfraglen is then recomputed from the new *@mtu and @fragheaderlen.
 *
 * NOTE(review): @maxfraglen is declared int *, but ip6_append_data()
 * passes the address of an unsigned int.
 */
1089 static void ip6_append_data_mtu(unsigned int *mtu,
1090                                 int *maxfraglen,
1091                                 unsigned int fragheaderlen,
1092                                 struct sk_buff *skb,
1093                                 struct rt6_info *rt,
1094                                 bool pmtuprobe)
1095 {
1096         if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1097                 if (skb == NULL) {
1098                         /* first fragment, reserve header_len */
1099                         *mtu = *mtu - rt->dst.header_len;
1100
1101                 } else {
1102                         /*
1103                          * this fragment is not first, the headers
1104                          * space is regarded as data space.
1105                          */
1106                         *mtu = min(*mtu, pmtuprobe ?
1107                                    rt->dst.dev->mtu :
1108                                    dst_mtu(rt->dst.path));
1109                 }
                     /* Same formula ip6_append_data() uses for its initial value. */
1110                 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1111                               + fragheaderlen - sizeof(struct frag_hdr);
1112         }
1113 }
1114
/*
 * ip6_append_data - queue data on the socket's pending write queue
 * @sk:          socket whose sk_write_queue receives the data
 * @getfrag:     callback that copies @len bytes, starting at @offset of
 *               @from, into @to; a negative return is reported as -EFAULT
 * @from:        opaque source cookie handed to @getfrag
 * @length:      number of bytes to append, @transhdrlen included
 * @transhdrlen: transport header length; forced to 0 when the queue
 *               already holds data
 * @hlimit:      hop limit, saved in the cork on the first call
 * @tclass:      traffic class, saved in the cork on the first call
 * @opt:         tx options, duplicated into np->cork.opt on the first call
 * @fl6:         flow, copied into the cork on the first call
 * @rt:          route; the cork takes a reference on the first call
 * @flags:       MSG_* flags (MSG_PROBE, MSG_MORE and MSG_DONTWAIT are
 *               examined)
 * @dontfrag:    when non-zero on a UDP or RAW socket, data that does not
 *               fit in the MTU is refused with -EMSGSIZE
 *
 * The first call on an empty write queue sets up the cork state. Later
 * calls take the route, flow, options and MTU from the cork and ignore
 * the arguments of the same name.
 *
 * Returns 0 on success or an error code. Failures that reach the "error"
 * label subtract the bytes that were not queued from cork->length and
 * count an IPSTATS_MIB_OUTDISCARDS; the early returns during setup and
 * the -EMSGSIZE return do neither.
 */
1115 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1116         int offset, int len, int odd, struct sk_buff *skb),
1117         void *from, int length, int transhdrlen,
1118         int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
1119         struct rt6_info *rt, unsigned int flags, int dontfrag)
1120 {
1121         struct inet_sock *inet = inet_sk(sk);
1122         struct ipv6_pinfo *np = inet6_sk(sk);
1123         struct inet_cork *cork;
1124         struct sk_buff *skb, *skb_prev = NULL;
1125         unsigned int maxfraglen, fragheaderlen, mtu;
1126         int exthdrlen;
1127         int dst_exthdrlen;
1128         int hh_len;
1129         int copy;
1130         int err;
1131         int offset = 0;
1132         __u8 tx_flags = 0;
1133
             /* MSG_PROBE: report success without queueing anything. */
1134         if (flags&MSG_PROBE)
1135                 return 0;
1136         cork = &inet->cork.base;
1137         if (skb_queue_empty(&sk->sk_write_queue)) {
1138                 /*
1139                  * setup for corking
1140                  */
1141                 if (opt) {
1142                         if (WARN_ON(np->cork.opt))
1143                                 return -EINVAL;
1144
1145                         np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);
1146                         if (unlikely(np->cork.opt == NULL))
1147                                 return -ENOBUFS;
1148
1149                         np->cork.opt->tot_len = opt->tot_len;
1150                         np->cork.opt->opt_flen = opt->opt_flen;
1151                         np->cork.opt->opt_nflen = opt->opt_nflen;
1152
                             /*
                              * NOTE(review): each -ENOBUFS return below leaves
                              * np->cork.opt allocated and partly filled in.
                              * ip6_cork_release() frees it; confirm that callers
                              * release the cork after an error.
                              */
1153                         np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1154                                                             sk->sk_allocation);
1155                         if (opt->dst0opt && !np->cork.opt->dst0opt)
1156                                 return -ENOBUFS;
1157
1158                         np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1159                                                             sk->sk_allocation);
1160                         if (opt->dst1opt && !np->cork.opt->dst1opt)
1161                                 return -ENOBUFS;
1162
1163                         np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1164                                                            sk->sk_allocation);
1165                         if (opt->hopopt && !np->cork.opt->hopopt)
1166                                 return -ENOBUFS;
1167
1168                         np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1169                                                             sk->sk_allocation);
1170                         if (opt->srcrt && !np->cork.opt->srcrt)
1171                                 return -ENOBUFS;
1172
1173                         /* need source address above miyazawa*/
1174                 }
                     /* The cork holds its own reference on the route. */
1175                 dst_hold(&rt->dst);
1176                 cork->dst = &rt->dst;
1177                 inet->cork.fl.u.ip6 = *fl6;
1178                 np->cork.hop_limit = hlimit;
1179                 np->cork.tclass = tclass;
                     /*
                      * With pmtudisc at IPV6_PMTUDISC_PROBE or above the device
                      * MTU is used. Otherwise the dst MTU is used: that of the
                      * route itself for DST_XFRM_TUNNEL, of rt->dst.path if not.
                      */
1180                 if (rt->dst.flags & DST_XFRM_TUNNEL)
1181                         mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1182                               rt->dst.dev->mtu : dst_mtu(&rt->dst);
1183                 else
1184                         mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1185                               rt->dst.dev->mtu : dst_mtu(rt->dst.path);
                     /* A non-zero np->frag_size below the MTU replaces it. */
1186                 if (np->frag_size < mtu) {
1187                         if (np->frag_size)
1188                                 mtu = np->frag_size;
1189                 }
1190                 cork->fragsize = mtu;
1191                 if (dst_allfrag(rt->dst.path))
1192                         cork->flags |= IPCORK_ALLFRAG;
1193                 cork->length = 0;
                     /*
                      * opt->opt_flen bytes are counted as part of both the data
                      * length and the transport header of the first skb.
                      */
1194                 exthdrlen = (opt ? opt->opt_flen : 0);
1195                 length += exthdrlen;
1196                 transhdrlen += exthdrlen;
1197                 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1198         } else {
                     /*
                      * Data is already queued: use the corked route, flow,
                      * options and MTU instead of the arguments.
                      */
1199                 rt = (struct rt6_info *)cork->dst;
1200                 fl6 = &inet->cork.fl.u.ip6;
1201                 opt = np->cork.opt;
1202                 transhdrlen = 0;
1203                 exthdrlen = 0;
1204                 dst_exthdrlen = 0;
1205                 mtu = cork->fragsize;
1206         }
1207
1208         hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1209
             /* Headers repeated in every skb: IPv6 header, nfheader, opt_nflen. */
1210         fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1211                         (opt ? opt->opt_nflen : 0);
             /*
              * Upper bound on skb->len for an skb that is followed by another:
              * the payload is rounded down to a multiple of 8 and room for a
              * fragment header is held back.
              */
1212         maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1213                      sizeof(struct frag_hdr);
1214
             /*
              * The size limits are enforced only while mtu does not exceed
              * sizeof(struct ipv6hdr) + IPV6_MAXPLEN.
              */
1215         if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1216                 unsigned int maxnonfragsize, headersize;
1217
1218                 headersize = sizeof(struct ipv6hdr) +
1219                              (opt ? opt->tot_len : 0) +
1220                              (dst_allfrag(&rt->dst) ?
1221                               sizeof(struct frag_hdr) : 0) +
1222                              rt->rt6i_nfheader_len;
1223
                     /*
                      * pmtudisc >= IPV6_PMTUDISC_DO limits the total to mtu;
                      * otherwise the limit is sizeof(struct ipv6hdr) +
                      * IPV6_MAXPLEN.
                      */
1224                 maxnonfragsize = (np->pmtudisc >= IPV6_PMTUDISC_DO) ?
1225                                  mtu : sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1226
1227                 /* dontfrag active */
1228                 if ((cork->length + length > mtu - headersize) && dontfrag &&
1229                     (sk->sk_protocol == IPPROTO_UDP ||
1230                      sk->sk_protocol == IPPROTO_RAW)) {
1231                         ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1232                                                    sizeof(struct ipv6hdr));
                             /* Jumps into the body of the if statement below. */
1233                         goto emsgsize;
1234                 }
1235
1236                 if (cork->length + length > maxnonfragsize - headersize) {
1237 emsgsize:
1238                         ipv6_local_error(sk, EMSGSIZE, fl6,
1239                                          mtu - headersize +
1240                                          sizeof(struct ipv6hdr));
1241                         return -EMSGSIZE;
1242                 }
1243         }
1244
1245         /* For UDP, check if TX timestamp is enabled */
1246         if (sk->sk_type == SOCK_DGRAM)
1247                 sock_tx_timestamp(sk, &tx_flags);
1248
1249         /*
1250          * Let's try using as much space as possible.
1251          * Use MTU if total length of the message fits into the MTU.
1252          * Otherwise, we need to reserve fragment header and
1253          * fragment alignment (= 8-15 octets, in total).
1254          *
1255          * Note that we may need to "move" the data from the tail
1256          * of the buffer to the new fragment when we split
1257          * the message.
1258          *
1259          * FIXME: It may be fragmented into multiple chunks
1260          *        at once if non-fragmentable extension headers
1261          *        are too large.
1262          * --yoshfuji
1263          */
1264
1265         skb = skb_peek_tail(&sk->sk_write_queue);
             /* Counted up front; the error path takes back what was not queued. */
1266         cork->length += length;
             /*
              * UDP on a NETIF_F_UFO device: data longer than the MTU, or data
              * that follows a tail skb which is already GSO, goes to the UFO
              * path.
              */
1267         if (((length > mtu) ||
1268              (skb && skb_is_gso(skb))) &&
1269             (sk->sk_protocol == IPPROTO_UDP) &&
1270             (rt->dst.dev->features & NETIF_F_UFO)) {
1271                 err = ip6_ufo_append_data(sk, getfrag, from, length,
1272                                           hh_len, fragheaderlen,
1273                                           transhdrlen, mtu, flags, rt);
1274                 if (err)
1275                         goto error;
1276                 return 0;
1277         }
1278
             /* Empty queue: enter the loop body directly at the allocation code. */
1279         if (!skb)
1280                 goto alloc_new_skb;
1281
1282         while (length > 0) {
1283                 /* Check if the remaining data fits into current packet. */
1284                 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
                     /* Not everything fits: this skb may only grow to maxfraglen. */
1285                 if (copy < length)
1286                         copy = maxfraglen - skb->len;
1287
1288                 if (copy <= 0) {
1289                         char *data;
1290                         unsigned int datalen;
1291                         unsigned int fraglen;
1292                         unsigned int fraggap;
1293                         unsigned int alloclen;
1294 alloc_new_skb:
1295                         /* There's no room in the current skb */
                             /*
                              * fraggap: bytes the current skb holds beyond
                              * maxfraglen; they are moved into the new skb below.
                              */
1296                         if (skb)
1297                                 fraggap = skb->len - maxfraglen;
1298                         else
1299                                 fraggap = 0;
1300                         /* update mtu and maxfraglen if necessary */
1301                         if (skb == NULL || skb_prev == NULL)
1302                                 ip6_append_data_mtu(&mtu, &maxfraglen,
1303                                                     fragheaderlen, skb, rt,
1304                                                     np->pmtudisc >=
1305                                                     IPV6_PMTUDISC_PROBE);
1306
1307                         skb_prev = skb;
1308
1309                         /*
1310                          * If remaining data exceeds the mtu,
1311                          * we know we need more fragment(s).
1312                          */
1313                         datalen = length + fraggap;
1314
1315                         if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1316                                 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
                             /*
                              * MSG_MORE on a device without scatter-gather: allocate
                              * a full MTU (presumably so that later calls can append
                              * to the linear area with skb_put()).
                              */
1317                         if ((flags & MSG_MORE) &&
1318                             !(rt->dst.dev->features&NETIF_F_SG))
1319                                 alloclen = mtu;
1320                         else
1321                                 alloclen = datalen + fragheaderlen;
1322
1323                         alloclen += dst_exthdrlen;
1324
1325                         if (datalen != length + fraggap) {
1326                                 /*
1327                                  * this is not the last fragment, the trailer
1328                                  * space is regarded as data space.
1329                                  */
1330                                 datalen += rt->dst.trailer_len;
1331                         }
1332
1333                         alloclen += rt->dst.trailer_len;
1334                         fraglen = datalen + fragheaderlen;
1335
1336                         /*
1337                          * We just reserve space for fragment header.
1338                          * Note: this may be overallocation if the message
1339                          * (without MSG_MORE) fits into the MTU.
1340                          */
1341                         alloclen += sizeof(struct frag_hdr);
1342
                             /*
                              * A non-zero transhdrlen marks the first skb of the
                              * datagram, which is allocated with
                              * sock_alloc_send_skb(). Later skbs use sock_wmalloc(),
                              * and only while sk_wmem_alloc <= 2 * sk_sndbuf.
                              */
1343                         if (transhdrlen) {
1344                                 skb = sock_alloc_send_skb(sk,
1345                                                 alloclen + hh_len,
1346                                                 (flags & MSG_DONTWAIT), &err);
1347                         } else {
1348                                 skb = NULL;
1349                                 if (atomic_read(&sk->sk_wmem_alloc) <=
1350                                     2 * sk->sk_sndbuf)
1351                                         skb = sock_wmalloc(sk,
1352                                                            alloclen + hh_len, 1,
1353                                                            sk->sk_allocation);
1354                                 if (unlikely(skb == NULL))
1355                                         err = -ENOBUFS;
1356                                 else {
1357                                         /* Only the initial fragment
1358                                          * is time stamped.
1359                                          */
1360                                         tx_flags = 0;
1361                                 }
1362                         }
1363                         if (skb == NULL)
1364                                 goto error;
1365                         /*
1366                          *      Fill in the control structures
1367                          */
1368                         skb->protocol = htons(ETH_P_IPV6);
1369                         skb->ip_summed = CHECKSUM_NONE;
1370                         skb->csum = 0;
1371                         /* reserve for fragmentation and ipsec header */
1372                         skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1373                                     dst_exthdrlen);
1374
1375                         if (sk->sk_type == SOCK_DGRAM)
1376                                 skb_shinfo(skb)->tx_flags = tx_flags;
1377
1378                         /*
1379                          *      Find where to start putting bytes
1380                          */
1381                         data = skb_put(skb, fraglen);
1382                         skb_set_network_header(skb, exthdrlen);
1383                         data += fragheaderlen;
1384                         skb->transport_header = (skb->network_header +
1385                                                  fragheaderlen);
1386                         if (fraggap) {
                                     /*
                                      * Move the excess tail of the previous skb into
                                      * this one, transfer its checksum contribution,
                                      * and trim the previous skb to maxfraglen.
                                      */
1387                                 skb->csum = skb_copy_and_csum_bits(
1388                                         skb_prev, maxfraglen,
1389                                         data + transhdrlen, fraggap, 0);
1390                                 skb_prev->csum = csum_sub(skb_prev->csum,
1391                                                           skb->csum);
1392                                 data += fraggap;
1393                                 pskb_trim_unique(skb_prev, maxfraglen);
1394                         }
                             /* Bytes still to be fetched from the caller for this skb. */
1395                         copy = datalen - transhdrlen - fraggap;
1396
1397                         if (copy < 0) {
1398                                 err = -EINVAL;
1399                                 kfree_skb(skb);
1400                                 goto error;
1401                         } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1402                                 err = -EFAULT;
1403                                 kfree_skb(skb);
1404                                 goto error;
1405                         }
1406
1407                         offset += copy;
1408                         length -= datalen - fraggap;
                             /* These header lengths apply to the first skb only. */
1409                         transhdrlen = 0;
1410                         exthdrlen = 0;
1411                         dst_exthdrlen = 0;
1412
1413                         /*
1414                          * Put the packet on the pending queue
1415                          */
1416                         __skb_queue_tail(&sk->sk_write_queue, skb);
1417                         continue;
1418                 }
1419
1420                 if (copy > length)
1421                         copy = length;
1422
1423                 if (!(rt->dst.dev->features&NETIF_F_SG)) {
                             /*
                              * No scatter-gather: copy into the linear area and
                              * undo the skb_put() if the copy fails.
                              */
1424                         unsigned int off;
1425
1426                         off = skb->len;
1427                         if (getfrag(from, skb_put(skb, copy),
1428                                                 offset, copy, off, skb) < 0) {
1429                                 __skb_trim(skb, off);
1430                                 err = -EFAULT;
1431                                 goto error;
1432                         }
1433                 } else {
                             /*
                              * Scatter-gather: copy into the socket's page fragment
                              * and account it to the skb as paged data.
                              */
1434                         int i = skb_shinfo(skb)->nr_frags;
1435                         struct page_frag *pfrag = sk_page_frag(sk);
1436
1437                         err = -ENOMEM;
1438                         if (!sk_page_frag_refill(sk, pfrag))
1439                                 goto error;
1440
                             /*
                              * If the data cannot be merged into the last frag, a
                              * new zero-length frag slot is opened; -EMSGSIZE when
                              * all MAX_SKB_FRAGS slots are in use.
                              */
1441                         if (!skb_can_coalesce(skb, i, pfrag->page,
1442                                               pfrag->offset)) {
1443                                 err = -EMSGSIZE;
1444                                 if (i == MAX_SKB_FRAGS)
1445                                         goto error;
1446
1447                                 __skb_fill_page_desc(skb, i, pfrag->page,
1448                                                      pfrag->offset, 0);
1449                                 skb_shinfo(skb)->nr_frags = ++i;
1450                                 get_page(pfrag->page);
1451                         }
                             /*
                              * Copy no more than the page fragment has left; the
                              * loop comes back for the remainder.
                              */
1452                         copy = min_t(int, copy, pfrag->size - pfrag->offset);
1453                         if (getfrag(from,
1454                                     page_address(pfrag->page) + pfrag->offset,
1455                                     offset, copy, skb->len, skb) < 0)
1456                                 goto error_efault;
1457
1458                         pfrag->offset += copy;
1459                         skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1460                         skb->len += copy;
1461                         skb->data_len += copy;
1462                         skb->truesize += copy;
1463                         atomic_add(copy, &sk->sk_wmem_alloc);
1464                 }
1465                 offset += copy;
1466                 length -= copy;
1467         }
1468
1469         return 0;
1470
1471 error_efault:
1472         err = -EFAULT;
1473 error:
             /* length now holds the bytes that were not queued. */
1474         cork->length -= length;
1475         IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1476         return err;
1477 }
1478 EXPORT_SYMBOL_GPL(ip6_append_data);
1479
/*
 * Release the cork state set up by ip6_append_data().
 *
 * Frees the duplicated tx options (each sub-header and the container),
 * drops the reference held on the corked dst, and zeroes the corked flow.
 * IPCORK_ALLFRAG is cleared only when a corked dst was present.
 */
1480 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1481 {
1482         if (np->cork.opt) {
1483                 kfree(np->cork.opt->dst0opt);
1484                 kfree(np->cork.opt->dst1opt);
1485                 kfree(np->cork.opt->hopopt);
1486                 kfree(np->cork.opt->srcrt);
1487                 kfree(np->cork.opt);
                     /* Reset so a later release does not free it again. */
1488                 np->cork.opt = NULL;
1489         }
1490
1491         if (inet->cork.base.dst) {
1492                 dst_release(inet->cork.base.dst);
1493                 inet->cork.base.dst = NULL;
1494                 inet->cork.base.flags &= ~IPCORK_ALLFRAG;
1495         }
1496         memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1497 }
1498
/*
 * ip6_push_pending_frames - build and send the datagram queued on a socket
 * @sk: socket whose sk_write_queue holds the pending skbs
 *
 * All queued skbs are merged into the first one through its frag_list.
 * The extension headers from the corked options and the IPv6 header are
 * then pushed, and the result is handed to ip6_local_out(). The cork
 * state is released before returning, whether or not sending succeeded.
 *
 * Returns 0 on success (an empty queue included) or an error code; a
 * failed send is also counted as IPSTATS_MIB_OUTDISCARDS.
 */
1499 int ip6_push_pending_frames(struct sock *sk)
1500 {
1501         struct sk_buff *skb, *tmp_skb;
1502         struct sk_buff **tail_skb;
1503         struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1504         struct inet_sock *inet = inet_sk(sk);
1505         struct ipv6_pinfo *np = inet6_sk(sk);
1506         struct net *net = sock_net(sk);
1507         struct ipv6hdr *hdr;
1508         struct ipv6_txoptions *opt = np->cork.opt;
1509         struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
1510         struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
1511         unsigned char proto = fl6->flowi6_proto;
1512         int err = 0;
1513
             /* Nothing pending: only the cork is released, and 0 is returned. */
1514         if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1515                 goto out;
1516         tail_skb = &(skb_shinfo(skb)->frag_list);
1517
1518         /* move skb->data to ip header from ext header */
1519         if (skb->data < skb_network_header(skb))
1520                 __skb_pull(skb, skb_network_offset(skb));
             /*
              * Each remaining skb loses its network header, is chained onto the
              * head skb's frag_list, has its len and truesize added to the
              * head, and is detached from the socket.
              */
1521         while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1522                 __skb_pull(tmp_skb, skb_network_header_len(skb));
1523                 *tail_skb = tmp_skb;
1524                 tail_skb = &(tmp_skb->next);
1525                 skb->len += tmp_skb->len;
1526                 skb->data_len += tmp_skb->len;
1527                 skb->truesize += tmp_skb->truesize;
1528                 tmp_skb->destructor = NULL;
1529                 tmp_skb->sk = NULL;
1530         }
1531
1532         /* Allow local fragmentation. */
1533         if (np->pmtudisc < IPV6_PMTUDISC_DO)
1534                 skb->local_df = 1;
1535
             /*
              * final_dst starts as a copy of the flow's destination.
              * ipv6_push_nfrag_opts() receives &final_dst and so can change
              * what it points at (presumably for routing headers); hdr->daddr
              * is read from *final_dst afterwards.
              */
1536         *final_dst = fl6->daddr;
1537         __skb_pull(skb, skb_network_header_len(skb));
1538         if (opt && opt->opt_flen)
1539                 ipv6_push_frag_opts(skb, opt, &proto);
1540         if (opt && opt->opt_nflen)
1541                 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1542
             /* Make room for the IPv6 header and fill it in. */
1543         skb_push(skb, sizeof(struct ipv6hdr));
1544         skb_reset_network_header(skb);
1545         hdr = ipv6_hdr(skb);
1546
1547         ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
1548         hdr->hop_limit = np->cork.hop_limit;
             /* proto may have been updated by the option-push helpers above. */
1549         hdr->nexthdr = proto;
1550         hdr->saddr = fl6->saddr;
1551         hdr->daddr = *final_dst;
1552
1553         skb->priority = sk->sk_priority;
1554         skb->mark = sk->sk_mark;
1555
             /* The skb gets its own dst reference; the cork keeps its own. */
1556         skb_dst_set(skb, dst_clone(&rt->dst));
1557         IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1558         if (proto == IPPROTO_ICMPV6) {
1559                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1560
1561                 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
1562                 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1563         }
1564
1565         err = ip6_local_out(skb);
             /*
              * Positive return values are mapped through net_xmit_errno(); if
              * the mapped value is 0 the send counts as a success.
              */
1566         if (err) {
1567                 if (err > 0)
1568                         err = net_xmit_errno(err);
1569                 if (err)
1570                         goto error;
1571         }
1572
1573 out:
             /* Reached on success, on failure and for an empty queue. */
1574         ip6_cork_release(inet, np);
1575         return err;
1576 error:
1577         IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1578         goto out;
1579 }
1580 EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1581
1582 void ip6_flush_pending_frames(struct sock *sk)
1583 {
1584         struct sk_buff *skb;
1585
1586         while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1587                 if (skb_dst(skb))
1588                         IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1589                                       IPSTATS_MIB_OUTDISCARDS);
1590                 kfree_skb(skb);
1591         }
1592
1593         ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1594 }
1595 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);