compat: Always use own __ipv6_select_ident().
[cascardo/ovs.git] / datapath / linux / compat / ip6_output.c
1 /*
2  *      Backported from upstream commit 9ef2e965e554
3  *      ("ipv6: drop frames with attached skb->sk in forwarding")
4  *
5  *      IPv6 output functions
6  *      Linux INET6 implementation
7  *
8  *      Authors:
9  *      Pedro Roque             <roque@di.fc.ul.pt>
10  *
11  *      Based on linux/net/ipv4/ip_output.c
12  *
13  *      This program is free software; you can redistribute it and/or
14  *      modify it under the terms of the GNU General Public License
15  *      as published by the Free Software Foundation; either version
16  *      2 of the License, or (at your option) any later version.
17  *
18  *      Changes:
 *      A.N.Kuznetsov   :       arithmetic in fragmentation.
20  *                              extension headers are implemented.
21  *                              route changes now work.
22  *                              ip6_forward does not confuse sniffers.
23  *                              etc.
24  *
25  *      H. von Brand    :       Added missing #include <linux/string.h>
26  *      Imran Patel     :       frag id should be in NBO
27  *      Kazunori MIYAZAWA @USAGI
28  *                      :       add ip6_append_data and related functions
29  *                              for datagram xmit
30  */
31
32 #include <linux/version.h>
33
34 #ifdef OVS_FRAGMENT_BACKPORT
35
36 #include <linux/errno.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
39 #include <linux/socket.h>
40 #include <linux/net.h>
41 #include <linux/netdevice.h>
42 #include <linux/if_arp.h>
43 #include <linux/in6.h>
44 #include <linux/tcp.h>
45 #include <linux/random.h>
46 #include <linux/route.h>
47 #include <linux/module.h>
48 #include <linux/slab.h>
49
50 #include <linux/netfilter.h>
51 #include <linux/netfilter_ipv6.h>
52
53 #include <net/sock.h>
54 #include <net/snmp.h>
55
56 #include <net/ipv6.h>
57 #include <net/ndisc.h>
58 #include <net/protocol.h>
59 #include <net/ip6_route.h>
60 #include <net/addrconf.h>
61 #include <net/rawv6.h>
62 #include <net/icmp.h>
63 #include <net/xfrm.h>
64 #include <net/checksum.h>
65 #include <linux/mroute6.h>
66
67 #define IP_IDENTS_SZ 2048u
68
69 static atomic_t *ip_idents __read_mostly;
70 static u32 *ip_tstamps __read_mostly;
71
72 int __init ip6_output_init(void);
73 void ip6_output_exit(void);
74
75 /* In order to protect privacy, we add a perturbation to identifiers
76  * if one generator is seldom used. This makes hard for an attacker
77  * to infer how many packets were sent between two points in time.
78  */
79 static u32 rpl_ip_idents_reserve(u32 hash, int segs)
80 {
81         u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
82         atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
83         u32 old = ACCESS_ONCE(*p_tstamp);
84         u32 now = (u32)jiffies;
85         u32 delta = 0;
86
87         if (old != now && cmpxchg(p_tstamp, old, now) == old)
88                 delta = prandom_u32_max(now - old);
89
90         return atomic_add_return(segs + delta, p_id) - segs;
91 }
92
93 static u32 rpl___ipv6_select_ident(struct net *net, u32 hashrnd,
94                                const struct in6_addr *dst,
95                                const struct in6_addr *src)
96 {
97         u32 hash, id;
98
99         hash = __ipv6_addr_jhash(dst, hashrnd);
100         hash = __ipv6_addr_jhash(src, hash);
101         hash ^= net_hash_mix(net);
102
103         /* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve,
104          * set the hight order instead thus minimizing possible future
105          * collisions.
106          */
107         id = rpl_ip_idents_reserve(hash, 1);
108         if (unlikely(!id))
109                 id = 1 << 31;
110
111         return id;
112 }
113
114 static __be32 rpl_ipv6_select_ident(struct net *net,
115                              const struct in6_addr *daddr,
116                              const struct in6_addr *saddr)
117 {
118         static u32 ip6_idents_hashrnd __read_mostly;
119         u32 id;
120
121         net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
122
123         id = rpl___ipv6_select_ident(net, ip6_idents_hashrnd, daddr, saddr);
124         return htonl(id);
125 }
126
/* Copy routing and bookkeeping metadata from 'from' to 'to' so a newly
 * built fragment is treated the same way as the original packet.
 */
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        /* Release any dst already attached before taking a reference on
         * the original packet's dst. */
        skb_dst_drop(to);
        skb_dst_set(to, dst_clone(skb_dst(from)));
        to->dev = from->dev;
        to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
        nf_copy(to, from);
        skb_copy_secmark(to, from);
}
143
/* OUTPUT() hides the kernel-version-dependent prototype of the output
 * callback: with HAVE_IP_FRAGMENT_TAKES_SOCK the callback also receives
 * the owning socket (taken from skb->sk).
 */
#ifdef HAVE_IP_FRAGMENT_TAKES_SOCK
#define OUTPUT(skb) output(skb->sk, skb)
#else
#define OUTPUT(skb) output(skb)
#endif

/* Split an IPv6 packet into fragments that fit the path MTU and hand each
 * one to the 'output' callback.
 *
 * Two strategies are used: a fast path that re-headers the skb's existing
 * frag list in place when its geometry already matches the MTU, and a slow
 * path that allocates a fresh skb per fragment and copies the payload.
 *
 * Note: the 'sk' parameter is not referenced in this body; the owning
 * socket, where needed, is read from skb->sk (see OUTPUT()).
 *
 * Returns 0 on success or a negative errno; the input skb is consumed on
 * success and freed on failure.
 */
int ip6_fragment(struct sock *sk, struct sk_buff *skb,
                 int (*output)(OVS_VPORT_OUTPUT_PARAMS))
{
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
                                inet6_sk(skb->sk) : NULL;
        struct ipv6hdr *tmp_hdr;
        struct frag_hdr *fh;
        unsigned int mtu, hlen, left, len;
        int hroom, troom;
        __be32 frag_id;
        int ptr, offset = 0, err = 0;
        u8 *prevhdr, nexthdr = 0;
        struct net *net = dev_net(skb_dst(skb)->dev);

        /* hlen covers the IPv6 header plus any extension headers that must
         * be replicated in every fragment; prevhdr points at the nexthdr
         * byte to be rewritten to NEXTHDR_FRAGMENT. */
        hlen = ip6_find_1stfragopt(skb, &prevhdr);
        nexthdr = *prevhdr;

        mtu = ip6_skb_dst_mtu(skb);

        /* We must not fragment if the socket is set to force MTU discovery
         * or if the skb it not generated by a local socket.
         */
        if (unlikely(!skb->ignore_df && skb->len > mtu))
                goto fail_toobig;

        if (IP6CB(skb)->frag_max_size) {
                if (IP6CB(skb)->frag_max_size > mtu)
                        goto fail_toobig;

                /* don't send fragments larger than what we received */
                mtu = IP6CB(skb)->frag_max_size;
                if (mtu < IPV6_MIN_MTU)
                        mtu = IPV6_MIN_MTU;
        }

        if (np && np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        /* From here on 'mtu' is the per-fragment payload budget. */
        mtu -= hlen + sizeof(struct frag_hdr);

        frag_id = rpl_ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
                                        &ipv6_hdr(skb)->saddr);

        hroom = LL_RESERVED_SPACE(rt->dst.dev);
        /* Fast path: reuse the existing frag list in place, provided every
         * piece already has the right size, alignment and headroom. */
        if (skb_has_frag_list(skb)) {
                int first_len = skb_pagelen(skb);
                struct sk_buff *frag2;

                if (first_len - hlen > mtu ||
                    ((first_len - hlen) & 7) ||
                    skb_cloned(skb) ||
                    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
                        goto slow_path;

                skb_walk_frags(skb, frag) {
                        /* Correct geometry. */
                        if (frag->len > mtu ||
                            ((frag->len & 7) && frag->next) ||
                            skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
                                goto slow_path_clean;

                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path_clean;

                        BUG_ON(frag->sk);
                        if (skb->sk) {
                                frag->sk = skb->sk;
                                frag->destructor = sock_wfree;
                        }
                        skb->truesize -= frag->truesize;
                }

                err = 0;
                offset = 0;
                /* BUILD HEADER */

                *prevhdr = NEXTHDR_FRAGMENT;
                /* Keep a copy of the unfragmentable part; it is re-applied
                 * to every fragment below. */
                tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
                if (!tmp_hdr) {
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_FRAGFAILS);
                        err = -ENOMEM;
                        goto fail;
                }
                frag = skb_shinfo(skb)->frag_list;
                skb_frag_list_init(skb);

                /* Insert the fragment header between the unfragmentable
                 * part and the payload of the first fragment. */
                __skb_pull(skb, hlen);
                fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
                __skb_push(skb, hlen);
                skb_reset_network_header(skb);
                memcpy(skb_network_header(skb), tmp_hdr, hlen);

                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                fh->frag_off = htons(IP6_MF);
                fh->identification = frag_id;

                first_len = skb_pagelen(skb);
                skb->data_len = first_len - skb_headlen(skb);
                skb->len = first_len;
                ipv6_hdr(skb)->payload_len = htons(first_len -
                                                   sizeof(struct ipv6hdr));

                dst_hold(&rt->dst);

                for (;;) {
                        /* Prepare header of the next frame,
                         * before previous one went down. */
                        if (frag) {
                                frag->ip_summed = CHECKSUM_NONE;
                                skb_reset_transport_header(frag);
                                fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
                                __skb_push(frag, hlen);
                                skb_reset_network_header(frag);
                                memcpy(skb_network_header(frag), tmp_hdr,
                                       hlen);
                                offset += skb->len - hlen - sizeof(struct frag_hdr);
                                fh->nexthdr = nexthdr;
                                fh->reserved = 0;
                                fh->frag_off = htons(offset);
                                if (frag->next)
                                        fh->frag_off |= htons(IP6_MF);
                                fh->identification = frag_id;
                                ipv6_hdr(frag)->payload_len =
                                                htons(frag->len -
                                                      sizeof(struct ipv6hdr));
                                ip6_copy_metadata(frag, skb);
                        }

                        err = OUTPUT(skb);
                        if (!err)
                                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                              IPSTATS_MIB_FRAGCREATES);

                        if (err || !frag)
                                break;

                        skb = frag;
                        frag = skb->next;
                        skb->next = NULL;
                }

                kfree(tmp_hdr);

                if (err == 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                      IPSTATS_MIB_FRAGOKS);
                        ip6_rt_put(rt);
                        return 0;
                }

                /* Output failed mid-list: the remaining fragments were
                 * never sent, free them. */
                kfree_skb_list(frag);

                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                              IPSTATS_MIB_FRAGFAILS);
                ip6_rt_put(rt);
                return err;

slow_path_clean:
                /* Undo the ownership transfer done for the frags walked
                 * so far before falling back to the slow path. */
                skb_walk_frags(skb, frag2) {
                        if (frag2 == frag)
                                break;
                        frag2->sk = NULL;
                        frag2->destructor = NULL;
                        skb->truesize += frag2->truesize;
                }
        }

slow_path:
        /* The payload is about to be copied piecewise, so any deferred
         * checksum must be resolved now. */
        if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
            skb_checksum_help(skb))
                goto fail;

        left = skb->len - hlen;         /* Space per frame */
        ptr = hlen;                     /* Where to start from */

        /*
         *      Fragment the datagram.
         */

        *prevhdr = NEXTHDR_FRAGMENT;
        troom = rt->dst.dev->needed_tailroom;

        /*
         *      Keep copying data until we run out.
         */
        while (left > 0)        {
                len = left;
                /* IF: it doesn't fit, use 'mtu' - the data space left */
                if (len > mtu)
                        len = mtu;
                /* IF: we are not sending up to and including the packet end
                   then align the next start on an eight byte boundary */
                if (len < left) {
                        len &= ~7;
                }

                /* Allocate buffer */
                frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
                                 hroom + troom, GFP_ATOMIC);
                if (!frag) {
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_FRAGFAILS);
                        err = -ENOMEM;
                        goto fail;
                }

                /*
                 *      Set up data on packet
                 */

                ip6_copy_metadata(frag, skb);
                skb_reserve(frag, hroom);
                skb_put(frag, len + hlen + sizeof(struct frag_hdr));
                skb_reset_network_header(frag);
                fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
                frag->transport_header = (frag->network_header + hlen +
                                          sizeof(struct frag_hdr));

                /*
                 *      Charge the memory for the fragment to any owner
                 *      it might possess
                 */
                if (skb->sk)
                        skb_set_owner_w(frag, skb->sk);

                /*
                 *      Copy the packet header into the new buffer.
                 */
                skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

                /*
                 *      Build fragment header.
                 */
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                fh->identification = frag_id;

                /*
                 *      Copy a block of the IP datagram.
                 */
                BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
                                     len));
                left -= len;

                fh->frag_off = htons(offset);
                if (left > 0)
                        fh->frag_off |= htons(IP6_MF);
                ipv6_hdr(frag)->payload_len = htons(frag->len -
                                                    sizeof(struct ipv6hdr));

                ptr += len;
                offset += len;

                /*
                 *      Put this fragment into the sending queue.
                 */
                err = OUTPUT(frag);
                if (err)
                        goto fail;

                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGCREATES);
        }
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGOKS);
        consume_skb(skb);
        return err;

fail_toobig:
        /* Packet exceeds the MTU and may not be fragmented: tell the
         * sender via ICMPv6 Packet Too Big. */
        if (skb->sk && dst_allfrag(skb_dst(skb)))
                sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

        skb->dev = skb_dst(skb)->dev;
        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
        err = -EMSGSIZE;

fail:
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return err;
}
#undef OUTPUT
439
440 int __init ip6_output_init(void)
441 {
442         ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
443         if (!ip_idents) {
444                 pr_warn("IP: failed to allocate ip_idents\n");
445                 goto error;
446         }
447
448         prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
449
450         ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL);
451         if (!ip_tstamps) {
452                 pr_warn("IP: failed to allocate ip_tstamps\n");
453                 goto error_ip_idents_free;
454         }
455
456         return 0;
457
458 error_ip_idents_free:
459         kfree(ip_idents);
460 error:
461         return -ENOMEM;
462 }
463
464 void ip6_output_exit(void)
465 {
466         kfree(ip_tstamps);
467         kfree(ip_idents);
468 }
469
470 #endif /* OVS_FRAGMENT_BACKPORT */