2 * Backported from upstream commit 9ef2e965e554
3 * ("ipv6: drop frames with attached skb->sk in forwarding")
5 * IPv6 output functions
6 * Linux INET6 implementation
9 * Pedro Roque <roque@di.fc.ul.pt>
11 * Based on linux/net/ipv4/ip_output.c
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License
15 * as published by the Free Software Foundation; either version
16 * 2 of the License, or (at your option) any later version.
19 * A.N.Kuznetsov : arithmetics in fragmentation.
20 * extension headers are implemented.
21 * route changes now work.
22 * ip6_forward does not confuse sniffers.
25 * H. von Brand : Added missing #include <linux/string.h>
26 * Imran Patel : frag id should be in NBO
27 * Kazunori MIYAZAWA @USAGI
28 * : add ip6_append_data and related functions
32 #include <linux/version.h>
34 #ifdef OVS_FRAGMENT_BACKPORT
36 #include <linux/errno.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
39 #include <linux/socket.h>
40 #include <linux/net.h>
41 #include <linux/netdevice.h>
42 #include <linux/if_arp.h>
43 #include <linux/in6.h>
44 #include <linux/tcp.h>
45 #include <linux/random.h>
46 #include <linux/route.h>
47 #include <linux/module.h>
48 #include <linux/slab.h>
50 #include <linux/netfilter.h>
51 #include <linux/netfilter_ipv6.h>
57 #include <net/ndisc.h>
58 #include <net/protocol.h>
59 #include <net/ip6_route.h>
60 #include <net/addrconf.h>
61 #include <net/rawv6.h>
64 #include <net/checksum.h>
65 #include <linux/mroute6.h>
/* Number of slots in the shared IP identification generator table. */
67 #define IP_IDENTS_SZ 2048u
/* Per-slot atomic ID counters and the timestamp of each slot's last use.
 * Allocated in ip6_output_init() and freed in ip6_output_exit(). */
69 static atomic_t *ip_idents __read_mostly;
70 static u32 *ip_tstamps __read_mostly;
/* Module init/exit hooks for the tables above. */
72 int __init ip6_output_init(void);
73 void ip6_output_exit(void);
75 /* In order to protect privacy, we add a perturbation to identifiers
76 * if one generator is seldom used. This makes it hard for an attacker
77 * to infer how many packets were sent between two points in time.
 *
 * Reserves @segs consecutive IDs from the generator slot selected by
 * @hash and returns the first reserved value.
 */
79 static u32 rpl_ip_idents_reserve(u32 hash, int segs)
/* Both tables are indexed by the same slot so the timestamp tracks the
 * last use of the matching ID counter. */
81 u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
82 atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
83 u32 old = ACCESS_ONCE(*p_tstamp);
84 u32 now = (u32)jiffies;
/* If the slot has been idle, skip ahead by a random amount bounded by
 * the idle time; cmpxchg ensures only one CPU applies the perturbation. */
87 if (old != now && cmpxchg(p_tstamp, old, now) == old)
88 delta = prandom_u32_max(now - old);
/* atomic_add_return() yields the value AFTER the add; subtracting @segs
 * returns the first ID of the reserved range. */
90 return atomic_add_return(segs + delta, p_id) - segs;
/* Derives a flow hash from the destination/source addresses and a random
 * seed, then reserves one fragment ID from the slot that hash selects.
 * The net_hash_mix() fold keeps hashes distinct across network namespaces.
 */
93 static u32 rpl___ipv6_select_ident(struct net *net, u32 hashrnd,
94 const struct in6_addr *dst,
95 const struct in6_addr *src)
99 hash = __ipv6_addr_jhash(dst, hashrnd);
100 hash = __ipv6_addr_jhash(src, hash);
101 hash ^= net_hash_mix(net);
103 /* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve,
104 * set the high order bit instead, thus minimizing possible future
107 id = rpl_ip_idents_reserve(hash, 1);
/* Public wrapper: picks a fragment identification value for the flow
 * (daddr, saddr). Lazily seeds the per-boot hash secret on first use,
 * then delegates to rpl___ipv6_select_ident(). Returns the ID in
 * network byte order (__be32).
 */
114 static __be32 rpl_ipv6_select_ident(struct net *net,
115 const struct in6_addr *daddr,
116 const struct in6_addr *saddr)
/* Seed is initialized exactly once, race-free, via net_get_random_once(). */
118 static u32 ip6_idents_hashrnd __read_mostly;
121 net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
123 id = rpl___ipv6_select_ident(net, ip6_idents_hashrnd, daddr, saddr);
/* Copies per-packet metadata from @from to @to so each fragment carries
 * the same classification state (packet type, priority, protocol, dst
 * route reference, mark, traffic-control index, security mark) as the
 * original skb.
 */
127 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
129 to->pkt_type = from->pkt_type;
130 to->priority = from->priority;
131 to->protocol = from->protocol;
/* Each fragment takes its own reference on the shared dst entry. */
133 skb_dst_set(to, dst_clone(skb_dst(from)));
135 to->mark = from->mark;
137 #ifdef CONFIG_NET_SCHED
138 to->tc_index = from->tc_index;
141 skb_copy_secmark(to, from);
/* Compat shim: older kernels pass the socket to the output callback as a
 * separate first argument; newer ones pass only the skb. OUTPUT() hides
 * that signature difference from ip6_fragment(). */
144 #ifdef HAVE_IP_FRAGMENT_TAKES_SOCK
145 #define OUTPUT(skb) output(skb->sk, skb)
147 #define OUTPUT(skb) output(skb)
/*
 * ip6_fragment - split an IPv6 packet into MTU-sized fragments.
 *
 * Each fragment gets a copy of the original's unfragmentable header
 * chain plus a fragment extension header, and is handed to @output.
 * Two strategies: a fast path that reuses an existing frag_list when its
 * geometry already matches fragment requirements, and a slow path that
 * allocates and fills a fresh skb per fragment.
 *
 * Returns 0 on success or a negative errno; SNMP fragmentation counters
 * are updated on both success and failure paths.
 */
150 int ip6_fragment(struct sock *sk, struct sk_buff *skb,
151 int (*output)(OVS_VPORT_OUTPUT_PARAMS))
153 struct sk_buff *frag;
154 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
/* Only consult socket fragmentation preferences for locally generated
 * packets (no forwarding recursion in progress). */
155 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
156 inet6_sk(skb->sk) : NULL;
157 struct ipv6hdr *tmp_hdr;
159 unsigned int mtu, hlen, left, len;
162 int ptr, offset = 0, err = 0;
163 u8 *prevhdr, nexthdr = 0;
164 struct net *net = dev_net(skb_dst(skb)->dev);
/* hlen = length of the unfragmentable part; prevhdr points at the
 * nexthdr field that must be rewritten to NEXTHDR_FRAGMENT. */
166 hlen = ip6_find_1stfragopt(skb, &prevhdr);
169 mtu = ip6_skb_dst_mtu(skb);
171 /* We must not fragment if the socket is set to force MTU discovery
172 * or if the skb is not generated by a local socket.
174 if (unlikely(!skb->ignore_df && skb->len > mtu))
/* A reassembled packet must not be re-fragmented larger than the pieces
 * it originally arrived in. */
177 if (IP6CB(skb)->frag_max_size) {
178 if (IP6CB(skb)->frag_max_size > mtu)
181 /* don't send fragments larger than what we received */
182 mtu = IP6CB(skb)->frag_max_size;
183 if (mtu < IPV6_MIN_MTU)
/* Honor a smaller socket-requested fragment size, if any. */
187 if (np && np->frag_size < mtu) {
/* From here on, mtu is the per-fragment payload budget (headers and the
 * 8-byte fragment header already subtracted). */
191 mtu -= hlen + sizeof(struct frag_hdr);
193 frag_id = rpl_ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
194 &ipv6_hdr(skb)->saddr);
196 hroom = LL_RESERVED_SPACE(rt->dst.dev);
/* ---- Fast path: reuse the skb's existing frag_list as fragments ---- */
197 if (skb_has_frag_list(skb)) {
198 int first_len = skb_pagelen(skb);
199 struct sk_buff *frag2;
/* The head and every list member must fit the MTU, be 8-byte aligned
 * (except the last), have headroom for the new headers, and be
 * unshared; otherwise fall back to the slow path. */
201 if (first_len - hlen > mtu ||
202 ((first_len - hlen) & 7) ||
204 skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
207 skb_walk_frags(skb, frag) {
208 /* Correct geometry. */
209 if (frag->len > mtu ||
210 ((frag->len & 7) && frag->next) ||
211 skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
212 goto slow_path_clean;
214 /* Partially cloned skb? */
215 if (skb_shared(frag))
216 goto slow_path_clean;
/* Transfer truesize accounting from the head skb to each detached
 * fragment so socket memory charges stay balanced. */
221 frag->destructor = sock_wfree;
223 skb->truesize -= frag->truesize;
/* Save a copy of the unfragmentable header chain; it is replayed in
 * front of every fragment. */
230 *prevhdr = NEXTHDR_FRAGMENT;
231 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
233 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
234 IPSTATS_MIB_FRAGFAILS);
/* Detach the frag_list; its members become stand-alone fragments. */
238 frag = skb_shinfo(skb)->frag_list;
239 skb_frag_list_init(skb);
/* Rebuild the head skb: headers, fragment header, then the saved
 * unfragmentable chain copied back in front. */
241 __skb_pull(skb, hlen);
242 fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
243 __skb_push(skb, hlen);
244 skb_reset_network_header(skb);
245 memcpy(skb_network_header(skb), tmp_hdr, hlen);
247 fh->nexthdr = nexthdr;
/* First fragment: offset 0, More Fragments set. */
249 fh->frag_off = htons(IP6_MF);
250 fh->identification = frag_id;
252 first_len = skb_pagelen(skb);
253 skb->data_len = first_len - skb_headlen(skb);
254 skb->len = first_len;
255 ipv6_hdr(skb)->payload_len = htons(first_len -
256 sizeof(struct ipv6hdr));
261 /* Prepare header of the next frame,
262 * before previous one went down. */
264 frag->ip_summed = CHECKSUM_NONE;
265 skb_reset_transport_header(frag);
266 fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
267 __skb_push(frag, hlen);
268 skb_reset_network_header(frag);
269 memcpy(skb_network_header(frag), tmp_hdr,
/* Advance the fragment offset by the payload just emitted. */
271 offset += skb->len - hlen - sizeof(struct frag_hdr);
272 fh->nexthdr = nexthdr;
274 fh->frag_off = htons(offset);
/* MF is set on every fragment except the last in the chain. */
276 fh->frag_off |= htons(IP6_MF);
277 fh->identification = frag_id;
278 ipv6_hdr(frag)->payload_len =
280 sizeof(struct ipv6hdr));
281 ip6_copy_metadata(frag, skb);
286 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
287 IPSTATS_MIB_FRAGCREATES);
/* Fast-path completion: account success... */
300 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
301 IPSTATS_MIB_FRAGOKS);
/* ...or drop the not-yet-sent fragments on error. */
306 kfree_skb_list(frag);
308 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
309 IPSTATS_MIB_FRAGFAILS);
/* slow_path_clean: undo the truesize/destructor transfer done above for
 * frag_list members inspected before the geometry check failed. */
314 skb_walk_frags(skb, frag2) {
318 frag2->destructor = NULL;
319 skb->truesize += frag2->truesize;
/* ---- Slow path: allocate a fresh skb per fragment ---- */
/* Fragmentation of a CHECKSUM_PARTIAL skb requires resolving the
 * checksum first, since the hardware offload covers the whole packet. */
324 if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
325 skb_checksum_help(skb))
328 left = skb->len - hlen; /* Space per frame */
329 ptr = hlen; /* Where to start from */
332 * Fragment the datagram.
335 *prevhdr = NEXTHDR_FRAGMENT;
336 troom = rt->dst.dev->needed_tailroom;
339 * Keep copying data until we run out.
343 /* IF: it doesn't fit, use 'mtu' - the data space left */
346 /* IF: we are not sending up to and including the packet end
347 then align the next start on an eight byte boundary */
352 /* Allocate buffer */
353 frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
354 hroom + troom, GFP_ATOMIC);
356 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
357 IPSTATS_MIB_FRAGFAILS);
363 * Set up data on packet
366 ip6_copy_metadata(frag, skb);
367 skb_reserve(frag, hroom);
368 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
369 skb_reset_network_header(frag);
370 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
371 frag->transport_header = (frag->network_header + hlen +
372 sizeof(struct frag_hdr));
375 * Charge the memory for the fragment to any owner
/* of the original socket, so socket buffer accounting stays correct. */
379 skb_set_owner_w(frag, skb->sk);
382 * Copy the packet header into the new buffer.
384 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
387 * Build fragment header.
389 fh->nexthdr = nexthdr;
391 fh->identification = frag_id;
394 * Copy a block of the IP datagram.
/* skb_copy_bits() cannot fail here: ptr/len were computed from skb->len. */
396 BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
400 fh->frag_off = htons(offset);
402 fh->frag_off |= htons(IP6_MF);
403 ipv6_hdr(frag)->payload_len = htons(frag->len -
404 sizeof(struct ipv6hdr));
410 * Put this fragment into the sending queue.
416 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
417 IPSTATS_MIB_FRAGCREATES);
419 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
420 IPSTATS_MIB_FRAGOKS);
/* fail_toobig: packet exceeds the MTU and may not be fragmented; tell
 * the sender via ICMPv6 Packet Too Big, and disable GSO on the socket
 * when the path requires fragmenting everything. */
425 if (skb->sk && dst_allfrag(skb_dst(skb)))
426 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
428 skb->dev = skb_dst(skb)->dev;
429 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
433 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
434 IPSTATS_MIB_FRAGFAILS);
/* Allocates and seeds the ip_idents / ip_tstamps generator tables used
 * by rpl_ip_idents_reserve(). Returns 0 on success or a negative errno
 * on allocation failure, releasing ip_idents if ip_tstamps fails
 * (goto-based cleanup).
 */
440 int __init ip6_output_init(void)
442 ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
444 pr_warn("IP: failed to allocate ip_idents\n");
/* Start the ID counters at random values so sequences are unpredictable. */
448 prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
/* kcalloc zeroes the timestamps: slot 0 reads as "never used". */
450 ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL);
452 pr_warn("IP: failed to allocate ip_tstamps\n");
453 goto error_ip_idents_free;
458 error_ip_idents_free:
/* Releases the generator tables allocated by ip6_output_init(). */
464 void ip6_output_exit(void)
470 #endif /* OVS_FRAGMENT_BACKPORT */