datapath: Fix IPv6 fragment expiry crash.
authorJoe Stringer <joe@ovn.org>
Wed, 27 Jan 2016 00:49:36 +0000 (00:49 +0000)
committerJoe Stringer <joe@ovn.org>
Fri, 29 Jan 2016 18:14:53 +0000 (10:14 -0800)
Prior to a series of commits in 3.17 like the following, the model
used to manage and expire fragments was different. We already backport
several of these functions (See datapath/compat/inet_fragment.c) to do
things like allocate/evict/destroy frags and frag queues. In the IPv4
code, we use these. In most of the IPv6 cases, we already reuse these
also. However, for timed frag expiration we instead call the upstream
version of the function, which proceeds to use the upstream versions
of the functions we backport in inet_fragment.c. There can be some
discrepancy between the offsets used in these upstream versions vs. the
backport versions, so if you mix/match them then it leads to invalid
dereferences.

b13d3cbfb8e8 ("inet: frag: move eviction of queues to work queue")
ab1c724f6330 ("inet: frag: use seqlock for hash rebuild")

Fixes the following kernel oops on kernels < 3.17 when IPv6 fragments
are expired without reassembling the frame.

BUG: unable to handle kernel paging request at 00000006845d69a8
IP: [<ffffffff8172c09e>] _raw_spin_lock+0xe/0x50
...
Call Trace:
 <IRQ>
 [<ffffffff816a32d3>] inet_frag_kill+0x63/0x100
 [<ffffffff816ead93>] ip6_expire_frag_queue+0x63/0x110
 [<ffffffffa01130e6>] nf_ct_frag6_expire+0x26/0x30 [openvswitch]
 [<ffffffff810744f6>] call_timer_fn+0x36/0x100
 [<ffffffffa01130c0>] ? nf_ct_net_init+0x20/0x20 [openvswitch]
 [<ffffffff8107548f>] run_timer_softirq+0x1ef/0x2f0
 [<ffffffff8106cccc>] __do_softirq+0xec/0x2c0
 [<ffffffff8106d215>] irq_exit+0x105/0x110
 [<ffffffff81737095>] smp_apic_timer_interrupt+0x45/0x60
 [<ffffffff81735a1d>] apic_timer_interrupt+0x6d/0x80
 <EOI>
 [<ffffffff8104f596>] ? native_safe_halt+0x6/0x10
 [<ffffffff8101cb2f>] default_idle+0x1f/0xc0
 [<ffffffff8101d406>] arch_cpu_idle+0x26/0x30
 [<ffffffff810bf3a5>] cpu_startup_entry+0xc5/0x290
 [<ffffffff817122e7>] rest_init+0x77/0x80
 [<ffffffff81d34f70>] start_kernel+0x438/0x443

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
datapath/linux/Modules.mk
datapath/linux/compat/include/linux/ipv6.h
datapath/linux/compat/include/net/ip6_route.h
datapath/linux/compat/reassembly.c [new file with mode: 0644]

index 3ca852a..6ab52a7 100644 (file)
@@ -18,6 +18,7 @@ openvswitch_sources += \
        linux/compat/net_namespace.c \
        linux/compat/nf_conntrack_core.c \
        linux/compat/nf_conntrack_reasm.c \
+       linux/compat/reassembly.c \
        linux/compat/reciprocal_div.c \
        linux/compat/skbuff-openvswitch.c \
        linux/compat/socket.c \
index 25a5431..0971c56 100644 (file)
@@ -3,6 +3,9 @@
 
 #include_next <linux/ipv6.h>
 
+struct frag_queue;
+struct inet_frags;
+
 #ifndef HAVE_SKBUFF_HEADER_HELPERS
 static inline struct ipv6hdr *ipv6_hdr(const struct sk_buff *skb)
 {
@@ -10,4 +13,10 @@ static inline struct ipv6hdr *ipv6_hdr(const struct sk_buff *skb)
 }
 #endif
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,17,0)
+void rpl_ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
+                              struct inet_frags *frags);
+#define ip6_expire_frag_queue rpl_ip6_expire_frag_queue
+#endif
+
 #endif
index 93d70e3..ba7dcc5 100644 (file)
@@ -2,6 +2,7 @@
 #define __NET_IP6_ROUTE_WRAPPER
 
 #include <net/route.h>
+#include <net/ip.h>                /* For OVS_VPORT_OUTPUT_PARAMS */
 #include <net/ipv6.h>
 
 #include_next<net/ip6_route.h>
diff --git a/datapath/linux/compat/reassembly.c b/datapath/linux/compat/reassembly.c
new file mode 100644 (file)
index 0000000..716d875
--- /dev/null
@@ -0,0 +1,110 @@
+/*
+ * Backported from upstream commit a72a5e2d34ec
+ * ("inet: kill unused skb_free op")
+ *
+ *      IPv6 fragment reassembly
+ *      Linux INET6 implementation
+ *
+ *      Authors:
+ *      Pedro Roque             <roque@di.fc.ul.pt>
+ *
+ *      Based on: net/ipv4/ip_fragment.c
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+/*
+ *      Fixes:
+ *      Andi Kleen      Make it work with multiple hosts.
+ *                      More RFC compliance.
+ *
+ *      Horst von Brand Add missing #include <linux/string.h>
+ *      Alexey Kuznetsov        SMP races, threading, cleanup.
+ *      Patrick McHardy         LRU queue of frag heads for evictor.
+ *      Mitsuru KANDA @USAGI    Register inet6_protocol{}.
+ *      David Stevens and
+ *      YOSHIFUJI,H. @USAGI     Always remove fragment header to
+ *                              calculate ICV correctly.
+ */
+
+#define pr_fmt(fmt) "IPv6: " fmt
+
+#if defined(OVS_FRAGMENT_BACKPORT) && \
+    LINUX_VERSION_CODE < KERNEL_VERSION(3,17,0)
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/jiffies.h>
+#include <linux/net.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+#include <linux/random.h>
+#include <linux/jhash.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/rawv6.h>
+#include <net/ndisc.h>
+#include <net/addrconf.h>
+#include <net/inet_frag.h>
+#include <net/inet_ecn.h>
+
+void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
+                          struct inet_frags *frags)
+{
+       struct net_device *dev = NULL;
+
+       spin_lock(&fq->q.lock);
+
+       if (qp_flags(fq) & INET_FRAG_COMPLETE)
+               goto out;
+
+       inet_frag_kill(&fq->q, frags);
+
+       rcu_read_lock();
+       dev = dev_get_by_index_rcu(net, fq->iif);
+       if (!dev)
+               goto out_rcu_unlock;
+
+       IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
+
+       if (inet_frag_evicting(&fq->q))
+               goto out_rcu_unlock;
+
+       IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
+
+       /* Don't send error if the first segment did not arrive. */
+       if (!(qp_flags(fq) & INET_FRAG_FIRST_IN) || !fq->q.fragments)
+               goto out_rcu_unlock;
+
+       /* But use as source device on which LAST ARRIVED
+        * segment was received. And do not use fq->dev
+        * pointer directly, device might already disappeared.
+        */
+       fq->q.fragments->dev = dev;
+       icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
+out_rcu_unlock:
+       rcu_read_unlock();
+out:
+       spin_unlock(&fq->q.lock);
+       inet_frag_put(&fq->q, frags);
+}
+
+#endif /* OVS_FRAGMENT_BACKPORT */