netfilter: Fix slab corruption.
[cascardo/linux.git] / net / netfilter / core.c
index 2c5327e..fcb5d1d 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/proc_fs.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
+#include <linux/rcupdate.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 
@@ -61,33 +62,41 @@ EXPORT_SYMBOL(nf_hooks_needed);
 #endif
 
 static DEFINE_MUTEX(nf_hook_mutex);
+#define nf_entry_dereference(e) \
+       rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
 
-static struct list_head *nf_find_hook_list(struct net *net,
-                                          const struct nf_hook_ops *reg)
+static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
 {
-       struct list_head *hook_list = NULL;
-
        if (reg->pf != NFPROTO_NETDEV)
-               hook_list = &net->nf.hooks[reg->pf][reg->hooknum];
-       else if (reg->hooknum == NF_NETDEV_INGRESS) {
+               return net->nf.hooks[reg->pf]+reg->hooknum;
+
 #ifdef CONFIG_NETFILTER_INGRESS
+       if (reg->hooknum == NF_NETDEV_INGRESS) {
                if (reg->dev && dev_net(reg->dev) == net)
-                       hook_list = &reg->dev->nf_hooks_ingress;
-#endif
+                       return &reg->dev->nf_hooks_ingress;
        }
-       return hook_list;
+#endif
+       return NULL;
 }
 
-struct nf_hook_entry {
-       const struct nf_hook_ops        *orig_ops;
-       struct nf_hook_ops              ops;
-};
-
 int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
 {
-       struct list_head *hook_list;
-       struct nf_hook_entry *entry;
-       struct nf_hook_ops *elem;
+       struct nf_hook_entry __rcu **pp;
+       struct nf_hook_entry *entry, *p;
+
+       if (reg->pf == NFPROTO_NETDEV) {
+#ifndef CONFIG_NETFILTER_INGRESS
+               if (reg->hooknum == NF_NETDEV_INGRESS)
+                       return -EOPNOTSUPP;
+#endif
+               if (reg->hooknum != NF_NETDEV_INGRESS ||
+                   !reg->dev || dev_net(reg->dev) != net)
+                       return -EINVAL;
+       }
+
+       pp = nf_hook_entry_head(net, reg);
+       if (!pp)
+               return -EINVAL;
 
        entry = kmalloc(sizeof(*entry), GFP_KERNEL);
        if (!entry)
@@ -95,19 +104,19 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
 
        entry->orig_ops = reg;
        entry->ops      = *reg;
-
-       hook_list = nf_find_hook_list(net, reg);
-       if (!hook_list) {
-               kfree(entry);
-               return -ENOENT;
-       }
+       entry->next     = NULL;
 
        mutex_lock(&nf_hook_mutex);
-       list_for_each_entry(elem, hook_list, list) {
-               if (reg->priority < elem->priority)
+
+       /* Walk to the first entry whose priority is greater than reg's; the new entry is linked in just before it */
+       while ((p = nf_entry_dereference(*pp)) != NULL) {
+               if (reg->priority < p->orig_ops->priority)
                        break;
+               pp = &p->next;
        }
-       list_add_rcu(&entry->ops.list, elem->list.prev);
+       rcu_assign_pointer(entry->next, p);
+       rcu_assign_pointer(*pp, entry);
+
        mutex_unlock(&nf_hook_mutex);
 #ifdef CONFIG_NETFILTER_INGRESS
        if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
@@ -122,24 +131,23 @@ EXPORT_SYMBOL(nf_register_net_hook);
 
 void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
 {
-       struct list_head *hook_list;
-       struct nf_hook_entry *entry;
-       struct nf_hook_ops *elem;
+       struct nf_hook_entry __rcu **pp;
+       struct nf_hook_entry *p;
 
-       hook_list = nf_find_hook_list(net, reg);
-       if (!hook_list)
+       pp = nf_hook_entry_head(net, reg);
+       if (WARN_ON_ONCE(!pp))
                return;
 
        mutex_lock(&nf_hook_mutex);
-       list_for_each_entry(elem, hook_list, list) {
-               entry = container_of(elem, struct nf_hook_entry, ops);
-               if (entry->orig_ops == reg) {
-                       list_del_rcu(&entry->ops.list);
+       while ((p = nf_entry_dereference(*pp)) != NULL) {
+               if (p->orig_ops == reg) {
+                       rcu_assign_pointer(*pp, p->next);
                        break;
                }
+               pp = &p->next;
        }
        mutex_unlock(&nf_hook_mutex);
-       if (&elem->list == hook_list) {
+       if (!p) {
                WARN(1, "nf_unregister_net_hook: hook not found!\n");
                return;
        }
@@ -151,10 +159,10 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
        static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
 #endif
        synchronize_net();
-       nf_queue_nf_hook_drop(net, &entry->ops);
+       nf_queue_nf_hook_drop(net, p);
        /* other cpu might still process nfqueue verdict that used reg */
        synchronize_net();
-       kfree(entry);
+       kfree(p);
 }
 EXPORT_SYMBOL(nf_unregister_net_hook);
 
@@ -294,10 +302,9 @@ void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
 }
 EXPORT_SYMBOL(_nf_unregister_hooks);
 
-unsigned int nf_iterate(struct list_head *head,
-                       struct sk_buff *skb,
+unsigned int nf_iterate(struct sk_buff *skb,
                        struct nf_hook_state *state,
-                       struct nf_hook_ops **elemp)
+                       struct nf_hook_entry **entryp)
 {
        unsigned int verdict;
 
@@ -305,20 +312,23 @@ unsigned int nf_iterate(struct list_head *head,
         * The caller must not block between calls to this
         * function because of risk of continuing from deleted element.
         */
-       list_for_each_entry_continue_rcu((*elemp), head, list) {
-               if (state->thresh > (*elemp)->priority)
+       while (*entryp) {
+               if (state->thresh > (*entryp)->ops.priority) {
+                       *entryp = rcu_dereference((*entryp)->next);
                        continue;
+               }
 
                /* Optimization: we don't need to hold module
                   reference here, since function can't sleep. --RR */
 repeat:
-               verdict = (*elemp)->hook((*elemp)->priv, skb, state);
+               verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state);
                if (verdict != NF_ACCEPT) {
 #ifdef CONFIG_NETFILTER_DEBUG
                        if (unlikely((verdict & NF_VERDICT_MASK)
                                                        > NF_MAX_VERDICT)) {
                                NFDEBUG("Evil return from %p(%u).\n",
-                                       (*elemp)->hook, state->hook);
+                                       (*entryp)->ops.hook, state->hook);
+                               *entryp = rcu_dereference((*entryp)->next);
                                continue;
                        }
 #endif
@@ -326,25 +336,23 @@ repeat:
                                return verdict;
                        goto repeat;
                }
+               *entryp = rcu_dereference((*entryp)->next);
        }
        return NF_ACCEPT;
 }
 
 
 /* Returns 1 if okfn() needs to be executed by the caller,
- * -EPERM for NF_DROP, 0 otherwise. */
+ * -EPERM for NF_DROP, 0 otherwise.  Caller must hold rcu_read_lock. */
 int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
 {
-       struct nf_hook_ops *elem;
+       struct nf_hook_entry *entry;
        unsigned int verdict;
        int ret = 0;
 
-       /* We may already have this, but read-locks nest anyway */
-       rcu_read_lock();
-
-       elem = list_entry_rcu(state->hook_list, struct nf_hook_ops, list);
+       entry = rcu_dereference(state->hook_entries);
 next_hook:
-       verdict = nf_iterate(state->hook_list, skb, state, &elem);
+       verdict = nf_iterate(skb, state, &entry);
        if (verdict == NF_ACCEPT || verdict == NF_STOP) {
                ret = 1;
        } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
@@ -353,8 +361,10 @@ next_hook:
                if (ret == 0)
                        ret = -EPERM;
        } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
-               int err = nf_queue(skb, elem, state,
-                                  verdict >> NF_VERDICT_QBITS);
+               int err;
+
+               RCU_INIT_POINTER(state->hook_entries, entry);
+               err = nf_queue(skb, state, verdict >> NF_VERDICT_QBITS);
                if (err < 0) {
                        if (err == -ESRCH &&
                           (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
@@ -362,7 +372,6 @@ next_hook:
                        kfree_skb(skb);
                }
        }
-       rcu_read_unlock();
        return ret;
 }
 EXPORT_SYMBOL(nf_hook_slow);
@@ -482,7 +491,7 @@ static int __net_init netfilter_net_init(struct net *net)
 
        for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) {
                for (h = 0; h < NF_MAX_HOOKS; h++)
-                       INIT_LIST_HEAD(&net->nf.hooks[i][h]);
+                       RCU_INIT_POINTER(net->nf.hooks[i][h], NULL);
        }
 
 #ifdef CONFIG_PROC_FS