Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
[cascardo/linux.git] / net / xfrm / xfrm_state.c
index a30f898..5685da0 100644 (file)
 
 #include "xfrm_hash.h"
 
+#define xfrm_state_deref_prot(table, net) \
+       rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock)) /* fetch an RCU-managed table pointer on the update side; lockdep checks that (net)'s xfrm_state_lock is held */
+
+static void xfrm_state_gc_task(struct work_struct *work);
+
 /* Each xfrm_state may be linked to two tables:
 
    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
  */
 
 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
+static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation); /* written around the table swap in xfrm_hash_resize(); xfrm_state_find() samples it and returns -EAGAIN if it changed. NOTE(review): the counter is file-global but its write side runs under the per-net xfrm_state_lock - confirm resizes in different netns cannot overlap */
+
+static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task); /* single file-scope work item; its handler is xfrm_state_gc_task() */
+static HLIST_HEAD(xfrm_state_gc_list); /* states queued by __xfrm_state_destroy(); accessed under xfrm_state_gc_lock */
+
+static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x)
+{
+       return atomic_inc_not_zero(&x->refcnt); /* take a reference only while refcnt is still non-zero; a false return means no reference was taken and the caller must not use x */
+}
 
 static inline unsigned int xfrm_dst_hash(struct net *net,
                                         const xfrm_address_t *daddr,
@@ -76,18 +90,18 @@ static void xfrm_hash_transfer(struct hlist_head *list,
                h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
                                    x->props.reqid, x->props.family,
                                    nhashmask);
-               hlist_add_head(&x->bydst, ndsttable+h);
+               hlist_add_head_rcu(&x->bydst, ndsttable + h);
 
                h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
                                    x->props.family,
                                    nhashmask);
-               hlist_add_head(&x->bysrc, nsrctable+h);
+               hlist_add_head_rcu(&x->bysrc, nsrctable + h);
 
                if (x->id.spi) {
                        h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
                                            x->id.proto, x->props.family,
                                            nhashmask);
-                       hlist_add_head(&x->byspi, nspitable+h);
+                       hlist_add_head_rcu(&x->byspi, nspitable + h);
                }
        }
 }
@@ -122,25 +136,29 @@ static void xfrm_hash_resize(struct work_struct *work)
        }
 
        spin_lock_bh(&net->xfrm.xfrm_state_lock);
+       write_seqcount_begin(&xfrm_state_hash_generation);
 
        nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
+       odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net);
        for (i = net->xfrm.state_hmask; i >= 0; i--)
-               xfrm_hash_transfer(net->xfrm.state_bydst+i, ndst, nsrc, nspi,
-                                  nhashmask);
+               xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nhashmask);
 
-       odst = net->xfrm.state_bydst;
-       osrc = net->xfrm.state_bysrc;
-       ospi = net->xfrm.state_byspi;
+       osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net);
+       ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net);
        ohashmask = net->xfrm.state_hmask;
 
-       net->xfrm.state_bydst = ndst;
-       net->xfrm.state_bysrc = nsrc;
-       net->xfrm.state_byspi = nspi;
+       rcu_assign_pointer(net->xfrm.state_bydst, ndst);
+       rcu_assign_pointer(net->xfrm.state_bysrc, nsrc);
+       rcu_assign_pointer(net->xfrm.state_byspi, nspi);
        net->xfrm.state_hmask = nhashmask;
 
+       write_seqcount_end(&xfrm_state_hash_generation);
        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 
        osize = (ohashmask + 1) * sizeof(struct hlist_head);
+
+       synchronize_rcu();
+
        xfrm_hash_free(odst, osize);
        xfrm_hash_free(osrc, osize);
        xfrm_hash_free(ospi, osize);
@@ -356,15 +374,16 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
 
 static void xfrm_state_gc_task(struct work_struct *work)
 {
-       struct net *net = container_of(work, struct net, xfrm.state_gc_work);
        struct xfrm_state *x;
        struct hlist_node *tmp;
        struct hlist_head gc_list;
 
        spin_lock_bh(&xfrm_state_gc_lock);
-       hlist_move_list(&net->xfrm.state_gc_list, &gc_list);
+       hlist_move_list(&xfrm_state_gc_list, &gc_list); /* move everything queued on the file-scope list onto the local gc_list while xfrm_state_gc_lock is held */
        spin_unlock_bh(&xfrm_state_gc_lock);
 
+       synchronize_rcu(); /* wait for an RCU grace period before the entries are passed to xfrm_state_gc_destroy() below */
+
        hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
                xfrm_state_gc_destroy(x);
 }
@@ -501,14 +520,12 @@ EXPORT_SYMBOL(xfrm_state_alloc);
 
 void __xfrm_state_destroy(struct xfrm_state *x)
 {
-       struct net *net = xs_net(x);
-
        WARN_ON(x->km.state != XFRM_STATE_DEAD);
 
        spin_lock_bh(&xfrm_state_gc_lock);
-       hlist_add_head(&x->gclist, &net->xfrm.state_gc_list);
+       hlist_add_head(&x->gclist, &xfrm_state_gc_list); /* queue x on the file-scope GC list; xfrm_state_gc_task() later hands it to xfrm_state_gc_destroy() */
        spin_unlock_bh(&xfrm_state_gc_lock);
-       schedule_work(&net->xfrm.state_gc_work);
+       schedule_work(&xfrm_state_gc_work); /* schedule the shared work item whose handler drains that list */
 }
 EXPORT_SYMBOL(__xfrm_state_destroy);
 
@@ -521,10 +538,10 @@ int __xfrm_state_delete(struct xfrm_state *x)
                x->km.state = XFRM_STATE_DEAD;
                spin_lock(&net->xfrm.xfrm_state_lock);
                list_del(&x->km.all);
-               hlist_del(&x->bydst);
-               hlist_del(&x->bysrc);
+               hlist_del_rcu(&x->bydst);
+               hlist_del_rcu(&x->bysrc);
                if (x->id.spi)
-                       hlist_del(&x->byspi);
+                       hlist_del_rcu(&x->byspi);
                net->xfrm.state_num--;
                spin_unlock(&net->xfrm.xfrm_state_lock);
 
@@ -660,7 +677,7 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
        unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
        struct xfrm_state *x;
 
-       hlist_for_each_entry(x, net->xfrm.state_byspi+h, byspi) {
+       hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
                if (x->props.family != family ||
                    x->id.spi       != spi ||
                    x->id.proto     != proto ||
@@ -669,7 +686,8 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
 
                if ((mark & x->mark.m) != x->mark.v)
                        continue;
-               xfrm_state_hold(x);
+               if (!xfrm_state_hold_rcu(x))
+                       continue;
                return x;
        }
 
@@ -684,7 +702,7 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
        unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
        struct xfrm_state *x;
 
-       hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) {
+       hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) {
                if (x->props.family != family ||
                    x->id.proto     != proto ||
                    !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
@@ -693,7 +711,8 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
 
                if ((mark & x->mark.m) != x->mark.v)
                        continue;
-               xfrm_state_hold(x);
+               if (!xfrm_state_hold_rcu(x))
+                       continue;
                return x;
        }
 
@@ -776,13 +795,16 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
        struct xfrm_state *best = NULL;
        u32 mark = pol->mark.v & pol->mark.m;
        unsigned short encap_family = tmpl->encap_family;
+       unsigned int sequence;
        struct km_event c;
 
        to_put = NULL;
 
-       spin_lock_bh(&net->xfrm.xfrm_state_lock);
+       sequence = read_seqcount_begin(&xfrm_state_hash_generation);
+
+       rcu_read_lock();
        h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
-       hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
+       hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
                if (x->props.family == encap_family &&
                    x->props.reqid == tmpl->reqid &&
                    (mark & x->mark.m) == x->mark.v &&
@@ -798,7 +820,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
                goto found;
 
        h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
-       hlist_for_each_entry(x, net->xfrm.state_bydst+h_wildcard, bydst) {
+       hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) {
                if (x->props.family == encap_family &&
                    x->props.reqid == tmpl->reqid &&
                    (mark & x->mark.m) == x->mark.v &&
@@ -851,19 +873,21 @@ found:
                }
 
                if (km_query(x, tmpl, pol) == 0) {
+                       spin_lock_bh(&net->xfrm.xfrm_state_lock);
                        x->km.state = XFRM_STATE_ACQ;
                        list_add(&x->km.all, &net->xfrm.state_all);
-                       hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
+                       hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
                        h = xfrm_src_hash(net, daddr, saddr, encap_family);
-                       hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
+                       hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
                        if (x->id.spi) {
                                h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
-                               hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
+                               hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
                        }
                        x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
                        tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
                        net->xfrm.state_num++;
                        xfrm_hash_grow_check(net, x->bydst.next != NULL);
+                       spin_unlock_bh(&net->xfrm.xfrm_state_lock);
                } else {
                        x->km.state = XFRM_STATE_DEAD;
                        to_put = x;
@@ -872,13 +896,26 @@ found:
                }
        }
 out:
-       if (x)
-               xfrm_state_hold(x);
-       else
+       if (x) {
+               if (!xfrm_state_hold_rcu(x)) {
+                       *err = -EAGAIN;
+                       x = NULL;
+               }
+       } else {
                *err = acquire_in_progress ? -EAGAIN : error;
-       spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+       }
+       rcu_read_unlock();
        if (to_put)
                xfrm_state_put(to_put);
+
+       if (read_seqcount_retry(&xfrm_state_hash_generation, sequence)) {
+               *err = -EAGAIN;
+               if (x) {
+                       xfrm_state_put(x);
+                       x = NULL;
+               }
+       }
+
        return x;
 }
 
@@ -946,16 +983,16 @@ static void __xfrm_state_insert(struct xfrm_state *x)
 
        h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
                          x->props.reqid, x->props.family);
-       hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
+       hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
 
        h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family);
-       hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
+       hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
 
        if (x->id.spi) {
                h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto,
                                  x->props.family);
 
-               hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
+               hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
        }
 
        tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
@@ -1064,9 +1101,9 @@ static struct xfrm_state *__find_acq_core(struct net *net,
                xfrm_state_hold(x);
                tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
                list_add(&x->km.all, &net->xfrm.state_all);
-               hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
+               hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
                h = xfrm_src_hash(net, daddr, saddr, family);
-               hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
+               hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
 
                net->xfrm.state_num++;
 
@@ -1582,7 +1619,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
        if (x->id.spi) {
                spin_lock_bh(&net->xfrm.xfrm_state_lock);
                h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
-               hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
+               hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
                spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 
                err = 0;
@@ -2100,8 +2137,6 @@ int __net_init xfrm_state_init(struct net *net)
 
        net->xfrm.state_num = 0;
        INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize);
-       INIT_HLIST_HEAD(&net->xfrm.state_gc_list);
-       INIT_WORK(&net->xfrm.state_gc_work, xfrm_state_gc_task);
        spin_lock_init(&net->xfrm.xfrm_state_lock);
        return 0;
 
@@ -2119,7 +2154,7 @@ void xfrm_state_fini(struct net *net)
 
        flush_work(&net->xfrm.state_hash_work);
        xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
-       flush_work(&net->xfrm.state_gc_work);
+       flush_work(&xfrm_state_gc_work);
 
        WARN_ON(!list_empty(&net->xfrm.state_all));