Merge tag 'gcc-plugins-v4.9-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git...
[cascardo/linux.git] / net / ipv4 / inet_hashtables.c
index 0d9e9d7..ca97835 100644 (file)
@@ -25,6 +25,7 @@
 #include <net/inet_hashtables.h>
 #include <net/secure_seq.h>
 #include <net/ip.h>
+#include <net/tcp.h>
 #include <net/sock_reuseport.h>
 
 static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
@@ -172,7 +173,7 @@ EXPORT_SYMBOL_GPL(__inet_inherit_port);
 
 static inline int compute_score(struct sock *sk, struct net *net,
                                const unsigned short hnum, const __be32 daddr,
-                               const int dif)
+                               const int dif, bool exact_dif)
 {
        int score = -1;
        struct inet_sock *inet = inet_sk(sk);
@@ -186,7 +187,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
                                return -1;
                        score += 4;
                }
-               if (sk->sk_bound_dev_if) {
+               if (sk->sk_bound_dev_if || exact_dif) {
                        if (sk->sk_bound_dev_if != dif)
                                return -1;
                        score += 4;
@@ -198,13 +199,13 @@ static inline int compute_score(struct sock *sk, struct net *net,
 }
 
 /*
- * Don't inline this cruft. Here are some nice properties to exploit here. The
- * BSD API does not allow a listening sock to specify the remote port nor the
+ * There are some nice properties to exploit here. The BSD API
+ * does not allow a listening sock to specify the remote port nor the
  * remote address for the connection. So always assume those are both
  * wildcarded during the search since they can never be otherwise.
  */
 
-
+/* called with rcu_read_lock() : No refcount taken on the socket */
 struct sock *__inet_lookup_listener(struct net *net,
                                    struct inet_hashinfo *hashinfo,
                                    struct sk_buff *skb, int doff,
@@ -212,38 +213,28 @@ struct sock *__inet_lookup_listener(struct net *net,
                                    const __be32 daddr, const unsigned short hnum,
                                    const int dif)
 {
-       struct sock *sk, *result;
-       struct hlist_nulls_node *node;
        unsigned int hash = inet_lhashfn(net, hnum);
        struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
-       int score, hiscore, matches = 0, reuseport = 0;
-       bool select_ok = true;
+       int score, hiscore = 0, matches = 0, reuseport = 0;
+       bool exact_dif = inet_exact_dif_match(net, skb);
+       struct sock *sk, *result = NULL;
        u32 phash = 0;
 
-       rcu_read_lock();
-begin:
-       result = NULL;
-       hiscore = 0;
-       sk_nulls_for_each_rcu(sk, node, &ilb->head) {
-               score = compute_score(sk, net, hnum, daddr, dif);
+       sk_for_each_rcu(sk, &ilb->head) {
+               score = compute_score(sk, net, hnum, daddr, dif, exact_dif);
                if (score > hiscore) {
-                       result = sk;
-                       hiscore = score;
                        reuseport = sk->sk_reuseport;
                        if (reuseport) {
                                phash = inet_ehashfn(net, daddr, hnum,
                                                     saddr, sport);
-                               if (select_ok) {
-                                       struct sock *sk2;
-                                       sk2 = reuseport_select_sock(sk, phash,
-                                                                   skb, doff);
-                                       if (sk2) {
-                                               result = sk2;
-                                               goto found;
-                                       }
-                               }
+                               result = reuseport_select_sock(sk, phash,
+                                                              skb, doff);
+                               if (result)
+                                       return result;
                                matches = 1;
                        }
+                       result = sk;
+                       hiscore = score;
                } else if (score == hiscore && reuseport) {
                        matches++;
                        if (reciprocal_scale(phash, matches) == 0)
@@ -251,25 +242,6 @@ begin:
                        phash = next_pseudo_random32(phash);
                }
        }
-       /*
-        * if the nulls value we got at the end of this lookup is
-        * not the expected one, we must restart lookup.
-        * We probably met an item that was moved to another chain.
-        */
-       if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
-               goto begin;
-       if (result) {
-found:
-               if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
-                       result = NULL;
-               else if (unlikely(compute_score(result, net, hnum, daddr,
-                                 dif) < hiscore)) {
-                       sock_put(result);
-                       select_ok = false;
-                       goto begin;
-               }
-       }
-       rcu_read_unlock();
        return result;
 }
 EXPORT_SYMBOL_GPL(__inet_lookup_listener);
@@ -312,7 +284,6 @@ struct sock *__inet_lookup_established(struct net *net,
        unsigned int slot = hash & hashinfo->ehash_mask;
        struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
 
-       rcu_read_lock();
 begin:
        sk_nulls_for_each_rcu(sk, node, &head->chain) {
                if (sk->sk_hash != hash)
@@ -339,7 +310,6 @@ begin:
 out:
        sk = NULL;
 found:
-       rcu_read_unlock();
        return sk;
 }
 EXPORT_SYMBOL_GPL(__inet_lookup_established);
@@ -392,7 +362,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
        __sk_nulls_add_node_rcu(sk, &head->chain);
        if (tw) {
                sk_nulls_del_node_init_rcu((struct sock *)tw);
-               NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
+               __NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED);
        }
        spin_unlock(lock);
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -472,10 +442,9 @@ static int inet_reuseport_add_sock(struct sock *sk,
 {
        struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash;
        struct sock *sk2;
-       struct hlist_nulls_node *node;
        kuid_t uid = sock_i_uid(sk);
 
-       sk_nulls_for_each_rcu(sk2, node, &ilb->head) {
+       sk_for_each_rcu(sk2, &ilb->head) {
                if (sk2 != sk &&
                    sk2->sk_family == sk->sk_family &&
                    ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
@@ -514,7 +483,12 @@ int __inet_hash(struct sock *sk, struct sock *osk,
                if (err)
                        goto unlock;
        }
-       __sk_nulls_add_node_rcu(sk, &ilb->head);
+       if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
+               sk->sk_family == AF_INET6)
+               hlist_add_tail_rcu(&sk->sk_node, &ilb->head);
+       else
+               hlist_add_head_rcu(&sk->sk_node, &ilb->head);
+       sock_set_flag(sk, SOCK_RCU_FREE);
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 unlock:
        spin_unlock(&ilb->lock);
@@ -541,20 +515,25 @@ void inet_unhash(struct sock *sk)
 {
        struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
        spinlock_t *lock;
+       bool listener = false;
        int done;
 
        if (sk_unhashed(sk))
                return;
 
-       if (sk->sk_state == TCP_LISTEN)
+       if (sk->sk_state == TCP_LISTEN) {
                lock = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)].lock;
-       else
+               listener = true;
+       } else {
                lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
-
+       }
        spin_lock_bh(lock);
        if (rcu_access_pointer(sk->sk_reuseport_cb))
                reuseport_detach_sock(sk);
-       done = __sk_nulls_del_node_init_rcu(sk);
+       if (listener)
+               done = __sk_del_node_init(sk);
+       else
+               done = __sk_nulls_del_node_init_rcu(sk);
        if (done)
                sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
        spin_unlock_bh(lock);
@@ -690,9 +669,8 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
 
        for (i = 0; i < INET_LHTABLE_SIZE; i++) {
                spin_lock_init(&h->listening_hash[i].lock);
-               INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head,
-                                     i + LISTENING_NULLS_BASE);
-               }
+               INIT_HLIST_HEAD(&h->listening_hash[i].head);
+       }
 }
 EXPORT_SYMBOL_GPL(inet_hashinfo_init);