X-Git-Url: http://git.cascardo.info/?a=blobdiff_plain;f=net%2Fipv4%2Finet_hashtables.c;h=ca97835bfec4b2291446a54d7f6bb1af408afc29;hb=577f12c07e4edd54730dc559a9c7bc44d22bf7dc;hp=0d9e9d7bb029373e37e4de6ed4003397d41f82cd;hpb=8c57a5e7b2820f349c95b8c8393fec1e0f4070d2;p=cascardo%2Flinux.git diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 0d9e9d7bb029..ca97835bfec4 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -25,6 +25,7 @@ #include #include #include +#include #include static u32 inet_ehashfn(const struct net *net, const __be32 laddr, @@ -172,7 +173,7 @@ EXPORT_SYMBOL_GPL(__inet_inherit_port); static inline int compute_score(struct sock *sk, struct net *net, const unsigned short hnum, const __be32 daddr, - const int dif) + const int dif, bool exact_dif) { int score = -1; struct inet_sock *inet = inet_sk(sk); @@ -186,7 +187,7 @@ static inline int compute_score(struct sock *sk, struct net *net, return -1; score += 4; } - if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if || exact_dif) { if (sk->sk_bound_dev_if != dif) return -1; score += 4; @@ -198,13 +199,13 @@ static inline int compute_score(struct sock *sk, struct net *net, } /* - * Don't inline this cruft. Here are some nice properties to exploit here. The - * BSD API does not allow a listening sock to specify the remote port nor the + * Here are some nice properties to exploit here. The BSD API + * does not allow a listening sock to specify the remote port nor the * remote address for the connection. So always assume those are both * wildcarded during the search since they can never be otherwise. */ - +/* called with rcu_read_lock() : No refcount taken on the socket */ struct sock *__inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, @@ -212,38 +213,28 @@ struct sock *__inet_lookup_listener(struct net *net, const __be32 daddr, const unsigned short hnum, const int dif) { - struct sock *sk, *result; - struct hlist_nulls_node *node; unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; - int score, hiscore, matches = 0, reuseport = 0; - bool select_ok = true; + int score, hiscore = 0, matches = 0, reuseport = 0; + bool exact_dif = inet_exact_dif_match(net, skb); + struct sock *sk, *result = NULL; u32 phash = 0; - rcu_read_lock(); -begin: - result = NULL; - hiscore = 0; - sk_nulls_for_each_rcu(sk, node, &ilb->head) { - score = compute_score(sk, net, hnum, daddr, dif); + sk_for_each_rcu(sk, &ilb->head) { + score = compute_score(sk, net, hnum, daddr, dif, exact_dif); if (score > hiscore) { - result = sk; - hiscore = score; reuseport = sk->sk_reuseport; if (reuseport) { phash = inet_ehashfn(net, daddr, hnum, saddr, sport); - if (select_ok) { - struct sock *sk2; - sk2 = reuseport_select_sock(sk, phash, - skb, doff); - if (sk2) { - result = sk2; - goto found; - } - } + result = reuseport_select_sock(sk, phash, + skb, doff); + if (result) + return result; matches = 1; } + result = sk; + hiscore = score; } else if (score == hiscore && reuseport) { matches++; if (reciprocal_scale(phash, matches) == 0) @@ -251,25 +242,6 @@ begin: phash = next_pseudo_random32(phash); } } - /* - * if the nulls value we got at the end of this lookup is - * not the expected one, we must restart lookup. - * We probably met an item that was moved to another chain. - */ - if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE) - goto begin; - if (result) { -found: - if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) - result = NULL; - else if (unlikely(compute_score(result, net, hnum, daddr, - dif) < hiscore)) { - sock_put(result); - select_ok = false; - goto begin; - } - } - rcu_read_unlock(); return result; } EXPORT_SYMBOL_GPL(__inet_lookup_listener); @@ -312,7 +284,6 @@ struct sock *__inet_lookup_established(struct net *net, unsigned int slot = hash & hashinfo->ehash_mask; struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; - rcu_read_lock(); begin: sk_nulls_for_each_rcu(sk, node, &head->chain) { if (sk->sk_hash != hash) @@ -339,7 +310,6 @@ begin: out: sk = NULL; found: - rcu_read_unlock(); return sk; } EXPORT_SYMBOL_GPL(__inet_lookup_established); @@ -392,7 +362,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, __sk_nulls_add_node_rcu(sk, &head->chain); if (tw) { sk_nulls_del_node_init_rcu((struct sock *)tw); - NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); + __NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED); } spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -472,10 +442,9 @@ static int inet_reuseport_add_sock(struct sock *sk, { struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash; struct sock *sk2; - struct hlist_nulls_node *node; kuid_t uid = sock_i_uid(sk); - sk_nulls_for_each_rcu(sk2, node, &ilb->head) { + sk_for_each_rcu(sk2, &ilb->head) { if (sk2 != sk && sk2->sk_family == sk->sk_family && ipv6_only_sock(sk2) == ipv6_only_sock(sk) && @@ -514,7 +483,12 @@ int __inet_hash(struct sock *sk, struct sock *osk, if (err) goto unlock; } - __sk_nulls_add_node_rcu(sk, &ilb->head); + if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && + sk->sk_family == AF_INET6) + hlist_add_tail_rcu(&sk->sk_node, &ilb->head); + else + hlist_add_head_rcu(&sk->sk_node, &ilb->head); + sock_set_flag(sk, SOCK_RCU_FREE); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); unlock: spin_unlock(&ilb->lock); @@ -541,20 +515,25 @@ void inet_unhash(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; spinlock_t *lock; + bool listener = false; int done; if (sk_unhashed(sk)) return; - if (sk->sk_state == TCP_LISTEN) + if (sk->sk_state == TCP_LISTEN) { lock = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)].lock; - else + listener = true; + } else { lock = inet_ehash_lockp(hashinfo, sk->sk_hash); - + } spin_lock_bh(lock); if (rcu_access_pointer(sk->sk_reuseport_cb)) reuseport_detach_sock(sk); - done = __sk_nulls_del_node_init_rcu(sk); + if (listener) + done = __sk_del_node_init(sk); + else + done = __sk_nulls_del_node_init_rcu(sk); if (done) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); spin_unlock_bh(lock); @@ -690,9 +669,8 @@ void inet_hashinfo_init(struct inet_hashinfo *h) for (i = 0; i < INET_LHTABLE_SIZE; i++) { spin_lock_init(&h->listening_hash[i].lock); - INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head, - i + LISTENING_NULLS_BASE); - } + INIT_HLIST_HEAD(&h->listening_hash[i].head); + } } EXPORT_SYMBOL_GPL(inet_hashinfo_init);