inet: fix possible panic in reqsk_queue_unlink()
[cascardo/linux.git] / net / ipv4 / tcp_ipv4.c
index f1756ee..fc1c658 100644 (file)
@@ -122,7 +122,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
           and use initial timestamp retrieved from peer table.
         */
        if (tcptw->tw_ts_recent_stamp &&
-           (twp == NULL || (sysctl_tcp_tw_reuse &&
+           (!twp || (sysctl_tcp_tw_reuse &&
                             get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
                tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
                if (tp->write_seq == 0)
@@ -189,7 +189,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 
        if (!inet->inet_saddr)
                inet->inet_saddr = fl4->saddr;
-       inet->inet_rcv_saddr = inet->inet_saddr;
+       sk_rcv_saddr_set(sk, inet->inet_saddr);
 
        if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
                /* Reset inherited state */
@@ -204,7 +204,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
                tcp_fetch_timewait_stamp(sk, &rt->dst);
 
        inet->inet_dport = usin->sin_port;
-       inet->inet_daddr = daddr;
+       sk_daddr_set(sk, daddr);
 
        inet_csk(sk)->icsk_ext_hdr_len = 0;
        if (inet_opt)
@@ -310,6 +310,34 @@ static void do_redirect(struct sk_buff *skb, struct sock *sk)
                dst->ops->redirect(dst, sk, skb);
 }
 
+
+/* Handle ICMP messages on TCP_NEW_SYN_RECV request sockets.
+ *
+ * @sk:  request socket; it is reinterpreted as a request_sock through
+ *       inet_reqsk(), so it must not be a full socket.
+ * @seq: sequence number compared against the ISN recorded for the request
+ *       (tcp_rsk(req)->snt_isn).
+ *
+ * If @seq does not match, LINUX_MIB_OUTOFWINDOWICMPS is incremented and
+ * reqsk_put() is called on the request (presumably releasing the reference
+ * taken by the caller's lookup - TODO confirm).  If it matches,
+ * LINUX_MIB_LISTENDROPS is incremented and the request is passed to
+ * inet_csk_reqsk_queue_drop() on its listener, req->rsk_listener.
+ * Nothing is returned to the caller in either case.
+ */
+void tcp_req_err(struct sock *sk, u32 seq)
+{
+       struct request_sock *req = inet_reqsk(sk);
+       struct net *net = sock_net(sk);
+
+       /* ICMPs are not backlogged, hence we cannot get
+        * an established socket here.
+        */
+       WARN_ON(req->sk);
+
+       if (seq != tcp_rsk(req)->snt_isn) {
+               NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
+               reqsk_put(req);
+       } else {
+               /*
+                * Still in SYN_RECV, just remove it silently.
+                * There is no good way to pass the error to the newly
+                * created socket, and POSIX does not want network
+                * errors returned from accept().
+                */
+               NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS);
+               inet_csk_reqsk_queue_drop(req->rsk_listener, req);
+       }
+}
+EXPORT_SYMBOL(tcp_req_err);
+
 /*
  * This routine is called by the ICMP module when it gets some
  * sort of error condition.  If err < 0 then the socket should
@@ -343,8 +371,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
        int err;
        struct net *net = dev_net(icmp_skb->dev);
 
-       sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
-                       iph->saddr, th->source, inet_iif(icmp_skb));
+       sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
+                                      th->dest, iph->saddr, ntohs(th->source),
+                                      inet_iif(icmp_skb));
        if (!sk) {
                ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
                return;
@@ -353,6 +382,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
                inet_twsk_put(inet_twsk(sk));
                return;
        }
+       seq = ntohl(th->seq);
+       if (sk->sk_state == TCP_NEW_SYN_RECV)
+               return tcp_req_err(sk, seq);
 
        bh_lock_sock(sk);
        /* If too many ICMPs get dropped on busy
@@ -374,7 +406,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 
        icsk = inet_csk(sk);
        tp = tcp_sk(sk);
-       seq = ntohl(th->seq);
        /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
        fastopen = tp->fastopen_rsk;
        snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
@@ -458,42 +489,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
        }
 
        switch (sk->sk_state) {
-               struct request_sock *req, **prev;
-       case TCP_LISTEN:
-               if (sock_owned_by_user(sk))
-                       goto out;
-
-               req = inet_csk_search_req(sk, &prev, th->dest,
-                                         iph->daddr, iph->saddr);
-               if (!req)
-                       goto out;
-
-               /* ICMPs are not backlogged, hence we cannot get
-                  an established socket here.
-                */
-               WARN_ON(req->sk);
-
-               if (seq != tcp_rsk(req)->snt_isn) {
-                       NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
-                       goto out;
-               }
-
-               /*
-                * Still in SYN_RECV, just remove it silently.
-                * There is no good way to pass the error to the newly
-                * created socket, and POSIX does not want network
-                * errors returned from accept().
-                */
-               inet_csk_reqsk_queue_drop(sk, req, prev);
-               NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
-               goto out;
-
        case TCP_SYN_SENT:
        case TCP_SYN_RECV:
                /* Only in fast or simultaneous open. If a fast open socket is
                 * already accepted it is treated as a connected one below.
                 */
-               if (fastopen && fastopen->sk == NULL)
+               if (fastopen && !fastopen->sk)
                        break;
 
                if (!sock_owned_by_user(sk)) {
@@ -647,7 +648,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
                if (!key)
                        goto release_sk1;
 
-               genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
+               genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
                if (genhash || memcmp(hash_location, newhash, 16) != 0)
                        goto release_sk1;
        } else {
@@ -855,35 +856,6 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
        kfree(inet_rsk(req)->opt);
 }
 
-/*
- * Return true if a syncookie should be sent
- */
-bool tcp_syn_flood_action(struct sock *sk,
-                        const struct sk_buff *skb,
-                        const char *proto)
-{
-       const char *msg = "Dropping request";
-       bool want_cookie = false;
-       struct listen_sock *lopt;
-
-#ifdef CONFIG_SYN_COOKIES
-       if (sysctl_tcp_syncookies) {
-               msg = "Sending cookies";
-               want_cookie = true;
-               NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
-       } else
-#endif
-               NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
-
-       lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
-       if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
-               lopt->synflood_warned = 1;
-               pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
-                       proto, ntohs(tcp_hdr(skb)->dest), msg);
-       }
-       return want_cookie;
-}
-EXPORT_SYMBOL(tcp_syn_flood_action);
 
 #ifdef CONFIG_TCP_MD5SIG
 /*
@@ -897,10 +869,10 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
                                         const union tcp_md5_addr *addr,
                                         int family)
 {
-       struct tcp_sock *tp = tcp_sk(sk);
+       const struct tcp_sock *tp = tcp_sk(sk);
        struct tcp_md5sig_key *key;
        unsigned int size = sizeof(struct in_addr);
-       struct tcp_md5sig_info *md5sig;
+       const struct tcp_md5sig_info *md5sig;
 
        /* caller either holds rcu_read_lock() or socket lock */
        md5sig = rcu_dereference_check(tp->md5sig_info,
@@ -923,24 +895,15 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
 EXPORT_SYMBOL(tcp_md5_do_lookup);
 
 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
-                                        struct sock *addr_sk)
+                                        const struct sock *addr_sk)
 {
-       union tcp_md5_addr *addr;
+       const union tcp_md5_addr *addr;
 
-       addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
+       addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
        return tcp_md5_do_lookup(sk, addr, AF_INET);
 }
 EXPORT_SYMBOL(tcp_v4_md5_lookup);
 
-static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
-                                                     struct request_sock *req)
-{
-       union tcp_md5_addr *addr;
-
-       addr = (union tcp_md5_addr *)&inet_rsk(req)->ir_rmt_addr;
-       return tcp_md5_do_lookup(sk, addr, AF_INET);
-}
-
 /* This can be called on a newly created socket, from other files */
 int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
                   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
@@ -1101,8 +1064,8 @@ clear_hash_noput:
        return 1;
 }
 
-int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
-                       const struct sock *sk, const struct request_sock *req,
+int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
+                       const struct sock *sk,
                        const struct sk_buff *skb)
 {
        struct tcp_md5sig_pool *hp;
@@ -1110,12 +1073,9 @@ int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
        const struct tcphdr *th = tcp_hdr(skb);
        __be32 saddr, daddr;
 
-       if (sk) {
-               saddr = inet_sk(sk)->inet_saddr;
-               daddr = inet_sk(sk)->inet_daddr;
-       } else if (req) {
-               saddr = inet_rsk(req)->ir_loc_addr;
-               daddr = inet_rsk(req)->ir_rmt_addr;
+       if (sk) { /* valid for establish/request sockets */
+               saddr = sk->sk_rcv_saddr;
+               daddr = sk->sk_daddr;
        } else {
                const struct iphdr *iph = ip_hdr(skb);
                saddr = iph->saddr;
@@ -1152,8 +1112,9 @@ clear_hash_noput:
 }
 EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
 
-static bool __tcp_v4_inbound_md5_hash(struct sock *sk,
-                                     const struct sk_buff *skb)
+/* Called with rcu_read_lock() */
+static bool tcp_v4_inbound_md5_hash(struct sock *sk,
+                                   const struct sk_buff *skb)
 {
        /*
         * This gets called for each TCP segment that arrives
@@ -1193,7 +1154,7 @@ static bool __tcp_v4_inbound_md5_hash(struct sock *sk,
         */
        genhash = tcp_v4_md5_hash_skb(newhash,
                                      hash_expected,
-                                     NULL, NULL, skb);
+                                     NULL, skb);
 
        if (genhash || memcmp(hash_location, newhash, 16) != 0) {
                net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
@@ -1205,28 +1166,16 @@ static bool __tcp_v4_inbound_md5_hash(struct sock *sk,
        }
        return false;
 }
-
-static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
-{
-       bool ret;
-
-       rcu_read_lock();
-       ret = __tcp_v4_inbound_md5_hash(sk, skb);
-       rcu_read_unlock();
-
-       return ret;
-}
-
 #endif
 
-static void tcp_v4_init_req(struct request_sock *req, struct sock *sk,
+static void tcp_v4_init_req(struct request_sock *req, struct sock *sk_listener,
                            struct sk_buff *skb)
 {
        struct inet_request_sock *ireq = inet_rsk(req);
 
-       ireq->ir_loc_addr = ip_hdr(skb)->daddr;
-       ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
-       ireq->no_srccheck = inet_sk(sk)->transparent;
+       sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
+       sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
+       ireq->no_srccheck = inet_sk(sk_listener)->transparent;
        ireq->opt = tcp_v4_save_options(skb);
 }
 
@@ -1259,7 +1208,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = {
 static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
        .mss_clamp      =       TCP_MSS_DEFAULT,
 #ifdef CONFIG_TCP_MD5SIG
-       .md5_lookup     =       tcp_v4_reqsk_md5_lookup,
+       .req_md5_lookup =       tcp_v4_md5_lookup,
        .calc_md5_hash  =       tcp_v4_md5_hash_skb,
 #endif
        .init_req       =       tcp_v4_init_req,
@@ -1318,8 +1267,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
        newtp                 = tcp_sk(newsk);
        newinet               = inet_sk(newsk);
        ireq                  = inet_rsk(req);
-       newinet->inet_daddr   = ireq->ir_rmt_addr;
-       newinet->inet_rcv_saddr = ireq->ir_loc_addr;
+       sk_daddr_set(newsk, ireq->ir_rmt_addr);
+       sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
        newinet->inet_saddr           = ireq->ir_loc_addr;
        inet_opt              = ireq->opt;
        rcu_assign_pointer(newinet->inet_opt, inet_opt);
@@ -1356,7 +1305,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
        /* Copy over the MD5 key from the original socket */
        key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
                                AF_INET);
-       if (key != NULL) {
+       if (key) {
                /*
                 * We're using one, so create a matching key
                 * on the newsk structure. If we fail to get
@@ -1391,15 +1340,18 @@ EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
 
 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 {
-       struct tcphdr *th = tcp_hdr(skb);
+       const struct tcphdr *th = tcp_hdr(skb);
        const struct iphdr *iph = ip_hdr(skb);
+       struct request_sock *req;
        struct sock *nsk;
-       struct request_sock **prev;
-       /* Find possible connection requests. */
-       struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
-                                                      iph->saddr, iph->daddr);
-       if (req)
-               return tcp_check_req(sk, skb, req, prev, false);
+
+       req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr);
+       if (req) {
+               nsk = tcp_check_req(sk, skb, req, false);
+               if (!nsk)
+                       reqsk_put(req);
+               return nsk;
+       }
 
        nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
                        th->source, iph->daddr, th->dest, inet_iif(skb));
@@ -1439,7 +1391,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
                sk_mark_napi_id(sk, skb);
                if (dst) {
                        if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
-                           dst->ops->check(dst, 0) == NULL) {
+                           !dst->ops->check(dst, 0)) {
                                dst_release(dst);
                                sk->sk_rx_dst = NULL;
                        }
@@ -1517,7 +1469,7 @@ void tcp_v4_early_demux(struct sk_buff *skb)
        if (sk) {
                skb->sk = sk;
                skb->destructor = sock_edemux;
-               if (sk->sk_state != TCP_TIME_WAIT) {
+               if (sk_fullsock(sk)) {
                        struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
 
                        if (dst)
@@ -1734,7 +1686,7 @@ do_time_wait:
                                                        iph->daddr, th->dest,
                                                        inet_iif(skb));
                if (sk2) {
-                       inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
+                       inet_twsk_deschedule(inet_twsk(sk));
                        inet_twsk_put(inet_twsk(sk));
                        sk = sk2;
                        goto process;
@@ -1846,7 +1798,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
        if (inet_csk(sk)->icsk_bind_hash)
                inet_put_port(sk);
 
-       BUG_ON(tp->fastopen_rsk != NULL);
+       BUG_ON(tp->fastopen_rsk);
 
        /* If socket is aborted during connect operation */
        tcp_free_fastopen_req(tp);
@@ -1904,13 +1856,13 @@ get_req:
                }
                sk        = sk_nulls_next(st->syn_wait_sk);
                st->state = TCP_SEQ_STATE_LISTENING;
-               read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+               spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
        } else {
                icsk = inet_csk(sk);
-               read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+               spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
                if (reqsk_queue_len(&icsk->icsk_accept_queue))
                        goto start_req;
-               read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+               spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
                sk = sk_nulls_next(sk);
        }
 get_sk:
@@ -1922,7 +1874,7 @@ get_sk:
                        goto out;
                }
                icsk = inet_csk(sk);
-               read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+               spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
                if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
 start_req:
                        st->uid         = sock_i_uid(sk);
@@ -1931,7 +1883,7 @@ start_req:
                        st->sbucket     = 0;
                        goto get_req;
                }
-               read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+               spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
        }
        spin_unlock_bh(&ilb->lock);
        st->offset = 0;
@@ -2150,7 +2102,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
        case TCP_SEQ_STATE_OPENREQ:
                if (v) {
                        struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
-                       read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+                       spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
                }
        case TCP_SEQ_STATE_LISTENING:
                if (v != SEQ_START_TOKEN)
@@ -2204,17 +2156,17 @@ void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
 }
 EXPORT_SYMBOL(tcp_proc_unregister);
 
-static void get_openreq4(const struct sock *sk, const struct request_sock *req,
+static void get_openreq4(const struct request_sock *req,
                         struct seq_file *f, int i, kuid_t uid)
 {
        const struct inet_request_sock *ireq = inet_rsk(req);
-       long delta = req->expires - jiffies;
+       long delta = req->rsk_timer.expires - jiffies;
 
        seq_printf(f, "%4d: %08X:%04X %08X:%04X"
                " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
                i,
                ireq->ir_loc_addr,
-               ntohs(inet_sk(sk)->inet_sport),
+               ireq->ir_num,
                ireq->ir_rmt_addr,
                ntohs(ireq->ir_rmt_port),
                TCP_SYN_RECV,
@@ -2225,7 +2177,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req,
                from_kuid_munged(seq_user_ns(f), uid),
                0,  /* non standard timer */
                0, /* open_requests have no inode */
-               atomic_read(&sk->sk_refcnt),
+               0,
                req);
 }
 
@@ -2291,9 +2243,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
 static void get_timewait4_sock(const struct inet_timewait_sock *tw,
                               struct seq_file *f, int i)
 {
+       long delta = tw->tw_timer.expires - jiffies;
        __be32 dest, src;
        __u16 destp, srcp;
-       s32 delta = tw->tw_ttd - inet_tw_time_stamp();
 
        dest  = tw->tw_daddr;
        src   = tw->tw_rcv_saddr;
@@ -2332,7 +2284,7 @@ static int tcp4_seq_show(struct seq_file *seq, void *v)
                        get_tcp4_sock(v, seq, st->num);
                break;
        case TCP_SEQ_STATE_OPENREQ:
-               get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid);
+               get_openreq4(v, seq, st->num, st->uid);
                break;
        }
 out:
@@ -2460,6 +2412,8 @@ static int __net_init tcp_sk_init(struct net *net)
        }
        net->ipv4.sysctl_tcp_ecn = 2;
        net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
+       net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
+       net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
        return 0;
 
 fail: