[NET] CORE: Introducing new memory accounting interface.
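
As tcp_input.c sees it, the new accounting interface is a move from the TCP-specific sk_stream_* helpers to generic socket-level ones, with sk_rmem_schedule() taking an explicit byte count (the skb's truesize) rather than the skb itself. The conversions in this file map as follows; this is a summary of the hunks below, not the full interface (the new helpers live in include/net/sock.h):

        /* old (TCP/stream-only)                 new (generic socket)
         * sk_stream_mem_reclaim(sk)         ->  sk_mem_reclaim(sk)
         * sk_stream_rmem_schedule(sk, skb)  ->  sk_rmem_schedule(sk, skb->truesize)
         * sk_stream_set_owner_r(skb, sk)    ->  skb_set_owner_r(skb, sk)
         * sk_stream_free_skb(sk, skb)       ->  sk_wmem_free_skb(sk, skb)
         */
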
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6ca77f8..722c9cb 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -289,8 +289,8 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        /* Optimize this! */
-       int truesize = tcp_win_from_space(skb->truesize)/2;
-       int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2;
+       int truesize = tcp_win_from_space(skb->truesize) >> 1;
+       int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;
 
        while (tp->rcv_ssthresh <= window) {
                if (truesize <= skb->len)
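
This hunk is cosmetic: for the non-negative window values tcp_win_from_space() yields, n / 2 and n >> 1 produce the same result. The two forms differ only for negative n, where C division truncates toward zero while an arithmetic right shift rounds toward minus infinity. A minimal illustration (hypothetical values, not from the patch):

        int a = 7 / 2;    /* 3 */
        int b = 7 >> 1;   /* 3: identical for any n >= 0 */
        int c = -7 / 2;   /* -3: truncates toward zero */
        int d = -7 >> 1;  /* -4 on arithmetic-shift targets (implementation-defined in C) */
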
@@ -591,7 +591,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
                         * restart window, so that we send ACKs quickly.
                         */
                        tcp_incr_quickack(sk);
-                       sk_stream_mem_reclaim(sk);
+                       sk_mem_reclaim(sk);
                }
        }
        icsk->icsk_ack.lrcvtime = now;
@@ -1118,20 +1118,19 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
  * highest SACK block). Also calculate the lowest snd_nxt among the remaining
  * retransmitted skbs to avoid some costly processing per ACKs.
  */
-static int tcp_mark_lost_retrans(struct sock *sk)
+static void tcp_mark_lost_retrans(struct sock *sk)
 {
        const struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *skb;
-       int flag = 0;
        int cnt = 0;
        u32 new_low_seq = tp->snd_nxt;
-       u32 received_upto = TCP_SKB_CB(tp->highest_sack)->end_seq;
+       u32 received_upto = tcp_highest_sack_seq(tp);
 
        if (!tcp_is_fack(tp) || !tp->retrans_out ||
            !after(received_upto, tp->lost_retrans_low) ||
            icsk->icsk_ca_state != TCP_CA_Recovery)
-               return flag;
+               return;
 
        tcp_for_write_queue(skb, sk) {
                u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;
@@ -1159,7 +1158,6 @@ static int tcp_mark_lost_retrans(struct sock *sk)
                        if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
                                tp->lost_out += tcp_skb_pcount(skb);
                                TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-                               flag |= FLAG_DATA_SACKED;
                        }
                        NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
                } else {
@@ -1171,8 +1169,6 @@ static int tcp_mark_lost_retrans(struct sock *sk)
 
        if (tp->retrans_out)
                tp->lost_retrans_low = new_low_seq;
-
-       return flag;
 }
 
 static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb,
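
tcp_mark_lost_retrans() becomes void: the only value it could return was FLAG_DATA_SACKED, and its single caller merely OR-ed that into its flag word, presumably with no downstream consumer on this path, so both the local flag variable and the return plumbing go away. The call-site change (shown in a later hunk) is simply:

        /* before */
        flag |= tcp_mark_lost_retrans(sk);
        /* after */
        tcp_mark_lost_retrans(sk);
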
@@ -1240,9 +1236,10 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
        return in_sack;
 }
 
-static int tcp_sacktag_one(struct sk_buff *skb, struct tcp_sock *tp,
+static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
                           int *reord, int dup_sack, int fack_count)
 {
+       struct tcp_sock *tp = tcp_sk(sk);
        u8 sacked = TCP_SKB_CB(skb)->sacked;
        int flag = 0;
 
@@ -1250,8 +1247,7 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct tcp_sock *tp,
        if (dup_sack && (sacked & TCPCB_RETRANS)) {
                if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
                        tp->undo_retrans--;
-               if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una) &&
-                   (sacked & TCPCB_SACKED_ACKED))
+               if (sacked & TCPCB_SACKED_ACKED)
                        *reord = min(fack_count, *reord);
        }
 
@@ -1312,12 +1308,8 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct tcp_sock *tp,
                if (fack_count > tp->fackets_out)
                        tp->fackets_out = fack_count;
 
-               if (after(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
-                       tp->highest_sack = skb;
-
-       } else {
-               if (dup_sack && (sacked & TCPCB_RETRANS))
-                       *reord = min(fack_count, *reord);
+               if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
+                       tcp_advance_highest_sack(sk, skb);
        }
 
        /* D-SACK. We can detect redundant retransmission in S|R and plain R
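
Two changes land in tcp_sacktag_one(): it now receives the socket rather than the tcp_sock (deriving tp locally), since the highest-SACK accessors it now calls want an sk; and the dup-SACK reord handling is consolidated. The removed else-branch update only ever ran for skbs already marked TCPCB_SACKED_ACKED, which is exactly what the surviving check at the top tests once the !after(end_seq, tp->snd_una) guard is dropped:

        /* removed duplicate (reached only when TCPCB_SACKED_ACKED was set):
         *     if (dup_sack && (sacked & TCPCB_RETRANS))
         *             *reord = min(fack_count, *reord);
         * surviving check (the same skbs now reach it):
         *     if (sacked & TCPCB_SACKED_ACKED)
         *             *reord = min(fack_count, *reord);
         */
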
@@ -1339,8 +1331,6 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
                                        int dup_sack_in, int *fack_count,
                                        int *reord, int *flag)
 {
-       struct tcp_sock *tp = tcp_sk(sk);
-
        tcp_for_write_queue_from(skb, sk) {
                int in_sack = 0;
                int dup_sack = dup_sack_in;
@@ -1367,7 +1357,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
                        break;
 
                if (in_sack)
-                       *flag |= tcp_sacktag_one(skb, tp, reord, dup_sack, *fack_count);
+                       *flag |= tcp_sacktag_one(skb, sk, reord, dup_sack, *fack_count);
 
                *fack_count += tcp_skb_pcount(skb);
        }
@@ -1438,7 +1428,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
        if (!tp->sacked_out) {
                if (WARN_ON(tp->fackets_out))
                        tp->fackets_out = 0;
-               tp->highest_sack = tcp_write_queue_head(sk);
+               tcp_highest_sack_reset(sk);
        }
 
        found_dup_sack = tcp_check_dsack(tp, ack_skb, sp_wire,
@@ -1561,9 +1551,11 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
                                                       &fack_count, &reord, &flag);
 
                        /* ...tail remains todo... */
-                       if (TCP_SKB_CB(tp->highest_sack)->end_seq == cache->end_seq) {
+                       if (tcp_highest_sack_seq(tp) == cache->end_seq) {
                                /* ...but better entrypoint exists! */
-                               skb = tcp_write_queue_next(sk, tp->highest_sack);
+                               skb = tcp_highest_sack(sk);
+                               if (skb == NULL)
+                                       break;
                                fack_count = tp->fackets_out;
                                cache++;
                                goto walk;
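
The raw tp->highest_sack dereferences give way to accessors. From their use here (tcp_highest_sack() can return NULL, at which point the walk stops), the cached pointer now refers to the skb just after the highest-SACKed one, NULL when that skb was the queue tail, which is why tcp_highest_sack_seq(tp) can stand in for TCP_SKB_CB(tp->highest_sack)->end_seq above. A rough reconstruction of the companion include/net/tcp.h helpers, inferred from usage rather than quoted from the patch:

        static inline struct sk_buff *tcp_highest_sack(struct sock *sk)
        {
                return tcp_sk(sk)->highest_sack;        /* NULL past the queue tail */
        }

        static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp)
        {
                if (!tp->sacked_out)
                        return tp->snd_una;             /* nothing SACKed yet */
                if (tp->highest_sack == NULL)
                        return tp->snd_nxt;             /* highest-SACKed skb was the tail */
                return TCP_SKB_CB(tp->highest_sack)->seq;
        }

        static inline void tcp_highest_sack_reset(struct sock *sk)
        {
                tcp_sk(sk)->highest_sack = tcp_write_queue_head(sk);
        }

        static inline void tcp_advance_highest_sack(struct sock *sk,
                                                    struct sk_buff *skb)
        {
                tcp_sk(sk)->highest_sack = tcp_skb_is_last(sk, skb) ? NULL :
                                           tcp_write_queue_next(sk, skb);
        }
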
@@ -1575,8 +1567,10 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
                        continue;
                }
 
-               if (tp->sacked_out && !before(start_seq, tcp_highest_sack_seq(tp))) {
-                       skb = tcp_write_queue_next(sk, tp->highest_sack);
+               if (!before(start_seq, tcp_highest_sack_seq(tp))) {
+                       skb = tcp_highest_sack(sk);
+                       if (skb == NULL)
+                               break;
                        fack_count = tp->fackets_out;
                }
                skb = tcp_sacktag_skip(skb, sk, start_seq);
@@ -1603,7 +1597,7 @@ advance_sp:
        for (j = 0; j < used_sacks; j++)
                tp->recv_sack_cache[i++] = sp[j];
 
-       flag |= tcp_mark_lost_retrans(sk);
+       tcp_mark_lost_retrans(sk);
 
        tcp_verify_left_out(tp);
 
@@ -2719,11 +2713,10 @@ static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
                tcp_ack_no_tstamp(sk, seq_rtt, flag);
 }
 
-static void tcp_cong_avoid(struct sock *sk, u32 ack,
-                          u32 in_flight, int good)
+static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 {
        const struct inet_connection_sock *icsk = inet_csk(sk);
-       icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight, good);
+       icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight);
        tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
 }
 
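
The fourth cong_avoid argument is dropped; the "good ACK" hint it carried apparently no longer had a consumer among the congestion-control modules, so the two call sites in tcp_ack() below collapse from passing 0 and 1 to the same three-argument call. The corresponding ops-table field presumably now reads (inferred from the call above):

        /* in struct tcp_congestion_ops */
        void (*cong_avoid)(struct sock *sk, u32 ack, u32 in_flight);
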
@@ -2766,8 +2759,7 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
  * is before the ack sequence we can discard it as it's confirmed to have
  * arrived at the other end.
  */
-static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
-                              int prior_fackets)
+static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2775,8 +2767,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
        u32 now = tcp_time_stamp;
        int fully_acked = 1;
        int flag = 0;
-       int prior_packets = tp->packets_out;
-       u32 cnt = 0;
+       u32 pkts_acked = 0;
        u32 reord = tp->packets_out;
        s32 seq_rtt = -1;
        s32 ca_seq_rtt = -1;
@@ -2785,7 +2776,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
        while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
                struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
                u32 end_seq;
-               u32 packets_acked;
+               u32 acked_pcount;
                u8 sacked = scb->sacked;
 
                /* Determine how many packets and what bytes were acked, tso and else */
@@ -2794,14 +2785,14 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
                            !after(tp->snd_una, scb->seq))
                                break;
 
-                       packets_acked = tcp_tso_acked(sk, skb);
-                       if (!packets_acked)
+                       acked_pcount = tcp_tso_acked(sk, skb);
+                       if (!acked_pcount)
                                break;
 
                        fully_acked = 0;
                        end_seq = tp->snd_una;
                } else {
-                       packets_acked = tcp_skb_pcount(skb);
+                       acked_pcount = tcp_skb_pcount(skb);
                        end_seq = scb->end_seq;
                }
 
@@ -2811,44 +2802,36 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
                        tcp_mtup_probe_success(sk, skb);
                }
 
-               if (sacked) {
-                       if (sacked & TCPCB_RETRANS) {
-                               if (sacked & TCPCB_SACKED_RETRANS)
-                                       tp->retrans_out -= packets_acked;
-                               flag |= FLAG_RETRANS_DATA_ACKED;
-                               ca_seq_rtt = -1;
-                               seq_rtt = -1;
-                               if ((flag & FLAG_DATA_ACKED) ||
-                                   (packets_acked > 1))
-                                       flag |= FLAG_NONHEAD_RETRANS_ACKED;
-                       } else {
-                               ca_seq_rtt = now - scb->when;
-                               last_ackt = skb->tstamp;
-                               if (seq_rtt < 0) {
-                                       seq_rtt = ca_seq_rtt;
-                               }
-                               if (!(sacked & TCPCB_SACKED_ACKED))
-                                       reord = min(cnt, reord);
-                       }
-
-                       if (sacked & TCPCB_SACKED_ACKED)
-                               tp->sacked_out -= packets_acked;
-                       if (sacked & TCPCB_LOST)
-                               tp->lost_out -= packets_acked;
-
-                       if ((sacked & TCPCB_URG) && tp->urg_mode &&
-                           !before(end_seq, tp->snd_up))
-                               tp->urg_mode = 0;
+               if (sacked & TCPCB_RETRANS) {
+                       if (sacked & TCPCB_SACKED_RETRANS)
+                               tp->retrans_out -= acked_pcount;
+                       flag |= FLAG_RETRANS_DATA_ACKED;
+                       ca_seq_rtt = -1;
+                       seq_rtt = -1;
+                       if ((flag & FLAG_DATA_ACKED) ||
+                           (acked_pcount > 1))
+                               flag |= FLAG_NONHEAD_RETRANS_ACKED;
                } else {
                        ca_seq_rtt = now - scb->when;
                        last_ackt = skb->tstamp;
                        if (seq_rtt < 0) {
                                seq_rtt = ca_seq_rtt;
                        }
-                       reord = min(cnt, reord);
+                       if (!(sacked & TCPCB_SACKED_ACKED))
+                               reord = min(pkts_acked, reord);
                }
-               tp->packets_out -= packets_acked;
-               cnt += packets_acked;
+
+               if (sacked & TCPCB_SACKED_ACKED)
+                       tp->sacked_out -= acked_pcount;
+               if (sacked & TCPCB_LOST)
+                       tp->lost_out -= acked_pcount;
+
+               if (unlikely((sacked & TCPCB_URG) && tp->urg_mode &&
+                            !before(end_seq, tp->snd_up)))
+                       tp->urg_mode = 0;
+
+               tp->packets_out -= acked_pcount;
+               pkts_acked += acked_pcount;
 
                /* Initial outgoing SYN's get put onto the write_queue
                 * just like anything else we transmit.  It is not
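
This loop rework is a rename plus a control-flow flattening: packets_acked becomes acked_pcount (per skb) and cnt becomes pkts_acked (the running total, which makes the prior_packets subtraction in the next hunk redundant). Dropping the outer if (sacked) guard preserves behavior, because for sacked == 0 every mask test degenerates to the old else-branch path; checking that case mechanically:

        u8 sacked = 0;                                  /* the previously-guarded case */

        if (sacked & TCPCB_RETRANS) {                   /* 0 & mask == 0: not taken */
                /* retransmission bookkeeping */
        } else {
                /* RTT sample, as in the old unsacked else-branch, then: */
                if (!(sacked & TCPCB_SACKED_ACKED))     /* always true when sacked == 0 */
                        reord = min(pkts_acked, reord); /* same update as before */
        }
        if (sacked & TCPCB_SACKED_ACKED)                /* 0: no-op */
                tp->sacked_out -= acked_pcount;
        if (sacked & TCPCB_LOST)                        /* 0: no-op */
                tp->lost_out -= acked_pcount;

The urgent-pointer check gains an unlikely() along the way, reflecting how rarely URG is in play.
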
@@ -2868,12 +2851,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
                        break;
 
                tcp_unlink_write_queue(skb, sk);
-               sk_stream_free_skb(sk, skb);
+               sk_wmem_free_skb(sk, skb);
                tcp_clear_all_retrans_hints(tp);
        }
 
        if (flag & FLAG_ACKED) {
-               u32 pkts_acked = prior_packets - tp->packets_out;
                const struct tcp_congestion_ops *ca_ops
                        = inet_csk(sk)->icsk_ca_ops;
 
@@ -2932,7 +2914,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
                }
        }
 #endif
-       *seq_rtt_p = seq_rtt;
        return flag;
 }
 
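
With pkts_acked now counted inside the loop and seq_rtt consumed within tcp_clean_rtx_queue() itself (presumably by the RTT update in the FLAG_ACKED block, which this diff shows only in part), the s32 *seq_rtt_p out-parameter has no reader left, so the prototype shrinks:

        /* before */
        static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
                                       int prior_fackets);
        /* after */
        static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets);
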
@@ -3169,7 +3150,6 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
        u32 ack = TCP_SKB_CB(skb)->ack_seq;
        u32 prior_in_flight;
        u32 prior_fackets;
-       s32 seq_rtt;
        int prior_packets;
        int frto_cwnd = 0;
 
@@ -3235,7 +3215,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
                goto no_queue;
 
        /* See if we can take anything off of the retransmit queue. */
-       flag |= tcp_clean_rtx_queue(sk, &seq_rtt, prior_fackets);
+       flag |= tcp_clean_rtx_queue(sk, prior_fackets);
 
        if (tp->frto_counter)
                frto_cwnd = tcp_process_frto(sk, flag);
@@ -3247,11 +3227,11 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
                /* Advance CWND, if state allows this. */
                if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
                    tcp_may_raise_cwnd(sk, flag))
-                       tcp_cong_avoid(sk, ack, prior_in_flight, 0);
+                       tcp_cong_avoid(sk, ack, prior_in_flight);
                tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, flag);
        } else {
                if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
-                       tcp_cong_avoid(sk, ack, prior_in_flight, 1);
+                       tcp_cong_avoid(sk, ack, prior_in_flight);
        }
 
        if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP))
@@ -3587,7 +3567,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
        __skb_queue_purge(&tp->out_of_order_queue);
        if (tcp_is_sack(tp))
                tcp_sack_reset(&tp->rx_opt);
-       sk_stream_mem_reclaim(sk);
+       sk_mem_reclaim(sk);
 
        if (!sock_flag(sk, SOCK_DEAD)) {
                sk->sk_state_change(sk);
@@ -3870,12 +3850,12 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 queue_and_out:
                        if (eaten < 0 &&
                            (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-                            !sk_stream_rmem_schedule(sk, skb))) {
+                            !sk_rmem_schedule(sk, skb->truesize))) {
                                if (tcp_prune_queue(sk) < 0 ||
-                                   !sk_stream_rmem_schedule(sk, skb))
+                                   !sk_rmem_schedule(sk, skb->truesize))
                                        goto drop;
                        }
-                       sk_stream_set_owner_r(skb, sk);
+                       skb_set_owner_r(skb, sk);
                        __skb_queue_tail(&sk->sk_receive_queue, skb);
                }
                tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
@@ -3944,9 +3924,9 @@ drop:
        TCP_ECN_check_ce(tp, skb);
 
        if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-           !sk_stream_rmem_schedule(sk, skb)) {
+           !sk_rmem_schedule(sk, skb->truesize)) {
                if (tcp_prune_queue(sk) < 0 ||
-                   !sk_stream_rmem_schedule(sk, skb))
+                   !sk_rmem_schedule(sk, skb->truesize))
                        goto drop;
        }
 
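
Both receive-queue admission points now charge an explicit byte count. As a mental model of the new helper, here is a simplified sketch of what sk_rmem_schedule() plausibly does, not a verbatim copy of include/net/sock.h: admission succeeds immediately if the bytes fit the socket's existing forward allocation, and otherwise tries to reserve more from the protocol's memory pool.

        static inline int sk_rmem_schedule(struct sock *sk, unsigned int size)
        {
                return size <= sk->sk_forward_alloc ||
                       __sk_mem_schedule(sk, size, SK_MEM_RECV);
        }
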
@@ -3957,7 +3937,7 @@ drop:
        SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
                   tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
 
-       sk_stream_set_owner_r(skb, sk);
+       skb_set_owner_r(skb, sk);
 
        if (!skb_peek(&tp->out_of_order_queue)) {
                /* Initial out of order segment, build 1 SACK. */
@@ -4099,7 +4079,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
                memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
                TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
                __skb_insert(nskb, skb->prev, skb, list);
-               sk_stream_set_owner_r(nskb, sk);
+               skb_set_owner_r(nskb, sk);
 
                /* Copy data, releasing collapsed skbs. */
                while (copy > 0) {
@@ -4197,7 +4177,7 @@ static int tcp_prune_queue(struct sock *sk)
                     sk->sk_receive_queue.next,
                     (struct sk_buff*)&sk->sk_receive_queue,
                     tp->copied_seq, tp->rcv_nxt);
-       sk_stream_mem_reclaim(sk);
+       sk_mem_reclaim(sk);
 
        if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
                return 0;
@@ -4217,7 +4197,7 @@ static int tcp_prune_queue(struct sock *sk)
                 */
                if (tcp_is_sack(tp))
                        tcp_sack_reset(&tp->rx_opt);
-               sk_stream_mem_reclaim(sk);
+               sk_mem_reclaim(sk);
        }
 
        if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
@@ -4719,7 +4699,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
                                /* Bulk data transfer: receiver */
                                __skb_pull(skb,tcp_header_len);
                                __skb_queue_tail(&sk->sk_receive_queue, skb);
-                               sk_stream_set_owner_r(skb, sk);
+                               skb_set_owner_r(skb, sk);
                                tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
                        }