sctp: Add GSO support
[cascardo/linux.git] / net / sctp / output.c
index 9844fe5..60499a6 100644 (file)
@@ -84,18 +84,42 @@ static void sctp_packet_reset(struct sctp_packet *packet)
 struct sctp_packet *sctp_packet_config(struct sctp_packet *packet,
                                       __u32 vtag, int ecn_capable)
 {
-       struct sctp_chunk *chunk = NULL;
+       struct sctp_transport *tp = packet->transport;
+       struct sctp_association *asoc = tp->asoc;
 
        pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag);
 
        packet->vtag = vtag;
 
+       if (asoc && tp->dst) {
+               struct sock *sk = asoc->base.sk;
+
+               rcu_read_lock();
+               if (__sk_dst_get(sk) != tp->dst) {
+                       dst_hold(tp->dst);
+                       sk_setup_caps(sk, tp->dst);
+               }
+
+               if (sk_can_gso(sk)) {
+                       struct net_device *dev = tp->dst->dev;
+
+                       packet->max_size = dev->gso_max_size;
+               } else {
+                       packet->max_size = asoc->pathmtu;
+               }
+               rcu_read_unlock();
+
+       } else {
+               packet->max_size = tp->pathmtu;
+       }
+
        if (ecn_capable && sctp_packet_empty(packet)) {
-               chunk = sctp_get_ecne_prepend(packet->transport->asoc);
+               struct sctp_chunk *chunk;
 
                /* If there a is a prepend chunk stick it on the list before
                 * any other chunks get appended.
                 */
+               chunk = sctp_get_ecne_prepend(asoc);
                if (chunk)
                        sctp_packet_append_chunk(packet, chunk);
        }
@@ -381,12 +405,15 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
        struct sctp_transport *tp = packet->transport;
        struct sctp_association *asoc = tp->asoc;
        struct sctphdr *sh;
-       struct sk_buff *nskb;
+       struct sk_buff *nskb = NULL, *head = NULL;
        struct sctp_chunk *chunk, *tmp;
        struct sock *sk;
        int err = 0;
        int padding;            /* How much padding do we need?  */
+       int pkt_size;
        __u8 has_data = 0;
+       int gso = 0;
+       int pktcount = 0;
        struct dst_entry *dst;
        unsigned char *auth = NULL;     /* pointer to auth in skb data */
 
@@ -400,18 +427,37 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
        chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list);
        sk = chunk->skb->sk;
 
-       /* Allocate the new skb.  */
-       nskb = alloc_skb(packet->size + MAX_HEADER, gfp);
-       if (!nskb)
+       /* Allocate the head skb, or main one if not in GSO */
+       if (packet->size > tp->pathmtu && !packet->ipfragok) {
+               if (sk_can_gso(sk)) {
+                       gso = 1;
+                       pkt_size = packet->overhead;
+               } else {
+                       /* If this happens, we trash this packet and try
+                        * to build a new one, hopefully correct this
+                        * time. Application may notice this error.
+                        */
+                       pr_err_once("Trying to GSO but underlying device doesn't support it.");
+                       goto nomem;
+               }
+       } else {
+               pkt_size = packet->size;
+       }
+       head = alloc_skb(pkt_size + MAX_HEADER, gfp);
+       if (!head)
                goto nomem;
+       if (gso) {
+               NAPI_GRO_CB(head)->last = head;
+               skb_shinfo(head)->gso_type = sk->sk_gso_type;
+       }
 
        /* Make sure the outbound skb has enough header room reserved. */
-       skb_reserve(nskb, packet->overhead + MAX_HEADER);
+       skb_reserve(head, packet->overhead + MAX_HEADER);
 
        /* Set the owning socket so that we know where to get the
         * destination IP address.
         */
-       sctp_packet_set_owner_w(nskb, sk);
+       sctp_packet_set_owner_w(head, sk);
 
        if (!sctp_transport_dst_check(tp)) {
                sctp_transport_route(tp, NULL, sctp_sk(sk));
@@ -422,11 +468,11 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
        dst = dst_clone(tp->dst);
        if (!dst)
                goto no_route;
-       skb_dst_set(nskb, dst);
+       skb_dst_set(head, dst);
 
        /* Build the SCTP header.  */
-       sh = (struct sctphdr *)skb_push(nskb, sizeof(struct sctphdr));
-       skb_reset_transport_header(nskb);
+       sh = (struct sctphdr *)skb_push(head, sizeof(struct sctphdr));
+       skb_reset_transport_header(head);
        sh->source = htons(packet->source_port);
        sh->dest   = htons(packet->destination_port);
 
@@ -441,90 +487,133 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
        sh->vtag     = htonl(packet->vtag);
        sh->checksum = 0;
 
-       /**
-        * 6.10 Bundling
-        *
-        *    An endpoint bundles chunks by simply including multiple
-        *    chunks in one outbound SCTP packet.  ...
-        */
-
-       /**
-        * 3.2  Chunk Field Descriptions
-        *
-        * The total length of a chunk (including Type, Length and
-        * Value fields) MUST be a multiple of 4 bytes.  If the length
-        * of the chunk is not a multiple of 4 bytes, the sender MUST
-        * pad the chunk with all zero bytes and this padding is not
-        * included in the chunk length field.  The sender should
-        * never pad with more than 3 bytes.
-        *
-        * [This whole comment explains WORD_ROUND() below.]
-        */
-
        pr_debug("***sctp_transmit_packet***\n");
 
-       list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
-               list_del_init(&chunk->list);
-               if (sctp_chunk_is_data(chunk)) {
-                       /* 6.3.1 C4) When data is in flight and when allowed
-                        * by rule C5, a new RTT measurement MUST be made each
-                        * round trip.  Furthermore, new RTT measurements
-                        * SHOULD be made no more than once per round-trip
-                        * for a given destination transport address.
-                        */
+       do {
+               /* Set up convenience variables... */
+               chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list);
+               pktcount++;
 
-                       if (!chunk->resent && !tp->rto_pending) {
-                               chunk->rtt_in_progress = 1;
-                               tp->rto_pending = 1;
+               /* Calculate packet size, so it fits in PMTU. Leave
+                * other chunks for the next packets.
+                */
+               if (gso) {
+                       pkt_size = packet->overhead;
+                       list_for_each_entry(chunk, &packet->chunk_list, list) {
+                               int padded = WORD_ROUND(chunk->skb->len);
+
+                               if (pkt_size + padded > tp->pathmtu)
+                                       break;
+                               pkt_size += padded;
                        }
 
-                       has_data = 1;
-               }
+                       /* Allocate a new skb. */
+                       nskb = alloc_skb(pkt_size + MAX_HEADER, gfp);
+                       if (!nskb)
+                               goto nomem;
 
-               padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len;
-               if (padding)
-                       memset(skb_put(chunk->skb, padding), 0, padding);
+                       /* Make sure the outbound skb has enough header
+                        * room reserved.
+                        */
+                       skb_reserve(nskb, packet->overhead + MAX_HEADER);
+               } else {
+                       nskb = head;
+               }
 
-               /* if this is the auth chunk that we are adding,
-                * store pointer where it will be added and put
-                * the auth into the packet.
+               /**
+                * 3.2  Chunk Field Descriptions
+                *
+                * The total length of a chunk (including Type, Length and
+                * Value fields) MUST be a multiple of 4 bytes.  If the length
+                * of the chunk is not a multiple of 4 bytes, the sender MUST
+                * pad the chunk with all zero bytes and this padding is not
+                * included in the chunk length field.  The sender should
+                * never pad with more than 3 bytes.
+                *
+                * [This whole comment explains WORD_ROUND() below.]
                 */
-               if (chunk == packet->auth)
-                       auth = skb_tail_pointer(nskb);
 
-               memcpy(skb_put(nskb, chunk->skb->len),
+               pkt_size -= packet->overhead;
+               list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
+                       list_del_init(&chunk->list);
+                       if (sctp_chunk_is_data(chunk)) {
+                               /* 6.3.1 C4) When data is in flight and when allowed
+                                * by rule C5, a new RTT measurement MUST be made each
+                                * round trip.  Furthermore, new RTT measurements
+                                * SHOULD be made no more than once per round-trip
+                                * for a given destination transport address.
+                                */
+
+                               if (!chunk->resent && !tp->rto_pending) {
+                                       chunk->rtt_in_progress = 1;
+                                       tp->rto_pending = 1;
+                               }
+
+                               has_data = 1;
+                       }
+
+                       padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len;
+                       if (padding)
+                               memset(skb_put(chunk->skb, padding), 0, padding);
+
+                       /* if this is the auth chunk that we are adding,
+                        * store pointer where it will be added and put
+                        * the auth into the packet.
+                        */
+                       if (chunk == packet->auth)
+                               auth = skb_tail_pointer(nskb);
+
+                       memcpy(skb_put(nskb, chunk->skb->len),
                               chunk->skb->data, chunk->skb->len);
 
-               pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, "
-                        "rtt_in_progress:%d\n", chunk,
-                        sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)),
-                        chunk->has_tsn ? "TSN" : "No TSN",
-                        chunk->has_tsn ? ntohl(chunk->subh.data_hdr->tsn) : 0,
-                        ntohs(chunk->chunk_hdr->length), chunk->skb->len,
-                        chunk->rtt_in_progress);
-
-               /*
-                * If this is a control chunk, this is our last
-                * reference. Free data chunks after they've been
-                * acknowledged or have failed.
-                */
-               if (!sctp_chunk_is_data(chunk))
-                       sctp_chunk_free(chunk);
-       }
+                       pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, rtt_in_progress:%d\n",
+                                chunk,
+                                sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)),
+                                chunk->has_tsn ? "TSN" : "No TSN",
+                                chunk->has_tsn ? ntohl(chunk->subh.data_hdr->tsn) : 0,
+                                ntohs(chunk->chunk_hdr->length), chunk->skb->len,
+                                chunk->rtt_in_progress);
+
+                       /* If this is a control chunk, this is our last
+                        * reference. Free data chunks after they've been
+                        * acknowledged or have failed.
+                        * Re-queue auth chunks if needed.
+                        */
+                       pkt_size -= WORD_ROUND(chunk->skb->len);
 
-       /* SCTP-AUTH, Section 6.2
-        *    The sender MUST calculate the MAC as described in RFC2104 [2]
-        *    using the hash function H as described by the MAC Identifier and
-        *    the shared association key K based on the endpoint pair shared key
-        *    described by the shared key identifier.  The 'data' used for the
-        *    computation of the AUTH-chunk is given by the AUTH chunk with its
-        *    HMAC field set to zero (as shown in Figure 6) followed by all
-        *    chunks that are placed after the AUTH chunk in the SCTP packet.
-        */
-       if (auth)
-               sctp_auth_calculate_hmac(asoc, nskb,
-                                        (struct sctp_auth_chunk *)auth,
-                                        gfp);
+                       if (chunk == packet->auth && !list_empty(&packet->chunk_list))
+                               list_add(&chunk->list, &packet->chunk_list);
+                       else if (!sctp_chunk_is_data(chunk))
+                               sctp_chunk_free(chunk);
+
+                       if (!pkt_size)
+                               break;
+               }
+
+               /* SCTP-AUTH, Section 6.2
+                *    The sender MUST calculate the MAC as described in RFC2104 [2]
+                *    using the hash function H as described by the MAC Identifier and
+                *    the shared association key K based on the endpoint pair shared key
+                *    described by the shared key identifier.  The 'data' used for the
+                *    computation of the AUTH-chunk is given by the AUTH chunk with its
+                *    HMAC field set to zero (as shown in Figure 6) followed by all
+                *    chunks that are placed after the AUTH chunk in the SCTP packet.
+                */
+               if (auth)
+                       sctp_auth_calculate_hmac(asoc, nskb,
+                                                (struct sctp_auth_chunk *)auth,
+                                                gfp);
+
+               if (!gso)
+                       break;
+
+               if (skb_gro_receive(&head, nskb))
+                       goto nomem;
+               nskb = NULL;
+               if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >=
+                                sk->sk_gso_max_segs))
+                       goto nomem;
+       } while (!list_empty(&packet->chunk_list));
 
        /* 2) Calculate the Adler-32 checksum of the whole packet,
         *    including the SCTP common header and all the
@@ -532,16 +621,18 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
         *
         * Note: Adler-32 is no longer applicable, as has been replaced
         * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>.
+        *
+        * If it's a GSO packet, it's postponed to sctp_skb_segment.
         */
-       if (!sctp_checksum_disable) {
-               if (!(dst->dev->features & NETIF_F_SCTP_CRC) ||
-                   (dst_xfrm(dst) != NULL) || packet->ipfragok) {
-                       sh->checksum = sctp_compute_cksum(nskb, 0);
+       if (!sctp_checksum_disable || gso) {
+               if (!gso && (!(dst->dev->features & NETIF_F_SCTP_CRC) ||
+                            dst_xfrm(dst) || packet->ipfragok)) {
+                       sh->checksum = sctp_compute_cksum(head, 0);
                } else {
                        /* no need to seed pseudo checksum for SCTP */
-                       nskb->ip_summed = CHECKSUM_PARTIAL;
-                       nskb->csum_start = skb_transport_header(nskb) - nskb->head;
-                       nskb->csum_offset = offsetof(struct sctphdr, checksum);
+                       head->ip_summed = CHECKSUM_PARTIAL;
+                       head->csum_start = skb_transport_header(head) - head->head;
+                       head->csum_offset = offsetof(struct sctphdr, checksum);
                }
        }
 
@@ -557,7 +648,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
         * Note: The works for IPv6 layer checks this bit too later
         * in transmission.  See IP6_ECN_flow_xmit().
         */
-       tp->af_specific->ecn_capable(nskb->sk);
+       tp->af_specific->ecn_capable(sk);
 
        /* Set up the IP options.  */
        /* BUG: not implemented
@@ -566,7 +657,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
 
        /* Dump that on IP!  */
        if (asoc) {
-               asoc->stats.opackets++;
+               asoc->stats.opackets += pktcount;
                if (asoc->peer.last_sent_to != tp)
                        /* Considering the multiple CPU scenario, this is a
                         * "correcter" place for last_sent_to.  --xguo
@@ -589,16 +680,36 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
                }
        }
 
-       pr_debug("***sctp_transmit_packet*** skb->len:%d\n", nskb->len);
+       pr_debug("***sctp_transmit_packet*** skb->len:%d\n", head->len);
+
+       if (gso) {
+               /* Cleanup our debris for IP stacks */
+               memset(head->cb, 0, max(sizeof(struct inet_skb_parm),
+                                       sizeof(struct inet6_skb_parm)));
 
-       nskb->ignore_df = packet->ipfragok;
-       tp->af_specific->sctp_xmit(nskb, tp);
+               skb_shinfo(head)->gso_segs = pktcount;
+               skb_shinfo(head)->gso_size = GSO_BY_FRAGS;
+
+               /* We have to refresh this in case we are xmiting to
+                * more than one transport at a time
+                */
+               rcu_read_lock();
+               if (__sk_dst_get(sk) != tp->dst) {
+                       dst_hold(tp->dst);
+                       sk_setup_caps(sk, tp->dst);
+               }
+               rcu_read_unlock();
+       }
+       head->ignore_df = packet->ipfragok;
+       tp->af_specific->sctp_xmit(head, tp);
 
 out:
        sctp_packet_reset(packet);
        return err;
 no_route:
-       kfree_skb(nskb);
+       kfree_skb(head);
+       if (nskb != head)
+               kfree_skb(nskb);
 
        if (asoc)
                IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
@@ -751,39 +862,63 @@ static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet,
                                        struct sctp_chunk *chunk,
                                        u16 chunk_len)
 {
-       size_t psize;
-       size_t pmtu;
-       int too_big;
+       size_t psize, pmtu;
        sctp_xmit_t retval = SCTP_XMIT_OK;
 
        psize = packet->size;
-       pmtu  = ((packet->transport->asoc) ?
-               (packet->transport->asoc->pathmtu) :
-               (packet->transport->pathmtu));
-
-       too_big = (psize + chunk_len > pmtu);
+       if (packet->transport->asoc)
+               pmtu = packet->transport->asoc->pathmtu;
+       else
+               pmtu = packet->transport->pathmtu;
 
        /* Decide if we need to fragment or resubmit later. */
-       if (too_big) {
-               /* It's OK to fragmet at IP level if any one of the following
+       if (psize + chunk_len > pmtu) {
+               /* It's OK to fragment at IP level if any one of the following
                 * is true:
-                *      1. The packet is empty (meaning this chunk is greater
-                *         the MTU)
-                *      2. The chunk we are adding is a control chunk
-                *      3. The packet doesn't have any data in it yet and data
-                *      requires authentication.
+                *      1. The packet is empty (meaning this chunk is greater
+                *         than the MTU)
+                *      2. The packet doesn't have any data in it yet and data
+                *         requires authentication.
                 */
-               if (sctp_packet_empty(packet) || !sctp_chunk_is_data(chunk) ||
+               if (sctp_packet_empty(packet) ||
                    (!packet->has_data && chunk->auth)) {
                        /* We no longer do re-fragmentation.
                         * Just fragment at the IP layer, if we
                         * actually hit this condition
                         */
                        packet->ipfragok = 1;
-               } else {
-                       retval = SCTP_XMIT_PMTU_FULL;
+                       goto out;
                }
+
+               /* It is also okay to fragment if the chunk we are
+                * adding is a control chunk, but only if the current packet
+                * is not a GSO one; otherwise it causes fragmentation of
+                * a large frame. So in this case we allow the
+                * fragmentation by forcing it to be in a new packet.
+                */
+               if (!sctp_chunk_is_data(chunk) && packet->has_data)
+                       retval = SCTP_XMIT_PMTU_FULL;
+
+               if (psize + chunk_len > packet->max_size)
+                       /* Hit GSO/PMTU limit, gotta flush */
+                       retval = SCTP_XMIT_PMTU_FULL;
+
+               if (!packet->transport->burst_limited &&
+                   psize + chunk_len > (packet->transport->cwnd >> 1))
+                       /* Do not allow a single GSO packet to use more
+                        * than half of cwnd.
+                        */
+                       retval = SCTP_XMIT_PMTU_FULL;
+
+               if (packet->transport->burst_limited &&
+                   psize + chunk_len > (packet->transport->burst_limited >> 1))
+                       /* Do not allow a single GSO packet to use more
+                        * than half of original cwnd.
+                        */
+                       retval = SCTP_XMIT_PMTU_FULL;
+               /* Otherwise it will fit in the GSO packet */
        }
 
+out:
        return retval;
 }