SCTP is a protocol that is aligned to a word (4 bytes). Thus using bare
MTU can sometimes return values that are not aligned, like for loopback,
which is 65536 but ipv4_mtu() limits that to 65535. This mis-alignment
will cause the last non-aligned bytes to never be used and can cause
issues with congestion control.
So it's better to just consider a lower MTU and keep congestion control
calcs saner as they are based on PMTU.
Same applies to icmp frag needed messages, which is also fixed by this
patch.
One other effect of this is the inability to send MTU-sized packet
without queueing or fragmentation and without hitting Nagle. As the
check performed at sctp_packet_can_append_data():
if (chunk->skb->len + q->out_qlen >= transport->pathmtu - packet->overhead)
/* Enough data queued to fill a packet */
return SCTP_XMIT_OK;
with the above example of MTU, if there are no other messages queued,
one cannot send a packet that just fits one packet (65532 bytes) and
without causing DATA chunk fragmentation or a delay.
v2:
- Added WORD_TRUNC macro
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
#define SCTP_PROTOSW_FLAG INET_PROTOSW_PERMANENT
#endif
#define SCTP_PROTOSW_FLAG INET_PROTOSW_PERMANENT
#endif
+/* Round an int up to the next multiple of 4. */
+#define WORD_ROUND(s) (((s)+3)&~3)
+/* Truncate to the previous multiple of 4. */
+#define WORD_TRUNC(s) ((s)&~3)
+
/*
* Function declarations.
*/
/*
* Function declarations.
*/
(void *)pos <= (void *)chunk->subh.fwdtsn_hdr->skip + end - sizeof(struct sctp_fwdtsn_skip);\
pos++)
(void *)pos <= (void *)chunk->subh.fwdtsn_hdr->skip + end - sizeof(struct sctp_fwdtsn_skip);\
pos++)
-/* Round an int up to the next multiple of 4. */
-#define WORD_ROUND(s) (((s)+3)&~3)
-
/* External references. */
extern struct proto sctp_prot;
/* External references. */
extern struct proto sctp_prot;
list_for_each_entry(t, &asoc->peer.transport_addr_list,
transports) {
if (t->pmtu_pending && t->dst) {
list_for_each_entry(t, &asoc->peer.transport_addr_list,
transports) {
if (t->pmtu_pending && t->dst) {
- sctp_transport_update_pmtu(sk, t, dst_mtu(t->dst));
+ sctp_transport_update_pmtu(sk, t,
+ WORD_TRUNC(dst_mtu(t->dst)));
t->pmtu_pending = 0;
}
if (!pmtu || (t->pathmtu < pmtu))
t->pmtu_pending = 0;
}
if (!pmtu || (t->pathmtu < pmtu))
/* PMTU discovery (RFC1191) */
if (ICMP_FRAG_NEEDED == code) {
/* PMTU discovery (RFC1191) */
if (ICMP_FRAG_NEEDED == code) {
- sctp_icmp_frag_needed(sk, asoc, transport, info);
+ sctp_icmp_frag_needed(sk, asoc, transport,
+ WORD_TRUNC(info));
goto out_unlock;
} else {
if (ICMP_PROT_UNREACH == code) {
goto out_unlock;
} else {
if (ICMP_PROT_UNREACH == code) {
- transport->pathmtu = dst_mtu(transport->dst);
+ transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst));
} else
transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
}
} else
transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
}
return;
}
if (transport->dst) {
return;
}
if (transport->dst) {
- transport->pathmtu = dst_mtu(transport->dst);
+ transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst));
/* Initialize sk->sk_rcv_saddr, if the transport is the
* association's active path for getsockname().
/* Initialize sk->sk_rcv_saddr, if the transport is the
* association's active path for getsockname().