Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
[cascardo/linux.git] / net / rxrpc / output.c
1 /* RxRPC packet transmission
2  *
3  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4  * Written by David Howells (dhowells@redhat.com)
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  */
11
12 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
14 #include <linux/net.h>
15 #include <linux/gfp.h>
16 #include <linux/skbuff.h>
17 #include <linux/export.h>
18 #include <net/sock.h>
19 #include <net/af_rxrpc.h>
20 #include "ar-internal.h"
21
22 struct rxrpc_pkt_buffer {
23         struct rxrpc_wire_header whdr;
24         union {
25                 struct {
26                         struct rxrpc_ackpacket ack;
27                         u8 acks[255];
28                         u8 pad[3];
29                 };
30                 __be32 abort_code;
31         };
32         struct rxrpc_ackinfo ackinfo;
33 };
34
35 /*
36  * Fill out an ACK packet.
37  */
38 static size_t rxrpc_fill_out_ack(struct rxrpc_call *call,
39                                  struct rxrpc_pkt_buffer *pkt,
40                                  rxrpc_seq_t *_hard_ack,
41                                  rxrpc_seq_t *_top)
42 {
43         rxrpc_serial_t serial;
44         rxrpc_seq_t hard_ack, top, seq;
45         int ix;
46         u32 mtu, jmax;
47         u8 *ackp = pkt->acks;
48
49         /* Barrier against rxrpc_input_data(). */
50         serial = call->ackr_serial;
51         hard_ack = READ_ONCE(call->rx_hard_ack);
52         top = smp_load_acquire(&call->rx_top);
53         *_hard_ack = hard_ack;
54         *_top = top;
55
56         pkt->ack.bufferSpace    = htons(8);
57         pkt->ack.maxSkew        = htons(call->ackr_skew);
58         pkt->ack.firstPacket    = htonl(hard_ack + 1);
59         pkt->ack.previousPacket = htonl(call->ackr_prev_seq);
60         pkt->ack.serial         = htonl(serial);
61         pkt->ack.reason         = call->ackr_reason;
62         pkt->ack.nAcks          = top - hard_ack;
63
64         if (pkt->ack.reason == RXRPC_ACK_PING)
65                 pkt->whdr.flags |= RXRPC_REQUEST_ACK;
66
67         if (after(top, hard_ack)) {
68                 seq = hard_ack + 1;
69                 do {
70                         ix = seq & RXRPC_RXTX_BUFF_MASK;
71                         if (call->rxtx_buffer[ix])
72                                 *ackp++ = RXRPC_ACK_TYPE_ACK;
73                         else
74                                 *ackp++ = RXRPC_ACK_TYPE_NACK;
75                         seq++;
76                 } while (before_eq(seq, top));
77         }
78
79         mtu = call->conn->params.peer->if_mtu;
80         mtu -= call->conn->params.peer->hdrsize;
81         jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max;
82         pkt->ackinfo.rxMTU      = htonl(rxrpc_rx_mtu);
83         pkt->ackinfo.maxMTU     = htonl(mtu);
84         pkt->ackinfo.rwind      = htonl(call->rx_winsize);
85         pkt->ackinfo.jumbo_max  = htonl(jmax);
86
87         *ackp++ = 0;
88         *ackp++ = 0;
89         *ackp++ = 0;
90         return top - hard_ack + 3;
91 }
92
93 /*
94  * Send an ACK or ABORT call packet.
95  */
96 int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type)
97 {
98         struct rxrpc_connection *conn = NULL;
99         struct rxrpc_pkt_buffer *pkt;
100         struct msghdr msg;
101         struct kvec iov[2];
102         rxrpc_serial_t serial;
103         rxrpc_seq_t hard_ack, top;
104         size_t len, n;
105         bool ping = false;
106         int ioc, ret;
107         u32 abort_code;
108
109         _enter("%u,%s", call->debug_id, rxrpc_pkts[type]);
110
111         spin_lock_bh(&call->lock);
112         if (call->conn)
113                 conn = rxrpc_get_connection_maybe(call->conn);
114         spin_unlock_bh(&call->lock);
115         if (!conn)
116                 return -ECONNRESET;
117
118         pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
119         if (!pkt) {
120                 rxrpc_put_connection(conn);
121                 return -ENOMEM;
122         }
123
124         msg.msg_name    = &call->peer->srx.transport;
125         msg.msg_namelen = call->peer->srx.transport_len;
126         msg.msg_control = NULL;
127         msg.msg_controllen = 0;
128         msg.msg_flags   = 0;
129
130         pkt->whdr.epoch         = htonl(conn->proto.epoch);
131         pkt->whdr.cid           = htonl(call->cid);
132         pkt->whdr.callNumber    = htonl(call->call_id);
133         pkt->whdr.seq           = 0;
134         pkt->whdr.type          = type;
135         pkt->whdr.flags         = conn->out_clientflag;
136         pkt->whdr.userStatus    = 0;
137         pkt->whdr.securityIndex = call->security_ix;
138         pkt->whdr._rsvd         = 0;
139         pkt->whdr.serviceId     = htons(call->service_id);
140
141         iov[0].iov_base = pkt;
142         iov[0].iov_len  = sizeof(pkt->whdr);
143         len = sizeof(pkt->whdr);
144
145         switch (type) {
146         case RXRPC_PACKET_TYPE_ACK:
147                 spin_lock_bh(&call->lock);
148                 if (!call->ackr_reason) {
149                         spin_unlock_bh(&call->lock);
150                         ret = 0;
151                         goto out;
152                 }
153                 ping = (call->ackr_reason == RXRPC_ACK_PING);
154                 n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top);
155                 call->ackr_reason = 0;
156
157                 spin_unlock_bh(&call->lock);
158
159
160                 pkt->whdr.flags |= RXRPC_SLOW_START_OK;
161
162                 iov[0].iov_len += sizeof(pkt->ack) + n;
163                 iov[1].iov_base = &pkt->ackinfo;
164                 iov[1].iov_len  = sizeof(pkt->ackinfo);
165                 len += sizeof(pkt->ack) + n + sizeof(pkt->ackinfo);
166                 ioc = 2;
167                 break;
168
169         case RXRPC_PACKET_TYPE_ABORT:
170                 abort_code = call->abort_code;
171                 pkt->abort_code = htonl(abort_code);
172                 iov[0].iov_len += sizeof(pkt->abort_code);
173                 len += sizeof(pkt->abort_code);
174                 ioc = 1;
175                 break;
176
177         default:
178                 BUG();
179                 ret = -ENOANO;
180                 goto out;
181         }
182
183         serial = atomic_inc_return(&conn->serial);
184         pkt->whdr.serial = htonl(serial);
185         switch (type) {
186         case RXRPC_PACKET_TYPE_ACK:
187                 trace_rxrpc_tx_ack(call, serial,
188                                    ntohl(pkt->ack.firstPacket),
189                                    ntohl(pkt->ack.serial),
190                                    pkt->ack.reason, pkt->ack.nAcks);
191                 break;
192         }
193
194         if (ping) {
195                 call->ackr_ping = serial;
196                 smp_wmb();
197                 /* We need to stick a time in before we send the packet in case
198                  * the reply gets back before kernel_sendmsg() completes - but
199                  * asking UDP to send the packet can take a relatively long
200                  * time, so we update the time after, on the assumption that
201                  * the packet transmission is more likely to happen towards the
202                  * end of the kernel_sendmsg() call.
203                  */
204                 call->ackr_ping_time = ktime_get_real();
205                 set_bit(RXRPC_CALL_PINGING, &call->flags);
206                 trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_ping, serial);
207         }
208         ret = kernel_sendmsg(conn->params.local->socket,
209                              &msg, iov, ioc, len);
210         if (ping)
211                 call->ackr_ping_time = ktime_get_real();
212
213         if (type == RXRPC_PACKET_TYPE_ACK &&
214             call->state < RXRPC_CALL_COMPLETE) {
215                 if (ret < 0) {
216                         clear_bit(RXRPC_CALL_PINGING, &call->flags);
217                         rxrpc_propose_ACK(call, pkt->ack.reason,
218                                           ntohs(pkt->ack.maxSkew),
219                                           ntohl(pkt->ack.serial),
220                                           true, true,
221                                           rxrpc_propose_ack_retry_tx);
222                 } else {
223                         spin_lock_bh(&call->lock);
224                         if (after(hard_ack, call->ackr_consumed))
225                                 call->ackr_consumed = hard_ack;
226                         if (after(top, call->ackr_seen))
227                                 call->ackr_seen = top;
228                         spin_unlock_bh(&call->lock);
229                 }
230         }
231
232 out:
233         rxrpc_put_connection(conn);
234         kfree(pkt);
235         return ret;
236 }
237
238 /*
239  * send a packet through the transport endpoint
240  */
241 int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
242                            bool retrans)
243 {
244         struct rxrpc_connection *conn = call->conn;
245         struct rxrpc_wire_header whdr;
246         struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
247         struct msghdr msg;
248         struct kvec iov[2];
249         rxrpc_serial_t serial;
250         size_t len;
251         bool lost = false;
252         int ret, opt;
253
254         _enter(",{%d}", skb->len);
255
256         /* Each transmission of a Tx packet needs a new serial number */
257         serial = atomic_inc_return(&conn->serial);
258
259         whdr.epoch      = htonl(conn->proto.epoch);
260         whdr.cid        = htonl(call->cid);
261         whdr.callNumber = htonl(call->call_id);
262         whdr.seq        = htonl(sp->hdr.seq);
263         whdr.serial     = htonl(serial);
264         whdr.type       = RXRPC_PACKET_TYPE_DATA;
265         whdr.flags      = sp->hdr.flags;
266         whdr.userStatus = 0;
267         whdr.securityIndex = call->security_ix;
268         whdr._rsvd      = htons(sp->hdr._rsvd);
269         whdr.serviceId  = htons(call->service_id);
270
271         iov[0].iov_base = &whdr;
272         iov[0].iov_len = sizeof(whdr);
273         iov[1].iov_base = skb->head;
274         iov[1].iov_len = skb->len;
275         len = iov[0].iov_len + iov[1].iov_len;
276
277         msg.msg_name = &call->peer->srx.transport;
278         msg.msg_namelen = call->peer->srx.transport_len;
279         msg.msg_control = NULL;
280         msg.msg_controllen = 0;
281         msg.msg_flags = 0;
282
283         /* If our RTT cache needs working on, request an ACK.  Also request
284          * ACKs if a DATA packet appears to have been lost.
285          */
286         if (retrans ||
287             call->cong_mode == RXRPC_CALL_SLOW_START ||
288             (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) ||
289             ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
290                          ktime_get_real()))
291                 whdr.flags |= RXRPC_REQUEST_ACK;
292
293         if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
294                 static int lose;
295                 if ((lose++ & 7) == 7) {
296                         ret = 0;
297                         lost = true;
298                         goto done;
299                 }
300         }
301
302         _proto("Tx DATA %%%u { #%u }", serial, sp->hdr.seq);
303
304         /* send the packet with the don't fragment bit set if we currently
305          * think it's small enough */
306         if (iov[1].iov_len >= call->peer->maxdata)
307                 goto send_fragmentable;
308
309         down_read(&conn->params.local->defrag_sem);
310         /* send the packet by UDP
311          * - returns -EMSGSIZE if UDP would have to fragment the packet
312          *   to go out of the interface
313          *   - in which case, we'll have processed the ICMP error
314          *     message and update the peer record
315          */
316         ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
317
318         up_read(&conn->params.local->defrag_sem);
319         if (ret == -EMSGSIZE)
320                 goto send_fragmentable;
321
322 done:
323         trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags,
324                             retrans, lost);
325         if (ret >= 0) {
326                 ktime_t now = ktime_get_real();
327                 skb->tstamp = now;
328                 smp_wmb();
329                 sp->hdr.serial = serial;
330                 if (whdr.flags & RXRPC_REQUEST_ACK) {
331                         call->peer->rtt_last_req = now;
332                         trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial);
333                 }
334         }
335         _leave(" = %d [%u]", ret, call->peer->maxdata);
336         return ret;
337
338 send_fragmentable:
339         /* attempt to send this message with fragmentation enabled */
340         _debug("send fragment");
341
342         down_write(&conn->params.local->defrag_sem);
343
344         switch (conn->params.local->srx.transport.family) {
345         case AF_INET:
346                 opt = IP_PMTUDISC_DONT;
347                 ret = kernel_setsockopt(conn->params.local->socket,
348                                         SOL_IP, IP_MTU_DISCOVER,
349                                         (char *)&opt, sizeof(opt));
350                 if (ret == 0) {
351                         ret = kernel_sendmsg(conn->params.local->socket, &msg,
352                                              iov, 2, len);
353
354                         opt = IP_PMTUDISC_DO;
355                         kernel_setsockopt(conn->params.local->socket, SOL_IP,
356                                           IP_MTU_DISCOVER,
357                                           (char *)&opt, sizeof(opt));
358                 }
359                 break;
360
361 #ifdef CONFIG_AF_RXRPC_IPV6
362         case AF_INET6:
363                 opt = IPV6_PMTUDISC_DONT;
364                 ret = kernel_setsockopt(conn->params.local->socket,
365                                         SOL_IPV6, IPV6_MTU_DISCOVER,
366                                         (char *)&opt, sizeof(opt));
367                 if (ret == 0) {
368                         ret = kernel_sendmsg(conn->params.local->socket, &msg,
369                                              iov, 1, iov[0].iov_len);
370
371                         opt = IPV6_PMTUDISC_DO;
372                         kernel_setsockopt(conn->params.local->socket,
373                                           SOL_IPV6, IPV6_MTU_DISCOVER,
374                                           (char *)&opt, sizeof(opt));
375                 }
376                 break;
377 #endif
378         }
379
380         up_write(&conn->params.local->defrag_sem);
381         goto done;
382 }
383
384 /*
385  * reject packets through the local endpoint
386  */
387 void rxrpc_reject_packets(struct rxrpc_local *local)
388 {
389         struct sockaddr_rxrpc srx;
390         struct rxrpc_skb_priv *sp;
391         struct rxrpc_wire_header whdr;
392         struct sk_buff *skb;
393         struct msghdr msg;
394         struct kvec iov[2];
395         size_t size;
396         __be32 code;
397
398         _enter("%d", local->debug_id);
399
400         iov[0].iov_base = &whdr;
401         iov[0].iov_len = sizeof(whdr);
402         iov[1].iov_base = &code;
403         iov[1].iov_len = sizeof(code);
404         size = sizeof(whdr) + sizeof(code);
405
406         msg.msg_name = &srx.transport;
407         msg.msg_control = NULL;
408         msg.msg_controllen = 0;
409         msg.msg_flags = 0;
410
411         memset(&whdr, 0, sizeof(whdr));
412         whdr.type = RXRPC_PACKET_TYPE_ABORT;
413
414         while ((skb = skb_dequeue(&local->reject_queue))) {
415                 rxrpc_see_skb(skb, rxrpc_skb_rx_seen);
416                 sp = rxrpc_skb(skb);
417
418                 if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) {
419                         msg.msg_namelen = srx.transport_len;
420
421                         code = htonl(skb->priority);
422
423                         whdr.epoch      = htonl(sp->hdr.epoch);
424                         whdr.cid        = htonl(sp->hdr.cid);
425                         whdr.callNumber = htonl(sp->hdr.callNumber);
426                         whdr.serviceId  = htons(sp->hdr.serviceId);
427                         whdr.flags      = sp->hdr.flags;
428                         whdr.flags      ^= RXRPC_CLIENT_INITIATED;
429                         whdr.flags      &= RXRPC_CLIENT_INITIATED;
430
431                         kernel_sendmsg(local->socket, &msg, iov, 2, size);
432                 }
433
434                 rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
435         }
436
437         _leave("");
438 }