Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
[cascardo/linux.git] / net / rxrpc / sendmsg.c
1 /* AF_RXRPC sendmsg() implementation.
2  *
3  * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved.
4  * Written by David Howells (dhowells@redhat.com)
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public Licence
8  * as published by the Free Software Foundation; either version
9  * 2 of the Licence, or (at your option) any later version.
10  */
11
12 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
14 #include <linux/net.h>
15 #include <linux/gfp.h>
16 #include <linux/skbuff.h>
17 #include <linux/export.h>
18 #include <net/sock.h>
19 #include <net/af_rxrpc.h>
20 #include "ar-internal.h"
21
/*
 * Operation selected by the control messages attached to a sendmsg() call.
 * Sending data is the operation assumed when no control message overrides it.
 */
enum rxrpc_command {
        RXRPC_CMD_SEND_DATA = 0,        /* transmit call data */
        RXRPC_CMD_SEND_ABORT,           /* ask for an ABORT to be generated */
        RXRPC_CMD_ACCEPT,               /* [server] take on an incoming call */
        RXRPC_CMD_REJECT_BUSY,          /* [server] turn a call away as busy */
};
28
29 /*
30  * wait for space to appear in the transmit/ACK window
31  * - caller holds the socket locked
32  */
33 static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
34                                     struct rxrpc_call *call,
35                                     long *timeo)
36 {
37         DECLARE_WAITQUEUE(myself, current);
38         int ret;
39
40         _enter(",{%u,%u,%u}",
41                call->tx_hard_ack, call->tx_top, call->tx_winsize);
42
43         add_wait_queue(&call->waitq, &myself);
44
45         for (;;) {
46                 set_current_state(TASK_INTERRUPTIBLE);
47                 ret = 0;
48                 if (call->tx_top - call->tx_hard_ack <
49                     min_t(unsigned int, call->tx_winsize,
50                           call->cong_cwnd + call->cong_extra))
51                         break;
52                 if (call->state >= RXRPC_CALL_COMPLETE) {
53                         ret = -call->error;
54                         break;
55                 }
56                 if (signal_pending(current)) {
57                         ret = sock_intr_errno(*timeo);
58                         break;
59                 }
60
61                 trace_rxrpc_transmit(call, rxrpc_transmit_wait);
62                 release_sock(&rx->sk);
63                 *timeo = schedule_timeout(*timeo);
64                 lock_sock(&rx->sk);
65         }
66
67         remove_wait_queue(&call->waitq, &myself);
68         set_current_state(TASK_RUNNING);
69         _leave(" = %d", ret);
70         return ret;
71 }
72
73 /*
74  * Schedule an instant Tx resend.
75  */
76 static inline void rxrpc_instant_resend(struct rxrpc_call *call, int ix)
77 {
78         spin_lock_bh(&call->lock);
79
80         if (call->state < RXRPC_CALL_COMPLETE) {
81                 call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS;
82                 if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
83                         rxrpc_queue_call(call);
84         }
85
86         spin_unlock_bh(&call->lock);
87 }
88
89 /*
90  * Queue a DATA packet for transmission, set the resend timeout and send the
91  * packet immediately
92  */
93 static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
94                                bool last)
95 {
96         struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
97         rxrpc_seq_t seq = sp->hdr.seq;
98         int ret, ix;
99         u8 annotation = RXRPC_TX_ANNO_UNACK;
100
101         _net("queue skb %p [%d]", skb, seq);
102
103         ASSERTCMP(seq, ==, call->tx_top + 1);
104
105         if (last)
106                 annotation |= RXRPC_TX_ANNO_LAST;
107
108         /* We have to set the timestamp before queueing as the retransmit
109          * algorithm can see the packet as soon as we queue it.
110          */
111         skb->tstamp = ktime_get_real();
112
113         ix = seq & RXRPC_RXTX_BUFF_MASK;
114         rxrpc_get_skb(skb, rxrpc_skb_tx_got);
115         call->rxtx_annotations[ix] = annotation;
116         smp_wmb();
117         call->rxtx_buffer[ix] = skb;
118         call->tx_top = seq;
119         if (last)
120                 trace_rxrpc_transmit(call, rxrpc_transmit_queue_last);
121         else
122                 trace_rxrpc_transmit(call, rxrpc_transmit_queue);
123
124         if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) {
125                 _debug("________awaiting reply/ACK__________");
126                 write_lock_bh(&call->state_lock);
127                 switch (call->state) {
128                 case RXRPC_CALL_CLIENT_SEND_REQUEST:
129                         call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
130                         break;
131                 case RXRPC_CALL_SERVER_ACK_REQUEST:
132                         call->state = RXRPC_CALL_SERVER_SEND_REPLY;
133                         call->ack_at = call->expire_at;
134                         if (call->ackr_reason == RXRPC_ACK_DELAY)
135                                 call->ackr_reason = 0;
136                         __rxrpc_set_timer(call, rxrpc_timer_init_for_send_reply,
137                                           ktime_get_real());
138                         if (!last)
139                                 break;
140                 case RXRPC_CALL_SERVER_SEND_REPLY:
141                         call->state = RXRPC_CALL_SERVER_AWAIT_ACK;
142                         break;
143                 default:
144                         break;
145                 }
146                 write_unlock_bh(&call->state_lock);
147         }
148
149         if (seq == 1 && rxrpc_is_client_call(call))
150                 rxrpc_expose_client_call(call);
151
152         ret = rxrpc_send_data_packet(call, skb, false);
153         if (ret < 0) {
154                 _debug("need instant resend %d", ret);
155                 rxrpc_instant_resend(call, ix);
156         } else {
157                 ktime_t now = ktime_get_real(), resend_at;
158
159                 resend_at = ktime_add_ms(now, rxrpc_resend_timeout);
160
161                 if (ktime_before(resend_at, call->resend_at)) {
162                         call->resend_at = resend_at;
163                         rxrpc_set_timer(call, rxrpc_timer_set_for_send, now);
164                 }
165         }
166
167         rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
168         _leave("");
169 }
170
171 /*
172  * send data through a socket
173  * - must be called in process context
174  * - caller holds the socket locked
175  */
176 static int rxrpc_send_data(struct rxrpc_sock *rx,
177                            struct rxrpc_call *call,
178                            struct msghdr *msg, size_t len)
179 {
180         struct rxrpc_skb_priv *sp;
181         struct sk_buff *skb;
182         struct sock *sk = &rx->sk;
183         long timeo;
184         bool more;
185         int ret, copied;
186
187         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
188
189         /* this should be in poll */
190         sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
191
192         if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
193                 return -EPIPE;
194
195         more = msg->msg_flags & MSG_MORE;
196
197         skb = call->tx_pending;
198         call->tx_pending = NULL;
199         rxrpc_see_skb(skb, rxrpc_skb_tx_seen);
200
201         copied = 0;
202         do {
203                 /* Check to see if there's a ping ACK to reply to. */
204                 if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE)
205                         rxrpc_send_ack_packet(call, false);
206
207                 if (!skb) {
208                         size_t size, chunk, max, space;
209
210                         _debug("alloc");
211
212                         if (call->tx_top - call->tx_hard_ack >=
213                             min_t(unsigned int, call->tx_winsize,
214                                   call->cong_cwnd + call->cong_extra)) {
215                                 ret = -EAGAIN;
216                                 if (msg->msg_flags & MSG_DONTWAIT)
217                                         goto maybe_error;
218                                 ret = rxrpc_wait_for_tx_window(rx, call,
219                                                                &timeo);
220                                 if (ret < 0)
221                                         goto maybe_error;
222                         }
223
224                         max = RXRPC_JUMBO_DATALEN;
225                         max -= call->conn->security_size;
226                         max &= ~(call->conn->size_align - 1UL);
227
228                         chunk = max;
229                         if (chunk > msg_data_left(msg) && !more)
230                                 chunk = msg_data_left(msg);
231
232                         space = chunk + call->conn->size_align;
233                         space &= ~(call->conn->size_align - 1UL);
234
235                         size = space + call->conn->security_size;
236
237                         _debug("SIZE: %zu/%zu/%zu", chunk, space, size);
238
239                         /* create a buffer that we can retain until it's ACK'd */
240                         skb = sock_alloc_send_skb(
241                                 sk, size, msg->msg_flags & MSG_DONTWAIT, &ret);
242                         if (!skb)
243                                 goto maybe_error;
244
245                         rxrpc_new_skb(skb, rxrpc_skb_tx_new);
246
247                         _debug("ALLOC SEND %p", skb);
248
249                         ASSERTCMP(skb->mark, ==, 0);
250
251                         _debug("HS: %u", call->conn->security_size);
252                         skb_reserve(skb, call->conn->security_size);
253                         skb->len += call->conn->security_size;
254
255                         sp = rxrpc_skb(skb);
256                         sp->remain = chunk;
257                         if (sp->remain > skb_tailroom(skb))
258                                 sp->remain = skb_tailroom(skb);
259
260                         _net("skb: hr %d, tr %d, hl %d, rm %d",
261                                skb_headroom(skb),
262                                skb_tailroom(skb),
263                                skb_headlen(skb),
264                                sp->remain);
265
266                         skb->ip_summed = CHECKSUM_UNNECESSARY;
267                 }
268
269                 _debug("append");
270                 sp = rxrpc_skb(skb);
271
272                 /* append next segment of data to the current buffer */
273                 if (msg_data_left(msg) > 0) {
274                         int copy = skb_tailroom(skb);
275                         ASSERTCMP(copy, >, 0);
276                         if (copy > msg_data_left(msg))
277                                 copy = msg_data_left(msg);
278                         if (copy > sp->remain)
279                                 copy = sp->remain;
280
281                         _debug("add");
282                         ret = skb_add_data(skb, &msg->msg_iter, copy);
283                         _debug("added");
284                         if (ret < 0)
285                                 goto efault;
286                         sp->remain -= copy;
287                         skb->mark += copy;
288                         copied += copy;
289                 }
290
291                 /* check for the far side aborting the call or a network error
292                  * occurring */
293                 if (call->state == RXRPC_CALL_COMPLETE)
294                         goto call_terminated;
295
296                 /* add the packet to the send queue if it's now full */
297                 if (sp->remain <= 0 ||
298                     (msg_data_left(msg) == 0 && !more)) {
299                         struct rxrpc_connection *conn = call->conn;
300                         uint32_t seq;
301                         size_t pad;
302
303                         /* pad out if we're using security */
304                         if (conn->security_ix) {
305                                 pad = conn->security_size + skb->mark;
306                                 pad = conn->size_align - pad;
307                                 pad &= conn->size_align - 1;
308                                 _debug("pad %zu", pad);
309                                 if (pad)
310                                         memset(skb_put(skb, pad), 0, pad);
311                         }
312
313                         seq = call->tx_top + 1;
314
315                         sp->hdr.seq     = seq;
316                         sp->hdr._rsvd   = 0;
317                         sp->hdr.flags   = conn->out_clientflag;
318
319                         if (msg_data_left(msg) == 0 && !more)
320                                 sp->hdr.flags |= RXRPC_LAST_PACKET;
321                         else if (call->tx_top - call->tx_hard_ack <
322                                  call->tx_winsize)
323                                 sp->hdr.flags |= RXRPC_MORE_PACKETS;
324
325                         ret = conn->security->secure_packet(
326                                 call, skb, skb->mark, skb->head);
327                         if (ret < 0)
328                                 goto out;
329
330                         rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more);
331                         skb = NULL;
332                 }
333         } while (msg_data_left(msg) > 0);
334
335 success:
336         ret = copied;
337 out:
338         call->tx_pending = skb;
339         _leave(" = %d", ret);
340         return ret;
341
342 call_terminated:
343         rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
344         _leave(" = %d", -call->error);
345         return -call->error;
346
347 maybe_error:
348         if (copied)
349                 goto success;
350         goto out;
351
352 efault:
353         ret = -EFAULT;
354         goto out;
355 }
356
357 /*
358  * extract control messages from the sendmsg() control buffer
359  */
360 static int rxrpc_sendmsg_cmsg(struct msghdr *msg,
361                               unsigned long *user_call_ID,
362                               enum rxrpc_command *command,
363                               u32 *abort_code,
364                               bool *_exclusive)
365 {
366         struct cmsghdr *cmsg;
367         bool got_user_ID = false;
368         int len;
369
370         *command = RXRPC_CMD_SEND_DATA;
371
372         if (msg->msg_controllen == 0)
373                 return -EINVAL;
374
375         for_each_cmsghdr(cmsg, msg) {
376                 if (!CMSG_OK(msg, cmsg))
377                         return -EINVAL;
378
379                 len = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
380                 _debug("CMSG %d, %d, %d",
381                        cmsg->cmsg_level, cmsg->cmsg_type, len);
382
383                 if (cmsg->cmsg_level != SOL_RXRPC)
384                         continue;
385
386                 switch (cmsg->cmsg_type) {
387                 case RXRPC_USER_CALL_ID:
388                         if (msg->msg_flags & MSG_CMSG_COMPAT) {
389                                 if (len != sizeof(u32))
390                                         return -EINVAL;
391                                 *user_call_ID = *(u32 *) CMSG_DATA(cmsg);
392                         } else {
393                                 if (len != sizeof(unsigned long))
394                                         return -EINVAL;
395                                 *user_call_ID = *(unsigned long *)
396                                         CMSG_DATA(cmsg);
397                         }
398                         _debug("User Call ID %lx", *user_call_ID);
399                         got_user_ID = true;
400                         break;
401
402                 case RXRPC_ABORT:
403                         if (*command != RXRPC_CMD_SEND_DATA)
404                                 return -EINVAL;
405                         *command = RXRPC_CMD_SEND_ABORT;
406                         if (len != sizeof(*abort_code))
407                                 return -EINVAL;
408                         *abort_code = *(unsigned int *) CMSG_DATA(cmsg);
409                         _debug("Abort %x", *abort_code);
410                         if (*abort_code == 0)
411                                 return -EINVAL;
412                         break;
413
414                 case RXRPC_ACCEPT:
415                         if (*command != RXRPC_CMD_SEND_DATA)
416                                 return -EINVAL;
417                         *command = RXRPC_CMD_ACCEPT;
418                         if (len != 0)
419                                 return -EINVAL;
420                         break;
421
422                 case RXRPC_EXCLUSIVE_CALL:
423                         *_exclusive = true;
424                         if (len != 0)
425                                 return -EINVAL;
426                         break;
427                 default:
428                         return -EINVAL;
429                 }
430         }
431
432         if (!got_user_ID)
433                 return -EINVAL;
434         _leave(" = 0");
435         return 0;
436 }
437
438 /*
439  * Create a new client call for sendmsg().
440  */
441 static struct rxrpc_call *
442 rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
443                                   unsigned long user_call_ID, bool exclusive)
444 {
445         struct rxrpc_conn_parameters cp;
446         struct rxrpc_call *call;
447         struct key *key;
448
449         DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx, msg->msg_name);
450
451         _enter("");
452
453         if (!msg->msg_name)
454                 return ERR_PTR(-EDESTADDRREQ);
455
456         key = rx->key;
457         if (key && !rx->key->payload.data[0])
458                 key = NULL;
459
460         memset(&cp, 0, sizeof(cp));
461         cp.local                = rx->local;
462         cp.key                  = rx->key;
463         cp.security_level       = rx->min_sec_level;
464         cp.exclusive            = rx->exclusive | exclusive;
465         cp.service_id           = srx->srx_service;
466         call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL);
467
468         _leave(" = %p\n", call);
469         return call;
470 }
471
472 /*
473  * send a message forming part of a client call through an RxRPC socket
474  * - caller holds the socket locked
475  * - the socket may be either a client socket or a server socket
476  */
477 int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
478 {
479         enum rxrpc_command cmd;
480         struct rxrpc_call *call;
481         unsigned long user_call_ID = 0;
482         bool exclusive = false;
483         u32 abort_code = 0;
484         int ret;
485
486         _enter("");
487
488         ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code,
489                                  &exclusive);
490         if (ret < 0)
491                 return ret;
492
493         if (cmd == RXRPC_CMD_ACCEPT) {
494                 if (rx->sk.sk_state != RXRPC_SERVER_LISTENING)
495                         return -EINVAL;
496                 call = rxrpc_accept_call(rx, user_call_ID, NULL);
497                 if (IS_ERR(call))
498                         return PTR_ERR(call);
499                 rxrpc_put_call(call, rxrpc_call_put);
500                 return 0;
501         }
502
503         call = rxrpc_find_call_by_user_ID(rx, user_call_ID);
504         if (!call) {
505                 if (cmd != RXRPC_CMD_SEND_DATA)
506                         return -EBADSLT;
507                 call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID,
508                                                          exclusive);
509                 if (IS_ERR(call))
510                         return PTR_ERR(call);
511         }
512
513         _debug("CALL %d USR %lx ST %d on CONN %p",
514                call->debug_id, call->user_call_ID, call->state, call->conn);
515
516         if (call->state >= RXRPC_CALL_COMPLETE) {
517                 /* it's too late for this call */
518                 ret = -ESHUTDOWN;
519         } else if (cmd == RXRPC_CMD_SEND_ABORT) {
520                 ret = 0;
521                 if (rxrpc_abort_call("CMD", call, 0, abort_code, ECONNABORTED))
522                         ret = rxrpc_send_abort_packet(call);
523         } else if (cmd != RXRPC_CMD_SEND_DATA) {
524                 ret = -EINVAL;
525         } else if (rxrpc_is_client_call(call) &&
526                    call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
527                 /* request phase complete for this client call */
528                 ret = -EPROTO;
529         } else if (rxrpc_is_service_call(call) &&
530                    call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
531                    call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
532                 /* Reply phase not begun or not complete for service call. */
533                 ret = -EPROTO;
534         } else {
535                 ret = rxrpc_send_data(rx, call, msg, len);
536         }
537
538         rxrpc_put_call(call, rxrpc_call_put);
539         _leave(" = %d", ret);
540         return ret;
541 }
542
543 /**
544  * rxrpc_kernel_send_data - Allow a kernel service to send data on a call
545  * @sock: The socket the call is on
546  * @call: The call to send data through
547  * @msg: The data to send
548  * @len: The amount of data to send
549  *
550  * Allow a kernel service to send data on a call.  The call must be in an state
551  * appropriate to sending data.  No control data should be supplied in @msg,
552  * nor should an address be supplied.  MSG_MORE should be flagged if there's
553  * more data to come, otherwise this data will end the transmission phase.
554  */
555 int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
556                            struct msghdr *msg, size_t len)
557 {
558         int ret;
559
560         _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]);
561
562         ASSERTCMP(msg->msg_name, ==, NULL);
563         ASSERTCMP(msg->msg_control, ==, NULL);
564
565         lock_sock(sock->sk);
566
567         _debug("CALL %d USR %lx ST %d on CONN %p",
568                call->debug_id, call->user_call_ID, call->state, call->conn);
569
570         if (call->state >= RXRPC_CALL_COMPLETE) {
571                 ret = -ESHUTDOWN; /* it's too late for this call */
572         } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
573                    call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
574                    call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
575                 ret = -EPROTO; /* request phase complete for this client call */
576         } else {
577                 ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len);
578         }
579
580         release_sock(sock->sk);
581         _leave(" = %d", ret);
582         return ret;
583 }
584 EXPORT_SYMBOL(rxrpc_kernel_send_data);
585
586 /**
587  * rxrpc_kernel_abort_call - Allow a kernel service to abort a call
588  * @sock: The socket the call is on
589  * @call: The call to be aborted
590  * @abort_code: The abort code to stick into the ABORT packet
591  * @error: Local error value
592  * @why: 3-char string indicating why.
593  *
594  * Allow a kernel service to abort a call, if it's still in an abortable state.
595  */
596 void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call,
597                              u32 abort_code, int error, const char *why)
598 {
599         _enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why);
600
601         lock_sock(sock->sk);
602
603         if (rxrpc_abort_call(why, call, 0, abort_code, error))
604                 rxrpc_send_abort_packet(call);
605
606         release_sock(sock->sk);
607         _leave("");
608 }
609
610 EXPORT_SYMBOL(rxrpc_kernel_abort_call);