-static void kcm_start_rx_timer(struct kcm_psock *psock)
-{
- if (psock->sk->sk_rcvtimeo)
- mod_timer(&psock->rx_msg_timer, psock->sk->sk_rcvtimeo);
-}
-
-/* Macro to invoke filter function. */
-#define KCM_RUN_FILTER(prog, ctx) \
- (*prog->bpf_func)(ctx, prog->insnsi)
-
-/* Lower socket lock held */
-static int kcm_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
- unsigned int orig_offset, size_t orig_len)
-{
- struct kcm_psock *psock = (struct kcm_psock *)desc->arg.data;
- struct kcm_rx_msg *rxm;
- struct kcm_sock *kcm;
- struct sk_buff *head, *skb;
- size_t eaten = 0, cand_len;
- ssize_t extra;
- int err;
- bool cloned_orig = false;
-
- if (psock->ready_rx_msg)
- return 0;
-
- head = psock->rx_skb_head;
- if (head) {
- /* Message already in progress */
-
- rxm = kcm_rx_msg(head);
- if (unlikely(rxm->early_eaten)) {
- /* Already some number of bytes on the receive sock
- * data saved in rx_skb_head, just indicate they
- * are consumed.
- */
- eaten = orig_len <= rxm->early_eaten ?
- orig_len : rxm->early_eaten;
- rxm->early_eaten -= eaten;
-
- return eaten;
- }
-
- if (unlikely(orig_offset)) {
- /* Getting data with a non-zero offset when a message is
- * in progress is not expected. If it does happen, we
- * need to clone and pull since we can't deal with
- * offsets in the skbs for a message expect in the head.
- */
- orig_skb = skb_clone(orig_skb, GFP_ATOMIC);
- if (!orig_skb) {
- KCM_STATS_INCR(psock->stats.rx_mem_fail);
- desc->error = -ENOMEM;
- return 0;
- }
- if (!pskb_pull(orig_skb, orig_offset)) {
- KCM_STATS_INCR(psock->stats.rx_mem_fail);
- kfree_skb(orig_skb);
- desc->error = -ENOMEM;
- return 0;
- }
- cloned_orig = true;
- orig_offset = 0;
- }
-
- if (!psock->rx_skb_nextp) {
- /* We are going to append to the frags_list of head.
- * Need to unshare the frag_list.
- */
- err = skb_unclone(head, GFP_ATOMIC);
- if (err) {
- KCM_STATS_INCR(psock->stats.rx_mem_fail);
- desc->error = err;
- return 0;
- }
-
- if (unlikely(skb_shinfo(head)->frag_list)) {
- /* We can't append to an sk_buff that already
- * has a frag_list. We create a new head, point
- * the frag_list of that to the old head, and
- * then are able to use the old head->next for
- * appending to the message.
- */
- if (WARN_ON(head->next)) {
- desc->error = -EINVAL;
- return 0;
- }
-
- skb = alloc_skb(0, GFP_ATOMIC);
- if (!skb) {
- KCM_STATS_INCR(psock->stats.rx_mem_fail);
- desc->error = -ENOMEM;
- return 0;
- }
- skb->len = head->len;
- skb->data_len = head->len;
- skb->truesize = head->truesize;
- *kcm_rx_msg(skb) = *kcm_rx_msg(head);
- psock->rx_skb_nextp = &head->next;
- skb_shinfo(skb)->frag_list = head;
- psock->rx_skb_head = skb;
- head = skb;
- } else {
- psock->rx_skb_nextp =
- &skb_shinfo(head)->frag_list;
- }
- }
- }
-
- while (eaten < orig_len) {
- /* Always clone since we will consume something */
- skb = skb_clone(orig_skb, GFP_ATOMIC);
- if (!skb) {
- KCM_STATS_INCR(psock->stats.rx_mem_fail);
- desc->error = -ENOMEM;
- break;
- }
-
- cand_len = orig_len - eaten;
-
- head = psock->rx_skb_head;
- if (!head) {
- head = skb;
- psock->rx_skb_head = head;
- /* Will set rx_skb_nextp on next packet if needed */
- psock->rx_skb_nextp = NULL;
- rxm = kcm_rx_msg(head);
- memset(rxm, 0, sizeof(*rxm));
- rxm->offset = orig_offset + eaten;
- } else {
- /* Unclone since we may be appending to an skb that we
- * already share a frag_list with.
- */
- err = skb_unclone(skb, GFP_ATOMIC);
- if (err) {
- KCM_STATS_INCR(psock->stats.rx_mem_fail);
- desc->error = err;
- break;
- }
-
- rxm = kcm_rx_msg(head);
- *psock->rx_skb_nextp = skb;
- psock->rx_skb_nextp = &skb->next;
- head->data_len += skb->len;
- head->len += skb->len;
- head->truesize += skb->truesize;
- }
-
- if (!rxm->full_len) {
- ssize_t len;
-
- len = KCM_RUN_FILTER(psock->bpf_prog, head);
-
- if (!len) {
- /* Need more header to determine length */
- if (!rxm->accum_len) {
- /* Start RX timer for new message */
- kcm_start_rx_timer(psock);
- }
- rxm->accum_len += cand_len;
- eaten += cand_len;
- KCM_STATS_INCR(psock->stats.rx_need_more_hdr);
- WARN_ON(eaten != orig_len);
- break;
- } else if (len > psock->sk->sk_rcvbuf) {
- /* Message length exceeds maximum allowed */
- KCM_STATS_INCR(psock->stats.rx_msg_too_big);
- desc->error = -EMSGSIZE;
- psock->rx_skb_head = NULL;
- kcm_abort_rx_psock(psock, EMSGSIZE, head);
- break;
- } else if (len <= (ssize_t)head->len -
- skb->len - rxm->offset) {
- /* Length must be into new skb (and also
- * greater than zero)
- */
- KCM_STATS_INCR(psock->stats.rx_bad_hdr_len);
- desc->error = -EPROTO;
- psock->rx_skb_head = NULL;
- kcm_abort_rx_psock(psock, EPROTO, head);
- break;
- }
-
- rxm->full_len = len;
- }
-
- extra = (ssize_t)(rxm->accum_len + cand_len) - rxm->full_len;
-
- if (extra < 0) {
- /* Message not complete yet. */
- if (rxm->full_len - rxm->accum_len >
- tcp_inq(psock->sk)) {
- /* Don't have the whole messages in the socket
- * buffer. Set psock->rx_need_bytes to wait for
- * the rest of the message. Also, set "early
- * eaten" since we've already buffered the skb
- * but don't consume yet per tcp_read_sock.
- */
-
- if (!rxm->accum_len) {
- /* Start RX timer for new message */
- kcm_start_rx_timer(psock);
- }
-
- psock->rx_need_bytes = rxm->full_len -
- rxm->accum_len;
- rxm->accum_len += cand_len;
- rxm->early_eaten = cand_len;
- KCM_STATS_ADD(psock->stats.rx_bytes, cand_len);
- desc->count = 0; /* Stop reading socket */
- break;
- }
- rxm->accum_len += cand_len;
- eaten += cand_len;
- WARN_ON(eaten != orig_len);
- break;
- }
-
- /* Positive extra indicates ore bytes than needed for the
- * message
- */
-
- WARN_ON(extra > cand_len);
-
- eaten += (cand_len - extra);
-
- /* Hurray, we have a new message! */
- del_timer(&psock->rx_msg_timer);
- psock->rx_skb_head = NULL;
- KCM_STATS_INCR(psock->stats.rx_msgs);
-
-try_queue:
- kcm = reserve_rx_kcm(psock, head);
- if (!kcm) {
- /* Unable to reserve a KCM, message is held in psock. */
- break;
- }
-
- if (kcm_queue_rcv_skb(&kcm->sk, head)) {
- /* Should mean socket buffer full */
- unreserve_rx_kcm(psock, false);
- goto try_queue;
- }
- }
-
- if (cloned_orig)
- kfree_skb(orig_skb);
-
- KCM_STATS_ADD(psock->stats.rx_bytes, eaten);
-
- return eaten;
-}
-
-/* Called with lock held on lower socket */
-static int psock_tcp_read_sock(struct kcm_psock *psock)
-{
- read_descriptor_t desc;
-
- desc.arg.data = psock;
- desc.error = 0;
- desc.count = 1; /* give more than one skb per call */
-
- /* sk should be locked here, so okay to do tcp_read_sock */
- tcp_read_sock(psock->sk, &desc, kcm_tcp_recv);
-
- unreserve_rx_kcm(psock, true);
-
- return desc.error;
-}
-