drivers/net/xen-netback/rx.c

   1 /*
   2  * Copyright (c) 2016 Citrix Systems Inc.
   3  * Copyright (c) 2002-2005, K A Fraser
   4  *
   5  * This program is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU General Public License version 2
   7  * as published by the Free Software Foundation; or, when distributed
   8  * separately from the Linux kernel or incorporated into other
   9  * software packages, subject to the following license:
  10  *
  11  * Permission is hereby granted, free of charge, to any person obtaining a copy
  12  * of this source file (the "Software"), to deal in the Software without
  13  * restriction, including without limitation the rights to use, copy, modify,
  14  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
  15  * and to permit persons to whom the Software is furnished to do so, subject to
  16  * the following conditions:
  17  *
  18  * The above copyright notice and this permission notice shall be included in
  19  * all copies or substantial portions of the Software.
  20  *
  21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  22  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  23  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  24  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  25  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  26  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  27  * IN THE SOFTWARE.
  28  */
  29 #include "common.h"
  30
  31 #include <linux/kthread.h>
  32
  33 #include <xen/xen.h>
  34 #include <xen/events.h>
  35
  36 static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue)
  37 {
  38         RING_IDX prod, cons;
  39         struct sk_buff *skb;
  40         int needed;
  41
  42         skb = skb_peek(&queue->rx_queue);
  43         if (!skb)
  44                 return false;
  45
  46         needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE);
  47         if (skb_is_gso(skb))
  48                 needed++;
  49         if (skb->sw_hash)
  50                 needed++;
  51
  52         do {
  53                 prod = queue->rx.sring->req_prod;
  54                 cons = queue->rx.req_cons;
  55
  56                 if (prod - cons >= needed)
  57                         return true;
  58
  59                 queue->rx.sring->req_event = prod + 1;
  60
  61                 /* Make sure event is visible before we check prod
  62                  * again.
  63                  */
  64                 mb();
  65         } while (queue->rx.sring->req_prod != prod);
  66
  67         return false;
  68 }
  69
  70 void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
  71 {
  72         unsigned long flags;
  73
  74         spin_lock_irqsave(&queue->rx_queue.lock, flags);
  75
  76         __skb_queue_tail(&queue->rx_queue, skb);
  77
  78         queue->rx_queue_len += skb->len;
  79         if (queue->rx_queue_len > queue->rx_queue_max) {
  80                 struct net_device *dev = queue->vif->dev;
  81
  82                 netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
  83         }
  84
  85         spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
  86 }
  87
  88 static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue)
  89 {
  90         struct sk_buff *skb;
  91
  92         spin_lock_irq(&queue->rx_queue.lock);
  93
  94         skb = __skb_dequeue(&queue->rx_queue);
  95         if (skb)
  96                 queue->rx_queue_len -= skb->len;
  97
  98         spin_unlock_irq(&queue->rx_queue.lock);
  99
 100         return skb;
 101 }
 102
 103 static void xenvif_rx_queue_maybe_wake(struct xenvif_queue *queue)
 104 {
 105         spin_lock_irq(&queue->rx_queue.lock);
 106
 107         if (queue->rx_queue_len < queue->rx_queue_max) {
 108                 struct net_device *dev = queue->vif->dev;
 109
 110                 netif_tx_wake_queue(netdev_get_tx_queue(dev, queue->id));
 111         }
 112
 113         spin_unlock_irq(&queue->rx_queue.lock);
 114 }
 115
 116 static void xenvif_rx_queue_purge(struct xenvif_queue *queue)
 117 {
 118         struct sk_buff *skb;
 119
 120         while ((skb = xenvif_rx_dequeue(queue)) != NULL)
 121                 kfree_skb(skb);
 122 }
 123
 124 static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue)
 125 {
 126         struct sk_buff *skb;
 127
 128         for (;;) {
 129                 skb = skb_peek(&queue->rx_queue);
 130                 if (!skb)
 131                         break;
 132                 if (time_before(jiffies, XENVIF_RX_CB(skb)->expires))
 133                         break;
 134                 xenvif_rx_dequeue(queue);
 135                 kfree_skb(skb);
 136         }
 137 }
 138
 139 static void xenvif_rx_copy_flush(struct xenvif_queue *queue)
 140 {
 141         unsigned int i;
 142
 143         gnttab_batch_copy(queue->rx_copy.op, queue->rx_copy.num);
 144
 145         for (i = 0; i < queue->rx_copy.num; i++) {
 146                 struct gnttab_copy *op;
 147
 148                 op = &queue->rx_copy.op[i];
 149
 150                 /* If the copy failed, overwrite the status field in
 151                  * the corresponding response.
 152                  */
 153                 if (unlikely(op->status != GNTST_okay)) {
 154                         struct xen_netif_rx_response *rsp;
 155
 156                         rsp = RING_GET_RESPONSE(&queue->rx,
 157                                                 queue->rx_copy.idx[i]);
 158                         rsp->status = op->status;
 159                 }
 160         }
 161
 162         queue->rx_copy.num = 0;
 163 }
 164
 165 static void xenvif_rx_copy_add(struct xenvif_queue *queue,
 166                                struct xen_netif_rx_request *req,
 167                                unsigned int offset, void *data, size_t len)
 168 {
 169         struct gnttab_copy *op;
 170         struct page *page;
 171         struct xen_page_foreign *foreign;
 172
 173         if (queue->rx_copy.num == COPY_BATCH_SIZE)
 174                 xenvif_rx_copy_flush(queue);
 175
 176         op = &queue->rx_copy.op[queue->rx_copy.num];
 177
 178         page = virt_to_page(data);
 179
 180         op->flags = GNTCOPY_dest_gref;
 181
 182         foreign = xen_page_foreign(page);
 183         if (foreign) {
 184                 op->source.domid = foreign->domid;
 185                 op->source.u.ref = foreign->gref;
 186                 op->flags |= GNTCOPY_source_gref;
 187         } else {
 188                 op->source.u.gmfn = virt_to_gfn(data);
 189                 op->source.domid  = DOMID_SELF;
 190         }
 191
 192         op->source.offset = xen_offset_in_page(data);
 193         op->dest.u.ref    = req->gref;
 194         op->dest.domid    = queue->vif->domid;
 195         op->dest.offset   = offset;
 196         op->len           = len;
 197
 198         queue->rx_copy.idx[queue->rx_copy.num] = queue->rx.req_cons;
 199         queue->rx_copy.num++;
 200 }
 201
 202 static unsigned int xenvif_gso_type(struct sk_buff *skb)
 203 {
 204         if (skb_is_gso(skb)) {
 205                 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
 206                         return XEN_NETIF_GSO_TYPE_TCPV4;
 207                 else
 208                         return XEN_NETIF_GSO_TYPE_TCPV6;
 209         }
 210         return XEN_NETIF_GSO_TYPE_NONE;
 211 }
 212
 213 struct xenvif_pkt_state {
 214         struct sk_buff *skb;
 215         size_t remaining_len;
 216         int frag; /* frag == -1 => skb->head */
 217         unsigned int frag_offset;
 218         struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
 219         unsigned int extra_count;
 220         unsigned int slot;
 221 };
 222
 223 static void xenvif_rx_next_skb(struct xenvif_queue *queue,
 224                                struct xenvif_pkt_state *pkt)
 225 {
 226         struct sk_buff *skb;
 227         unsigned int gso_type;
 228
 229         skb = xenvif_rx_dequeue(queue);
 230
 231         queue->stats.tx_bytes += skb->len;
 232         queue->stats.tx_packets++;
 233
 234         /* Reset packet state. */
 235         memset(pkt, 0, sizeof(struct xenvif_pkt_state));
 236
 237         pkt->skb = skb;
 238         pkt->remaining_len = skb->len;
 239         pkt->frag = -1;
 240
 241         gso_type = xenvif_gso_type(skb);
 242         if ((1 << gso_type) & queue->vif->gso_mask) {
 243                 struct xen_netif_extra_info *extra;
 244
 245                 extra = &pkt->extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
 246
 247                 extra->u.gso.type = gso_type;
 248                 extra->u.gso.size = skb_shinfo(skb)->gso_size;
 249                 extra->u.gso.pad = 0;
 250                 extra->u.gso.features = 0;
 251                 extra->type = XEN_NETIF_EXTRA_TYPE_GSO;
 252                 extra->flags = 0;
 253
 254                 pkt->extra_count++;
 255         }
 256
 257         if (skb->sw_hash) {
 258                 struct xen_netif_extra_info *extra;
 259
 260                 extra = &pkt->extras[XEN_NETIF_EXTRA_TYPE_HASH - 1];
 261
 262                 extra->u.hash.algorithm =
 263                         XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ;
 264
 265                 if (skb->l4_hash)
 266                         extra->u.hash.type =
 267                                 skb->protocol == htons(ETH_P_IP) ?
 268                                 _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP :
 269                                 _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP;
 270                 else
 271                         extra->u.hash.type =
 272                                 skb->protocol == htons(ETH_P_IP) ?
 273                                 _XEN_NETIF_CTRL_HASH_TYPE_IPV4 :
 274                                 _XEN_NETIF_CTRL_HASH_TYPE_IPV6;
 275
 276                 *(uint32_t *)extra->u.hash.value = skb_get_hash_raw(skb);
 277
 278                 extra->type = XEN_NETIF_EXTRA_TYPE_HASH;
 279                 extra->flags = 0;
 280
 281                 pkt->extra_count++;
 282         }
 283 }
 284
 285 static void xenvif_rx_complete(struct xenvif_queue *queue,
 286                                struct xenvif_pkt_state *pkt)
 287 {
 288         int notify;
 289
 290         /* Complete any outstanding copy ops for this skb. */
 291         xenvif_rx_copy_flush(queue);
 292
 293         /* Push responses and notify. */
 294         queue->rx.rsp_prod_pvt = queue->rx.req_cons;
 295         RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, notify);
 296         if (notify)
 297                 notify_remote_via_irq(queue->rx_irq);
 298
 299         dev_kfree_skb(pkt->skb);
 300 }
 301
 302 static void xenvif_rx_next_chunk(struct xenvif_queue *queue,
 303                                  struct xenvif_pkt_state *pkt,
 304                                  unsigned int offset, void **data,
 305                                  size_t *len)
 306 {
 307         struct sk_buff *skb = pkt->skb;
 308         void *frag_data;
 309         size_t frag_len, chunk_len;
 310
 311         if (pkt->frag == -1) {
 312                 frag_data = skb->data;
 313                 frag_len = skb_headlen(skb);
 314         } else {
 315                 skb_frag_t *frag = &skb_shinfo(skb)->frags[pkt->frag];
 316
 317                 frag_data = skb_frag_address(frag);
 318                 frag_len = skb_frag_size(frag);
 319         }
 320
 321         frag_data += pkt->frag_offset;
 322         frag_len -= pkt->frag_offset;
 323
 324         chunk_len = min(frag_len, XEN_PAGE_SIZE - offset);
 325         chunk_len = min(chunk_len,
 326                         XEN_PAGE_SIZE - xen_offset_in_page(frag_data));
 327
 328         pkt->frag_offset += chunk_len;
 329
 330         /* Advance to next frag? */
 331         if (frag_len == chunk_len) {
 332                 pkt->frag++;
 333                 pkt->frag_offset = 0;
 334         }
 335
 336         *data = frag_data;
 337         *len = chunk_len;
 338 }
 339
 340 static void xenvif_rx_data_slot(struct xenvif_queue *queue,
 341                                 struct xenvif_pkt_state *pkt,
 342                                 struct xen_netif_rx_request *req,
 343                                 struct xen_netif_rx_response *rsp)
 344 {
 345         unsigned int offset = 0;
 346         unsigned int flags;
 347
 348         do {
 349                 size_t len;
 350                 void *data;
 351
 352                 xenvif_rx_next_chunk(queue, pkt, offset, &data, &len);
 353                 xenvif_rx_copy_add(queue, req, offset, data, len);
 354
 355                 offset += len;
 356                 pkt->remaining_len -= len;
 357
 358         } while (offset < XEN_PAGE_SIZE && pkt->remaining_len > 0);
 359
 360         if (pkt->remaining_len > 0)
 361                 flags = XEN_NETRXF_more_data;
 362         else
 363                 flags = 0;
 364
 365         if (pkt->slot == 0) {
 366                 struct sk_buff *skb = pkt->skb;
 367
 368                 if (skb->ip_summed == CHECKSUM_PARTIAL)
 369                         flags |= XEN_NETRXF_csum_blank |
 370                                  XEN_NETRXF_data_validated;
 371                 else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
 372                         flags |= XEN_NETRXF_data_validated;
 373
 374                 if (pkt->extra_count != 0)
 375                         flags |= XEN_NETRXF_extra_info;
 376         }
 377
 378         rsp->offset = 0;
 379         rsp->flags = flags;
 380         rsp->id = req->id;
 381         rsp->status = (s16)offset;
 382 }
 383
 384 static void xenvif_rx_extra_slot(struct xenvif_queue *queue,
 385                                  struct xenvif_pkt_state *pkt,
 386                                  struct xen_netif_rx_request *req,
 387                                  struct xen_netif_rx_response *rsp)
 388 {
 389         struct xen_netif_extra_info *extra = (void *)rsp;
 390         unsigned int i;
 391
 392         pkt->extra_count--;
 393
 394         for (i = 0; i < ARRAY_SIZE(pkt->extras); i++) {
 395                 if (pkt->extras[i].type) {
 396                         *extra = pkt->extras[i];
 397
 398                         if (pkt->extra_count != 0)
 399                                 extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
 400
 401                         pkt->extras[i].type = 0;
 402                         return;
 403                 }
 404         }
 405         BUG();
 406 }
 407
 408 void xenvif_rx_action(struct xenvif_queue *queue)
 409 {
 410         struct xenvif_pkt_state pkt;
 411
 412         xenvif_rx_next_skb(queue, &pkt);
 413
 414         do {
 415                 struct xen_netif_rx_request *req;
 416                 struct xen_netif_rx_response *rsp;
 417
 418                 req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons);
 419                 rsp = RING_GET_RESPONSE(&queue->rx, queue->rx.req_cons);
 420
 421                 /* Extras must go after the first data slot */
 422                 if (pkt.slot != 0 && pkt.extra_count != 0)
 423                         xenvif_rx_extra_slot(queue, &pkt, req, rsp);
 424                 else
 425                         xenvif_rx_data_slot(queue, &pkt, req, rsp);
 426
 427                 queue->rx.req_cons++;
 428                 pkt.slot++;
 429         } while (pkt.remaining_len > 0 || pkt.extra_count != 0);
 430
 431         xenvif_rx_complete(queue, &pkt);
 432 }
 433
 434 static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)
 435 {
 436         RING_IDX prod, cons;
 437
 438         prod = queue->rx.sring->req_prod;
 439         cons = queue->rx.req_cons;
 440
 441         return !queue->stalled &&
 442                 prod - cons < 1 &&
 443                 time_after(jiffies,
 444                            queue->last_rx_time + queue->vif->stall_timeout);
 445 }
 446
 447 static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
 448 {
 449         RING_IDX prod, cons;
 450
 451         prod = queue->rx.sring->req_prod;
 452         cons = queue->rx.req_cons;
 453
 454         return queue->stalled && prod - cons >= 1;
 455 }
 456
 457 static bool xenvif_have_rx_work(struct xenvif_queue *queue)
 458 {
 459         return xenvif_rx_ring_slots_available(queue) ||
 460                 (queue->vif->stall_timeout &&
 461                  (xenvif_rx_queue_stalled(queue) ||
 462                   xenvif_rx_queue_ready(queue))) ||
 463                 kthread_should_stop() ||
 464                 queue->vif->disabled;
 465 }
 466
 467 static long xenvif_rx_queue_timeout(struct xenvif_queue *queue)
 468 {
 469         struct sk_buff *skb;
 470         long timeout;
 471
 472         skb = skb_peek(&queue->rx_queue);
 473         if (!skb)
 474                 return MAX_SCHEDULE_TIMEOUT;
 475
 476         timeout = XENVIF_RX_CB(skb)->expires - jiffies;
 477         return timeout < 0 ? 0 : timeout;
 478 }
 479
 480 /* Wait until the guest Rx thread has work.
 481  *
 482  * The timeout needs to be adjusted based on the current head of the
 483  * queue (and not just the head at the beginning).  In particular, if
 484  * the queue is initially empty an infinite timeout is used and this
 485  * needs to be reduced when a skb is queued.
 486  *
 487  * This cannot be done with wait_event_timeout() because it only
 488  * calculates the timeout once.
 489  */
 490 static void xenvif_wait_for_rx_work(struct xenvif_queue *queue)
 491 {
 492         DEFINE_WAIT(wait);
 493
 494         if (xenvif_have_rx_work(queue))
 495                 return;
 496
 497         for (;;) {
 498                 long ret;
 499
 500                 prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE);
 501                 if (xenvif_have_rx_work(queue))
 502                         break;
 503                 ret = schedule_timeout(xenvif_rx_queue_timeout(queue));
 504                 if (!ret)
 505                         break;
 506         }
 507         finish_wait(&queue->wq, &wait);
 508 }
 509
 510 static void xenvif_queue_carrier_off(struct xenvif_queue *queue)
 511 {
 512         struct xenvif *vif = queue->vif;
 513
 514         queue->stalled = true;
 515
 516         /* At least one queue has stalled? Disable the carrier. */
 517         spin_lock(&vif->lock);
 518         if (vif->stalled_queues++ == 0) {
 519                 netdev_info(vif->dev, "Guest Rx stalled");
 520                 netif_carrier_off(vif->dev);
 521         }
 522         spin_unlock(&vif->lock);
 523 }
 524
 525 static void xenvif_queue_carrier_on(struct xenvif_queue *queue)
 526 {
 527         struct xenvif *vif = queue->vif;
 528
 529         queue->last_rx_time = jiffies; /* Reset Rx stall detection. */
 530         queue->stalled = false;
 531
 532         /* All queues are ready? Enable the carrier. */
 533         spin_lock(&vif->lock);
 534         if (--vif->stalled_queues == 0) {
 535                 netdev_info(vif->dev, "Guest Rx ready");
 536                 netif_carrier_on(vif->dev);
 537         }
 538         spin_unlock(&vif->lock);
 539 }
 540
 541 int xenvif_kthread_guest_rx(void *data)
 542 {
 543         struct xenvif_queue *queue = data;
 544         struct xenvif *vif = queue->vif;
 545
 546         if (!vif->stall_timeout)
 547                 xenvif_queue_carrier_on(queue);
 548
 549         for (;;) {
 550                 xenvif_wait_for_rx_work(queue);
 551
 552                 if (kthread_should_stop())
 553                         break;
 554
 555                 /* This frontend is found to be rogue, disable it in
 556                  * kthread context. Currently this is only set when
 557                  * netback finds out frontend sends malformed packet,
 558                  * but we cannot disable the interface in softirq
 559                  * context so we defer it here, if this thread is
 560                  * associated with queue 0.
 561                  */
 562                 if (unlikely(vif->disabled && queue->id == 0)) {
 563                         xenvif_carrier_off(vif);
 564                         break;
 565                 }
 566
 567                 if (!skb_queue_empty(&queue->rx_queue))
 568                         xenvif_rx_action(queue);
 569
 570                 /* If the guest hasn't provided any Rx slots for a
 571                  * while it's probably not responsive, drop the
 572                  * carrier so packets are dropped earlier.
 573                  */
 574                 if (vif->stall_timeout) {
 575                         if (xenvif_rx_queue_stalled(queue))
 576                                 xenvif_queue_carrier_off(queue);
 577                         else if (xenvif_rx_queue_ready(queue))
 578                                 xenvif_queue_carrier_on(queue);
 579                 }
 580
 581                 /* Queued packets may have foreign pages from other
 582                  * domains.  These cannot be queued indefinitely as
 583                  * this would starve guests of grant refs and transmit
 584                  * slots.
 585                  */
 586                 xenvif_rx_queue_drop_expired(queue);
 587
 588                 xenvif_rx_queue_maybe_wake(queue);
 589
 590                 cond_resched();
 591         }
 592
 593         /* Bin any remaining skbs */
 594         xenvif_rx_queue_purge(queue);
 595
 596         return 0;
 597 }