IB/mlx5: Add support for RDMA read/write responder page faults

Author:     Haggai Eran <haggaie@mellanox.com>
AuthorDate: Thu, 11 Dec 2014 15:04:25 +0000 (17:04 +0200)
Commit:     Roland Dreier <roland@purestorage.com>
CommitDate: Tue, 16 Dec 2014 02:19:03 +0000 (18:19 -0800)

Signed-off-by: Shachar Raindel <raindel@mellanox.com>
Signed-off-by: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index bd1dbe5..936a6cd 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -35,6 +35,8 @@
 
 #include "mlx5_ib.h"
 
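+/* Upper bound on the length of a speculative prefetch issued after a
+ * responder page fault has been resolved. */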
+#define MAX_PREFETCH_LEN (4*1024*1024U)
+
 struct workqueue_struct *mlx5_ib_page_fault_wq;
 
 #define COPY_ODP_BIT_MLX_TO_IB(reg, ib_caps, field_name, bit_name) do {        \
@@ -490,6 +492,80 @@ resolve_page_fault:
        free_page((unsigned long)buffer);
 }
 
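+/* Number of whole pages spanned by [address, address + length): align the
+ * end up and the start down to page boundaries before counting. A 0x20-byte
+ * range starting at 0x1ff0 crosses a page boundary, for example, and counts
+ * as two pages.
+ */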
+static int pages_in_range(u64 address, u32 length)
+{
+       return (ALIGN(address + length, PAGE_SIZE) -
+               (address & PAGE_MASK)) >> PAGE_SHIFT;
+}
+
+static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_qp *qp,
+                                          struct mlx5_ib_pfault *pfault)
+{
+       struct mlx5_pagefault *mpfault = &pfault->mpfault;
+       u64 address;
+       u32 length;
+       u32 prefetch_len = mpfault->bytes_committed;
+       int prefetch_activated = 0;
+       u32 rkey = mpfault->rdma.r_key;
+       int ret;
+
+       /* The RDMA responder handler resolves the page fault in two parts.
+        * First it brings in the pages needed by the current packet
+        * (using the pfault context), and then, after resuming the QP,
+        * prefetches more pages. The second operation cannot use the pfault
+        * context and therefore uses the dummy_pfault context allocated on
+        * the stack. */
+       struct mlx5_ib_pfault dummy_pfault = {};
+
+       dummy_pfault.mpfault.bytes_committed = 0;
+
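+       /* Skip the part of the operation that the HCA had already committed
+        * when the fault was reported, and clear bytes_committed so the
+        * resolution below starts at the faulting offset. */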
+       mpfault->rdma.rdma_va += mpfault->bytes_committed;
+       mpfault->rdma.rdma_op_len -= min(mpfault->bytes_committed,
+                                        mpfault->rdma.rdma_op_len);
+       mpfault->bytes_committed = 0;
+
+       address = mpfault->rdma.rdma_va;
+       length  = mpfault->rdma.rdma_op_len;
+
+       /* For some operations, the hardware cannot tell the exact message
+        * length, and in those cases it reports zero. Use prefetch
+        * logic. */
+       if (length == 0) {
+               prefetch_activated = 1;
+               length = mpfault->rdma.packet_size;
+               prefetch_len = min(MAX_PREFETCH_LEN, prefetch_len);
+       }
+
+       ret = pagefault_single_data_segment(qp, pfault, rkey, address, length,
+                                           NULL);
+       if (ret == -EAGAIN) {
+               /* We're racing with an invalidation, don't prefetch */
+               prefetch_activated = 0;
+       } else if (ret < 0 || pages_in_range(address, length) > ret) {
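+               /* Not all pages in the faulting range could be resolved;
+                * resume the QP with the error flag set. */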
+               mlx5_ib_page_fault_resume(qp, pfault, 1);
+               return;
+       }
+
+       mlx5_ib_page_fault_resume(qp, pfault, 0);
+
+       /* At this point a new page fault may already be arriving on the
+        * EQ, so switch to the dummy pagefault context for the rest of the
+        * processing. The objects this handler uses remain valid because
+        * the work queue is fenced. */
+
+       if (prefetch_activated) {
+               ret = pagefault_single_data_segment(qp, &dummy_pfault, rkey,
+                                                   address,
+                                                   prefetch_len,
+                                                   NULL);
+               if (ret < 0) {
+                       pr_warn("Prefetch failed (ret = %d, prefetch_activated = %d) for QPN %d, address: 0x%.16llx, length = 0x%.16x\n",
+                               ret, prefetch_activated,
+                               qp->ibqp.qp_num, address, prefetch_len);
+               }
+       }
+}
+
 void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
                               struct mlx5_ib_pfault *pfault)
 {
@@ -499,6 +575,9 @@ void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
        case MLX5_PFAULT_SUBTYPE_WQE:
                mlx5_ib_mr_wqe_pfault_handler(qp, pfault);
                break;
+       case MLX5_PFAULT_SUBTYPE_RDMA:
+               mlx5_ib_mr_rdma_pfault_handler(qp, pfault);
+               break;
        default:
                pr_warn("Invalid page fault event subtype: 0x%x\n",
                        event_subtype);
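
For context, a minimal userspace sketch (not part of this patch) of how a
libibverbs consumer would register a memory region with on-demand paging, so
that responder-side RDMA reads and writes targeting it are resolved by the
handler added above. The helper name register_odp_buffer is hypothetical,
ODP support in both the device and the running kernel is assumed, and error
handling is abbreviated.

    #include <stdlib.h>
    #include <unistd.h>
    #include <infiniband/verbs.h>

    /* Register an ODP memory region: IBV_ACCESS_ON_DEMAND defers pinning,
     * so the HCA raises page faults (serviced by the kernel's ODP
     * handlers) the first time it touches a page. */
    static struct ibv_mr *register_odp_buffer(struct ibv_pd *pd, size_t len)
    {
            void *buf;

            if (posix_memalign(&buf, sysconf(_SC_PAGESIZE), len))
                    return NULL;

            return ibv_reg_mr(pd, buf, len,
                              IBV_ACCESS_LOCAL_WRITE |
                              IBV_ACCESS_REMOTE_READ |
                              IBV_ACCESS_REMOTE_WRITE |
                              IBV_ACCESS_ON_DEMAND);
    }

If ibv_reg_mr() fails with IBV_ACCESS_ON_DEMAND set, the device or kernel
does not support ODP for the requested access flags.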