Merge branches 'core', 'cxgb4', 'ip-roce', 'iser', 'misc', 'mlx4', 'nes', 'ocrdma...
author		Roland Dreier <roland@purestorage.com>
		Thu, 3 Apr 2014 15:30:17 +0000 (08:30 -0700)
committer	Roland Dreier <roland@purestorage.com>
		Thu, 3 Apr 2014 15:30:17 +0000 (08:30 -0700)
80 files changed:
drivers/infiniband/core/cm.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/mad.c
drivers/infiniband/core/umem.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/amso1100/c2_provider.c
drivers/infiniband/hw/cxgb3/iwch_provider.c
drivers/infiniband/hw/cxgb4/cm.c
drivers/infiniband/hw/cxgb4/cq.c
drivers/infiniband/hw/cxgb4/device.c
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
drivers/infiniband/hw/cxgb4/mem.c
drivers/infiniband/hw/cxgb4/qp.c
drivers/infiniband/hw/ehca/ehca_classes.h
drivers/infiniband/hw/ehca/ehca_cq.c
drivers/infiniband/hw/ehca/ehca_mrmw.c
drivers/infiniband/hw/ipath/ipath_diag.c
drivers/infiniband/hw/ipath/ipath_dma.c
drivers/infiniband/hw/ipath/ipath_mr.c
drivers/infiniband/hw/mlx4/doorbell.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/mr.c
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/doorbell.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mem.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/mthca/mthca_provider.c
drivers/infiniband/hw/nes/nes_cm.c
drivers/infiniband/hw/nes/nes_cm.h
drivers/infiniband/hw/nes/nes_user.h
drivers/infiniband/hw/nes/nes_verbs.c
drivers/infiniband/hw/nes/nes_verbs.h
drivers/infiniband/hw/ocrdma/Makefile
drivers/infiniband/hw/ocrdma/ocrdma.h
drivers/infiniband/hw/ocrdma/ocrdma_abi.h
drivers/infiniband/hw/ocrdma/ocrdma_ah.c
drivers/infiniband/hw/ocrdma/ocrdma_hw.c
drivers/infiniband/hw/ocrdma/ocrdma_hw.h
drivers/infiniband/hw/ocrdma/ocrdma_main.c
drivers/infiniband/hw/ocrdma/ocrdma_sli.h
drivers/infiniband/hw/ocrdma/ocrdma_stats.c [new file with mode: 0644]
drivers/infiniband/hw/ocrdma/ocrdma_stats.h [new file with mode: 0644]
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
drivers/infiniband/hw/qib/qib.h
drivers/infiniband/hw/qib/qib_diag.c
drivers/infiniband/hw/qib/qib_dma.c
drivers/infiniband/hw/qib/qib_file_ops.c
drivers/infiniband/hw/qib/qib_fs.c
drivers/infiniband/hw/qib/qib_iba6120.c
drivers/infiniband/hw/qib/qib_iba7220.c
drivers/infiniband/hw/qib/qib_iba7322.c
drivers/infiniband/hw/qib/qib_init.c
drivers/infiniband/hw/qib/qib_mad.c
drivers/infiniband/hw/qib/qib_mr.c
drivers/infiniband/hw/qib/qib_rc.c
drivers/infiniband/hw/qib/qib_ruc.c
drivers/infiniband/hw/qib/qib_ud.c
drivers/infiniband/hw/qib/qib_user_sdma.c
drivers/infiniband/hw/qib/qib_verbs.c
drivers/infiniband/hw/qib/qib_verbs.h
drivers/infiniband/hw/usnic/usnic_uiom.c
drivers/infiniband/ulp/srp/ib_srp.c
drivers/infiniband/ulp/srp/ib_srp.h
drivers/net/ethernet/emulex/benet/be_roce.c
drivers/net/ethernet/emulex/benet/be_roce.h
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/mr.c
drivers/scsi/scsi_transport_srp.c
include/linux/mlx5/cq.h
include/linux/mlx5/device.h
include/linux/mlx5/driver.h
include/linux/mlx5/qp.h
include/rdma/ib_cm.h
include/rdma/ib_umem.h
include/rdma/ib_verbs.h
include/scsi/scsi_transport_srp.h

index 0601b9d..c323917 100644
@@ -349,23 +349,6 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
                           grh, &av->ah_attr);
 }
 
-int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac)
-{
-       struct cm_id_private *cm_id_priv;
-
-       cm_id_priv = container_of(id, struct cm_id_private, id);
-
-       if (smac != NULL)
-               memcpy(cm_id_priv->av.smac, smac, sizeof(cm_id_priv->av.smac));
-
-       if (alt_smac != NULL)
-               memcpy(cm_id_priv->alt_av.smac, alt_smac,
-                      sizeof(cm_id_priv->alt_av.smac));
-
-       return 0;
-}
-EXPORT_SYMBOL(ib_update_cm_av);
-
 static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
 {
        struct cm_device *cm_dev;
index 199958d..42c3058 100644
@@ -1284,15 +1284,6 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
        struct rdma_id_private *listen_id, *conn_id;
        struct rdma_cm_event event;
        int offset, ret;
-       u8 smac[ETH_ALEN];
-       u8 alt_smac[ETH_ALEN];
-       u8 *psmac = smac;
-       u8 *palt_smac = alt_smac;
-       int is_iboe = ((rdma_node_get_transport(cm_id->device->node_type) ==
-                       RDMA_TRANSPORT_IB) &&
-                      (rdma_port_get_link_layer(cm_id->device,
-                       ib_event->param.req_rcvd.port) ==
-                       IB_LINK_LAYER_ETHERNET));
 
        listen_id = cm_id->context;
        if (!cma_check_req_qp_type(&listen_id->id, ib_event))
@@ -1336,28 +1327,11 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
        ret = conn_id->id.event_handler(&conn_id->id, &event);
        if (ret)
                goto err3;
-
-       if (is_iboe) {
-               if (ib_event->param.req_rcvd.primary_path != NULL)
-                       rdma_addr_find_smac_by_sgid(
-                               &ib_event->param.req_rcvd.primary_path->sgid,
-                               psmac, NULL);
-               else
-                       psmac = NULL;
-               if (ib_event->param.req_rcvd.alternate_path != NULL)
-                       rdma_addr_find_smac_by_sgid(
-                               &ib_event->param.req_rcvd.alternate_path->sgid,
-                               palt_smac, NULL);
-               else
-                       palt_smac = NULL;
-       }
        /*
         * Acquire mutex to prevent user executing rdma_destroy_id()
         * while we're accessing the cm_id.
         */
        mutex_lock(&lock);
-       if (is_iboe)
-               ib_update_cm_av(cm_id, psmac, palt_smac);
        if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
            (conn_id->id.qp_type != IB_QPT_UD))
                ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
index 4c837e6..ab31f13 100644
@@ -1022,12 +1022,21 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
                                        mad_send_wr->send_buf.mad,
                                        sge[0].length,
                                        DMA_TO_DEVICE);
+       if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr)))
+               return -ENOMEM;
+
        mad_send_wr->header_mapping = sge[0].addr;
 
        sge[1].addr = ib_dma_map_single(mad_agent->device,
                                        ib_get_payload(mad_send_wr),
                                        sge[1].length,
                                        DMA_TO_DEVICE);
+       if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) {
+               ib_dma_unmap_single(mad_agent->device,
+                                   mad_send_wr->header_mapping,
+                                   sge[0].length, DMA_TO_DEVICE);
+               return -ENOMEM;
+       }
        mad_send_wr->payload_mapping = sge[1].addr;
 
        spin_lock_irqsave(&qp_info->send_queue.lock, flags);
@@ -2590,6 +2599,11 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
                                                 sizeof *mad_priv -
                                                   sizeof mad_priv->header,
                                                 DMA_FROM_DEVICE);
+               if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device,
+                                                 sg_list.addr))) {
+                       ret = -ENOMEM;
+                       break;
+               }
                mad_priv->header.mapping = sg_list.addr;
                recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
                mad_priv->header.mad_list.mad_queue = recv_queue;
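The pattern added in both hunks above is the same: map, immediately test the returned handle with ib_dma_mapping_error(), and unwind any mapping already made before returning. A minimal sketch of the idiom (device, buf and len are hypothetical names, not from this patch):

	u64 addr;

	addr = ib_dma_map_single(device, buf, len, DMA_TO_DEVICE);
	if (unlikely(ib_dma_mapping_error(device, addr)))
		return -ENOMEM;		/* nothing mapped yet, nothing to undo */
	/* ... post the work request ... */
	/* any later failure path must unmap before returning: */
	ib_dma_unmap_single(device, addr, len, DMA_TO_DEVICE);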
index a841123..a3a2e9c 100644
 
 #include "uverbs.h"
 
-#define IB_UMEM_MAX_PAGE_CHUNK                                         \
-       ((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) /      \
-        ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] -        \
-         (void *) &((struct ib_umem_chunk *) 0)->page_list[0]))
 
 static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
 {
-       struct ib_umem_chunk *chunk, *tmp;
+       struct scatterlist *sg;
+       struct page *page;
        int i;
 
-       list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
-               ib_dma_unmap_sg(dev, chunk->page_list,
-                               chunk->nents, DMA_BIDIRECTIONAL);
-               for (i = 0; i < chunk->nents; ++i) {
-                       struct page *page = sg_page(&chunk->page_list[i]);
+       if (umem->nmap > 0)
+               ib_dma_unmap_sg(dev, umem->sg_head.sgl,
+                               umem->nmap,
+                               DMA_BIDIRECTIONAL);
 
-                       if (umem->writable && dirty)
-                               set_page_dirty_lock(page);
-                       put_page(page);
-               }
+       for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
 
-               kfree(chunk);
+               page = sg_page(sg);
+               if (umem->writable && dirty)
+                       set_page_dirty_lock(page);
+               put_page(page);
        }
+
+       sg_free_table(&umem->sg_head);
+       return;
+
 }
 
 /**
@@ -81,15 +81,15 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
        struct ib_umem *umem;
        struct page **page_list;
        struct vm_area_struct **vma_list;
-       struct ib_umem_chunk *chunk;
        unsigned long locked;
        unsigned long lock_limit;
        unsigned long cur_base;
        unsigned long npages;
        int ret;
-       int off;
        int i;
        DEFINE_DMA_ATTRS(attrs);
+       struct scatterlist *sg, *sg_list_start;
+       int need_release = 0;
 
        if (dmasync)
                dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
@@ -97,7 +97,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
        if (!can_do_mlock())
                return ERR_PTR(-EPERM);
 
-       umem = kmalloc(sizeof *umem, GFP_KERNEL);
+       umem = kzalloc(sizeof *umem, GFP_KERNEL);
        if (!umem)
                return ERR_PTR(-ENOMEM);
 
@@ -117,8 +117,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
        /* We assume the memory is from hugetlb until proved otherwise */
        umem->hugetlb   = 1;
 
-       INIT_LIST_HEAD(&umem->chunk_list);
-
        page_list = (struct page **) __get_free_page(GFP_KERNEL);
        if (!page_list) {
                kfree(umem);
@@ -147,7 +145,18 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 
        cur_base = addr & PAGE_MASK;
 
-       ret = 0;
+       if (npages == 0) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
+       if (ret)
+               goto out;
+
+       need_release = 1;
+       sg_list_start = umem->sg_head.sgl;
+
        while (npages) {
                ret = get_user_pages(current, current->mm, cur_base,
                                     min_t(unsigned long, npages,
@@ -157,54 +166,38 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
                if (ret < 0)
                        goto out;
 
+               umem->npages += ret;
                cur_base += ret * PAGE_SIZE;
                npages   -= ret;
 
-               off = 0;
-
-               while (ret) {
-                       chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) *
-                                       min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK),
-                                       GFP_KERNEL);
-                       if (!chunk) {
-                               ret = -ENOMEM;
-                               goto out;
-                       }
-
-                       chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
-                       sg_init_table(chunk->page_list, chunk->nents);
-                       for (i = 0; i < chunk->nents; ++i) {
-                               if (vma_list &&
-                                   !is_vm_hugetlb_page(vma_list[i + off]))
-                                       umem->hugetlb = 0;
-                               sg_set_page(&chunk->page_list[i], page_list[i + off], PAGE_SIZE, 0);
-                       }
-
-                       chunk->nmap = ib_dma_map_sg_attrs(context->device,
-                                                         &chunk->page_list[0],
-                                                         chunk->nents,
-                                                         DMA_BIDIRECTIONAL,
-                                                         &attrs);
-                       if (chunk->nmap <= 0) {
-                               for (i = 0; i < chunk->nents; ++i)
-                                       put_page(sg_page(&chunk->page_list[i]));
-                               kfree(chunk);
-
-                               ret = -ENOMEM;
-                               goto out;
-                       }
-
-                       ret -= chunk->nents;
-                       off += chunk->nents;
-                       list_add_tail(&chunk->list, &umem->chunk_list);
+               for_each_sg(sg_list_start, sg, ret, i) {
+                       if (vma_list && !is_vm_hugetlb_page(vma_list[i]))
+                               umem->hugetlb = 0;
+
+                       sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
                }
 
-               ret = 0;
+               /* preparing for next loop */
+               sg_list_start = sg;
        }
 
+       umem->nmap = ib_dma_map_sg_attrs(context->device,
+                                 umem->sg_head.sgl,
+                                 umem->npages,
+                                 DMA_BIDIRECTIONAL,
+                                 &attrs);
+
+       if (umem->nmap <= 0) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       ret = 0;
+
 out:
        if (ret < 0) {
-               __ib_umem_release(context->device, umem, 0);
+               if (need_release)
+                       __ib_umem_release(context->device, umem, 0);
                kfree(umem);
        } else
                current->mm->pinned_vm = locked;
@@ -278,17 +271,16 @@ EXPORT_SYMBOL(ib_umem_release);
 
 int ib_umem_page_count(struct ib_umem *umem)
 {
-       struct ib_umem_chunk *chunk;
        int shift;
        int i;
        int n;
+       struct scatterlist *sg;
 
        shift = ilog2(umem->page_size);
 
        n = 0;
-       list_for_each_entry(chunk, &umem->chunk_list, list)
-               for (i = 0; i < chunk->nmap; ++i)
-                       n += sg_dma_len(&chunk->page_list[i]) >> shift;
+       for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
+               n += sg_dma_len(sg) >> shift;
 
        return n;
 }
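With the chunk list gone, every pinned page sits in one scatter/gather table: umem->sg_head holds umem->npages entries, of which the first umem->nmap are DMA-mapped. Drivers walk it with for_each_sg(), as each of the hardware drivers below is converted to do. A minimal sketch of the new walk (use_dma_addr() is a hypothetical consumer):

	struct scatterlist *sg;
	int entry, k, len;
	int shift = ilog2(umem->page_size);

	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
		len = sg_dma_len(sg) >> shift;	/* pages in this SG entry */
		for (k = 0; k < len; ++k)
			use_dma_addr(sg_dma_address(sg) + (k << shift));
	}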
index 3ac7951..92525f8 100644
@@ -1169,6 +1169,45 @@ int ib_dereg_mr(struct ib_mr *mr)
 }
 EXPORT_SYMBOL(ib_dereg_mr);
 
+struct ib_mr *ib_create_mr(struct ib_pd *pd,
+                          struct ib_mr_init_attr *mr_init_attr)
+{
+       struct ib_mr *mr;
+
+       if (!pd->device->create_mr)
+               return ERR_PTR(-ENOSYS);
+
+       mr = pd->device->create_mr(pd, mr_init_attr);
+
+       if (!IS_ERR(mr)) {
+               mr->device  = pd->device;
+               mr->pd      = pd;
+               mr->uobject = NULL;
+               atomic_inc(&pd->usecnt);
+               atomic_set(&mr->usecnt, 0);
+       }
+
+       return mr;
+}
+EXPORT_SYMBOL(ib_create_mr);
+
+int ib_destroy_mr(struct ib_mr *mr)
+{
+       struct ib_pd *pd;
+       int ret;
+
+       if (atomic_read(&mr->usecnt))
+               return -EBUSY;
+
+       pd = mr->pd;
+       ret = mr->device->destroy_mr(mr);
+       if (!ret)
+               atomic_dec(&pd->usecnt);
+
+       return ret;
+}
+EXPORT_SYMBOL(ib_destroy_mr);
+
 struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
 {
        struct ib_mr *mr;
@@ -1398,3 +1437,11 @@ int ib_destroy_flow(struct ib_flow *flow_id)
        return err;
 }
 EXPORT_SYMBOL(ib_destroy_flow);
+
+int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
+                      struct ib_mr_status *mr_status)
+{
+       return mr->device->check_mr_status ?
+               mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS;
+}
+EXPORT_SYMBOL(ib_check_mr_status);
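A caller-side sketch for the new verb. The IB_MR_CHECK_SIG_STATUS mask and the ib_mr_status layout assumed here come from the signature-verbs additions to include/rdma/ib_verbs.h in this same merge; treat the exact names as illustrative:

	struct ib_mr_status mr_status;
	int ret;

	ret = ib_check_mr_status(mr, IB_MR_CHECK_SIG_STATUS, &mr_status);
	if (ret)
		return ret;		/* -ENOSYS if the device lacks the hook */
	if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS)
		handle_sig_error(&mr_status.sig_err);	/* hypothetical handler */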
index 07eb3a8..8af33cf 100644
@@ -431,9 +431,9 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
        u64 *pages;
        u64 kva = 0;
        int shift, n, len;
-       int i, j, k;
+       int i, k, entry;
        int err = 0;
-       struct ib_umem_chunk *chunk;
+       struct scatterlist *sg;
        struct c2_pd *c2pd = to_c2pd(pd);
        struct c2_mr *c2mr;
 
@@ -452,10 +452,7 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
        }
 
        shift = ffs(c2mr->umem->page_size) - 1;
-
-       n = 0;
-       list_for_each_entry(chunk, &c2mr->umem->chunk_list, list)
-               n += chunk->nents;
+       n = c2mr->umem->nmap;
 
        pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
        if (!pages) {
@@ -464,14 +461,12 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
        }
 
        i = 0;
-       list_for_each_entry(chunk, &c2mr->umem->chunk_list, list) {
-               for (j = 0; j < chunk->nmap; ++j) {
-                       len = sg_dma_len(&chunk->page_list[j]) >> shift;
-                       for (k = 0; k < len; ++k) {
-                               pages[i++] =
-                                       sg_dma_address(&chunk->page_list[j]) +
-                                       (c2mr->umem->page_size * k);
-                       }
+       for_each_sg(c2mr->umem->sg_head.sgl, sg, c2mr->umem->nmap, entry) {
+               len = sg_dma_len(sg) >> shift;
+               for (k = 0; k < len; ++k) {
+                       pages[i++] =
+                               sg_dma_address(sg) +
+                               (c2mr->umem->page_size * k);
                }
        }
 
index d228383..811b24a 100644
@@ -618,14 +618,13 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 {
        __be64 *pages;
        int shift, n, len;
-       int i, j, k;
+       int i, k, entry;
        int err = 0;
-       struct ib_umem_chunk *chunk;
        struct iwch_dev *rhp;
        struct iwch_pd *php;
        struct iwch_mr *mhp;
        struct iwch_reg_user_mr_resp uresp;
-
+       struct scatterlist *sg;
        PDBG("%s ib_pd %p\n", __func__, pd);
 
        php = to_iwch_pd(pd);
@@ -645,9 +644,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
        shift = ffs(mhp->umem->page_size) - 1;
 
-       n = 0;
-       list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
-               n += chunk->nents;
+       n = mhp->umem->nmap;
 
        err = iwch_alloc_pbl(mhp, n);
        if (err)
@@ -661,12 +658,10 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
        i = n = 0;
 
-       list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
-               for (j = 0; j < chunk->nmap; ++j) {
-                       len = sg_dma_len(&chunk->page_list[j]) >> shift;
+       for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) {
+                       len = sg_dma_len(sg) >> shift;
                        for (k = 0; k < len; ++k) {
-                               pages[i++] = cpu_to_be64(sg_dma_address(
-                                       &chunk->page_list[j]) +
+                               pages[i++] = cpu_to_be64(sg_dma_address(sg) +
                                        mhp->umem->page_size * k);
                                if (i == PAGE_SIZE / sizeof *pages) {
                                        err = iwch_write_pbl(mhp, pages, i, n);
@@ -676,7 +671,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                        i = 0;
                                }
                        }
-               }
+       }
 
        if (i)
                err = iwch_write_pbl(mhp, pages, i, n);
index d286bde..26046c2 100644
@@ -98,9 +98,9 @@ int c4iw_debug;
 module_param(c4iw_debug, int, 0644);
 MODULE_PARM_DESC(c4iw_debug, "Enable debug logging (default=0)");
 
-static int peer2peer;
+static int peer2peer = 1;
 module_param(peer2peer, int, 0644);
-MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)");
+MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=1)");
 
 static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
 module_param(p2p_type, int, 0644);
@@ -400,7 +400,8 @@ static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip,
        n = dst_neigh_lookup(&rt->dst, &peer_ip);
        if (!n)
                return NULL;
-       if (!our_interface(dev, n->dev)) {
+       if (!our_interface(dev, n->dev) &&
+           !(n->dev->flags & IFF_LOOPBACK)) {
                dst_release(&rt->dst);
                return NULL;
        }
@@ -759,8 +760,9 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
        ep->mpa_skb = skb;
        c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
        start_ep_timer(ep);
-       state_set(&ep->com, MPA_REQ_SENT);
+       __state_set(&ep->com, MPA_REQ_SENT);
        ep->mpa_attr.initiator = 1;
+       ep->snd_seq += mpalen;
        return;
 }
 
@@ -840,6 +842,7 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
        t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
        BUG_ON(ep->mpa_skb);
        ep->mpa_skb = skb;
+       ep->snd_seq += mpalen;
        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
 }
 
@@ -923,7 +926,8 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
        skb_get(skb);
        t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
        ep->mpa_skb = skb;
-       state_set(&ep->com, MPA_REP_SENT);
+       __state_set(&ep->com, MPA_REP_SENT);
+       ep->snd_seq += mpalen;
        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
 }
 
@@ -940,6 +944,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
        PDBG("%s ep %p tid %u snd_isn %u rcv_isn %u\n", __func__, ep, tid,
             be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn));
 
+       mutex_lock(&ep->com.mutex);
        dst_confirm(ep->dst);
 
        /* setup the hwtid for this connection */
@@ -963,17 +968,18 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
                send_mpa_req(ep, skb, 1);
        else
                send_mpa_req(ep, skb, mpa_rev);
-
+       mutex_unlock(&ep->com.mutex);
        return 0;
 }
 
-static void close_complete_upcall(struct c4iw_ep *ep)
+static void close_complete_upcall(struct c4iw_ep *ep, int status)
 {
        struct iw_cm_event event;
 
        PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
        memset(&event, 0, sizeof(event));
        event.event = IW_CM_EVENT_CLOSE;
+       event.status = status;
        if (ep->com.cm_id) {
                PDBG("close complete delivered ep %p cm_id %p tid %u\n",
                     ep, ep->com.cm_id, ep->hwtid);
@@ -987,7 +993,6 @@ static void close_complete_upcall(struct c4iw_ep *ep)
 static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
 {
        PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
-       close_complete_upcall(ep);
        state_set(&ep->com, ABORTING);
        set_bit(ABORT_CONN, &ep->com.history);
        return send_abort(ep, skb, gfp);
@@ -1066,9 +1071,10 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
        }
 }
 
-static void connect_request_upcall(struct c4iw_ep *ep)
+static int connect_request_upcall(struct c4iw_ep *ep)
 {
        struct iw_cm_event event;
+       int ret;
 
        PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
        memset(&event, 0, sizeof(event));
@@ -1093,15 +1099,14 @@ static void connect_request_upcall(struct c4iw_ep *ep)
                event.private_data_len = ep->plen;
                event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
        }
-       if (state_read(&ep->parent_ep->com) != DEAD) {
-               c4iw_get_ep(&ep->com);
-               ep->parent_ep->com.cm_id->event_handler(
-                                               ep->parent_ep->com.cm_id,
-                                               &event);
-       }
+       c4iw_get_ep(&ep->com);
+       ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id,
+                                                     &event);
+       if (ret)
+               c4iw_put_ep(&ep->com);
        set_bit(CONNREQ_UPCALL, &ep->com.history);
        c4iw_put_ep(&ep->parent_ep->com);
-       ep->parent_ep = NULL;
+       return ret;
 }
 
 static void established_upcall(struct c4iw_ep *ep)
@@ -1165,7 +1170,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
         * the connection.
         */
        stop_ep_timer(ep);
-       if (state_read(&ep->com) != MPA_REQ_SENT)
+       if (ep->com.state != MPA_REQ_SENT)
                return;
 
        /*
@@ -1240,7 +1245,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
         * start reply message including private data. And
         * the MPA header is valid.
         */
-       state_set(&ep->com, FPDU_MODE);
+       __state_set(&ep->com, FPDU_MODE);
        ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
        ep->mpa_attr.recv_marker_enabled = markers_enabled;
        ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
@@ -1355,7 +1360,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
        }
        goto out;
 err:
-       state_set(&ep->com, ABORTING);
+       __state_set(&ep->com, ABORTING);
        send_abort(ep, skb, GFP_KERNEL);
 out:
        connect_reply_upcall(ep, err);
@@ -1370,7 +1375,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
 
        PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
 
-       if (state_read(&ep->com) != MPA_REQ_WAIT)
+       if (ep->com.state != MPA_REQ_WAIT)
                return;
 
        /*
@@ -1400,7 +1405,6 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
                return;
 
        PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
-       stop_ep_timer(ep);
        mpa = (struct mpa_message *) ep->mpa_pkt;
 
        /*
@@ -1492,10 +1496,18 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
             ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
             ep->mpa_attr.p2p_type);
 
-       state_set(&ep->com, MPA_REQ_RCVD);
+       __state_set(&ep->com, MPA_REQ_RCVD);
+       stop_ep_timer(ep);
 
        /* drive upcall */
-       connect_request_upcall(ep);
+       mutex_lock(&ep->parent_ep->com.mutex);
+       if (ep->parent_ep->com.state != DEAD) {
+               if (connect_request_upcall(ep))
+                       abort_connection(ep, skb, GFP_KERNEL);
+       } else {
+               abort_connection(ep, skb, GFP_KERNEL);
+       }
+       mutex_unlock(&ep->parent_ep->com.mutex);
        return;
 }
 
@@ -1509,14 +1521,17 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
        __u8 status = hdr->status;
 
        ep = lookup_tid(t, tid);
+       if (!ep)
+               return 0;
        PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen);
        skb_pull(skb, sizeof(*hdr));
        skb_trim(skb, dlen);
+       mutex_lock(&ep->com.mutex);
 
        /* update RX credits */
        update_rx_credits(ep, dlen);
 
-       switch (state_read(&ep->com)) {
+       switch (ep->com.state) {
        case MPA_REQ_SENT:
                ep->rcv_seq += dlen;
                process_mpa_reply(ep, skb);
@@ -1532,7 +1547,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
                        pr_err("%s Unexpected streaming data." \
                               " qpid %u ep %p state %d tid %u status %d\n",
                               __func__, ep->com.qp->wq.sq.qid, ep,
-                              state_read(&ep->com), ep->hwtid, status);
+                              ep->com.state, ep->hwtid, status);
                attrs.next_state = C4IW_QP_STATE_TERMINATE;
                c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
                               C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
@@ -1541,6 +1556,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
        default:
                break;
        }
+       mutex_unlock(&ep->com.mutex);
        return 0;
 }
 
@@ -2246,7 +2262,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
                        c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
                                       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
                }
-               close_complete_upcall(ep);
+               close_complete_upcall(ep, 0);
                __state_set(&ep->com, DEAD);
                release = 1;
                disconnect = 0;
@@ -2425,7 +2441,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
                                             C4IW_QP_ATTR_NEXT_STATE,
                                             &attrs, 1);
                }
-               close_complete_upcall(ep);
+               close_complete_upcall(ep, 0);
                __state_set(&ep->com, DEAD);
                release = 1;
                break;
@@ -2500,22 +2516,28 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
 
 int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
 {
-       int err;
+       int err = 0;
+       int disconnect = 0;
        struct c4iw_ep *ep = to_ep(cm_id);
        PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
 
-       if (state_read(&ep->com) == DEAD) {
+       mutex_lock(&ep->com.mutex);
+       if (ep->com.state == DEAD) {
+               mutex_unlock(&ep->com.mutex);
                c4iw_put_ep(&ep->com);
                return -ECONNRESET;
        }
        set_bit(ULP_REJECT, &ep->com.history);
-       BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
+       BUG_ON(ep->com.state != MPA_REQ_RCVD);
        if (mpa_rev == 0)
                abort_connection(ep, NULL, GFP_KERNEL);
        else {
                err = send_mpa_reject(ep, pdata, pdata_len);
-               err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
+               disconnect = 1;
        }
+       mutex_unlock(&ep->com.mutex);
+       if (disconnect)
+               err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
        c4iw_put_ep(&ep->com);
        return 0;
 }
@@ -2530,12 +2552,14 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
 
        PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
-       if (state_read(&ep->com) == DEAD) {
+
+       mutex_lock(&ep->com.mutex);
+       if (ep->com.state == DEAD) {
                err = -ECONNRESET;
                goto err;
        }
 
-       BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
+       BUG_ON(ep->com.state != MPA_REQ_RCVD);
        BUG_ON(!qp);
 
        set_bit(ULP_ACCEPT, &ep->com.history);
@@ -2604,14 +2628,16 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        if (err)
                goto err1;
 
-       state_set(&ep->com, FPDU_MODE);
+       __state_set(&ep->com, FPDU_MODE);
        established_upcall(ep);
+       mutex_unlock(&ep->com.mutex);
        c4iw_put_ep(&ep->com);
        return 0;
 err1:
        ep->com.cm_id = NULL;
        cm_id->rem_ref(cm_id);
 err:
+       mutex_unlock(&ep->com.mutex);
        c4iw_put_ep(&ep->com);
        return err;
 }
@@ -2980,7 +3006,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
        rdev = &ep->com.dev->rdev;
        if (c4iw_fatal_error(rdev)) {
                fatal = 1;
-               close_complete_upcall(ep);
+               close_complete_upcall(ep, -EIO);
                ep->com.state = DEAD;
        }
        switch (ep->com.state) {
@@ -3022,7 +3048,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
        if (close) {
                if (abrupt) {
                        set_bit(EP_DISC_ABORT, &ep->com.history);
-                       close_complete_upcall(ep);
+                       close_complete_upcall(ep, -ECONNRESET);
                        ret = send_abort(ep, NULL, gfp);
                } else {
                        set_bit(EP_DISC_CLOSE, &ep->com.history);
@@ -3203,6 +3229,7 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
        struct sk_buff *req_skb;
        struct fw_ofld_connection_wr *req;
        struct cpl_pass_accept_req *cpl = cplhdr(skb);
+       int ret;
 
        req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL);
        req = (struct fw_ofld_connection_wr *)__skb_put(req_skb, sizeof(*req));
@@ -3239,7 +3266,13 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
        req->cookie = (unsigned long)skb;
 
        set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id);
-       cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb);
+       ret = cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb);
+       if (ret < 0) {
+               pr_err("%s - cxgb4_ofld_send error %d - dropping\n", __func__,
+                      ret);
+               kfree_skb(skb);
+               kfree_skb(req_skb);
+       }
 }
 
 /*
@@ -3346,13 +3379,13 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
                pi = (struct port_info *)netdev_priv(pdev);
                tx_chan = cxgb4_port_chan(pdev);
        }
+       neigh_release(neigh);
        if (!e) {
                pr_err("%s - failed to allocate l2t entry!\n",
                       __func__);
                goto free_dst;
        }
 
-       neigh_release(neigh);
        step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan;
        rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step];
        window = (__force u16) htons((__force u16)tcph->window);
@@ -3427,6 +3460,7 @@ static void process_timeout(struct c4iw_ep *ep)
                                     &attrs, 1);
                }
                __state_set(&ep->com, ABORTING);
+               close_complete_upcall(ep, -ETIMEDOUT);
                break;
        default:
                WARN(1, "%s unexpected state ep %p tid %u state %u\n",
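The change running through cm.c above is one race fix applied handler by handler: per-call state_read()/state_set() (which take and drop the ep lock around each access) are replaced by holding ep->com.mutex across the whole check-then-act sequence, with the lock-free __state_set() used while the mutex is held. The converted handlers all take this shape (sketch):

	mutex_lock(&ep->com.mutex);
	if (ep->com.state == DEAD) {	/* state can no longer change under us */
		mutex_unlock(&ep->com.mutex);
		return -ECONNRESET;
	}
	/* ... act on the state ... */
	__state_set(&ep->com, MPA_REQ_SENT);	/* mutex already held */
	mutex_unlock(&ep->com.mutex);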
index 88de3aa..ce468e5 100644
@@ -365,8 +365,14 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp)
 
                if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) {
 
-                       /*
-                        * drop peer2peer RTR reads.
+                       /* If we have reached here because of async
+                        * event or other error, and have egress error
+                        * then drop
+                        */
+                       if (CQE_TYPE(hw_cqe) == 1)
+                               goto next_cqe;
+
+                       /* drop peer2peer RTR reads.
                         */
                        if (CQE_WRID_STAG(hw_cqe) == 1)
                                goto next_cqe;
@@ -511,8 +517,18 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
         */
        if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) {
 
-               /*
-                * If this is an unsolicited read response, then the read
+               /* If we have reached here because of async
+                * event or other error, and have egress error
+                * then drop
+                */
+               if (CQE_TYPE(hw_cqe) == 1) {
+                       if (CQE_STATUS(hw_cqe))
+                               t4_set_wq_in_error(wq);
+                       ret = -EAGAIN;
+                       goto skip_cqe;
+               }
+
+               /* If this is an unsolicited read response, then the read
                 * was generated by the kernel driver as part of peer-2-peer
                 * connection setup.  So ignore the completion.
                 */
@@ -603,7 +619,7 @@ proc_cqe:
         */
        if (SQ_TYPE(hw_cqe)) {
                int idx = CQE_WRID_SQ_IDX(hw_cqe);
-               BUG_ON(idx > wq->sq.size);
+               BUG_ON(idx >= wq->sq.size);
 
                /*
                * Account for any unsignaled completions completed by
@@ -617,7 +633,7 @@ proc_cqe:
                        wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx;
                else
                        wq->sq.in_use -= idx - wq->sq.cidx;
-               BUG_ON(wq->sq.in_use < 0 && wq->sq.in_use < wq->sq.size);
+               BUG_ON(wq->sq.in_use <= 0 && wq->sq.in_use >= wq->sq.size);
 
                wq->sq.cidx = (uint16_t)idx;
                PDBG("%s completing sq idx %u\n", __func__, wq->sq.cidx);
@@ -881,7 +897,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
        /*
         * Make actual HW queue 2x to avoid cdix_inc overflows.
         */
-       hwentries = entries * 2;
+       hwentries = min(entries * 2, T4_MAX_IQ_SIZE);
 
        /*
         * Make HW queue at least 64 entries so GTS updates aren't too
@@ -930,6 +946,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
                if (!mm2)
                        goto err4;
 
+               memset(&uresp, 0, sizeof(uresp));
                uresp.qid_mask = rhp->rdev.cqmask;
                uresp.cqid = chp->cq.cqid;
                uresp.size = chp->cq.size;
index 4a03385..982f815 100644
@@ -897,11 +897,13 @@ static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp,
        }
 
        opcode = *(u8 *)rsp;
-       if (c4iw_handlers[opcode])
+       if (c4iw_handlers[opcode]) {
                c4iw_handlers[opcode](dev, skb);
-       else
+       } else {
                pr_info("%s no handler opcode 0x%x...\n", __func__,
                       opcode);
+               kfree_skb(skb);
+       }
 
        return 0;
 nomem:
index 23eaeab..a1e8f13 100644
@@ -369,6 +369,7 @@ struct c4iw_fr_page_list {
        DEFINE_DMA_UNMAP_ADDR(mapping);
        dma_addr_t dma_addr;
        struct c4iw_dev *dev;
+       int pll_len;
 };
 
 static inline struct c4iw_fr_page_list *to_c4iw_fr_page_list(
@@ -441,6 +442,7 @@ struct c4iw_qp {
        atomic_t refcnt;
        wait_queue_head_t wait;
        struct timer_list timer;
+       int sq_sig_all;
 };
 
 static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp)
index 41b1195..f9ca072 100644
@@ -37,9 +37,9 @@
 
 #include "iw_cxgb4.h"
 
-int use_dsgl = 1;
+int use_dsgl = 0;
 module_param(use_dsgl, int, 0644);
-MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=1)");
+MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=0)");
 
 #define T4_ULPTX_MIN_IO 32
 #define C4IW_MAX_INLINE_SIZE 96
@@ -678,9 +678,9 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 {
        __be64 *pages;
        int shift, n, len;
-       int i, j, k;
+       int i, k, entry;
        int err = 0;
-       struct ib_umem_chunk *chunk;
+       struct scatterlist *sg;
        struct c4iw_dev *rhp;
        struct c4iw_pd *php;
        struct c4iw_mr *mhp;
@@ -710,10 +710,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
        shift = ffs(mhp->umem->page_size) - 1;
 
-       n = 0;
-       list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
-               n += chunk->nents;
-
+       n = mhp->umem->nmap;
        err = alloc_pbl(mhp, n);
        if (err)
                goto err;
@@ -726,24 +723,22 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
        i = n = 0;
 
-       list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
-               for (j = 0; j < chunk->nmap; ++j) {
-                       len = sg_dma_len(&chunk->page_list[j]) >> shift;
-                       for (k = 0; k < len; ++k) {
-                               pages[i++] = cpu_to_be64(sg_dma_address(
-                                       &chunk->page_list[j]) +
-                                       mhp->umem->page_size * k);
-                               if (i == PAGE_SIZE / sizeof *pages) {
-                                       err = write_pbl(&mhp->rhp->rdev,
-                                             pages,
-                                             mhp->attr.pbl_addr + (n << 3), i);
-                                       if (err)
-                                               goto pbl_done;
-                                       n += i;
-                                       i = 0;
-                               }
+       for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) {
+               len = sg_dma_len(sg) >> shift;
+               for (k = 0; k < len; ++k) {
+                       pages[i++] = cpu_to_be64(sg_dma_address(sg) +
+                               mhp->umem->page_size * k);
+                       if (i == PAGE_SIZE / sizeof *pages) {
+                               err = write_pbl(&mhp->rhp->rdev,
+                                     pages,
+                                     mhp->attr.pbl_addr + (n << 3), i);
+                               if (err)
+                                       goto pbl_done;
+                               n += i;
+                               i = 0;
                        }
                }
+       }
 
        if (i)
                err = write_pbl(&mhp->rhp->rdev, pages,
@@ -903,7 +898,11 @@ struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(struct ib_device *device,
        dma_unmap_addr_set(c4pl, mapping, dma_addr);
        c4pl->dma_addr = dma_addr;
        c4pl->dev = dev;
-       c4pl->ibpl.max_page_list_len = pll_len;
+       c4pl->pll_len = pll_len;
+
+       PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n",
+            __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list,
+            &c4pl->dma_addr);
 
        return &c4pl->ibpl;
 }
@@ -912,8 +911,12 @@ void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *ibpl)
 {
        struct c4iw_fr_page_list *c4pl = to_c4iw_fr_page_list(ibpl);
 
+       PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n",
+            __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list,
+            &c4pl->dma_addr);
+
        dma_free_coherent(&c4pl->dev->rdev.lldi.pdev->dev,
-                         c4pl->ibpl.max_page_list_len,
+                         c4pl->pll_len,
                          c4pl->ibpl.page_list, dma_unmap_addr(c4pl, mapping));
        kfree(c4pl);
 }
index 5829367..723ad29 100644
@@ -675,7 +675,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                fw_flags = 0;
                if (wr->send_flags & IB_SEND_SOLICITED)
                        fw_flags |= FW_RI_SOLICITED_EVENT_FLAG;
-               if (wr->send_flags & IB_SEND_SIGNALED)
+               if (wr->send_flags & IB_SEND_SIGNALED || qhp->sq_sig_all)
                        fw_flags |= FW_RI_COMPLETION_FLAG;
                swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
                switch (wr->opcode) {
@@ -736,7 +736,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                }
                swsqe->idx = qhp->wq.sq.pidx;
                swsqe->complete = 0;
-               swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED);
+               swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED) ||
+                                 qhp->sq_sig_all;
                swsqe->flushed = 0;
                swsqe->wr_id = wr->wr_id;
 
@@ -1533,7 +1534,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
        struct c4iw_cq *schp;
        struct c4iw_cq *rchp;
        struct c4iw_create_qp_resp uresp;
-       int sqsize, rqsize;
+       unsigned int sqsize, rqsize;
        struct c4iw_ucontext *ucontext;
        int ret;
        struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4, *mm5 = NULL;
@@ -1605,6 +1606,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
        qhp->attr.enable_bind = 1;
        qhp->attr.max_ord = 1;
        qhp->attr.max_ird = 1;
+       qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR;
        spin_lock_init(&qhp->lock);
        mutex_init(&qhp->mutex);
        init_waitqueue_head(&qhp->wait);
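With sq_sig_all recorded at QP creation, the driver now honors IB_SIGNAL_ALL_WR: every SQ work request completes with a CQE, not only those posted with IB_SEND_SIGNALED. On the ULP side the behavior is selected once, in ib_qp_init_attr; a minimal sketch (pd, scq and rcq are hypothetical, previously created objects):

	struct ib_qp_init_attr attr = {
		.send_cq     = scq,
		.recv_cq     = rcq,
		.qp_type     = IB_QPT_RC,
		.sq_sig_type = IB_SIGNAL_ALL_WR,	/* CQE for every SQ WR */
		.cap = {
			.max_send_wr  = 16,
			.max_recv_wr  = 16,
			.max_send_sge = 1,
			.max_recv_sge = 1,
		},
	};
	struct ib_qp *qp = ib_create_qp(pd, &attr);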
index f08f6ea..bd45e0f 100644
@@ -322,7 +322,7 @@ struct ehca_mr_pginfo {
                } phy;
                struct { /* type EHCA_MR_PGI_USER section */
                        struct ib_umem *region;
-                       struct ib_umem_chunk *next_chunk;
+                       struct scatterlist *next_sg;
                        u64 next_nmap;
                } usr;
                struct { /* type EHCA_MR_PGI_FMR section */
index 212150c..8cc8375 100644
@@ -283,6 +283,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
                        (my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1));
                if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
                        ehca_err(device, "Copy to udata failed.");
+                       cq = ERR_PTR(-EFAULT);
                        goto create_cq_exit4;
                }
        }
index bcfb0c1..3488e8c 100644
@@ -400,10 +400,7 @@ reg_user_mr_fallback:
        pginfo.num_hwpages = num_hwpages;
        pginfo.u.usr.region = e_mr->umem;
        pginfo.next_hwpage = e_mr->umem->offset / hwpage_size;
-       pginfo.u.usr.next_chunk = list_prepare_entry(pginfo.u.usr.next_chunk,
-                                                    (&e_mr->umem->chunk_list),
-                                                    list);
-
+       pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl;
        ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags,
                          e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
                          &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
@@ -1858,61 +1855,39 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
                                  u64 *kpage)
 {
        int ret = 0;
-       struct ib_umem_chunk *prev_chunk;
-       struct ib_umem_chunk *chunk;
        u64 pgaddr;
-       u32 i = 0;
        u32 j = 0;
        int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size;
-
-       /* loop over desired chunk entries */
-       chunk      = pginfo->u.usr.next_chunk;
-       prev_chunk = pginfo->u.usr.next_chunk;
-       list_for_each_entry_continue(
-               chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
-               for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
-                       pgaddr = page_to_pfn(sg_page(&chunk->page_list[i]))
-                               << PAGE_SHIFT ;
-                       *kpage = pgaddr + (pginfo->next_hwpage *
-                                          pginfo->hwpage_size);
-                       if ( !(*kpage) ) {
-                               ehca_gen_err("pgaddr=%llx "
-                                            "chunk->page_list[i]=%llx "
-                                            "i=%x next_hwpage=%llx",
-                                            pgaddr, (u64)sg_dma_address(
-                                                    &chunk->page_list[i]),
-                                            i, pginfo->next_hwpage);
-                               return -EFAULT;
-                       }
-                       (pginfo->hwpage_cnt)++;
-                       (pginfo->next_hwpage)++;
-                       kpage++;
-                       if (pginfo->next_hwpage % hwpages_per_kpage == 0) {
-                               (pginfo->kpage_cnt)++;
-                               (pginfo->u.usr.next_nmap)++;
-                               pginfo->next_hwpage = 0;
-                               i++;
-                       }
-                       j++;
-                       if (j >= number) break;
+       struct scatterlist **sg = &pginfo->u.usr.next_sg;
+
+       while (*sg != NULL) {
+               pgaddr = page_to_pfn(sg_page(*sg))
+                       << PAGE_SHIFT;
+               *kpage = pgaddr + (pginfo->next_hwpage *
+                                  pginfo->hwpage_size);
+               if (!(*kpage)) {
+                       ehca_gen_err("pgaddr=%llx "
+                                    "sg_dma_address=%llx "
+                                    "entry=%llx next_hwpage=%llx",
+                                    pgaddr, (u64)sg_dma_address(*sg),
+                                    pginfo->u.usr.next_nmap,
+                                    pginfo->next_hwpage);
+                       return -EFAULT;
                }
-               if ((pginfo->u.usr.next_nmap >= chunk->nmap) &&
-                   (j >= number)) {
-                       pginfo->u.usr.next_nmap = 0;
-                       prev_chunk = chunk;
-                       break;
-               } else if (pginfo->u.usr.next_nmap >= chunk->nmap) {
-                       pginfo->u.usr.next_nmap = 0;
-                       prev_chunk = chunk;
-               } else if (j >= number)
+               (pginfo->hwpage_cnt)++;
+               (pginfo->next_hwpage)++;
+               kpage++;
+               if (pginfo->next_hwpage % hwpages_per_kpage == 0) {
+                       (pginfo->kpage_cnt)++;
+                       (pginfo->u.usr.next_nmap)++;
+                       pginfo->next_hwpage = 0;
+                       *sg = sg_next(*sg);
+               }
+               j++;
+               if (j >= number)
                        break;
-               else
-                       prev_chunk = chunk;
        }
-       pginfo->u.usr.next_chunk =
-               list_prepare_entry(prev_chunk,
-                                  (&(pginfo->u.usr.region->chunk_list)),
-                                  list);
+
        return ret;
 }
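ehca now tracks its position in the umem scatterlist with a persistent cursor: pginfo->u.usr.next_sg is advanced through a pointer-to-pointer, so a helper can consume entries and the caller's position moves with it. The idiom in sketch form (consume_page() is a hypothetical consumer):

	static void walk(struct scatterlist **sg, int n)
	{
		for (; *sg && n > 0; *sg = sg_next(*sg), n--)
			consume_page(sg_page(*sg));	/* caller's cursor advances too */
	}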
 
@@ -1920,20 +1895,19 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
  * check given pages for contiguous layout
  * last page addr is returned in prev_pgaddr for further check
  */
-static int ehca_check_kpages_per_ate(struct scatterlist *page_list,
-                                    int start_idx, int end_idx,
+static int ehca_check_kpages_per_ate(struct scatterlist **sg,
+                                    int num_pages,
                                     u64 *prev_pgaddr)
 {
-       int t;
-       for (t = start_idx; t <= end_idx; t++) {
-               u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT;
+       for (; *sg && num_pages > 0; *sg = sg_next(*sg), num_pages--) {
+               u64 pgaddr = page_to_pfn(sg_page(*sg)) << PAGE_SHIFT;
                if (ehca_debug_level >= 3)
                        ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr,
                                     *(u64 *)__va(pgaddr));
                if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
                        ehca_gen_err("uncontiguous page found pgaddr=%llx "
-                                    "prev_pgaddr=%llx page_list_i=%x",
-                                    pgaddr, *prev_pgaddr, t);
+                                    "prev_pgaddr=%llx entries_left_in_hwpage=%x",
+                                    pgaddr, *prev_pgaddr, num_pages);
                        return -EINVAL;
                }
                *prev_pgaddr = pgaddr;
@@ -1947,111 +1921,80 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
                                  u64 *kpage)
 {
        int ret = 0;
-       struct ib_umem_chunk *prev_chunk;
-       struct ib_umem_chunk *chunk;
        u64 pgaddr, prev_pgaddr;
-       u32 i = 0;
        u32 j = 0;
        int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE;
        int nr_kpages = kpages_per_hwpage;
+       struct scatterlist **sg = &pginfo->u.usr.next_sg;
+
+       while (*sg != NULL) {
 
-       /* loop over desired chunk entries */
-       chunk      = pginfo->u.usr.next_chunk;
-       prev_chunk = pginfo->u.usr.next_chunk;
-       list_for_each_entry_continue(
-               chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
-               for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
-                       if (nr_kpages == kpages_per_hwpage) {
-                               pgaddr = ( page_to_pfn(sg_page(&chunk->page_list[i]))
-                                          << PAGE_SHIFT );
-                               *kpage = pgaddr;
-                               if ( !(*kpage) ) {
-                                       ehca_gen_err("pgaddr=%llx i=%x",
-                                                    pgaddr, i);
+               if (nr_kpages == kpages_per_hwpage) {
+                       pgaddr = (page_to_pfn(sg_page(*sg))
+                                  << PAGE_SHIFT);
+                       *kpage = pgaddr;
+                       if (!(*kpage)) {
+                               ehca_gen_err("pgaddr=%llx entry=%llx",
+                                            pgaddr, pginfo->u.usr.next_nmap);
+                               ret = -EFAULT;
+                               return ret;
+                       }
+                       /*
+                        * The first page in a hwpage must be aligned;
+                        * the first MR page is exempt from this rule.
+                        */
+                       if (pgaddr & (pginfo->hwpage_size - 1)) {
+                               if (pginfo->hwpage_cnt) {
+                                       ehca_gen_err(
+                                               "invalid alignment "
+                                               "pgaddr=%llx entry=%llx "
+                                               "mr_pgsize=%llx",
+                                               pgaddr, pginfo->u.usr.next_nmap,
+                                               pginfo->hwpage_size);
                                        ret = -EFAULT;
                                        return ret;
                                }
-                               /*
-                                * The first page in a hwpage must be aligned;
-                                * the first MR page is exempt from this rule.
-                                */
-                               if (pgaddr & (pginfo->hwpage_size - 1)) {
-                                       if (pginfo->hwpage_cnt) {
-                                               ehca_gen_err(
-                                                       "invalid alignment "
-                                                       "pgaddr=%llx i=%x "
-                                                       "mr_pgsize=%llx",
-                                                       pgaddr, i,
-                                                       pginfo->hwpage_size);
-                                               ret = -EFAULT;
-                                               return ret;
-                                       }
-                                       /* first MR page */
-                                       pginfo->kpage_cnt =
-                                               (pgaddr &
-                                                (pginfo->hwpage_size - 1)) >>
-                                               PAGE_SHIFT;
-                                       nr_kpages -= pginfo->kpage_cnt;
-                                       *kpage = pgaddr &
-                                                ~(pginfo->hwpage_size - 1);
-                               }
-                               if (ehca_debug_level >= 3) {
-                                       u64 val = *(u64 *)__va(pgaddr);
-                                       ehca_gen_dbg("kpage=%llx chunk_page=%llx "
-                                                    "value=%016llx",
-                                                    *kpage, pgaddr, val);
-                               }
-                               prev_pgaddr = pgaddr;
-                               i++;
-                               pginfo->kpage_cnt++;
-                               pginfo->u.usr.next_nmap++;
-                               nr_kpages--;
-                               if (!nr_kpages)
-                                       goto next_kpage;
-                               continue;
+                               /* first MR page */
+                               pginfo->kpage_cnt =
+                                       (pgaddr &
+                                        (pginfo->hwpage_size - 1)) >>
+                                       PAGE_SHIFT;
+                               nr_kpages -= pginfo->kpage_cnt;
+                               *kpage = pgaddr &
+                                        ~(pginfo->hwpage_size - 1);
                        }
-                       if (i + nr_kpages > chunk->nmap) {
-                               ret = ehca_check_kpages_per_ate(
-                                       chunk->page_list, i,
-                                       chunk->nmap - 1, &prev_pgaddr);
-                               if (ret) return ret;
-                               pginfo->kpage_cnt += chunk->nmap - i;
-                               pginfo->u.usr.next_nmap += chunk->nmap - i;
-                               nr_kpages -= chunk->nmap - i;
-                               break;
+                       if (ehca_debug_level >= 3) {
+                               u64 val = *(u64 *)__va(pgaddr);
+                               ehca_gen_dbg("kpage=%llx page=%llx "
+                                            "value=%016llx",
+                                            *kpage, pgaddr, val);
                        }
+                       prev_pgaddr = pgaddr;
+                       *sg = sg_next(*sg);
+                       pginfo->kpage_cnt++;
+                       pginfo->u.usr.next_nmap++;
+                       nr_kpages--;
+                       if (!nr_kpages)
+                               goto next_kpage;
+                       continue;
+               }
+
+               ret = ehca_check_kpages_per_ate(sg, nr_kpages,
+                                               &prev_pgaddr);
+               if (ret)
+                       return ret;
+               pginfo->kpage_cnt += nr_kpages;
+               pginfo->u.usr.next_nmap += nr_kpages;
 
-                       ret = ehca_check_kpages_per_ate(chunk->page_list, i,
-                                                       i + nr_kpages - 1,
-                                                       &prev_pgaddr);
-                       if (ret) return ret;
-                       i += nr_kpages;
-                       pginfo->kpage_cnt += nr_kpages;
-                       pginfo->u.usr.next_nmap += nr_kpages;
 next_kpage:
-                       nr_kpages = kpages_per_hwpage;
-                       (pginfo->hwpage_cnt)++;
-                       kpage++;
-                       j++;
-                       if (j >= number) break;
-               }
-               if ((pginfo->u.usr.next_nmap >= chunk->nmap) &&
-                   (j >= number)) {
-                       pginfo->u.usr.next_nmap = 0;
-                       prev_chunk = chunk;
-                       break;
-               } else if (pginfo->u.usr.next_nmap >= chunk->nmap) {
-                       pginfo->u.usr.next_nmap = 0;
-                       prev_chunk = chunk;
-               } else if (j >= number)
+               nr_kpages = kpages_per_hwpage;
+               (pginfo->hwpage_cnt)++;
+               kpage++;
+               j++;
+               if (j >= number)
                        break;
-               else
-                       prev_chunk = chunk;
        }
-       pginfo->u.usr.next_chunk =
-               list_prepare_entry(prev_chunk,
-                                  (&(pginfo->u.usr.region->chunk_list)),
-                                  list);
+
        return ret;
 }
 
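The loop above packs fixed-size kernel pages (kpages) into larger ehca hardware pages (hwpages). A worked example of the arithmetic, with illustrative sizes (4 KiB kernel pages, 64 KiB hwpages):

	/* Illustrative only: 4 KiB kernel pages, 64 KiB hardware pages. */
	u64 hwpage_size = 64 * 1024;
	int kpages_per_hwpage = hwpage_size >> PAGE_SHIFT;	/* 16 */

	/* Every hwpage after the first MR page must start on a
	 * hwpage_size boundary: (pgaddr & (hwpage_size - 1)) == 0. */
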
@@ -2591,16 +2534,6 @@ static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
        /* This is only a stub; nothing to be done here */
 }
 
-static u64 ehca_dma_address(struct ib_device *dev, struct scatterlist *sg)
-{
-       return sg->dma_address;
-}
-
-static unsigned int ehca_dma_len(struct ib_device *dev, struct scatterlist *sg)
-{
-       return sg->length;
-}
-
 static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr,
                                         size_t size,
                                         enum dma_data_direction dir)
@@ -2653,8 +2586,6 @@ struct ib_dma_mapping_ops ehca_dma_mapping_ops = {
        .unmap_page             = ehca_dma_unmap_page,
        .map_sg                 = ehca_dma_map_sg,
        .unmap_sg               = ehca_dma_unmap_sg,
-       .dma_address            = ehca_dma_address,
-       .dma_len                = ehca_dma_len,
        .sync_single_for_cpu    = ehca_dma_sync_single_for_cpu,
        .sync_single_for_device = ehca_dma_sync_single_for_device,
        .alloc_coherent         = ehca_dma_alloc_coherent,
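With the .dma_address and .dma_len hooks removed from struct ib_dma_mapping_ops, callers fall back to reading the mapped scatterlist entry directly. A minimal sketch of the caller-side replacement, assuming sg is an entry produced by map_sg:

	u64 addr = sg_dma_address(sg);		/* was ehca_dma_address(dev, sg) */
	unsigned int len = sg_dma_len(sg);	/* was ehca_dma_len(dev, sg) */
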
index 714293b..e2f9a51 100644 (file)
@@ -326,7 +326,7 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
                                   size_t count, loff_t *off)
 {
        u32 __iomem *piobuf;
-       u32 plen, clen, pbufn;
+       u32 plen, pbufn, maxlen_reserve;
        struct ipath_diag_pkt odp;
        struct ipath_diag_xpkt dp;
        u32 *tmpbuf = NULL;
@@ -335,51 +335,29 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
        u64 val;
        u32 l_state, lt_state; /* LinkState, LinkTrainingState */
 
-       if (count < sizeof(odp)) {
-               ret = -EINVAL;
-               goto bail;
-       }
 
        if (count == sizeof(dp)) {
                if (copy_from_user(&dp, data, sizeof(dp))) {
                        ret = -EFAULT;
                        goto bail;
                }
-       } else if (copy_from_user(&odp, data, sizeof(odp))) {
-               ret = -EFAULT;
+       } else if (count == sizeof(odp)) {
+               if (copy_from_user(&odp, data, sizeof(odp))) {
+                       ret = -EFAULT;
+                       goto bail;
+               }
+       } else {
+               ret = -EINVAL;
                goto bail;
        }
 
-       /*
-        * Due to padding/alignment issues (lessened with new struct)
-        * the old and new structs are the same length. We need to
-        * disambiguate them, which we can do because odp.len has never
-        * been less than the total of LRH+BTH+DETH so far, while
-        * dp.unit (same offset) unit is unlikely to get that high.
-        * Similarly, dp.data, the pointer to user at the same offset
-        * as odp.unit, is almost certainly at least one (512byte)page
-        * "above" NULL. The if-block below can be omitted if compatibility
-        * between a new driver and older diagnostic code is unimportant.
-        * compatibility the other direction (new diags, old driver) is
-        * handled in the diagnostic code, with a warning.
-        */
-       if (dp.unit >= 20 && dp.data < 512) {
-               /* very probable version mismatch. Fix it up */
-               memcpy(&odp, &dp, sizeof(odp));
-               /* We got a legacy dp, copy elements to dp */
-               dp.unit = odp.unit;
-               dp.data = odp.data;
-               dp.len = odp.len;
-               dp.pbc_wd = 0; /* Indicate we need to compute PBC wd */
-       }
-
        /* send count must be an exact number of dwords */
        if (dp.len & 3) {
                ret = -EINVAL;
                goto bail;
        }
 
-       clen = dp.len >> 2;
+       plen = dp.len >> 2;
 
        dd = ipath_lookup(dp.unit);
        if (!dd || !(dd->ipath_flags & IPATH_PRESENT) ||
@@ -422,16 +400,22 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
                goto bail;
        }
 
-       /* need total length before first word written */
-       /* +1 word is for the qword padding */
-       plen = sizeof(u32) + dp.len;
-
-       if ((plen + 4) > dd->ipath_ibmaxlen) {
+       /*
+        * We need the total length before the first word is written, plus
+        * two dwords: one dword of padding so the full user data is captured
+        * when the length is not qword aligned, and one dword of room for
+        * the ICRC that gets tacked on later.
+        */
+       maxlen_reserve = 2 * sizeof(u32);
+       if (dp.len > dd->ipath_ibmaxlen - maxlen_reserve) {
                ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n",
-                         plen - 4, dd->ipath_ibmaxlen);
+                         dp.len, dd->ipath_ibmaxlen);
                ret = -EINVAL;
-               goto bail;      /* before writing pbc */
+               goto bail;
        }
+
+       plen = sizeof(u32) + dp.len;
+
        tmpbuf = vmalloc(plen);
        if (!tmpbuf) {
                dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, "
@@ -473,11 +457,11 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
         */
        if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
                ipath_flush_wc();
-               __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1);
+               __iowrite32_copy(piobuf + 2, tmpbuf, plen - 1);
                ipath_flush_wc();
-               __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
+               __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1);
        } else
-               __iowrite32_copy(piobuf + 2, tmpbuf, clen);
+               __iowrite32_copy(piobuf + 2, tmpbuf, plen);
 
        ipath_flush_wc();
 
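For concreteness, the new bound works out as follows (the ibmaxlen value is illustrative):

	u32 ibmaxlen = 4096;				/* illustrative link max */
	u32 maxlen_reserve = 2 * sizeof(u32);		/* pad dword + ICRC dword */
	u32 max_payload = ibmaxlen - maxlen_reserve;	/* 4088 bytes */
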
index 644c2c7..123a8c0 100644 (file)
@@ -115,6 +115,10 @@ static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sgl,
                        ret = 0;
                        break;
                }
+               sg->dma_address = addr + sg->offset;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+               sg->dma_length = sg->length;
+#endif
        }
        return ret;
 }
@@ -126,21 +130,6 @@ static void ipath_unmap_sg(struct ib_device *dev,
        BUG_ON(!valid_dma_direction(direction));
 }
 
-static u64 ipath_sg_dma_address(struct ib_device *dev, struct scatterlist *sg)
-{
-       u64 addr = (u64) page_address(sg_page(sg));
-
-       if (addr)
-               addr += sg->offset;
-       return addr;
-}
-
-static unsigned int ipath_sg_dma_len(struct ib_device *dev,
-                                    struct scatterlist *sg)
-{
-       return sg->length;
-}
-
 static void ipath_sync_single_for_cpu(struct ib_device *dev,
                                      u64 addr,
                                      size_t size,
@@ -176,17 +165,15 @@ static void ipath_dma_free_coherent(struct ib_device *dev, size_t size,
 }
 
 struct ib_dma_mapping_ops ipath_dma_mapping_ops = {
-       ipath_mapping_error,
-       ipath_dma_map_single,
-       ipath_dma_unmap_single,
-       ipath_dma_map_page,
-       ipath_dma_unmap_page,
-       ipath_map_sg,
-       ipath_unmap_sg,
-       ipath_sg_dma_address,
-       ipath_sg_dma_len,
-       ipath_sync_single_for_cpu,
-       ipath_sync_single_for_device,
-       ipath_dma_alloc_coherent,
-       ipath_dma_free_coherent
+       .mapping_error = ipath_mapping_error,
+       .map_single = ipath_dma_map_single,
+       .unmap_single = ipath_dma_unmap_single,
+       .map_page = ipath_dma_map_page,
+       .unmap_page = ipath_dma_unmap_page,
+       .map_sg = ipath_map_sg,
+       .unmap_sg = ipath_unmap_sg,
+       .sync_single_for_cpu = ipath_sync_single_for_cpu,
+       .sync_single_for_device = ipath_sync_single_for_device,
+       .alloc_coherent = ipath_dma_alloc_coherent,
+       .free_coherent = ipath_dma_free_coherent
 };
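Designated initializers bind each function pointer by member name, so the table no longer breaks silently when struct ib_dma_mapping_ops members are reordered or removed (as .dma_address and .dma_len are in this series). A minimal illustration of the difference:

	struct point { int x, y; };

	/* Positional: meaning depends on member order. */
	struct point a = { 1, 2 };		/* x = 1, y = 2 */

	/* Designated: survives reordering; unnamed members are zeroed. */
	struct point b = { .y = 2 };		/* x = 0, y = 2 */
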
index e346d38..5e61e9b 100644 (file)
@@ -188,8 +188,8 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 {
        struct ipath_mr *mr;
        struct ib_umem *umem;
-       struct ib_umem_chunk *chunk;
-       int n, m, i;
+       int n, m, entry;
+       struct scatterlist *sg;
        struct ib_mr *ret;
 
        if (length == 0) {
@@ -202,10 +202,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
        if (IS_ERR(umem))
                return (void *) umem;
 
-       n = 0;
-       list_for_each_entry(chunk, &umem->chunk_list, list)
-               n += chunk->nents;
-
+       n = umem->nmap;
        mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
        if (!mr) {
                ret = ERR_PTR(-ENOMEM);
@@ -224,22 +221,20 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
        m = 0;
        n = 0;
-       list_for_each_entry(chunk, &umem->chunk_list, list) {
-               for (i = 0; i < chunk->nents; i++) {
-                       void *vaddr;
-
-                       vaddr = page_address(sg_page(&chunk->page_list[i]));
-                       if (!vaddr) {
-                               ret = ERR_PTR(-EINVAL);
-                               goto bail;
-                       }
-                       mr->mr.map[m]->segs[n].vaddr = vaddr;
-                       mr->mr.map[m]->segs[n].length = umem->page_size;
-                       n++;
-                       if (n == IPATH_SEGSZ) {
-                               m++;
-                               n = 0;
-                       }
+       for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+               void *vaddr;
+
+               vaddr = page_address(sg_page(sg));
+               if (!vaddr) {
+                       ret = ERR_PTR(-EINVAL);
+                       goto bail;
+               }
+               mr->mr.map[m]->segs[n].vaddr = vaddr;
+               mr->mr.map[m]->segs[n].length = umem->page_size;
+               n++;
+               if (n == IPATH_SEGSZ) {
+                       m++;
+                       n = 0;
                }
        }
        ret = &mr->ibmr;
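This is the recurring conversion in this series: struct ib_umem no longer exposes a chunk_list, so drivers walk a single scatterlist via for_each_sg(). A minimal sketch of the new walk, assuming umem was returned by ib_umem_get():

	struct scatterlist *sg;
	int entry;

	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
		u64 dma = sg_dma_address(sg);		/* DMA address of entry */
		unsigned int len = sg_dma_len(sg);	/* may span many pages */
		/* ... consume len bytes starting at dma ... */
	}
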
index 8aee423..c517409 100644 (file)
@@ -45,7 +45,6 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
                        struct mlx4_db *db)
 {
        struct mlx4_ib_user_db_page *page;
-       struct ib_umem_chunk *chunk;
        int err = 0;
 
        mutex_lock(&context->db_page_mutex);
@@ -73,8 +72,7 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
        list_add(&page->list, &context->db_page_list);
 
 found:
-       chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list);
-       db->dma         = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK);
+       db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK);
        db->u.user_page = page;
        ++page->refcnt;
 
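The doorbell's DMA address is the bus address of the first (and only) scatterlist entry plus the doorbell's byte offset within its page. Worked example with 4 KiB pages (the user address is illustrative):

	unsigned long virt = 0x7f1234567abcUL;		/* user address */
	unsigned long off  = virt & ~PAGE_MASK;		/* 0xabc */
	u64 dma = sg_dma_address(page->umem->sg_head.sgl) + off;
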
index e81c554..20b4d7a 100644 (file)
@@ -1803,7 +1803,7 @@ static void init_pkeys(struct mlx4_ib_dev *ibdev)
 
 static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
 {
-       char name[32];
+       char name[80];
        int eq_per_port = 0;
        int added_eqs = 0;
        int total_eqs = 0;
@@ -1833,8 +1833,8 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
        eq = 0;
        mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) {
                for (j = 0; j < eq_per_port; j++) {
-                       sprintf(name, "mlx4-ib-%d-%d@%s",
-                               i, j, dev->pdev->bus->name);
+                       snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%s",
+                                i, j, dev->pdev->bus->name);
                        /* Set IRQ for specific name (per ring) */
                        if (mlx4_assign_eq(dev, name, NULL,
                                           &ibdev->eq_table[eq])) {
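The IRQ name embeds the PCI bus name, whose length this function does not control, so the old 32-byte buffer could overflow. Growing the buffer and switching to snprintf() turns the worst case into a truncated name rather than a stack overrun. The safe pattern, with bus_name standing in for dev->pdev->bus->name:

	char name[80];

	/* Writes at most sizeof(name) bytes and always NUL-terminates. */
	snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%s", i, j, bus_name);
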
@@ -2056,8 +2056,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
                        err = mlx4_counter_alloc(ibdev->dev, &ibdev->counters[i]);
                        if (err)
                                ibdev->counters[i] = -1;
-               } else
-                               ibdev->counters[i] = -1;
+               } else {
+                       ibdev->counters[i] = -1;
+               }
        }
 
        mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
index e471f08..cb2a872 100644 (file)
@@ -90,11 +90,11 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
                           struct ib_umem *umem)
 {
        u64 *pages;
-       struct ib_umem_chunk *chunk;
-       int i, j, k;
+       int i, k, entry;
        int n;
        int len;
        int err = 0;
+       struct scatterlist *sg;
 
        pages = (u64 *) __get_free_page(GFP_KERNEL);
        if (!pages)
@@ -102,26 +102,25 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
 
        i = n = 0;
 
-       list_for_each_entry(chunk, &umem->chunk_list, list)
-               for (j = 0; j < chunk->nmap; ++j) {
-                       len = sg_dma_len(&chunk->page_list[j]) >> mtt->page_shift;
-                       for (k = 0; k < len; ++k) {
-                               pages[i++] = sg_dma_address(&chunk->page_list[j]) +
-                                       umem->page_size * k;
-                               /*
-                                * Be friendly to mlx4_write_mtt() and
-                                * pass it chunks of appropriate size.
-                                */
-                               if (i == PAGE_SIZE / sizeof (u64)) {
-                                       err = mlx4_write_mtt(dev->dev, mtt, n,
-                                                            i, pages);
-                                       if (err)
-                                               goto out;
-                                       n += i;
-                                       i = 0;
-                               }
+       for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+               len = sg_dma_len(sg) >> mtt->page_shift;
+               for (k = 0; k < len; ++k) {
+                       pages[i++] = sg_dma_address(sg) +
+                               umem->page_size * k;
+                       /*
+                        * Be friendly to mlx4_write_mtt() and
+                        * pass it chunks of appropriate size.
+                        */
+                       if (i == PAGE_SIZE / sizeof (u64)) {
+                               err = mlx4_write_mtt(dev->dev, mtt, n,
+                                                    i, pages);
+                               if (err)
+                                       goto out;
+                               n += i;
+                               i = 0;
                        }
                }
+       }
 
        if (i)
                err = mlx4_write_mtt(dev->dev, mtt, n, i, pages);
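The scratch page batches MTT entries so mlx4_write_mtt() is called with reasonably sized chunks; with 4 KiB pages the flush happens every PAGE_SIZE / sizeof(u64) entries:

	int batch = PAGE_SIZE / sizeof(u64);	/* 512 entries on 4 KiB pages */
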
index d8f4d1f..7499325 100644 (file)
@@ -1882,7 +1882,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
                                return err;
                }
 
-               if (ah->av.eth.vlan != 0xffff) {
+               if (ah->av.eth.vlan != cpu_to_be16(0xffff)) {
                        vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff;
                        is_vlan = 1;
                }
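Note that 0xffff is invariant under byte swapping, so the comparison behaves identically either way; the gain is type correctness, since ah->av.eth.vlan is __be16 and comparing it against cpu_to_be16(0xffff) keeps sparse's endianness checker (make C=1) quiet. The pattern, illustratively:

	__be16 no_vlan = cpu_to_be16(0xffff);	/* "no VLAN" sentinel */

	if (ah->av.eth.vlan != no_vlan)
		vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff;	/* 12-bit VID */
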
index b1705ce..62bb6b4 100644 (file)
@@ -366,6 +366,38 @@ static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
        mlx5_buf_free(&dev->mdev, &buf->buf);
 }
 
+static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
+                            struct ib_sig_err *item)
+{
+       u16 syndrome = be16_to_cpu(cqe->syndrome);
+
+#define GUARD_ERR   (1 << 13)
+#define APPTAG_ERR  (1 << 12)
+#define REFTAG_ERR  (1 << 11)
+
+       if (syndrome & GUARD_ERR) {
+               item->err_type = IB_SIG_BAD_GUARD;
+               item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16;
+               item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16;
+       } else if (syndrome & REFTAG_ERR) {
+               item->err_type = IB_SIG_BAD_REFTAG;
+               item->expected = be32_to_cpu(cqe->expected_reftag);
+               item->actual = be32_to_cpu(cqe->actual_reftag);
+       } else if (syndrome & APPTAG_ERR) {
+               item->err_type = IB_SIG_BAD_APPTAG;
+               item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff;
+               item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff;
+       } else {
+               pr_err("Got signature completion error with bad syndrome %04x\n",
+                      syndrome);
+       }
+
+       item->sig_err_offset = be64_to_cpu(cqe->err_offset);
+       item->key = be32_to_cpu(cqe->mkey);
+}
+
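The syndrome is a bit mask tested in guard, then reftag, then apptag order; a guard (CRC/checksum) failure carries its 16-bit expected/actual values in the upper halves of the transient-signature words. Illustrative decode:

	u16 syndrome = 0x2000;			/* only bit 13 set */
	bool guard  = syndrome & GUARD_ERR;	/* true */
	bool reftag = syndrome & REFTAG_ERR;	/* false */
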
 static int mlx5_poll_one(struct mlx5_ib_cq *cq,
                         struct mlx5_ib_qp **cur_qp,
                         struct ib_wc *wc)
@@ -375,6 +407,9 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
        struct mlx5_cqe64 *cqe64;
        struct mlx5_core_qp *mqp;
        struct mlx5_ib_wq *wq;
+       struct mlx5_sig_err_cqe *sig_err_cqe;
+       struct mlx5_core_mr *mmr;
+       struct mlx5_ib_mr *mr;
        uint8_t opcode;
        uint32_t qpn;
        u16 wqe_ctr;
@@ -475,6 +510,33 @@ repoll:
                        }
                }
                break;
+       case MLX5_CQE_SIG_ERR:
+               sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;
+
+               read_lock(&dev->mdev.priv.mr_table.lock);
+               mmr = __mlx5_mr_lookup(&dev->mdev,
+                                      mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
+               if (unlikely(!mmr)) {
+                       read_unlock(&dev->mdev.priv.mr_table.lock);
+                       mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n",
+                                    cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey));
+                       return -EINVAL;
+               }
+
+               mr = to_mibmr(mmr);
+               get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
+               mr->sig->sig_err_exists = true;
+               mr->sig->sigerr_count++;
+
+               mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n",
+                            cq->mcq.cqn, mr->sig->err_item.key,
+                            mr->sig->err_item.err_type,
+                            mr->sig->err_item.sig_err_offset,
+                            mr->sig->err_item.expected,
+                            mr->sig->err_item.actual);
+
+               read_unlock(&dev->mdev.priv.mr_table.lock);
+               goto repoll;
        }
 
        return 0;
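A signature-error CQE is consumed internally (the goto repoll) rather than surfaced as a work completion; the error is parked on the MR and reported when the consumer calls the new verb. A hypothetical consumer-side check, using the API this series adds:

	struct ib_mr_status st;

	if (!ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &st) &&
	    (st.fail_status & IB_MR_CHECK_SIG_STATUS))
		pr_err("sig error type %d at offset %llu\n",
		       st.sig_err.err_type, st.sig_err.sig_err_offset);
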
index 256a233..ece028f 100644 (file)
@@ -47,7 +47,6 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
                        struct mlx5_db *db)
 {
        struct mlx5_ib_user_db_page *page;
-       struct ib_umem_chunk *chunk;
        int err = 0;
 
        mutex_lock(&context->db_page_mutex);
@@ -75,8 +74,7 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
        list_add(&page->list, &context->db_page_list);
 
 found:
-       chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list);
-       db->dma         = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK);
+       db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK);
        db->u.user_page = page;
        ++page->refcnt;
 
index aa03e73..7b9c078 100644 (file)
@@ -273,6 +273,15 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
        if (flags & MLX5_DEV_CAP_FLAG_XRC)
                props->device_cap_flags |= IB_DEVICE_XRC;
        props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+       if (flags & MLX5_DEV_CAP_FLAG_SIG_HAND_OVER) {
+               props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER;
+               /* At this stage no support for signature handover */
+               props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 |
+                                     IB_PROT_T10DIF_TYPE_2 |
+                                     IB_PROT_T10DIF_TYPE_3;
+               props->sig_guard_cap = IB_GUARD_T10DIF_CRC |
+                                      IB_GUARD_T10DIF_CSUM;
+       }
 
        props->vendor_id           = be32_to_cpup((__be32 *)(out_mad->data + 36)) &
                0xffffff;
@@ -1423,12 +1432,15 @@ static int init_one(struct pci_dev *pdev,
        dev->ib_dev.get_dma_mr          = mlx5_ib_get_dma_mr;
        dev->ib_dev.reg_user_mr         = mlx5_ib_reg_user_mr;
        dev->ib_dev.dereg_mr            = mlx5_ib_dereg_mr;
+       dev->ib_dev.destroy_mr          = mlx5_ib_destroy_mr;
        dev->ib_dev.attach_mcast        = mlx5_ib_mcg_attach;
        dev->ib_dev.detach_mcast        = mlx5_ib_mcg_detach;
        dev->ib_dev.process_mad         = mlx5_ib_process_mad;
+       dev->ib_dev.create_mr           = mlx5_ib_create_mr;
        dev->ib_dev.alloc_fast_reg_mr   = mlx5_ib_alloc_fast_reg_mr;
        dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
        dev->ib_dev.free_fast_reg_page_list  = mlx5_ib_free_fast_reg_page_list;
+       dev->ib_dev.check_mr_status     = mlx5_ib_check_mr_status;
 
        if (mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC) {
                dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
index 3a53228..8499aec 100644 (file)
 void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
                        int *ncont, int *order)
 {
-       struct ib_umem_chunk *chunk;
        unsigned long tmp;
        unsigned long m;
-       int i, j, k;
+       int i, k;
        u64 base = 0;
        int p = 0;
        int skip;
        int mask;
        u64 len;
        u64 pfn;
+       struct scatterlist *sg;
+       int entry;
 
        addr = addr >> PAGE_SHIFT;
        tmp = (unsigned long)addr;
@@ -61,32 +62,31 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
        skip = 1 << m;
        mask = skip - 1;
        i = 0;
-       list_for_each_entry(chunk, &umem->chunk_list, list)
-               for (j = 0; j < chunk->nmap; j++) {
-                       len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT;
-                       pfn = sg_dma_address(&chunk->page_list[j]) >> PAGE_SHIFT;
-                       for (k = 0; k < len; k++) {
-                               if (!(i & mask)) {
-                                       tmp = (unsigned long)pfn;
-                                       m = min(m, find_first_bit(&tmp, sizeof(tmp)));
+       for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+               len = sg_dma_len(sg) >> PAGE_SHIFT;
+               pfn = sg_dma_address(sg) >> PAGE_SHIFT;
+               for (k = 0; k < len; k++) {
+                       if (!(i & mask)) {
+                               tmp = (unsigned long)pfn;
+                               m = min(m, find_first_bit(&tmp, sizeof(tmp)));
+                               skip = 1 << m;
+                               mask = skip - 1;
+                               base = pfn;
+                               p = 0;
+                       } else {
+                               if (base + p != pfn) {
+                                       tmp = (unsigned long)p;
+                                       m = find_first_bit(&tmp, sizeof(tmp));
                                        skip = 1 << m;
                                        mask = skip - 1;
                                        base = pfn;
                                        p = 0;
-                               } else {
-                                       if (base + p != pfn) {
-                                               tmp = (unsigned long)p;
-                                               m = find_first_bit(&tmp, sizeof(tmp));
-                                               skip = 1 << m;
-                                               mask = skip - 1;
-                                               base = pfn;
-                                               p = 0;
-                                       }
                                }
-                               p++;
-                               i++;
                        }
+                       p++;
+                       i++;
                }
+       }
 
        if (i) {
                m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m);
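mlx5_ib_cont_pages() computes the largest power-of-two hardware page size that tiles the pinned region: m starts at the alignment of the virtual address and shrinks whenever a pfn's alignment or contiguity breaks. Worked example (numbers are illustrative):

	/* 16 contiguous 4 KiB pages, first pfn on an 8-page boundary. */
	unsigned long m = 3;			/* smallest alignment seen */
	int page_shift = PAGE_SHIFT + m;	/* 12 + 3 -> 32 KiB hwpages */
	int ncont = 16 >> m;			/* 2 hardware pages */
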
@@ -112,32 +112,32 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 {
        int shift = page_shift - PAGE_SHIFT;
        int mask = (1 << shift) - 1;
-       struct ib_umem_chunk *chunk;
-       int i, j, k;
+       int i, k;
        u64 cur = 0;
        u64 base;
        int len;
+       struct scatterlist *sg;
+       int entry;
 
        i = 0;
-       list_for_each_entry(chunk, &umem->chunk_list, list)
-               for (j = 0; j < chunk->nmap; j++) {
-                       len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT;
-                       base = sg_dma_address(&chunk->page_list[j]);
-                       for (k = 0; k < len; k++) {
-                               if (!(i & mask)) {
-                                       cur = base + (k << PAGE_SHIFT);
-                                       if (umr)
-                                               cur |= 3;
+       for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+               len = sg_dma_len(sg) >> PAGE_SHIFT;
+               base = sg_dma_address(sg);
+               for (k = 0; k < len; k++) {
+                       if (!(i & mask)) {
+                               cur = base + (k << PAGE_SHIFT);
+                               if (umr)
+                                       cur |= 3;
 
-                                       pas[i >> shift] = cpu_to_be64(cur);
-                                       mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
-                                                   i >> shift, be64_to_cpu(pas[i >> shift]));
-                               }  else
-                                       mlx5_ib_dbg(dev, "=====> 0x%llx\n",
-                                                   base + (k << PAGE_SHIFT));
-                               i++;
-                       }
+                               pas[i >> shift] = cpu_to_be64(cur);
+                               mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
+                                           i >> shift, be64_to_cpu(pas[i >> shift]));
+                       } else
+                               mlx5_ib_dbg(dev, "=====> 0x%llx\n",
+                                           base + (k << PAGE_SHIFT));
+                       i++;
                }
+       }
 }
 
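The cur |= 3 above relies on page addresses being at least 4 KiB aligned, which frees the low bits of each MTT entry for flags; setting both marks the translation usable when the table is written through UMR. A sketch with hypothetical flag names (not from this series):

	#define MTT_FLAG_READ	1	/* hypothetical names */
	#define MTT_FLAG_WRITE	2

	u64 pa = base + (k << PAGE_SHIFT);		/* page-aligned address */
	u64 mtt = pa | MTT_FLAG_READ | MTT_FLAG_WRITE;	/* == pa | 3 */
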
 int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
index 389e319..5054158 100644 (file)
@@ -189,6 +189,9 @@ struct mlx5_ib_qp {
 
        int                     create_type;
        u32                     pa_lkey;
+
+       /* Store signature errors */
+       bool                    signature_en;
 };
 
 struct mlx5_ib_cq_buf {
@@ -265,6 +268,7 @@ struct mlx5_ib_mr {
        enum ib_wc_status       status;
        struct mlx5_ib_dev     *dev;
        struct mlx5_create_mkey_mbox_out out;
+       struct mlx5_core_sig_ctx    *sig;
 };
 
 struct mlx5_ib_fast_reg_page_list {
@@ -396,6 +400,11 @@ static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp)
        return container_of(mqp, struct mlx5_ib_qp, mqp);
 }
 
+static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr)
+{
+       return container_of(mmr, struct mlx5_ib_mr, mmr);
+}
+
 static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd)
 {
        return container_of(ibpd, struct mlx5_ib_pd, ibpd);
@@ -495,6 +504,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                  u64 virt_addr, int access_flags,
                                  struct ib_udata *udata);
 int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
+int mlx5_ib_destroy_mr(struct ib_mr *ibmr);
+struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
+                               struct ib_mr_init_attr *mr_init_attr);
 struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
                                        int max_page_list_len);
 struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
@@ -530,6 +542,8 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
 int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
 void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context);
+int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
+                           struct ib_mr_status *mr_status);
 
 static inline void init_query_mad(struct ib_smp *mad)
 {
index 7c95ca1..81392b2 100644 (file)
@@ -992,6 +992,122 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
        return 0;
 }
 
+struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
+                               struct ib_mr_init_attr *mr_init_attr)
+{
+       struct mlx5_ib_dev *dev = to_mdev(pd->device);
+       struct mlx5_create_mkey_mbox_in *in;
+       struct mlx5_ib_mr *mr;
+       int access_mode, err;
+       int ndescs = roundup(mr_init_attr->max_reg_descriptors, 4);
+
+       mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+       if (!mr)
+               return ERR_PTR(-ENOMEM);
+
+       in = kzalloc(sizeof(*in), GFP_KERNEL);
+       if (!in) {
+               err = -ENOMEM;
+               goto err_free;
+       }
+
+       in->seg.status = 1 << 6; /* free */
+       in->seg.xlt_oct_size = cpu_to_be32(ndescs);
+       in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+       in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
+       access_mode = MLX5_ACCESS_MODE_MTT;
+
+       if (mr_init_attr->flags & IB_MR_SIGNATURE_EN) {
+               u32 psv_index[2];
+
+               in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) |
+                                                          MLX5_MKEY_BSF_EN);
+               in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
+               mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
+               if (!mr->sig) {
+                       err = -ENOMEM;
+                       goto err_free_in;
+               }
+
+               /* create mem & wire PSVs */
+               err = mlx5_core_create_psv(&dev->mdev, to_mpd(pd)->pdn,
+                                          2, psv_index);
+               if (err)
+                       goto err_free_sig;
+
+               access_mode = MLX5_ACCESS_MODE_KLM;
+               mr->sig->psv_memory.psv_idx = psv_index[0];
+               mr->sig->psv_wire.psv_idx = psv_index[1];
+
+               mr->sig->sig_status_checked = true;
+               mr->sig->sig_err_exists = false;
+               /* Next UMR, Arm SIGERR */
+               ++mr->sig->sigerr_count;
+       }
+
+       in->seg.flags = MLX5_PERM_UMR_EN | access_mode;
+       err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in),
+                                   NULL, NULL, NULL);
+       if (err)
+               goto err_destroy_psv;
+
+       mr->ibmr.lkey = mr->mmr.key;
+       mr->ibmr.rkey = mr->mmr.key;
+       mr->umem = NULL;
+       kfree(in);
+
+       return &mr->ibmr;
+
+err_destroy_psv:
+       if (mr->sig) {
+               if (mlx5_core_destroy_psv(&dev->mdev,
+                                         mr->sig->psv_memory.psv_idx))
+                       mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
+                                    mr->sig->psv_memory.psv_idx);
+               if (mlx5_core_destroy_psv(&dev->mdev,
+                                         mr->sig->psv_wire.psv_idx))
+                       mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
+                                    mr->sig->psv_wire.psv_idx);
+       }
+err_free_sig:
+       kfree(mr->sig);
+err_free_in:
+       kfree(in);
+err_free:
+       kfree(mr);
+       return ERR_PTR(err);
+}
+
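A hypothetical caller allocating a signature-enabled MR through the new verb; the attribute names come from the ib_mr_init_attr introduced by this series:

	struct ib_mr_init_attr attr = {
		.max_reg_descriptors = 2,	/* data + protection */
		.flags = IB_MR_SIGNATURE_EN,
	};
	struct ib_mr *sig_mr = ib_create_mr(pd, &attr);

	if (IS_ERR(sig_mr))
		return PTR_ERR(sig_mr);
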
+int mlx5_ib_destroy_mr(struct ib_mr *ibmr)
+{
+       struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
+       struct mlx5_ib_mr *mr = to_mmr(ibmr);
+       int err;
+
+       if (mr->sig) {
+               if (mlx5_core_destroy_psv(&dev->mdev,
+                                         mr->sig->psv_memory.psv_idx))
+                       mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
+                                    mr->sig->psv_memory.psv_idx);
+               if (mlx5_core_destroy_psv(&dev->mdev,
+                                         mr->sig->psv_wire.psv_idx))
+                       mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
+                                    mr->sig->psv_wire.psv_idx);
+               kfree(mr->sig);
+       }
+
+       err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
+       if (err) {
+               mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
+                            mr->mmr.key, err);
+               return err;
+       }
+
+       kfree(mr);
+
+       return err;
+}
+
 struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
                                        int max_page_list_len)
 {
@@ -1077,3 +1193,44 @@ void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
        kfree(mfrpl->ibfrpl.page_list);
        kfree(mfrpl);
 }
+
+int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
+                           struct ib_mr_status *mr_status)
+{
+       struct mlx5_ib_mr *mmr = to_mmr(ibmr);
+       int ret = 0;
+
+       if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
+               pr_err("Invalid status check mask\n");
+               ret = -EINVAL;
+               goto done;
+       }
+
+       mr_status->fail_status = 0;
+       if (check_mask & IB_MR_CHECK_SIG_STATUS) {
+               if (!mmr->sig) {
+                       ret = -EINVAL;
+                       pr_err("signature status check requested on a non-signature enabled MR\n");
+                       goto done;
+               }
+
+               mmr->sig->sig_status_checked = true;
+               if (!mmr->sig->sig_err_exists)
+                       goto done;
+
+               if (ibmr->lkey == mmr->sig->err_item.key)
+                       memcpy(&mr_status->sig_err, &mmr->sig->err_item,
+                              sizeof(mr_status->sig_err));
+               else {
+                       mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
+                       mr_status->sig_err.sig_err_offset = 0;
+                       mr_status->sig_err.key = mmr->sig->err_item.key;
+               }
+
+               mmr->sig->sig_err_exists = false;
+               mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
+       }
+
+done:
+       return ret;
+}
index 7dfe8a1..ae788d2 100644 (file)
@@ -256,8 +256,11 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr)
        }
 
        size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg);
-
-       return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
+       if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN &&
+           ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB) < MLX5_SIG_WQE_SIZE)
+               return MLX5_SIG_WQE_SIZE;
+       else
+               return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
 }
 
 static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
@@ -284,6 +287,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
                sizeof(struct mlx5_wqe_inline_seg);
        attr->cap.max_inline_data = qp->max_inline_data;
 
+       if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN)
+               qp->signature_en = true;
+
        wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size);
        qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
        if (qp->sq.wqe_cnt > dev->mdev.caps.max_wqes) {
@@ -665,7 +671,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
        int err;
 
        uuari = &dev->mdev.priv.uuari;
-       if (init_attr->create_flags)
+       if (init_attr->create_flags & ~IB_QP_CREATE_SIGNATURE_EN)
                return -EINVAL;
 
        if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
@@ -1771,6 +1777,27 @@ static __be64 frwr_mkey_mask(void)
        return cpu_to_be64(result);
 }
 
+static __be64 sig_mkey_mask(void)
+{
+       u64 result;
+
+       result = MLX5_MKEY_MASK_LEN             |
+               MLX5_MKEY_MASK_PAGE_SIZE        |
+               MLX5_MKEY_MASK_START_ADDR       |
+               MLX5_MKEY_MASK_EN_SIGERR        |
+               MLX5_MKEY_MASK_EN_RINVAL        |
+               MLX5_MKEY_MASK_KEY              |
+               MLX5_MKEY_MASK_LR               |
+               MLX5_MKEY_MASK_LW               |
+               MLX5_MKEY_MASK_RR               |
+               MLX5_MKEY_MASK_RW               |
+               MLX5_MKEY_MASK_SMALL_FENCE      |
+               MLX5_MKEY_MASK_FREE             |
+               MLX5_MKEY_MASK_BSF_EN;
+
+       return cpu_to_be64(result);
+}
+
 static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
                                 struct ib_send_wr *wr, int li)
 {
@@ -1826,7 +1853,7 @@ static u8 get_umr_flags(int acc)
               (acc & IB_ACCESS_REMOTE_WRITE  ? MLX5_PERM_REMOTE_WRITE : 0) |
               (acc & IB_ACCESS_REMOTE_READ   ? MLX5_PERM_REMOTE_READ  : 0) |
               (acc & IB_ACCESS_LOCAL_WRITE   ? MLX5_PERM_LOCAL_WRITE  : 0) |
-               MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
+               MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN;
 }
 
 static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
@@ -1838,7 +1865,8 @@ static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
                return;
        }
 
-       seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags);
+       seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags) |
+                    MLX5_ACCESS_MODE_MTT;
        *writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE);
        seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00);
        seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
@@ -1954,6 +1982,342 @@ static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr,
        return 0;
 }
 
+static u16 prot_field_size(enum ib_signature_type type)
+{
+       switch (type) {
+       case IB_SIG_TYPE_T10_DIF:
+               return MLX5_DIF_SIZE;
+       default:
+               return 0;
+       }
+}
+
+static u8 bs_selector(int block_size)
+{
+       switch (block_size) {
+       case 512:           return 0x1;
+       case 520:           return 0x2;  /* 512 + 8 */
+       case 4096:          return 0x3;
+       case 4160:          return 0x4;  /* 4096 + 64 */
+       case 1073741824:    return 0x5;  /* 1 << 30 */
+       default:            return 0;    /* unsupported block size */
+       }
+}
+
+static int format_selector(struct ib_sig_attrs *attr,
+                          struct ib_sig_domain *domain,
+                          int *selector)
+{
+
+#define FORMAT_DIF_NONE                0
+#define FORMAT_DIF_CRC_INC     8
+#define FORMAT_DIF_CRC_NO_INC  12
+#define FORMAT_DIF_CSUM_INC    13
+#define FORMAT_DIF_CSUM_NO_INC 14
+
+       switch (domain->sig.dif.type) {
+       case IB_T10DIF_NONE:
+               /* No DIF */
+               *selector = FORMAT_DIF_NONE;
+               break;
+       case IB_T10DIF_TYPE1: /* Fall through */
+       case IB_T10DIF_TYPE2:
+               switch (domain->sig.dif.bg_type) {
+               case IB_T10DIF_CRC:
+                       *selector = FORMAT_DIF_CRC_INC;
+                       break;
+               case IB_T10DIF_CSUM:
+                       *selector = FORMAT_DIF_CSUM_INC;
+                       break;
+               default:
+                       return 1;
+               }
+               break;
+       case IB_T10DIF_TYPE3:
+               switch (domain->sig.dif.bg_type) {
+               case IB_T10DIF_CRC:
+                       *selector = domain->sig.dif.type3_inc_reftag ?
+                                          FORMAT_DIF_CRC_INC :
+                                          FORMAT_DIF_CRC_NO_INC;
+                       break;
+               case IB_T10DIF_CSUM:
+                       *selector = domain->sig.dif.type3_inc_reftag ?
+                                          FORMAT_DIF_CSUM_INC :
+                                          FORMAT_DIF_CSUM_NO_INC;
+                       break;
+               default:
+                       return 1;
+               }
+               break;
+       default:
+               return 1;
+       }
+
+       return 0;
+}
+
+static int mlx5_set_bsf(struct ib_mr *sig_mr,
+                       struct ib_sig_attrs *sig_attrs,
+                       struct mlx5_bsf *bsf, u32 data_size)
+{
+       struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig;
+       struct mlx5_bsf_basic *basic = &bsf->basic;
+       struct ib_sig_domain *mem = &sig_attrs->mem;
+       struct ib_sig_domain *wire = &sig_attrs->wire;
+       int ret, selector;
+
+       switch (sig_attrs->mem.sig_type) {
+       case IB_SIG_TYPE_T10_DIF:
+               if (sig_attrs->wire.sig_type != IB_SIG_TYPE_T10_DIF)
+                       return -EINVAL;
+
+               /* Input domain check byte mask */
+               basic->check_byte_mask = sig_attrs->check_mask;
+               if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval &&
+                   mem->sig.dif.type == wire->sig.dif.type) {
+                       /* Same block structure */
+                       basic->bsf_size_sbs = 1 << 4;
+                       if (mem->sig.dif.bg_type == wire->sig.dif.bg_type)
+                               basic->wire.copy_byte_mask = 0xff;
+                       else
+                               basic->wire.copy_byte_mask = 0x3f;
+               } else
+                       basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval);
+
+               basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval);
+               basic->raw_data_size = cpu_to_be32(data_size);
+
+               ret = format_selector(sig_attrs, mem, &selector);
+               if (ret)
+                       return -EINVAL;
+               basic->m_bfs_psv = cpu_to_be32(selector << 24 |
+                                              msig->psv_memory.psv_idx);
+
+               ret = format_selector(sig_attrs, wire, &selector);
+               if (ret)
+                       return -EINVAL;
+               basic->w_bfs_psv = cpu_to_be32(selector << 24 |
+                                              msig->psv_wire.psv_idx);
+               break;
+
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
+                               void **seg, int *size)
+{
+       struct ib_sig_attrs *sig_attrs = wr->wr.sig_handover.sig_attrs;
+       struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr;
+       struct mlx5_bsf *bsf;
+       u32 data_len = wr->sg_list->length;
+       u32 data_key = wr->sg_list->lkey;
+       u64 data_va = wr->sg_list->addr;
+       int ret;
+       int wqe_size;
+
+       if (!wr->wr.sig_handover.prot) {
+               /**
+                * The source domain doesn't contain signature
+                * information, so we only need to construct:
+                *                  ------------------
+                *                 |     data_klm     |
+                *                  ------------------
+                *                 |       BSF        |
+                *                  ------------------
+                **/
+               struct mlx5_klm *data_klm = *seg;
+
+               data_klm->bcount = cpu_to_be32(data_len);
+               data_klm->key = cpu_to_be32(data_key);
+               data_klm->va = cpu_to_be64(data_va);
+               wqe_size = ALIGN(sizeof(*data_klm), 64);
+       } else {
+               /**
+                * The source domain contains signature information,
+                * so we need to construct a strided block format:
+                *               ---------------------------
+                *              |     stride_block_ctrl     |
+                *               ---------------------------
+                *              |          data_klm         |
+                *               ---------------------------
+                *              |          prot_klm         |
+                *               ---------------------------
+                *              |             BSF           |
+                *               ---------------------------
+                **/
+               struct mlx5_stride_block_ctrl_seg *sblock_ctrl;
+               struct mlx5_stride_block_entry *data_sentry;
+               struct mlx5_stride_block_entry *prot_sentry;
+               u32 prot_key = wr->wr.sig_handover.prot->lkey;
+               u64 prot_va = wr->wr.sig_handover.prot->addr;
+               u16 block_size = sig_attrs->mem.sig.dif.pi_interval;
+               int prot_size;
+
+               sblock_ctrl = *seg;
+               data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl);
+               prot_sentry = (void *)data_sentry + sizeof(*data_sentry);
+
+               prot_size = prot_field_size(sig_attrs->mem.sig_type);
+               if (!prot_size) {
+                       pr_err("Bad block size given: %u\n", block_size);
+                       return -EINVAL;
+               }
+               sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size +
+                                                           prot_size);
+               sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP);
+               sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size);
+               sblock_ctrl->num_entries = cpu_to_be16(2);
+
+               data_sentry->bcount = cpu_to_be16(block_size);
+               data_sentry->key = cpu_to_be32(data_key);
+               data_sentry->va = cpu_to_be64(data_va);
+               prot_sentry->bcount = cpu_to_be16(prot_size);
+               prot_sentry->key = cpu_to_be32(prot_key);
+
+               if (prot_key == data_key && prot_va == data_va) {
+                       /**
+                        * The data and protection are interleaved
+                        * in a single memory region
+                        **/
+                       prot_sentry->va = cpu_to_be64(data_va + block_size);
+                       prot_sentry->stride = cpu_to_be16(block_size + prot_size);
+                       data_sentry->stride = prot_sentry->stride;
+               } else {
+                       /* The data and protection are two different buffers */
+                       prot_sentry->va = cpu_to_be64(prot_va);
+                       data_sentry->stride = cpu_to_be16(block_size);
+                       prot_sentry->stride = cpu_to_be16(prot_size);
+               }
+               wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) +
+                                sizeof(*prot_sentry), 64);
+       }
+
+       *seg += wqe_size;
+       *size += wqe_size / 16;
+       if (unlikely((*seg == qp->sq.qend)))
+               *seg = mlx5_get_send_wqe(qp, 0);
+
+       bsf = *seg;
+       ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len);
+       if (ret)
+               return -EINVAL;
+
+       *seg += sizeof(*bsf);
+       *size += sizeof(*bsf) / 16;
+       if (unlikely((*seg == qp->sq.qend)))
+               *seg = mlx5_get_send_wqe(qp, 0);
+
+       return 0;
+}
+
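Worked numbers for the strided case above: 4 KiB of data in 512-byte blocks, each followed on the wire by an 8-byte T10-DIF tuple kept in a separate protection buffer (sizes illustrative):

	u32 data_len = 4096, block = 512, prot = 8;
	u32 repeat_count = data_len / block;	/* 8 cycles */
	u32 bcount_per_cycle = block + prot;	/* 520 bytes per cycle */
	/* Interleaved case: one buffer, and both strides become 520. */
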
+static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
+                                struct ib_send_wr *wr, u32 nelements,
+                                u32 length, u32 pdn)
+{
+       struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr;
+       u32 sig_key = sig_mr->rkey;
+       u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1;
+
+       memset(seg, 0, sizeof(*seg));
+
+       seg->flags = get_umr_flags(wr->wr.sig_handover.access_flags) |
+                                  MLX5_ACCESS_MODE_KLM;
+       seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
+       seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
+                                   MLX5_MKEY_BSF_EN | pdn);
+       seg->len = cpu_to_be64(length);
+       seg->xlt_oct_size = cpu_to_be32(be16_to_cpu(get_klm_octo(nelements)));
+       seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
+}
+
+static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
+                               struct ib_send_wr *wr, u32 nelements)
+{
+       memset(umr, 0, sizeof(*umr));
+
+       umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE;
+       umr->klm_octowords = get_klm_octo(nelements);
+       umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE);
+       umr->mkey_mask = sig_mkey_mask();
+}
+
+static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
+                         void **seg, int *size)
+{
+       struct mlx5_ib_mr *sig_mr = to_mmr(wr->wr.sig_handover.sig_mr);
+       u32 pdn = get_pd(qp)->pdn;
+       u32 klm_oct_size;
+       int region_len, ret;
+
+       if (unlikely(wr->num_sge != 1) ||
+           unlikely(wr->wr.sig_handover.access_flags &
+                    IB_ACCESS_REMOTE_ATOMIC) ||
+           unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) ||
+           unlikely(!sig_mr->sig->sig_status_checked))
+               return -EINVAL;
+
+       /* length of the protected region, data + protection */
+       region_len = wr->sg_list->length;
+       if (wr->wr.sig_handover.prot)
+               region_len += wr->wr.sig_handover.prot->length;
+
+       /**
+        * KLM octoword size - if protection was provided
+        * then we use strided block format (3 octowords),
+        * else we use single KLM (1 octoword)
+        **/
+       klm_oct_size = wr->wr.sig_handover.prot ? 3 : 1;
+
+       set_sig_umr_segment(*seg, wr, klm_oct_size);
+       *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
+       *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
+       if (unlikely((*seg == qp->sq.qend)))
+               *seg = mlx5_get_send_wqe(qp, 0);
+
+       set_sig_mkey_segment(*seg, wr, klm_oct_size, region_len, pdn);
+       *seg += sizeof(struct mlx5_mkey_seg);
+       *size += sizeof(struct mlx5_mkey_seg) / 16;
+       if (unlikely((*seg == qp->sq.qend)))
+               *seg = mlx5_get_send_wqe(qp, 0);
+
+       ret = set_sig_data_segment(wr, qp, seg, size);
+       if (ret)
+               return ret;
+
+       sig_mr->sig->sig_status_checked = false;
+       return 0;
+}
+
+static int set_psv_wr(struct ib_sig_domain *domain,
+                     u32 psv_idx, void **seg, int *size)
+{
+       struct mlx5_seg_set_psv *psv_seg = *seg;
+
+       memset(psv_seg, 0, sizeof(*psv_seg));
+       psv_seg->psv_num = cpu_to_be32(psv_idx);
+       switch (domain->sig_type) {
+       case IB_SIG_TYPE_T10_DIF:
+               psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 |
+                                                    domain->sig.dif.app_tag);
+               psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag);
+
+               *seg += sizeof(*psv_seg);
+               *size += sizeof(*psv_seg) / 16;
+               break;
+
+       default:
+               pr_err("Bad signature type given.\n");
+               return 1;
+       }
+
+       return 0;
+}
+
 static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size,
                          struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp)
 {
@@ -2041,6 +2405,59 @@ static u8 get_fence(u8 fence, struct ib_send_wr *wr)
        }
 }
 
+static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
+                    struct mlx5_wqe_ctrl_seg **ctrl,
+                    struct ib_send_wr *wr, int *idx,
+                    int *size, int nreq)
+{
+       int err = 0;
+
+       if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) {
+               err = -ENOMEM;
+               return err;
+       }
+
+       *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
+       *seg = mlx5_get_send_wqe(qp, *idx);
+       *ctrl = *seg;
+       *(uint32_t *)(*seg + 8) = 0;
+       (*ctrl)->imm = send_ieth(wr);
+       (*ctrl)->fm_ce_se = qp->sq_signal_bits |
+               (wr->send_flags & IB_SEND_SIGNALED ?
+                MLX5_WQE_CTRL_CQ_UPDATE : 0) |
+               (wr->send_flags & IB_SEND_SOLICITED ?
+                MLX5_WQE_CTRL_SOLICITED : 0);
+
+       *seg += sizeof(**ctrl);
+       *size = sizeof(**ctrl) / 16;
+
+       return err;
+}
+
+static void finish_wqe(struct mlx5_ib_qp *qp,
+                      struct mlx5_wqe_ctrl_seg *ctrl,
+                      u8 size, unsigned idx, u64 wr_id,
+                      int nreq, u8 fence, u8 next_fence,
+                      u32 mlx5_opcode)
+{
+       u8 opmod = 0;
+
+       ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
+                                            mlx5_opcode | ((u32)opmod << 24));
+       ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8));
+       ctrl->fm_ce_se |= fence;
+       qp->fm_cache = next_fence;
+       if (unlikely(qp->wq_sig))
+               ctrl->signature = wq_sig(ctrl);
+
+       qp->sq.wrid[idx] = wr_id;
+       qp->sq.w_list[idx].opcode = mlx5_opcode;
+       qp->sq.wqe_head[idx] = qp->sq.head + nreq;
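+       /* each WQE consumes a whole number of MLX5_SEND_WQE_BB blocks */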
+       qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
+       qp->sq.w_list[idx].next = qp->sq.cur_post;
+}
+
 int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                      struct ib_send_wr **bad_wr)
 {
@@ -2048,13 +2465,13 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
        struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
        struct mlx5_core_dev *mdev = &dev->mdev;
        struct mlx5_ib_qp *qp = to_mqp(ibqp);
+       struct mlx5_ib_mr *mr;
        struct mlx5_wqe_data_seg *dpseg;
        struct mlx5_wqe_xrc_seg *xrc;
        struct mlx5_bf *bf = qp->bf;
        int uninitialized_var(size);
        void *qend = qp->sq.qend;
        unsigned long flags;
-       u32 mlx5_opcode;
        unsigned idx;
        int err = 0;
        int inl = 0;
@@ -2063,7 +2480,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
        int nreq;
        int i;
        u8 next_fence = 0;
-       u8 opmod = 0;
        u8 fence;
 
        spin_lock_irqsave(&qp->sq.lock, flags);
@@ -2076,36 +2492,23 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        goto out;
                }
 
-               if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) {
+               fence = qp->fm_cache;
+               num_sge = wr->num_sge;
+               if (unlikely(num_sge > qp->sq.max_gs)) {
                        mlx5_ib_warn(dev, "\n");
                        err = -ENOMEM;
                        *bad_wr = wr;
                        goto out;
                }
 
-               fence = qp->fm_cache;
-               num_sge = wr->num_sge;
-               if (unlikely(num_sge > qp->sq.max_gs)) {
+               err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq);
+               if (err) {
                        mlx5_ib_warn(dev, "\n");
                        err = -ENOMEM;
                        *bad_wr = wr;
                        goto out;
                }
 
-               idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
-               seg = mlx5_get_send_wqe(qp, idx);
-               ctrl = seg;
-               *(uint32_t *)(seg + 8) = 0;
-               ctrl->imm = send_ieth(wr);
-               ctrl->fm_ce_se = qp->sq_signal_bits |
-                       (wr->send_flags & IB_SEND_SIGNALED ?
-                        MLX5_WQE_CTRL_CQ_UPDATE : 0) |
-                       (wr->send_flags & IB_SEND_SOLICITED ?
-                        MLX5_WQE_CTRL_SOLICITED : 0);
-
-               seg += sizeof(*ctrl);
-               size = sizeof(*ctrl) / 16;
-
                switch (ibqp->qp_type) {
                case IB_QPT_XRC_INI:
                        xrc = seg;
@@ -2158,6 +2561,73 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                                num_sge = 0;
                                break;
 
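+                       /*
+                        * A signature handover request expands into
+                        * three consecutive WQEs: a UMR registering the
+                        * signature MR, then one SET_PSV WQE for the
+                        * memory domain and one for the wire domain.
+                        */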
+                       case IB_WR_REG_SIG_MR:
+                               qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR;
+                               mr = to_mmr(wr->wr.sig_handover.sig_mr);
+
+                               ctrl->imm = cpu_to_be32(mr->ibmr.rkey);
+                               err = set_sig_umr_wr(wr, qp, &seg, &size);
+                               if (err) {
+                                       mlx5_ib_warn(dev, "\n");
+                                       *bad_wr = wr;
+                                       goto out;
+                               }
+
+                               finish_wqe(qp, ctrl, size, idx, wr->wr_id,
+                                          nreq, get_fence(fence, wr),
+                                          next_fence, MLX5_OPCODE_UMR);
+                               /*
+                                * SET_PSV WQEs are not signaled; they
+                                * generate solicited events only on
+                                * error completions.
+                                */
+                               wr->send_flags &= ~IB_SEND_SIGNALED;
+                               wr->send_flags |= IB_SEND_SOLICITED;
+                               err = begin_wqe(qp, &seg, &ctrl, wr,
+                                               &idx, &size, nreq);
+                               if (err) {
+                                       mlx5_ib_warn(dev, "\n");
+                                       err = -ENOMEM;
+                                       *bad_wr = wr;
+                                       goto out;
+                               }
+
+                               err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->mem,
+                                                mr->sig->psv_memory.psv_idx, &seg,
+                                                &size);
+                               if (err) {
+                                       mlx5_ib_warn(dev, "\n");
+                                       *bad_wr = wr;
+                                       goto out;
+                               }
+
+                               finish_wqe(qp, ctrl, size, idx, wr->wr_id,
+                                          nreq, get_fence(fence, wr),
+                                          next_fence, MLX5_OPCODE_SET_PSV);
+                               err = begin_wqe(qp, &seg, &ctrl, wr,
+                                               &idx, &size, nreq);
+                               if (err) {
+                                       mlx5_ib_warn(dev, "\n");
+                                       err = -ENOMEM;
+                                       *bad_wr = wr;
+                                       goto out;
+                               }
+
+                               next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
+                               err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->wire,
+                                                mr->sig->psv_wire.psv_idx, &seg,
+                                                &size);
+                               if (err) {
+                                       mlx5_ib_warn(dev, "\n");
+                                       *bad_wr = wr;
+                                       goto out;
+                               }
+
+                               finish_wqe(qp, ctrl, size, idx, wr->wr_id,
+                                          nreq, get_fence(fence, wr),
+                                          next_fence, MLX5_OPCODE_SET_PSV);
+                               num_sge = 0;
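+                               /* all three WQEs were finished above */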
+                               goto skip_psv;
+
                        default:
                                break;
                        }
@@ -2238,22 +2708,10 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        }
                }
 
-               mlx5_opcode = mlx5_ib_opcode[wr->opcode];
-               ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8)      |
-                                                    mlx5_opcode                        |
-                                                    ((u32)opmod << 24));
-               ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8));
-               ctrl->fm_ce_se |= get_fence(fence, wr);
-               qp->fm_cache = next_fence;
-               if (unlikely(qp->wq_sig))
-                       ctrl->signature = wq_sig(ctrl);
-
-               qp->sq.wrid[idx] = wr->wr_id;
-               qp->sq.w_list[idx].opcode = mlx5_opcode;
-               qp->sq.wqe_head[idx] = qp->sq.head + nreq;
-               qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
-               qp->sq.w_list[idx].next = qp->sq.cur_post;
-
+               finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
+                          get_fence(fence, wr), next_fence,
+                          mlx5_ib_opcode[wr->opcode]);
+skip_psv:
                if (0)
                        dump_wqe(qp, idx, size);
        }
index 5b71d43..415f8e1 100644 (file)
@@ -695,6 +695,7 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
 
        if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) {
                mthca_free_cq(to_mdev(ibdev), cq);
+               err = -EFAULT;
                goto err_free;
        }
 
@@ -976,12 +977,12 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                       u64 virt, int acc, struct ib_udata *udata)
 {
        struct mthca_dev *dev = to_mdev(pd->device);
-       struct ib_umem_chunk *chunk;
+       struct scatterlist *sg;
        struct mthca_mr *mr;
        struct mthca_reg_mr ucmd;
        u64 *pages;
        int shift, n, len;
-       int i, j, k;
+       int i, k, entry;
        int err = 0;
        int write_mtt_size;
 
@@ -1009,10 +1010,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
        }
 
        shift = ffs(mr->umem->page_size) - 1;
-
-       n = 0;
-       list_for_each_entry(chunk, &mr->umem->chunk_list, list)
-               n += chunk->nents;
+       n = mr->umem->nmap;
 
        mr->mtt = mthca_alloc_mtt(dev, n);
        if (IS_ERR(mr->mtt)) {
@@ -1030,25 +1028,24 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
        write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));
 
-       list_for_each_entry(chunk, &mr->umem->chunk_list, list)
-               for (j = 0; j < chunk->nmap; ++j) {
-                       len = sg_dma_len(&chunk->page_list[j]) >> shift;
-                       for (k = 0; k < len; ++k) {
-                               pages[i++] = sg_dma_address(&chunk->page_list[j]) +
-                                       mr->umem->page_size * k;
-                               /*
-                                * Be friendly to write_mtt and pass it chunks
-                                * of appropriate size.
-                                */
-                               if (i == write_mtt_size) {
-                                       err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
-                                       if (err)
-                                               goto mtt_done;
-                                       n += i;
-                                       i = 0;
-                               }
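+       /* walk the umem's DMA-mapped scatterlist entries directly */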
+       for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) {
+               len = sg_dma_len(sg) >> shift;
+               for (k = 0; k < len; ++k) {
+                       pages[i++] = sg_dma_address(sg) +
+                               mr->umem->page_size * k;
+                       /*
+                        * Be friendly to write_mtt and pass it chunks
+                        * of appropriate size.
+                        */
+                       if (i == write_mtt_size) {
+                               err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
+                               if (err)
+                                       goto mtt_done;
+                               n += i;
+                               i = 0;
                        }
                }
+       }
 
        if (i)
                err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
index 9c9f2f5..dfa9df4 100644 (file)
@@ -128,6 +128,7 @@ static void build_mpa_v1(struct nes_cm_node *, void *, u8);
 static void build_rdma0_msg(struct nes_cm_node *, struct nes_qp **);
 
 static void print_core(struct nes_cm_core *core);
+static void record_ird_ord(struct nes_cm_node *, u16, u16);
 
 /* External CM API Interface */
 /* instance of function pointers for client API */
@@ -317,7 +318,6 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type,
                }
        }
 
-
        if (priv_data_len + mpa_hdr_len != len) {
                nes_debug(NES_DBG_CM, "The received ietf buffer was not right"
                        " complete (%x + %x != %x)\n",
@@ -356,25 +356,57 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type,
                        /* send reset */
                        return -EINVAL;
                }
+               if (ird_size == IETF_NO_IRD_ORD || ord_size == IETF_NO_IRD_ORD)
+                       cm_node->mpav2_ird_ord = IETF_NO_IRD_ORD;
 
-               if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) {
+               if (cm_node->mpav2_ird_ord != IETF_NO_IRD_ORD) {
                        /* responder */
-                       if (cm_node->ord_size > ird_size)
-                               cm_node->ord_size = ird_size;
-               } else {
-                       /* initiator */
-                       if (cm_node->ord_size > ird_size)
-                               cm_node->ord_size = ird_size;
-
-                       if (cm_node->ird_size < ord_size) {
-                               /* no resources available */
-                               /* send terminate message */
-                               return -EINVAL;
+                       if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) {
+                               /* we are still negotiating */
+                               if (ord_size > NES_MAX_IRD) {
+                                       cm_node->ird_size = NES_MAX_IRD;
+                               } else {
+                                       cm_node->ird_size = ord_size;
+                                       if (ord_size == 0 &&
+                                       (rtr_ctrl_ord & IETF_RDMA0_READ)) {
+                                               cm_node->ird_size = 1;
+                                               nes_debug(NES_DBG_CM,
+                                               "%s: Remote peer doesn't support RDMA0_READ (ord=%u)\n",
+                                                       __func__, ord_size);
+                                       }
+                               }
+                               if (ird_size > NES_MAX_ORD)
+                                       cm_node->ord_size = NES_MAX_ORD;
+                               else
+                                       cm_node->ord_size = ird_size;
+                       } else { /* initiator */
+                               if (ord_size > NES_MAX_IRD) {
+                                       nes_debug(NES_DBG_CM,
+                                                 "%s: Unable to support the requested ord (%u)\n",
+                                                 __func__, ord_size);
+                                       return -EINVAL;
+                               }
+                               cm_node->ird_size = ord_size;
+
+                               if (ird_size > NES_MAX_ORD) {
+                                       cm_node->ord_size = NES_MAX_ORD;
+                               } else {
+                                       if (ird_size == 0 &&
+                                       (rtr_ctrl_ord & IETF_RDMA0_READ)) {
+                                               nes_debug(NES_DBG_CM,
+                                               "%s: Remote peer doesn't support RDMA0_READ (ird=%u)\n",
+                                                       __func__, ird_size);
+                                               return -EINVAL;
+                                       } else {
+                                               cm_node->ord_size = ird_size;
+                                       }
+                               }
                        }
                }
 
                if (rtr_ctrl_ord & IETF_RDMA0_READ) {
                        cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
                } else if (rtr_ctrl_ord & IETF_RDMA0_WRITE) {
                        cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO;
                } else {        /* Not supported RDMA0 operation */
@@ -514,6 +546,19 @@ static void print_core(struct nes_cm_core *core)
        nes_debug(NES_DBG_CM, "-------------- end core ---------------\n");
 }
 
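+/* Clamp the requested IRD/ORD values to the adapter limits. */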
+static void record_ird_ord(struct nes_cm_node *cm_node,
+                                       u16 conn_ird, u16 conn_ord)
+{
+       if (conn_ird > NES_MAX_IRD)
+               conn_ird = NES_MAX_IRD;
+
+       if (conn_ord > NES_MAX_ORD)
+               conn_ord = NES_MAX_ORD;
+
+       cm_node->ird_size = conn_ird;
+       cm_node->ord_size = conn_ord;
+}
+
 /**
  * cm_build_mpa_frame - build a MPA V1 frame or MPA V2 frame
  */
@@ -557,11 +602,13 @@ static void build_mpa_v2(struct nes_cm_node *cm_node,
        mpa_frame->priv_data_len += htons(IETF_RTR_MSG_SIZE);
 
        /* initialize RTR msg */
-       ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ?
-                           IETF_NO_IRD_ORD : cm_node->ird_size;
-       ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ?
-                           IETF_NO_IRD_ORD : cm_node->ord_size;
-
+       if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) {
+               ctrl_ird = IETF_NO_IRD_ORD;
+               ctrl_ord = IETF_NO_IRD_ORD;
+       } else {
+               ctrl_ird = cm_node->ird_size & IETF_NO_IRD_ORD;
+               ctrl_ord = cm_node->ord_size & IETF_NO_IRD_ORD;
+       }
        ctrl_ird |= IETF_PEER_TO_PEER;
        ctrl_ird |= IETF_FLPDU_ZERO_LEN;
 
@@ -610,7 +657,7 @@ static void build_rdma0_msg(struct nes_cm_node *cm_node, struct nes_qp **nesqp_a
        struct nes_qp *nesqp = *nesqp_addr;
        struct nes_hw_qp_wqe *wqe = &nesqp->hwqp.sq_vbase[0];
 
-       u64temp = (unsigned long)nesqp;
+       u64temp = (unsigned long)nesqp->nesuqp_addr;
        u64temp |= NES_SW_CONTEXT_ALIGN >> 1;
        set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, u64temp);
 
@@ -1409,8 +1456,9 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
 
        cm_node->mpa_frame_rev = mpa_version;
        cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
-       cm_node->ird_size = IETF_NO_IRD_ORD;
-       cm_node->ord_size = IETF_NO_IRD_ORD;
+       cm_node->mpav2_ird_ord = 0;
+       cm_node->ird_size = 0;
+       cm_node->ord_size = 0;
 
        nes_debug(NES_DBG_CM, "Make node addresses : loc = %pI4:%x, rem = %pI4:%x\n",
                  &cm_node->loc_addr, cm_node->loc_port,
@@ -3027,11 +3075,11 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                rem_ref_cm_node(cm_node->cm_core, cm_node);
                return -ECONNRESET;
        }
-
        /* associate the node with the QP */
        nesqp->cm_node = (void *)cm_node;
        cm_node->nesqp = nesqp;
 
        nes_debug(NES_DBG_CM, "QP%u, cm_node=%p, jiffies = %lu listener = %p\n",
                nesqp->hwqp.qp_id, cm_node, jiffies, cm_node->listener);
        atomic_inc(&cm_accepts);
@@ -3054,6 +3102,11 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        if (cm_node->mpa_frame_rev == IETF_MPA_V1)
                mpa_frame_offset = 4;
 
+       if (cm_node->mpa_frame_rev == IETF_MPA_V1 ||
+           cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD)
+               record_ird_ord(cm_node, (u16)conn_param->ird,
+                              (u16)conn_param->ord);
+
        memcpy(mpa_v2_frame->priv_data, conn_param->private_data,
               conn_param->private_data_len);
 
@@ -3117,7 +3170,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        }
        nesqp->skip_lsmm = 1;
 
-
        /* Cache the cm_id in the qp */
        nesqp->cm_id = cm_id;
        cm_node->cm_id = cm_id;
@@ -3154,7 +3206,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(
                ((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT));
        nesqp->nesqp_context->ird_ord_sizes |=
-               cpu_to_le32((u32)conn_param->ord);
+               cpu_to_le32((u32)cm_node->ord_size);
 
        memset(&nes_quad, 0, sizeof(nes_quad));
        nes_quad.DstIpAdrIndex =
@@ -3194,6 +3246,9 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        cm_event.remote_addr = cm_id->remote_addr;
        cm_event.private_data = NULL;
        cm_event.private_data_len = 0;
+       cm_event.ird = cm_node->ird_size;
+       cm_event.ord = cm_node->ord_size;
+
        ret = cm_id->event_handler(cm_id, &cm_event);
        attr.qp_state = IB_QPS_RTS;
        nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
@@ -3290,14 +3345,8 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 
        /* cache the cm_id in the qp */
        nesqp->cm_id = cm_id;
-
        cm_id->provider_data = nesqp;
-
        nesqp->private_data_len = conn_param->private_data_len;
-       nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord);
-       /* space for rdma0 read msg */
-       if (conn_param->ord == 0)
-               nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(1);
 
        nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord);
        nes_debug(NES_DBG_CM, "mpa private data len =%u\n",
@@ -3334,6 +3383,11 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                return -ENOMEM;
        }
 
+       record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
+       if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO &&
+                               cm_node->ord_size == 0)
+               cm_node->ord_size = 1;
+
        cm_node->apbvt_set = apbvt_set;
        nesqp->cm_node = cm_node;
        cm_node->nesqp = nesqp;
@@ -3530,6 +3584,8 @@ static void cm_event_connected(struct nes_cm_event *event)
        nesqp->nesqp_context->ird_ord_sizes |=
                        cpu_to_le32((u32)1 <<
                        NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT);
+       nesqp->nesqp_context->ird_ord_sizes |=
+                       cpu_to_le32((u32)cm_node->ord_size);
 
        /* Adjust tail for not having a LSMM */
        /*nesqp->hwqp.sq_tail = 1;*/
@@ -3742,8 +3798,13 @@ static void cm_event_mpa_req(struct nes_cm_event *event)
        cm_event_raddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr);
        cm_event.private_data = cm_node->mpa_frame_buf;
        cm_event.private_data_len = (u8)cm_node->mpa_frame_size;
+       if (cm_node->mpa_frame_rev == IETF_MPA_V1) {
+               cm_event.ird = NES_MAX_IRD;
+               cm_event.ord = NES_MAX_ORD;
+       } else {
-       cm_event.ird = cm_node->ird_size;
-       cm_event.ord = cm_node->ord_size;
+               cm_event.ird = cm_node->ird_size;
+               cm_event.ord = cm_node->ord_size;
+       }
 
        ret = cm_id->event_handler(cm_id, &cm_event);
        if (ret)
index 4646e66..522c99c 100644 (file)
@@ -58,6 +58,8 @@
 #define IETF_RDMA0_WRITE        0x8000
 #define IETF_RDMA0_READ         0x4000
 #define IETF_NO_IRD_ORD         0x3FFF
+#define NES_MAX_IRD             0x40
+#define NES_MAX_ORD             0x7F
 
 enum ietf_mpa_flags {
        IETF_MPA_FLAGS_MARKERS = 0x80,  /* receive Markers */
@@ -333,6 +335,7 @@ struct nes_cm_node {
        enum mpa_frame_version    mpa_frame_rev;
        u16                       ird_size;
        u16                       ord_size;
+       u16                       mpav2_ird_ord;
 
        u16                       mpa_frame_size;
        struct iw_cm_id           *cm_id;
index 4926de7..529c421 100644 (file)
@@ -39,8 +39,8 @@
 
 #include <linux/types.h>
 
-#define NES_ABI_USERSPACE_VER 1
-#define NES_ABI_KERNEL_VER    1
+#define NES_ABI_USERSPACE_VER 2
+#define NES_ABI_KERNEL_VER    2
 
 /*
  * Make sure that all structs defined in this file remain laid out so
@@ -78,6 +78,7 @@ struct nes_create_cq_req {
 
 struct nes_create_qp_req {
        __u64 user_wqe_buffers;
+       __u64 user_qp_buffer;
 };
 
 enum iwnes_memreg_type {
index 8308e36..218dd35 100644 (file)
@@ -1186,11 +1186,13 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
                                        nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
                                        kfree(nesqp->allocated_buffer);
                                        nes_debug(NES_DBG_QP, "ib_copy_from_udata() Failed \n");
-                                       return NULL;
+                                       return ERR_PTR(-EFAULT);
                                }
                                if (req.user_wqe_buffers) {
                                        virt_wqs = 1;
                                }
+                               if (req.user_qp_buffer)
+                                       nesqp->nesuqp_addr = req.user_qp_buffer;
                                if ((ibpd->uobject) && (ibpd->uobject->context)) {
                                        nesqp->user_mode = 1;
                                        nes_ucontext = to_nesucontext(ibpd->uobject->context);
@@ -2307,7 +2309,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
        struct nes_device *nesdev = nesvnic->nesdev;
        struct nes_adapter *nesadapter = nesdev->nesadapter;
        struct ib_mr *ibmr = ERR_PTR(-EINVAL);
-       struct ib_umem_chunk *chunk;
+       struct scatterlist *sg;
        struct nes_ucontext *nes_ucontext;
        struct nes_pbl *nespbl;
        struct nes_mr *nesmr;
@@ -2315,7 +2317,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
        struct nes_mem_reg_req req;
        struct nes_vpbl vpbl;
        struct nes_root_vpbl root_vpbl;
-       int nmap_index, page_index;
+       int entry, page_index;
        int page_count = 0;
        int err, pbl_depth = 0;
        int chunk_pages;
@@ -2330,6 +2332,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
        u16 pbl_count;
        u8 single_page = 1;
        u8 stag_key;
+       int first_page = 1;
 
        region = ib_umem_get(pd->uobject->context, start, length, acc, 0);
        if (IS_ERR(region)) {
@@ -2380,128 +2383,125 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                        }
                        nesmr->region = region;
 
-                       list_for_each_entry(chunk, &region->chunk_list, list) {
-                               nes_debug(NES_DBG_MR, "Chunk: nents = %u, nmap = %u .\n",
-                                               chunk->nents, chunk->nmap);
-                               for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) {
-                                       if (sg_dma_address(&chunk->page_list[nmap_index]) & ~PAGE_MASK) {
-                                               ib_umem_release(region);
-                                               nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
-                                               nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n",
-                                                               (unsigned int) sg_dma_address(&chunk->page_list[nmap_index]));
-                                               ibmr = ERR_PTR(-EINVAL);
-                                               kfree(nesmr);
-                                               goto reg_user_mr_err;
-                                       }
+                       for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) {
+                               if (sg_dma_address(sg) & ~PAGE_MASK) {
+                                       ib_umem_release(region);
+                                       nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+                                       nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n",
+                                                 (unsigned int) sg_dma_address(sg));
+                                       ibmr = ERR_PTR(-EINVAL);
+                                       kfree(nesmr);
+                                       goto reg_user_mr_err;
+                               }
 
-                                       if (!sg_dma_len(&chunk->page_list[nmap_index])) {
-                                               ib_umem_release(region);
-                                               nes_free_resource(nesadapter, nesadapter->allocated_mrs,
-                                                               stag_index);
-                                               nes_debug(NES_DBG_MR, "Invalid Buffer Size\n");
-                                               ibmr = ERR_PTR(-EINVAL);
-                                               kfree(nesmr);
-                                               goto reg_user_mr_err;
-                                       }
+                               if (!sg_dma_len(sg)) {
+                                       ib_umem_release(region);
+                                       nes_free_resource(nesadapter, nesadapter->allocated_mrs,
+                                                         stag_index);
+                                       nes_debug(NES_DBG_MR, "Invalid Buffer Size\n");
+                                       ibmr = ERR_PTR(-EINVAL);
+                                       kfree(nesmr);
+                                       goto reg_user_mr_err;
+                               }
 
-                                       region_length += sg_dma_len(&chunk->page_list[nmap_index]);
-                                       chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> 12;
-                                       region_length -= skip_pages << 12;
-                                       for (page_index=skip_pages; page_index < chunk_pages; page_index++) {
-                                               skip_pages = 0;
-                                               if ((page_count!=0)&&(page_count<<12)-(region->offset&(4096-1))>=region->length)
-                                                       goto enough_pages;
-                                               if ((page_count&0x01FF) == 0) {
-                                                       if (page_count >= 1024 * 512) {
+                               region_length += sg_dma_len(sg);
+                               chunk_pages = sg_dma_len(sg) >> 12;
+                               region_length -= skip_pages << 12;
+                               for (page_index = skip_pages; page_index < chunk_pages; page_index++) {
+                                       skip_pages = 0;
+                                       if ((page_count != 0) && (page_count<<12)-(region->offset&(4096-1)) >= region->length)
+                                               goto enough_pages;
+                                       if ((page_count&0x01FF) == 0) {
+                                               if (page_count >= 1024 * 512) {
+                                                       ib_umem_release(region);
+                                                       nes_free_resource(nesadapter,
+                                                                         nesadapter->allocated_mrs, stag_index);
+                                                       kfree(nesmr);
+                                                       ibmr = ERR_PTR(-E2BIG);
+                                                       goto reg_user_mr_err;
+                                               }
+                                               if (root_pbl_index == 1) {
+                                                       root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev,
+                                                                       8192, &root_vpbl.pbl_pbase);
+                                                       nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n",
+                                                                 root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase);
+                                                       if (!root_vpbl.pbl_vbase) {
                                                                ib_umem_release(region);
-                                                               nes_free_resource(nesadapter,
-                                                                               nesadapter->allocated_mrs, stag_index);
+                                                               pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
+                                                                                   vpbl.pbl_pbase);
+                                                               nes_free_resource(nesadapter, nesadapter->allocated_mrs,
+                                                                                 stag_index);
                                                                kfree(nesmr);
-                                                               ibmr = ERR_PTR(-E2BIG);
+                                                               ibmr = ERR_PTR(-ENOMEM);
                                                                goto reg_user_mr_err;
                                                        }
-                                                       if (root_pbl_index == 1) {
-                                                               root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev,
-                                                                               8192, &root_vpbl.pbl_pbase);
-                                                               nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n",
-                                                                               root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase);
-                                                               if (!root_vpbl.pbl_vbase) {
-                                                                       ib_umem_release(region);
-                                                                       pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
-                                                                                       vpbl.pbl_pbase);
-                                                                       nes_free_resource(nesadapter, nesadapter->allocated_mrs,
-                                                                                       stag_index);
-                                                                       kfree(nesmr);
-                                                                       ibmr = ERR_PTR(-ENOMEM);
-                                                                       goto reg_user_mr_err;
-                                                               }
-                                                               root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024,
-                                                                               GFP_KERNEL);
-                                                               if (!root_vpbl.leaf_vpbl) {
-                                                                       ib_umem_release(region);
-                                                                       pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
-                                                                                       root_vpbl.pbl_pbase);
-                                                                       pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
-                                                                                       vpbl.pbl_pbase);
-                                                                       nes_free_resource(nesadapter, nesadapter->allocated_mrs,
-                                                                                       stag_index);
-                                                                       kfree(nesmr);
-                                                                       ibmr = ERR_PTR(-ENOMEM);
-                                                                       goto reg_user_mr_err;
-                                                               }
-                                                               root_vpbl.pbl_vbase[0].pa_low =
-                                                                               cpu_to_le32((u32)vpbl.pbl_pbase);
-                                                               root_vpbl.pbl_vbase[0].pa_high =
-                                                                               cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
-                                                               root_vpbl.leaf_vpbl[0] = vpbl;
-                                                       }
-                                                       vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
-                                                                       &vpbl.pbl_pbase);
-                                                       nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%08X\n",
-                                                                       vpbl.pbl_vbase, (unsigned int)vpbl.pbl_pbase);
-                                                       if (!vpbl.pbl_vbase) {
+                                                       root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024,
+                                                                       GFP_KERNEL);
+                                                       if (!root_vpbl.leaf_vpbl) {
                                                                ib_umem_release(region);
-                                                               nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
-                                                               ibmr = ERR_PTR(-ENOMEM);
+                                                               pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
+                                                                                   root_vpbl.pbl_pbase);
+                                                               pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
+                                                                                   vpbl.pbl_pbase);
+                                                               nes_free_resource(nesadapter, nesadapter->allocated_mrs,
+                                                                                 stag_index);
                                                                kfree(nesmr);
+                                                               ibmr = ERR_PTR(-ENOMEM);
                                                                goto reg_user_mr_err;
                                                        }
-                                                       if (1 <= root_pbl_index) {
-                                                               root_vpbl.pbl_vbase[root_pbl_index].pa_low =
-                                                                               cpu_to_le32((u32)vpbl.pbl_pbase);
-                                                               root_vpbl.pbl_vbase[root_pbl_index].pa_high =
-                                                                               cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32)));
-                                                               root_vpbl.leaf_vpbl[root_pbl_index] = vpbl;
-                                                       }
-                                                       root_pbl_index++;
-                                                       cur_pbl_index = 0;
+                                                       root_vpbl.pbl_vbase[0].pa_low =
+                                                                       cpu_to_le32((u32)vpbl.pbl_pbase);
+                                                       root_vpbl.pbl_vbase[0].pa_high =
+                                                                       cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
+                                                       root_vpbl.leaf_vpbl[0] = vpbl;
                                                }
-                                               if (single_page) {
-                                                       if (page_count != 0) {
-                                                               if ((last_dma_addr+4096) !=
-                                                                               (sg_dma_address(&chunk->page_list[nmap_index])+
-                                                                               (page_index*4096)))
-                                                                       single_page = 0;
-                                                               last_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+
-                                                                               (page_index*4096);
-                                                       } else {
-                                                               first_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+
-                                                                               (page_index*4096);
-                                                               last_dma_addr = first_dma_addr;
-                                                       }
+                                               vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
+                                                               &vpbl.pbl_pbase);
+                                               nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%08X\n",
+                                                         vpbl.pbl_vbase, (unsigned int)vpbl.pbl_pbase);
+                                               if (!vpbl.pbl_vbase) {
+                                                       ib_umem_release(region);
+                                                       nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+                                                       ibmr = ERR_PTR(-ENOMEM);
+                                                       kfree(nesmr);
+                                                       goto reg_user_mr_err;
+                                               }
+                                               if (1 <= root_pbl_index) {
+                                                       root_vpbl.pbl_vbase[root_pbl_index].pa_low =
+                                                                       cpu_to_le32((u32)vpbl.pbl_pbase);
+                                                       root_vpbl.pbl_vbase[root_pbl_index].pa_high =
+                                                                       cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32)));
+                                                       root_vpbl.leaf_vpbl[root_pbl_index] = vpbl;
+                                               }
+                                               root_pbl_index++;
+                                               cur_pbl_index = 0;
+                                       }
+                                       if (single_page) {
+                                               if (page_count != 0) {
+                                                       if ((last_dma_addr+4096) !=
+                                                                       (sg_dma_address(sg)+
+                                                                       (page_index*4096)))
+                                                               single_page = 0;
+                                                       last_dma_addr = sg_dma_address(sg)+
+                                                                       (page_index*4096);
+                                               } else {
+                                                       first_dma_addr = sg_dma_address(sg)+
+                                                                       (page_index*4096);
+                                                       last_dma_addr = first_dma_addr;
                                                }
-
-                                               vpbl.pbl_vbase[cur_pbl_index].pa_low =
-                                                               cpu_to_le32((u32)(sg_dma_address(&chunk->page_list[nmap_index])+
-                                                               (page_index*4096)));
-                                               vpbl.pbl_vbase[cur_pbl_index].pa_high =
-                                                               cpu_to_le32((u32)((((u64)(sg_dma_address(&chunk->page_list[nmap_index])+
-                                                               (page_index*4096))) >> 32)));
-                                               cur_pbl_index++;
-                                               page_count++;
                                        }
+
+                                       vpbl.pbl_vbase[cur_pbl_index].pa_low =
+                                                       cpu_to_le32((u32)(sg_dma_address(sg)+
+                                                       (page_index*4096)));
+                                       vpbl.pbl_vbase[cur_pbl_index].pa_high =
+                                                       cpu_to_le32((u32)((((u64)(sg_dma_address(sg)+
+                                                       (page_index*4096))) >> 32)));
+                                       cur_pbl_index++;
+                                       page_count++;
                                }
                        }
+
                        enough_pages:
                        nes_debug(NES_DBG_MR, "calculating stag, stag_index=0x%08x, driver_key=0x%08x,"
                                        " stag_key=0x%08x\n",
@@ -2613,25 +2613,28 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                  nespbl->pbl_size, (unsigned long) nespbl->pbl_pbase,
                                  (void *) nespbl->pbl_vbase, nespbl->user_base);
 
-                       list_for_each_entry(chunk, &region->chunk_list, list) {
-                               for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) {
-                                       chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> 12;
-                                       chunk_pages += (sg_dma_len(&chunk->page_list[nmap_index]) & (4096-1)) ? 1 : 0;
-                                       nespbl->page = sg_page(&chunk->page_list[0]);
-                                       for (page_index=0; page_index<chunk_pages; page_index++) {
-                                               ((__le32 *)pbl)[0] = cpu_to_le32((u32)
-                                                               (sg_dma_address(&chunk->page_list[nmap_index])+
-                                                               (page_index*4096)));
-                                               ((__le32 *)pbl)[1] = cpu_to_le32(((u64)
-                                                               (sg_dma_address(&chunk->page_list[nmap_index])+
-                                                               (page_index*4096)))>>32);
-                                               nes_debug(NES_DBG_MR, "pbl=%p, *pbl=0x%016llx, 0x%08x%08x\n", pbl,
-                                                               (unsigned long long)*pbl,
-                                                               le32_to_cpu(((__le32 *)pbl)[1]), le32_to_cpu(((__le32 *)pbl)[0]));
-                                               pbl++;
-                                       }
+                       for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) {
+                               chunk_pages = sg_dma_len(sg) >> 12;
+                               chunk_pages += (sg_dma_len(sg) & (4096-1)) ? 1 : 0;
+                               if (first_page) {
+                                       nespbl->page = sg_page(sg);
+                                       first_page = 0;
+                               }
+
+                               for (page_index = 0; page_index < chunk_pages; page_index++) {
+                                       ((__le32 *)pbl)[0] = cpu_to_le32((u32)
+                                                       (sg_dma_address(sg)+
+                                                       (page_index*4096)));
+                                       ((__le32 *)pbl)[1] = cpu_to_le32(((u64)
+                                                       (sg_dma_address(sg)+
+                                                       (page_index*4096)))>>32);
+                                       nes_debug(NES_DBG_MR, "pbl=%p, *pbl=0x%016llx, 0x%08x%08x\n", pbl,
+                                                 (unsigned long long)*pbl,
+                                                 le32_to_cpu(((__le32 *)pbl)[1]), le32_to_cpu(((__le32 *)pbl)[0]));
+                                       pbl++;
                                }
                        }
+
                        if (req.reg_type == IWNES_MEMREG_TYPE_QP) {
                                list_add_tail(&nespbl->list, &nes_ucontext->qp_reg_mem_list);
                        } else {
@@ -3134,9 +3137,7 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                                " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n",
                                nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
                                original_last_aeq, nesqp->last_aeq);
-               if ((!ret) ||
-                               ((original_last_aeq != NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) &&
-                               (ret))) {
+               if (!ret || original_last_aeq != NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) {
                        if (dont_wait) {
                                if (nesqp->cm_id && nesqp->hw_tcp_state != 0) {
                                        nes_debug(NES_DBG_MOD_QP, "QP%u Queuing fake disconnect for QP refcount (%d),"
index 0eff7c4..309b31c 100644 (file)
@@ -184,5 +184,6 @@ struct nes_qp {
        u8                    pau_busy;
        u8                    pau_pending;
        u8                    pau_state;
+       __u64                 nesuqp_addr;
 };
 #endif                 /* NES_VERBS_H */
index 06a5bed..d1bfd4f 100644 (file)
@@ -2,4 +2,4 @@ ccflags-y := -Idrivers/net/ethernet/emulex/benet
 
 obj-$(CONFIG_INFINIBAND_OCRDMA)        += ocrdma.o
 
-ocrdma-y :=    ocrdma_main.o ocrdma_verbs.o ocrdma_hw.o ocrdma_ah.o
+ocrdma-y :=    ocrdma_main.o ocrdma_verbs.o ocrdma_hw.o ocrdma_ah.o ocrdma_stats.o
index 7c001b9..19011db 100644 (file)
 
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
 
 #include <be_roce.h>
 #include "ocrdma_sli.h"
 
-#define OCRDMA_ROCE_DEV_VERSION "1.0.0"
+#define OCRDMA_ROCE_DRV_VERSION "10.2.145.0u"
+
+#define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver"
 #define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA"
 
+#define OC_NAME_SH     OCRDMA_NODE_DESC "(Skyhawk)"
+#define OC_NAME_UNKNOWN OCRDMA_NODE_DESC "(Unknown)"
+
+#define OC_SKH_DEVICE_PF 0x720
+#define OC_SKH_DEVICE_VF 0x728
 #define OCRDMA_MAX_AH 512
 
 #define OCRDMA_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME)
 
+#define convert_to_64bit(lo, hi) ((u64)hi << 32 | (u64)lo)
+
 struct ocrdma_dev_attr {
        u8 fw_ver[32];
        u32 vendor_id;
@@ -65,6 +75,7 @@ struct ocrdma_dev_attr {
        int max_mr;
        u64 max_mr_size;
        u32 max_num_mr_pbl;
+       int max_mw;
        int max_fmr;
        int max_map_per_fmr;
        int max_pages_per_frmr;
@@ -83,6 +94,12 @@ struct ocrdma_dev_attr {
        u8 num_ird_pages;
 };
 
+struct ocrdma_dma_mem {
+       void *va;
+       dma_addr_t pa;
+       u32 size;
+};
+
 struct ocrdma_pbl {
        void *va;
        dma_addr_t pa;
@@ -148,6 +165,26 @@ struct ocrdma_mr {
        struct ocrdma_hw_mr hwmr;
 };
 
+struct ocrdma_stats {
+       u8 type;
+       struct ocrdma_dev *dev;
+};
+
+struct stats_mem {
+       struct ocrdma_mqe mqe;
+       void *va;
+       dma_addr_t pa;
+       u32 size;
+       char *debugfs_mem;
+};
+
+struct phy_info {
+       u16 auto_speeds_supported;
+       u16 fixed_speeds_supported;
+       u16 phy_type;
+       u16 interface_type;
+};
+
 struct ocrdma_dev {
        struct ib_device ibdev;
        struct ocrdma_dev_attr attr;
@@ -191,12 +228,30 @@ struct ocrdma_dev {
        struct mqe_ctx mqe_ctx;
 
        struct be_dev_info nic_info;
+       struct phy_info phy;
+       char model_number[32];
+       u32 hba_port_num;
 
        struct list_head entry;
        struct rcu_head rcu;
        int id;
-       struct ocrdma_mr *stag_arr[OCRDMA_MAX_STAG];
+       u64 stag_arr[OCRDMA_MAX_STAG];
        u16 pvid;
+       u32 asic_id;
+
+       ulong last_stats_time;
+       struct mutex stats_lock; /* provide synch for debugfs operations */
+       struct stats_mem stats_mem;
+       struct ocrdma_stats rsrc_stats;
+       struct ocrdma_stats rx_stats;
+       struct ocrdma_stats wqe_stats;
+       struct ocrdma_stats tx_stats;
+       struct ocrdma_stats db_err_stats;
+       struct ocrdma_stats tx_qp_err_stats;
+       struct ocrdma_stats rx_qp_err_stats;
+       struct ocrdma_stats tx_dbg_stats;
+       struct ocrdma_stats rx_dbg_stats;
+       struct dentry *dir;
 };
 
 struct ocrdma_cq {
@@ -209,8 +264,8 @@ struct ocrdma_cq {
                         */
        u32 max_hw_cqe;
        bool phase_change;
-       bool armed, solicited;
-       bool arm_needed;
+       bool deferred_arm, deferred_sol;
+       bool first_arm;
 
        spinlock_t cq_lock ____cacheline_aligned; /* provide synchronization
                                                   * to cq polling
@@ -223,6 +278,7 @@ struct ocrdma_cq {
        struct ocrdma_ucontext *ucontext;
        dma_addr_t pa;
        u32 len;
+       u32 cqe_cnt;
 
        /* head of all qp's sq and rq for which cqes need to be flushed
         * by the software.
@@ -232,7 +288,6 @@ struct ocrdma_cq {
 
 struct ocrdma_pd {
        struct ib_pd ibpd;
-       struct ocrdma_dev *dev;
        struct ocrdma_ucontext *uctx;
        u32 id;
        int num_dpp_qp;
@@ -317,10 +372,8 @@ struct ocrdma_qp {
        bool dpp_enabled;
        u8 *ird_q_va;
        bool signaled;
-       u16 db_cache;
 };
 
-
 struct ocrdma_ucontext {
        struct ib_ucontext ibucontext;
 
@@ -385,13 +438,6 @@ static inline struct ocrdma_srq *get_ocrdma_srq(struct ib_srq *ibsrq)
        return container_of(ibsrq, struct ocrdma_srq, ibsrq);
 }
 
-
-static inline int ocrdma_get_num_posted_shift(struct ocrdma_qp *qp)
-{
-       return ((qp->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY &&
-                qp->id < 128) ? 24 : 16);
-}
-
 static inline int is_cqe_valid(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe)
 {
        int cqe_valid;
@@ -436,4 +482,40 @@ static inline int ocrdma_resolve_dmac(struct ocrdma_dev *dev,
        return 0;
 }
 
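+/* Map the PCI device ID to a printable HCA model name. */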
+static inline char *hca_name(struct ocrdma_dev *dev)
+{
+       switch (dev->nic_info.pdev->device) {
+       case OC_SKH_DEVICE_PF:
+       case OC_SKH_DEVICE_VF:
+               return OC_NAME_SH;
+       default:
+               return OC_NAME_UNKNOWN;
+       }
+}
+
+static inline int ocrdma_get_eq_table_index(struct ocrdma_dev *dev,
+               int eqid)
+{
+       int indx;
+
+       for (indx = 0; indx < dev->eq_cnt; indx++) {
+               if (dev->eq_tbl[indx].q.id == eqid)
+                       return indx;
+       }
+
+       return -EINVAL;
+}
+
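+/*
+ * Read the ASIC ID from PCI config space once, cache it and return
+ * the ASIC generation number.
+ */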
+static inline u8 ocrdma_get_asic_type(struct ocrdma_dev *dev)
+{
+       if (dev->nic_info.dev_family == 0xF && !dev->asic_id) {
+               pci_read_config_dword(
+                       dev->nic_info.pdev,
+                       OCRDMA_SLI_ASIC_ID_OFFSET, &dev->asic_id);
+       }
+
+       return (dev->asic_id & OCRDMA_SLI_ASIC_GEN_NUM_MASK) >>
+                               OCRDMA_SLI_ASIC_GEN_NUM_SHIFT;
+}
+
 #endif
index fbac8eb..1554cca 100644 (file)
@@ -28,7 +28,8 @@
 #ifndef __OCRDMA_ABI_H__
 #define __OCRDMA_ABI_H__
 
-#define OCRDMA_ABI_VERSION 1
+#define OCRDMA_ABI_VERSION 2
+#define OCRDMA_BE_ROCE_ABI_VERSION 1
 /* user kernel communication data structures. */
 
 struct ocrdma_alloc_ucontext_resp {
@@ -107,9 +108,7 @@ struct ocrdma_create_qp_uresp {
        u32 db_sq_offset;
        u32 db_rq_offset;
        u32 db_shift;
-       u64 rsvd1;
-       u64 rsvd2;
-       u64 rsvd3;
+       u64 rsvd[11];
 } __packed;
 
 struct ocrdma_create_srq_uresp {
index 3407114..d4cc01f 100644 (file)
@@ -100,7 +100,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
        if (!(attr->ah_flags & IB_AH_GRH))
                return ERR_PTR(-EINVAL);
 
-       ah = kzalloc(sizeof *ah, GFP_ATOMIC);
+       ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
        if (!ah)
                return ERR_PTR(-ENOMEM);
 
index 1664d64..3bbf201 100644 (file)
@@ -32,7 +32,6 @@
 
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_user_verbs.h>
-#include <rdma/ib_addr.h>
 
 #include "ocrdma.h"
 #include "ocrdma_hw.h"
@@ -243,6 +242,23 @@ static int ocrdma_get_mbx_errno(u32 status)
        return err_num;
 }
 
+char *port_speed_string(struct ocrdma_dev *dev)
+{
+       char *str = "";
+       u16 speeds_supported;
+
+       speeds_supported = dev->phy.fixed_speeds_supported |
+                               dev->phy.auto_speeds_supported;
+       if (speeds_supported & OCRDMA_PHY_SPEED_40GBPS)
+               str = "40Gbps ";
+       else if (speeds_supported & OCRDMA_PHY_SPEED_10GBPS)
+               str = "10Gbps ";
+       else if (speeds_supported & OCRDMA_PHY_SPEED_1GBPS)
+               str = "1Gbps ";
+
+       return str;
+}
+
 static int ocrdma_get_mbx_cqe_errno(u16 cqe_status)
 {
        int err_num = -EINVAL;
@@ -332,6 +348,11 @@ static void *ocrdma_init_emb_mqe(u8 opcode, u32 cmd_len)
        return mqe;
 }
 
+static void *ocrdma_alloc_mqe(void)
+{
+       return kzalloc(sizeof(struct ocrdma_mqe), GFP_KERNEL);
+}
+
 static void ocrdma_free_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q)
 {
        dma_free_coherent(&dev->nic_info.pdev->dev, q->size, q->va, q->dma);
@@ -364,8 +385,8 @@ static void ocrdma_build_q_pages(struct ocrdma_pa *q_pa, int cnt,
        }
 }
 
-static int ocrdma_mbx_delete_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q,
-                              int queue_type)
+static int ocrdma_mbx_delete_q(struct ocrdma_dev *dev,
+                              struct ocrdma_queue_info *q, int queue_type)
 {
        u8 opcode = 0;
        int status;
@@ -444,7 +465,7 @@ mbx_err:
        return status;
 }
 
-static int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq)
+int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq)
 {
        int irq;
 
@@ -574,6 +595,7 @@ static int ocrdma_create_mq(struct ocrdma_dev *dev)
        if (status)
                goto alloc_err;
 
+       dev->eq_tbl[0].cq_cnt++;
        status = ocrdma_mbx_mq_cq_create(dev, &dev->mq.cq, &dev->eq_tbl[0].q);
        if (status)
                goto mbx_cq_free;
@@ -639,7 +661,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
 {
        struct ocrdma_qp *qp = NULL;
        struct ocrdma_cq *cq = NULL;
-       struct ib_event ib_evt;
+       struct ib_event ib_evt = { 0 };
        int cq_event = 0;
        int qp_event = 1;
        int srq_event = 0;
@@ -664,6 +686,8 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
        case OCRDMA_CQ_OVERRUN_ERROR:
                ib_evt.element.cq = &cq->ibcq;
                ib_evt.event = IB_EVENT_CQ_ERR;
+               cq_event = 1;
+               qp_event = 0;
                break;
        case OCRDMA_CQ_QPCAT_ERROR:
                ib_evt.element.qp = &qp->ibqp;
@@ -725,6 +749,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
                                                     qp->srq->ibsrq.
                                                     srq_context);
        } else if (dev_event) {
+               pr_err("%s: Fatal event received\n", dev->ibdev.name);
                ib_dispatch_event(&ib_evt);
        }
 
@@ -752,7 +777,6 @@ static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev,
        }
 }
 
-
 static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe)
 {
        /* async CQE processing */
@@ -799,8 +823,6 @@ static int ocrdma_mq_cq_handler(struct ocrdma_dev *dev, u16 cq_id)
                        ocrdma_process_acqe(dev, cqe);
                else if (cqe->valid_ae_cmpl_cons & OCRDMA_MCQE_CMPL_MASK)
                        ocrdma_process_mcqe(dev, cqe);
-               else
-                       pr_err("%s() cqe->compl is not set.\n", __func__);
                memset(cqe, 0, sizeof(struct ocrdma_mcqe));
                ocrdma_mcq_inc_tail(dev);
        }
@@ -858,16 +880,8 @@ static void ocrdma_qp_cq_handler(struct ocrdma_dev *dev, u16 cq_idx)
                BUG();
 
        cq = dev->cq_tbl[cq_idx];
-       if (cq == NULL) {
-               pr_err("%s%d invalid id=0x%x\n", __func__, dev->id, cq_idx);
+       if (cq == NULL)
                return;
-       }
-       spin_lock_irqsave(&cq->cq_lock, flags);
-       cq->armed = false;
-       cq->solicited = false;
-       spin_unlock_irqrestore(&cq->cq_lock, flags);
-
-       ocrdma_ring_cq_db(dev, cq->id, false, false, 0);
 
        if (cq->ibcq.comp_handler) {
                spin_lock_irqsave(&cq->comp_handler_lock, flags);
@@ -892,27 +906,35 @@ static irqreturn_t ocrdma_irq_handler(int irq, void *handle)
        struct ocrdma_dev *dev = eq->dev;
        struct ocrdma_eqe eqe;
        struct ocrdma_eqe *ptr;
-       u16 eqe_popped = 0;
        u16 cq_id;
-       while (1) {
+       int budget = eq->cq_cnt;
+
+       do {
                ptr = ocrdma_get_eqe(eq);
                eqe = *ptr;
                ocrdma_le32_to_cpu(&eqe, sizeof(eqe));
                if ((eqe.id_valid & OCRDMA_EQE_VALID_MASK) == 0)
                        break;
-               eqe_popped += 1;
+
                ptr->id_valid = 0;
+               /* Ring the EQ doorbell as soon as the EQE is consumed. */
+               ocrdma_ring_eq_db(dev, eq->q.id, false, true, 1);
                /* check whether it is a CQE or not. */
                if ((eqe.id_valid & OCRDMA_EQE_FOR_CQE_MASK) == 0) {
                        cq_id = eqe.id_valid >> OCRDMA_EQE_RESOURCE_ID_SHIFT;
                        ocrdma_cq_handler(dev, cq_id);
                }
                ocrdma_eq_inc_tail(eq);
-       }
-       ocrdma_ring_eq_db(dev, eq->q.id, true, true, eqe_popped);
-       /* Ring EQ doorbell with num_popped to 0 to enable interrupts again. */
-       if (dev->nic_info.intr_mode == BE_INTERRUPT_MODE_INTX)
-               ocrdma_ring_eq_db(dev, eq->q.id, true, true, 0);
+
+               /* A stale EQE can remain after the last bound CQ is
+                * destroyed; a valid EQE with budget == 0 indicates this.
+                */
+               if (budget)
+                       budget--;
+
+       } while (budget);
+
+       ocrdma_ring_eq_db(dev, eq->q.id, true, true, 0);
        return IRQ_HANDLED;
 }
 
@@ -949,7 +971,8 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe)
 {
        int status = 0;
        u16 cqe_status, ext_status;
-       struct ocrdma_mqe *rsp;
+       struct ocrdma_mqe *rsp_mqe;
+       struct ocrdma_mbx_rsp *rsp = NULL;
 
        mutex_lock(&dev->mqe_ctx.lock);
        ocrdma_post_mqe(dev, mqe);
@@ -958,23 +981,61 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe)
                goto mbx_err;
        cqe_status = dev->mqe_ctx.cqe_status;
        ext_status = dev->mqe_ctx.ext_status;
-       rsp = ocrdma_get_mqe_rsp(dev);
-       ocrdma_copy_le32_to_cpu(mqe, rsp, (sizeof(*mqe)));
+       rsp_mqe = ocrdma_get_mqe_rsp(dev);
+       ocrdma_copy_le32_to_cpu(mqe, rsp_mqe, (sizeof(*mqe)));
+       if ((mqe->hdr.spcl_sge_cnt_emb & OCRDMA_MQE_HDR_EMB_MASK) >>
+                               OCRDMA_MQE_HDR_EMB_SHIFT)
+               rsp = &mqe->u.rsp;
+
        if (cqe_status || ext_status) {
-               pr_err("%s() opcode=0x%x, cqe_status=0x%x, ext_status=0x%x\n",
-                      __func__,
-                    (rsp->u.rsp.subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >>
-                    OCRDMA_MBX_RSP_OPCODE_SHIFT, cqe_status, ext_status);
+               pr_err("%s() cqe_status=0x%x, ext_status=0x%x,",
+                      __func__, cqe_status, ext_status);
+               if (rsp) {
+                       /* This is for embedded cmds. */
+                       pr_err("opcode=0x%x, subsystem=0x%x\n",
+                              (rsp->subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >>
+                               OCRDMA_MBX_RSP_OPCODE_SHIFT,
+                               (rsp->subsys_op & OCRDMA_MBX_RSP_SUBSYS_MASK) >>
+                               OCRDMA_MBX_RSP_SUBSYS_SHIFT);
+               }
                status = ocrdma_get_mbx_cqe_errno(cqe_status);
                goto mbx_err;
        }
-       if (mqe->u.rsp.status & OCRDMA_MBX_RSP_STATUS_MASK)
+       /* For non-embedded cmds, rsp errors are handled in ocrdma_nonemb_mbx_cmd */
+       if (rsp && (mqe->u.rsp.status & OCRDMA_MBX_RSP_STATUS_MASK))
                status = ocrdma_get_mbx_errno(mqe->u.rsp.status);
 mbx_err:
        mutex_unlock(&dev->mqe_ctx.lock);
        return status;
 }
 
+static int ocrdma_nonemb_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe,
+                                void *payload_va)
+{
+       int status = 0;
+       struct ocrdma_mbx_rsp *rsp = payload_va;
+
+       if ((mqe->hdr.spcl_sge_cnt_emb & OCRDMA_MQE_HDR_EMB_MASK) >>
+                               OCRDMA_MQE_HDR_EMB_SHIFT)
+               BUG();
+
+       status = ocrdma_mbx_cmd(dev, mqe);
+       if (!status) {
+               /* For non-embedded cmds, only CQE failures are handled in
+                * ocrdma_mbx_cmd(); RSP errors must be checked here.
+                */
+               if (rsp->status & OCRDMA_MBX_RSP_STATUS_MASK)
+                       status = ocrdma_get_mbx_errno(rsp->status);
+       }
+
+       if (status)
+               pr_err("opcode=0x%x, subsystem=0x%x\n",
+                      (rsp->subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >>
+                       OCRDMA_MBX_RSP_OPCODE_SHIFT,
+                       (rsp->subsys_op & OCRDMA_MBX_RSP_SUBSYS_MASK) >>
+                       OCRDMA_MBX_RSP_SUBSYS_SHIFT);
+       return status;
+}
+
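
Both non-embedded commands added below set up the single SGE the same way; a hypothetical helper capturing that shared setup (ocrdma_mqe_set_nonemb_payload does not exist in the driver, it simply mirrors the two call sites):

        /* Point a non-embedded MQE at a DMA-coherent payload buffer. */
        static void ocrdma_mqe_set_nonemb_payload(struct ocrdma_mqe *mqe,
                                                  dma_addr_t pa, u32 len)
        {
                mqe->hdr.pyld_len = len;
                mqe->hdr.spcl_sge_cnt_emb |=
                        (1 << OCRDMA_MQE_HDR_SGE_CNT_SHIFT) &
                        OCRDMA_MQE_HDR_SGE_CNT_MASK;
                mqe->u.nonemb_req.sge[0].pa_lo = (u32)(pa & 0xffffffff);
                mqe->u.nonemb_req.sge[0].pa_hi = (u32)upper_32_bits(pa);
                mqe->u.nonemb_req.sge[0].len = len;
        }
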
 static void ocrdma_get_attr(struct ocrdma_dev *dev,
                              struct ocrdma_dev_attr *attr,
                              struct ocrdma_mbx_query_config *rsp)
@@ -985,6 +1046,9 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
        attr->max_qp =
            (rsp->qp_srq_cq_ird_ord & OCRDMA_MBX_QUERY_CFG_MAX_QP_MASK) >>
            OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT;
+       attr->max_srq =
+               (rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >>
+               OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET;
        attr->max_send_sge = ((rsp->max_write_send_sge &
                               OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >>
                              OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT);
@@ -1000,9 +1064,6 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
        attr->max_ord_per_qp = (rsp->max_ird_ord_per_qp &
                                OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK) >>
            OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT;
-       attr->max_srq =
-               (rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >>
-               OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET;
        attr->max_ird_per_qp = (rsp->max_ird_ord_per_qp &
                                OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_MASK) >>
            OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_SHIFT;
@@ -1015,6 +1076,7 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
        attr->local_ca_ack_delay = (rsp->max_pd_ca_ack_delay &
                                    OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_MASK) >>
            OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT;
+       attr->max_mw = rsp->max_mw;
        attr->max_mr = rsp->max_mr;
        attr->max_mr_size = ~0ull;
        attr->max_fmr = 0;
@@ -1036,7 +1098,7 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
        attr->max_inline_data =
            attr->wqe_size - (sizeof(struct ocrdma_hdr_wqe) +
                              sizeof(struct ocrdma_sge));
-       if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+       if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
                attr->ird = 1;
                attr->ird_page_size = OCRDMA_MIN_Q_PAGE_SIZE;
                attr->num_ird_pages = MAX_OCRDMA_IRD_PAGES;
@@ -1110,6 +1172,96 @@ mbx_err:
        return status;
 }
 
+int ocrdma_mbx_rdma_stats(struct ocrdma_dev *dev, bool reset)
+{
+       struct ocrdma_rdma_stats_req *req = dev->stats_mem.va;
+       struct ocrdma_mqe *mqe = &dev->stats_mem.mqe;
+       struct ocrdma_rdma_stats_resp *old_stats = NULL;
+       int status;
+
+       old_stats = kzalloc(sizeof(*old_stats), GFP_KERNEL);
+       if (old_stats == NULL)
+               return -ENOMEM;
+
+       memset(mqe, 0, sizeof(*mqe));
+       mqe->hdr.pyld_len = dev->stats_mem.size;
+       mqe->hdr.spcl_sge_cnt_emb |=
+                       (1 << OCRDMA_MQE_HDR_SGE_CNT_SHIFT) &
+                               OCRDMA_MQE_HDR_SGE_CNT_MASK;
+       mqe->u.nonemb_req.sge[0].pa_lo = (u32) (dev->stats_mem.pa & 0xffffffff);
+       mqe->u.nonemb_req.sge[0].pa_hi = (u32) upper_32_bits(dev->stats_mem.pa);
+       mqe->u.nonemb_req.sge[0].len = dev->stats_mem.size;
+
+       /* Cache the old stats */
+       memcpy(old_stats, req, sizeof(struct ocrdma_rdma_stats_resp));
+       memset(req, 0, dev->stats_mem.size);
+
+       ocrdma_init_mch((struct ocrdma_mbx_hdr *)req,
+                       OCRDMA_CMD_GET_RDMA_STATS,
+                       OCRDMA_SUBSYS_ROCE,
+                       dev->stats_mem.size);
+       if (reset)
+               req->reset_stats = reset;
+
+       status = ocrdma_nonemb_mbx_cmd(dev, mqe, dev->stats_mem.va);
+       if (status)
+               /* Copy from cache, if mbox fails */
+               memcpy(req, old_stats, sizeof(struct ocrdma_rdma_stats_resp));
+       else
+               ocrdma_le32_to_cpu(req, dev->stats_mem.size);
+
+       kfree(old_stats);
+       return status;
+}
+
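
The snapshot/rollback above keeps the last good counters visible when the mailbox command fails. A sketch of how the debugfs side is expected to drive it, using the stats_lock and last_stats_time fields added to ocrdma_dev earlier in this merge (the exact refresh policy lives in ocrdma_stats.c and may differ; ocrdma_refresh_stats is a made-up name):

        /* Refresh firmware stats at most once per second under stats_lock. */
        static void ocrdma_refresh_stats(struct ocrdma_dev *dev)
        {
                unsigned long now = jiffies;

                mutex_lock(&dev->stats_lock);
                if (time_after(now, dev->last_stats_time + HZ)) {
                        if (ocrdma_mbx_rdma_stats(dev, false))
                                pr_err("%s: stats mbox failed\n", __func__);
                        dev->last_stats_time = now;
                }
                mutex_unlock(&dev->stats_lock);
        }
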
+static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev)
+{
+       int status = -ENOMEM;
+       struct ocrdma_dma_mem dma;
+       struct ocrdma_mqe *mqe;
+       struct ocrdma_get_ctrl_attribs_rsp *ctrl_attr_rsp;
+       struct mgmt_hba_attribs *hba_attribs;
+
+       mqe = ocrdma_alloc_mqe();
+       if (!mqe)
+               return status;
+       memset(mqe, 0, sizeof(*mqe));
+
+       dma.size = sizeof(struct ocrdma_get_ctrl_attribs_rsp);
+       dma.va   = dma_alloc_coherent(&dev->nic_info.pdev->dev,
+                                       dma.size, &dma.pa, GFP_KERNEL);
+       if (!dma.va)
+               goto free_mqe;
+
+       mqe->hdr.pyld_len = dma.size;
+       mqe->hdr.spcl_sge_cnt_emb |=
+                       (1 << OCRDMA_MQE_HDR_SGE_CNT_SHIFT) &
+                       OCRDMA_MQE_HDR_SGE_CNT_MASK;
+       mqe->u.nonemb_req.sge[0].pa_lo = (u32) (dma.pa & 0xffffffff);
+       mqe->u.nonemb_req.sge[0].pa_hi = (u32) upper_32_bits(dma.pa);
+       mqe->u.nonemb_req.sge[0].len = dma.size;
+
+       memset(dma.va, 0, dma.size);
+       ocrdma_init_mch((struct ocrdma_mbx_hdr *)dma.va,
+                       OCRDMA_CMD_GET_CTRL_ATTRIBUTES,
+                       OCRDMA_SUBSYS_COMMON,
+                       dma.size);
+
+       status = ocrdma_nonemb_mbx_cmd(dev, mqe, dma.va);
+       if (!status) {
+               ctrl_attr_rsp = (struct ocrdma_get_ctrl_attribs_rsp *)dma.va;
+               hba_attribs = &ctrl_attr_rsp->ctrl_attribs.hba_attribs;
+
+               dev->hba_port_num = hba_attribs->phy_port;
+               strncpy(dev->model_number,
+                       hba_attribs->controller_model_number, 31);
+       }
+       dma_free_coherent(&dev->nic_info.pdev->dev, dma.size, dma.va, dma.pa);
+free_mqe:
+       kfree(mqe);
+       return status;
+}
+
 static int ocrdma_mbx_query_dev(struct ocrdma_dev *dev)
 {
        int status = -ENOMEM;
@@ -1157,6 +1309,35 @@ mbx_err:
        return status;
 }
 
+static int ocrdma_mbx_get_phy_info(struct ocrdma_dev *dev)
+{
+       int status = -ENOMEM;
+       struct ocrdma_mqe *cmd;
+       struct ocrdma_get_phy_info_rsp *rsp;
+
+       cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_PHY_DETAILS, sizeof(*cmd));
+       if (!cmd)
+               return status;
+
+       ocrdma_init_mch((struct ocrdma_mbx_hdr *)&cmd->u.cmd[0],
+                       OCRDMA_CMD_PHY_DETAILS, OCRDMA_SUBSYS_COMMON,
+                       sizeof(*cmd));
+
+       status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+       if (status)
+               goto mbx_err;
+
+       rsp = (struct ocrdma_get_phy_info_rsp *)cmd;
+       dev->phy.phy_type = le16_to_cpu(rsp->phy_type);
+       dev->phy.auto_speeds_supported  =
+                       le16_to_cpu(rsp->auto_speeds_supported);
+       dev->phy.fixed_speeds_supported =
+                       le16_to_cpu(rsp->fixed_speeds_supported);
+mbx_err:
+       kfree(cmd);
+       return status;
+}
+
 int ocrdma_mbx_alloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd)
 {
        int status = -ENOMEM;
@@ -1226,7 +1407,7 @@ static int ocrdma_build_q_conf(u32 *num_entries, int entry_size,
 
 static int ocrdma_mbx_create_ah_tbl(struct ocrdma_dev *dev)
 {
-       int i ;
+       int i;
        int status = 0;
        int max_ah;
        struct ocrdma_create_ah_tbl *cmd;
@@ -1357,12 +1538,10 @@ static void ocrdma_unbind_eq(struct ocrdma_dev *dev, u16 eq_id)
        int i;
 
        mutex_lock(&dev->dev_lock);
-       for (i = 0; i < dev->eq_cnt; i++) {
-               if (dev->eq_tbl[i].q.id != eq_id)
-                       continue;
-               dev->eq_tbl[i].cq_cnt -= 1;
-               break;
-       }
+       i = ocrdma_get_eq_table_index(dev, eq_id);
+       if (i == -EINVAL)
+               BUG();
+       dev->eq_tbl[i].cq_cnt -= 1;
        mutex_unlock(&dev->dev_lock);
 }
 
@@ -1380,7 +1559,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
                       __func__, dev->id, dev->attr.max_cqe, entries);
                return -EINVAL;
        }
-       if (dpp_cq && (dev->nic_info.dev_family != OCRDMA_GEN2_FAMILY))
+       if (dpp_cq && (ocrdma_get_asic_type(dev) != OCRDMA_ASIC_GEN_SKH_R))
                return -EINVAL;
 
        if (dpp_cq) {
@@ -1417,6 +1596,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
        cq->eqn = ocrdma_bind_eq(dev);
        cmd->cmd.req.rsvd_version = OCRDMA_CREATE_CQ_VER3;
        cqe_count = cq->len / cqe_size;
+       cq->cqe_cnt = cqe_count;
        if (cqe_count > 1024) {
                /* Set cnt to 3 to indicate more than 1024 cq entries */
                cmd->cmd.ev_cnt_flags |= (0x3 << OCRDMA_CREATE_CQ_CNT_SHIFT);
@@ -1439,7 +1619,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
        }
        /* shared eq between all the consumer cqs. */
        cmd->cmd.eqn = cq->eqn;
-       if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+       if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
                if (dpp_cq)
                        cmd->cmd.pgsz_pgcnt |= OCRDMA_CREATE_CQ_DPP <<
                                OCRDMA_CREATE_CQ_TYPE_SHIFT;
@@ -1484,12 +1664,9 @@ int ocrdma_mbx_destroy_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq)
            (cq->id << OCRDMA_DESTROY_CQ_QID_SHIFT) &
            OCRDMA_DESTROY_CQ_QID_MASK;
 
-       ocrdma_unbind_eq(dev, cq->eqn);
        status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
-       if (status)
-               goto mbx_err;
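+       /* Unbind the EQ and free the CQ memory even if the mailbox
+        * command failed; the CQ is being torn down either way.
+        */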
+       ocrdma_unbind_eq(dev, cq->eqn);
        dma_free_coherent(&dev->nic_info.pdev->dev, cq->len, cq->va, cq->pa);
-mbx_err:
        kfree(cmd);
        return status;
 }
@@ -2029,8 +2206,7 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *qp, struct ib_qp_init_attr *attrs,
                                OCRDMA_CREATE_QP_REQ_RQ_CQID_MASK;
        qp->rq_cq = cq;
 
-       if (pd->dpp_enabled && attrs->cap.max_inline_data && pd->num_dpp_qp &&
-           (attrs->cap.max_inline_data <= dev->attr.max_inline_data)) {
+       if (pd->dpp_enabled && pd->num_dpp_qp) {
                ocrdma_set_create_qp_dpp_cmd(cmd, pd, qp, enable_dpp_cq,
                                             dpp_cq_id);
        }
@@ -2099,7 +2275,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
        memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0],
               sizeof(cmd->params.dgid));
        status = ocrdma_query_gid(&qp->dev->ibdev, 1,
-                        ah_attr->grh.sgid_index, &sgid);
+                       ah_attr->grh.sgid_index, &sgid);
        if (status)
                return status;
 
@@ -2127,8 +2303,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
 
 static int ocrdma_set_qp_params(struct ocrdma_qp *qp,
                                struct ocrdma_modify_qp *cmd,
-                               struct ib_qp_attr *attrs, int attr_mask,
-                               enum ib_qp_state old_qps)
+                               struct ib_qp_attr *attrs, int attr_mask)
 {
        int status = 0;
 
@@ -2233,8 +2408,7 @@ pmtu_err:
 }
 
 int ocrdma_mbx_modify_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
-                        struct ib_qp_attr *attrs, int attr_mask,
-                        enum ib_qp_state old_qps)
+                        struct ib_qp_attr *attrs, int attr_mask)
 {
        int status = -ENOMEM;
        struct ocrdma_modify_qp *cmd;
@@ -2257,7 +2431,7 @@ int ocrdma_mbx_modify_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
                    OCRDMA_QP_PARAMS_STATE_MASK;
        }
 
-       status = ocrdma_set_qp_params(qp, cmd, attrs, attr_mask, old_qps);
+       status = ocrdma_set_qp_params(qp, cmd, attrs, attr_mask);
        if (status)
                goto mbx_err;
        status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
@@ -2488,7 +2662,7 @@ static int ocrdma_create_eqs(struct ocrdma_dev *dev)
 
        for (i = 0; i < num_eq; i++) {
                status = ocrdma_create_eq(dev, &dev->eq_tbl[i],
-                                         OCRDMA_EQ_LEN);
+                                       OCRDMA_EQ_LEN);
                if (status) {
                        status = -EINVAL;
                        break;
@@ -2533,6 +2707,13 @@ int ocrdma_init_hw(struct ocrdma_dev *dev)
        status = ocrdma_mbx_create_ah_tbl(dev);
        if (status)
                goto conf_err;
+       status = ocrdma_mbx_get_phy_info(dev);
+       if (status)
+               goto conf_err;
+       status = ocrdma_mbx_get_ctrl_attribs(dev);
+       if (status)
+               goto conf_err;
+
        return 0;
 
 conf_err:
index 82fe332..e513f72 100644 (file)
@@ -112,8 +112,7 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *, struct ib_qp_init_attr *attrs,
                         u8 enable_dpp_cq, u16 dpp_cq_id, u16 *dpp_offset,
                         u16 *dpp_credit_lmt);
 int ocrdma_mbx_modify_qp(struct ocrdma_dev *, struct ocrdma_qp *,
-                        struct ib_qp_attr *attrs, int attr_mask,
-                        enum ib_qp_state old_qps);
+                        struct ib_qp_attr *attrs, int attr_mask);
 int ocrdma_mbx_query_qp(struct ocrdma_dev *, struct ocrdma_qp *,
                        struct ocrdma_qp_params *param);
 int ocrdma_mbx_destroy_qp(struct ocrdma_dev *, struct ocrdma_qp *);
@@ -132,5 +131,8 @@ int ocrdma_qp_state_change(struct ocrdma_qp *, enum ib_qp_state new_state,
 bool ocrdma_is_qp_in_sq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *);
 bool ocrdma_is_qp_in_rq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *);
 void ocrdma_flush_qp(struct ocrdma_qp *);
+int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq);
 
+int ocrdma_mbx_rdma_stats(struct ocrdma_dev *, bool reset);
+char *port_speed_string(struct ocrdma_dev *dev);
 #endif                         /* __OCRDMA_HW_H__ */
index 1a8a945..7c504e0 100644 (file)
 #include "ocrdma_ah.h"
 #include "be_roce.h"
 #include "ocrdma_hw.h"
+#include "ocrdma_stats.h"
 #include "ocrdma_abi.h"
 
-MODULE_VERSION(OCRDMA_ROCE_DEV_VERSION);
-MODULE_DESCRIPTION("Emulex RoCE HCA Driver");
+MODULE_VERSION(OCRDMA_ROCE_DRV_VERSION);
+MODULE_DESCRIPTION(OCRDMA_ROCE_DRV_DESC " " OCRDMA_ROCE_DRV_VERSION);
 MODULE_AUTHOR("Emulex Corporation");
 MODULE_LICENSE("GPL");
 
@@ -286,7 +287,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
 
        dev->ibdev.process_mad = ocrdma_process_mad;
 
-       if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+       if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
                dev->ibdev.uverbs_cmd_mask |=
                     OCRDMA_UVERBS(CREATE_SRQ) |
                     OCRDMA_UVERBS(MODIFY_SRQ) |
@@ -338,9 +339,42 @@ static void ocrdma_free_resources(struct ocrdma_dev *dev)
        kfree(dev->sgid_tbl);
 }
 
+/* OCRDMA sysfs interface */
+static ssize_t show_rev(struct device *device, struct device_attribute *attr,
+                       char *buf)
+{
+       struct ocrdma_dev *dev = dev_get_drvdata(device);
+
+       return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor);
+}
+
+static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
+                       char *buf)
+{
+       struct ocrdma_dev *dev = dev_get_drvdata(device);
+
+       return scnprintf(buf, PAGE_SIZE, "%s", &dev->attr.fw_ver[0]);
+}
+
+static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+
+static struct device_attribute *ocrdma_attributes[] = {
+       &dev_attr_hw_rev,
+       &dev_attr_fw_ver
+};
+
+static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
+               device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]);
+}
+
 static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
 {
-       int status = 0;
+       int status = 0, i;
        struct ocrdma_dev *dev;
 
        dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev));
@@ -369,11 +403,25 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
        if (status)
                goto alloc_err;
 
+       for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
+               if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i]))
+                       goto sysfs_err;
        spin_lock(&ocrdma_devlist_lock);
        list_add_tail_rcu(&dev->entry, &ocrdma_dev_list);
        spin_unlock(&ocrdma_devlist_lock);
+       /* Init stats */
+       ocrdma_add_port_stats(dev);
+
+       pr_info("%s %s: %s \"%s\" port %d\n",
+               dev_name(&dev->nic_info.pdev->dev), hca_name(dev),
+               port_speed_string(dev), dev->model_number,
+               dev->hba_port_num);
+       pr_info("%s ocrdma%d driver loaded successfully\n",
+               dev_name(&dev->nic_info.pdev->dev), dev->id);
        return dev;
 
+sysfs_err:
+       ocrdma_remove_sysfiles(dev);
 alloc_err:
        ocrdma_free_resources(dev);
        ocrdma_cleanup_hw(dev);
@@ -400,6 +448,9 @@ static void ocrdma_remove(struct ocrdma_dev *dev)
        /* first unregister with stack to stop all the active traffic
         * of the registered clients.
         */
+       ocrdma_rem_port_stats(dev);
+       ocrdma_remove_sysfiles(dev);
+
        ib_unregister_device(&dev->ibdev);
 
        spin_lock(&ocrdma_devlist_lock);
@@ -437,7 +488,7 @@ static int ocrdma_close(struct ocrdma_dev *dev)
                cur_qp = dev->qp_tbl;
                for (i = 0; i < OCRDMA_MAX_QP; i++) {
                        qp = cur_qp[i];
-                       if (qp) {
+                       if (qp && qp->ibqp.qp_type != IB_QPT_GSI) {
                                /* change the QP state to ERROR */
                                _ocrdma_modify_qp(&qp->ibqp, &attrs, attr_mask);
 
@@ -478,6 +529,7 @@ static struct ocrdma_driver ocrdma_drv = {
        .add                    = ocrdma_add,
        .remove                 = ocrdma_remove,
        .state_change_handler   = ocrdma_event_handler,
+       .be_abi_version         = OCRDMA_BE_ROCE_ABI_VERSION,
 };
 
 static void ocrdma_unregister_inet6addr_notifier(void)
@@ -487,10 +539,17 @@ static void ocrdma_unregister_inet6addr_notifier(void)
 #endif
 }
 
+static void ocrdma_unregister_inetaddr_notifier(void)
+{
+       unregister_inetaddr_notifier(&ocrdma_inetaddr_notifier);
+}
+
 static int __init ocrdma_init_module(void)
 {
        int status;
 
+       ocrdma_init_debugfs();
+
        status = register_inetaddr_notifier(&ocrdma_inetaddr_notifier);
        if (status)
                return status;
@@ -498,13 +557,19 @@ static int __init ocrdma_init_module(void)
 #if IS_ENABLED(CONFIG_IPV6)
        status = register_inet6addr_notifier(&ocrdma_inet6addr_notifier);
        if (status)
-               return status;
+               goto err_notifier6;
 #endif
 
        status = be_roce_register_driver(&ocrdma_drv);
        if (status)
-               ocrdma_unregister_inet6addr_notifier();
+               goto err_be_reg;
 
+       return 0;
+
+err_be_reg:
+       ocrdma_unregister_inet6addr_notifier();
+err_notifier6:
+       ocrdma_unregister_inetaddr_notifier();
        return status;
 }
 
@@ -512,6 +577,8 @@ static void __exit ocrdma_exit_module(void)
 {
        be_roce_unregister_driver(&ocrdma_drv);
        ocrdma_unregister_inet6addr_notifier();
+       ocrdma_unregister_inetaddr_notifier();
+       ocrdma_rem_debugfs();
 }
 
 module_init(ocrdma_init_module);
index 60d5ac2..96c9ee6 100644 (file)
 
 #define Bit(_b) (1 << (_b))
 
-#define OCRDMA_GEN1_FAMILY     0xB
-#define OCRDMA_GEN2_FAMILY     0x0F
+enum {
+       OCRDMA_ASIC_GEN_SKH_R = 0x04,
+       OCRDMA_ASIC_GEN_LANCER = 0x0B
+};
+
+enum {
+       OCRDMA_ASIC_REV_A0 = 0x00,
+       OCRDMA_ASIC_REV_B0 = 0x10,
+       OCRDMA_ASIC_REV_C0 = 0x20
+};
 
 #define OCRDMA_SUBSYS_ROCE 10
 enum {
@@ -64,6 +72,7 @@ enum {
 
        OCRDMA_CMD_ATTACH_MCAST,
        OCRDMA_CMD_DETACH_MCAST,
+       OCRDMA_CMD_GET_RDMA_STATS,
 
        OCRDMA_CMD_MAX
 };
@@ -74,12 +83,14 @@ enum {
        OCRDMA_CMD_CREATE_CQ            = 12,
        OCRDMA_CMD_CREATE_EQ            = 13,
        OCRDMA_CMD_CREATE_MQ            = 21,
+       OCRDMA_CMD_GET_CTRL_ATTRIBUTES  = 32,
        OCRDMA_CMD_GET_FW_VER           = 35,
        OCRDMA_CMD_DELETE_MQ            = 53,
        OCRDMA_CMD_DELETE_CQ            = 54,
        OCRDMA_CMD_DELETE_EQ            = 55,
        OCRDMA_CMD_GET_FW_CONFIG        = 58,
-       OCRDMA_CMD_CREATE_MQ_EXT        = 90
+       OCRDMA_CMD_CREATE_MQ_EXT        = 90,
+       OCRDMA_CMD_PHY_DETAILS          = 102
 };
 
 enum {
@@ -103,7 +114,10 @@ enum {
        OCRDMA_DB_GEN2_SRQ_OFFSET       = OCRDMA_DB_GEN2_RQ_OFFSET,
        OCRDMA_DB_CQ_OFFSET             = 0x120,
        OCRDMA_DB_EQ_OFFSET             = OCRDMA_DB_CQ_OFFSET,
-       OCRDMA_DB_MQ_OFFSET             = 0x140
+       OCRDMA_DB_MQ_OFFSET             = 0x140,
+
+       OCRDMA_DB_SQ_SHIFT              = 16,
+       OCRDMA_DB_RQ_SHIFT              = 24
 };
 
 #define OCRDMA_DB_CQ_RING_ID_MASK       0x3FF  /* bits 0 - 9 */
@@ -138,6 +152,10 @@ enum {
 #define OCRDMA_MIN_Q_PAGE_SIZE (4096)
 #define OCRDMA_MAX_Q_PAGES     (8)
 
+#define OCRDMA_SLI_ASIC_ID_OFFSET      0x9C
+#define OCRDMA_SLI_ASIC_REV_MASK       0x000000FF
+#define OCRDMA_SLI_ASIC_GEN_NUM_MASK   0x0000FF00
+#define OCRDMA_SLI_ASIC_GEN_NUM_SHIFT  0x08
 /*
 # 0: 4K Bytes
 # 1: 8K Bytes
@@ -562,6 +580,30 @@ enum {
        OCRDMA_FN_MODE_RDMA     = 0x4
 };
 
+struct ocrdma_get_phy_info_rsp {
+       struct ocrdma_mqe_hdr hdr;
+       struct ocrdma_mbx_rsp rsp;
+
+       u16 phy_type;
+       u16 interface_type;
+       u32 misc_params;
+       u16 ext_phy_details;
+       u16 rsvd;
+       u16 auto_speeds_supported;
+       u16 fixed_speeds_supported;
+       u32 future_use[2];
+};
+
+enum {
+       OCRDMA_PHY_SPEED_ZERO = 0x0,
+       OCRDMA_PHY_SPEED_10MBPS = 0x1,
+       OCRDMA_PHY_SPEED_100MBPS = 0x2,
+       OCRDMA_PHY_SPEED_1GBPS = 0x4,
+       OCRDMA_PHY_SPEED_10GBPS = 0x8,
+       OCRDMA_PHY_SPEED_40GBPS = 0x20
+};
+
 struct ocrdma_get_link_speed_rsp {
        struct ocrdma_mqe_hdr hdr;
        struct ocrdma_mbx_rsp rsp;
@@ -590,7 +632,7 @@ enum {
 
 enum {
        OCRDMA_CREATE_CQ_VER2                   = 2,
-       OCRDMA_CREATE_CQ_VER3                   = 3,
+       OCRDMA_CREATE_CQ_VER3                   = 3,
 
        OCRDMA_CREATE_CQ_PAGE_CNT_MASK          = 0xFFFF,
        OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT        = 16,
@@ -1050,6 +1092,7 @@ enum {
        OCRDMA_MODIFY_QP_RSP_MAX_ORD_MASK       = 0xFFFF <<
                                        OCRDMA_MODIFY_QP_RSP_MAX_ORD_SHIFT
 };
+
 struct ocrdma_modify_qp_rsp {
        struct ocrdma_mqe_hdr hdr;
        struct ocrdma_mbx_rsp rsp;
@@ -1062,8 +1105,8 @@ struct ocrdma_query_qp {
        struct ocrdma_mqe_hdr hdr;
        struct ocrdma_mbx_hdr req;
 
-#define OCRDMA_QUERY_UP_QP_ID_SHIFT 0
-#define OCRDMA_QUERY_UP_QP_ID_MASK   0xFFFFFF
+#define OCRDMA_QUERY_UP_QP_ID_SHIFT    0
+#define OCRDMA_QUERY_UP_QP_ID_MASK     0xFFFFFF
        u32 qp_id;
 };
 
@@ -1703,4 +1746,208 @@ struct ocrdma_av {
        u32 valid;
 } __packed;
 
+struct ocrdma_rsrc_stats {
+       u32 dpp_pds;
+       u32 non_dpp_pds;
+       u32 rc_dpp_qps;
+       u32 uc_dpp_qps;
+       u32 ud_dpp_qps;
+       u32 rc_non_dpp_qps;
+       u32 rsvd;
+       u32 uc_non_dpp_qps;
+       u32 ud_non_dpp_qps;
+       u32 rsvd1;
+       u32 srqs;
+       u32 rbqs;
+       u32 r64K_nsmr;
+       u32 r64K_to_2M_nsmr;
+       u32 r2M_to_44M_nsmr;
+       u32 r44M_to_1G_nsmr;
+       u32 r1G_to_4G_nsmr;
+       u32 nsmr_count_4G_to_32G;
+       u32 r32G_to_64G_nsmr;
+       u32 r64G_to_128G_nsmr;
+       u32 r128G_to_higher_nsmr;
+       u32 embedded_nsmr;
+       u32 frmr;
+       u32 prefetch_qps;
+       u32 ondemand_qps;
+       u32 phy_mr;
+       u32 mw;
+       u32 rsvd2[7];
+};
+
+struct ocrdma_db_err_stats {
+       u32 sq_doorbell_errors;
+       u32 cq_doorbell_errors;
+       u32 rq_srq_doorbell_errors;
+       u32 cq_overflow_errors;
+       u32 rsvd[4];
+};
+
+struct ocrdma_wqe_stats {
+       u32 large_send_rc_wqes_lo;
+       u32 large_send_rc_wqes_hi;
+       u32 large_write_rc_wqes_lo;
+       u32 large_write_rc_wqes_hi;
+       u32 rsvd[4];
+       u32 read_wqes_lo;
+       u32 read_wqes_hi;
+       u32 frmr_wqes_lo;
+       u32 frmr_wqes_hi;
+       u32 mw_bind_wqes_lo;
+       u32 mw_bind_wqes_hi;
+       u32 invalidate_wqes_lo;
+       u32 invalidate_wqes_hi;
+       u32 rsvd1[2];
+       u32 dpp_wqe_drops;
+       u32 rsvd2[5];
+};
+
+struct ocrdma_tx_stats {
+       u32 send_pkts_lo;
+       u32 send_pkts_hi;
+       u32 write_pkts_lo;
+       u32 write_pkts_hi;
+       u32 read_pkts_lo;
+       u32 read_pkts_hi;
+       u32 read_rsp_pkts_lo;
+       u32 read_rsp_pkts_hi;
+       u32 ack_pkts_lo;
+       u32 ack_pkts_hi;
+       u32 send_bytes_lo;
+       u32 send_bytes_hi;
+       u32 write_bytes_lo;
+       u32 write_bytes_hi;
+       u32 read_req_bytes_lo;
+       u32 read_req_bytes_hi;
+       u32 read_rsp_bytes_lo;
+       u32 read_rsp_bytes_hi;
+       u32 ack_timeouts;
+       u32 rsvd[5];
+};
+
+struct ocrdma_tx_qp_err_stats {
+       u32 local_length_errors;
+       u32 local_protection_errors;
+       u32 local_qp_operation_errors;
+       u32 retry_count_exceeded_errors;
+       u32 rnr_retry_count_exceeded_errors;
+       u32 rsvd[3];
+};
+
+struct ocrdma_rx_stats {
+       u32 roce_frame_bytes_lo;
+       u32 roce_frame_bytes_hi;
+       u32 roce_frame_icrc_drops;
+       u32 roce_frame_payload_len_drops;
+       u32 ud_drops;
+       u32 qp1_drops;
+       u32 psn_error_request_packets;
+       u32 psn_error_resp_packets;
+       u32 rnr_nak_timeouts;
+       u32 rnr_nak_receives;
+       u32 roce_frame_rxmt_drops;
+       u32 nak_count_psn_sequence_errors;
+       u32 rc_drop_count_lookup_errors;
+       u32 rq_rnr_naks;
+       u32 srq_rnr_naks;
+       u32 roce_frames_lo;
+       u32 roce_frames_hi;
+       u32 rsvd;
+};
+
+struct ocrdma_rx_qp_err_stats {
+       u32 nak_invalid_request_errors;
+       u32 nak_remote_operation_errors;
+       u32 nak_count_remote_access_errors;
+       u32 local_length_errors;
+       u32 local_protection_errors;
+       u32 local_qp_operation_errors;
+       u32 rsvd[2];
+};
+
+struct ocrdma_tx_dbg_stats {
+       u32 data[100];
+};
+
+struct ocrdma_rx_dbg_stats {
+       u32 data[200];
+};
+
+struct ocrdma_rdma_stats_req {
+       struct ocrdma_mbx_hdr hdr;
+       u8 reset_stats;
+       u8 rsvd[3];
+} __packed;
+
+struct ocrdma_rdma_stats_resp {
+       struct ocrdma_mbx_hdr hdr;
+       struct ocrdma_rsrc_stats act_rsrc_stats;
+       struct ocrdma_rsrc_stats th_rsrc_stats;
+       struct ocrdma_db_err_stats      db_err_stats;
+       struct ocrdma_wqe_stats         wqe_stats;
+       struct ocrdma_tx_stats          tx_stats;
+       struct ocrdma_tx_qp_err_stats   tx_qp_err_stats;
+       struct ocrdma_rx_stats          rx_stats;
+       struct ocrdma_rx_qp_err_stats   rx_qp_err_stats;
+       struct ocrdma_tx_dbg_stats      tx_dbg_stats;
+       struct ocrdma_rx_dbg_stats      rx_dbg_stats;
+} __packed;
+
+struct mgmt_hba_attribs {
+       u8 flashrom_version_string[32];
+       u8 manufacturer_name[32];
+       u32 supported_modes;
+       u32 rsvd0[3];
+       u8 ncsi_ver_string[12];
+       u32 default_extended_timeout;
+       u8 controller_model_number[32];
+       u8 controller_description[64];
+       u8 controller_serial_number[32];
+       u8 ip_version_string[32];
+       u8 firmware_version_string[32];
+       u8 bios_version_string[32];
+       u8 redboot_version_string[32];
+       u8 driver_version_string[32];
+       u8 fw_on_flash_version_string[32];
+       u32 functionalities_supported;
+       u16 max_cdblength;
+       u8 asic_revision;
+       u8 generational_guid[16];
+       u8 hba_port_count;
+       u16 default_link_down_timeout;
+       u8 iscsi_ver_min_max;
+       u8 multifunction_device;
+       u8 cache_valid;
+       u8 hba_status;
+       u8 max_domains_supported;
+       u8 phy_port;
+       u32 firmware_post_status;
+       u32 hba_mtu[8];
+       u32 rsvd1[4];
+};
+
+struct mgmt_controller_attrib {
+       struct mgmt_hba_attribs hba_attribs;
+       u16 pci_vendor_id;
+       u16 pci_device_id;
+       u16 pci_sub_vendor_id;
+       u16 pci_sub_system_id;
+       u8 pci_bus_number;
+       u8 pci_device_number;
+       u8 pci_function_number;
+       u8 interface_type;
+       u64 unique_identifier;
+       u32 rsvd0[5];
+};
+
+struct ocrdma_get_ctrl_attribs_rsp {
+       struct ocrdma_mbx_hdr hdr;
+       struct mgmt_controller_attrib ctrl_attribs;
+};
+
 #endif                         /* __OCRDMA_SLI_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
new file mode 100644 (file)
index 0000000..6c54106
--- /dev/null
@@ -0,0 +1,623 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for          *
+ * RoCE (RDMA over Converged Ethernet) adapters.                   *
+ * Copyright (C) 2008-2014 Emulex. All rights reserved.            *
+ * EMULEX and SLI are trademarks of Emulex.                        *
+ * www.emulex.com                                                  *
+ *                                                                 *
+ * This program is free software; you can redistribute it and/or   *
+ * modify it under the terms of version 2 of the GNU General       *
+ * Public License as published by the Free Software Foundation.    *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
+ * more details, a copy of which can be found in the file COPYING  *
+ * included with this package.                                     *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#include <rdma/ib_addr.h>
+#include "ocrdma_stats.h"
+
+static struct dentry *ocrdma_dbgfs_dir;
+
+static int ocrdma_add_stat(char *start, char *pcur,
+                               char *name, u64 count)
+{
+       char buff[128] = {0};
+       int cpy_len = 0;
+
+       snprintf(buff, 128, "%s: %llu\n", name, count);
+       cpy_len = strlen(buff);
+
+       if (pcur + cpy_len > start + OCRDMA_MAX_DBGFS_MEM) {
+               pr_err("%s: No space in stats buff\n", __func__);
+               return 0;
+       }
+
+       memcpy(pcur, buff, cpy_len);
+       return cpy_len;
+}
+
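
Each dump helper below accumulates lines into the shared debugfs buffer by advancing a cursor with the return value; since ocrdma_add_stat() returns 0 when the buffer would overflow, the cursor simply stops moving. A trivial usage sketch (the counter names and values are made up):

        char *start = dev->stats_mem.debugfs_mem;
        char *pcur = start;

        pcur += ocrdma_add_stat(start, pcur, "example_counter", (u64)42);
        pcur += ocrdma_add_stat(start, pcur, "another_counter", (u64)7);
        /* pcur - start is the number of valid bytes to expose via debugfs */
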
+static bool ocrdma_alloc_stats_mem(struct ocrdma_dev *dev)
+{
+       struct stats_mem *mem = &dev->stats_mem;
+
+       /* Alloc mbox command mem */
+       mem->size = max_t(u32, sizeof(struct ocrdma_rdma_stats_req),
+                       sizeof(struct ocrdma_rdma_stats_resp));
+
+       mem->va   = dma_alloc_coherent(&dev->nic_info.pdev->dev, mem->size,
+                                        &mem->pa, GFP_KERNEL);
+       if (!mem->va) {
+               pr_err("%s: stats mbox allocation failed\n", __func__);
+               return false;
+       }
+
+       memset(mem->va, 0, mem->size);
+
+       /* Alloc debugfs mem */
+       mem->debugfs_mem = kzalloc(OCRDMA_MAX_DBGFS_MEM, GFP_KERNEL);
+       if (!mem->debugfs_mem) {
+               pr_err("%s: stats debugfs mem allocation failed\n", __func__);
+               return false;
+       }
+
+       return true;
+}
+
+static void ocrdma_release_stats_mem(struct ocrdma_dev *dev)
+{
+       struct stats_mem *mem = &dev->stats_mem;
+
+       if (mem->va)
+               dma_free_coherent(&dev->nic_info.pdev->dev, mem->size,
+                                 mem->va, mem->pa);
+       kfree(mem->debugfs_mem);
+}
+
+static char *ocrdma_resource_stats(struct ocrdma_dev *dev)
+{
+       char *stats = dev->stats_mem.debugfs_mem, *pcur;
+       struct ocrdma_rdma_stats_resp *rdma_stats =
+                       (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+       struct ocrdma_rsrc_stats *rsrc_stats = &rdma_stats->act_rsrc_stats;
+
+       memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+       pcur = stats;
+       pcur += ocrdma_add_stat(stats, pcur, "active_dpp_pds",
+                               (u64)rsrc_stats->dpp_pds);
+       pcur += ocrdma_add_stat(stats, pcur, "active_non_dpp_pds",
+                               (u64)rsrc_stats->non_dpp_pds);
+       pcur += ocrdma_add_stat(stats, pcur, "active_rc_dpp_qps",
+                               (u64)rsrc_stats->rc_dpp_qps);
+       pcur += ocrdma_add_stat(stats, pcur, "active_uc_dpp_qps",
+                               (u64)rsrc_stats->uc_dpp_qps);
+       pcur += ocrdma_add_stat(stats, pcur, "active_ud_dpp_qps",
+                               (u64)rsrc_stats->ud_dpp_qps);
+       pcur += ocrdma_add_stat(stats, pcur, "active_rc_non_dpp_qps",
+                               (u64)rsrc_stats->rc_non_dpp_qps);
+       pcur += ocrdma_add_stat(stats, pcur, "active_uc_non_dpp_qps",
+                               (u64)rsrc_stats->uc_non_dpp_qps);
+       pcur += ocrdma_add_stat(stats, pcur, "active_ud_non_dpp_qps",
+                               (u64)rsrc_stats->ud_non_dpp_qps);
+       pcur += ocrdma_add_stat(stats, pcur, "active_srqs",
+                               (u64)rsrc_stats->srqs);
+       pcur += ocrdma_add_stat(stats, pcur, "active_rbqs",
+                               (u64)rsrc_stats->rbqs);
+       pcur += ocrdma_add_stat(stats, pcur, "active_64K_nsmr",
+                               (u64)rsrc_stats->r64K_nsmr);
+       pcur += ocrdma_add_stat(stats, pcur, "active_64K_to_2M_nsmr",
+                               (u64)rsrc_stats->r64K_to_2M_nsmr);
+       pcur += ocrdma_add_stat(stats, pcur, "active_2M_to_44M_nsmr",
+                               (u64)rsrc_stats->r2M_to_44M_nsmr);
+       pcur += ocrdma_add_stat(stats, pcur, "active_44M_to_1G_nsmr",
+                               (u64)rsrc_stats->r44M_to_1G_nsmr);
+       pcur += ocrdma_add_stat(stats, pcur, "active_1G_to_4G_nsmr",
+                               (u64)rsrc_stats->r1G_to_4G_nsmr);
+       pcur += ocrdma_add_stat(stats, pcur, "active_nsmr_count_4G_to_32G",
+                               (u64)rsrc_stats->nsmr_count_4G_to_32G);
+       pcur += ocrdma_add_stat(stats, pcur, "active_32G_to_64G_nsmr",
+                               (u64)rsrc_stats->r32G_to_64G_nsmr);
+       pcur += ocrdma_add_stat(stats, pcur, "active_64G_to_128G_nsmr",
+                               (u64)rsrc_stats->r64G_to_128G_nsmr);
+       pcur += ocrdma_add_stat(stats, pcur, "active_128G_to_higher_nsmr",
+                               (u64)rsrc_stats->r128G_to_higher_nsmr);
+       pcur += ocrdma_add_stat(stats, pcur, "active_embedded_nsmr",
+                               (u64)rsrc_stats->embedded_nsmr);
+       pcur += ocrdma_add_stat(stats, pcur, "active_frmr",
+                               (u64)rsrc_stats->frmr);
+       pcur += ocrdma_add_stat(stats, pcur, "active_prefetch_qps",
+                               (u64)rsrc_stats->prefetch_qps);
+       pcur += ocrdma_add_stat(stats, pcur, "active_ondemand_qps",
+                               (u64)rsrc_stats->ondemand_qps);
+       pcur += ocrdma_add_stat(stats, pcur, "active_phy_mr",
+                               (u64)rsrc_stats->phy_mr);
+       pcur += ocrdma_add_stat(stats, pcur, "active_mw",
+                               (u64)rsrc_stats->mw);
+
+       /* Print the threshold stats */
+       rsrc_stats = &rdma_stats->th_rsrc_stats;
+
+       pcur += ocrdma_add_stat(stats, pcur, "threshold_dpp_pds",
+                               (u64)rsrc_stats->dpp_pds);
+       pcur += ocrdma_add_stat(stats, pcur, "threshold_non_dpp_pds",
+                               (u64)rsrc_stats->non_dpp_pds);
+       pcur += ocrdma_add_stat(stats, pcur, "threshold_rc_dpp_qps",
+                               (u64)rsrc_stats->rc_dpp_qps);
+       pcur += ocrdma_add_stat(stats, pcur, "threshold_uc_dpp_qps",
+                               (u64)rsrc_stats->uc_dpp_qps);
+       pcur += ocrdma_add_stat(stats, pcur, "threshold_ud_dpp_qps",
+                               (u64)rsrc_stats->ud_dpp_qps);
+       pcur += ocrdma_add_stat(stats, pcur, "threshold_rc_non_dpp_qps",
+                               (u64)rsrc_stats->rc_non_dpp_qps);
+       pcur += ocrdma_add_stat(stats, pcur, "threshold_uc_non_dpp_qps",
+                               (u64)rsrc_stats->uc_non_dpp_qps);
+       pcur += ocrdma_add_stat(stats, pcur, "threshold_ud_non_dpp_qps",
+                               (u64)rsrc_stats->ud_non_dpp_qps);
+       pcur += ocrdma_add_stat(stats, pcur, "threshold_srqs",
+                               (u64)rsrc_stats->srqs);
+       pcur += ocrdma_add_stat(stats, pcur, "threshold_rbqs",
+                               (u64)rsrc_stats->rbqs);
+       pcur += ocrdma_add_stat(stats, pcur, "threshold_64K_nsmr",
+                               (u64)rsrc_stats->r64K_nsmr);
+       pcur += ocrdma_add_stat(stats, pcur, "threshold_64K_to_2M_nsmr",
+                               (u64)rsrc_stats->r64K_to_2M_nsmr);
+       pcur += ocrdma_add_stat(stats, pcur, "threshold_2M_to_44M_nsmr",
+                               (u64)rsrc_stats->r2M_to_44M_nsmr);
+       pcur += ocrdma_add_stat(stats, pcur, "threshold_44M_to_1G_nsmr",
+                               (u64)rsrc_stats->r44M_to_1G_nsmr);
+       pcur += ocrdma_add_stat(stats, pcur, "threshold_1G_to_4G_nsmr",
+                               (u64)rsrc_stats->r1G_to_4G_nsmr);
+       pcur += ocrdma_add_stat(stats, pcur, "threshold_nsmr_count_4G_to_32G",
+                               (u64)rsrc_stats->nsmr_count_4G_to_32G);
+       pcur += ocrdma_add_stat(stats, pcur, "threshold_32G_to_64G_nsmr",
+                               (u64)rsrc_stats->r32G_to_64G_nsmr);
+       pcur += ocrdma_add_stat(stats, pcur, "threshold_64G_to_128G_nsmr",
+                               (u64)rsrc_stats->r64G_to_128G_nsmr);
+       pcur += ocrdma_add_stat(stats, pcur, "threshold_128G_to_higher_nsmr",
+                               (u64)rsrc_stats->r128G_to_higher_nsmr);
+       pcur += ocrdma_add_stat(stats, pcur, "threshold_embedded_nsmr",
+                               (u64)rsrc_stats->embedded_nsmr);
+       pcur += ocrdma_add_stat(stats, pcur, "threshold_frmr",
+                               (u64)rsrc_stats->frmr);
+       pcur += ocrdma_add_stat(stats, pcur, "threshold_prefetch_qps",
+                               (u64)rsrc_stats->prefetch_qps);
+       pcur += ocrdma_add_stat(stats, pcur, "threshold_ondemand_qps",
+                               (u64)rsrc_stats->ondemand_qps);
+       pcur += ocrdma_add_stat(stats, pcur, "threshold_phy_mr",
+                               (u64)rsrc_stats->phy_mr);
+       pcur += ocrdma_add_stat(stats, pcur, "threshold_mw",
+                               (u64)rsrc_stats->mw);
+       return stats;
+}
+
+static char *ocrdma_rx_stats(struct ocrdma_dev *dev)
+{
+       char *stats = dev->stats_mem.debugfs_mem, *pcur;
+       struct ocrdma_rdma_stats_resp *rdma_stats =
+               (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+       struct ocrdma_rx_stats *rx_stats = &rdma_stats->rx_stats;
+
+       memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+       pcur = stats;
+       pcur += ocrdma_add_stat
+               (stats, pcur, "roce_frame_bytes",
+                convert_to_64bit(rx_stats->roce_frame_bytes_lo,
+                rx_stats->roce_frame_bytes_hi));
+       pcur += ocrdma_add_stat(stats, pcur, "roce_frame_icrc_drops",
+                               (u64)rx_stats->roce_frame_icrc_drops);
+       pcur += ocrdma_add_stat(stats, pcur, "roce_frame_payload_len_drops",
+                               (u64)rx_stats->roce_frame_payload_len_drops);
+       pcur += ocrdma_add_stat(stats, pcur, "ud_drops",
+                               (u64)rx_stats->ud_drops);
+       pcur += ocrdma_add_stat(stats, pcur, "qp1_drops",
+                               (u64)rx_stats->qp1_drops);
+       pcur += ocrdma_add_stat(stats, pcur, "psn_error_request_packets",
+                               (u64)rx_stats->psn_error_request_packets);
+       pcur += ocrdma_add_stat(stats, pcur, "psn_error_resp_packets",
+                               (u64)rx_stats->psn_error_resp_packets);
+       pcur += ocrdma_add_stat(stats, pcur, "rnr_nak_timeouts",
+                               (u64)rx_stats->rnr_nak_timeouts);
+       pcur += ocrdma_add_stat(stats, pcur, "rnr_nak_receives",
+                               (u64)rx_stats->rnr_nak_receives);
+       pcur += ocrdma_add_stat(stats, pcur, "roce_frame_rxmt_drops",
+                               (u64)rx_stats->roce_frame_rxmt_drops);
+       pcur += ocrdma_add_stat(stats, pcur, "nak_count_psn_sequence_errors",
+                               (u64)rx_stats->nak_count_psn_sequence_errors);
+       pcur += ocrdma_add_stat(stats, pcur, "rc_drop_count_lookup_errors",
+                               (u64)rx_stats->rc_drop_count_lookup_errors);
+       pcur += ocrdma_add_stat(stats, pcur, "rq_rnr_naks",
+                               (u64)rx_stats->rq_rnr_naks);
+       pcur += ocrdma_add_stat(stats, pcur, "srq_rnr_naks",
+                               (u64)rx_stats->srq_rnr_naks);
+       pcur += ocrdma_add_stat(stats, pcur, "roce_frames",
+                               convert_to_64bit(rx_stats->roce_frames_lo,
+                                                rx_stats->roce_frames_hi));
+
+       return stats;
+}
+
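
The lo/hi pairs in the firmware response are stitched together with convert_to_64bit(), whose definition is not part of this hunk; given the (lo, hi) argument order used above, it is presumably the usual combination (a sketch, assuming it lives in ocrdma_stats.h):

        /* Presumed helper: rebuild a 64-bit counter split across two
         * 32-bit words by the firmware.
         */
        static inline u64 convert_to_64bit(u32 lo, u32 hi)
        {
                return ((u64)hi << 32) | (u64)lo;
        }
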
+static char *ocrdma_tx_stats(struct ocrdma_dev *dev)
+{
+       char *stats = dev->stats_mem.debugfs_mem, *pcur;
+       struct ocrdma_rdma_stats_resp *rdma_stats =
+               (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+       struct ocrdma_tx_stats *tx_stats = &rdma_stats->tx_stats;
+
+       memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+       pcur = stats;
+       pcur += ocrdma_add_stat(stats, pcur, "send_pkts",
+                               convert_to_64bit(tx_stats->send_pkts_lo,
+                                                tx_stats->send_pkts_hi));
+       pcur += ocrdma_add_stat(stats, pcur, "write_pkts",
+                               convert_to_64bit(tx_stats->write_pkts_lo,
+                                                tx_stats->write_pkts_hi));
+       pcur += ocrdma_add_stat(stats, pcur, "read_pkts",
+                               convert_to_64bit(tx_stats->read_pkts_lo,
+                                                tx_stats->read_pkts_hi));
+       pcur += ocrdma_add_stat(stats, pcur, "read_rsp_pkts",
+                               convert_to_64bit(tx_stats->read_rsp_pkts_lo,
+                                                tx_stats->read_rsp_pkts_hi));
+       pcur += ocrdma_add_stat(stats, pcur, "ack_pkts",
+                               convert_to_64bit(tx_stats->ack_pkts_lo,
+                                                tx_stats->ack_pkts_hi));
+       pcur += ocrdma_add_stat(stats, pcur, "send_bytes",
+                               convert_to_64bit(tx_stats->send_bytes_lo,
+                                                tx_stats->send_bytes_hi));
+       pcur += ocrdma_add_stat(stats, pcur, "write_bytes",
+                               convert_to_64bit(tx_stats->write_bytes_lo,
+                                                tx_stats->write_bytes_hi));
+       pcur += ocrdma_add_stat(stats, pcur, "read_req_bytes",
+                               convert_to_64bit(tx_stats->read_req_bytes_lo,
+                                                tx_stats->read_req_bytes_hi));
+       pcur += ocrdma_add_stat(stats, pcur, "read_rsp_bytes",
+                               convert_to_64bit(tx_stats->read_rsp_bytes_lo,
+                                                tx_stats->read_rsp_bytes_hi));
+       pcur += ocrdma_add_stat(stats, pcur, "ack_timeouts",
+                               (u64)tx_stats->ack_timeouts);
+
+       return stats;
+}
+
+static char *ocrdma_wqe_stats(struct ocrdma_dev *dev)
+{
+       char *stats = dev->stats_mem.debugfs_mem, *pcur;
+       struct ocrdma_rdma_stats_resp *rdma_stats =
+               (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+       struct ocrdma_wqe_stats *wqe_stats = &rdma_stats->wqe_stats;
+
+       memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+       pcur = stats;
+       pcur += ocrdma_add_stat(stats, pcur, "large_send_rc_wqes",
+               convert_to_64bit(wqe_stats->large_send_rc_wqes_lo,
+                                wqe_stats->large_send_rc_wqes_hi));
+       pcur += ocrdma_add_stat(stats, pcur, "large_write_rc_wqes",
+               convert_to_64bit(wqe_stats->large_write_rc_wqes_lo,
+                                wqe_stats->large_write_rc_wqes_hi));
+       pcur += ocrdma_add_stat(stats, pcur, "read_wqes",
+                               convert_to_64bit(wqe_stats->read_wqes_lo,
+                                                wqe_stats->read_wqes_hi));
+       pcur += ocrdma_add_stat(stats, pcur, "frmr_wqes",
+                               convert_to_64bit(wqe_stats->frmr_wqes_lo,
+                                                wqe_stats->frmr_wqes_hi));
+       pcur += ocrdma_add_stat(stats, pcur, "mw_bind_wqes",
+                               convert_to_64bit(wqe_stats->mw_bind_wqes_lo,
+                                                wqe_stats->mw_bind_wqes_hi));
+       pcur += ocrdma_add_stat(stats, pcur, "invalidate_wqes",
+               convert_to_64bit(wqe_stats->invalidate_wqes_lo,
+                                wqe_stats->invalidate_wqes_hi));
+       pcur += ocrdma_add_stat(stats, pcur, "dpp_wqe_drops",
+                               (u64)wqe_stats->dpp_wqe_drops);
+       return stats;
+}
+
+static char *ocrdma_db_errstats(struct ocrdma_dev *dev)
+{
+       char *stats = dev->stats_mem.debugfs_mem, *pcur;
+       struct ocrdma_rdma_stats_resp *rdma_stats =
+               (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+       struct ocrdma_db_err_stats *db_err_stats = &rdma_stats->db_err_stats;
+
+       memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+       pcur = stats;
+       pcur += ocrdma_add_stat(stats, pcur, "sq_doorbell_errors",
+                               (u64)db_err_stats->sq_doorbell_errors);
+       pcur += ocrdma_add_stat(stats, pcur, "cq_doorbell_errors",
+                               (u64)db_err_stats->cq_doorbell_errors);
+       pcur += ocrdma_add_stat(stats, pcur, "rq_srq_doorbell_errors",
+                               (u64)db_err_stats->rq_srq_doorbell_errors);
+       pcur += ocrdma_add_stat(stats, pcur, "cq_overflow_errors",
+                               (u64)db_err_stats->cq_overflow_errors);
+       return stats;
+}
+
+static char *ocrdma_rxqp_errstats(struct ocrdma_dev *dev)
+{
+       char *stats = dev->stats_mem.debugfs_mem, *pcur;
+       struct ocrdma_rdma_stats_resp *rdma_stats =
+               (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+       struct ocrdma_rx_qp_err_stats *rx_qp_err_stats =
+                &rdma_stats->rx_qp_err_stats;
+
+       memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+       pcur = stats;
+       pcur += ocrdma_add_stat(stats, pcur, "nak_invalid_requst_errors",
+                       (u64)rx_qp_err_stats->nak_invalid_requst_errors);
+       pcur += ocrdma_add_stat(stats, pcur, "nak_remote_operation_errors",
+                       (u64)rx_qp_err_stats->nak_remote_operation_errors);
+       pcur += ocrdma_add_stat(stats, pcur, "nak_count_remote_access_errors",
+                       (u64)rx_qp_err_stats->nak_count_remote_access_errors);
+       pcur += ocrdma_add_stat(stats, pcur, "local_length_errors",
+                       (u64)rx_qp_err_stats->local_length_errors);
+       pcur += ocrdma_add_stat(stats, pcur, "local_protection_errors",
+                       (u64)rx_qp_err_stats->local_protection_errors);
+       pcur += ocrdma_add_stat(stats, pcur, "local_qp_operation_errors",
+                       (u64)rx_qp_err_stats->local_qp_operation_errors);
+       return stats;
+}
+
+static char *ocrdma_txqp_errstats(struct ocrdma_dev *dev)
+{
+       char *stats = dev->stats_mem.debugfs_mem, *pcur;
+       struct ocrdma_rdma_stats_resp *rdma_stats =
+               (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+       struct ocrdma_tx_qp_err_stats *tx_qp_err_stats =
+               &rdma_stats->tx_qp_err_stats;
+
+       memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+       pcur = stats;
+       pcur += ocrdma_add_stat(stats, pcur, "local_length_errors",
+                       (u64)tx_qp_err_stats->local_length_errors);
+       pcur += ocrdma_add_stat(stats, pcur, "local_protection_errors",
+                       (u64)tx_qp_err_stats->local_protection_errors);
+       pcur += ocrdma_add_stat(stats, pcur, "local_qp_operation_errors",
+                       (u64)tx_qp_err_stats->local_qp_operation_errors);
+       pcur += ocrdma_add_stat(stats, pcur, "retry_count_exceeded_errors",
+                       (u64)tx_qp_err_stats->retry_count_exceeded_errors);
+       pcur += ocrdma_add_stat(stats, pcur, "rnr_retry_count_exceeded_errors",
+                       (u64)tx_qp_err_stats->rnr_retry_count_exceeded_errors);
+       return stats;
+}
+
+static char *ocrdma_tx_dbg_stats(struct ocrdma_dev *dev)
+{
+       int i;
+       char *pstats = dev->stats_mem.debugfs_mem;
+       struct ocrdma_rdma_stats_resp *rdma_stats =
+               (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+       struct ocrdma_tx_dbg_stats *tx_dbg_stats =
+               &rdma_stats->tx_dbg_stats;
+
+       memset(pstats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+       for (i = 0; i < 100; i++)
+               pstats += snprintf(pstats, 80, "DW[%d] = 0x%x\n", i,
+                                tx_dbg_stats->data[i]);
+
+       return dev->stats_mem.debugfs_mem;
+}
+
+static char *ocrdma_rx_dbg_stats(struct ocrdma_dev *dev)
+{
+       int i;
+       char *pstats = dev->stats_mem.debugfs_mem;
+       struct ocrdma_rdma_stats_resp *rdma_stats =
+               (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+       struct ocrdma_rx_dbg_stats *rx_dbg_stats =
+               &rdma_stats->rx_dbg_stats;
+
+       memset(pstats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+       for (i = 0; i < 200; i++)
+               pstats += snprintf(pstats, 80, "DW[%d] = 0x%x\n", i,
+                                rx_dbg_stats->data[i]);
+
+       return dev->stats_mem.debugfs_mem;
+}
+
+static void ocrdma_update_stats(struct ocrdma_dev *dev)
+{
+       ulong now = jiffies, secs;
+       int status = 0;
+
+       secs = jiffies_to_msecs(now - dev->last_stats_time) / 1000U;
+       if (secs) {
+               /* refresh from firmware at most once per second */
+               status = ocrdma_mbx_rdma_stats(dev, false);
+               if (status)
+                       pr_err("%s: stats mbox failed with status = %d\n",
+                              __func__, status);
+               dev->last_stats_time = jiffies;
+       }
+}
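
The guard above is a simple once-per-second throttle: firmware is only asked for fresh numbers when at least a second worth of jiffies has elapsed, so a tight loop of debugfs reads cannot flood the mailbox. The same shape in runnable userspace form (time() standing in for jiffies):

    #include <stdio.h>
    #include <time.h>

    static time_t last_stats_time;

    static void maybe_refresh(void)
    {
            time_t now = time(NULL);

            if (now - last_stats_time >= 1) {       /* mirrors the secs check */
                    puts("refresh (stand-in for ocrdma_mbx_rdma_stats())");
                    last_stats_time = now;
            }
    }

    int main(void)
    {
            maybe_refresh();        /* refreshes */
            maybe_refresh();        /* same second: skipped */
            return 0;
    }
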
+
+static ssize_t ocrdma_dbgfs_ops_read(struct file *filp, char __user *buffer,
+                                       size_t usr_buf_len, loff_t *ppos)
+{
+       struct ocrdma_stats *pstats = filp->private_data;
+       struct ocrdma_dev *dev = pstats->dev;
+       ssize_t status = 0;
+       char *data = NULL;
+
+       /* No partial reads */
+       if (*ppos != 0)
+               return 0;
+
+       mutex_lock(&dev->stats_lock);
+
+       ocrdma_update_stats(dev);
+
+       switch (pstats->type) {
+       case OCRDMA_RSRC_STATS:
+               data = ocrdma_resource_stats(dev);
+               break;
+       case OCRDMA_RXSTATS:
+               data = ocrdma_rx_stats(dev);
+               break;
+       case OCRDMA_WQESTATS:
+               data = ocrdma_wqe_stats(dev);
+               break;
+       case OCRDMA_TXSTATS:
+               data = ocrdma_tx_stats(dev);
+               break;
+       case OCRDMA_DB_ERRSTATS:
+               data = ocrdma_db_errstats(dev);
+               break;
+       case OCRDMA_RXQP_ERRSTATS:
+               data = ocrdma_rxqp_errstats(dev);
+               break;
+       case OCRDMA_TXQP_ERRSTATS:
+               data = ocrdma_txqp_errstats(dev);
+               break;
+       case OCRDMA_TX_DBG_STATS:
+               data = ocrdma_tx_dbg_stats(dev);
+               break;
+       case OCRDMA_RX_DBG_STATS:
+               data = ocrdma_rx_dbg_stats(dev);
+               break;
+
+       default:
+               status = -EFAULT;
+               goto exit;
+       }
+
+       if (usr_buf_len < strlen(data)) {
+               status = -ENOSPC;
+               goto exit;
+       }
+
+       status = simple_read_from_buffer(buffer, usr_buf_len, ppos, data,
+                                        strlen(data));
+exit:
+       mutex_unlock(&dev->stats_lock);
+       return status;
+}
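
simple_read_from_buffer() copies min(count, available - *ppos) bytes out and advances *ppos; combined with the *ppos != 0 early return and the -ENOSPC check above, a reader gets the whole snapshot in one read() or nothing. A userspace model of that arithmetic (memcpy standing in for copy_to_user()):

    #include <string.h>
    #include <sys/types.h>

    static ssize_t read_from_buffer(char *to, size_t count, long *ppos,
                                    const char *from, size_t available)
    {
            size_t pos = (size_t)*ppos;

            if (pos >= available)
                    return 0;
            if (count > available - pos)
                    count = available - pos;
            memcpy(to, from + pos, count);
            *ppos = (long)(pos + count);
            return (ssize_t)count;
    }
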
+
+static int ocrdma_debugfs_open(struct inode *inode, struct file *file)
+{
+       if (inode->i_private)
+               file->private_data = inode->i_private;
+       return 0;
+}
+
+static const struct file_operations ocrdma_dbg_ops = {
+       .owner = THIS_MODULE,
+       .open = ocrdma_debugfs_open,
+       .read = ocrdma_dbgfs_ops_read,
+};
+
+void ocrdma_add_port_stats(struct ocrdma_dev *dev)
+{
+       if (!ocrdma_dbgfs_dir)
+               return;
+
+       /* Create per-port stats base dir */
+       dev->dir = debugfs_create_dir(dev->ibdev.name, ocrdma_dbgfs_dir);
+       if (!dev->dir)
+               goto err;
+
+       dev->rsrc_stats.type = OCRDMA_RSRC_STATS;
+       dev->rsrc_stats.dev = dev;
+       if (!debugfs_create_file("resource_stats", S_IRUSR, dev->dir,
+                                &dev->rsrc_stats, &ocrdma_dbg_ops))
+               goto err;
+
+       dev->rx_stats.type = OCRDMA_RXSTATS;
+       dev->rx_stats.dev = dev;
+       if (!debugfs_create_file("rx_stats", S_IRUSR, dev->dir,
+                                &dev->rx_stats, &ocrdma_dbg_ops))
+               goto err;
+
+       dev->wqe_stats.type = OCRDMA_WQESTATS;
+       dev->wqe_stats.dev = dev;
+       if (!debugfs_create_file("wqe_stats", S_IRUSR, dev->dir,
+                                &dev->wqe_stats, &ocrdma_dbg_ops))
+               goto err;
+
+       dev->tx_stats.type = OCRDMA_TXSTATS;
+       dev->tx_stats.dev = dev;
+       if (!debugfs_create_file("tx_stats", S_IRUSR, dev->dir,
+                                &dev->tx_stats, &ocrdma_dbg_ops))
+               goto err;
+
+       dev->db_err_stats.type = OCRDMA_DB_ERRSTATS;
+       dev->db_err_stats.dev = dev;
+       if (!debugfs_create_file("db_err_stats", S_IRUSR, dev->dir,
+                                &dev->db_err_stats, &ocrdma_dbg_ops))
+               goto err;
+
+       dev->tx_qp_err_stats.type = OCRDMA_TXQP_ERRSTATS;
+       dev->tx_qp_err_stats.dev = dev;
+       if (!debugfs_create_file("tx_qp_err_stats", S_IRUSR, dev->dir,
+                                &dev->tx_qp_err_stats, &ocrdma_dbg_ops))
+               goto err;
+
+       dev->rx_qp_err_stats.type = OCRDMA_RXQP_ERRSTATS;
+       dev->rx_qp_err_stats.dev = dev;
+       if (!debugfs_create_file("rx_qp_err_stats", S_IRUSR, dev->dir,
+                                &dev->rx_qp_err_stats, &ocrdma_dbg_ops))
+               goto err;
+
+       dev->tx_dbg_stats.type = OCRDMA_TX_DBG_STATS;
+       dev->tx_dbg_stats.dev = dev;
+       if (!debugfs_create_file("tx_dbg_stats", S_IRUSR, dev->dir,
+                                &dev->tx_dbg_stats, &ocrdma_dbg_ops))
+               goto err;
+
+       dev->rx_dbg_stats.type = OCRDMA_RX_DBG_STATS;
+       dev->rx_dbg_stats.dev = dev;
+       if (!debugfs_create_file("rx_dbg_stats", S_IRUSR, dev->dir,
+                                &dev->rx_dbg_stats, &ocrdma_dbg_ops))
+               goto err;
+
+       /* Now create dma_mem for stats mbx command */
+       if (!ocrdma_alloc_stats_mem(dev))
+               goto err;
+
+       mutex_init(&dev->stats_lock);
+
+       return;
+err:
+       ocrdma_release_stats_mem(dev);
+       debugfs_remove_recursive(dev->dir);
+       dev->dir = NULL;
+}
+
+void ocrdma_rem_port_stats(struct ocrdma_dev *dev)
+{
+       if (!dev->dir)
+               return;
+       mutex_destroy(&dev->stats_lock);
+       ocrdma_release_stats_mem(dev);
+       debugfs_remove_recursive(dev->dir);
+}
+
+void ocrdma_init_debugfs(void)
+{
+       /* Create base dir in debugfs root dir */
+       ocrdma_dbgfs_dir = debugfs_create_dir("ocrdma", NULL);
+}
+
+void ocrdma_rem_debugfs(void)
+{
+       debugfs_remove_recursive(ocrdma_dbgfs_dir);
+}
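
These entry points pair across two lifetimes: ocrdma_init_debugfs()/ocrdma_rem_debugfs() bracket the module and create the shared /sys/kernel/debug/ocrdma root, while ocrdma_add_port_stats()/ocrdma_rem_port_stats() bracket each device underneath it. A sketch of the assumed wiring (the module hook names are illustrative):

    static int __init ocrdma_module_init(void)
    {
            ocrdma_init_debugfs();          /* shared debugfs root */
            return 0;
    }

    static void __exit ocrdma_module_exit(void)
    {
            ocrdma_rem_debugfs();           /* recursive removal of the root */
    }

    /* per device, from the add/remove paths:
     *     ocrdma_add_port_stats(dev);
     *     ...
     *     ocrdma_rem_port_stats(dev);
     */
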
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h
new file mode 100644 (file)
index 0000000..5f5e20c
--- /dev/null
@@ -0,0 +1,54 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for          *
+ * RoCE (RDMA over Converged Ethernet) adapters.                   *
+ * Copyright (C) 2008-2014 Emulex. All rights reserved.            *
+ * EMULEX and SLI are trademarks of Emulex.                        *
+ * www.emulex.com                                                  *
+ *                                                                 *
+ * This program is free software; you can redistribute it and/or   *
+ * modify it under the terms of version 2 of the GNU General       *
+ * Public License as published by the Free Software Foundation.    *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
+ * more details, a copy of which can be found in the file COPYING  *
+ * included with this package.                                     *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#ifndef __OCRDMA_STATS_H__
+#define __OCRDMA_STATS_H__
+
+#include <linux/debugfs.h>
+#include "ocrdma.h"
+#include "ocrdma_hw.h"
+
+#define OCRDMA_MAX_DBGFS_MEM 4096
+
+enum OCRDMA_STATS_TYPE {
+       OCRDMA_RSRC_STATS,
+       OCRDMA_RXSTATS,
+       OCRDMA_WQESTATS,
+       OCRDMA_TXSTATS,
+       OCRDMA_DB_ERRSTATS,
+       OCRDMA_RXQP_ERRSTATS,
+       OCRDMA_TXQP_ERRSTATS,
+       OCRDMA_TX_DBG_STATS,
+       OCRDMA_RX_DBG_STATS
+};
+
+void ocrdma_rem_debugfs(void);
+void ocrdma_init_debugfs(void);
+void ocrdma_rem_port_stats(struct ocrdma_dev *dev);
+void ocrdma_add_port_stats(struct ocrdma_dev *dev);
+
+#endif /* __OCRDMA_STATS_H__ */
index e0cc201..edf6211 100644 (file)
@@ -53,7 +53,7 @@ int ocrdma_query_gid(struct ib_device *ibdev, u8 port,
 
        dev = get_ocrdma_dev(ibdev);
        memset(sgid, 0, sizeof(*sgid));
-       if (index > OCRDMA_MAX_SGID)
+       if (index >= OCRDMA_MAX_SGID)
                return -EINVAL;
 
        memcpy(sgid, &dev->sgid_tbl[index], sizeof(*sgid));
@@ -89,7 +89,7 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr)
        attr->max_cq = dev->attr.max_cq;
        attr->max_cqe = dev->attr.max_cqe;
        attr->max_mr = dev->attr.max_mr;
-       attr->max_mw = 0;
+       attr->max_mw = dev->attr.max_mw;
        attr->max_pd = dev->attr.max_pd;
        attr->atomic_cap = 0;
        attr->max_fmr = 0;
@@ -144,7 +144,6 @@ static inline void get_link_speed_and_width(struct ocrdma_dev *dev,
        }
 }
 
-
 int ocrdma_query_port(struct ib_device *ibdev,
                      u8 port, struct ib_port_attr *props)
 {
@@ -267,7 +266,7 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev,
 
        if (udata && uctx) {
                pd->dpp_enabled =
-                       dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY;
+                       ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R;
                pd->num_dpp_qp =
                        pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0;
        }
@@ -726,10 +725,10 @@ static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
                            u32 num_pbes)
 {
        struct ocrdma_pbe *pbe;
-       struct ib_umem_chunk *chunk;
+       struct scatterlist *sg;
        struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
        struct ib_umem *umem = mr->umem;
-       int i, shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
+       int shift, pg_cnt, pages, pbe_cnt, entry, total_num_pbes = 0;
 
        if (!mr->hwmr.num_pbes)
                return;
@@ -739,39 +738,37 @@ static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
 
        shift = ilog2(umem->page_size);
 
-       list_for_each_entry(chunk, &umem->chunk_list, list) {
-               /* get all the dma regions from the chunk. */
-               for (i = 0; i < chunk->nmap; i++) {
-                       pages = sg_dma_len(&chunk->page_list[i]) >> shift;
-                       for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
-                               /* store the page address in pbe */
-                               pbe->pa_lo =
-                                   cpu_to_le32(sg_dma_address
-                                               (&chunk->page_list[i]) +
-                                               (umem->page_size * pg_cnt));
-                               pbe->pa_hi =
-                                   cpu_to_le32(upper_32_bits
-                                               ((sg_dma_address
-                                                 (&chunk->page_list[i]) +
-                                                 umem->page_size * pg_cnt)));
-                               pbe_cnt += 1;
-                               total_num_pbes += 1;
-                               pbe++;
-
-                               /* if done building pbes, issue the mbx cmd. */
-                               if (total_num_pbes == num_pbes)
-                                       return;
-
-                               /* if the given pbl is full storing the pbes,
-                                * move to next pbl.
-                                */
-                               if (pbe_cnt ==
-                                       (mr->hwmr.pbl_size / sizeof(u64))) {
-                                       pbl_tbl++;
-                                       pbe = (struct ocrdma_pbe *)pbl_tbl->va;
-                                       pbe_cnt = 0;
-                               }
+       for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+               pages = sg_dma_len(sg) >> shift;
+               for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
+                       /* store the page address in pbe */
+                       pbe->pa_lo =
+                           cpu_to_le32(sg_dma_address
+                                       (sg) +
+                                       (umem->page_size * pg_cnt));
+                       pbe->pa_hi =
+                           cpu_to_le32(upper_32_bits
+                                       ((sg_dma_address
+                                         (sg) +
+                                         umem->page_size * pg_cnt)));
+                       pbe_cnt += 1;
+                       total_num_pbes += 1;
+                       pbe++;
+
+                       /* if done building pbes, issue the mbx cmd. */
+                       if (total_num_pbes == num_pbes)
+                               return;
+
+                       /* if the given pbl is full storing the pbes,
+                        * move to next pbl.
+                        */
+                       if (pbe_cnt ==
+                               (mr->hwmr.pbl_size / sizeof(u64))) {
+                               pbl_tbl++;
+                               pbe = (struct ocrdma_pbe *)pbl_tbl->va;
+                               pbe_cnt = 0;
                        }
                }
        }
 }
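
This hunk is the recurring ib_umem_chunk removal pattern in this merge: the nested per-chunk walk collapses into one for_each_sg() pass over umem->sg_head.sgl, with sg_dma_address()/sg_dma_len() describing each mapped region. The skeleton with the PBE bookkeeping stripped out (a sketch, not the driver's code):

    struct scatterlist *sg;
    int entry, pg_cnt, pages;
    int shift = ilog2(umem->page_size);

    for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
            pages = sg_dma_len(sg) >> shift;
            for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
                    u64 pa = sg_dma_address(sg) + ((u64)pg_cnt << shift);
                    /* hand one page-sized DMA address to the consumer */
            }
    }
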
@@ -840,8 +837,7 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr)
 
        status = ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey);
 
-       if (mr->hwmr.fr_mr == 0)
-               ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
+       ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
 
        /* it could be user registered memory. */
        if (mr->umem)
@@ -910,6 +906,7 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector,
        spin_lock_init(&cq->comp_handler_lock);
        INIT_LIST_HEAD(&cq->sq_head);
        INIT_LIST_HEAD(&cq->rq_head);
+       cq->first_arm = true;
 
        if (ib_ctx) {
                uctx = get_ocrdma_ucontext(ib_ctx);
@@ -927,9 +924,7 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector,
                        goto ctx_err;
        }
        cq->phase = OCRDMA_CQE_VALID;
-       cq->arm_needed = true;
        dev->cq_tbl[cq->id] = cq;
-
        return &cq->ibcq;
 
 ctx_err:
@@ -952,15 +947,52 @@ int ocrdma_resize_cq(struct ib_cq *ibcq, int new_cnt,
        return status;
 }
 
+static void ocrdma_flush_cq(struct ocrdma_cq *cq)
+{
+       int cqe_cnt;
+       int valid_count = 0;
+       unsigned long flags;
+
+       struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device);
+       struct ocrdma_cqe *cqe = NULL;
+
+       cqe = cq->va;
+       cqe_cnt = cq->cqe_cnt;
+
+       /* The last irq might have scheduled a polling thread;
+        * sync up with it before hard flushing.
+        */
+       spin_lock_irqsave(&cq->cq_lock, flags);
+       while (cqe_cnt) {
+               if (is_cqe_valid(cq, cqe))
+                       valid_count++;
+               cqe++;
+               cqe_cnt--;
+       }
+       ocrdma_ring_cq_db(dev, cq->id, false, false, valid_count);
+       spin_unlock_irqrestore(&cq->cq_lock, flags);
+}
+
 int ocrdma_destroy_cq(struct ib_cq *ibcq)
 {
        int status;
        struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
+       struct ocrdma_eq *eq = NULL;
        struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
        int pdid = 0;
+       u32 irq, indx;
 
-       status = ocrdma_mbx_destroy_cq(dev, cq);
+       dev->cq_tbl[cq->id] = NULL;
+       indx = ocrdma_get_eq_table_index(dev, cq->eqn);
+       if (indx == -EINVAL)
+               BUG();
 
+       eq = &dev->eq_tbl[indx];
+       irq = ocrdma_get_irq(dev, eq);
+       synchronize_irq(irq);
+       ocrdma_flush_cq(cq);
+
+       status = ocrdma_mbx_destroy_cq(dev, cq);
        if (cq->ucontext) {
                pdid = cq->ucontext->cntxt_pd->id;
                ocrdma_del_mmap(cq->ucontext, (u64) cq->pa,
@@ -969,7 +1001,6 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq)
                                ocrdma_get_db_addr(dev, pdid),
                                dev->nic_info.db_page_size);
        }
-       dev->cq_tbl[cq->id] = NULL;
 
        kfree(cq);
        return status;
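
The motivation for moving the table clear ahead of the mailbox call is ordering: once dev->cq_tbl[cq->id] is NULL, the EQ handler can no longer look this CQ up; synchronize_irq() then waits out any handler already in flight, and ocrdma_flush_cq() acknowledges whatever CQEs remain before the hardware object goes away. The sequence, annotated:

    dev->cq_tbl[cq->id] = NULL;     /* 1. new interrupts can't find the CQ   */
    synchronize_irq(irq);           /* 2. drain a handler already running    */
    ocrdma_flush_cq(cq);            /* 3. ack the CQEs still in the ring     */
    ocrdma_mbx_destroy_cq(dev, cq); /* 4. only now destroy it in hardware    */
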
@@ -1092,15 +1123,9 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
        }
        uresp.db_page_addr = usr_db;
        uresp.db_page_size = dev->nic_info.db_page_size;
-       if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
-               uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET;
-               uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
-               uresp.db_shift = 24;
-       } else {
-               uresp.db_sq_offset = OCRDMA_DB_SQ_OFFSET;
-               uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET;
-               uresp.db_shift = 16;
-       }
+       uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET;
+       uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
+       uresp.db_shift = OCRDMA_DB_RQ_SHIFT;
 
        if (qp->dpp_enabled) {
                uresp.dpp_credit = dpp_credit_lmt;
@@ -1132,7 +1157,7 @@ err:
 static void ocrdma_set_qp_db(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
                             struct ocrdma_pd *pd)
 {
-       if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+       if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
                qp->sq_db = dev->nic_info.db +
                        (pd->id * dev->nic_info.db_page_size) +
                        OCRDMA_DB_GEN2_SQ_OFFSET;
@@ -1182,7 +1207,6 @@ static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp,
        qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
 }
 
-
 static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev,
                                   struct ib_qp_init_attr *attrs)
 {
@@ -1268,17 +1292,6 @@ gen_err:
        return ERR_PTR(status);
 }
 
-
-static void ocrdma_flush_rq_db(struct ocrdma_qp *qp)
-{
-       if (qp->db_cache) {
-               u32 val = qp->rq.dbid | (qp->db_cache <<
-                               ocrdma_get_num_posted_shift(qp));
-               iowrite32(val, qp->rq_db);
-               qp->db_cache = 0;
-       }
-}
-
 int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                      int attr_mask)
 {
@@ -1296,9 +1309,7 @@ int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
         */
        if (status < 0)
                return status;
-       status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask, old_qps);
-       if (!status && attr_mask & IB_QP_STATE && attr->qp_state == IB_QPS_RTR)
-               ocrdma_flush_rq_db(qp);
+       status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask);
 
        return status;
 }
@@ -1510,7 +1521,7 @@ static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq)
        int discard_cnt = 0;
        u32 cur_getp, stop_getp;
        struct ocrdma_cqe *cqe;
-       u32 qpn = 0;
+       u32 qpn = 0, wqe_idx = 0;
 
        spin_lock_irqsave(&cq->cq_lock, cq_flags);
 
@@ -1539,24 +1550,29 @@ static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq)
                if (qpn == 0 || qpn != qp->id)
                        goto skip_cqe;
 
-               /* mark cqe discarded so that it is not picked up later
-                * in the poll_cq().
-                */
-               discard_cnt += 1;
-               cqe->cmn.qpn = 0;
                if (is_cqe_for_sq(cqe)) {
                        ocrdma_hwq_inc_tail(&qp->sq);
                } else {
                        if (qp->srq) {
+                               wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
+                                       OCRDMA_CQE_BUFTAG_SHIFT) &
+                                       qp->srq->rq.max_wqe_idx;
+                               if (wqe_idx < 1)
+                                       BUG();
                                spin_lock_irqsave(&qp->srq->q_lock, flags);
                                ocrdma_hwq_inc_tail(&qp->srq->rq);
-                               ocrdma_srq_toggle_bit(qp->srq, cur_getp);
+                               ocrdma_srq_toggle_bit(qp->srq, wqe_idx - 1);
                                spin_unlock_irqrestore(&qp->srq->q_lock, flags);
 
                        } else {
                                ocrdma_hwq_inc_tail(&qp->rq);
                        }
                }
+               /* mark cqe discarded so that it is not picked up later
+                * in the poll_cq().
+                */
+               discard_cnt += 1;
+               cqe->cmn.qpn = 0;
 skip_cqe:
                cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
        } while (cur_getp != stop_getp);
@@ -1659,7 +1675,7 @@ static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq,
            (srq->pd->id * dev->nic_info.db_page_size);
        uresp.db_page_size = dev->nic_info.db_page_size;
        uresp.num_rqe_allocated = srq->rq.max_cnt;
-       if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+       if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
                uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
                uresp.db_shift = 24;
        } else {
@@ -2009,15 +2025,15 @@ static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
        fast_reg->num_sges = wr->wr.fast_reg.page_list_len;
        fast_reg->size_sge =
                get_encoded_page_size(1 << wr->wr.fast_reg.page_shift);
-       mr = (struct ocrdma_mr *) (unsigned long) qp->dev->stag_arr[(hdr->lkey >> 8) &
-               (OCRDMA_MAX_STAG - 1)];
+       mr = (struct ocrdma_mr *) (unsigned long)
+               qp->dev->stag_arr[(hdr->lkey >> 8) & (OCRDMA_MAX_STAG - 1)];
        build_frmr_pbes(wr, mr->hwmr.pbl_table, &mr->hwmr);
        return 0;
 }
 
 static void ocrdma_ring_sq_db(struct ocrdma_qp *qp)
 {
-       u32 val = qp->sq.dbid | (1 << 16);
+       u32 val = qp->sq.dbid | (1 << OCRDMA_DB_SQ_SHIFT);
 
        iowrite32(val, qp->sq_db);
 }
@@ -2122,12 +2138,9 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 static void ocrdma_ring_rq_db(struct ocrdma_qp *qp)
 {
-       u32 val = qp->rq.dbid | (1 << ocrdma_get_num_posted_shift(qp));
+       u32 val = qp->rq.dbid | (1 << OCRDMA_DB_RQ_SHIFT);
 
-       if (qp->state != OCRDMA_QPS_INIT)
-               iowrite32(val, qp->rq_db);
-       else
-               qp->db_cache++;
+       iowrite32(val, qp->rq_db);
 }
 
 static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr,
@@ -2213,7 +2226,7 @@ static int ocrdma_srq_get_idx(struct ocrdma_srq *srq)
 
        if (row == srq->bit_fields_len)
                BUG();
-       return indx;
+       return indx + 1; /* SRQ WQE indices are used from 1; 0 is reserved */
 }
 
 static void ocrdma_ring_srq_db(struct ocrdma_srq *srq)
@@ -2550,10 +2563,13 @@ static void ocrdma_update_free_srq_cqe(struct ib_wc *ibwc,
 
        srq = get_ocrdma_srq(qp->ibqp.srq);
        wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
-                       OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx;
+               OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx;
+       if (wqe_idx < 1)
+               BUG();
+
        ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx];
        spin_lock_irqsave(&srq->q_lock, flags);
-       ocrdma_srq_toggle_bit(srq, wqe_idx);
+       ocrdma_srq_toggle_bit(srq, wqe_idx - 1);
        spin_unlock_irqrestore(&srq->q_lock, flags);
        ocrdma_hwq_inc_tail(&srq->rq);
 }
@@ -2705,10 +2721,18 @@ expand_cqe:
        }
 stop_cqe:
        cq->getp = cur_getp;
-       if (polled_hw_cqes || expand || stop) {
-               ocrdma_ring_cq_db(dev, cq->id, cq->armed, cq->solicited,
+       if (cq->deferred_arm) {
+               ocrdma_ring_cq_db(dev, cq->id, true, cq->deferred_sol,
+                                 polled_hw_cqes);
+               cq->deferred_arm = false;
+               cq->deferred_sol = false;
+       } else {
+               /* We need to pop the CQE. No need to arm */
+               ocrdma_ring_cq_db(dev, cq->id, false, cq->deferred_sol,
                                  polled_hw_cqes);
+               cq->deferred_sol = false;
        }
+
        return i;
 }
 
@@ -2780,30 +2804,28 @@ int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags)
        struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
        struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
        u16 cq_id;
-       u16 cur_getp;
-       struct ocrdma_cqe *cqe;
        unsigned long flags;
+       bool arm_needed = false, sol_needed = false;
 
        cq_id = cq->id;
 
        spin_lock_irqsave(&cq->cq_lock, flags);
        if (cq_flags & IB_CQ_NEXT_COMP || cq_flags & IB_CQ_SOLICITED)
-               cq->armed = true;
+               arm_needed = true;
        if (cq_flags & IB_CQ_SOLICITED)
-               cq->solicited = true;
-
-       cur_getp = cq->getp;
-       cqe = cq->va + cur_getp;
+               sol_needed = true;
 
-       /* check whether any valid cqe exist or not, if not then safe to
-        * arm. If cqe is not yet consumed, then let it get consumed and then
-        * we arm it to avoid false interrupts.
-        */
-       if (!is_cqe_valid(cq, cqe) || cq->arm_needed) {
-               cq->arm_needed = false;
-               ocrdma_ring_cq_db(dev, cq_id, cq->armed, cq->solicited, 0);
+       if (cq->first_arm) {
+               ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0);
+               cq->first_arm = false;
+               goto skip_defer;
        }
+       cq->deferred_arm = true;
+
+skip_defer:
+       cq->deferred_sol = sol_needed;
        spin_unlock_irqrestore(&cq->cq_lock, flags);
+
        return 0;
 }
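
The new scheme arms the hardware immediately only on the very first request; every later request just sets deferred_arm, and the poll path rings the doorbell once, folding the arm into the same write that returns CQE credits. A toy model of that handshake (the structure and printf are illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    struct cq { bool first_arm, deferred_arm, deferred_sol; };

    static void ring_db(bool arm, bool sol)
    {
            printf("doorbell: arm=%d sol=%d\n", arm, sol);
    }

    static void arm_cq(struct cq *cq, bool arm, bool sol)
    {
            if (cq->first_arm) {
                    ring_db(arm, sol);          /* nothing polled yet: arm now */
                    cq->first_arm = false;
            } else {
                    cq->deferred_arm = true;    /* the poll path will ring */
            }
            cq->deferred_sol = sol;
    }

    static void poll_done(struct cq *cq)
    {
            ring_db(cq->deferred_arm, cq->deferred_sol);
            cq->deferred_arm = false;
            cq->deferred_sol = false;
    }

    int main(void)
    {
            struct cq cq = { .first_arm = true };

            arm_cq(&cq, true, false);       /* rings immediately */
            arm_cq(&cq, true, true);        /* deferred */
            poll_done(&cq);                 /* rings with arm=1 */
            return 0;
    }
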
 
@@ -2838,7 +2860,8 @@ struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *ibpd, int max_page_list_len)
                goto mbx_err;
        mr->ibmr.rkey = mr->hwmr.lkey;
        mr->ibmr.lkey = mr->hwmr.lkey;
-       dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = mr;
+       dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] =
+               (unsigned long) mr;
        return &mr->ibmr;
 mbx_err:
        ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
index 1946101..c00ae09 100644 (file)
@@ -868,8 +868,10 @@ struct qib_devdata {
        /* last buffer for user use */
        u32 lastctxt_piobuf;
 
-       /* saturating counter of (non-port-specific) device interrupts */
-       u32 int_counter;
+       /* reset value */
+       u64 z_int_counter;
+       /* percpu interrupt counter */
+       u64 __percpu *int_counter;
 
        /* pio bufs allocated per ctxt */
        u32 pbufsctxt;
@@ -1184,7 +1186,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *);
 void qib_set_ctxtcnt(struct qib_devdata *);
 int qib_create_ctxts(struct qib_devdata *dd);
 struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32, int);
-void qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8);
+int qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8);
 void qib_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *);
 
 u32 qib_kreceive(struct qib_ctxtdata *, u32 *, u32 *);
@@ -1449,6 +1451,10 @@ void qib_nomsi(struct qib_devdata *);
 void qib_nomsix(struct qib_devdata *);
 void qib_pcie_getcmd(struct qib_devdata *, u16 *, u8 *, u8 *);
 void qib_pcie_reenable(struct qib_devdata *, u16, u8, u8);
+/* interrupts for device */
+u64 qib_int_counter(struct qib_devdata *);
+/* interrupts across all devices */
+u64 qib_sps_ints(void);
 
 /*
  * dma_addr wrappers - all 0's invalid for hw
index 1686fd4..5dfda4c 100644 (file)
@@ -546,7 +546,7 @@ static ssize_t qib_diagpkt_write(struct file *fp,
                                 size_t count, loff_t *off)
 {
        u32 __iomem *piobuf;
-       u32 plen, clen, pbufn;
+       u32 plen, pbufn, maxlen_reserve;
        struct qib_diag_xpkt dp;
        u32 *tmpbuf = NULL;
        struct qib_devdata *dd;
@@ -590,15 +590,20 @@ static ssize_t qib_diagpkt_write(struct file *fp,
        }
        ppd = &dd->pport[dp.port - 1];
 
-       /* need total length before first word written */
-       /* +1 word is for the qword padding */
-       plen = sizeof(u32) + dp.len;
-       clen = dp.len >> 2;
-
-       if ((plen + 4) > ppd->ibmaxlen) {
+       /*
+        * need total length before first word written, plus 2 Dwords. One Dword
+        * is for padding so we get the full user data when not aligned on
+        * a word boundary. The other Dword is to make sure we have room for the
+        * ICRC which gets tacked on later.
+        */
+       maxlen_reserve = 2 * sizeof(u32);
+       if (dp.len > ppd->ibmaxlen - maxlen_reserve) {
                ret = -EINVAL;
-               goto bail;      /* before writing pbc */
+               goto bail;
        }
+
+       plen = sizeof(u32) + dp.len;
+
        tmpbuf = vmalloc(plen);
        if (!tmpbuf) {
                qib_devinfo(dd->pcidev,
@@ -638,11 +643,11 @@ static ssize_t qib_diagpkt_write(struct file *fp,
         */
        if (dd->flags & QIB_PIO_FLUSH_WC) {
                qib_flush_wc();
-               qib_pio_copy(piobuf + 2, tmpbuf, clen - 1);
+               qib_pio_copy(piobuf + 2, tmpbuf, plen - 1);
                qib_flush_wc();
-               __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
+               __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1);
        } else
-               qib_pio_copy(piobuf + 2, tmpbuf, clen);
+               qib_pio_copy(piobuf + 2, tmpbuf, plen);
 
        if (dd->flags & QIB_USE_SPCL_TRIG) {
                u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
@@ -689,28 +694,23 @@ int qib_register_observer(struct qib_devdata *dd,
                          const struct diag_observer *op)
 {
        struct diag_observer_list_elt *olp;
-       int ret = -EINVAL;
+       unsigned long flags;
 
        if (!dd || !op)
-               goto bail;
-       ret = -ENOMEM;
+               return -EINVAL;
        olp = vmalloc(sizeof *olp);
        if (!olp) {
                pr_err("vmalloc for observer failed\n");
-               goto bail;
+               return -ENOMEM;
        }
-       if (olp) {
-               unsigned long flags;
 
-               spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
-               olp->op = op;
-               olp->next = dd->diag_observer_list;
-               dd->diag_observer_list = olp;
-               spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
-               ret = 0;
-       }
-bail:
-       return ret;
+       spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
+       olp->op = op;
+       olp->next = dd->diag_observer_list;
+       dd->diag_observer_list = olp;
+       spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
+
+       return 0;
 }
 
 /* Remove all registered observers when device is closed */
index 2920bb3..59fe092 100644 (file)
@@ -108,6 +108,10 @@ static int qib_map_sg(struct ib_device *dev, struct scatterlist *sgl,
                        ret = 0;
                        break;
                }
+               sg->dma_address = addr + sg->offset;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+               sg->dma_length = sg->length;
+#endif
        }
        return ret;
 }
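
qib's verbs DMA ops are an identity mapping (the chip consumes kernel virtual addresses), so once the generic dma_address/dma_len accessors go away, map_sg has to populate sg->dma_address itself or later sg_dma_address() reads return garbage. The essence of such an identity map_sg, under that assumption:

    static int identity_map_sg(struct scatterlist *sgl, int nents)
    {
            struct scatterlist *sg;
            int i;

            for_each_sg(sgl, sg, nents, i) {
                    u64 addr = (u64) page_address(sg_page(sg));

                    if (!addr)
                            return 0;       /* highmem page: no mapping */
                    sg->dma_address = addr + sg->offset;
    #ifdef CONFIG_NEED_SG_DMA_LENGTH
                    sg->dma_length = sg->length;
    #endif
            }
            return nents;
    }
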
@@ -119,21 +123,6 @@ static void qib_unmap_sg(struct ib_device *dev,
        BUG_ON(!valid_dma_direction(direction));
 }
 
-static u64 qib_sg_dma_address(struct ib_device *dev, struct scatterlist *sg)
-{
-       u64 addr = (u64) page_address(sg_page(sg));
-
-       if (addr)
-               addr += sg->offset;
-       return addr;
-}
-
-static unsigned int qib_sg_dma_len(struct ib_device *dev,
-                                  struct scatterlist *sg)
-{
-       return sg->length;
-}
-
 static void qib_sync_single_for_cpu(struct ib_device *dev, u64 addr,
                                    size_t size, enum dma_data_direction dir)
 {
@@ -173,8 +162,6 @@ struct ib_dma_mapping_ops qib_dma_mapping_ops = {
        .unmap_page = qib_dma_unmap_page,
        .map_sg = qib_map_sg,
        .unmap_sg = qib_unmap_sg,
-       .dma_address = qib_sg_dma_address,
-       .dma_len = qib_sg_dma_len,
        .sync_single_for_cpu = qib_sync_single_for_cpu,
        .sync_single_for_device = qib_sync_single_for_device,
        .alloc_coherent = qib_dma_alloc_coherent,
index 275f247..b15e34e 100644 (file)
@@ -1459,7 +1459,7 @@ static int get_a_ctxt(struct file *fp, const struct qib_user_info *uinfo,
                                        cused++;
                                else
                                        cfree++;
-                       if (pusable && cfree && cused < inuse) {
+                       if (cfree && cused < inuse) {
                                udd = dd;
                                inuse = cused;
                        }
@@ -1578,7 +1578,7 @@ static int do_qib_user_sdma_queue_create(struct file *fp)
        struct qib_ctxtdata *rcd = fd->rcd;
        struct qib_devdata *dd = rcd->dd;
 
-       if (dd->flags & QIB_HAS_SEND_DMA)
+       if (dd->flags & QIB_HAS_SEND_DMA) {
 
                fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev,
                                                    dd->unit,
@@ -1586,6 +1586,7 @@ static int do_qib_user_sdma_queue_create(struct file *fp)
                                                    fd->subctxt);
                if (!fd->pq)
                        return -ENOMEM;
+       }
 
        return 0;
 }
index c61e2a9..cab610c 100644 (file)
@@ -105,6 +105,7 @@ static int create_file(const char *name, umode_t mode,
 static ssize_t driver_stats_read(struct file *file, char __user *buf,
                                 size_t count, loff_t *ppos)
 {
+       qib_stats.sps_ints = qib_sps_ints();
        return simple_read_from_buffer(buf, count, ppos, &qib_stats,
                                       sizeof qib_stats);
 }
index 84e593d..d68266a 100644 (file)
@@ -1634,9 +1634,7 @@ static irqreturn_t qib_6120intr(int irq, void *data)
                goto bail;
        }
 
-       qib_stats.sps_ints++;
-       if (dd->int_counter != (u32) -1)
-               dd->int_counter++;
+       this_cpu_inc(*dd->int_counter);
 
        if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT |
                              QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR)))
@@ -1808,7 +1806,8 @@ static int qib_6120_setup_reset(struct qib_devdata *dd)
         * isn't set.
         */
        dd->flags &= ~(QIB_INITTED | QIB_PRESENT);
-       dd->int_counter = 0; /* so we check interrupts work again */
+       /* so we check interrupts work again */
+       dd->z_int_counter = qib_int_counter(dd);
        val = dd->control | QLOGIC_IB_C_RESET;
        writeq(val, &dd->kregbase[kr_control]);
        mb(); /* prevent compiler re-ordering around actual reset */
@@ -3266,7 +3265,9 @@ static int init_6120_variables(struct qib_devdata *dd)
 
        dd->eep_st_masks[2].errs_to_log = ERR_MASK(ResetNegated);
 
-       qib_init_pportdata(ppd, dd, 0, 1);
+       ret = qib_init_pportdata(ppd, dd, 0, 1);
+       if (ret)
+               goto bail;
        ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
        ppd->link_speed_supported = QIB_IB_SDR;
        ppd->link_width_enabled = IB_WIDTH_4X;
index 454c2e7..7dec89f 100644 (file)
@@ -1962,10 +1962,7 @@ static irqreturn_t qib_7220intr(int irq, void *data)
                goto bail;
        }
 
-       qib_stats.sps_ints++;
-       if (dd->int_counter != (u32) -1)
-               dd->int_counter++;
-
+       this_cpu_inc(*dd->int_counter);
        if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT |
                              QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR)))
                unlikely_7220_intr(dd, istat);
@@ -2120,7 +2117,8 @@ static int qib_setup_7220_reset(struct qib_devdata *dd)
         * isn't set.
         */
        dd->flags &= ~(QIB_INITTED | QIB_PRESENT);
-       dd->int_counter = 0; /* so we check interrupts work again */
+       /* so we check interrupts work again */
+       dd->z_int_counter = qib_int_counter(dd);
        val = dd->control | QLOGIC_IB_C_RESET;
        writeq(val, &dd->kregbase[kr_control]);
        mb(); /* prevent compiler reordering around actual reset */
@@ -4061,7 +4059,9 @@ static int qib_init_7220_variables(struct qib_devdata *dd)
        init_waitqueue_head(&cpspec->autoneg_wait);
        INIT_DELAYED_WORK(&cpspec->autoneg_work, autoneg_7220_work);
 
-       qib_init_pportdata(ppd, dd, 0, 1);
+       ret = qib_init_pportdata(ppd, dd, 0, 1);
+       if (ret)
+               goto bail;
        ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
        ppd->link_speed_supported = QIB_IB_SDR | QIB_IB_DDR;
 
index d1bd213..a7eb325 100644 (file)
@@ -3115,9 +3115,7 @@ static irqreturn_t qib_7322intr(int irq, void *data)
                goto bail;
        }
 
-       qib_stats.sps_ints++;
-       if (dd->int_counter != (u32) -1)
-               dd->int_counter++;
+       this_cpu_inc(*dd->int_counter);
 
        /* handle "errors" of various kinds first, device ahead of port */
        if (unlikely(istat & (~QIB_I_BITSEXTANT | QIB_I_GPIO |
@@ -3186,9 +3184,7 @@ static irqreturn_t qib_7322pintr(int irq, void *data)
                 */
                return IRQ_HANDLED;
 
-       qib_stats.sps_ints++;
-       if (dd->int_counter != (u32) -1)
-               dd->int_counter++;
+       this_cpu_inc(*dd->int_counter);
 
        /* Clear the interrupt bit we expect to be set. */
        qib_write_kreg(dd, kr_intclear, ((1ULL << QIB_I_RCVAVAIL_LSB) |
@@ -3215,9 +3211,7 @@ static irqreturn_t qib_7322bufavail(int irq, void *data)
                 */
                return IRQ_HANDLED;
 
-       qib_stats.sps_ints++;
-       if (dd->int_counter != (u32) -1)
-               dd->int_counter++;
+       this_cpu_inc(*dd->int_counter);
 
        /* Clear the interrupt bit we expect to be set. */
        qib_write_kreg(dd, kr_intclear, QIB_I_SPIOBUFAVAIL);
@@ -3248,9 +3242,7 @@ static irqreturn_t sdma_intr(int irq, void *data)
                 */
                return IRQ_HANDLED;
 
-       qib_stats.sps_ints++;
-       if (dd->int_counter != (u32) -1)
-               dd->int_counter++;
+       this_cpu_inc(*dd->int_counter);
 
        /* Clear the interrupt bit we expect to be set. */
        qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
@@ -3277,9 +3269,7 @@ static irqreturn_t sdma_idle_intr(int irq, void *data)
                 */
                return IRQ_HANDLED;
 
-       qib_stats.sps_ints++;
-       if (dd->int_counter != (u32) -1)
-               dd->int_counter++;
+       this_cpu_inc(*dd->int_counter);
 
        /* Clear the interrupt bit we expect to be set. */
        qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
@@ -3306,9 +3296,7 @@ static irqreturn_t sdma_progress_intr(int irq, void *data)
                 */
                return IRQ_HANDLED;
 
-       qib_stats.sps_ints++;
-       if (dd->int_counter != (u32) -1)
-               dd->int_counter++;
+       this_cpu_inc(*dd->int_counter);
 
        /* Clear the interrupt bit we expect to be set. */
        qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
@@ -3336,9 +3324,7 @@ static irqreturn_t sdma_cleanup_intr(int irq, void *data)
                 */
                return IRQ_HANDLED;
 
-       qib_stats.sps_ints++;
-       if (dd->int_counter != (u32) -1)
-               dd->int_counter++;
+       this_cpu_inc(*dd->int_counter);
 
        /* Clear the interrupt bit we expect to be set. */
        qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
@@ -3723,7 +3709,8 @@ static int qib_do_7322_reset(struct qib_devdata *dd)
        dd->pport->cpspec->ibsymdelta = 0;
        dd->pport->cpspec->iblnkerrdelta = 0;
        dd->pport->cpspec->ibmalfdelta = 0;
-       dd->int_counter = 0; /* so we check interrupts work again */
+       /* so we check interrupts work again */
+       dd->z_int_counter = qib_int_counter(dd);
 
        /*
         * Keep chip from being accessed until we are ready.  Use
@@ -6557,7 +6544,11 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
                }
 
                dd->num_pports++;
-               qib_init_pportdata(ppd, dd, pidx, dd->num_pports);
+               ret = qib_init_pportdata(ppd, dd, pidx, dd->num_pports);
+               if (ret) {
+                       dd->num_pports--;
+                       goto bail;
+               }
 
                ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
                ppd->link_width_enabled = IB_WIDTH_4X;
index 24e802f..5b7aeb2 100644 (file)
@@ -130,7 +130,6 @@ void qib_set_ctxtcnt(struct qib_devdata *dd)
 int qib_create_ctxts(struct qib_devdata *dd)
 {
        unsigned i;
-       int ret;
        int local_node_id = pcibus_to_node(dd->pcidev->bus);
 
        if (local_node_id < 0)
@@ -145,8 +144,7 @@ int qib_create_ctxts(struct qib_devdata *dd)
        if (!dd->rcd) {
                qib_dev_err(dd,
                        "Unable to allocate ctxtdata array, failing\n");
-               ret = -ENOMEM;
-               goto done;
+               return -ENOMEM;
        }
 
        /* create (one or more) kctxt */
@@ -163,15 +161,14 @@ int qib_create_ctxts(struct qib_devdata *dd)
                if (!rcd) {
                        qib_dev_err(dd,
                                "Unable to allocate ctxtdata for Kernel ctxt, failing\n");
-                       ret = -ENOMEM;
-                       goto done;
+                       kfree(dd->rcd);
+                       dd->rcd = NULL;
+                       return -ENOMEM;
                }
                rcd->pkeys[0] = QIB_DEFAULT_P_KEY;
                rcd->seq_cnt = 1;
        }
-       ret = 0;
-done:
-       return ret;
+       return 0;
 }
 
 /*
@@ -233,7 +230,7 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt,
 /*
  * Common code for initializing the physical port structure.
  */
-void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
+int qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
                        u8 hw_pidx, u8 port)
 {
        int size;
@@ -243,6 +240,7 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
 
        spin_lock_init(&ppd->sdma_lock);
        spin_lock_init(&ppd->lflags_lock);
+       spin_lock_init(&ppd->cc_shadow_lock);
        init_waitqueue_head(&ppd->state_wait);
 
        init_timer(&ppd->symerr_clear_timer);
@@ -250,8 +248,10 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
        ppd->symerr_clear_timer.data = (unsigned long)ppd;
 
        ppd->qib_wq = NULL;
-
-       spin_lock_init(&ppd->cc_shadow_lock);
+       ppd->ibport_data.pmastats =
+               alloc_percpu(struct qib_pma_counters);
+       if (!ppd->ibport_data.pmastats)
+               return -ENOMEM;
 
        if (qib_cc_table_size < IB_CCT_MIN_ENTRIES)
                goto bail;
@@ -299,7 +299,7 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
                goto bail_3;
        }
 
-       return;
+       return 0;
 
 bail_3:
        kfree(ppd->ccti_entries_shadow);
@@ -313,7 +313,7 @@ bail_1:
 bail:
        /* User is intentionally disabling the congestion control agent */
        if (!qib_cc_table_size)
-               return;
+               return 0;
 
        if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) {
                qib_cc_table_size = 0;
@@ -324,7 +324,7 @@ bail:
 
        qib_dev_err(dd, "Congestion Control Agent disabled for port %d\n",
                port);
-       return;
+       return 0;
 }
 
 static int init_pioavailregs(struct qib_devdata *dd)
@@ -525,6 +525,7 @@ static void enable_chip(struct qib_devdata *dd)
 static void verify_interrupt(unsigned long opaque)
 {
        struct qib_devdata *dd = (struct qib_devdata *) opaque;
+       u64 int_counter;
 
        if (!dd)
                return; /* being torn down */
@@ -533,7 +534,8 @@ static void verify_interrupt(unsigned long opaque)
         * If we don't have a lid or any interrupts, let the user know and
         * don't bother checking again.
         */
-       if (dd->int_counter == 0) {
+       int_counter = qib_int_counter(dd) - dd->z_int_counter;
+       if (int_counter == 0) {
                if (!dd->f_intr_fallback(dd))
                        dev_err(&dd->pcidev->dev,
                                "No interrupts detected, not usable.\n");
@@ -633,6 +635,12 @@ wq_error:
        return -ENOMEM;
 }
 
+static void qib_free_pportdata(struct qib_pportdata *ppd)
+{
+       free_percpu(ppd->ibport_data.pmastats);
+       ppd->ibport_data.pmastats = NULL;
+}
+
 /**
  * qib_init - do the actual initialization sequence on the chip
  * @dd: the qlogic_ib device
@@ -920,6 +928,7 @@ static void qib_shutdown_device(struct qib_devdata *dd)
                        destroy_workqueue(ppd->qib_wq);
                        ppd->qib_wq = NULL;
                }
+               qib_free_pportdata(ppd);
        }
 
        qib_update_eeprom_log(dd);
@@ -1079,9 +1088,34 @@ void qib_free_devdata(struct qib_devdata *dd)
 #ifdef CONFIG_DEBUG_FS
        qib_dbg_ibdev_exit(&dd->verbs_dev);
 #endif
+       free_percpu(dd->int_counter);
        ib_dealloc_device(&dd->verbs_dev.ibdev);
 }
 
+u64 qib_int_counter(struct qib_devdata *dd)
+{
+       int cpu;
+       u64 int_counter = 0;
+
+       for_each_possible_cpu(cpu)
+               int_counter += *per_cpu_ptr(dd->int_counter, cpu);
+       return int_counter;
+}
+
+u64 qib_sps_ints(void)
+{
+       unsigned long flags;
+       struct qib_devdata *dd;
+       u64 sps_ints = 0;
+
+       spin_lock_irqsave(&qib_devs_lock, flags);
+       list_for_each_entry(dd, &qib_dev_list, list) {
+               sps_ints += qib_int_counter(dd);
+       }
+       spin_unlock_irqrestore(&qib_devs_lock, flags);
+       return sps_ints;
+}
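
This is the standard percpu-counter recipe that the series substitutes for the old shared u32: writers do one local increment with no cross-CPU cache-line bouncing, and the rare reader pays for a sum over all possible CPUs. Condensed to its three pieces (a sketch of the pattern, not new driver code):

    u64 __percpu *ctr = alloc_percpu(u64);  /* setup; NULL on failure */

    this_cpu_inc(*ctr);                     /* hot path, e.g. irq handler */

    u64 sum = 0;                            /* slow path reader */
    int cpu;

    for_each_possible_cpu(cpu)
            sum += *per_cpu_ptr(ctr, cpu);

    free_percpu(ctr);                       /* teardown */
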
+
 /*
  * Allocate our primary per-unit data structure.  Must be done via verbs
  * allocator, because the verbs cleanup process both does cleanup and
@@ -1097,14 +1131,10 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
        int ret;
 
        dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra);
-       if (!dd) {
-               dd = ERR_PTR(-ENOMEM);
-               goto bail;
-       }
+       if (!dd)
+               return ERR_PTR(-ENOMEM);
 
-#ifdef CONFIG_DEBUG_FS
-       qib_dbg_ibdev_init(&dd->verbs_dev);
-#endif
+       INIT_LIST_HEAD(&dd->list);
 
        idr_preload(GFP_KERNEL);
        spin_lock_irqsave(&qib_devs_lock, flags);
@@ -1121,11 +1151,13 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
        if (ret < 0) {
                qib_early_err(&pdev->dev,
                              "Could not allocate unit ID: error %d\n", -ret);
-#ifdef CONFIG_DEBUG_FS
-               qib_dbg_ibdev_exit(&dd->verbs_dev);
-#endif
-               ib_dealloc_device(&dd->verbs_dev.ibdev);
-               dd = ERR_PTR(ret);
+               goto bail;
+       }
+       dd->int_counter = alloc_percpu(u64);
+       if (!dd->int_counter) {
+               ret = -ENOMEM;
+               qib_early_err(&pdev->dev,
+                             "Could not allocate per-cpu int_counter\n");
                goto bail;
        }
 
@@ -1139,9 +1171,15 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
                        qib_early_err(&pdev->dev,
                                "Could not alloc cpulist info, cpu affinity might be wrong\n");
        }
-
-bail:
+#ifdef CONFIG_DEBUG_FS
+       qib_dbg_ibdev_init(&dd->verbs_dev);
+#endif
        return dd;
+bail:
+       if (!list_empty(&dd->list))
+               list_del_init(&dd->list);
+       ib_dealloc_device(&dd->verbs_dev.ibdev);
+       return ERR_PTR(ret);
 }
 
 /*
index ccb1191..edad991 100644 (file)
@@ -1634,6 +1634,23 @@ static int pma_get_portcounters_cong(struct ib_pma_mad *pmp,
        return reply((struct ib_smp *)pmp);
 }
 
+static void qib_snapshot_pmacounters(
+       struct qib_ibport *ibp,
+       struct qib_pma_counters *pmacounters)
+{
+       struct qib_pma_counters *p;
+       int cpu;
+
+       memset(pmacounters, 0, sizeof(*pmacounters));
+       for_each_possible_cpu(cpu) {
+               p = per_cpu_ptr(ibp->pmastats, cpu);
+               pmacounters->n_unicast_xmit += p->n_unicast_xmit;
+               pmacounters->n_unicast_rcv += p->n_unicast_rcv;
+               pmacounters->n_multicast_xmit += p->n_multicast_xmit;
+               pmacounters->n_multicast_rcv += p->n_multicast_rcv;
+       }
+}
+
 static int pma_get_portcounters_ext(struct ib_pma_mad *pmp,
                                    struct ib_device *ibdev, u8 port)
 {
@@ -1642,6 +1659,7 @@ static int pma_get_portcounters_ext(struct ib_pma_mad *pmp,
        struct qib_ibport *ibp = to_iport(ibdev, port);
        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
        u64 swords, rwords, spkts, rpkts, xwait;
+       struct qib_pma_counters pma;
        u8 port_select = p->port_select;
 
        memset(pmp->data, 0, sizeof(pmp->data));
@@ -1664,10 +1682,17 @@ static int pma_get_portcounters_ext(struct ib_pma_mad *pmp,
        p->port_rcv_data = cpu_to_be64(rwords);
        p->port_xmit_packets = cpu_to_be64(spkts);
        p->port_rcv_packets = cpu_to_be64(rpkts);
-       p->port_unicast_xmit_packets = cpu_to_be64(ibp->n_unicast_xmit);
-       p->port_unicast_rcv_packets = cpu_to_be64(ibp->n_unicast_rcv);
-       p->port_multicast_xmit_packets = cpu_to_be64(ibp->n_multicast_xmit);
-       p->port_multicast_rcv_packets = cpu_to_be64(ibp->n_multicast_rcv);
+
+       qib_snapshot_pmacounters(ibp, &pma);
+
+       p->port_unicast_xmit_packets = cpu_to_be64(pma.n_unicast_xmit
+               - ibp->z_unicast_xmit);
+       p->port_unicast_rcv_packets = cpu_to_be64(pma.n_unicast_rcv
+               - ibp->z_unicast_rcv);
+       p->port_multicast_xmit_packets = cpu_to_be64(pma.n_multicast_xmit
+               - ibp->z_multicast_xmit);
+       p->port_multicast_rcv_packets = cpu_to_be64(pma.n_multicast_rcv
+               - ibp->z_multicast_rcv);
 
 bail:
        return reply((struct ib_smp *) pmp);
@@ -1795,6 +1820,7 @@ static int pma_set_portcounters_ext(struct ib_pma_mad *pmp,
        struct qib_ibport *ibp = to_iport(ibdev, port);
        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
        u64 swords, rwords, spkts, rpkts, xwait;
+       struct qib_pma_counters pma;
 
        qib_snapshot_counters(ppd, &swords, &rwords, &spkts, &rpkts, &xwait);
 
@@ -1810,17 +1836,19 @@ static int pma_set_portcounters_ext(struct ib_pma_mad *pmp,
        if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS)
                ibp->z_port_rcv_packets = rpkts;
 
+       qib_snapshot_pmacounters(ibp, &pma);
+
        if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS)
-               ibp->n_unicast_xmit = 0;
+               ibp->z_unicast_xmit = pma.n_unicast_xmit;
 
        if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS)
-               ibp->n_unicast_rcv = 0;
+               ibp->z_unicast_rcv = pma.n_unicast_rcv;
 
        if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS)
-               ibp->n_multicast_xmit = 0;
+               ibp->z_multicast_xmit = pma.n_multicast_xmit;
 
        if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS)
-               ibp->n_multicast_rcv = 0;
+               ibp->z_multicast_rcv = pma.n_multicast_rcv;
 
        return pma_get_portcounters_ext(pmp, ibdev, port);
 }
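
With percpu counters there is no single word to zero, so "reset" becomes "record a baseline and subtract": the z_* fields snapshot the running total, and reads report total minus baseline. The trick in standalone form:

    #include <stdint.h>
    #include <stdio.h>

    struct port_counters {
            uint64_t n_unicast_xmit;        /* running total, never written back */
            uint64_t z_unicast_xmit;        /* baseline taken at last reset */
    };

    static void reset_counter(struct port_counters *c)
    {
            c->z_unicast_xmit = c->n_unicast_xmit;  /* no store to the hot counter */
    }

    static uint64_t read_counter(const struct port_counters *c)
    {
            return c->n_unicast_xmit - c->z_unicast_xmit;
    }

    int main(void)
    {
            struct port_counters c = { .n_unicast_xmit = 100 };

            reset_counter(&c);
            c.n_unicast_xmit += 7;
            printf("%llu\n", (unsigned long long)read_counter(&c));  /* 7 */
            return 0;
    }
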
index e6687de..9bbb553 100644 (file)
@@ -232,8 +232,8 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 {
        struct qib_mr *mr;
        struct ib_umem *umem;
-       struct ib_umem_chunk *chunk;
-       int n, m, i;
+       struct scatterlist *sg;
+       int n, m, entry;
        struct ib_mr *ret;
 
        if (length == 0) {
@@ -246,9 +246,7 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
        if (IS_ERR(umem))
                return (void *) umem;
 
-       n = 0;
-       list_for_each_entry(chunk, &umem->chunk_list, list)
-               n += chunk->nents;
+       n = umem->nmap;
 
        mr = alloc_mr(n, pd);
        if (IS_ERR(mr)) {
@@ -268,11 +266,10 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                mr->mr.page_shift = ilog2(umem->page_size);
        m = 0;
        n = 0;
-       list_for_each_entry(chunk, &umem->chunk_list, list) {
-               for (i = 0; i < chunk->nents; i++) {
+       for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
                        void *vaddr;
 
-                       vaddr = page_address(sg_page(&chunk->page_list[i]));
+                       vaddr = page_address(sg_page(sg));
                        if (!vaddr) {
                                ret = ERR_PTR(-EINVAL);
                                goto bail;
@@ -284,7 +281,6 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                m++;
                                n = 0;
                        }
-               }
        }
        ret = &mr->ibmr;
 
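
This conversion tracks the ib_umem change from per-chunk page arrays to a single sg_table (see the ib_umem.h hunk further below): every "walk all pages" loop collapses into one for_each_sg() over umem->nmap mapped entries. A hedged sketch of the new traversal, assuming only the fields this patch introduces:

    #include <linux/mm.h>
    #include <linux/scatterlist.h>
    #include <rdma/ib_umem.h>

    /* Visit each mapped entry of a registered umem; nmap is the
     * number of DMA-mapped scatterlist entries set by ib_umem_get(). */
    static void walk_umem(struct ib_umem *umem)
    {
            struct scatterlist *sg;
            int i;

            for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
                    void *vaddr = page_address(sg_page(sg));

                    if (!vaddr)
                            continue;   /* highmem page without a mapping */
                    /* ... use vaddr, or sg_dma_address(sg) / sg_dma_len(sg) ... */
            }
    }
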
index 3ab3413..2f25018 100644
@@ -752,7 +752,7 @@ void qib_send_rc_ack(struct qib_qp *qp)
        qib_flush_wc();
        qib_sendbuf_done(dd, pbufn);
 
-       ibp->n_unicast_xmit++;
+       this_cpu_inc(ibp->pmastats->n_unicast_xmit);
        goto done;
 
 queue_ack:
index 357b6cf..4c07a8b 100644
@@ -703,6 +703,7 @@ void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
        ohdr->bth[0] = cpu_to_be32(bth0);
        ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
        ohdr->bth[2] = cpu_to_be32(bth2);
+       this_cpu_inc(ibp->pmastats->n_unicast_xmit);
 }
 
 /**
index 3ad651c..aaf7039 100644
@@ -280,11 +280,11 @@ int qib_make_ud_req(struct qib_qp *qp)
        ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
        if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) {
                if (ah_attr->dlid != QIB_PERMISSIVE_LID)
-                       ibp->n_multicast_xmit++;
+                       this_cpu_inc(ibp->pmastats->n_multicast_xmit);
                else
-                       ibp->n_unicast_xmit++;
+                       this_cpu_inc(ibp->pmastats->n_unicast_xmit);
        } else {
-               ibp->n_unicast_xmit++;
+               this_cpu_inc(ibp->pmastats->n_unicast_xmit);
                lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
                if (unlikely(lid == ppd->lid)) {
                        /*
index 165aee2..d2806ca 100644
 /* attempt to drain the queue for 5 secs */
 #define QIB_USER_SDMA_DRAIN_TIMEOUT 500
 
+/*
+ * Track how many times a process opens this driver.
+ */
+static struct rb_root qib_user_sdma_rb_root = RB_ROOT;
+
+struct qib_user_sdma_rb_node {
+       struct rb_node node;
+       int refcount;
+       pid_t pid;
+};
+
 struct qib_user_sdma_pkt {
        struct list_head list;  /* list element */
 
@@ -120,15 +131,60 @@ struct qib_user_sdma_queue {
        /* dma page table */
        struct rb_root dma_pages_root;
 
+       struct qib_user_sdma_rb_node *sdma_rb_node;
+
        /* protect everything above... */
        struct mutex lock;
 };
 
+static struct qib_user_sdma_rb_node *
+qib_user_sdma_rb_search(struct rb_root *root, pid_t pid)
+{
+       struct qib_user_sdma_rb_node *sdma_rb_node;
+       struct rb_node *node = root->rb_node;
+
+       while (node) {
+               sdma_rb_node = container_of(node,
+                       struct qib_user_sdma_rb_node, node);
+               if (pid < sdma_rb_node->pid)
+                       node = node->rb_left;
+               else if (pid > sdma_rb_node->pid)
+                       node = node->rb_right;
+               else
+                       return sdma_rb_node;
+       }
+       return NULL;
+}
+
+static int
+qib_user_sdma_rb_insert(struct rb_root *root, struct qib_user_sdma_rb_node *new)
+{
+       struct rb_node **node = &(root->rb_node);
+       struct rb_node *parent = NULL;
+       struct qib_user_sdma_rb_node *got;
+
+       while (*node) {
+               got = container_of(*node, struct qib_user_sdma_rb_node, node);
+               parent = *node;
+               if (new->pid < got->pid)
+                       node = &((*node)->rb_left);
+               else if (new->pid > got->pid)
+                       node = &((*node)->rb_right);
+               else
+                       return 0;
+       }
+
+       rb_link_node(&new->node, parent, node);
+       rb_insert_color(&new->node, root);
+       return 1;
+}
+
 struct qib_user_sdma_queue *
 qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
 {
        struct qib_user_sdma_queue *pq =
                kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL);
+       struct qib_user_sdma_rb_node *sdma_rb_node;
 
        if (!pq)
                goto done;
@@ -138,6 +194,7 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
        pq->num_pending = 0;
        pq->num_sending = 0;
        pq->added = 0;
+       pq->sdma_rb_node = NULL;
 
        INIT_LIST_HEAD(&pq->sent);
        spin_lock_init(&pq->sent_lock);
@@ -163,8 +220,30 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
 
        pq->dma_pages_root = RB_ROOT;
 
+       sdma_rb_node = qib_user_sdma_rb_search(&qib_user_sdma_rb_root,
+                                       current->pid);
+       if (sdma_rb_node) {
+               sdma_rb_node->refcount++;
+       } else {
+               int ret;
+               sdma_rb_node = kmalloc(sizeof(*sdma_rb_node),
+                       GFP_KERNEL);
+               if (!sdma_rb_node)
+                       goto err_rb;
+
+               sdma_rb_node->refcount = 1;
+               sdma_rb_node->pid = current->pid;
+
+               ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root,
+                                       sdma_rb_node);
+               BUG_ON(ret == 0);
+       }
+       pq->sdma_rb_node = sdma_rb_node;
+
        goto done;
 
+err_rb:
+       dma_pool_destroy(pq->header_cache);
 err_slab:
        kmem_cache_destroy(pq->pkt_slab);
 err_kfree:
@@ -1020,8 +1099,13 @@ void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq)
        if (!pq)
                return;
 
-       kmem_cache_destroy(pq->pkt_slab);
+       pq->sdma_rb_node->refcount--;
+       if (pq->sdma_rb_node->refcount == 0) {
+               rb_erase(&pq->sdma_rb_node->node, &qib_user_sdma_rb_root);
+               kfree(pq->sdma_rb_node);
+       }
        dma_pool_destroy(pq->header_cache);
+       kmem_cache_destroy(pq->pkt_slab);
        kfree(pq);
 }
 
@@ -1241,26 +1325,52 @@ static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
                                 struct qib_user_sdma_queue *pq,
                                 struct list_head *pktlist, int count)
 {
-       int ret = 0;
        unsigned long flags;
 
        if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE)))
                return -ECOMM;
 
-       spin_lock_irqsave(&ppd->sdma_lock, flags);
-
-       if (unlikely(!__qib_sdma_running(ppd))) {
-               ret = -ECOMM;
-               goto unlock;
+       /* same process has multiple contexts open: non-blocking mode */
+       if (pq->sdma_rb_node->refcount > 1) {
+               spin_lock_irqsave(&ppd->sdma_lock, flags);
+               if (unlikely(!__qib_sdma_running(ppd))) {
+                       spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+                       return -ECOMM;
+               }
+               pq->num_pending += count;
+               list_splice_tail_init(pktlist, &ppd->sdma_userpending);
+               qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
+               spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+               return 0;
        }
 
+       /* In this case, descriptors from this process are not
+        * linked to the ppd pending queue and the interrupt
+        * handler will not touch this process's packets, so it
+        * is OK to update pq->num_pending directly without
+        * taking the sdma lock.
+        */
+
        pq->num_pending += count;
-       list_splice_tail_init(pktlist, &ppd->sdma_userpending);
-       qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
+       /*
+        * Blocking mode for a single-rail process: we must
+        * release and re-acquire the sdma_lock to give other
+        * processes a chance to make progress. This is
+        * important for performance.
+        */
+       do {
+               spin_lock_irqsave(&ppd->sdma_lock, flags);
+               if (unlikely(!__qib_sdma_running(ppd))) {
+                       spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+                       return -ECOMM;
+               }
+               qib_user_sdma_send_desc(ppd, pktlist);
+               if (!list_empty(pktlist))
+                       qib_sdma_make_progress(ppd);
+               spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+       } while (!list_empty(pktlist));
 
-unlock:
-       spin_unlock_irqrestore(&ppd->sdma_lock, flags);
-       return ret;
+       return 0;
 }
 
 int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
@@ -1290,7 +1400,7 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
                qib_user_sdma_queue_clean(ppd, pq);
 
        while (dim) {
-               int mxp = 8;
+               int mxp = 1;
                int ndesc = 0;
 
                ret = qib_user_sdma_queue_pkts(dd, ppd, pq,
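
The create/destroy changes above amount to a get/put pairing on a pid-keyed rb-tree node; folded into helpers, the lifecycle looks roughly like this (a sketch of the open-coded logic, not code from the patch):

    static struct qib_user_sdma_rb_node *sdma_rb_get(pid_t pid)
    {
            struct qib_user_sdma_rb_node *n;

            n = qib_user_sdma_rb_search(&qib_user_sdma_rb_root, pid);
            if (n) {
                    n->refcount++;
                    return n;
            }
            n = kmalloc(sizeof(*n), GFP_KERNEL);
            if (!n)
                    return NULL;
            n->refcount = 1;
            n->pid = pid;
            /* cannot collide: the search above proved pid is absent */
            qib_user_sdma_rb_insert(&qib_user_sdma_rb_root, n);
            return n;
    }

    static void sdma_rb_put(struct qib_user_sdma_rb_node *n)
    {
            if (--n->refcount == 0) {
                    rb_erase(&n->node, &qib_user_sdma_rb_root);
                    kfree(n);
            }
    }

A refcount above 1 means the same process has opened more than one context, which is exactly the condition qib_user_sdma_push_pkts() uses to select the non-blocking path.
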
index 092b0bb..9bcfbd8 100644
@@ -662,7 +662,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
                mcast = qib_mcast_find(ibp, &hdr->u.l.grh.dgid);
                if (mcast == NULL)
                        goto drop;
-               ibp->n_multicast_rcv++;
+               this_cpu_inc(ibp->pmastats->n_multicast_rcv);
                list_for_each_entry_rcu(p, &mcast->qp_list, list)
                        qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp);
                /*
@@ -678,8 +678,8 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
                                        &rcd->lookaside_qp->refcount))
                                        wake_up(
                                         &rcd->lookaside_qp->wait);
-                                       rcd->lookaside_qp = NULL;
-                               }
+                               rcd->lookaside_qp = NULL;
+                       }
                }
                if (!rcd->lookaside_qp) {
                        qp = qib_lookup_qpn(ibp, qp_num);
@@ -689,7 +689,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
                        rcd->lookaside_qpn = qp_num;
                } else
                        qp = rcd->lookaside_qp;
-               ibp->n_unicast_rcv++;
+               this_cpu_inc(ibp->pmastats->n_unicast_rcv);
                qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp);
        }
        return;
index a01c7d2..bfc8948 100644
@@ -664,6 +664,13 @@ struct qib_opcode_stats_perctx {
        struct qib_opcode_stats stats[128];
 };
 
+struct qib_pma_counters {
+       u64 n_unicast_xmit;     /* total unicast packets sent */
+       u64 n_unicast_rcv;      /* total unicast packets received */
+       u64 n_multicast_xmit;   /* total multicast packets sent */
+       u64 n_multicast_rcv;    /* total multicast packets received */
+};
+
 struct qib_ibport {
        struct qib_qp __rcu *qp0;
        struct qib_qp __rcu *qp1;
@@ -680,10 +687,11 @@ struct qib_ibport {
        __be64 mkey;
        __be64 guids[QIB_GUIDS_PER_PORT - 1];   /* writable GUIDs */
        u64 tid;                /* TID for traps */
-       u64 n_unicast_xmit;     /* total unicast packets sent */
-       u64 n_unicast_rcv;      /* total unicast packets received */
-       u64 n_multicast_xmit;   /* total multicast packets sent */
-       u64 n_multicast_rcv;    /* total multicast packets received */
+       struct qib_pma_counters __percpu *pmastats;
+       u64 z_unicast_xmit;     /* starting count for PMA */
+       u64 z_unicast_rcv;      /* starting count for PMA */
+       u64 z_multicast_xmit;   /* starting count for PMA */
+       u64 z_multicast_rcv;    /* starting count for PMA */
        u64 z_symbol_error_counter;             /* starting count for PMA */
        u64 z_link_error_recovery_counter;      /* starting count for PMA */
        u64 z_link_downed_counter;              /* starting count for PMA */
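
qib_snapshot_pmacounters() is called by the MAD hunks above but its body is not part of this diff; given the __percpu pointer declared here, a plausible implementation (an assumption, not taken from the patch) simply sums the per-CPU structures:

    #include <linux/percpu.h>

    void qib_snapshot_pmacounters(struct qib_ibport *ibp,
                                  struct qib_pma_counters *pmacounters)
    {
            struct qib_pma_counters *p;
            int cpu;

            memset(pmacounters, 0, sizeof(*pmacounters));
            /* sum each CPU's free-running counters into one snapshot */
            for_each_possible_cpu(cpu) {
                    p = per_cpu_ptr(ibp->pmastats, cpu);
                    pmacounters->n_unicast_xmit   += p->n_unicast_xmit;
                    pmacounters->n_unicast_rcv    += p->n_unicast_rcv;
                    pmacounters->n_multicast_xmit += p->n_multicast_xmit;
                    pmacounters->n_multicast_rcv  += p->n_multicast_rcv;
            }
    }

The writers use this_cpu_inc() with no locking, so a snapshot taken mid-update can be slightly stale; for PMA counters that is acceptable.
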
index 16755cd..801a1d6 100644
@@ -286,7 +286,7 @@ iter_chunk:
                                err = iommu_map(pd->domain, va_start, pa_start,
                                                        size, flags);
                                if (err) {
-                                       usnic_err("Failed to map va 0x%lx pa 0x%pa size 0x%zx with err %d\n",
+                                       usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n",
                                                va_start, &pa_start, size, err);
                                        goto err_out;
                                }
index 529b6bc..66a908b 100644
@@ -411,6 +411,8 @@ static void srp_path_rec_completion(int status,
 
 static int srp_lookup_path(struct srp_target_port *target)
 {
+       int ret;
+
        target->path.numb_path = 1;
 
        init_completion(&target->done);
@@ -431,7 +433,9 @@ static int srp_lookup_path(struct srp_target_port *target)
        if (target->path_query_id < 0)
                return target->path_query_id;
 
-       wait_for_completion(&target->done);
+       ret = wait_for_completion_interruptible(&target->done);
+       if (ret < 0)
+               return ret;
 
        if (target->status < 0)
                shost_printk(KERN_WARNING, target->scsi_host,
@@ -710,7 +714,9 @@ static int srp_connect_target(struct srp_target_port *target)
                ret = srp_send_req(target);
                if (ret)
                        return ret;
-               wait_for_completion(&target->done);
+               ret = wait_for_completion_interruptible(&target->done);
+               if (ret < 0)
+                       return ret;
 
                /*
                 * The CM event handling code will set status to
@@ -777,6 +783,7 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
  * srp_claim_req - Take ownership of the scmnd associated with a request.
  * @target: SRP target port.
  * @req: SRP request.
+ * @sdev: If not NULL, only take ownership for this SCSI device.
  * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
  *         ownership of @req->scmnd if it equals @scmnd.
  *
@@ -785,16 +792,17 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
  */
 static struct scsi_cmnd *srp_claim_req(struct srp_target_port *target,
                                       struct srp_request *req,
+                                      struct scsi_device *sdev,
                                       struct scsi_cmnd *scmnd)
 {
        unsigned long flags;
 
        spin_lock_irqsave(&target->lock, flags);
-       if (!scmnd) {
+       if (req->scmnd &&
+           (!sdev || req->scmnd->device == sdev) &&
+           (!scmnd || req->scmnd == scmnd)) {
                scmnd = req->scmnd;
                req->scmnd = NULL;
-       } else if (req->scmnd == scmnd) {
-               req->scmnd = NULL;
        } else {
                scmnd = NULL;
        }
@@ -821,9 +829,10 @@ static void srp_free_req(struct srp_target_port *target,
 }
 
 static void srp_finish_req(struct srp_target_port *target,
-                          struct srp_request *req, int result)
+                          struct srp_request *req, struct scsi_device *sdev,
+                          int result)
 {
-       struct scsi_cmnd *scmnd = srp_claim_req(target, req, NULL);
+       struct scsi_cmnd *scmnd = srp_claim_req(target, req, sdev, NULL);
 
        if (scmnd) {
                srp_free_req(target, req, scmnd, 0);
@@ -835,11 +844,20 @@ static void srp_finish_req(struct srp_target_port *target,
 static void srp_terminate_io(struct srp_rport *rport)
 {
        struct srp_target_port *target = rport->lld_data;
+       struct Scsi_Host *shost = target->scsi_host;
+       struct scsi_device *sdev;
        int i;
 
+       /*
+        * Invoking srp_terminate_io() while srp_queuecommand() is running
+        * is not safe. Hence the warning statement below.
+        */
+       shost_for_each_device(sdev, shost)
+               WARN_ON_ONCE(sdev->request_queue->request_fn_active);
+
        for (i = 0; i < target->req_ring_size; ++i) {
                struct srp_request *req = &target->req_ring[i];
-               srp_finish_req(target, req, DID_TRANSPORT_FAILFAST << 16);
+               srp_finish_req(target, req, NULL, DID_TRANSPORT_FAILFAST << 16);
        }
 }
 
@@ -876,7 +894,7 @@ static int srp_rport_reconnect(struct srp_rport *rport)
 
        for (i = 0; i < target->req_ring_size; ++i) {
                struct srp_request *req = &target->req_ring[i];
-               srp_finish_req(target, req, DID_RESET << 16);
+               srp_finish_req(target, req, NULL, DID_RESET << 16);
        }
 
        INIT_LIST_HEAD(&target->free_tx);
@@ -1284,7 +1302,7 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
                complete(&target->tsk_mgmt_done);
        } else {
                req = &target->req_ring[rsp->tag];
-               scmnd = srp_claim_req(target, req, NULL);
+               scmnd = srp_claim_req(target, req, NULL, NULL);
                if (!scmnd) {
                        shost_printk(KERN_ERR, target->scsi_host,
                                     "Null scmnd for RSP w/tag %016llx\n",
@@ -1804,8 +1822,10 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
                                shost_printk(KERN_WARNING, shost,
                                             PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
                        else
-                               shost_printk(KERN_WARNING, shost,
-                                           PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason);
+                               shost_printk(KERN_WARNING, shost, PFX
+                                            "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
+                                            target->path.sgid.raw,
+                                            target->orig_dgid, reason);
                } else
                        shost_printk(KERN_WARNING, shost,
                                     "  REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
@@ -1863,6 +1883,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
        case IB_CM_TIMEWAIT_EXIT:
                shost_printk(KERN_ERR, target->scsi_host,
                             PFX "connection closed\n");
+               comp = 1;
 
                target->status = 0;
                break;
@@ -1999,7 +2020,7 @@ static int srp_abort(struct scsi_cmnd *scmnd)
 
        shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
 
-       if (!req || !srp_claim_req(target, req, scmnd))
+       if (!req || !srp_claim_req(target, req, NULL, scmnd))
                return SUCCESS;
        if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun,
                              SRP_TSK_ABORT_TASK) == 0)
@@ -2030,8 +2051,7 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
 
        for (i = 0; i < target->req_ring_size; ++i) {
                struct srp_request *req = &target->req_ring[i];
-               if (req->scmnd && req->scmnd->device == scmnd->device)
-                       srp_finish_req(target, req, DID_RESET << 16);
+               srp_finish_req(target, req, scmnd->device, DID_RESET << 16);
        }
 
        return SUCCESS;
@@ -2612,6 +2632,8 @@ static ssize_t srp_create_target(struct device *dev,
        target->tl_retry_count  = 7;
        target->queue_size      = SRP_DEFAULT_QUEUE_SIZE;
 
+       mutex_lock(&host->add_target_mutex);
+
        ret = srp_parse_options(buf, target);
        if (ret)
                goto err;
@@ -2649,16 +2671,9 @@ static ssize_t srp_create_target(struct device *dev,
        if (ret)
                goto err_free_mem;
 
-       ib_query_gid(ibdev, host->port, 0, &target->path.sgid);
-
-       shost_printk(KERN_DEBUG, target->scsi_host, PFX
-                    "new target: id_ext %016llx ioc_guid %016llx pkey %04x "
-                    "service_id %016llx dgid %pI6\n",
-              (unsigned long long) be64_to_cpu(target->id_ext),
-              (unsigned long long) be64_to_cpu(target->ioc_guid),
-              be16_to_cpu(target->path.pkey),
-              (unsigned long long) be64_to_cpu(target->service_id),
-              target->path.dgid.raw);
+       ret = ib_query_gid(ibdev, host->port, 0, &target->path.sgid);
+       if (ret)
+               goto err_free_mem;
 
        ret = srp_create_target_ib(target);
        if (ret)
@@ -2679,7 +2694,19 @@ static ssize_t srp_create_target(struct device *dev,
        if (ret)
                goto err_disconnect;
 
-       return count;
+       shost_printk(KERN_DEBUG, target->scsi_host, PFX
+                    "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
+                    be64_to_cpu(target->id_ext),
+                    be64_to_cpu(target->ioc_guid),
+                    be16_to_cpu(target->path.pkey),
+                    be64_to_cpu(target->service_id),
+                    target->path.sgid.raw, target->path.dgid.raw);
+
+       ret = count;
+
+out:
+       mutex_unlock(&host->add_target_mutex);
+       return ret;
 
 err_disconnect:
        srp_disconnect_target(target);
@@ -2695,8 +2722,7 @@ err_free_mem:
 
 err:
        scsi_host_put(target_host);
-
-       return ret;
+       goto out;
 }
 
 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
@@ -2732,6 +2758,7 @@ static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
        INIT_LIST_HEAD(&host->target_list);
        spin_lock_init(&host->target_lock);
        init_completion(&host->released);
+       mutex_init(&host->add_target_mutex);
        host->srp_dev = device;
        host->port = port;
 
index 5756810..aad27b7 100644
@@ -105,6 +105,7 @@ struct srp_host {
        spinlock_t              target_lock;
        struct completion       released;
        struct list_head        list;
+       struct mutex            add_target_mutex;
 };
 
 struct srp_request {
index 9cd5415..aa7f943 100644
@@ -35,6 +35,12 @@ static void _be_roce_dev_add(struct be_adapter *adapter)
 
        if (!ocrdma_drv)
                return;
+
+       if (ocrdma_drv->be_abi_version != BE_ROCE_ABI_VERSION) {
+               dev_warn(&pdev->dev, "Cannot initialize RoCE due to ocrdma ABI mismatch\n");
+               return;
+       }
+
        if (pdev->device == OC_DEVICE_ID5) {
                /* only msix is supported on these devices */
                if (!msix_enabled(adapter))
index 2cd1129..1bfb161 100644
@@ -21,6 +21,8 @@
 #include <linux/pci.h>
 #include <linux/netdevice.h>
 
+#define BE_ROCE_ABI_VERSION    1
+
 struct ocrdma_dev;
 
 enum be_interrupt_mode {
@@ -52,6 +54,7 @@ struct be_dev_info {
 /* The ocrdma driver registers its callback functions with the nic driver. */
 struct ocrdma_driver {
        unsigned char name[32];
+       u32 be_abi_version;
        struct ocrdma_dev *(*add) (struct be_dev_info *dev_info);
        void (*remove) (struct ocrdma_dev *);
        void (*state_change_handler) (struct ocrdma_dev *, u32 new_state);
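
The ABI check added to _be_roce_dev_add() above pairs with this new field: the registering RoCE driver is expected to stamp the compile-time BE_ROCE_ABI_VERSION into its ocrdma_driver, so a stale binary is rejected instead of dereferencing a mismatched structure. A hedged sketch of the producing side (the handler names are illustrative):

    static struct ocrdma_driver ocrdma_drv = {
            .name                 = "ocrdma",
            .be_abi_version       = BE_ROCE_ABI_VERSION, /* compile-time stamp */
            .add                  = ocrdma_add,
            .remove               = ocrdma_remove,
            .state_change_handler = ocrdma_event_handler,
    };
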
index a064f06..96a0617 100644
@@ -446,6 +446,7 @@ int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev)
        mlx5_init_cq_table(dev);
        mlx5_init_qp_table(dev);
        mlx5_init_srq_table(dev);
+       mlx5_init_mr_table(dev);
 
        return 0;
 
index 35e514d..4cc9276 100644
 #include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
 
+void mlx5_init_mr_table(struct mlx5_core_dev *dev)
+{
+       struct mlx5_mr_table *table = &dev->priv.mr_table;
+
+       rwlock_init(&table->lock);
+       INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
+}
+
+void mlx5_cleanup_mr_table(struct mlx5_core_dev *dev)
+{
+}
+
 int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
                          struct mlx5_create_mkey_mbox_in *in, int inlen,
                          mlx5_cmd_cbk_t callback, void *context,
                          struct mlx5_create_mkey_mbox_out *out)
 {
+       struct mlx5_mr_table *table = &dev->priv.mr_table;
        struct mlx5_create_mkey_mbox_out lout;
        int err;
        u8 key;
@@ -73,14 +86,21 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
        mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n",
                      be32_to_cpu(lout.mkey), key, mr->key);
 
+       /* connect to MR tree */
+       write_lock_irq(&table->lock);
+       err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->key), mr);
+       write_unlock_irq(&table->lock);
+
        return err;
 }
 EXPORT_SYMBOL(mlx5_core_create_mkey);
 
 int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr)
 {
+       struct mlx5_mr_table *table = &dev->priv.mr_table;
        struct mlx5_destroy_mkey_mbox_in in;
        struct mlx5_destroy_mkey_mbox_out out;
+       unsigned long flags;
        int err;
 
        memset(&in, 0, sizeof(in));
@@ -95,6 +115,10 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr)
        if (out.hdr.status)
                return mlx5_cmd_status_to_err(&out.hdr);
 
+       write_lock_irqsave(&table->lock, flags);
+       radix_tree_delete(&table->tree, mlx5_base_mkey(mr->key));
+       write_unlock_irqrestore(&table->lock, flags);
+
        return err;
 }
 EXPORT_SYMBOL(mlx5_core_destroy_mkey);
@@ -144,3 +168,64 @@ int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
        return err;
 }
 EXPORT_SYMBOL(mlx5_core_dump_fill_mkey);
+
+int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn,
+                        int npsvs, u32 *sig_index)
+{
+       struct mlx5_allocate_psv_in in;
+       struct mlx5_allocate_psv_out out;
+       int i, err;
+
+       if (npsvs > MLX5_MAX_PSVS)
+               return -EINVAL;
+
+       memset(&in, 0, sizeof(in));
+       memset(&out, 0, sizeof(out));
+
+       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_PSV);
+       in.npsv_pd = cpu_to_be32((npsvs << 28) | pdn);
+       err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+       if (err) {
+               mlx5_core_err(dev, "cmd exec failed %d\n", err);
+               return err;
+       }
+
+       if (out.hdr.status) {
+               mlx5_core_err(dev, "create_psv bad status %d\n", out.hdr.status);
+               return mlx5_cmd_status_to_err(&out.hdr);
+       }
+
+       for (i = 0; i < npsvs; i++)
+               sig_index[i] = be32_to_cpu(out.psv_idx[i]) & 0xffffff;
+
+       return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_psv);
+
+int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num)
+{
+       struct mlx5_destroy_psv_in in;
+       struct mlx5_destroy_psv_out out;
+       int err;
+
+       memset(&in, 0, sizeof(in));
+       memset(&out, 0, sizeof(out));
+
+       in.psv_number = cpu_to_be32(psv_num);
+       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_PSV);
+       err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+       if (err) {
+               mlx5_core_err(dev, "destroy_psv cmd exec failed %d\n", err);
+               goto out;
+       }
+
+       if (out.hdr.status) {
+               mlx5_core_err(dev, "destroy_psv bad status %d\n", out.hdr.status);
+               err = mlx5_cmd_status_to_err(&out.hdr);
+               goto out;
+       }
+
+out:
+       return err;
+}
+EXPORT_SYMBOL(mlx5_core_destroy_psv);
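
The tree that create/destroy maintain above exists so a completion handler can map a CQE's mkey back to its mlx5_core_mr (for example when a MLX5_CQE_SIG_ERR arrives; see the cq.h and qp.h hunks below). Lookups take the read side of the lock and mask off the mkey's variant byte first. A sketch of the consuming side, assuming only the helpers this patch adds:

    static struct mlx5_core_mr *mr_from_mkey(struct mlx5_core_dev *dev,
                                             u32 mkey)
    {
            struct mlx5_mr_table *table = &dev->priv.mr_table;
            struct mlx5_core_mr *mr;
            unsigned long flags;

            read_lock_irqsave(&table->lock, flags);
            /* the low byte of an mkey is a rotating "variant";
             * the tree is keyed on the masked base value */
            mr = __mlx5_mr_lookup(dev, mlx5_base_mkey(mkey));
            read_unlock_irqrestore(&table->lock, flags);

            return mr;
    }
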
index d47ffc8..13e8983 100644
@@ -810,6 +810,7 @@ EXPORT_SYMBOL_GPL(srp_remove_host);
 
 /**
  * srp_stop_rport_timers - stop the transport layer recovery timers
+ * @rport: SRP remote port for which to stop the timers.
  *
  * Must be called after srp_remove_host() and scsi_remove_host(). The caller
  * must hold a reference on the rport (rport->dev) and on the SCSI host
index 2202c7f..f6b17ac 100644
@@ -80,6 +80,7 @@ enum {
        MLX5_CQE_RESP_SEND_IMM  = 3,
        MLX5_CQE_RESP_SEND_INV  = 4,
        MLX5_CQE_RESIZE_CQ      = 5,
+       MLX5_CQE_SIG_ERR        = 12,
        MLX5_CQE_REQ_ERR        = 13,
        MLX5_CQE_RESP_ERR       = 14,
        MLX5_CQE_INVALID        = 15,
index 817a6fa..407bdb6 100644
@@ -48,6 +48,8 @@ enum {
        MLX5_MAX_COMMANDS               = 32,
        MLX5_CMD_DATA_BLOCK_SIZE        = 512,
        MLX5_PCI_CMD_XPORT              = 7,
+       MLX5_MKEY_BSF_OCTO_SIZE         = 4,
+       MLX5_MAX_PSVS                   = 4,
 };
 
 enum {
@@ -116,6 +118,7 @@ enum {
        MLX5_MKEY_MASK_START_ADDR       = 1ull << 6,
        MLX5_MKEY_MASK_PD               = 1ull << 7,
        MLX5_MKEY_MASK_EN_RINVAL        = 1ull << 8,
+       MLX5_MKEY_MASK_EN_SIGERR        = 1ull << 9,
        MLX5_MKEY_MASK_BSF_EN           = 1ull << 12,
        MLX5_MKEY_MASK_KEY              = 1ull << 13,
        MLX5_MKEY_MASK_QPN              = 1ull << 14,
@@ -555,6 +558,23 @@ struct mlx5_cqe64 {
        u8              op_own;
 };
 
+struct mlx5_sig_err_cqe {
+       u8              rsvd0[16];
+       __be32          expected_trans_sig;
+       __be32          actual_trans_sig;
+       __be32          expected_reftag;
+       __be32          actual_reftag;
+       __be16          syndrome;
+       u8              rsvd22[2];
+       __be32          mkey;
+       __be64          err_offset;
+       u8              rsvd30[8];
+       __be32          qpn;
+       u8              rsvd38[2];
+       u8              signature;
+       u8              op_own;
+};
+
 struct mlx5_wqe_srq_next_seg {
        u8                      rsvd0[2];
        __be16                  next_wqe_index;
@@ -936,4 +956,27 @@ enum {
        MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO        = 1 <<  0
 };
 
+struct mlx5_allocate_psv_in {
+       struct mlx5_inbox_hdr   hdr;
+       __be32                  npsv_pd;
+       __be32                  rsvd_psv0;
+};
+
+struct mlx5_allocate_psv_out {
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvd[8];
+       __be32                  psv_idx[4];
+};
+
+struct mlx5_destroy_psv_in {
+       struct mlx5_inbox_hdr   hdr;
+       __be32                  psv_number;
+       u8                      rsvd[4];
+};
+
+struct mlx5_destroy_psv_out {
+       struct mlx5_outbox_hdr  hdr;
+       u8                      rsvd[8];
+};
+
 #endif /* MLX5_DEVICE_H */
index 130bc8d..93cef63 100644
@@ -401,6 +401,26 @@ struct mlx5_eq {
        struct mlx5_rsc_debug   *dbg;
 };
 
+struct mlx5_core_psv {
+       u32     psv_idx;
+       struct psv_layout {
+               u32     pd;
+               u16     syndrome;
+               u16     reserved;
+               u16     bg;
+               u16     app_tag;
+               u32     ref_tag;
+       } psv;
+};
+
+struct mlx5_core_sig_ctx {
+       struct mlx5_core_psv    psv_memory;
+       struct mlx5_core_psv    psv_wire;
+       struct ib_sig_err       err_item;
+       bool                    sig_status_checked;
+       bool                    sig_err_exists;
+       u32                     sigerr_count;
+};
 
 struct mlx5_core_mr {
        u64                     iova;
@@ -475,6 +495,13 @@ struct mlx5_srq_table {
        struct radix_tree_root  tree;
 };
 
+struct mlx5_mr_table {
+       /* protect radix tree */
+       rwlock_t                lock;
+       struct radix_tree_root  tree;
+};
+
 struct mlx5_priv {
        char                    name[MLX5_MAX_NAME_LEN];
        struct mlx5_eq_table    eq_table;
@@ -504,6 +531,10 @@ struct mlx5_priv {
        struct mlx5_cq_table    cq_table;
        /* end: cq staff */
 
+       /* start: mr staff */
+       struct mlx5_mr_table    mr_table;
+       /* end: mr staff */
+
        /* start: alloc staff */
        struct mutex            pgdir_mutex;
        struct list_head        pgdir_list;
@@ -651,6 +682,11 @@ static inline void mlx5_vfree(const void *addr)
                kfree(addr);
 }
 
+static inline u32 mlx5_base_mkey(const u32 key)
+{
+       return key & 0xffffff00u;
+}
+
 int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev);
 void mlx5_dev_cleanup(struct mlx5_core_dev *dev);
 int mlx5_cmd_init(struct mlx5_core_dev *dev);
@@ -685,6 +721,8 @@ int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
                        struct mlx5_query_srq_mbox_out *out);
 int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
                      u16 lwm, int is_srq);
+void mlx5_init_mr_table(struct mlx5_core_dev *dev);
+void mlx5_cleanup_mr_table(struct mlx5_core_dev *dev);
 int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
                          struct mlx5_create_mkey_mbox_in *in, int inlen,
                          mlx5_cmd_cbk_t callback, void *context,
@@ -746,6 +784,9 @@ void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db);
 const char *mlx5_command_str(int command);
 int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev);
 void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev);
+int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn,
+                        int npsvs, u32 *sig_index);
+int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num);
 
 static inline u32 mlx5_mkey_to_idx(u32 mkey)
 {
index d51eff7..f829ad8 100644
@@ -37,6 +37,9 @@
 #include <linux/mlx5/driver.h>
 
 #define MLX5_INVALID_LKEY      0x100
+#define MLX5_SIG_WQE_SIZE      (MLX5_SEND_WQE_BB * 5)
+#define MLX5_DIF_SIZE          8
+#define MLX5_STRIDE_BLOCK_OP   0x400
 
 enum mlx5_qp_optpar {
        MLX5_QP_OPTPAR_ALT_ADDR_PATH            = 1 << 0,
@@ -151,6 +154,11 @@ enum {
        MLX5_SND_DBR    = 1,
 };
 
+enum {
+       MLX5_FLAGS_INLINE       = 1<<7,
+       MLX5_FLAGS_CHECK_FREE   = 1<<5,
+};
+
 struct mlx5_wqe_fmr_seg {
        __be32                  flags;
        __be32                  mem_key;
@@ -278,6 +286,60 @@ struct mlx5_wqe_inline_seg {
        __be32  byte_count;
 };
 
+struct mlx5_bsf {
+       struct mlx5_bsf_basic {
+               u8              bsf_size_sbs;
+               u8              check_byte_mask;
+               union {
+                       u8      copy_byte_mask;
+                       u8      bs_selector;
+                       u8      rsvd_wflags;
+               } wire;
+               union {
+                       u8      bs_selector;
+                       u8      rsvd_mflags;
+               } mem;
+               __be32          raw_data_size;
+               __be32          w_bfs_psv;
+               __be32          m_bfs_psv;
+       } basic;
+       struct mlx5_bsf_ext {
+               __be32          t_init_gen_pro_size;
+               __be32          rsvd_epi_size;
+               __be32          w_tfs_psv;
+               __be32          m_tfs_psv;
+       } ext;
+       struct mlx5_bsf_inl {
+               __be32          w_inl_vld;
+               __be32          w_rsvd;
+               __be64          w_block_format;
+               __be32          m_inl_vld;
+               __be32          m_rsvd;
+               __be64          m_block_format;
+       } inl;
+};
+
+struct mlx5_klm {
+       __be32          bcount;
+       __be32          key;
+       __be64          va;
+};
+
+struct mlx5_stride_block_entry {
+       __be16          stride;
+       __be16          bcount;
+       __be32          key;
+       __be64          va;
+};
+
+struct mlx5_stride_block_ctrl_seg {
+       __be32          bcount_per_cycle;
+       __be32          op;
+       __be32          repeat_count;
+       u16             rsvd;
+       __be16          num_entries;
+};
+
 struct mlx5_core_qp {
        void (*event)           (struct mlx5_core_qp *, int);
        int                     qpn;
@@ -444,6 +506,11 @@ static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u
        return radix_tree_lookup(&dev->priv.qp_table.tree, qpn);
 }
 
+static inline struct mlx5_core_mr *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key)
+{
+       return radix_tree_lookup(&dev->priv.mr_table.tree, key);
+}
+
 int mlx5_core_create_qp(struct mlx5_core_dev *dev,
                        struct mlx5_core_qp *qp,
                        struct mlx5_create_qp_mbox_in *in,
index f29e3a2..0e3ff30 100644
@@ -601,5 +601,4 @@ struct ib_cm_sidr_rep_param {
 int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
                        struct ib_cm_sidr_rep_param *param);
 
-int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac);
 #endif /* IB_CM_H */
index 9ee0d2e..1ea0b65 100644
@@ -46,17 +46,12 @@ struct ib_umem {
        int                     page_size;
        int                     writable;
        int                     hugetlb;
-       struct list_head        chunk_list;
        struct work_struct      work;
        struct mm_struct       *mm;
        unsigned long           diff;
-};
-
-struct ib_umem_chunk {
-       struct list_head        list;
-       int                     nents;
-       int                     nmap;
-       struct scatterlist      page_list[0];
+       struct sg_table sg_head;
+       int             nmap;
+       int             npages;
 };
 
 #ifdef CONFIG_INFINIBAND_USER_MEM
index 6793f32..acd8251 100644
@@ -122,7 +122,19 @@ enum ib_device_cap_flags {
        IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22),
        IB_DEVICE_MEM_WINDOW_TYPE_2A    = (1<<23),
        IB_DEVICE_MEM_WINDOW_TYPE_2B    = (1<<24),
-       IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29)
+       IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
+       IB_DEVICE_SIGNATURE_HANDOVER    = (1<<30)
+};
+
+enum ib_signature_prot_cap {
+       IB_PROT_T10DIF_TYPE_1 = 1,
+       IB_PROT_T10DIF_TYPE_2 = 1 << 1,
+       IB_PROT_T10DIF_TYPE_3 = 1 << 2,
+};
+
+enum ib_signature_guard_cap {
+       IB_GUARD_T10DIF_CRC     = 1,
+       IB_GUARD_T10DIF_CSUM    = 1 << 1,
 };
 
 enum ib_atomic_cap {
@@ -172,6 +184,8 @@ struct ib_device_attr {
        unsigned int            max_fast_reg_page_list_len;
        u16                     max_pkeys;
        u8                      local_ca_ack_delay;
+       int                     sig_prot_cap;
+       int                     sig_guard_cap;
 };
 
 enum ib_mtu {
@@ -461,6 +475,130 @@ int ib_rate_to_mult(enum ib_rate rate) __attribute_const__;
  */
 int ib_rate_to_mbps(enum ib_rate rate) __attribute_const__;
 
+enum ib_mr_create_flags {
+       IB_MR_SIGNATURE_EN = 1,
+};
+
+/**
+ * ib_mr_init_attr - Memory region init attributes passed to routine
+ *     ib_create_mr.
+ * @max_reg_descriptors: max number of registration descriptors that
+ *     may be used with registration work requests.
+ * @flags: MR creation flags bit mask.
+ */
+struct ib_mr_init_attr {
+       int         max_reg_descriptors;
+       u32         flags;
+};
+
+enum ib_signature_type {
+       IB_SIG_TYPE_T10_DIF,
+};
+
+/**
+ * T10-DIF signature types,
+ * as defined by the SCSI specifications.
+ */
+enum ib_t10_dif_type {
+       IB_T10DIF_NONE,
+       IB_T10DIF_TYPE1,
+       IB_T10DIF_TYPE2,
+       IB_T10DIF_TYPE3
+};
+
+/**
+ * Signature T10-DIF block-guard types
+ * IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules.
+ * IB_T10DIF_CSUM: Corresponds to IP checksum rules.
+ */
+enum ib_t10_dif_bg_type {
+       IB_T10DIF_CRC,
+       IB_T10DIF_CSUM
+};
+
+/**
+ * struct ib_t10_dif_domain - Parameters specific for T10-DIF
+ *     domain.
+ * @type: T10-DIF type (0|1|2|3)
+ * @bg_type: T10-DIF block guard type (CRC|CSUM)
+ * @pi_interval: protection information interval.
+ * @bg: seed of guard computation.
+ * @app_tag: application tag of the guard block.
+ * @ref_tag: initial guard block reference tag.
+ * @type3_inc_reftag: T10-DIF type 3 does not define the
+ *     reference tag; it is the user's choice whether or
+ *     not to increment it.
+ */
+struct ib_t10_dif_domain {
+       enum ib_t10_dif_type    type;
+       enum ib_t10_dif_bg_type bg_type;
+       u16                     pi_interval;
+       u16                     bg;
+       u16                     app_tag;
+       u32                     ref_tag;
+       bool                    type3_inc_reftag;
+};
+
+/**
+ * struct ib_sig_domain - Parameters for signature domain
+ * @sig_type: specific signature type
+ * @sig: union of all signature domain attributes that may
+ *     be used to set domain layout.
+ */
+struct ib_sig_domain {
+       enum ib_signature_type sig_type;
+       union {
+               struct ib_t10_dif_domain dif;
+       } sig;
+};
+
+/**
+ * struct ib_sig_attrs - Parameters for signature handover operation
+ * @check_mask: bitmask for signature byte check (8 bytes)
+ * @mem: memory domain layout descriptor.
+ * @wire: wire domain layout descriptor.
+ */
+struct ib_sig_attrs {
+       u8                      check_mask;
+       struct ib_sig_domain    mem;
+       struct ib_sig_domain    wire;
+};
+
+enum ib_sig_err_type {
+       IB_SIG_BAD_GUARD,
+       IB_SIG_BAD_REFTAG,
+       IB_SIG_BAD_APPTAG,
+};
+
+/**
+ * struct ib_sig_err - signature error descriptor
+ */
+struct ib_sig_err {
+       enum ib_sig_err_type    err_type;
+       u32                     expected;
+       u32                     actual;
+       u64                     sig_err_offset;
+       u32                     key;
+};
+
+enum ib_mr_status_check {
+       IB_MR_CHECK_SIG_STATUS = 1,
+};
+
+/**
+ * struct ib_mr_status - Memory region status container
+ *
+ * @fail_status: Bitmask of MR check statuses. For each
+ *     failed check, the corresponding status bit is set.
+ * @sig_err: Additional info for IB_MR_CHECK_SIG_STATUS
+ *     failure.
+ */
+struct ib_mr_status {
+       u32                 fail_status;
+       struct ib_sig_err   sig_err;
+};
+
 /**
  * mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate
  * enum.
@@ -644,6 +782,7 @@ enum ib_qp_create_flags {
        IB_QP_CREATE_IPOIB_UD_LSO               = 1 << 0,
        IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK   = 1 << 1,
        IB_QP_CREATE_NETIF_QP                   = 1 << 5,
+       IB_QP_CREATE_SIGNATURE_EN               = 1 << 6,
        /* reserve bits 26-31 for low level drivers' internal use */
        IB_QP_CREATE_RESERVED_START             = 1 << 26,
        IB_QP_CREATE_RESERVED_END               = 1 << 31,
@@ -808,6 +947,7 @@ enum ib_wr_opcode {
        IB_WR_MASKED_ATOMIC_CMP_AND_SWP,
        IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
        IB_WR_BIND_MW,
+       IB_WR_REG_SIG_MR,
        /* reserve values for low level drivers' internal use.
         * These values will not be used at all in the ib core layer.
         */
@@ -913,6 +1053,12 @@ struct ib_send_wr {
                        u32                      rkey;
                        struct ib_mw_bind_info   bind_info;
                } bind_mw;
+               struct {
+                       struct ib_sig_attrs    *sig_attrs;
+                       struct ib_mr           *sig_mr;
+                       int                     access_flags;
+                       struct ib_sge          *prot;
+               } sig_handover;
        } wr;
        u32                     xrc_remote_srq_num;     /* XRC TGT QPs only */
 };
@@ -1266,10 +1412,6 @@ struct ib_dma_mapping_ops {
        void            (*unmap_sg)(struct ib_device *dev,
                                    struct scatterlist *sg, int nents,
                                    enum dma_data_direction direction);
-       u64             (*dma_address)(struct ib_device *dev,
-                                      struct scatterlist *sg);
-       unsigned int    (*dma_len)(struct ib_device *dev,
-                                  struct scatterlist *sg);
        void            (*sync_single_for_cpu)(struct ib_device *dev,
                                               u64 dma_handle,
                                               size_t size,
@@ -1407,6 +1549,9 @@ struct ib_device {
        int                        (*query_mr)(struct ib_mr *mr,
                                               struct ib_mr_attr *mr_attr);
        int                        (*dereg_mr)(struct ib_mr *mr);
+       int                        (*destroy_mr)(struct ib_mr *mr);
+       struct ib_mr *             (*create_mr)(struct ib_pd *pd,
+                                               struct ib_mr_init_attr *mr_init_attr);
        struct ib_mr *             (*alloc_fast_reg_mr)(struct ib_pd *pd,
                                               int max_page_list_len);
        struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device,
@@ -1455,6 +1600,8 @@ struct ib_device {
                                                  *flow_attr,
                                                  int domain);
        int                        (*destroy_flow)(struct ib_flow *flow_id);
+       int                        (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
+                                                     struct ib_mr_status *mr_status);
 
        struct ib_dma_mapping_ops   *dma_ops;
 
@@ -2089,12 +2236,13 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
  * ib_sg_dma_address - Return the DMA address from a scatter/gather entry
  * @dev: The device for which the DMA addresses were created
  * @sg: The scatter/gather entry
+ *
+ * Note: this function is obsolete. To do: change all occurrences of
+ * ib_sg_dma_address() into sg_dma_address().
  */
 static inline u64 ib_sg_dma_address(struct ib_device *dev,
                                    struct scatterlist *sg)
 {
-       if (dev->dma_ops)
-               return dev->dma_ops->dma_address(dev, sg);
        return sg_dma_address(sg);
 }
 
@@ -2102,12 +2250,13 @@ static inline u64 ib_sg_dma_address(struct ib_device *dev,
  * ib_sg_dma_len - Return the DMA length from a scatter/gather entry
  * @dev: The device for which the DMA addresses were created
  * @sg: The scatter/gather entry
+ *
+ * Note: this function is obsolete. To do: change all occurrences of
+ * ib_sg_dma_len() into sg_dma_len().
  */
 static inline unsigned int ib_sg_dma_len(struct ib_device *dev,
                                         struct scatterlist *sg)
 {
-       if (dev->dma_ops)
-               return dev->dma_ops->dma_len(dev, sg);
        return sg_dma_len(sg);
 }
 
@@ -2250,6 +2399,25 @@ int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
  */
 int ib_dereg_mr(struct ib_mr *mr);
 
+
+/**
+ * ib_create_mr - Allocates a memory region that may be used for
+ *     signature handover operations.
+ * @pd: The protection domain associated with the region.
+ * @mr_init_attr: memory region init attributes.
+ */
+struct ib_mr *ib_create_mr(struct ib_pd *pd,
+                          struct ib_mr_init_attr *mr_init_attr);
+
+/**
+ * ib_destroy_mr - Destroys a memory region that was created using
+ *     ib_create_mr and removes it from HW translation tables.
+ * @mr: The memory region to destroy.
+ *
+ * This function can fail if the memory region has memory windows bound to it.
+ */
+int ib_destroy_mr(struct ib_mr *mr);
+
 /**
  * ib_alloc_fast_reg_mr - Allocates memory region usable with the
  *   IB_WR_FAST_REG_MR send work request.
@@ -2435,4 +2603,19 @@ static inline int ib_check_mr_access(int flags)
        return 0;
 }
 
+/**
+ * ib_check_mr_status: lightweight check of MR status.
+ *     This routine may provide status checks on a selected
+ *     ib_mr; its first use is the signature status check.
+ *
+ * @mr: A memory region.
+ * @check_mask: Bitmask of which checks to perform, from the
+ *     ib_mr_status_check enumeration.
+ * @mr_status: The container of relevant status checks.
+ *     Failed checks will be indicated in the status bitmask
+ *     and the relevant info will be in the error item.
+ */
+int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
+                      struct ib_mr_status *mr_status);
+
 #endif /* IB_VERBS_H */
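
Taken together, the additions in this header define a three-step flow for a ULP: create a signature-enabled MR, register it with an IB_WR_REG_SIG_MR work request carrying the desired ib_sig_attrs, and query the MR after the transfer to learn whether the HCA detected a signature error. A condensed, hedged sketch; the attribute values are illustrative, and the data/protection SGEs as well as pd/qp creation are elided:

    #include <linux/err.h>
    #include <linux/printk.h>
    #include <rdma/ib_verbs.h>

    static int sig_mr_example(struct ib_pd *pd, struct ib_qp *qp)
    {
            struct ib_mr_init_attr mr_attr = {
                    .max_reg_descriptors = 2,
                    .flags               = IB_MR_SIGNATURE_EN,
            };
            struct ib_sig_attrs sig_attrs = {};
            struct ib_send_wr wr = {}, *bad_wr;
            struct ib_mr_status mr_status;
            struct ib_mr *sig_mr;
            int ret;

            sig_mr = ib_create_mr(pd, &mr_attr);
            if (IS_ERR(sig_mr))
                    return PTR_ERR(sig_mr);

            /* no protection in memory; T10-DIF type 1 + CRC on the wire */
            sig_attrs.mem.sig_type             = IB_SIG_TYPE_T10_DIF;
            sig_attrs.mem.sig.dif.type         = IB_T10DIF_NONE;
            sig_attrs.wire.sig_type            = IB_SIG_TYPE_T10_DIF;
            sig_attrs.wire.sig.dif.type        = IB_T10DIF_TYPE1;
            sig_attrs.wire.sig.dif.bg_type     = IB_T10DIF_CRC;
            sig_attrs.wire.sig.dif.pi_interval = 512;

            wr.opcode                       = IB_WR_REG_SIG_MR;
            wr.wr.sig_handover.sig_attrs    = &sig_attrs;
            wr.wr.sig_handover.sig_mr       = sig_mr;
            wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE;
            wr.wr.sig_handover.prot         = NULL;  /* protection SGE elided */
            ret = ib_post_send(qp, &wr, &bad_wr);
            if (ret)
                    return ret;

            /* ... after the transfer and its completions ... */
            ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS,
                                     &mr_status);
            if (!ret && (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS))
                    pr_err("sig error type %d at offset %llu\n",
                           mr_status.sig_err.err_type,
                           (unsigned long long)mr_status.sig_err.sig_err_offset);
            return ret;
    }
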
index b11da5c..cdb05dd 100644
@@ -41,7 +41,6 @@ enum srp_rport_state {
  * @mutex:             Protects against concurrent rport reconnect /
  *                     fast_io_fail / dev_loss_tmo activity.
  * @state:             rport state.
- * @deleted:           Whether or not srp_rport_del() has already been invoked.
  * @reconnect_delay:   Reconnect delay in seconds.
  * @failed_reconnects: Number of failed reconnect attempts.
  * @reconnect_work:    Work structure used for scheduling reconnect attempts.