qedr: Add support for PD, PKEY and CQ verbs
[cascardo/linux.git] drivers/infiniband/hw/qedr/verbs.c
/* QLogic qedr NIC Driver
 * Copyright (c) 2015-2016  QLogic Corporation
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/dma-mapping.h>
#include <linux/crc32.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/udp.h>
#include <linux/iommu.h>

#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/iw_cm.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>

#include "qedr_hsi.h"
#include <linux/qed/qed_if.h>
#include "qedr.h"
#include "verbs.h"
#include <rdma/qedr-abi.h>

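/* Convert a DQ PWM doorbell offset into the byte offset applied to the
 * doorbell BAR.
 */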
#define DB_ADDR_SHIFT(addr)             ((addr) << DB_PWM_ADDR_OFFSET_SHIFT)

int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
{
        if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
                return -EINVAL;

        *pkey = QEDR_ROCE_PKEY_DEFAULT;
        return 0;
}

int qedr_query_gid(struct ib_device *ibdev, u8 port, int index,
                   union ib_gid *sgid)
{
        struct qedr_dev *dev = get_qedr_dev(ibdev);
        int rc = 0;

        if (!rdma_cap_roce_gid_table(ibdev, port))
                return -ENODEV;

        rc = ib_get_cached_gid(ibdev, port, index, sgid, NULL);
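        /* A cache miss is not an error; report the zero GID instead. */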
        if (rc == -EAGAIN) {
                memcpy(sgid, &zgid, sizeof(*sgid));
                return 0;
        }

        DP_DEBUG(dev, QEDR_MSG_INIT, "query gid: index=%d %llx:%llx\n", index,
                 sgid->global.interface_id, sgid->global.subnet_prefix);

        return rc;
}

int qedr_add_gid(struct ib_device *device, u8 port_num,
                 unsigned int index, const union ib_gid *gid,
                 const struct ib_gid_attr *attr, void **context)
{
        if (!rdma_cap_roce_gid_table(device, port_num))
                return -EINVAL;

        if (port_num > QEDR_MAX_PORT)
                return -EINVAL;

        if (!context)
                return -EINVAL;

        return 0;
}

int qedr_del_gid(struct ib_device *device, u8 port_num,
                 unsigned int index, void **context)
{
        if (!rdma_cap_roce_gid_table(device, port_num))
                return -EINVAL;

        if (port_num > QEDR_MAX_PORT)
                return -EINVAL;

        if (!context)
                return -EINVAL;

        return 0;
}

int qedr_query_device(struct ib_device *ibdev,
                      struct ib_device_attr *attr, struct ib_udata *udata)
{
        struct qedr_dev *dev = get_qedr_dev(ibdev);
        struct qedr_device_attr *qattr = &dev->attr;

        if (!dev->rdma_ctx) {
                DP_ERR(dev,
                       "qedr_query_device called with invalid params rdma_ctx=%p\n",
                       dev->rdma_ctx);
                return -EINVAL;
        }

        memset(attr, 0, sizeof(*attr));

        attr->fw_ver = qattr->fw_ver;
        attr->sys_image_guid = qattr->sys_image_guid;
        attr->max_mr_size = qattr->max_mr_size;
        attr->page_size_cap = qattr->page_size_caps;
        attr->vendor_id = qattr->vendor_id;
        attr->vendor_part_id = qattr->vendor_part_id;
        attr->hw_ver = qattr->hw_ver;
        attr->max_qp = qattr->max_qp;
        attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
        attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
            IB_DEVICE_RC_RNR_NAK_GEN |
            IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;

        attr->max_sge = qattr->max_sge;
        attr->max_sge_rd = qattr->max_sge;
        attr->max_cq = qattr->max_cq;
        attr->max_cqe = qattr->max_cqe;
        attr->max_mr = qattr->max_mr;
        attr->max_mw = qattr->max_mw;
        attr->max_pd = qattr->max_pd;
        attr->atomic_cap = dev->atomic_cap;
        attr->max_fmr = qattr->max_fmr;
        attr->max_map_per_fmr = 16;
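        /* 1 << (fls(n) - 1) rounds n down to the nearest power of two. */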
        attr->max_qp_init_rd_atom =
            1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
        attr->max_qp_rd_atom =
            min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
                attr->max_qp_init_rd_atom);

        attr->max_srq = qattr->max_srq;
        attr->max_srq_sge = qattr->max_srq_sge;
        attr->max_srq_wr = qattr->max_srq_wr;

        attr->local_ca_ack_delay = qattr->dev_ack_delay;
        attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
        attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
        attr->max_ah = qattr->max_ah;

        return 0;
}

#define QEDR_SPEED_SDR          (1)
#define QEDR_SPEED_DDR          (2)
#define QEDR_SPEED_QDR          (4)
#define QEDR_SPEED_FDR10        (8)
#define QEDR_SPEED_FDR          (16)
#define QEDR_SPEED_EDR          (32)

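/* Map an Ethernet link speed in Mbps to the closest IB speed/width pair. */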
static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
                                            u8 *ib_width)
{
        switch (speed) {
        case 1000:
                *ib_speed = QEDR_SPEED_SDR;
                *ib_width = IB_WIDTH_1X;
                break;
        case 10000:
                *ib_speed = QEDR_SPEED_QDR;
                *ib_width = IB_WIDTH_1X;
                break;
        case 20000:
                *ib_speed = QEDR_SPEED_DDR;
                *ib_width = IB_WIDTH_4X;
                break;
        case 25000:
                *ib_speed = QEDR_SPEED_EDR;
                *ib_width = IB_WIDTH_1X;
                break;
        case 40000:
                *ib_speed = QEDR_SPEED_QDR;
                *ib_width = IB_WIDTH_4X;
                break;
        case 50000:
                *ib_speed = QEDR_SPEED_QDR;
                *ib_width = IB_WIDTH_4X;
                break;
        case 100000:
                *ib_speed = QEDR_SPEED_EDR;
                *ib_width = IB_WIDTH_4X;
                break;
        default:
                /* Unsupported */
                *ib_speed = QEDR_SPEED_SDR;
                *ib_width = IB_WIDTH_1X;
        }
}

int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
{
        struct qedr_dev *dev;
        struct qed_rdma_port *rdma_port;

        dev = get_qedr_dev(ibdev);
        if (port > 1) {
                DP_ERR(dev, "invalid_port=0x%x\n", port);
                return -EINVAL;
        }

        if (!dev->rdma_ctx) {
                DP_ERR(dev, "rdma_ctx is NULL\n");
                return -EINVAL;
        }

        rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
        memset(attr, 0, sizeof(*attr));

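        /* IB physical port state: 5 is LinkUp, 3 is Disabled. */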
        if (rdma_port->port_state == QED_RDMA_PORT_UP) {
                attr->state = IB_PORT_ACTIVE;
                attr->phys_state = 5;
        } else {
                attr->state = IB_PORT_DOWN;
                attr->phys_state = 3;
        }
        attr->max_mtu = IB_MTU_4096;
        attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
        attr->lid = 0;
        attr->lmc = 0;
        attr->sm_lid = 0;
        attr->sm_sl = 0;
        attr->port_cap_flags = IB_PORT_IP_BASED_GIDS;
        attr->gid_tbl_len = QEDR_MAX_SGID;
        attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
        attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
        attr->qkey_viol_cntr = 0;
        get_link_speed_and_width(rdma_port->link_speed,
                                 &attr->active_speed, &attr->active_width);
        attr->max_msg_sz = rdma_port->max_msg_size;
        attr->max_vl_num = 4;

        return 0;
}

int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
                     struct ib_port_modify *props)
{
        struct qedr_dev *dev;

        dev = get_qedr_dev(ibdev);
        if (port > 1) {
                DP_ERR(dev, "invalid_port=0x%x\n", port);
                return -EINVAL;
        }

        return 0;
}

static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
                         unsigned long len)
{
        struct qedr_mm *mm;

        mm = kzalloc(sizeof(*mm), GFP_KERNEL);
        if (!mm)
                return -ENOMEM;

        mm->key.phy_addr = phy_addr;
        /* This function might be called with a length which is not a multiple
         * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel
         * forces this granularity by increasing the requested size if needed.
         * When qedr_mmap is called, it will search the list with the updated
         * length as a key. To prevent search failures, the length is rounded up
         * in advance to PAGE_SIZE.
         */
        mm->key.len = roundup(len, PAGE_SIZE);
        INIT_LIST_HEAD(&mm->entry);

        mutex_lock(&uctx->mm_list_lock);
        list_add(&mm->entry, &uctx->mm_head);
        mutex_unlock(&uctx->mm_list_lock);

        DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
                 "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
                 (unsigned long long)mm->key.phy_addr,
                 (unsigned long)mm->key.len, uctx);

        return 0;
}

static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
                             unsigned long len)
{
        bool found = false;
        struct qedr_mm *mm;

        mutex_lock(&uctx->mm_list_lock);
        list_for_each_entry(mm, &uctx->mm_head, entry) {
                if (len != mm->key.len || phy_addr != mm->key.phy_addr)
                        continue;

                found = true;
                break;
        }
        mutex_unlock(&uctx->mm_list_lock);
        DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
                 "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
                 phy_addr, len, uctx, found);

        return found;
}

struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev,
                                        struct ib_udata *udata)
{
        int rc;
        struct qedr_ucontext *ctx;
        struct qedr_alloc_ucontext_resp uresp;
        struct qedr_dev *dev = get_qedr_dev(ibdev);
        struct qed_rdma_add_user_out_params oparams;

        if (!udata)
                return ERR_PTR(-EFAULT);

        ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
        if (!ctx)
                return ERR_PTR(-ENOMEM);

        rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
        if (rc) {
                DP_ERR(dev,
                       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this, consider increasing the number of DPIs, increasing the doorbell BAR size, or closing unnecessary RoCE applications. To increase the number of DPIs, consult the qedr readme\n",
                       rc);
                goto err;
        }

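        /* The new context owns a doorbell page (DPI) carved out of the
         * doorbell BAR; user space mmap()s it to ring doorbells directly.
         */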
        ctx->dpi = oparams.dpi;
        ctx->dpi_addr = oparams.dpi_addr;
        ctx->dpi_phys_addr = oparams.dpi_phys_addr;
        ctx->dpi_size = oparams.dpi_size;
        INIT_LIST_HEAD(&ctx->mm_head);
        mutex_init(&ctx->mm_list_lock);

        memset(&uresp, 0, sizeof(uresp));

        uresp.db_pa = ctx->dpi_phys_addr;
        uresp.db_size = ctx->dpi_size;
        uresp.max_send_wr = dev->attr.max_sqe;
        uresp.max_recv_wr = dev->attr.max_rqe;
        uresp.max_srq_wr = dev->attr.max_srq_wr;
        uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
        uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
        uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
        uresp.max_cqes = QEDR_MAX_CQES;

        rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
        if (rc)
                goto err;

        ctx->dev = dev;

        rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
        if (rc)
                goto err;

        DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
                 &ctx->ibucontext);
        return &ctx->ibucontext;

err:
        kfree(ctx);
        return ERR_PTR(rc);
}

int qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
{
        struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
        struct qedr_mm *mm, *tmp;
        int status = 0;

        DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
                 uctx);
        uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);

        list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
                DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
                         "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
                         mm->key.phy_addr, mm->key.len, uctx);
                list_del(&mm->entry);
                kfree(mm);
        }

        kfree(uctx);
        return status;
}

int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
        struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
        struct qedr_dev *dev = get_qedr_dev(context->device);
        unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
        u64 unmapped_db = dev->db_phys_addr;
        unsigned long len = (vma->vm_end - vma->vm_start);
        int rc = 0;
        bool found;

        DP_DEBUG(dev, QEDR_MSG_INIT,
                 "qedr_mmap called vm_page=0x%lx vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n",
                 vm_page, vma->vm_pgoff, unmapped_db, dev->db_size, len);
        if (vma->vm_start & (PAGE_SIZE - 1)) {
                DP_ERR(dev, "Vma_start not page aligned = %ld\n",
                       vma->vm_start);
                return -EINVAL;
        }

        found = qedr_search_mmap(ucontext, vm_page, len);
        if (!found) {
                DP_ERR(dev, "Vma_pgoff not found in mapped array = %ld\n",
                       vma->vm_pgoff);
                return -EINVAL;
        }

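        /* Doorbell pages are mapped write-combined and must not be mapped
         * readable; any other offset is chain memory that was registered
         * via qedr_add_mmap().
         */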
        if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
                                                     dev->db_size))) {
                DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
                if (vma->vm_flags & VM_READ) {
                        DP_ERR(dev, "Trying to map doorbell bar for read\n");
                        return -EPERM;
                }

                vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);

                rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
                                        PAGE_SIZE, vma->vm_page_prot);
        } else {
                DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping chains\n");
                rc = remap_pfn_range(vma, vma->vm_start,
                                     vma->vm_pgoff, len, vma->vm_page_prot);
        }
        DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc);
        return rc;
}

struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
                            struct ib_ucontext *context, struct ib_udata *udata)
{
        struct qedr_dev *dev = get_qedr_dev(ibdev);
        struct qedr_ucontext *uctx = NULL;
        struct qedr_alloc_pd_uresp uresp;
        struct qedr_pd *pd;
        u16 pd_id;
        int rc;

        DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
                 (udata && context) ? "User Lib" : "Kernel");

        if (!dev->rdma_ctx) {
                DP_ERR(dev, "invalid RDMA context\n");
                return ERR_PTR(-EINVAL);
        }

        pd = kzalloc(sizeof(*pd), GFP_KERNEL);
        if (!pd)
                return ERR_PTR(-ENOMEM);

        dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);

        uresp.pd_id = pd_id;
        pd->pd_id = pd_id;

        if (udata && context) {
                rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
                if (rc)
                        DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
                uctx = get_qedr_ucontext(context);
                uctx->pd = pd;
                pd->uctx = uctx;
        }

        return &pd->ibpd;
}

int qedr_dealloc_pd(struct ib_pd *ibpd)
{
        struct qedr_dev *dev = get_qedr_dev(ibpd->device);
        struct qedr_pd *pd = get_qedr_pd(ibpd);

        if (!pd) {
                pr_err("Invalid PD received in dealloc_pd\n");
                return -EINVAL;
        }

        DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
        dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);

        kfree(pd);

        return 0;
}

static void qedr_free_pbl(struct qedr_dev *dev,
                          struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
{
        struct pci_dev *pdev = dev->pdev;
        int i;

        for (i = 0; i < pbl_info->num_pbls; i++) {
                if (!pbl[i].va)
                        continue;
                dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
                                  pbl[i].va, pbl[i].pa);
        }

        kfree(pbl);
}

#define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
#define MAX_FW_PBL_PAGE_SIZE (64 * 1024)

#define NUM_PBES_ON_PAGE(_page_size) ((_page_size) / sizeof(u64))
#define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
#define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)

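/* Allocate the DMA-coherent pages that make up a PBL; for a two-layer PBL
 * the first page is filled with the physical addresses of the rest.
 */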
static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
                                           struct qedr_pbl_info *pbl_info,
                                           gfp_t flags)
{
        struct pci_dev *pdev = dev->pdev;
        struct qedr_pbl *pbl_table;
        dma_addr_t *pbl_main_tbl;
        dma_addr_t pa;
        void *va;
        int i;

        pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
        if (!pbl_table)
                return ERR_PTR(-ENOMEM);

        for (i = 0; i < pbl_info->num_pbls; i++) {
                va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size,
                                        &pa, flags);
                if (!va)
                        goto err;

                memset(va, 0, pbl_info->pbl_size);
                pbl_table[i].va = va;
                pbl_table[i].pa = pa;
        }

        /* Two-Layer PBLs, if we have more than one pbl we need to initialize
         * the first one with physical pointers to all of the rest
         */
        pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
        for (i = 0; i < pbl_info->num_pbls - 1; i++)
                pbl_main_tbl[i] = pbl_table[i + 1].pa;

        return pbl_table;

err:
        /* qedr_free_pbl() skips pages that were never allocated (NULL va),
         * so freeing the partially built table once here is sufficient and
         * avoids a double free.
         */
        qedr_free_pbl(dev, pbl_info, pbl_table);

        return ERR_PTR(-ENOMEM);
}

static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
                                struct qedr_pbl_info *pbl_info,
                                u32 num_pbes, int two_layer_capable)
{
        u32 pbl_capacity;
        u32 pbl_size;
        u32 num_pbls;

        if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
                if (num_pbes > MAX_PBES_TWO_LAYER) {
                        DP_ERR(dev, "prepare pbl table: too many pages %d\n",
                               num_pbes);
                        return -EINVAL;
                }

                /* calculate required pbl page size */
                pbl_size = MIN_FW_PBL_PAGE_SIZE;
                pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
                               NUM_PBES_ON_PAGE(pbl_size);

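                /* Grow the PBL page size until two layers cover num_pbes;
                 * e.g. a 4KB page holds 512 PBEs, so two layers of 4KB
                 * pages cover 512 * 512 = 256K data pages.
                 */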
                while (pbl_capacity < num_pbes) {
                        pbl_size *= 2;
                        pbl_capacity = pbl_size / sizeof(u64);
                        pbl_capacity = pbl_capacity * pbl_capacity;
                }

                num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
                num_pbls++;     /* One for layer 0 (points to the pbls) */
                pbl_info->two_layered = true;
        } else {
                /* One layered PBL */
                num_pbls = 1;
                pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
                                 roundup_pow_of_two((num_pbes * sizeof(u64))));
                pbl_info->two_layered = false;
        }

        pbl_info->num_pbls = num_pbls;
        pbl_info->pbl_size = pbl_size;
        pbl_info->num_pbes = num_pbes;

        DP_DEBUG(dev, QEDR_MSG_MR,
                 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
                 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);

        return 0;
}

static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
                               struct qedr_pbl *pbl,
                               struct qedr_pbl_info *pbl_info)
{
        int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
        struct qedr_pbl *pbl_tbl;
        struct scatterlist *sg;
        struct regpair *pbe;
        int entry;
        u32 addr;

        if (!pbl_info->num_pbes)
                return;

        /* If we have a two layered pbl, the first pbl points to the rest
         * of the pbls and the first entry lays on the second pbl in the table
         */
        if (pbl_info->two_layered)
                pbl_tbl = &pbl[1];
        else
                pbl_tbl = pbl;

        pbe = (struct regpair *)pbl_tbl->va;
        if (!pbe) {
                DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
                return;
        }

        pbe_cnt = 0;

        shift = ilog2(umem->page_size);

        for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
                pages = sg_dma_len(sg) >> shift;
                for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
                        /* store the page address in pbe */
                        pbe->lo = cpu_to_le32(sg_dma_address(sg) +
                                              umem->page_size * pg_cnt);
                        addr = upper_32_bits(sg_dma_address(sg) +
                                             umem->page_size * pg_cnt);
                        pbe->hi = cpu_to_le32(addr);
                        pbe_cnt++;
                        total_num_pbes++;
                        pbe++;

                        if (total_num_pbes == pbl_info->num_pbes)
                                return;

                        /* If the given pbl is full storing the pbes,
                         * move to next pbl.
                         */
                        if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
                                pbl_tbl++;
                                pbe = (struct regpair *)pbl_tbl->va;
                                pbe_cnt = 0;
                        }
                }
        }
}

static int qedr_copy_cq_uresp(struct qedr_dev *dev,
                              struct qedr_cq *cq, struct ib_udata *udata)
{
        struct qedr_create_cq_uresp uresp;
        int rc;

        memset(&uresp, 0, sizeof(uresp));

        uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
        uresp.icid = cq->icid;

        rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
        if (rc)
                DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);

        return rc;
}

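/* Advance the SW consumer over the CQE chain; wrapping past the last
 * element flips the expected toggle bit.
 */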
static void consume_cqe(struct qedr_cq *cq)
{
        if (cq->latest_cqe == cq->toggle_cqe)
                cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;

        cq->latest_cqe = qed_chain_consume(&cq->pbl);
}

static inline int qedr_align_cq_entries(int entries)
{
        u64 size, aligned_size;

        /* We allocate an extra entry that we don't report to the FW. */
        size = (entries + 1) * QEDR_CQE_SIZE;
        aligned_size = ALIGN(size, PAGE_SIZE);

        return aligned_size / QEDR_CQE_SIZE;
}

static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
                                       struct qedr_dev *dev,
                                       struct qedr_userq *q,
                                       u64 buf_addr, size_t buf_len,
                                       int access, int dmasync)
{
        int page_cnt;
        int rc;

        q->buf_addr = buf_addr;
        q->buf_len = buf_len;
        q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync);
        if (IS_ERR(q->umem)) {
                DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
                       PTR_ERR(q->umem));
                return PTR_ERR(q->umem);
        }

        page_cnt = ib_umem_page_count(q->umem);
        rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, page_cnt, 0);
        if (rc)
                goto err0;

        q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
        if (IS_ERR(q->pbl_tbl)) {
                rc = PTR_ERR(q->pbl_tbl);
                goto err0;
        }

        qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info);

        return 0;

err0:
        ib_umem_release(q->umem);

        return rc;
}

static inline void qedr_init_cq_params(struct qedr_cq *cq,
                                       struct qedr_ucontext *ctx,
                                       struct qedr_dev *dev, int vector,
                                       int chain_entries, int page_cnt,
                                       u64 pbl_ptr,
                                       struct qed_rdma_create_cq_in_params
                                       *params)
{
        memset(params, 0, sizeof(*params));
        params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
        params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
        params->cnq_id = vector;
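        /* One chain entry is held back from the FW; see
         * qedr_align_cq_entries().
         */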
        params->cq_size = chain_entries - 1;
        params->dpi = (ctx) ? ctx->dpi : dev->dpi;
        params->pbl_num_pages = page_cnt;
        params->pbl_ptr = pbl_ptr;
        params->pbl_two_level = 0;
}

static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
{
        /* Flush data before signalling doorbell */
        wmb();
        cq->db.data.agg_flags = flags;
        cq->db.data.value = cpu_to_le32(cons);
        writeq(cq->db.raw, cq->db_addr);

        /* Make sure write would stick */
        mmiowb();
}

int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
        struct qedr_cq *cq = get_qedr_cq(ibcq);
        unsigned long sflags;

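        /* GSI CQs are completed by the driver, not the FW, so there is
         * nothing to arm.
         */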
        if (cq->cq_type == QEDR_CQ_TYPE_GSI)
                return 0;

        spin_lock_irqsave(&cq->cq_lock, sflags);

        cq->arm_flags = 0;

        if (flags & IB_CQ_SOLICITED)
                cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;

        if (flags & IB_CQ_NEXT_COMP)
                cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;

        doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);

        spin_unlock_irqrestore(&cq->cq_lock, sflags);

        return 0;
}

struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
                             const struct ib_cq_init_attr *attr,
                             struct ib_ucontext *ib_ctx, struct ib_udata *udata)
{
        struct qedr_ucontext *ctx = get_qedr_ucontext(ib_ctx);
        struct qed_rdma_destroy_cq_out_params destroy_oparams;
        struct qed_rdma_destroy_cq_in_params destroy_iparams;
        struct qedr_dev *dev = get_qedr_dev(ibdev);
        struct qed_rdma_create_cq_in_params params;
        struct qedr_create_cq_ureq ureq;
        int vector = attr->comp_vector;
        int entries = attr->cqe;
        struct qedr_cq *cq;
        int chain_entries;
        int page_cnt;
        u64 pbl_ptr;
        u16 icid;
        int rc;

        DP_DEBUG(dev, QEDR_MSG_INIT,
                 "create_cq: called from %s. entries=%d, vector=%d\n",
                 udata ? "User Lib" : "Kernel", entries, vector);

        if (entries > QEDR_MAX_CQES) {
                DP_ERR(dev,
                       "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
                       entries, QEDR_MAX_CQES);
                return ERR_PTR(-EINVAL);
        }

        chain_entries = qedr_align_cq_entries(entries);
        chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);

        cq = kzalloc(sizeof(*cq), GFP_KERNEL);
        if (!cq)
                return ERR_PTR(-ENOMEM);

        if (udata) {
                memset(&ureq, 0, sizeof(ureq));
                if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
                        DP_ERR(dev,
                               "create cq: problem copying data from user space\n");
                        goto err0;
                }

                if (!ureq.len) {
                        DP_ERR(dev,
                               "create cq: cannot create a cq with 0 entries\n");
                        goto err0;
                }

                cq->cq_type = QEDR_CQ_TYPE_USER;

                rc = qedr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr,
                                          ureq.len, IB_ACCESS_LOCAL_WRITE, 1);
                if (rc)
                        goto err0;

                pbl_ptr = cq->q.pbl_tbl->pa;
                page_cnt = cq->q.pbl_info.num_pbes;
        } else {
                cq->cq_type = QEDR_CQ_TYPE_KERNEL;

                rc = dev->ops->common->chain_alloc(dev->cdev,
                                                   QED_CHAIN_USE_TO_CONSUME,
                                                   QED_CHAIN_MODE_PBL,
                                                   QED_CHAIN_CNT_TYPE_U32,
                                                   chain_entries,
                                                   sizeof(union rdma_cqe),
                                                   &cq->pbl);
                if (rc)
                        goto err1;

                page_cnt = qed_chain_get_page_cnt(&cq->pbl);
                pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
        }

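        /* Either way the CQ buffer is now described by a PBL (pbl_ptr,
         * page_cnt), which is what the FW consumes.
         */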
        qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
                            pbl_ptr, &params);

        rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
        if (rc)
                goto err2;

        cq->icid = icid;
        cq->sig = QEDR_CQ_MAGIC_NUMBER;
        spin_lock_init(&cq->cq_lock);

        if (ib_ctx) {
                rc = qedr_copy_cq_uresp(dev, cq, udata);
                if (rc)
                        goto err3;
        } else {
                /* Generate doorbell address. */
                cq->db_addr = dev->db_addr +
                    DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
                cq->db.data.icid = cq->icid;
                cq->db.data.params = DB_AGG_CMD_SET <<
                    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;

                /* point to the very last element, passing it we will toggle */
                cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
                cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
                cq->latest_cqe = NULL;
                consume_cqe(cq);
                cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
        }

        DP_DEBUG(dev, QEDR_MSG_CQ,
                 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
                 cq->icid, cq, params.cq_size);

        return &cq->ibcq;

err3:
        destroy_iparams.icid = cq->icid;
        dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
                                  &destroy_oparams);
err2:
        if (udata)
                qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
        else
                dev->ops->common->chain_free(dev->cdev, &cq->pbl);
err1:
        if (udata)
                ib_umem_release(cq->q.umem);
err0:
        kfree(cq);
        return ERR_PTR(-EINVAL);
}

int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
{
        struct qedr_dev *dev = get_qedr_dev(ibcq->device);
        struct qedr_cq *cq = get_qedr_cq(ibcq);

        DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);

        return 0;
}

int qedr_destroy_cq(struct ib_cq *ibcq)
{
        struct qedr_dev *dev = get_qedr_dev(ibcq->device);
        struct qed_rdma_destroy_cq_out_params oparams;
        struct qed_rdma_destroy_cq_in_params iparams;
        struct qedr_cq *cq = get_qedr_cq(ibcq);

        DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq: cq_id %d\n", cq->icid);

        /* GSI CQs are handled by the driver, so they don't exist in the FW */
        if (cq->cq_type != QEDR_CQ_TYPE_GSI) {
                iparams.icid = cq->icid;
                dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
                dev->ops->common->chain_free(dev->cdev, &cq->pbl);
        }

        if (ibcq->uobject && ibcq->uobject->context) {
                qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
                ib_umem_release(cq->q.umem);
        }

        kfree(cq);

        return 0;
}