IB/mlx4: Fix a sparse endianness warning
[cascardo/linux.git] / drivers / infiniband / hw / ehca / ehca_mrmw.c
1 /*
2  *  IBM eServer eHCA Infiniband device driver for Linux on POWER
3  *
4  *  MR/MW functions
5  *
6  *  Authors: Dietmar Decker <ddecker@de.ibm.com>
7  *           Christoph Raisch <raisch@de.ibm.com>
8  *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
9  *
10  *  Copyright (c) 2005 IBM Corporation
11  *
12  *  All rights reserved.
13  *
14  *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
15  *  BSD.
16  *
17  * OpenIB BSD License
18  *
19  * Redistribution and use in source and binary forms, with or without
20  * modification, are permitted provided that the following conditions are met:
21  *
22  * Redistributions of source code must retain the above copyright notice, this
23  * list of conditions and the following disclaimer.
24  *
25  * Redistributions in binary form must reproduce the above copyright notice,
26  * this list of conditions and the following disclaimer in the documentation
27  * and/or other materials
28  * provided with the distribution.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
31  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
34  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
35  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
36  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
37  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
38  * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
39  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
40  * POSSIBILITY OF SUCH DAMAGE.
41  */
42
43 #include <linux/slab.h>
44 #include <rdma/ib_umem.h>
45
46 #include "ehca_iverbs.h"
47 #include "ehca_mrmw.h"
48 #include "hcp_if.h"
49 #include "hipz_hw.h"
50
/*
 * Number of chunk_size-sized chunks needed to cover length bytes, i.e. an
 * integer division that rounds up.
 *
 * Every use of an argument is parenthesized so that expressions with
 * low-precedence operators (e.g. "1 << shift") expand as intended.
 * chunk_size is evaluated more than once, so it must not have side effects.
 */
#define NUM_CHUNKS(length, chunk_size) \
	(((length) + ((chunk_size) - 1)) / (chunk_size))
53
/* upper bound on the number of rpages per register_rpages hcall */
#define MAX_RPAGES 512

/*
 * DMEM toleration management
 *
 * Section and huge page geometry, plus the layout constants of the
 * top/dir/ent memory map (see struct ehca_bmap and friends).
 */
#define EHCA_SECTSHIFT         SECTION_SIZE_BITS
#define EHCA_SECTSIZE          (1UL << EHCA_SECTSHIFT)
#define EHCA_HUGEPAGESHIFT     34
#define EHCA_HUGEPAGE_SIZE     (1UL << EHCA_HUGEPAGESHIFT)
#define EHCA_HUGEPAGE_PFN_MASK ((EHCA_HUGEPAGE_SIZE - 1) >> PAGE_SHIFT)
#define EHCA_INVAL_ADDR        0xFFFFFFFFFFFFFFFFULL

/* each map level is indexed by 13 bits: 8k entries in a 64k block */
#define EHCA_DIR_INDEX_SHIFT   13
#define EHCA_TOP_INDEX_SHIFT   (EHCA_DIR_INDEX_SHIFT * 2)
#define EHCA_MAP_ENTRIES       (1 << EHCA_DIR_INDEX_SHIFT)
#define EHCA_INDEX_MASK        (EHCA_MAP_ENTRIES - 1)

/* map sizes are currently fixed */
#define EHCA_TOP_MAP_SIZE      (0x10000)
#define EHCA_DIR_MAP_SIZE      (0x10000)
#define EHCA_ENT_MAP_SIZE      (0x10000)
71
72 static unsigned long ehca_mr_len;
73
74 /*
75  * Memory map data structures
76  */
77 struct ehca_dir_bmap {
78         u64 ent[EHCA_MAP_ENTRIES];
79 };
80 struct ehca_top_bmap {
81         struct ehca_dir_bmap *dir[EHCA_MAP_ENTRIES];
82 };
83 struct ehca_bmap {
84         struct ehca_top_bmap *top[EHCA_MAP_ENTRIES];
85 };
86
87 static struct ehca_bmap *ehca_bmap;
88
89 static struct kmem_cache *mr_cache;
90 static struct kmem_cache *mw_cache;
91
/* MR page sizes (4K, 64K, 1M, 16M) expressed as shift counts */
#define EHCA_MR_PGSHIFT4K  12
#define EHCA_MR_PGSHIFT64K 16
#define EHCA_MR_PGSHIFT1M  20
#define EHCA_MR_PGSHIFT16M 24

/* the same MR page sizes expressed in bytes */
enum ehca_mr_pgsize {
	EHCA_MR_PGSIZE4K  = 1L << EHCA_MR_PGSHIFT4K,
	EHCA_MR_PGSIZE64K = 1L << EHCA_MR_PGSHIFT64K,
	EHCA_MR_PGSIZE1M  = 1L << EHCA_MR_PGSHIFT1M,
	EHCA_MR_PGSIZE16M = 1L << EHCA_MR_PGSHIFT16M
};
103
104 static u64 ehca_map_vaddr(void *caddr);
105
106 static u32 ehca_encode_hwpage_size(u32 pgsize)
107 {
108         int log = ilog2(pgsize);
109         WARN_ON(log < 12 || log > 24 || log & 3);
110         return (log - 12) / 4;
111 }
112
113 static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca)
114 {
115         return rounddown_pow_of_two(shca->hca_cap_mr_pgsize);
116 }
117
118 static struct ehca_mr *ehca_mr_new(void)
119 {
120         struct ehca_mr *me;
121
122         me = kmem_cache_zalloc(mr_cache, GFP_KERNEL);
123         if (me)
124                 spin_lock_init(&me->mrlock);
125         else
126                 ehca_gen_err("alloc failed");
127
128         return me;
129 }
130
131 static void ehca_mr_delete(struct ehca_mr *me)
132 {
133         kmem_cache_free(mr_cache, me);
134 }
135
136 static struct ehca_mw *ehca_mw_new(void)
137 {
138         struct ehca_mw *me;
139
140         me = kmem_cache_zalloc(mw_cache, GFP_KERNEL);
141         if (me)
142                 spin_lock_init(&me->mwlock);
143         else
144                 ehca_gen_err("alloc failed");
145
146         return me;
147 }
148
149 static void ehca_mw_delete(struct ehca_mw *me)
150 {
151         kmem_cache_free(mw_cache, me);
152 }
153
154 /*----------------------------------------------------------------------*/
155
156 struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
157 {
158         struct ib_mr *ib_mr;
159         int ret;
160         struct ehca_mr *e_maxmr;
161         struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
162         struct ehca_shca *shca =
163                 container_of(pd->device, struct ehca_shca, ib_device);
164
165         if (shca->maxmr) {
166                 e_maxmr = ehca_mr_new();
167                 if (!e_maxmr) {
168                         ehca_err(&shca->ib_device, "out of memory");
169                         ib_mr = ERR_PTR(-ENOMEM);
170                         goto get_dma_mr_exit0;
171                 }
172
173                 ret = ehca_reg_maxmr(shca, e_maxmr,
174                                      (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)),
175                                      mr_access_flags, e_pd,
176                                      &e_maxmr->ib.ib_mr.lkey,
177                                      &e_maxmr->ib.ib_mr.rkey);
178                 if (ret) {
179                         ehca_mr_delete(e_maxmr);
180                         ib_mr = ERR_PTR(ret);
181                         goto get_dma_mr_exit0;
182                 }
183                 ib_mr = &e_maxmr->ib.ib_mr;
184         } else {
185                 ehca_err(&shca->ib_device, "no internal max-MR exist!");
186                 ib_mr = ERR_PTR(-EINVAL);
187                 goto get_dma_mr_exit0;
188         }
189
190 get_dma_mr_exit0:
191         if (IS_ERR(ib_mr))
192                 ehca_err(&shca->ib_device, "h_ret=%li pd=%p mr_access_flags=%x",
193                          PTR_ERR(ib_mr), pd, mr_access_flags);
194         return ib_mr;
195 } /* end ehca_get_dma_mr() */
196
197 /*----------------------------------------------------------------------*/
198
199 struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
200                                struct ib_phys_buf *phys_buf_array,
201                                int num_phys_buf,
202                                int mr_access_flags,
203                                u64 *iova_start)
204 {
205         struct ib_mr *ib_mr;
206         int ret;
207         struct ehca_mr *e_mr;
208         struct ehca_shca *shca =
209                 container_of(pd->device, struct ehca_shca, ib_device);
210         struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
211
212         u64 size;
213
214         if ((num_phys_buf <= 0) || !phys_buf_array) {
215                 ehca_err(pd->device, "bad input values: num_phys_buf=%x "
216                          "phys_buf_array=%p", num_phys_buf, phys_buf_array);
217                 ib_mr = ERR_PTR(-EINVAL);
218                 goto reg_phys_mr_exit0;
219         }
220         if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
221              !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
222             ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
223              !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
224                 /*
225                  * Remote Write Access requires Local Write Access
226                  * Remote Atomic Access requires Local Write Access
227                  */
228                 ehca_err(pd->device, "bad input values: mr_access_flags=%x",
229                          mr_access_flags);
230                 ib_mr = ERR_PTR(-EINVAL);
231                 goto reg_phys_mr_exit0;
232         }
233
234         /* check physical buffer list and calculate size */
235         ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf,
236                                             iova_start, &size);
237         if (ret) {
238                 ib_mr = ERR_PTR(ret);
239                 goto reg_phys_mr_exit0;
240         }
241         if ((size == 0) ||
242             (((u64)iova_start + size) < (u64)iova_start)) {
243                 ehca_err(pd->device, "bad input values: size=%llx iova_start=%p",
244                          size, iova_start);
245                 ib_mr = ERR_PTR(-EINVAL);
246                 goto reg_phys_mr_exit0;
247         }
248
249         e_mr = ehca_mr_new();
250         if (!e_mr) {
251                 ehca_err(pd->device, "out of memory");
252                 ib_mr = ERR_PTR(-ENOMEM);
253                 goto reg_phys_mr_exit0;
254         }
255
256         /* register MR on HCA */
257         if (ehca_mr_is_maxmr(size, iova_start)) {
258                 e_mr->flags |= EHCA_MR_FLAG_MAXMR;
259                 ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags,
260                                      e_pd, &e_mr->ib.ib_mr.lkey,
261                                      &e_mr->ib.ib_mr.rkey);
262                 if (ret) {
263                         ib_mr = ERR_PTR(ret);
264                         goto reg_phys_mr_exit1;
265                 }
266         } else {
267                 struct ehca_mr_pginfo pginfo;
268                 u32 num_kpages;
269                 u32 num_hwpages;
270                 u64 hw_pgsize;
271
272                 num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size,
273                                         PAGE_SIZE);
274                 /* for kernel space we try most possible pgsize */
275                 hw_pgsize = ehca_get_max_hwpage_size(shca);
276                 num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size,
277                                          hw_pgsize);
278                 memset(&pginfo, 0, sizeof(pginfo));
279                 pginfo.type = EHCA_MR_PGI_PHYS;
280                 pginfo.num_kpages = num_kpages;
281                 pginfo.hwpage_size = hw_pgsize;
282                 pginfo.num_hwpages = num_hwpages;
283                 pginfo.u.phy.num_phys_buf = num_phys_buf;
284                 pginfo.u.phy.phys_buf_array = phys_buf_array;
285                 pginfo.next_hwpage =
286                         ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
287
288                 ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
289                                   e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
290                                   &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
291                 if (ret) {
292                         ib_mr = ERR_PTR(ret);
293                         goto reg_phys_mr_exit1;
294                 }
295         }
296
297         /* successful registration of all pages */
298         return &e_mr->ib.ib_mr;
299
300 reg_phys_mr_exit1:
301         ehca_mr_delete(e_mr);
302 reg_phys_mr_exit0:
303         if (IS_ERR(ib_mr))
304                 ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p "
305                          "num_phys_buf=%x mr_access_flags=%x iova_start=%p",
306                          PTR_ERR(ib_mr), pd, phys_buf_array,
307                          num_phys_buf, mr_access_flags, iova_start);
308         return ib_mr;
309 } /* end ehca_reg_phys_mr() */
310
311 /*----------------------------------------------------------------------*/
312
313 struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
314                                u64 virt, int mr_access_flags,
315                                struct ib_udata *udata)
316 {
317         struct ib_mr *ib_mr;
318         struct ehca_mr *e_mr;
319         struct ehca_shca *shca =
320                 container_of(pd->device, struct ehca_shca, ib_device);
321         struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
322         struct ehca_mr_pginfo pginfo;
323         int ret, page_shift;
324         u32 num_kpages;
325         u32 num_hwpages;
326         u64 hwpage_size;
327
328         if (!pd) {
329                 ehca_gen_err("bad pd=%p", pd);
330                 return ERR_PTR(-EFAULT);
331         }
332
333         if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
334              !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
335             ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
336              !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
337                 /*
338                  * Remote Write Access requires Local Write Access
339                  * Remote Atomic Access requires Local Write Access
340                  */
341                 ehca_err(pd->device, "bad input values: mr_access_flags=%x",
342                          mr_access_flags);
343                 ib_mr = ERR_PTR(-EINVAL);
344                 goto reg_user_mr_exit0;
345         }
346
347         if (length == 0 || virt + length < virt) {
348                 ehca_err(pd->device, "bad input values: length=%llx "
349                          "virt_base=%llx", length, virt);
350                 ib_mr = ERR_PTR(-EINVAL);
351                 goto reg_user_mr_exit0;
352         }
353
354         e_mr = ehca_mr_new();
355         if (!e_mr) {
356                 ehca_err(pd->device, "out of memory");
357                 ib_mr = ERR_PTR(-ENOMEM);
358                 goto reg_user_mr_exit0;
359         }
360
361         e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
362                                  mr_access_flags, 0);
363         if (IS_ERR(e_mr->umem)) {
364                 ib_mr = (void *)e_mr->umem;
365                 goto reg_user_mr_exit1;
366         }
367
368         if (e_mr->umem->page_size != PAGE_SIZE) {
369                 ehca_err(pd->device, "page size not supported, "
370                          "e_mr->umem->page_size=%x", e_mr->umem->page_size);
371                 ib_mr = ERR_PTR(-EINVAL);
372                 goto reg_user_mr_exit2;
373         }
374
375         /* determine number of MR pages */
376         num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE);
377         /* select proper hw_pgsize */
378         page_shift = PAGE_SHIFT;
379         if (e_mr->umem->hugetlb) {
380                 /* determine page_shift, clamp between 4K and 16M */
381                 page_shift = (fls64(length - 1) + 3) & ~3;
382                 page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K),
383                                  EHCA_MR_PGSHIFT16M);
384         }
385         hwpage_size = 1UL << page_shift;
386
387         /* now that we have the desired page size, shift until it's
388          * supported, too. 4K is always supported, so this terminates.
389          */
390         while (!(hwpage_size & shca->hca_cap_mr_pgsize))
391                 hwpage_size >>= 4;
392
393 reg_user_mr_fallback:
394         num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size);
395         /* register MR on HCA */
396         memset(&pginfo, 0, sizeof(pginfo));
397         pginfo.type = EHCA_MR_PGI_USER;
398         pginfo.hwpage_size = hwpage_size;
399         pginfo.num_kpages = num_kpages;
400         pginfo.num_hwpages = num_hwpages;
401         pginfo.u.usr.region = e_mr->umem;
402         pginfo.next_hwpage = e_mr->umem->offset / hwpage_size;
403         pginfo.u.usr.next_chunk = list_prepare_entry(pginfo.u.usr.next_chunk,
404                                                      (&e_mr->umem->chunk_list),
405                                                      list);
406
407         ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags,
408                           e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
409                           &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
410         if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) {
411                 ehca_warn(pd->device, "failed to register mr "
412                           "with hwpage_size=%llx", hwpage_size);
413                 ehca_info(pd->device, "try to register mr with "
414                           "kpage_size=%lx", PAGE_SIZE);
415                 /*
416                  * this means kpages are not contiguous for a hw page
417                  * try kernel page size as fallback solution
418                  */
419                 hwpage_size = PAGE_SIZE;
420                 goto reg_user_mr_fallback;
421         }
422         if (ret) {
423                 ib_mr = ERR_PTR(ret);
424                 goto reg_user_mr_exit2;
425         }
426
427         /* successful registration of all pages */
428         return &e_mr->ib.ib_mr;
429
430 reg_user_mr_exit2:
431         ib_umem_release(e_mr->umem);
432 reg_user_mr_exit1:
433         ehca_mr_delete(e_mr);
434 reg_user_mr_exit0:
435         if (IS_ERR(ib_mr))
436                 ehca_err(pd->device, "rc=%li pd=%p mr_access_flags=%x udata=%p",
437                          PTR_ERR(ib_mr), pd, mr_access_flags, udata);
438         return ib_mr;
439 } /* end ehca_reg_user_mr() */
440
441 /*----------------------------------------------------------------------*/
442
443 int ehca_rereg_phys_mr(struct ib_mr *mr,
444                        int mr_rereg_mask,
445                        struct ib_pd *pd,
446                        struct ib_phys_buf *phys_buf_array,
447                        int num_phys_buf,
448                        int mr_access_flags,
449                        u64 *iova_start)
450 {
451         int ret;
452
453         struct ehca_shca *shca =
454                 container_of(mr->device, struct ehca_shca, ib_device);
455         struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
456         u64 new_size;
457         u64 *new_start;
458         u32 new_acl;
459         struct ehca_pd *new_pd;
460         u32 tmp_lkey, tmp_rkey;
461         unsigned long sl_flags;
462         u32 num_kpages = 0;
463         u32 num_hwpages = 0;
464         struct ehca_mr_pginfo pginfo;
465
466         if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) {
467                 /* TODO not supported, because PHYP rereg hCall needs pages */
468                 ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not "
469                          "supported yet, mr_rereg_mask=%x", mr_rereg_mask);
470                 ret = -EINVAL;
471                 goto rereg_phys_mr_exit0;
472         }
473
474         if (mr_rereg_mask & IB_MR_REREG_PD) {
475                 if (!pd) {
476                         ehca_err(mr->device, "rereg with bad pd, pd=%p "
477                                  "mr_rereg_mask=%x", pd, mr_rereg_mask);
478                         ret = -EINVAL;
479                         goto rereg_phys_mr_exit0;
480                 }
481         }
482
483         if ((mr_rereg_mask &
484              ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) ||
485             (mr_rereg_mask == 0)) {
486                 ret = -EINVAL;
487                 goto rereg_phys_mr_exit0;
488         }
489
490         /* check other parameters */
491         if (e_mr == shca->maxmr) {
492                 /* should be impossible, however reject to be sure */
493                 ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p "
494                          "shca->maxmr=%p mr->lkey=%x",
495                          mr, shca->maxmr, mr->lkey);
496                 ret = -EINVAL;
497                 goto rereg_phys_mr_exit0;
498         }
499         if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. addr/size */
500                 if (e_mr->flags & EHCA_MR_FLAG_FMR) {
501                         ehca_err(mr->device, "not supported for FMR, mr=%p "
502                                  "flags=%x", mr, e_mr->flags);
503                         ret = -EINVAL;
504                         goto rereg_phys_mr_exit0;
505                 }
506                 if (!phys_buf_array || num_phys_buf <= 0) {
507                         ehca_err(mr->device, "bad input values mr_rereg_mask=%x"
508                                  " phys_buf_array=%p num_phys_buf=%x",
509                                  mr_rereg_mask, phys_buf_array, num_phys_buf);
510                         ret = -EINVAL;
511                         goto rereg_phys_mr_exit0;
512                 }
513         }
514         if ((mr_rereg_mask & IB_MR_REREG_ACCESS) &&     /* change ACL */
515             (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
516               !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
517              ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
518               !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) {
519                 /*
520                  * Remote Write Access requires Local Write Access
521                  * Remote Atomic Access requires Local Write Access
522                  */
523                 ehca_err(mr->device, "bad input values: mr_rereg_mask=%x "
524                          "mr_access_flags=%x", mr_rereg_mask, mr_access_flags);
525                 ret = -EINVAL;
526                 goto rereg_phys_mr_exit0;
527         }
528
529         /* set requested values dependent on rereg request */
530         spin_lock_irqsave(&e_mr->mrlock, sl_flags);
531         new_start = e_mr->start;
532         new_size = e_mr->size;
533         new_acl = e_mr->acl;
534         new_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
535
536         if (mr_rereg_mask & IB_MR_REREG_TRANS) {
537                 u64 hw_pgsize = ehca_get_max_hwpage_size(shca);
538
539                 new_start = iova_start; /* change address */
540                 /* check physical buffer list and calculate size */
541                 ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array,
542                                                     num_phys_buf, iova_start,
543                                                     &new_size);
544                 if (ret)
545                         goto rereg_phys_mr_exit1;
546                 if ((new_size == 0) ||
547                     (((u64)iova_start + new_size) < (u64)iova_start)) {
548                         ehca_err(mr->device, "bad input values: new_size=%llx "
549                                  "iova_start=%p", new_size, iova_start);
550                         ret = -EINVAL;
551                         goto rereg_phys_mr_exit1;
552                 }
553                 num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) +
554                                         new_size, PAGE_SIZE);
555                 num_hwpages = NUM_CHUNKS(((u64)new_start % hw_pgsize) +
556                                          new_size, hw_pgsize);
557                 memset(&pginfo, 0, sizeof(pginfo));
558                 pginfo.type = EHCA_MR_PGI_PHYS;
559                 pginfo.num_kpages = num_kpages;
560                 pginfo.hwpage_size = hw_pgsize;
561                 pginfo.num_hwpages = num_hwpages;
562                 pginfo.u.phy.num_phys_buf = num_phys_buf;
563                 pginfo.u.phy.phys_buf_array = phys_buf_array;
564                 pginfo.next_hwpage =
565                         ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
566         }
567         if (mr_rereg_mask & IB_MR_REREG_ACCESS)
568                 new_acl = mr_access_flags;
569         if (mr_rereg_mask & IB_MR_REREG_PD)
570                 new_pd = container_of(pd, struct ehca_pd, ib_pd);
571
572         ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl,
573                             new_pd, &pginfo, &tmp_lkey, &tmp_rkey);
574         if (ret)
575                 goto rereg_phys_mr_exit1;
576
577         /* successful reregistration */
578         if (mr_rereg_mask & IB_MR_REREG_PD)
579                 mr->pd = pd;
580         mr->lkey = tmp_lkey;
581         mr->rkey = tmp_rkey;
582
583 rereg_phys_mr_exit1:
584         spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
585 rereg_phys_mr_exit0:
586         if (ret)
587                 ehca_err(mr->device, "ret=%i mr=%p mr_rereg_mask=%x pd=%p "
588                          "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x "
589                          "iova_start=%p",
590                          ret, mr, mr_rereg_mask, pd, phys_buf_array,
591                          num_phys_buf, mr_access_flags, iova_start);
592         return ret;
593 } /* end ehca_rereg_phys_mr() */
594
595 /*----------------------------------------------------------------------*/
596
597 int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
598 {
599         int ret = 0;
600         u64 h_ret;
601         struct ehca_shca *shca =
602                 container_of(mr->device, struct ehca_shca, ib_device);
603         struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
604         unsigned long sl_flags;
605         struct ehca_mr_hipzout_parms hipzout;
606
607         if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
608                 ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
609                          "e_mr->flags=%x", mr, e_mr, e_mr->flags);
610                 ret = -EINVAL;
611                 goto query_mr_exit0;
612         }
613
614         memset(mr_attr, 0, sizeof(struct ib_mr_attr));
615         spin_lock_irqsave(&e_mr->mrlock, sl_flags);
616
617         h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout);
618         if (h_ret != H_SUCCESS) {
619                 ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lli mr=%p "
620                          "hca_hndl=%llx mr_hndl=%llx lkey=%x",
621                          h_ret, mr, shca->ipz_hca_handle.handle,
622                          e_mr->ipz_mr_handle.handle, mr->lkey);
623                 ret = ehca2ib_return_code(h_ret);
624                 goto query_mr_exit1;
625         }
626         mr_attr->pd = mr->pd;
627         mr_attr->device_virt_addr = hipzout.vaddr;
628         mr_attr->size = hipzout.len;
629         mr_attr->lkey = hipzout.lkey;
630         mr_attr->rkey = hipzout.rkey;
631         ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags);
632
633 query_mr_exit1:
634         spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
635 query_mr_exit0:
636         if (ret)
637                 ehca_err(mr->device, "ret=%i mr=%p mr_attr=%p",
638                          ret, mr, mr_attr);
639         return ret;
640 } /* end ehca_query_mr() */
641
642 /*----------------------------------------------------------------------*/
643
644 int ehca_dereg_mr(struct ib_mr *mr)
645 {
646         int ret = 0;
647         u64 h_ret;
648         struct ehca_shca *shca =
649                 container_of(mr->device, struct ehca_shca, ib_device);
650         struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
651
652         if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
653                 ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
654                          "e_mr->flags=%x", mr, e_mr, e_mr->flags);
655                 ret = -EINVAL;
656                 goto dereg_mr_exit0;
657         } else if (e_mr == shca->maxmr) {
658                 /* should be impossible, however reject to be sure */
659                 ehca_err(mr->device, "dereg internal max-MR impossible, mr=%p "
660                          "shca->maxmr=%p mr->lkey=%x",
661                          mr, shca->maxmr, mr->lkey);
662                 ret = -EINVAL;
663                 goto dereg_mr_exit0;
664         }
665
666         /* TODO: BUSY: MR still has bound window(s) */
667         h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
668         if (h_ret != H_SUCCESS) {
669                 ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lli shca=%p "
670                          "e_mr=%p hca_hndl=%llx mr_hndl=%llx mr->lkey=%x",
671                          h_ret, shca, e_mr, shca->ipz_hca_handle.handle,
672                          e_mr->ipz_mr_handle.handle, mr->lkey);
673                 ret = ehca2ib_return_code(h_ret);
674                 goto dereg_mr_exit0;
675         }
676
677         if (e_mr->umem)
678                 ib_umem_release(e_mr->umem);
679
680         /* successful deregistration */
681         ehca_mr_delete(e_mr);
682
683 dereg_mr_exit0:
684         if (ret)
685                 ehca_err(mr->device, "ret=%i mr=%p", ret, mr);
686         return ret;
687 } /* end ehca_dereg_mr() */
688
689 /*----------------------------------------------------------------------*/
690
691 struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
692 {
693         struct ib_mw *ib_mw;
694         u64 h_ret;
695         struct ehca_mw *e_mw;
696         struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
697         struct ehca_shca *shca =
698                 container_of(pd->device, struct ehca_shca, ib_device);
699         struct ehca_mw_hipzout_parms hipzout;
700
701         if (type != IB_MW_TYPE_1)
702                 return ERR_PTR(-EINVAL);
703
704         e_mw = ehca_mw_new();
705         if (!e_mw) {
706                 ib_mw = ERR_PTR(-ENOMEM);
707                 goto alloc_mw_exit0;
708         }
709
710         h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw,
711                                          e_pd->fw_pd, &hipzout);
712         if (h_ret != H_SUCCESS) {
713                 ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lli "
714                          "shca=%p hca_hndl=%llx mw=%p",
715                          h_ret, shca, shca->ipz_hca_handle.handle, e_mw);
716                 ib_mw = ERR_PTR(ehca2ib_return_code(h_ret));
717                 goto alloc_mw_exit1;
718         }
719         /* successful MW allocation */
720         e_mw->ipz_mw_handle = hipzout.handle;
721         e_mw->ib_mw.rkey    = hipzout.rkey;
722         return &e_mw->ib_mw;
723
724 alloc_mw_exit1:
725         ehca_mw_delete(e_mw);
726 alloc_mw_exit0:
727         if (IS_ERR(ib_mw))
728                 ehca_err(pd->device, "h_ret=%li pd=%p", PTR_ERR(ib_mw), pd);
729         return ib_mw;
730 } /* end ehca_alloc_mw() */
731
732 /*----------------------------------------------------------------------*/
733
734 int ehca_bind_mw(struct ib_qp *qp,
735                  struct ib_mw *mw,
736                  struct ib_mw_bind *mw_bind)
737 {
738         /* TODO: not supported up to now */
739         ehca_gen_err("bind MW currently not supported by HCAD");
740
741         return -EPERM;
742 } /* end ehca_bind_mw() */
743
744 /*----------------------------------------------------------------------*/
745
746 int ehca_dealloc_mw(struct ib_mw *mw)
747 {
748         u64 h_ret;
749         struct ehca_shca *shca =
750                 container_of(mw->device, struct ehca_shca, ib_device);
751         struct ehca_mw *e_mw = container_of(mw, struct ehca_mw, ib_mw);
752
753         h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw);
754         if (h_ret != H_SUCCESS) {
755                 ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lli shca=%p "
756                          "mw=%p rkey=%x hca_hndl=%llx mw_hndl=%llx",
757                          h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle,
758                          e_mw->ipz_mw_handle.handle);
759                 return ehca2ib_return_code(h_ret);
760         }
761         /* successful deallocation */
762         ehca_mw_delete(e_mw);
763         return 0;
764 } /* end ehca_dealloc_mw() */
765
766 /*----------------------------------------------------------------------*/
767
768 struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
769                               int mr_access_flags,
770                               struct ib_fmr_attr *fmr_attr)
771 {
772         struct ib_fmr *ib_fmr;
773         struct ehca_shca *shca =
774                 container_of(pd->device, struct ehca_shca, ib_device);
775         struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
776         struct ehca_mr *e_fmr;
777         int ret;
778         u32 tmp_lkey, tmp_rkey;
779         struct ehca_mr_pginfo pginfo;
780         u64 hw_pgsize;
781
782         /* check other parameters */
783         if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
784              !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
785             ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
786              !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
787                 /*
788                  * Remote Write Access requires Local Write Access
789                  * Remote Atomic Access requires Local Write Access
790                  */
791                 ehca_err(pd->device, "bad input values: mr_access_flags=%x",
792                          mr_access_flags);
793                 ib_fmr = ERR_PTR(-EINVAL);
794                 goto alloc_fmr_exit0;
795         }
796         if (mr_access_flags & IB_ACCESS_MW_BIND) {
797                 ehca_err(pd->device, "bad input values: mr_access_flags=%x",
798                          mr_access_flags);
799                 ib_fmr = ERR_PTR(-EINVAL);
800                 goto alloc_fmr_exit0;
801         }
802         if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) {
803                 ehca_err(pd->device, "bad input values: fmr_attr->max_pages=%x "
804                          "fmr_attr->max_maps=%x fmr_attr->page_shift=%x",
805                          fmr_attr->max_pages, fmr_attr->max_maps,
806                          fmr_attr->page_shift);
807                 ib_fmr = ERR_PTR(-EINVAL);
808                 goto alloc_fmr_exit0;
809         }
810
811         hw_pgsize = 1 << fmr_attr->page_shift;
812         if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) {
813                 ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x",
814                          fmr_attr->page_shift);
815                 ib_fmr = ERR_PTR(-EINVAL);
816                 goto alloc_fmr_exit0;
817         }
818
819         e_fmr = ehca_mr_new();
820         if (!e_fmr) {
821                 ib_fmr = ERR_PTR(-ENOMEM);
822                 goto alloc_fmr_exit0;
823         }
824         e_fmr->flags |= EHCA_MR_FLAG_FMR;
825
826         /* register MR on HCA */
827         memset(&pginfo, 0, sizeof(pginfo));
828         pginfo.hwpage_size = hw_pgsize;
829         /*
830          * pginfo.num_hwpages==0, ie register_rpages() will not be called
831          * but deferred to map_phys_fmr()
832          */
833         ret = ehca_reg_mr(shca, e_fmr, NULL,
834                           fmr_attr->max_pages * (1 << fmr_attr->page_shift),
835                           mr_access_flags, e_pd, &pginfo,
836                           &tmp_lkey, &tmp_rkey, EHCA_REG_MR);
837         if (ret) {
838                 ib_fmr = ERR_PTR(ret);
839                 goto alloc_fmr_exit1;
840         }
841
842         /* successful */
843         e_fmr->hwpage_size = hw_pgsize;
844         e_fmr->fmr_page_size = 1 << fmr_attr->page_shift;
845         e_fmr->fmr_max_pages = fmr_attr->max_pages;
846         e_fmr->fmr_max_maps = fmr_attr->max_maps;
847         e_fmr->fmr_map_cnt = 0;
848         return &e_fmr->ib.ib_fmr;
849
850 alloc_fmr_exit1:
851         ehca_mr_delete(e_fmr);
852 alloc_fmr_exit0:
853         return ib_fmr;
854 } /* end ehca_alloc_fmr() */
855
856 /*----------------------------------------------------------------------*/
857
858 int ehca_map_phys_fmr(struct ib_fmr *fmr,
859                       u64 *page_list,
860                       int list_len,
861                       u64 iova)
862 {
863         int ret;
864         struct ehca_shca *shca =
865                 container_of(fmr->device, struct ehca_shca, ib_device);
866         struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
867         struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd);
868         struct ehca_mr_pginfo pginfo;
869         u32 tmp_lkey, tmp_rkey;
870
871         if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
872                 ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
873                          e_fmr, e_fmr->flags);
874                 ret = -EINVAL;
875                 goto map_phys_fmr_exit0;
876         }
877         ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len);
878         if (ret)
879                 goto map_phys_fmr_exit0;
880         if (iova % e_fmr->fmr_page_size) {
881                 /* only whole-numbered pages */
882                 ehca_err(fmr->device, "bad iova, iova=%llx fmr_page_size=%x",
883                          iova, e_fmr->fmr_page_size);
884                 ret = -EINVAL;
885                 goto map_phys_fmr_exit0;
886         }
887         if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) {
888                 /* HCAD does not limit the maps, however trace this anyway */
889                 ehca_info(fmr->device, "map limit exceeded, fmr=%p "
890                           "e_fmr->fmr_map_cnt=%x e_fmr->fmr_max_maps=%x",
891                           fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps);
892         }
893
894         memset(&pginfo, 0, sizeof(pginfo));
895         pginfo.type = EHCA_MR_PGI_FMR;
896         pginfo.num_kpages = list_len;
897         pginfo.hwpage_size = e_fmr->hwpage_size;
898         pginfo.num_hwpages =
899                 list_len * e_fmr->fmr_page_size / pginfo.hwpage_size;
900         pginfo.u.fmr.page_list = page_list;
901         pginfo.next_hwpage =
902                 (iova & (e_fmr->fmr_page_size-1)) / pginfo.hwpage_size;
903         pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size;
904
905         ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova,
906                             list_len * e_fmr->fmr_page_size,
907                             e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey);
908         if (ret)
909                 goto map_phys_fmr_exit0;
910
911         /* successful reregistration */
912         e_fmr->fmr_map_cnt++;
913         e_fmr->ib.ib_fmr.lkey = tmp_lkey;
914         e_fmr->ib.ib_fmr.rkey = tmp_rkey;
915         return 0;
916
917 map_phys_fmr_exit0:
918         if (ret)
919                 ehca_err(fmr->device, "ret=%i fmr=%p page_list=%p list_len=%x "
920                          "iova=%llx", ret, fmr, page_list, list_len, iova);
921         return ret;
922 } /* end ehca_map_phys_fmr() */
923
924 /*----------------------------------------------------------------------*/
925
926 int ehca_unmap_fmr(struct list_head *fmr_list)
927 {
928         int ret = 0;
929         struct ib_fmr *ib_fmr;
930         struct ehca_shca *shca = NULL;
931         struct ehca_shca *prev_shca;
932         struct ehca_mr *e_fmr;
933         u32 num_fmr = 0;
934         u32 unmap_fmr_cnt = 0;
935
936         /* check all FMR belong to same SHCA, and check internal flag */
937         list_for_each_entry(ib_fmr, fmr_list, list) {
938                 prev_shca = shca;
939                 shca = container_of(ib_fmr->device, struct ehca_shca,
940                                     ib_device);
941                 e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
942                 if ((shca != prev_shca) && prev_shca) {
943                         ehca_err(&shca->ib_device, "SHCA mismatch, shca=%p "
944                                  "prev_shca=%p e_fmr=%p",
945                                  shca, prev_shca, e_fmr);
946                         ret = -EINVAL;
947                         goto unmap_fmr_exit0;
948                 }
949                 if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
950                         ehca_err(&shca->ib_device, "not a FMR, e_fmr=%p "
951                                  "e_fmr->flags=%x", e_fmr, e_fmr->flags);
952                         ret = -EINVAL;
953                         goto unmap_fmr_exit0;
954                 }
955                 num_fmr++;
956         }
957
958         /* loop over all FMRs to unmap */
959         list_for_each_entry(ib_fmr, fmr_list, list) {
960                 unmap_fmr_cnt++;
961                 e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
962                 shca = container_of(ib_fmr->device, struct ehca_shca,
963                                     ib_device);
964                 ret = ehca_unmap_one_fmr(shca, e_fmr);
965                 if (ret) {
966                         /* unmap failed, stop unmapping of rest of FMRs */
967                         ehca_err(&shca->ib_device, "unmap of one FMR failed, "
968                                  "stop rest, e_fmr=%p num_fmr=%x "
969                                  "unmap_fmr_cnt=%x lkey=%x", e_fmr, num_fmr,
970                                  unmap_fmr_cnt, e_fmr->ib.ib_fmr.lkey);
971                         goto unmap_fmr_exit0;
972                 }
973         }
974
975 unmap_fmr_exit0:
976         if (ret)
977                 ehca_gen_err("ret=%i fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x",
978                              ret, fmr_list, num_fmr, unmap_fmr_cnt);
979         return ret;
980 } /* end ehca_unmap_fmr() */
981
982 /*----------------------------------------------------------------------*/
983
984 int ehca_dealloc_fmr(struct ib_fmr *fmr)
985 {
986         int ret;
987         u64 h_ret;
988         struct ehca_shca *shca =
989                 container_of(fmr->device, struct ehca_shca, ib_device);
990         struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
991
992         if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
993                 ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
994                          e_fmr, e_fmr->flags);
995                 ret = -EINVAL;
996                 goto free_fmr_exit0;
997         }
998
999         h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
1000         if (h_ret != H_SUCCESS) {
1001                 ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lli e_fmr=%p "
1002                          "hca_hndl=%llx fmr_hndl=%llx fmr->lkey=%x",
1003                          h_ret, e_fmr, shca->ipz_hca_handle.handle,
1004                          e_fmr->ipz_mr_handle.handle, fmr->lkey);
1005                 ret = ehca2ib_return_code(h_ret);
1006                 goto free_fmr_exit0;
1007         }
1008         /* successful deregistration */
1009         ehca_mr_delete(e_fmr);
1010         return 0;
1011
1012 free_fmr_exit0:
1013         if (ret)
1014                 ehca_err(&shca->ib_device, "ret=%i fmr=%p", ret, fmr);
1015         return ret;
1016 } /* end ehca_dealloc_fmr() */
1017
1018 /*----------------------------------------------------------------------*/
1019
1020 static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca,
1021                                    struct ehca_mr *e_mr,
1022                                    struct ehca_mr_pginfo *pginfo);
1023
1024 int ehca_reg_mr(struct ehca_shca *shca,
1025                 struct ehca_mr *e_mr,
1026                 u64 *iova_start,
1027                 u64 size,
1028                 int acl,
1029                 struct ehca_pd *e_pd,
1030                 struct ehca_mr_pginfo *pginfo,
1031                 u32 *lkey, /*OUT*/
1032                 u32 *rkey, /*OUT*/
1033                 enum ehca_reg_type reg_type)
1034 {
1035         int ret;
1036         u64 h_ret;
1037         u32 hipz_acl;
1038         struct ehca_mr_hipzout_parms hipzout;
1039
1040         ehca_mrmw_map_acl(acl, &hipz_acl);
1041         ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);
1042         if (ehca_use_hp_mr == 1)
1043                 hipz_acl |= 0x00000001;
1044
1045         h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr,
1046                                          (u64)iova_start, size, hipz_acl,
1047                                          e_pd->fw_pd, &hipzout);
1048         if (h_ret != H_SUCCESS) {
1049                 ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lli "
1050                          "hca_hndl=%llx", h_ret, shca->ipz_hca_handle.handle);
1051                 ret = ehca2ib_return_code(h_ret);
1052                 goto ehca_reg_mr_exit0;
1053         }
1054
1055         e_mr->ipz_mr_handle = hipzout.handle;
1056
1057         if (reg_type == EHCA_REG_BUSMAP_MR)
1058                 ret = ehca_reg_bmap_mr_rpages(shca, e_mr, pginfo);
1059         else if (reg_type == EHCA_REG_MR)
1060                 ret = ehca_reg_mr_rpages(shca, e_mr, pginfo);
1061         else
1062                 ret = -EINVAL;
1063
1064         if (ret)
1065                 goto ehca_reg_mr_exit1;
1066
1067         /* successful registration */
1068         e_mr->num_kpages = pginfo->num_kpages;
1069         e_mr->num_hwpages = pginfo->num_hwpages;
1070         e_mr->hwpage_size = pginfo->hwpage_size;
1071         e_mr->start = iova_start;
1072         e_mr->size = size;
1073         e_mr->acl = acl;
1074         *lkey = hipzout.lkey;
1075         *rkey = hipzout.rkey;
1076         return 0;
1077
1078 ehca_reg_mr_exit1:
1079         h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
1080         if (h_ret != H_SUCCESS) {
1081                 ehca_err(&shca->ib_device, "h_ret=%lli shca=%p e_mr=%p "
1082                          "iova_start=%p size=%llx acl=%x e_pd=%p lkey=%x "
1083                          "pginfo=%p num_kpages=%llx num_hwpages=%llx ret=%i",
1084                          h_ret, shca, e_mr, iova_start, size, acl, e_pd,
1085                          hipzout.lkey, pginfo, pginfo->num_kpages,
1086                          pginfo->num_hwpages, ret);
1087                 ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, "
1088                          "not recoverable");
1089         }
1090 ehca_reg_mr_exit0:
1091         if (ret)
1092                 ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
1093                          "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p "
1094                          "num_kpages=%llx num_hwpages=%llx",
1095                          ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo,
1096                          pginfo->num_kpages, pginfo->num_hwpages);
1097         return ret;
1098 } /* end ehca_reg_mr() */
1099
1100 /*----------------------------------------------------------------------*/
1101
1102 int ehca_reg_mr_rpages(struct ehca_shca *shca,
1103                        struct ehca_mr *e_mr,
1104                        struct ehca_mr_pginfo *pginfo)
1105 {
1106         int ret = 0;
1107         u64 h_ret;
1108         u32 rnum;
1109         u64 rpage;
1110         u32 i;
1111         u64 *kpage;
1112
1113         if (!pginfo->num_hwpages) /* in case of fmr */
1114                 return 0;
1115
1116         kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
1117         if (!kpage) {
1118                 ehca_err(&shca->ib_device, "kpage alloc failed");
1119                 ret = -ENOMEM;
1120                 goto ehca_reg_mr_rpages_exit0;
1121         }
1122
1123         /* max MAX_RPAGES ehca mr pages per register call */
1124         for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) {
1125
1126                 if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
1127                         rnum = pginfo->num_hwpages % MAX_RPAGES; /* last shot */
1128                         if (rnum == 0)
1129                                 rnum = MAX_RPAGES;      /* last shot is full */
1130                 } else
1131                         rnum = MAX_RPAGES;
1132
1133                 ret = ehca_set_pagebuf(pginfo, rnum, kpage);
1134                 if (ret) {
1135                         ehca_err(&shca->ib_device, "ehca_set_pagebuf "
1136                                  "bad rc, ret=%i rnum=%x kpage=%p",
1137                                  ret, rnum, kpage);
1138                         goto ehca_reg_mr_rpages_exit1;
1139                 }
1140
1141                 if (rnum > 1) {
1142                         rpage = __pa(kpage);
1143                         if (!rpage) {
1144                                 ehca_err(&shca->ib_device, "kpage=%p i=%x",
1145                                          kpage, i);
1146                                 ret = -EFAULT;
1147                                 goto ehca_reg_mr_rpages_exit1;
1148                         }
1149                 } else
1150                         rpage = *kpage;
1151
1152                 h_ret = hipz_h_register_rpage_mr(
1153                         shca->ipz_hca_handle, e_mr,
1154                         ehca_encode_hwpage_size(pginfo->hwpage_size),
1155                         0, rpage, rnum);
1156
1157                 if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
1158                         /*
1159                          * check for 'registration complete'==H_SUCCESS
1160                          * and for 'page registered'==H_PAGE_REGISTERED
1161                          */
1162                         if (h_ret != H_SUCCESS) {
1163                                 ehca_err(&shca->ib_device, "last "
1164                                          "hipz_reg_rpage_mr failed, h_ret=%lli "
1165                                          "e_mr=%p i=%x hca_hndl=%llx mr_hndl=%llx"
1166                                          " lkey=%x", h_ret, e_mr, i,
1167                                          shca->ipz_hca_handle.handle,
1168                                          e_mr->ipz_mr_handle.handle,
1169                                          e_mr->ib.ib_mr.lkey);
1170                                 ret = ehca2ib_return_code(h_ret);
1171                                 break;
1172                         } else
1173                                 ret = 0;
1174                 } else if (h_ret != H_PAGE_REGISTERED) {
1175                         ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, "
1176                                  "h_ret=%lli e_mr=%p i=%x lkey=%x hca_hndl=%llx "
1177                                  "mr_hndl=%llx", h_ret, e_mr, i,
1178                                  e_mr->ib.ib_mr.lkey,
1179                                  shca->ipz_hca_handle.handle,
1180                                  e_mr->ipz_mr_handle.handle);
1181                         ret = ehca2ib_return_code(h_ret);
1182                         break;
1183                 } else
1184                         ret = 0;
1185         } /* end for(i) */
1186
1187
1188 ehca_reg_mr_rpages_exit1:
1189         ehca_free_fw_ctrlblock(kpage);
1190 ehca_reg_mr_rpages_exit0:
1191         if (ret)
1192                 ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p pginfo=%p "
1193                          "num_kpages=%llx num_hwpages=%llx", ret, shca, e_mr,
1194                          pginfo, pginfo->num_kpages, pginfo->num_hwpages);
1195         return ret;
1196 } /* end ehca_reg_mr_rpages() */
1197
1198 /*----------------------------------------------------------------------*/
1199
1200 inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
1201                                 struct ehca_mr *e_mr,
1202                                 u64 *iova_start,
1203                                 u64 size,
1204                                 u32 acl,
1205                                 struct ehca_pd *e_pd,
1206                                 struct ehca_mr_pginfo *pginfo,
1207                                 u32 *lkey, /*OUT*/
1208                                 u32 *rkey) /*OUT*/
1209 {
1210         int ret;
1211         u64 h_ret;
1212         u32 hipz_acl;
1213         u64 *kpage;
1214         u64 rpage;
1215         struct ehca_mr_pginfo pginfo_save;
1216         struct ehca_mr_hipzout_parms hipzout;
1217
1218         ehca_mrmw_map_acl(acl, &hipz_acl);
1219         ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);
1220
1221         kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
1222         if (!kpage) {
1223                 ehca_err(&shca->ib_device, "kpage alloc failed");
1224                 ret = -ENOMEM;
1225                 goto ehca_rereg_mr_rereg1_exit0;
1226         }
1227
1228         pginfo_save = *pginfo;
1229         ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage);
1230         if (ret) {
1231                 ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p "
1232                          "pginfo=%p type=%x num_kpages=%llx num_hwpages=%llx "
1233                          "kpage=%p", e_mr, pginfo, pginfo->type,
1234                          pginfo->num_kpages, pginfo->num_hwpages, kpage);
1235                 goto ehca_rereg_mr_rereg1_exit1;
1236         }
1237         rpage = __pa(kpage);
1238         if (!rpage) {
1239                 ehca_err(&shca->ib_device, "kpage=%p", kpage);
1240                 ret = -EFAULT;
1241                 goto ehca_rereg_mr_rereg1_exit1;
1242         }
1243         h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr,
1244                                       (u64)iova_start, size, hipz_acl,
1245                                       e_pd->fw_pd, rpage, &hipzout);
1246         if (h_ret != H_SUCCESS) {
1247                 /*
1248                  * reregistration unsuccessful, try it again with the 3 hCalls,
1249                  * e.g. this is required in case H_MR_CONDITION
1250                  * (MW bound or MR is shared)
1251                  */
1252                 ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed "
1253                           "(Rereg1), h_ret=%lli e_mr=%p", h_ret, e_mr);
1254                 *pginfo = pginfo_save;
1255                 ret = -EAGAIN;
1256         } else if ((u64 *)hipzout.vaddr != iova_start) {
1257                 ehca_err(&shca->ib_device, "PHYP changed iova_start in "
1258                          "rereg_pmr, iova_start=%p iova_start_out=%llx e_mr=%p "
1259                          "mr_handle=%llx lkey=%x lkey_out=%x", iova_start,
1260                          hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle,
1261                          e_mr->ib.ib_mr.lkey, hipzout.lkey);
1262                 ret = -EFAULT;
1263         } else {
1264                 /*
1265                  * successful reregistration
1266                  * note: start and start_out are identical for eServer HCAs
1267                  */
1268                 e_mr->num_kpages = pginfo->num_kpages;
1269                 e_mr->num_hwpages = pginfo->num_hwpages;
1270                 e_mr->hwpage_size = pginfo->hwpage_size;
1271                 e_mr->start = iova_start;
1272                 e_mr->size = size;
1273                 e_mr->acl = acl;
1274                 *lkey = hipzout.lkey;
1275                 *rkey = hipzout.rkey;
1276         }
1277
1278 ehca_rereg_mr_rereg1_exit1:
1279         ehca_free_fw_ctrlblock(kpage);
1280 ehca_rereg_mr_rereg1_exit0:
1281         if ( ret && (ret != -EAGAIN) )
1282                 ehca_err(&shca->ib_device, "ret=%i lkey=%x rkey=%x "
1283                          "pginfo=%p num_kpages=%llx num_hwpages=%llx",
1284                          ret, *lkey, *rkey, pginfo, pginfo->num_kpages,
1285                          pginfo->num_hwpages);
1286         return ret;
1287 } /* end ehca_rereg_mr_rereg1() */
1288
1289 /*----------------------------------------------------------------------*/
1290
1291 int ehca_rereg_mr(struct ehca_shca *shca,
1292                   struct ehca_mr *e_mr,
1293                   u64 *iova_start,
1294                   u64 size,
1295                   int acl,
1296                   struct ehca_pd *e_pd,
1297                   struct ehca_mr_pginfo *pginfo,
1298                   u32 *lkey,
1299                   u32 *rkey)
1300 {
1301         int ret = 0;
1302         u64 h_ret;
1303         int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */
1304         int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */
1305
1306         /* first determine reregistration hCall(s) */
1307         if ((pginfo->num_hwpages > MAX_RPAGES) ||
1308             (e_mr->num_hwpages > MAX_RPAGES) ||
1309             (pginfo->num_hwpages > e_mr->num_hwpages)) {
1310                 ehca_dbg(&shca->ib_device, "Rereg3 case, "
1311                          "pginfo->num_hwpages=%llx e_mr->num_hwpages=%x",
1312                          pginfo->num_hwpages, e_mr->num_hwpages);
1313                 rereg_1_hcall = 0;
1314                 rereg_3_hcall = 1;
1315         }
1316
1317         if (e_mr->flags & EHCA_MR_FLAG_MAXMR) { /* check for max-MR */
1318                 rereg_1_hcall = 0;
1319                 rereg_3_hcall = 1;
1320                 e_mr->flags &= ~EHCA_MR_FLAG_MAXMR;
1321                 ehca_err(&shca->ib_device, "Rereg MR for max-MR! e_mr=%p",
1322                          e_mr);
1323         }
1324
1325         if (rereg_1_hcall) {
1326                 ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size,
1327                                            acl, e_pd, pginfo, lkey, rkey);
1328                 if (ret) {
1329                         if (ret == -EAGAIN)
1330                                 rereg_3_hcall = 1;
1331                         else
1332                                 goto ehca_rereg_mr_exit0;
1333                 }
1334         }
1335
1336         if (rereg_3_hcall) {
1337                 struct ehca_mr save_mr;
1338
1339                 /* first deregister old MR */
1340                 h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
1341                 if (h_ret != H_SUCCESS) {
1342                         ehca_err(&shca->ib_device, "hipz_free_mr failed, "
1343                                  "h_ret=%lli e_mr=%p hca_hndl=%llx mr_hndl=%llx "
1344                                  "mr->lkey=%x",
1345                                  h_ret, e_mr, shca->ipz_hca_handle.handle,
1346                                  e_mr->ipz_mr_handle.handle,
1347                                  e_mr->ib.ib_mr.lkey);
1348                         ret = ehca2ib_return_code(h_ret);
1349                         goto ehca_rereg_mr_exit0;
1350                 }
1351                 /* clean ehca_mr_t, without changing struct ib_mr and lock */
1352                 save_mr = *e_mr;
1353                 ehca_mr_deletenew(e_mr);
1354
1355                 /* set some MR values */
1356                 e_mr->flags = save_mr.flags;
1357                 e_mr->hwpage_size = save_mr.hwpage_size;
1358                 e_mr->fmr_page_size = save_mr.fmr_page_size;
1359                 e_mr->fmr_max_pages = save_mr.fmr_max_pages;
1360                 e_mr->fmr_max_maps = save_mr.fmr_max_maps;
1361                 e_mr->fmr_map_cnt = save_mr.fmr_map_cnt;
1362
1363                 ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl,
1364                                   e_pd, pginfo, lkey, rkey, EHCA_REG_MR);
1365                 if (ret) {
1366                         u32 offset = (u64)(&e_mr->flags) - (u64)e_mr;
1367                         memcpy(&e_mr->flags, &(save_mr.flags),
1368                                sizeof(struct ehca_mr) - offset);
1369                         goto ehca_rereg_mr_exit0;
1370                 }
1371         }
1372
1373 ehca_rereg_mr_exit0:
1374         if (ret)
1375                 ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
1376                          "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p "
1377                          "num_kpages=%llx lkey=%x rkey=%x rereg_1_hcall=%x "
1378                          "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size,
1379                          acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey,
1380                          rereg_1_hcall, rereg_3_hcall);
1381         return ret;
1382 } /* end ehca_rereg_mr() */
1383
1384 /*----------------------------------------------------------------------*/
1385
1386 int ehca_unmap_one_fmr(struct ehca_shca *shca,
1387                        struct ehca_mr *e_fmr)
1388 {
1389         int ret = 0;
1390         u64 h_ret;
1391         struct ehca_pd *e_pd =
1392                 container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd);
1393         struct ehca_mr save_fmr;
1394         u32 tmp_lkey, tmp_rkey;
1395         struct ehca_mr_pginfo pginfo;
1396         struct ehca_mr_hipzout_parms hipzout;
1397         struct ehca_mr save_mr;
1398
1399         if (e_fmr->fmr_max_pages <= MAX_RPAGES) {
1400                 /*
1401                  * note: after using rereg hcall with len=0,
1402                  * rereg hcall must be used again for registering pages
1403                  */
1404                 h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0,
1405                                               0, 0, e_pd->fw_pd, 0, &hipzout);
1406                 if (h_ret == H_SUCCESS) {
1407                         /* successful reregistration */
1408                         e_fmr->start = NULL;
1409                         e_fmr->size = 0;
1410                         tmp_lkey = hipzout.lkey;
1411                         tmp_rkey = hipzout.rkey;
1412                         return 0;
1413                 }
1414                 /*
1415                  * should not happen, because length checked above,
1416                  * FMRs are not shared and no MW bound to FMRs
1417                  */
1418                 ehca_err(&shca->ib_device, "hipz_reregister_pmr failed "
1419                          "(Rereg1), h_ret=%lli e_fmr=%p hca_hndl=%llx "
1420                          "mr_hndl=%llx lkey=%x lkey_out=%x",
1421                          h_ret, e_fmr, shca->ipz_hca_handle.handle,
1422                          e_fmr->ipz_mr_handle.handle,
1423                          e_fmr->ib.ib_fmr.lkey, hipzout.lkey);
1424                 /* try free and rereg */
1425         }
1426
1427         /* first free old FMR */
1428         h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
1429         if (h_ret != H_SUCCESS) {
1430                 ehca_err(&shca->ib_device, "hipz_free_mr failed, "
1431                          "h_ret=%lli e_fmr=%p hca_hndl=%llx mr_hndl=%llx "
1432                          "lkey=%x",
1433                          h_ret, e_fmr, shca->ipz_hca_handle.handle,
1434                          e_fmr->ipz_mr_handle.handle,
1435                          e_fmr->ib.ib_fmr.lkey);
1436                 ret = ehca2ib_return_code(h_ret);
1437                 goto ehca_unmap_one_fmr_exit0;
1438         }
1439         /* clean ehca_mr_t, without changing lock */
1440         save_fmr = *e_fmr;
1441         ehca_mr_deletenew(e_fmr);
1442
1443         /* set some MR values */
1444         e_fmr->flags = save_fmr.flags;
1445         e_fmr->hwpage_size = save_fmr.hwpage_size;
1446         e_fmr->fmr_page_size = save_fmr.fmr_page_size;
1447         e_fmr->fmr_max_pages = save_fmr.fmr_max_pages;
1448         e_fmr->fmr_max_maps = save_fmr.fmr_max_maps;
1449         e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt;
1450         e_fmr->acl = save_fmr.acl;
1451
1452         memset(&pginfo, 0, sizeof(pginfo));
1453         pginfo.type = EHCA_MR_PGI_FMR;
1454         ret = ehca_reg_mr(shca, e_fmr, NULL,
1455                           (e_fmr->fmr_max_pages * e_fmr->fmr_page_size),
1456                           e_fmr->acl, e_pd, &pginfo, &tmp_lkey,
1457                           &tmp_rkey, EHCA_REG_MR);
1458         if (ret) {
1459                 u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr;
1460                 memcpy(&e_fmr->flags, &(save_mr.flags),
1461                        sizeof(struct ehca_mr) - offset);
1462         }
1463
1464 ehca_unmap_one_fmr_exit0:
1465         if (ret)
1466                 ehca_err(&shca->ib_device, "ret=%i tmp_lkey=%x tmp_rkey=%x "
1467                          "fmr_max_pages=%x",
1468                          ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages);
1469         return ret;
1470 } /* end ehca_unmap_one_fmr() */
1471
1472 /*----------------------------------------------------------------------*/
1473
1474 int ehca_reg_smr(struct ehca_shca *shca,
1475                  struct ehca_mr *e_origmr,
1476                  struct ehca_mr *e_newmr,
1477                  u64 *iova_start,
1478                  int acl,
1479                  struct ehca_pd *e_pd,
1480                  u32 *lkey, /*OUT*/
1481                  u32 *rkey) /*OUT*/
1482 {
1483         int ret = 0;
1484         u64 h_ret;
1485         u32 hipz_acl;
1486         struct ehca_mr_hipzout_parms hipzout;
1487
1488         ehca_mrmw_map_acl(acl, &hipz_acl);
1489         ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);
1490
1491         h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
1492                                     (u64)iova_start, hipz_acl, e_pd->fw_pd,
1493                                     &hipzout);
1494         if (h_ret != H_SUCCESS) {
1495                 ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli "
1496                          "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x "
1497                          "e_pd=%p hca_hndl=%llx mr_hndl=%llx lkey=%x",
1498                          h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd,
1499                          shca->ipz_hca_handle.handle,
1500                          e_origmr->ipz_mr_handle.handle,
1501                          e_origmr->ib.ib_mr.lkey);
1502                 ret = ehca2ib_return_code(h_ret);
1503                 goto ehca_reg_smr_exit0;
1504         }
1505         /* successful registration */
1506         e_newmr->num_kpages = e_origmr->num_kpages;
1507         e_newmr->num_hwpages = e_origmr->num_hwpages;
1508         e_newmr->hwpage_size   = e_origmr->hwpage_size;
1509         e_newmr->start = iova_start;
1510         e_newmr->size = e_origmr->size;
1511         e_newmr->acl = acl;
1512         e_newmr->ipz_mr_handle = hipzout.handle;
1513         *lkey = hipzout.lkey;
1514         *rkey = hipzout.rkey;
1515         return 0;
1516
1517 ehca_reg_smr_exit0:
1518         if (ret)
1519                 ehca_err(&shca->ib_device, "ret=%i shca=%p e_origmr=%p "
1520                          "e_newmr=%p iova_start=%p acl=%x e_pd=%p",
1521                          ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd);
1522         return ret;
1523 } /* end ehca_reg_smr() */
1524
1525 /*----------------------------------------------------------------------*/
1526 static inline void *ehca_calc_sectbase(int top, int dir, int idx)
1527 {
1528         unsigned long ret = idx;
1529         ret |= dir << EHCA_DIR_INDEX_SHIFT;
1530         ret |= top << EHCA_TOP_INDEX_SHIFT;
1531         return __va(ret << SECTION_SIZE_BITS);
1532 }
1533
/*
 * True unless the given map entry equals the EHCA_INVAL_ADDR marker.
 * The argument is parenthesized so the cast applies to the whole
 * expression passed in, not just its first operand.
 */
#define ehca_bmap_valid(entry) \
        ((u64)(entry) != (u64)EHCA_INVAL_ADDR)
1536
1537 static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage,
1538                                struct ehca_shca *shca, struct ehca_mr *mr,
1539                                struct ehca_mr_pginfo *pginfo)
1540 {
1541         u64 h_ret = 0;
1542         unsigned long page = 0;
1543         u64 rpage = __pa(kpage);
1544         int page_count;
1545
1546         void *sectbase = ehca_calc_sectbase(top, dir, idx);
1547         if ((unsigned long)sectbase & (pginfo->hwpage_size - 1)) {
1548                 ehca_err(&shca->ib_device, "reg_mr_section will probably fail:"
1549                                            "hwpage_size does not fit to "
1550                                            "section start address");
1551         }
1552         page_count = EHCA_SECTSIZE / pginfo->hwpage_size;
1553
1554         while (page < page_count) {
1555                 u64 rnum;
1556                 for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count);
1557                      rnum++) {
1558                         void *pg = sectbase + ((page++) * pginfo->hwpage_size);
1559                         kpage[rnum] = __pa(pg);
1560                 }
1561
1562                 h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr,
1563                         ehca_encode_hwpage_size(pginfo->hwpage_size),
1564                         0, rpage, rnum);
1565
1566                 if ((h_ret != H_SUCCESS) && (h_ret != H_PAGE_REGISTERED)) {
1567                         ehca_err(&shca->ib_device, "register_rpage_mr failed");
1568                         return h_ret;
1569                 }
1570         }
1571         return h_ret;
1572 }
1573
1574 static u64 ehca_reg_mr_sections(int top, int dir, u64 *kpage,
1575                                 struct ehca_shca *shca, struct ehca_mr *mr,
1576                                 struct ehca_mr_pginfo *pginfo)
1577 {
1578         u64 hret = H_SUCCESS;
1579         int idx;
1580
1581         for (idx = 0; idx < EHCA_MAP_ENTRIES; idx++) {
1582                 if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]->ent[idx]))
1583                         continue;
1584
1585                 hret = ehca_reg_mr_section(top, dir, idx, kpage, shca, mr,
1586                                            pginfo);
1587                 if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
1588                                 return hret;
1589         }
1590         return hret;
1591 }
1592
1593 static u64 ehca_reg_mr_dir_sections(int top, u64 *kpage, struct ehca_shca *shca,
1594                                     struct ehca_mr *mr,
1595                                     struct ehca_mr_pginfo *pginfo)
1596 {
1597         u64 hret = H_SUCCESS;
1598         int dir;
1599
1600         for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) {
1601                 if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
1602                         continue;
1603
1604                 hret = ehca_reg_mr_sections(top, dir, kpage, shca, mr, pginfo);
1605                 if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
1606                                 return hret;
1607         }
1608         return hret;
1609 }
1610
1611 /* register internal max-MR to internal SHCA */
1612 int ehca_reg_internal_maxmr(
1613         struct ehca_shca *shca,
1614         struct ehca_pd *e_pd,
1615         struct ehca_mr **e_maxmr)  /*OUT*/
1616 {
1617         int ret;
1618         struct ehca_mr *e_mr;
1619         u64 *iova_start;
1620         u64 size_maxmr;
1621         struct ehca_mr_pginfo pginfo;
1622         struct ib_phys_buf ib_pbuf;
1623         u32 num_kpages;
1624         u32 num_hwpages;
1625         u64 hw_pgsize;
1626
1627         if (!ehca_bmap) {
1628                 ret = -EFAULT;
1629                 goto ehca_reg_internal_maxmr_exit0;
1630         }
1631
1632         e_mr = ehca_mr_new();
1633         if (!e_mr) {
1634                 ehca_err(&shca->ib_device, "out of memory");
1635                 ret = -ENOMEM;
1636                 goto ehca_reg_internal_maxmr_exit0;
1637         }
1638         e_mr->flags |= EHCA_MR_FLAG_MAXMR;
1639
1640         /* register internal max-MR on HCA */
1641         size_maxmr = ehca_mr_len;
1642         iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START));
1643         ib_pbuf.addr = 0;
1644         ib_pbuf.size = size_maxmr;
1645         num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr,
1646                                 PAGE_SIZE);
1647         hw_pgsize = ehca_get_max_hwpage_size(shca);
1648         num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size_maxmr,
1649                                  hw_pgsize);
1650
1651         memset(&pginfo, 0, sizeof(pginfo));
1652         pginfo.type = EHCA_MR_PGI_PHYS;
1653         pginfo.num_kpages = num_kpages;
1654         pginfo.num_hwpages = num_hwpages;
1655         pginfo.hwpage_size = hw_pgsize;
1656         pginfo.u.phy.num_phys_buf = 1;
1657         pginfo.u.phy.phys_buf_array = &ib_pbuf;
1658
1659         ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd,
1660                           &pginfo, &e_mr->ib.ib_mr.lkey,
1661                           &e_mr->ib.ib_mr.rkey, EHCA_REG_BUSMAP_MR);
1662         if (ret) {
1663                 ehca_err(&shca->ib_device, "reg of internal max MR failed, "
1664                          "e_mr=%p iova_start=%p size_maxmr=%llx num_kpages=%x "
1665                          "num_hwpages=%x", e_mr, iova_start, size_maxmr,
1666                          num_kpages, num_hwpages);
1667                 goto ehca_reg_internal_maxmr_exit1;
1668         }
1669
1670         /* successful registration of all pages */
1671         e_mr->ib.ib_mr.device = e_pd->ib_pd.device;
1672         e_mr->ib.ib_mr.pd = &e_pd->ib_pd;
1673         e_mr->ib.ib_mr.uobject = NULL;
1674         atomic_inc(&(e_pd->ib_pd.usecnt));
1675         atomic_set(&(e_mr->ib.ib_mr.usecnt), 0);
1676         *e_maxmr = e_mr;
1677         return 0;
1678
1679 ehca_reg_internal_maxmr_exit1:
1680         ehca_mr_delete(e_mr);
1681 ehca_reg_internal_maxmr_exit0:
1682         if (ret)
1683                 ehca_err(&shca->ib_device, "ret=%i shca=%p e_pd=%p e_maxmr=%p",
1684                          ret, shca, e_pd, e_maxmr);
1685         return ret;
1686 } /* end ehca_reg_internal_maxmr() */
1687
1688 /*----------------------------------------------------------------------*/
1689
1690 int ehca_reg_maxmr(struct ehca_shca *shca,
1691                    struct ehca_mr *e_newmr,
1692                    u64 *iova_start,
1693                    int acl,
1694                    struct ehca_pd *e_pd,
1695                    u32 *lkey,
1696                    u32 *rkey)
1697 {
1698         u64 h_ret;
1699         struct ehca_mr *e_origmr = shca->maxmr;
1700         u32 hipz_acl;
1701         struct ehca_mr_hipzout_parms hipzout;
1702
1703         ehca_mrmw_map_acl(acl, &hipz_acl);
1704         ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);
1705
1706         h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
1707                                     (u64)iova_start, hipz_acl, e_pd->fw_pd,
1708                                     &hipzout);
1709         if (h_ret != H_SUCCESS) {
1710                 ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli "
1711                          "e_origmr=%p hca_hndl=%llx mr_hndl=%llx lkey=%x",
1712                          h_ret, e_origmr, shca->ipz_hca_handle.handle,
1713                          e_origmr->ipz_mr_handle.handle,
1714                          e_origmr->ib.ib_mr.lkey);
1715                 return ehca2ib_return_code(h_ret);
1716         }
1717         /* successful registration */
1718         e_newmr->num_kpages = e_origmr->num_kpages;
1719         e_newmr->num_hwpages = e_origmr->num_hwpages;
1720         e_newmr->hwpage_size = e_origmr->hwpage_size;
1721         e_newmr->start = iova_start;
1722         e_newmr->size = e_origmr->size;
1723         e_newmr->acl = acl;
1724         e_newmr->ipz_mr_handle = hipzout.handle;
1725         *lkey = hipzout.lkey;
1726         *rkey = hipzout.rkey;
1727         return 0;
1728 } /* end ehca_reg_maxmr() */
1729
1730 /*----------------------------------------------------------------------*/
1731
1732 int ehca_dereg_internal_maxmr(struct ehca_shca *shca)
1733 {
1734         int ret;
1735         struct ehca_mr *e_maxmr;
1736         struct ib_pd *ib_pd;
1737
1738         if (!shca->maxmr) {
1739                 ehca_err(&shca->ib_device, "bad call, shca=%p", shca);
1740                 ret = -EINVAL;
1741                 goto ehca_dereg_internal_maxmr_exit0;
1742         }
1743
1744         e_maxmr = shca->maxmr;
1745         ib_pd = e_maxmr->ib.ib_mr.pd;
1746         shca->maxmr = NULL; /* remove internal max-MR indication from SHCA */
1747
1748         ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr);
1749         if (ret) {
1750                 ehca_err(&shca->ib_device, "dereg internal max-MR failed, "
1751                          "ret=%i e_maxmr=%p shca=%p lkey=%x",
1752                          ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey);
1753                 shca->maxmr = e_maxmr;
1754                 goto ehca_dereg_internal_maxmr_exit0;
1755         }
1756
1757         atomic_dec(&ib_pd->usecnt);
1758
1759 ehca_dereg_internal_maxmr_exit0:
1760         if (ret)
1761                 ehca_err(&shca->ib_device, "ret=%i shca=%p shca->maxmr=%p",
1762                          ret, shca, shca->maxmr);
1763         return ret;
1764 } /* end ehca_dereg_internal_maxmr() */
1765
1766 /*----------------------------------------------------------------------*/
1767
1768 /*
1769  * check physical buffer array of MR verbs for validness and
1770  * calculates MR size
1771  */
1772 int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
1773                                   int num_phys_buf,
1774                                   u64 *iova_start,
1775                                   u64 *size)
1776 {
1777         struct ib_phys_buf *pbuf = phys_buf_array;
1778         u64 size_count = 0;
1779         u32 i;
1780
1781         if (num_phys_buf == 0) {
1782                 ehca_gen_err("bad phys buf array len, num_phys_buf=0");
1783                 return -EINVAL;
1784         }
1785         /* check first buffer */
1786         if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) {
1787                 ehca_gen_err("iova_start/addr mismatch, iova_start=%p "
1788                              "pbuf->addr=%llx pbuf->size=%llx",
1789                              iova_start, pbuf->addr, pbuf->size);
1790                 return -EINVAL;
1791         }
1792         if (((pbuf->addr + pbuf->size) % PAGE_SIZE) &&
1793             (num_phys_buf > 1)) {
1794                 ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%llx "
1795                              "pbuf->size=%llx", pbuf->addr, pbuf->size);
1796                 return -EINVAL;
1797         }
1798
1799         for (i = 0; i < num_phys_buf; i++) {
1800                 if ((i > 0) && (pbuf->addr % PAGE_SIZE)) {
1801                         ehca_gen_err("bad address, i=%x pbuf->addr=%llx "
1802                                      "pbuf->size=%llx",
1803                                      i, pbuf->addr, pbuf->size);
1804                         return -EINVAL;
1805                 }
1806                 if (((i > 0) && /* not 1st */
1807                      (i < (num_phys_buf - 1)) &&        /* not last */
1808                      (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) {
1809                         ehca_gen_err("bad size, i=%x pbuf->size=%llx",
1810                                      i, pbuf->size);
1811                         return -EINVAL;
1812                 }
1813                 size_count += pbuf->size;
1814                 pbuf++;
1815         }
1816
1817         *size = size_count;
1818         return 0;
1819 } /* end ehca_mr_chk_buf_and_calc_size() */
1820
1821 /*----------------------------------------------------------------------*/
1822
1823 /* check page list of map FMR verb for validness */
1824 int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
1825                              u64 *page_list,
1826                              int list_len)
1827 {
1828         u32 i;
1829         u64 *page;
1830
1831         if ((list_len == 0) || (list_len > e_fmr->fmr_max_pages)) {
1832                 ehca_gen_err("bad list_len, list_len=%x "
1833                              "e_fmr->fmr_max_pages=%x fmr=%p",
1834                              list_len, e_fmr->fmr_max_pages, e_fmr);
1835                 return -EINVAL;
1836         }
1837
1838         /* each page must be aligned */
1839         page = page_list;
1840         for (i = 0; i < list_len; i++) {
1841                 if (*page % e_fmr->fmr_page_size) {
1842                         ehca_gen_err("bad page, i=%x *page=%llx page=%p fmr=%p "
1843                                      "fmr_page_size=%x", i, *page, page, e_fmr,
1844                                      e_fmr->fmr_page_size);
1845                         return -EINVAL;
1846                 }
1847                 page++;
1848         }
1849
1850         return 0;
1851 } /* end ehca_fmr_check_page_list() */
1852
1853 /*----------------------------------------------------------------------*/
1854
1855 /* PAGE_SIZE >= pginfo->hwpage_size */
1856 static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
1857                                   u32 number,
1858                                   u64 *kpage)
1859 {
1860         int ret = 0;
1861         struct ib_umem_chunk *prev_chunk;
1862         struct ib_umem_chunk *chunk;
1863         u64 pgaddr;
1864         u32 i = 0;
1865         u32 j = 0;
1866         int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size;
1867
1868         /* loop over desired chunk entries */
1869         chunk      = pginfo->u.usr.next_chunk;
1870         prev_chunk = pginfo->u.usr.next_chunk;
1871         list_for_each_entry_continue(
1872                 chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
1873                 for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
1874                         pgaddr = page_to_pfn(sg_page(&chunk->page_list[i]))
1875                                 << PAGE_SHIFT ;
1876                         *kpage = pgaddr + (pginfo->next_hwpage *
1877                                            pginfo->hwpage_size);
1878                         if ( !(*kpage) ) {
1879                                 ehca_gen_err("pgaddr=%llx "
1880                                              "chunk->page_list[i]=%llx "
1881                                              "i=%x next_hwpage=%llx",
1882                                              pgaddr, (u64)sg_dma_address(
1883                                                      &chunk->page_list[i]),
1884                                              i, pginfo->next_hwpage);
1885                                 return -EFAULT;
1886                         }
1887                         (pginfo->hwpage_cnt)++;
1888                         (pginfo->next_hwpage)++;
1889                         kpage++;
1890                         if (pginfo->next_hwpage % hwpages_per_kpage == 0) {
1891                                 (pginfo->kpage_cnt)++;
1892                                 (pginfo->u.usr.next_nmap)++;
1893                                 pginfo->next_hwpage = 0;
1894                                 i++;
1895                         }
1896                         j++;
1897                         if (j >= number) break;
1898                 }
1899                 if ((pginfo->u.usr.next_nmap >= chunk->nmap) &&
1900                     (j >= number)) {
1901                         pginfo->u.usr.next_nmap = 0;
1902                         prev_chunk = chunk;
1903                         break;
1904                 } else if (pginfo->u.usr.next_nmap >= chunk->nmap) {
1905                         pginfo->u.usr.next_nmap = 0;
1906                         prev_chunk = chunk;
1907                 } else if (j >= number)
1908                         break;
1909                 else
1910                         prev_chunk = chunk;
1911         }
1912         pginfo->u.usr.next_chunk =
1913                 list_prepare_entry(prev_chunk,
1914                                    (&(pginfo->u.usr.region->chunk_list)),
1915                                    list);
1916         return ret;
1917 }
1918
1919 /*
1920  * check given pages for contiguous layout
1921  * last page addr is returned in prev_pgaddr for further check
1922  */
1923 static int ehca_check_kpages_per_ate(struct scatterlist *page_list,
1924                                      int start_idx, int end_idx,
1925                                      u64 *prev_pgaddr)
1926 {
1927         int t;
1928         for (t = start_idx; t <= end_idx; t++) {
1929                 u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT;
1930                 if (ehca_debug_level >= 3)
1931                         ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr,
1932                                      *(u64 *)__va(pgaddr));
1933                 if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
1934                         ehca_gen_err("uncontiguous page found pgaddr=%llx "
1935                                      "prev_pgaddr=%llx page_list_i=%x",
1936                                      pgaddr, *prev_pgaddr, t);
1937                         return -EINVAL;
1938                 }
1939                 *prev_pgaddr = pgaddr;
1940         }
1941         return 0;
1942 }
1943
1944 /* PAGE_SIZE < pginfo->hwpage_size */
1945 static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
1946                                   u32 number,
1947                                   u64 *kpage)
1948 {
1949         int ret = 0;
1950         struct ib_umem_chunk *prev_chunk;
1951         struct ib_umem_chunk *chunk;
1952         u64 pgaddr, prev_pgaddr;
1953         u32 i = 0;
1954         u32 j = 0;
1955         int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE;
1956         int nr_kpages = kpages_per_hwpage;
1957
1958         /* loop over desired chunk entries */
1959         chunk      = pginfo->u.usr.next_chunk;
1960         prev_chunk = pginfo->u.usr.next_chunk;
1961         list_for_each_entry_continue(
1962                 chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
1963                 for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
1964                         if (nr_kpages == kpages_per_hwpage) {
1965                                 pgaddr = ( page_to_pfn(sg_page(&chunk->page_list[i]))
1966                                            << PAGE_SHIFT );
1967                                 *kpage = pgaddr;
1968                                 if ( !(*kpage) ) {
1969                                         ehca_gen_err("pgaddr=%llx i=%x",
1970                                                      pgaddr, i);
1971                                         ret = -EFAULT;
1972                                         return ret;
1973                                 }
1974                                 /*
1975                                  * The first page in a hwpage must be aligned;
1976                                  * the first MR page is exempt from this rule.
1977                                  */
1978                                 if (pgaddr & (pginfo->hwpage_size - 1)) {
1979                                         if (pginfo->hwpage_cnt) {
1980                                                 ehca_gen_err(
1981                                                         "invalid alignment "
1982                                                         "pgaddr=%llx i=%x "
1983                                                         "mr_pgsize=%llx",
1984                                                         pgaddr, i,
1985                                                         pginfo->hwpage_size);
1986                                                 ret = -EFAULT;
1987                                                 return ret;
1988                                         }
1989                                         /* first MR page */
1990                                         pginfo->kpage_cnt =
1991                                                 (pgaddr &
1992                                                  (pginfo->hwpage_size - 1)) >>
1993                                                 PAGE_SHIFT;
1994                                         nr_kpages -= pginfo->kpage_cnt;
1995                                         *kpage = pgaddr &
1996                                                  ~(pginfo->hwpage_size - 1);
1997                                 }
1998                                 if (ehca_debug_level >= 3) {
1999                                         u64 val = *(u64 *)__va(pgaddr);
2000                                         ehca_gen_dbg("kpage=%llx chunk_page=%llx "
2001                                                      "value=%016llx",
2002                                                      *kpage, pgaddr, val);
2003                                 }
2004                                 prev_pgaddr = pgaddr;
2005                                 i++;
2006                                 pginfo->kpage_cnt++;
2007                                 pginfo->u.usr.next_nmap++;
2008                                 nr_kpages--;
2009                                 if (!nr_kpages)
2010                                         goto next_kpage;
2011                                 continue;
2012                         }
2013                         if (i + nr_kpages > chunk->nmap) {
2014                                 ret = ehca_check_kpages_per_ate(
2015                                         chunk->page_list, i,
2016                                         chunk->nmap - 1, &prev_pgaddr);
2017                                 if (ret) return ret;
2018                                 pginfo->kpage_cnt += chunk->nmap - i;
2019                                 pginfo->u.usr.next_nmap += chunk->nmap - i;
2020                                 nr_kpages -= chunk->nmap - i;
2021                                 break;
2022                         }
2023
2024                         ret = ehca_check_kpages_per_ate(chunk->page_list, i,
2025                                                         i + nr_kpages - 1,
2026                                                         &prev_pgaddr);
2027                         if (ret) return ret;
2028                         i += nr_kpages;
2029                         pginfo->kpage_cnt += nr_kpages;
2030                         pginfo->u.usr.next_nmap += nr_kpages;
2031 next_kpage:
2032                         nr_kpages = kpages_per_hwpage;
2033                         (pginfo->hwpage_cnt)++;
2034                         kpage++;
2035                         j++;
2036                         if (j >= number) break;
2037                 }
2038                 if ((pginfo->u.usr.next_nmap >= chunk->nmap) &&
2039                     (j >= number)) {
2040                         pginfo->u.usr.next_nmap = 0;
2041                         prev_chunk = chunk;
2042                         break;
2043                 } else if (pginfo->u.usr.next_nmap >= chunk->nmap) {
2044                         pginfo->u.usr.next_nmap = 0;
2045                         prev_chunk = chunk;
2046                 } else if (j >= number)
2047                         break;
2048                 else
2049                         prev_chunk = chunk;
2050         }
2051         pginfo->u.usr.next_chunk =
2052                 list_prepare_entry(prev_chunk,
2053                                    (&(pginfo->u.usr.region->chunk_list)),
2054                                    list);
2055         return ret;
2056 }
2057
2058 static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo,
2059                                  u32 number, u64 *kpage)
2060 {
2061         int ret = 0;
2062         struct ib_phys_buf *pbuf;
2063         u64 num_hw, offs_hw;
2064         u32 i = 0;
2065
2066         /* loop over desired phys_buf_array entries */
2067         while (i < number) {
2068                 pbuf   = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf;
2069                 num_hw  = NUM_CHUNKS((pbuf->addr % pginfo->hwpage_size) +
2070                                      pbuf->size, pginfo->hwpage_size);
2071                 offs_hw = (pbuf->addr & ~(pginfo->hwpage_size - 1)) /
2072                         pginfo->hwpage_size;
2073                 while (pginfo->next_hwpage < offs_hw + num_hw) {
2074                         /* sanity check */
2075                         if ((pginfo->kpage_cnt >= pginfo->num_kpages) ||
2076                             (pginfo->hwpage_cnt >= pginfo->num_hwpages)) {
2077                                 ehca_gen_err("kpage_cnt >= num_kpages, "
2078                                              "kpage_cnt=%llx num_kpages=%llx "
2079                                              "hwpage_cnt=%llx "
2080                                              "num_hwpages=%llx i=%x",
2081                                              pginfo->kpage_cnt,
2082                                              pginfo->num_kpages,
2083                                              pginfo->hwpage_cnt,
2084                                              pginfo->num_hwpages, i);
2085                                 return -EFAULT;
2086                         }
2087                         *kpage = (pbuf->addr & ~(pginfo->hwpage_size - 1)) +
2088                                  (pginfo->next_hwpage * pginfo->hwpage_size);
2089                         if ( !(*kpage) && pbuf->addr ) {
2090                                 ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx "
2091                                              "next_hwpage=%llx", pbuf->addr,
2092                                              pbuf->size, pginfo->next_hwpage);
2093                                 return -EFAULT;
2094                         }
2095                         (pginfo->hwpage_cnt)++;
2096                         (pginfo->next_hwpage)++;
2097                         if (PAGE_SIZE >= pginfo->hwpage_size) {
2098                                 if (pginfo->next_hwpage %
2099                                     (PAGE_SIZE / pginfo->hwpage_size) == 0)
2100                                         (pginfo->kpage_cnt)++;
2101                         } else
2102                                 pginfo->kpage_cnt += pginfo->hwpage_size /
2103                                         PAGE_SIZE;
2104                         kpage++;
2105                         i++;
2106                         if (i >= number) break;
2107                 }
2108                 if (pginfo->next_hwpage >= offs_hw + num_hw) {
2109                         (pginfo->u.phy.next_buf)++;
2110                         pginfo->next_hwpage = 0;
2111                 }
2112         }
2113         return ret;
2114 }
2115
2116 static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo,
2117                                 u32 number, u64 *kpage)
2118 {
2119         int ret = 0;
2120         u64 *fmrlist;
2121         u32 i;
2122
2123         /* loop over desired page_list entries */
2124         fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem;
2125         for (i = 0; i < number; i++) {
2126                 *kpage = (*fmrlist & ~(pginfo->hwpage_size - 1)) +
2127                            pginfo->next_hwpage * pginfo->hwpage_size;
2128                 if ( !(*kpage) ) {
2129                         ehca_gen_err("*fmrlist=%llx fmrlist=%p "
2130                                      "next_listelem=%llx next_hwpage=%llx",
2131                                      *fmrlist, fmrlist,
2132                                      pginfo->u.fmr.next_listelem,
2133                                      pginfo->next_hwpage);
2134                         return -EFAULT;
2135                 }
2136                 (pginfo->hwpage_cnt)++;
2137                 if (pginfo->u.fmr.fmr_pgsize >= pginfo->hwpage_size) {
2138                         if (pginfo->next_hwpage %
2139                             (pginfo->u.fmr.fmr_pgsize /
2140                              pginfo->hwpage_size) == 0) {
2141                                 (pginfo->kpage_cnt)++;
2142                                 (pginfo->u.fmr.next_listelem)++;
2143                                 fmrlist++;
2144                                 pginfo->next_hwpage = 0;
2145                         } else
2146                                 (pginfo->next_hwpage)++;
2147                 } else {
2148                         unsigned int cnt_per_hwpage = pginfo->hwpage_size /
2149                                 pginfo->u.fmr.fmr_pgsize;
2150                         unsigned int j;
2151                         u64 prev = *kpage;
2152                         /* check if adrs are contiguous */
2153                         for (j = 1; j < cnt_per_hwpage; j++) {
2154                                 u64 p = fmrlist[j] & ~(pginfo->hwpage_size - 1);
2155                                 if (prev + pginfo->u.fmr.fmr_pgsize != p) {
2156                                         ehca_gen_err("uncontiguous fmr pages "
2157                                                      "found prev=%llx p=%llx "
2158                                                      "idx=%x", prev, p, i + j);
2159                                         return -EINVAL;
2160                                 }
2161                                 prev = p;
2162                         }
2163                         pginfo->kpage_cnt += cnt_per_hwpage;
2164                         pginfo->u.fmr.next_listelem += cnt_per_hwpage;
2165                         fmrlist += cnt_per_hwpage;
2166                 }
2167                 kpage++;
2168         }
2169         return ret;
2170 }
2171
2172 /* setup page buffer from page info */
2173 int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo,
2174                      u32 number,
2175                      u64 *kpage)
2176 {
2177         int ret;
2178
2179         switch (pginfo->type) {
2180         case EHCA_MR_PGI_PHYS:
2181                 ret = ehca_set_pagebuf_phys(pginfo, number, kpage);
2182                 break;
2183         case EHCA_MR_PGI_USER:
2184                 ret = PAGE_SIZE >= pginfo->hwpage_size ?
2185                         ehca_set_pagebuf_user1(pginfo, number, kpage) :
2186                         ehca_set_pagebuf_user2(pginfo, number, kpage);
2187                 break;
2188         case EHCA_MR_PGI_FMR:
2189                 ret = ehca_set_pagebuf_fmr(pginfo, number, kpage);
2190                 break;
2191         default:
2192                 ehca_gen_err("bad pginfo->type=%x", pginfo->type);
2193                 ret = -EFAULT;
2194                 break;
2195         }
2196         return ret;
2197 } /* end ehca_set_pagebuf() */
2198
2199 /*----------------------------------------------------------------------*/
2200
2201 /*
2202  * check MR if it is a max-MR, i.e. uses whole memory
2203  * in case it's a max-MR 1 is returned, else 0
2204  */
2205 int ehca_mr_is_maxmr(u64 size,
2206                      u64 *iova_start)
2207 {
2208         /* a MR is treated as max-MR only if it fits following: */
2209         if ((size == ehca_mr_len) &&
2210             (iova_start == (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)))) {
2211                 ehca_gen_dbg("this is a max-MR");
2212                 return 1;
2213         } else
2214                 return 0;
2215 } /* end ehca_mr_is_maxmr() */
2216
2217 /*----------------------------------------------------------------------*/
2218
2219 /* map access control for MR/MW. This routine is used for MR and MW. */
2220 void ehca_mrmw_map_acl(int ib_acl,
2221                        u32 *hipz_acl)
2222 {
2223         *hipz_acl = 0;
2224         if (ib_acl & IB_ACCESS_REMOTE_READ)
2225                 *hipz_acl |= HIPZ_ACCESSCTRL_R_READ;
2226         if (ib_acl & IB_ACCESS_REMOTE_WRITE)
2227                 *hipz_acl |= HIPZ_ACCESSCTRL_R_WRITE;
2228         if (ib_acl & IB_ACCESS_REMOTE_ATOMIC)
2229                 *hipz_acl |= HIPZ_ACCESSCTRL_R_ATOMIC;
2230         if (ib_acl & IB_ACCESS_LOCAL_WRITE)
2231                 *hipz_acl |= HIPZ_ACCESSCTRL_L_WRITE;
2232         if (ib_acl & IB_ACCESS_MW_BIND)
2233                 *hipz_acl |= HIPZ_ACCESSCTRL_MW_BIND;
2234 } /* end ehca_mrmw_map_acl() */
2235
2236 /*----------------------------------------------------------------------*/
2237
2238 /* sets page size in hipz access control for MR/MW. */
2239 void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl) /*INOUT*/
2240 {
2241         *hipz_acl |= (ehca_encode_hwpage_size(pgsize) << 24);
2242 } /* end ehca_mrmw_set_pgsize_hipz_acl() */
2243
2244 /*----------------------------------------------------------------------*/
2245
2246 /*
2247  * reverse map access control for MR/MW.
2248  * This routine is used for MR and MW.
2249  */
2250 void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
2251                                int *ib_acl) /*OUT*/
2252 {
2253         *ib_acl = 0;
2254         if (*hipz_acl & HIPZ_ACCESSCTRL_R_READ)
2255                 *ib_acl |= IB_ACCESS_REMOTE_READ;
2256         if (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE)
2257                 *ib_acl |= IB_ACCESS_REMOTE_WRITE;
2258         if (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC)
2259                 *ib_acl |= IB_ACCESS_REMOTE_ATOMIC;
2260         if (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE)
2261                 *ib_acl |= IB_ACCESS_LOCAL_WRITE;
2262         if (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND)
2263                 *ib_acl |= IB_ACCESS_MW_BIND;
2264 } /* end ehca_mrmw_reverse_map_acl() */
2265
2266
2267 /*----------------------------------------------------------------------*/
2268
2269 /*
2270  * MR destructor and constructor
2271  * used in Reregister MR verb, sets all fields in ehca_mr_t to 0,
2272  * except struct ib_mr and spinlock
2273  */
2274 void ehca_mr_deletenew(struct ehca_mr *mr)
2275 {
2276         mr->flags = 0;
2277         mr->num_kpages = 0;
2278         mr->num_hwpages = 0;
2279         mr->acl = 0;
2280         mr->start = NULL;
2281         mr->fmr_page_size = 0;
2282         mr->fmr_max_pages = 0;
2283         mr->fmr_max_maps = 0;
2284         mr->fmr_map_cnt = 0;
2285         memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle));
2286         memset(&mr->galpas, 0, sizeof(mr->galpas));
2287 } /* end ehca_mr_deletenew() */
2288
2289 int ehca_init_mrmw_cache(void)
2290 {
2291         mr_cache = kmem_cache_create("ehca_cache_mr",
2292                                      sizeof(struct ehca_mr), 0,
2293                                      SLAB_HWCACHE_ALIGN,
2294                                      NULL);
2295         if (!mr_cache)
2296                 return -ENOMEM;
2297         mw_cache = kmem_cache_create("ehca_cache_mw",
2298                                      sizeof(struct ehca_mw), 0,
2299                                      SLAB_HWCACHE_ALIGN,
2300                                      NULL);
2301         if (!mw_cache) {
2302                 kmem_cache_destroy(mr_cache);
2303                 mr_cache = NULL;
2304                 return -ENOMEM;
2305         }
2306         return 0;
2307 }
2308
2309 void ehca_cleanup_mrmw_cache(void)
2310 {
2311         if (mr_cache)
2312                 kmem_cache_destroy(mr_cache);
2313         if (mw_cache)
2314                 kmem_cache_destroy(mw_cache);
2315 }
2316
2317 static inline int ehca_init_top_bmap(struct ehca_top_bmap *ehca_top_bmap,
2318                                      int dir)
2319 {
2320         if (!ehca_bmap_valid(ehca_top_bmap->dir[dir])) {
2321                 ehca_top_bmap->dir[dir] =
2322                         kmalloc(sizeof(struct ehca_dir_bmap), GFP_KERNEL);
2323                 if (!ehca_top_bmap->dir[dir])
2324                         return -ENOMEM;
2325                 /* Set map block to 0xFF according to EHCA_INVAL_ADDR */
2326                 memset(ehca_top_bmap->dir[dir], 0xFF, EHCA_ENT_MAP_SIZE);
2327         }
2328         return 0;
2329 }
2330
2331 static inline int ehca_init_bmap(struct ehca_bmap *ehca_bmap, int top, int dir)
2332 {
2333         if (!ehca_bmap_valid(ehca_bmap->top[top])) {
2334                 ehca_bmap->top[top] =
2335                         kmalloc(sizeof(struct ehca_top_bmap), GFP_KERNEL);
2336                 if (!ehca_bmap->top[top])
2337                         return -ENOMEM;
2338                 /* Set map block to 0xFF according to EHCA_INVAL_ADDR */
2339                 memset(ehca_bmap->top[top], 0xFF, EHCA_DIR_MAP_SIZE);
2340         }
2341         return ehca_init_top_bmap(ehca_bmap->top[top], dir);
2342 }
2343
2344 static inline int ehca_calc_index(unsigned long i, unsigned long s)
2345 {
2346         return (i >> s) & EHCA_INDEX_MASK;
2347 }
2348
2349 void ehca_destroy_busmap(void)
2350 {
2351         int top, dir;
2352
2353         if (!ehca_bmap)
2354                 return;
2355
2356         for (top = 0; top < EHCA_MAP_ENTRIES; top++) {
2357                 if (!ehca_bmap_valid(ehca_bmap->top[top]))
2358                         continue;
2359                 for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) {
2360                         if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
2361                                 continue;
2362
2363                         kfree(ehca_bmap->top[top]->dir[dir]);
2364                 }
2365
2366                 kfree(ehca_bmap->top[top]);
2367         }
2368
2369         kfree(ehca_bmap);
2370         ehca_bmap = NULL;
2371 }
2372
2373 static int ehca_update_busmap(unsigned long pfn, unsigned long nr_pages)
2374 {
2375         unsigned long i, start_section, end_section;
2376         int top, dir, idx;
2377
2378         if (!nr_pages)
2379                 return 0;
2380
2381         if (!ehca_bmap) {
2382                 ehca_bmap = kmalloc(sizeof(struct ehca_bmap), GFP_KERNEL);
2383                 if (!ehca_bmap)
2384                         return -ENOMEM;
2385                 /* Set map block to 0xFF according to EHCA_INVAL_ADDR */
2386                 memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE);
2387         }
2388
2389         start_section = (pfn * PAGE_SIZE) / EHCA_SECTSIZE;
2390         end_section = ((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE;
2391         for (i = start_section; i < end_section; i++) {
2392                 int ret;
2393                 top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT);
2394                 dir = ehca_calc_index(i, EHCA_DIR_INDEX_SHIFT);
2395                 idx = i & EHCA_INDEX_MASK;
2396
2397                 ret = ehca_init_bmap(ehca_bmap, top, dir);
2398                 if (ret) {
2399                         ehca_destroy_busmap();
2400                         return ret;
2401                 }
2402                 ehca_bmap->top[top]->dir[dir]->ent[idx] = ehca_mr_len;
2403                 ehca_mr_len += EHCA_SECTSIZE;
2404         }
2405         return 0;
2406 }
2407
2408 static int ehca_is_hugepage(unsigned long pfn)
2409 {
2410         int page_order;
2411
2412         if (pfn & EHCA_HUGEPAGE_PFN_MASK)
2413                 return 0;
2414
2415         page_order = compound_order(pfn_to_page(pfn));
2416         if (page_order + PAGE_SHIFT != EHCA_HUGEPAGESHIFT)
2417                 return 0;
2418
2419         return 1;
2420 }
2421
2422 static int ehca_create_busmap_callback(unsigned long initial_pfn,
2423                                        unsigned long total_nr_pages, void *arg)
2424 {
2425         int ret;
2426         unsigned long pfn, start_pfn, end_pfn, nr_pages;
2427
2428         if ((total_nr_pages * PAGE_SIZE) < EHCA_HUGEPAGE_SIZE)
2429                 return ehca_update_busmap(initial_pfn, total_nr_pages);
2430
2431         /* Given chunk is >= 16GB -> check for hugepages */
2432         start_pfn = initial_pfn;
2433         end_pfn = initial_pfn + total_nr_pages;
2434         pfn = start_pfn;
2435
2436         while (pfn < end_pfn) {
2437                 if (ehca_is_hugepage(pfn)) {
2438                         /* Add mem found in front of the hugepage */
2439                         nr_pages = pfn - start_pfn;
2440                         ret = ehca_update_busmap(start_pfn, nr_pages);
2441                         if (ret)
2442                                 return ret;
2443                         /* Skip the hugepage */
2444                         pfn += (EHCA_HUGEPAGE_SIZE / PAGE_SIZE);
2445                         start_pfn = pfn;
2446                 } else
2447                         pfn += (EHCA_SECTSIZE / PAGE_SIZE);
2448         }
2449
2450         /* Add mem found behind the hugepage(s)  */
2451         nr_pages = pfn - start_pfn;
2452         return ehca_update_busmap(start_pfn, nr_pages);
2453 }
2454
2455 int ehca_create_busmap(void)
2456 {
2457         int ret;
2458
2459         ehca_mr_len = 0;
2460         ret = walk_system_ram_range(0, 1ULL << MAX_PHYSMEM_BITS, NULL,
2461                                    ehca_create_busmap_callback);
2462         return ret;
2463 }
2464
2465 static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca,
2466                                    struct ehca_mr *e_mr,
2467                                    struct ehca_mr_pginfo *pginfo)
2468 {
2469         int top;
2470         u64 hret, *kpage;
2471
2472         kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
2473         if (!kpage) {
2474                 ehca_err(&shca->ib_device, "kpage alloc failed");
2475                 return -ENOMEM;
2476         }
2477         for (top = 0; top < EHCA_MAP_ENTRIES; top++) {
2478                 if (!ehca_bmap_valid(ehca_bmap->top[top]))
2479                         continue;
2480                 hret = ehca_reg_mr_dir_sections(top, kpage, shca, e_mr, pginfo);
2481                 if ((hret != H_PAGE_REGISTERED) && (hret != H_SUCCESS))
2482                         break;
2483         }
2484
2485         ehca_free_fw_ctrlblock(kpage);
2486
2487         if (hret == H_SUCCESS)
2488                 return 0; /* Everything is fine */
2489         else {
2490                 ehca_err(&shca->ib_device, "ehca_reg_bmap_mr_rpages failed, "
2491                                  "h_ret=%lli e_mr=%p top=%x lkey=%x "
2492                                  "hca_hndl=%llx mr_hndl=%llx", hret, e_mr, top,
2493                                  e_mr->ib.ib_mr.lkey,
2494                                  shca->ipz_hca_handle.handle,
2495                                  e_mr->ipz_mr_handle.handle);
2496                 return ehca2ib_return_code(hret);
2497         }
2498 }
2499
2500 static u64 ehca_map_vaddr(void *caddr)
2501 {
2502         int top, dir, idx;
2503         unsigned long abs_addr, offset;
2504         u64 entry;
2505
2506         if (!ehca_bmap)
2507                 return EHCA_INVAL_ADDR;
2508
2509         abs_addr = __pa(caddr);
2510         top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT);
2511         if (!ehca_bmap_valid(ehca_bmap->top[top]))
2512                 return EHCA_INVAL_ADDR;
2513
2514         dir = ehca_calc_index(abs_addr, EHCA_DIR_INDEX_SHIFT + EHCA_SECTSHIFT);
2515         if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
2516                 return EHCA_INVAL_ADDR;
2517
2518         idx = ehca_calc_index(abs_addr, EHCA_SECTSHIFT);
2519
2520         entry = ehca_bmap->top[top]->dir[dir]->ent[idx];
2521         if (ehca_bmap_valid(entry)) {
2522                 offset = (unsigned long)caddr & (EHCA_SECTSIZE - 1);
2523                 return entry | offset;
2524         } else
2525                 return EHCA_INVAL_ADDR;
2526 }
2527
2528 static int ehca_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
2529 {
2530         return dma_addr == EHCA_INVAL_ADDR;
2531 }
2532
2533 static u64 ehca_dma_map_single(struct ib_device *dev, void *cpu_addr,
2534                                size_t size, enum dma_data_direction direction)
2535 {
2536         if (cpu_addr)
2537                 return ehca_map_vaddr(cpu_addr);
2538         else
2539                 return EHCA_INVAL_ADDR;
2540 }
2541
2542 static void ehca_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
2543                                   enum dma_data_direction direction)
2544 {
2545         /* This is only a stub; nothing to be done here */
2546 }
2547
2548 static u64 ehca_dma_map_page(struct ib_device *dev, struct page *page,
2549                              unsigned long offset, size_t size,
2550                              enum dma_data_direction direction)
2551 {
2552         u64 addr;
2553
2554         if (offset + size > PAGE_SIZE)
2555                 return EHCA_INVAL_ADDR;
2556
2557         addr = ehca_map_vaddr(page_address(page));
2558         if (!ehca_dma_mapping_error(dev, addr))
2559                 addr += offset;
2560
2561         return addr;
2562 }
2563
2564 static void ehca_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
2565                                 enum dma_data_direction direction)
2566 {
2567         /* This is only a stub; nothing to be done here */
2568 }
2569
2570 static int ehca_dma_map_sg(struct ib_device *dev, struct scatterlist *sgl,
2571                            int nents, enum dma_data_direction direction)
2572 {
2573         struct scatterlist *sg;
2574         int i;
2575
2576         for_each_sg(sgl, sg, nents, i) {
2577                 u64 addr;
2578                 addr = ehca_map_vaddr(sg_virt(sg));
2579                 if (ehca_dma_mapping_error(dev, addr))
2580                         return 0;
2581
2582                 sg->dma_address = addr;
2583                 sg->dma_length = sg->length;
2584         }
2585         return nents;
2586 }
2587
2588 static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
2589                               int nents, enum dma_data_direction direction)
2590 {
2591         /* This is only a stub; nothing to be done here */
2592 }
2593
2594 static u64 ehca_dma_address(struct ib_device *dev, struct scatterlist *sg)
2595 {
2596         return sg->dma_address;
2597 }
2598
2599 static unsigned int ehca_dma_len(struct ib_device *dev, struct scatterlist *sg)
2600 {
2601         return sg->length;
2602 }
2603
2604 static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr,
2605                                          size_t size,
2606                                          enum dma_data_direction dir)
2607 {
2608         dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
2609 }
2610
2611 static void ehca_dma_sync_single_for_device(struct ib_device *dev, u64 addr,
2612                                             size_t size,
2613                                             enum dma_data_direction dir)
2614 {
2615         dma_sync_single_for_device(dev->dma_device, addr, size, dir);
2616 }
2617
2618 static void *ehca_dma_alloc_coherent(struct ib_device *dev, size_t size,
2619                                      u64 *dma_handle, gfp_t flag)
2620 {
2621         struct page *p;
2622         void *addr = NULL;
2623         u64 dma_addr;
2624
2625         p = alloc_pages(flag, get_order(size));
2626         if (p) {
2627                 addr = page_address(p);
2628                 dma_addr = ehca_map_vaddr(addr);
2629                 if (ehca_dma_mapping_error(dev, dma_addr)) {
2630                         free_pages((unsigned long)addr, get_order(size));
2631                         return NULL;
2632                 }
2633                 if (dma_handle)
2634                         *dma_handle = dma_addr;
2635                 return addr;
2636         }
2637         return NULL;
2638 }
2639
2640 static void ehca_dma_free_coherent(struct ib_device *dev, size_t size,
2641                                    void *cpu_addr, u64 dma_handle)
2642 {
2643         if (cpu_addr && size)
2644                 free_pages((unsigned long)cpu_addr, get_order(size));
2645 }
2646
2647
2648 struct ib_dma_mapping_ops ehca_dma_mapping_ops = {
2649         .mapping_error          = ehca_dma_mapping_error,
2650         .map_single             = ehca_dma_map_single,
2651         .unmap_single           = ehca_dma_unmap_single,
2652         .map_page               = ehca_dma_map_page,
2653         .unmap_page             = ehca_dma_unmap_page,
2654         .map_sg                 = ehca_dma_map_sg,
2655         .unmap_sg               = ehca_dma_unmap_sg,
2656         .dma_address            = ehca_dma_address,
2657         .dma_len                = ehca_dma_len,
2658         .sync_single_for_cpu    = ehca_dma_sync_single_for_cpu,
2659         .sync_single_for_device = ehca_dma_sync_single_for_device,
2660         .alloc_coherent         = ehca_dma_alloc_coherent,
2661         .free_coherent          = ehca_dma_free_coherent,
2662 };