IB/srp: Use pd->local_dma_lkey
[cascardo/linux.git] / drivers/infiniband/ulp/srp/ib_srp.c
1 /*
2  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
34
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/err.h>
39 #include <linux/string.h>
40 #include <linux/parser.h>
41 #include <linux/random.h>
42 #include <linux/jiffies.h>
43 #include <rdma/ib_cache.h>
44
45 #include <linux/atomic.h>
46
47 #include <scsi/scsi.h>
48 #include <scsi/scsi_device.h>
49 #include <scsi/scsi_dbg.h>
50 #include <scsi/scsi_tcq.h>
51 #include <scsi/srp.h>
52 #include <scsi/scsi_transport_srp.h>
53
54 #include "ib_srp.h"
55
56 #define DRV_NAME        "ib_srp"
57 #define PFX             DRV_NAME ": "
58 #define DRV_VERSION     "2.0"
59 #define DRV_RELDATE     "July 26, 2015"
60
61 MODULE_AUTHOR("Roland Dreier");
62 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
63 MODULE_LICENSE("Dual BSD/GPL");
64 MODULE_VERSION(DRV_VERSION);
65 MODULE_INFO(release_date, DRV_RELDATE);
66
67 static unsigned int srp_sg_tablesize;
68 static unsigned int cmd_sg_entries;
69 static unsigned int indirect_sg_entries;
70 static bool allow_ext_sg;
71 static bool prefer_fr;
72 static bool register_always;
73 static int topspin_workarounds = 1;
74
75 module_param(srp_sg_tablesize, uint, 0444);
76 MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
77
78 module_param(cmd_sg_entries, uint, 0444);
79 MODULE_PARM_DESC(cmd_sg_entries,
80                  "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
81
82 module_param(indirect_sg_entries, uint, 0444);
83 MODULE_PARM_DESC(indirect_sg_entries,
84                  "Default max number of gather/scatter entries (default is 12, max is " __stringify(SCSI_MAX_SG_CHAIN_SEGMENTS) ")");
85
86 module_param(allow_ext_sg, bool, 0444);
87 MODULE_PARM_DESC(allow_ext_sg,
88                   "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
89
90 module_param(topspin_workarounds, int, 0444);
91 MODULE_PARM_DESC(topspin_workarounds,
92                  "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
93
94 module_param(prefer_fr, bool, 0444);
95 MODULE_PARM_DESC(prefer_fr,
96 "Whether to use fast registration if both FMR and fast registration are supported");
97
98 module_param(register_always, bool, 0444);
99 MODULE_PARM_DESC(register_always,
100                  "Use memory registration even for contiguous memory regions");
101
102 static const struct kernel_param_ops srp_tmo_ops;
103
104 static int srp_reconnect_delay = 10;
105 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
106                 S_IRUGO | S_IWUSR);
107 MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
108
109 static int srp_fast_io_fail_tmo = 15;
110 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
111                 S_IRUGO | S_IWUSR);
112 MODULE_PARM_DESC(fast_io_fail_tmo,
113                  "Number of seconds between the observation of a transport"
114                  " layer error and failing all I/O. \"off\" means that this"
115                  " functionality is disabled.");
116
117 static int srp_dev_loss_tmo = 600;
118 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
119                 S_IRUGO | S_IWUSR);
120 MODULE_PARM_DESC(dev_loss_tmo,
121                  "Maximum number of seconds that the SRP transport should"
122                  " insulate against transport layer errors. After this time has been"
123                  " exceeded the SCSI host is removed. Should be"
124                  " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
125                  " if fast_io_fail_tmo has not been set. \"off\" means that"
126                  " this functionality is disabled.");
127
128 static unsigned ch_count;
129 module_param(ch_count, uint, 0444);
130 MODULE_PARM_DESC(ch_count,
131                  "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
132
133 static void srp_add_one(struct ib_device *device);
134 static void srp_remove_one(struct ib_device *device, void *client_data);
135 static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr);
136 static void srp_send_completion(struct ib_cq *cq, void *ch_ptr);
137 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
138
139 static struct scsi_transport_template *ib_srp_transport_template;
140 static struct workqueue_struct *srp_remove_wq;
141
142 static struct ib_client srp_client = {
143         .name   = "srp",
144         .add    = srp_add_one,
145         .remove = srp_remove_one
146 };
147
148 static struct ib_sa_client srp_sa_client;
149
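/*
 * Sysfs 'get' callback for the reconnect_delay, fast_io_fail_tmo and
 * dev_loss_tmo kernel module parameters: prints the timeout value in
 * seconds, or "off" if the timeout is negative.
 */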
150 static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
151 {
152         int tmo = *(int *)kp->arg;
153
154         if (tmo >= 0)
155                 return sprintf(buffer, "%d", tmo);
156         else
157                 return sprintf(buffer, "off");
158 }
159
160 static int srp_tmo_set(const char *val, const struct kernel_param *kp)
161 {
162         int tmo, res;
163
164         res = srp_parse_tmo(&tmo, val);
165         if (res)
166                 goto out;
167
168         if (kp->arg == &srp_reconnect_delay)
169                 res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
170                                     srp_dev_loss_tmo);
171         else if (kp->arg == &srp_fast_io_fail_tmo)
172                 res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
173         else
174                 res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
175                                     tmo);
176         if (res)
177                 goto out;
178         *(int *)kp->arg = tmo;
179
180 out:
181         return res;
182 }
183
184 static const struct kernel_param_ops srp_tmo_ops = {
185         .get = srp_tmo_get,
186         .set = srp_tmo_set,
187 };
188
189 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
190 {
191         return (struct srp_target_port *) host->hostdata;
192 }
193
194 static const char *srp_target_info(struct Scsi_Host *host)
195 {
196         return host_to_target(host)->target_name;
197 }
198
199 static int srp_target_is_topspin(struct srp_target_port *target)
200 {
201         static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
202         static const u8 cisco_oui[3]   = { 0x00, 0x1b, 0x0d };
203
204         return topspin_workarounds &&
205                 (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
206                  !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
207 }
208
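/*
 * Allocate an information unit (IU): a buffer of @size bytes that is
 * DMA-mapped for @direction on the HCA associated with @host. Returns
 * NULL on allocation or mapping failure.
 */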
209 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
210                                    gfp_t gfp_mask,
211                                    enum dma_data_direction direction)
212 {
213         struct srp_iu *iu;
214
215         iu = kmalloc(sizeof *iu, gfp_mask);
216         if (!iu)
217                 goto out;
218
219         iu->buf = kzalloc(size, gfp_mask);
220         if (!iu->buf)
221                 goto out_free_iu;
222
223         iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
224                                     direction);
225         if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
226                 goto out_free_buf;
227
228         iu->size      = size;
229         iu->direction = direction;
230
231         return iu;
232
233 out_free_buf:
234         kfree(iu->buf);
235 out_free_iu:
236         kfree(iu);
237 out:
238         return NULL;
239 }
240
241 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
242 {
243         if (!iu)
244                 return;
245
246         ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
247                             iu->direction);
248         kfree(iu->buf);
249         kfree(iu);
250 }
251
252 static void srp_qp_event(struct ib_event *event, void *context)
253 {
254         pr_debug("QP event %s (%d)\n",
255                  ib_event_msg(event->event), event->event);
256 }
257
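/*
 * Transition @qp to the INIT state using the P_Key index of target->pkey on
 * the local port, and enable remote read and remote write access.
 */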
258 static int srp_init_qp(struct srp_target_port *target,
259                        struct ib_qp *qp)
260 {
261         struct ib_qp_attr *attr;
262         int ret;
263
264         attr = kmalloc(sizeof *attr, GFP_KERNEL);
265         if (!attr)
266                 return -ENOMEM;
267
268         ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
269                                   target->srp_host->port,
270                                   be16_to_cpu(target->pkey),
271                                   &attr->pkey_index);
272         if (ret)
273                 goto out;
274
275         attr->qp_state        = IB_QPS_INIT;
276         attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
277                                     IB_ACCESS_REMOTE_WRITE);
278         attr->port_num        = target->srp_host->port;
279
280         ret = ib_modify_qp(qp, attr,
281                            IB_QP_STATE          |
282                            IB_QP_PKEY_INDEX     |
283                            IB_QP_ACCESS_FLAGS   |
284                            IB_QP_PORT);
285
286 out:
287         kfree(attr);
288         return ret;
289 }
290
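/*
 * Allocate a new IB CM ID for @ch, replace any existing CM ID, and
 * reinitialize the path record fields used for the next path query.
 */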
291 static int srp_new_cm_id(struct srp_rdma_ch *ch)
292 {
293         struct srp_target_port *target = ch->target;
294         struct ib_cm_id *new_cm_id;
295
296         new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
297                                     srp_cm_handler, ch);
298         if (IS_ERR(new_cm_id))
299                 return PTR_ERR(new_cm_id);
300
301         if (ch->cm_id)
302                 ib_destroy_cm_id(ch->cm_id);
303         ch->cm_id = new_cm_id;
304         ch->path.sgid = target->sgid;
305         ch->path.dgid = target->orig_dgid;
306         ch->path.pkey = target->pkey;
307         ch->path.service_id = target->service_id;
308
309         return 0;
310 }
311
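/*
 * Create an FMR pool for @target sized to the SCSI host queue depth, with a
 * page size of dev->mr_page_size and local write plus remote read/write
 * access.
 */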
312 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
313 {
314         struct srp_device *dev = target->srp_host->srp_dev;
315         struct ib_fmr_pool_param fmr_param;
316
317         memset(&fmr_param, 0, sizeof(fmr_param));
318         fmr_param.pool_size         = target->scsi_host->can_queue;
319         fmr_param.dirty_watermark   = fmr_param.pool_size / 4;
320         fmr_param.cache             = 1;
321         fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
322         fmr_param.page_shift        = ilog2(dev->mr_page_size);
323         fmr_param.access            = (IB_ACCESS_LOCAL_WRITE |
324                                        IB_ACCESS_REMOTE_WRITE |
325                                        IB_ACCESS_REMOTE_READ);
326
327         return ib_create_fmr_pool(dev->pd, &fmr_param);
328 }
329
330 /**
331  * srp_destroy_fr_pool() - free the resources owned by a pool
332  * @pool: Fast registration pool to be destroyed.
333  */
334 static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
335 {
336         int i;
337         struct srp_fr_desc *d;
338
339         if (!pool)
340                 return;
341
342         for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
343                 if (d->frpl)
344                         ib_free_fast_reg_page_list(d->frpl);
345                 if (d->mr)
346                         ib_dereg_mr(d->mr);
347         }
348         kfree(pool);
349 }
350
351 /**
352  * srp_create_fr_pool() - allocate and initialize a pool for fast registration
353  * @device:            IB device to allocate fast registration descriptors for.
354  * @pd:                Protection domain associated with the FR descriptors.
355  * @pool_size:         Number of descriptors to allocate.
356  * @max_page_list_len: Maximum fast registration work request page list length.
357  */
358 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
359                                               struct ib_pd *pd, int pool_size,
360                                               int max_page_list_len)
361 {
362         struct srp_fr_pool *pool;
363         struct srp_fr_desc *d;
364         struct ib_mr *mr;
365         struct ib_fast_reg_page_list *frpl;
366         int i, ret = -EINVAL;
367
368         if (pool_size <= 0)
369                 goto err;
370         ret = -ENOMEM;
371         pool = kzalloc(sizeof(struct srp_fr_pool) +
372                        pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
373         if (!pool)
374                 goto err;
375         pool->size = pool_size;
376         pool->max_page_list_len = max_page_list_len;
377         spin_lock_init(&pool->lock);
378         INIT_LIST_HEAD(&pool->free_list);
379
380         for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
381                 mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
382                                  max_page_list_len);
383                 if (IS_ERR(mr)) {
384                         ret = PTR_ERR(mr);
385                         goto destroy_pool;
386                 }
387                 d->mr = mr;
388                 frpl = ib_alloc_fast_reg_page_list(device, max_page_list_len);
389                 if (IS_ERR(frpl)) {
390                         ret = PTR_ERR(frpl);
391                         goto destroy_pool;
392                 }
393                 d->frpl = frpl;
394                 list_add_tail(&d->entry, &pool->free_list);
395         }
396
397 out:
398         return pool;
399
400 destroy_pool:
401         srp_destroy_fr_pool(pool);
402
403 err:
404         pool = ERR_PTR(ret);
405         goto out;
406 }
407
408 /**
409  * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
410  * @pool: Pool to obtain descriptor from.
411  */
412 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
413 {
414         struct srp_fr_desc *d = NULL;
415         unsigned long flags;
416
417         spin_lock_irqsave(&pool->lock, flags);
418         if (!list_empty(&pool->free_list)) {
419                 d = list_first_entry(&pool->free_list, typeof(*d), entry);
420                 list_del(&d->entry);
421         }
422         spin_unlock_irqrestore(&pool->lock, flags);
423
424         return d;
425 }
426
427 /**
428  * srp_fr_pool_put() - put an FR descriptor back in the free list
429  * @pool: Pool the descriptor was allocated from.
430  * @desc: Pointer to an array of fast registration descriptor pointers.
431  * @n:    Number of descriptors to put back.
432  *
433  * Note: The caller must already have queued an invalidation request for
434  * desc->mr->rkey before calling this function.
435  */
436 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
437                             int n)
438 {
439         unsigned long flags;
440         int i;
441
442         spin_lock_irqsave(&pool->lock, flags);
443         for (i = 0; i < n; i++)
444                 list_add(&desc[i]->entry, &pool->free_list);
445         spin_unlock_irqrestore(&pool->lock, flags);
446 }
447
448 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
449 {
450         struct srp_device *dev = target->srp_host->srp_dev;
451
452         return srp_create_fr_pool(dev->dev, dev->pd,
453                                   target->scsi_host->can_queue,
454                                   dev->max_pages_per_mr);
455 }
456
457 /**
458  * srp_destroy_qp() - destroy an RDMA queue pair
459  * @ch: SRP RDMA channel.
460  *
461  * Change a queue pair into the error state and wait until all receive
462  * completions have been processed before destroying it. This prevents the
463  * receive completion handler from accessing the queue pair while it is
464  * being destroyed.
465  */
466 static void srp_destroy_qp(struct srp_rdma_ch *ch)
467 {
468         static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
469         static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID };
470         struct ib_recv_wr *bad_wr;
471         int ret;
472
473         /* Destroying a QP and reusing ch->done is only safe if not connected */
474         WARN_ON_ONCE(ch->connected);
475
476         ret = ib_modify_qp(ch->qp, &attr, IB_QP_STATE);
477         WARN_ONCE(ret, "ib_cm_init_qp_attr() returned %d\n", ret);
478         if (ret)
479                 goto out;
480
481         init_completion(&ch->done);
482         ret = ib_post_recv(ch->qp, &wr, &bad_wr);
483         WARN_ONCE(ret, "ib_post_recv() returned %d\n", ret);
484         if (ret == 0)
485                 wait_for_completion(&ch->done);
486
487 out:
488         ib_destroy_qp(ch->qp);
489 }
490
491 static int srp_create_ch_ib(struct srp_rdma_ch *ch)
492 {
493         struct srp_target_port *target = ch->target;
494         struct srp_device *dev = target->srp_host->srp_dev;
495         struct ib_qp_init_attr *init_attr;
496         struct ib_cq *recv_cq, *send_cq;
497         struct ib_qp *qp;
498         struct ib_fmr_pool *fmr_pool = NULL;
499         struct srp_fr_pool *fr_pool = NULL;
500         const int m = 1 + dev->use_fast_reg;
501         struct ib_cq_init_attr cq_attr = {};
502         int ret;
503
504         init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
505         if (!init_attr)
506                 return -ENOMEM;
507
508         /* + 1 for SRP_LAST_WR_ID */
509         cq_attr.cqe = target->queue_size + 1;
510         cq_attr.comp_vector = ch->comp_vector;
511         recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch,
512                                &cq_attr);
513         if (IS_ERR(recv_cq)) {
514                 ret = PTR_ERR(recv_cq);
515                 goto err;
516         }
517
518         cq_attr.cqe = m * target->queue_size;
519         cq_attr.comp_vector = ch->comp_vector;
520         send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, ch,
521                                &cq_attr);
522         if (IS_ERR(send_cq)) {
523                 ret = PTR_ERR(send_cq);
524                 goto err_recv_cq;
525         }
526
527         ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
528
529         init_attr->event_handler       = srp_qp_event;
530         init_attr->cap.max_send_wr     = m * target->queue_size;
531         init_attr->cap.max_recv_wr     = target->queue_size + 1;
532         init_attr->cap.max_recv_sge    = 1;
533         init_attr->cap.max_send_sge    = 1;
534         init_attr->sq_sig_type         = IB_SIGNAL_REQ_WR;
535         init_attr->qp_type             = IB_QPT_RC;
536         init_attr->send_cq             = send_cq;
537         init_attr->recv_cq             = recv_cq;
538
539         qp = ib_create_qp(dev->pd, init_attr);
540         if (IS_ERR(qp)) {
541                 ret = PTR_ERR(qp);
542                 goto err_send_cq;
543         }
544
545         ret = srp_init_qp(target, qp);
546         if (ret)
547                 goto err_qp;
548
549         if (dev->use_fast_reg && dev->has_fr) {
550                 fr_pool = srp_alloc_fr_pool(target);
551                 if (IS_ERR(fr_pool)) {
552                         ret = PTR_ERR(fr_pool);
553                         shost_printk(KERN_WARNING, target->scsi_host, PFX
554                                      "FR pool allocation failed (%d)\n", ret);
555                         goto err_qp;
556                 }
557                 if (ch->fr_pool)
558                         srp_destroy_fr_pool(ch->fr_pool);
559                 ch->fr_pool = fr_pool;
560         } else if (!dev->use_fast_reg && dev->has_fmr) {
561                 fmr_pool = srp_alloc_fmr_pool(target);
562                 if (IS_ERR(fmr_pool)) {
563                         ret = PTR_ERR(fmr_pool);
564                         shost_printk(KERN_WARNING, target->scsi_host, PFX
565                                      "FMR pool allocation failed (%d)\n", ret);
566                         goto err_qp;
567                 }
568                 if (ch->fmr_pool)
569                         ib_destroy_fmr_pool(ch->fmr_pool);
570                 ch->fmr_pool = fmr_pool;
571         }
572
573         if (ch->qp)
574                 srp_destroy_qp(ch);
575         if (ch->recv_cq)
576                 ib_destroy_cq(ch->recv_cq);
577         if (ch->send_cq)
578                 ib_destroy_cq(ch->send_cq);
579
580         ch->qp = qp;
581         ch->recv_cq = recv_cq;
582         ch->send_cq = send_cq;
583
584         kfree(init_attr);
585         return 0;
586
587 err_qp:
588         ib_destroy_qp(qp);
589
590 err_send_cq:
591         ib_destroy_cq(send_cq);
592
593 err_recv_cq:
594         ib_destroy_cq(recv_cq);
595
596 err:
597         kfree(init_attr);
598         return ret;
599 }
600
601 /*
602  * Note: this function may be called without srp_alloc_iu_bufs() having been
603  * invoked. Hence the ch->[rt]x_ring checks.
604  */
605 static void srp_free_ch_ib(struct srp_target_port *target,
606                            struct srp_rdma_ch *ch)
607 {
608         struct srp_device *dev = target->srp_host->srp_dev;
609         int i;
610
611         if (!ch->target)
612                 return;
613
614         if (ch->cm_id) {
615                 ib_destroy_cm_id(ch->cm_id);
616                 ch->cm_id = NULL;
617         }
618
619         /* If srp_new_cm_id() succeeded but srp_create_ch_ib() did not, return. */
620         if (!ch->qp)
621                 return;
622
623         if (dev->use_fast_reg) {
624                 if (ch->fr_pool)
625                         srp_destroy_fr_pool(ch->fr_pool);
626         } else {
627                 if (ch->fmr_pool)
628                         ib_destroy_fmr_pool(ch->fmr_pool);
629         }
630         srp_destroy_qp(ch);
631         ib_destroy_cq(ch->send_cq);
632         ib_destroy_cq(ch->recv_cq);
633
634         /*
635          * Prevent the SCSI error handler from using this channel after it
636          * has been freed. The SCSI error handler may continue trying to
637          * perform recovery actions after scsi_remove_host() has
638          * returned.
639          */
640         ch->target = NULL;
641
642         ch->qp = NULL;
643         ch->send_cq = ch->recv_cq = NULL;
644
645         if (ch->rx_ring) {
646                 for (i = 0; i < target->queue_size; ++i)
647                         srp_free_iu(target->srp_host, ch->rx_ring[i]);
648                 kfree(ch->rx_ring);
649                 ch->rx_ring = NULL;
650         }
651         if (ch->tx_ring) {
652                 for (i = 0; i < target->queue_size; ++i)
653                         srp_free_iu(target->srp_host, ch->tx_ring[i]);
654                 kfree(ch->tx_ring);
655                 ch->tx_ring = NULL;
656         }
657 }
658
659 static void srp_path_rec_completion(int status,
660                                     struct ib_sa_path_rec *pathrec,
661                                     void *ch_ptr)
662 {
663         struct srp_rdma_ch *ch = ch_ptr;
664         struct srp_target_port *target = ch->target;
665
666         ch->status = status;
667         if (status)
668                 shost_printk(KERN_ERR, target->scsi_host,
669                              PFX "Got failed path rec status %d\n", status);
670         else
671                 ch->path = *pathrec;
672         complete(&ch->done);
673 }
674
675 static int srp_lookup_path(struct srp_rdma_ch *ch)
676 {
677         struct srp_target_port *target = ch->target;
678         int ret;
679
680         ch->path.numb_path = 1;
681
682         init_completion(&ch->done);
683
684         ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
685                                                target->srp_host->srp_dev->dev,
686                                                target->srp_host->port,
687                                                &ch->path,
688                                                IB_SA_PATH_REC_SERVICE_ID |
689                                                IB_SA_PATH_REC_DGID       |
690                                                IB_SA_PATH_REC_SGID       |
691                                                IB_SA_PATH_REC_NUMB_PATH  |
692                                                IB_SA_PATH_REC_PKEY,
693                                                SRP_PATH_REC_TIMEOUT_MS,
694                                                GFP_KERNEL,
695                                                srp_path_rec_completion,
696                                                ch, &ch->path_query);
697         if (ch->path_query_id < 0)
698                 return ch->path_query_id;
699
700         ret = wait_for_completion_interruptible(&ch->done);
701         if (ret < 0)
702                 return ret;
703
704         if (ch->status < 0)
705                 shost_printk(KERN_WARNING, target->scsi_host,
706                              PFX "Path record query failed\n");
707
708         return ch->status;
709 }
710
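/*
 * Build an SRP_LOGIN_REQ and send it to the target via the IB CM on
 * @ch->cm_id. @multich selects SRP_MULTICHAN_MULTI instead of
 * SRP_MULTICHAN_SINGLE in the login request flags.
 */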
711 static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
712 {
713         struct srp_target_port *target = ch->target;
714         struct {
715                 struct ib_cm_req_param param;
716                 struct srp_login_req   priv;
717         } *req = NULL;
718         int status;
719
720         req = kzalloc(sizeof *req, GFP_KERNEL);
721         if (!req)
722                 return -ENOMEM;
723
724         req->param.primary_path               = &ch->path;
725         req->param.alternate_path             = NULL;
726         req->param.service_id                 = target->service_id;
727         req->param.qp_num                     = ch->qp->qp_num;
728         req->param.qp_type                    = ch->qp->qp_type;
729         req->param.private_data               = &req->priv;
730         req->param.private_data_len           = sizeof req->priv;
731         req->param.flow_control               = 1;
732
733         get_random_bytes(&req->param.starting_psn, 4);
734         req->param.starting_psn              &= 0xffffff;
735
736         /*
737          * Pick some arbitrary defaults here; we could make these
738          * module parameters if anyone cared about setting them.
739          */
740         req->param.responder_resources        = 4;
741         req->param.remote_cm_response_timeout = 20;
742         req->param.local_cm_response_timeout  = 20;
743         req->param.retry_count                = target->tl_retry_count;
744         req->param.rnr_retry_count            = 7;
745         req->param.max_cm_retries             = 15;
746
747         req->priv.opcode        = SRP_LOGIN_REQ;
748         req->priv.tag           = 0;
749         req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
750         req->priv.req_buf_fmt   = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
751                                               SRP_BUF_FORMAT_INDIRECT);
752         req->priv.req_flags     = (multich ? SRP_MULTICHAN_MULTI :
753                                    SRP_MULTICHAN_SINGLE);
754         /*
755          * In the published SRP specification (draft rev. 16a), the
756          * port identifier format is 8 bytes of ID extension followed
757          * by 8 bytes of GUID.  Older drafts put the two halves in the
758          * opposite order, so that the GUID comes first.
759          *
760          * Targets conforming to these obsolete drafts can be
761          * recognized by the I/O Class they report.
762          */
763         if (target->io_class == SRP_REV10_IB_IO_CLASS) {
764                 memcpy(req->priv.initiator_port_id,
765                        &target->sgid.global.interface_id, 8);
766                 memcpy(req->priv.initiator_port_id + 8,
767                        &target->initiator_ext, 8);
768                 memcpy(req->priv.target_port_id,     &target->ioc_guid, 8);
769                 memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
770         } else {
771                 memcpy(req->priv.initiator_port_id,
772                        &target->initiator_ext, 8);
773                 memcpy(req->priv.initiator_port_id + 8,
774                        &target->sgid.global.interface_id, 8);
775                 memcpy(req->priv.target_port_id,     &target->id_ext, 8);
776                 memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
777         }
778
779         /*
780          * Topspin/Cisco SRP targets will reject our login unless we
781          * zero out the first 8 bytes of our initiator port ID and set
782          * the second 8 bytes to the local node GUID.
783          */
784         if (srp_target_is_topspin(target)) {
785                 shost_printk(KERN_DEBUG, target->scsi_host,
786                              PFX "Topspin/Cisco initiator port ID workaround "
787                              "activated for target GUID %016llx\n",
788                              be64_to_cpu(target->ioc_guid));
789                 memset(req->priv.initiator_port_id, 0, 8);
790                 memcpy(req->priv.initiator_port_id + 8,
791                        &target->srp_host->srp_dev->dev->node_guid, 8);
792         }
793
794         status = ib_send_cm_req(ch->cm_id, &req->param);
795
796         kfree(req);
797
798         return status;
799 }
800
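/*
 * Transition @target to the SRP_TARGET_REMOVED state and, if the state
 * changed, queue its remove_work on srp_remove_wq. Returns true if this call
 * performed the state change.
 */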
801 static bool srp_queue_remove_work(struct srp_target_port *target)
802 {
803         bool changed = false;
804
805         spin_lock_irq(&target->lock);
806         if (target->state != SRP_TARGET_REMOVED) {
807                 target->state = SRP_TARGET_REMOVED;
808                 changed = true;
809         }
810         spin_unlock_irq(&target->lock);
811
812         if (changed)
813                 queue_work(srp_remove_wq, &target->remove_work);
814
815         return changed;
816 }
817
818 static void srp_disconnect_target(struct srp_target_port *target)
819 {
820         struct srp_rdma_ch *ch;
821         int i;
822
823         /* XXX should send SRP_I_LOGOUT request */
824
825         for (i = 0; i < target->ch_count; i++) {
826                 ch = &target->ch[i];
827                 ch->connected = false;
828                 if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
829                         shost_printk(KERN_DEBUG, target->scsi_host,
830                                      PFX "Sending CM DREQ failed\n");
831                 }
832         }
833 }
834
835 static void srp_free_req_data(struct srp_target_port *target,
836                               struct srp_rdma_ch *ch)
837 {
838         struct srp_device *dev = target->srp_host->srp_dev;
839         struct ib_device *ibdev = dev->dev;
840         struct srp_request *req;
841         int i;
842
843         if (!ch->req_ring)
844                 return;
845
846         for (i = 0; i < target->req_ring_size; ++i) {
847                 req = &ch->req_ring[i];
848                 if (dev->use_fast_reg)
849                         kfree(req->fr_list);
850                 else
851                         kfree(req->fmr_list);
852                 kfree(req->map_page);
853                 if (req->indirect_dma_addr) {
854                         ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
855                                             target->indirect_size,
856                                             DMA_TO_DEVICE);
857                 }
858                 kfree(req->indirect_desc);
859         }
860
861         kfree(ch->req_ring);
862         ch->req_ring = NULL;
863 }
864
865 static int srp_alloc_req_data(struct srp_rdma_ch *ch)
866 {
867         struct srp_target_port *target = ch->target;
868         struct srp_device *srp_dev = target->srp_host->srp_dev;
869         struct ib_device *ibdev = srp_dev->dev;
870         struct srp_request *req;
871         void *mr_list;
872         dma_addr_t dma_addr;
873         int i, ret = -ENOMEM;
874
875         ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
876                                GFP_KERNEL);
877         if (!ch->req_ring)
878                 goto out;
879
880         for (i = 0; i < target->req_ring_size; ++i) {
881                 req = &ch->req_ring[i];
882                 mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
883                                   GFP_KERNEL);
884                 if (!mr_list)
885                         goto out;
886                 if (srp_dev->use_fast_reg)
887                         req->fr_list = mr_list;
888                 else
889                         req->fmr_list = mr_list;
890                 req->map_page = kmalloc(srp_dev->max_pages_per_mr *
891                                         sizeof(void *), GFP_KERNEL);
892                 if (!req->map_page)
893                         goto out;
894                 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
895                 if (!req->indirect_desc)
896                         goto out;
897
898                 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
899                                              target->indirect_size,
900                                              DMA_TO_DEVICE);
901                 if (ib_dma_mapping_error(ibdev, dma_addr))
902                         goto out;
903
904                 req->indirect_dma_addr = dma_addr;
905         }
906         ret = 0;
907
908 out:
909         return ret;
910 }
911
912 /**
913  * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
914  * @shost: SCSI host whose attributes to remove from sysfs.
915  *
916  * Note: Any attributes defined in the host template that did not exist
917  * before this function was invoked will be ignored.
918  */
919 static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
920 {
921         struct device_attribute **attr;
922
923         for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
924                 device_remove_file(&shost->shost_dev, *attr);
925 }
926
927 static void srp_remove_target(struct srp_target_port *target)
928 {
929         struct srp_rdma_ch *ch;
930         int i;
931
932         WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
933
934         srp_del_scsi_host_attr(target->scsi_host);
935         srp_rport_get(target->rport);
936         srp_remove_host(target->scsi_host);
937         scsi_remove_host(target->scsi_host);
938         srp_stop_rport_timers(target->rport);
939         srp_disconnect_target(target);
940         for (i = 0; i < target->ch_count; i++) {
941                 ch = &target->ch[i];
942                 srp_free_ch_ib(target, ch);
943         }
944         cancel_work_sync(&target->tl_err_work);
945         srp_rport_put(target->rport);
946         for (i = 0; i < target->ch_count; i++) {
947                 ch = &target->ch[i];
948                 srp_free_req_data(target, ch);
949         }
950         kfree(target->ch);
951         target->ch = NULL;
952
953         spin_lock(&target->srp_host->target_lock);
954         list_del(&target->list);
955         spin_unlock(&target->srp_host->target_lock);
956
957         scsi_host_put(target->scsi_host);
958 }
959
960 static void srp_remove_work(struct work_struct *work)
961 {
962         struct srp_target_port *target =
963                 container_of(work, struct srp_target_port, remove_work);
964
965         WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
966
967         srp_remove_target(target);
968 }
969
970 static void srp_rport_delete(struct srp_rport *rport)
971 {
972         struct srp_target_port *target = rport->lld_data;
973
974         srp_queue_remove_work(target);
975 }
976
977 /**
978  * srp_connected_ch() - number of connected channels
979  * @target: SRP target port.
980  */
981 static int srp_connected_ch(struct srp_target_port *target)
982 {
983         int i, c = 0;
984
985         for (i = 0; i < target->ch_count; i++)
986                 c += target->ch[i].connected;
987
988         return c;
989 }
990
991 static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
992 {
993         struct srp_target_port *target = ch->target;
994         int ret;
995
996         WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
997
998         ret = srp_lookup_path(ch);
999         if (ret)
1000                 return ret;
1001
1002         while (1) {
1003                 init_completion(&ch->done);
1004                 ret = srp_send_req(ch, multich);
1005                 if (ret)
1006                         return ret;
1007                 ret = wait_for_completion_interruptible(&ch->done);
1008                 if (ret < 0)
1009                         return ret;
1010
1011                 /*
1012                  * The CM event handling code will set status to
1013                  * SRP_PORT_REDIRECT if we get a port redirect REJ
1014                  * back, or SRP_DLID_REDIRECT if we get a lid/qp
1015                  * redirect REJ back.
1016                  */
1017                 switch (ch->status) {
1018                 case 0:
1019                         ch->connected = true;
1020                         return 0;
1021
1022                 case SRP_PORT_REDIRECT:
1023                         ret = srp_lookup_path(ch);
1024                         if (ret)
1025                                 return ret;
1026                         break;
1027
1028                 case SRP_DLID_REDIRECT:
1029                         break;
1030
1031                 case SRP_STALE_CONN:
1032                         shost_printk(KERN_ERR, target->scsi_host, PFX
1033                                      "giving up on stale connection\n");
1034                         ch->status = -ECONNRESET;
1035                         return ch->status;
1036
1037                 default:
1038                         return ch->status;
1039                 }
1040         }
1041 }
1042
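/*
 * Post an IB_WR_LOCAL_INV work request on @ch->qp to invalidate @rkey.
 */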
1043 static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey)
1044 {
1045         struct ib_send_wr *bad_wr;
1046         struct ib_send_wr wr = {
1047                 .opcode             = IB_WR_LOCAL_INV,
1048                 .wr_id              = LOCAL_INV_WR_ID_MASK,
1049                 .next               = NULL,
1050                 .num_sge            = 0,
1051                 .send_flags         = 0,
1052                 .ex.invalidate_rkey = rkey,
1053         };
1054
1055         return ib_post_send(ch->qp, &wr, &bad_wr);
1056 }
1057
1058 static void srp_unmap_data(struct scsi_cmnd *scmnd,
1059                            struct srp_rdma_ch *ch,
1060                            struct srp_request *req)
1061 {
1062         struct srp_target_port *target = ch->target;
1063         struct srp_device *dev = target->srp_host->srp_dev;
1064         struct ib_device *ibdev = dev->dev;
1065         int i, res;
1066
1067         if (!scsi_sglist(scmnd) ||
1068             (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1069              scmnd->sc_data_direction != DMA_FROM_DEVICE))
1070                 return;
1071
1072         if (dev->use_fast_reg) {
1073                 struct srp_fr_desc **pfr;
1074
1075                 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1076                         res = srp_inv_rkey(ch, (*pfr)->mr->rkey);
1077                         if (res < 0) {
1078                                 shost_printk(KERN_ERR, target->scsi_host, PFX
1079                                   "Queueing INV WR for rkey %#x failed (%d)\n",
1080                                   (*pfr)->mr->rkey, res);
1081                                 queue_work(system_long_wq,
1082                                            &target->tl_err_work);
1083                         }
1084                 }
1085                 if (req->nmdesc)
1086                         srp_fr_pool_put(ch->fr_pool, req->fr_list,
1087                                         req->nmdesc);
1088         } else {
1089                 struct ib_pool_fmr **pfmr;
1090
1091                 for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
1092                         ib_fmr_pool_unmap(*pfmr);
1093         }
1094
1095         ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1096                         scmnd->sc_data_direction);
1097 }
1098
1099 /**
1100  * srp_claim_req() - Take ownership of the scmnd associated with a request.
1101  * @ch: SRP RDMA channel.
1102  * @req: SRP request.
1103  * @sdev: If not NULL, only take ownership for this SCSI device.
1104  * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1105  *         ownership of @req->scmnd if it equals @scmnd.
1106  *
1107  * Return value:
1108  * Either NULL or a pointer to the SCSI command the caller became owner of.
1109  */
1110 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1111                                        struct srp_request *req,
1112                                        struct scsi_device *sdev,
1113                                        struct scsi_cmnd *scmnd)
1114 {
1115         unsigned long flags;
1116
1117         spin_lock_irqsave(&ch->lock, flags);
1118         if (req->scmnd &&
1119             (!sdev || req->scmnd->device == sdev) &&
1120             (!scmnd || req->scmnd == scmnd)) {
1121                 scmnd = req->scmnd;
1122                 req->scmnd = NULL;
1123         } else {
1124                 scmnd = NULL;
1125         }
1126         spin_unlock_irqrestore(&ch->lock, flags);
1127
1128         return scmnd;
1129 }
1130
1131 /**
1132  * srp_free_req() - Unmap data and add request to the free request list.
1133  * @ch:     SRP RDMA channel.
1134  * @req:    Request to be freed.
1135  * @scmnd:  SCSI command associated with @req.
1136  * @req_lim_delta: Amount to be added to @ch->req_lim.
1137  */
1138 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1139                          struct scsi_cmnd *scmnd, s32 req_lim_delta)
1140 {
1141         unsigned long flags;
1142
1143         srp_unmap_data(scmnd, ch, req);
1144
1145         spin_lock_irqsave(&ch->lock, flags);
1146         ch->req_lim += req_lim_delta;
1147         spin_unlock_irqrestore(&ch->lock, flags);
1148 }
1149
1150 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1151                            struct scsi_device *sdev, int result)
1152 {
1153         struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1154
1155         if (scmnd) {
1156                 srp_free_req(ch, req, scmnd, 0);
1157                 scmnd->result = result;
1158                 scmnd->scsi_done(scmnd);
1159         }
1160 }
1161
1162 static void srp_terminate_io(struct srp_rport *rport)
1163 {
1164         struct srp_target_port *target = rport->lld_data;
1165         struct srp_rdma_ch *ch;
1166         struct Scsi_Host *shost = target->scsi_host;
1167         struct scsi_device *sdev;
1168         int i, j;
1169
1170         /*
1171          * Invoking srp_terminate_io() while srp_queuecommand() is running
1172          * is not safe. Hence the warning statement below.
1173          */
1174         shost_for_each_device(sdev, shost)
1175                 WARN_ON_ONCE(sdev->request_queue->request_fn_active);
1176
1177         for (i = 0; i < target->ch_count; i++) {
1178                 ch = &target->ch[i];
1179
1180                 for (j = 0; j < target->req_ring_size; ++j) {
1181                         struct srp_request *req = &ch->req_ring[j];
1182
1183                         srp_finish_req(ch, req, NULL,
1184                                        DID_TRANSPORT_FAILFAST << 16);
1185                 }
1186         }
1187 }
1188
1189 /*
1190  * It is up to the caller to ensure that srp_rport_reconnect() calls are
1191  * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1192  * srp_reset_device() or srp_reset_host() calls will occur while this function
1193  * is in progress. One way to ensure this is not to call this function
1194  * directly but to call srp_reconnect_rport() instead, since that function
1195  * serializes calls of this function via rport->mutex and also blocks
1196  * srp_queuecommand() calls before invoking this function.
1197  */
1198 static int srp_rport_reconnect(struct srp_rport *rport)
1199 {
1200         struct srp_target_port *target = rport->lld_data;
1201         struct srp_rdma_ch *ch;
1202         int i, j, ret = 0;
1203         bool multich = false;
1204
1205         srp_disconnect_target(target);
1206
1207         if (target->state == SRP_TARGET_SCANNING)
1208                 return -ENODEV;
1209
1210         /*
1211          * Now get a new local CM ID so that we avoid confusing the target in
1212          * case things are really fouled up. Doing so also ensures that all CM
1213          * callbacks will have finished before a new QP is allocated.
1214          */
1215         for (i = 0; i < target->ch_count; i++) {
1216                 ch = &target->ch[i];
1217                 ret += srp_new_cm_id(ch);
1218         }
1219         for (i = 0; i < target->ch_count; i++) {
1220                 ch = &target->ch[i];
1221                 for (j = 0; j < target->req_ring_size; ++j) {
1222                         struct srp_request *req = &ch->req_ring[j];
1223
1224                         srp_finish_req(ch, req, NULL, DID_RESET << 16);
1225                 }
1226         }
1227         for (i = 0; i < target->ch_count; i++) {
1228                 ch = &target->ch[i];
1229                 /*
1230                  * Whether or not creating a new CM ID succeeded, create a new
1231                  * QP. This guarantees that all completion callback function
1232                  * invocations have finished before request resetting starts.
1233                  */
1234                 ret += srp_create_ch_ib(ch);
1235
1236                 INIT_LIST_HEAD(&ch->free_tx);
1237                 for (j = 0; j < target->queue_size; ++j)
1238                         list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1239         }
1240
1241         target->qp_in_error = false;
1242
1243         for (i = 0; i < target->ch_count; i++) {
1244                 ch = &target->ch[i];
1245                 if (ret)
1246                         break;
1247                 ret = srp_connect_ch(ch, multich);
1248                 multich = true;
1249         }
1250
1251         if (ret == 0)
1252                 shost_printk(KERN_INFO, target->scsi_host,
1253                              PFX "reconnect succeeded\n");
1254
1255         return ret;
1256 }
1257
1258 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1259                          unsigned int dma_len, u32 rkey)
1260 {
1261         struct srp_direct_buf *desc = state->desc;
1262
1263         desc->va = cpu_to_be64(dma_addr);
1264         desc->key = cpu_to_be32(rkey);
1265         desc->len = cpu_to_be32(dma_len);
1266
1267         state->total_len += dma_len;
1268         state->desc++;
1269         state->ndesc++;
1270 }
1271
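/*
 * Map the pages accumulated in @state through the FMR pool of @ch and add a
 * memory descriptor for the resulting rkey.
 */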
1272 static int srp_map_finish_fmr(struct srp_map_state *state,
1273                               struct srp_rdma_ch *ch)
1274 {
1275         struct ib_pool_fmr *fmr;
1276         u64 io_addr = 0;
1277
1278         fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1279                                    state->npages, io_addr);
1280         if (IS_ERR(fmr))
1281                 return PTR_ERR(fmr);
1282
1283         *state->next_fmr++ = fmr;
1284         state->nmdesc++;
1285
1286         srp_map_desc(state, 0, state->dma_len, fmr->fmr->rkey);
1287
1288         return 0;
1289 }
1290
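/*
 * Register the pages accumulated in @state using a fast registration work
 * request: take a descriptor from the FR pool, bump its rkey, post an
 * IB_WR_FAST_REG_MR work request and add a memory descriptor for the new
 * rkey.
 */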
1291 static int srp_map_finish_fr(struct srp_map_state *state,
1292                              struct srp_rdma_ch *ch)
1293 {
1294         struct srp_target_port *target = ch->target;
1295         struct srp_device *dev = target->srp_host->srp_dev;
1296         struct ib_send_wr *bad_wr;
1297         struct ib_send_wr wr;
1298         struct srp_fr_desc *desc;
1299         u32 rkey;
1300
1301         desc = srp_fr_pool_get(ch->fr_pool);
1302         if (!desc)
1303                 return -ENOMEM;
1304
1305         rkey = ib_inc_rkey(desc->mr->rkey);
1306         ib_update_fast_reg_key(desc->mr, rkey);
1307
1308         memcpy(desc->frpl->page_list, state->pages,
1309                sizeof(state->pages[0]) * state->npages);
1310
1311         memset(&wr, 0, sizeof(wr));
1312         wr.opcode = IB_WR_FAST_REG_MR;
1313         wr.wr_id = FAST_REG_WR_ID_MASK;
1314         wr.wr.fast_reg.iova_start = state->base_dma_addr;
1315         wr.wr.fast_reg.page_list = desc->frpl;
1316         wr.wr.fast_reg.page_list_len = state->npages;
1317         wr.wr.fast_reg.page_shift = ilog2(dev->mr_page_size);
1318         wr.wr.fast_reg.length = state->dma_len;
1319         wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
1320                                        IB_ACCESS_REMOTE_READ |
1321                                        IB_ACCESS_REMOTE_WRITE);
1322         wr.wr.fast_reg.rkey = desc->mr->lkey;
1323
1324         *state->next_fr++ = desc;
1325         state->nmdesc++;
1326
1327         srp_map_desc(state, state->base_dma_addr, state->dma_len,
1328                      desc->mr->rkey);
1329
1330         return ib_post_send(ch->qp, &wr, &bad_wr);
1331 }
1332
1333 static int srp_finish_mapping(struct srp_map_state *state,
1334                               struct srp_rdma_ch *ch)
1335 {
1336         struct srp_target_port *target = ch->target;
1337         int ret = 0;
1338
1339         if (state->npages == 0)
1340                 return 0;
1341
1342         if (state->npages == 1 && !register_always)
1343                 srp_map_desc(state, state->base_dma_addr, state->dma_len,
1344                              target->rkey);
1345         else
1346                 ret = target->srp_host->srp_dev->use_fast_reg ?
1347                         srp_map_finish_fr(state, ch) :
1348                         srp_map_finish_fmr(state, ch);
1349
1350         if (ret == 0) {
1351                 state->npages = 0;
1352                 state->dma_len = 0;
1353         }
1354
1355         return ret;
1356 }
1357
1358 static void srp_map_update_start(struct srp_map_state *state,
1359                                  struct scatterlist *sg, int sg_index,
1360                                  dma_addr_t dma_addr)
1361 {
1362         state->unmapped_sg = sg;
1363         state->unmapped_index = sg_index;
1364         state->unmapped_addr = dma_addr;
1365 }
1366
1367 static int srp_map_sg_entry(struct srp_map_state *state,
1368                             struct srp_rdma_ch *ch,
1369                             struct scatterlist *sg, int sg_index,
1370                             bool use_mr)
1371 {
1372         struct srp_target_port *target = ch->target;
1373         struct srp_device *dev = target->srp_host->srp_dev;
1374         struct ib_device *ibdev = dev->dev;
1375         dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
1376         unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
1377         unsigned int len;
1378         int ret;
1379
1380         if (!dma_len)
1381                 return 0;
1382
1383         if (!use_mr) {
1384                 /*
1385                  * Once we're in direct map mode for a request, we don't
1386                  * go back to FMR or FR mode, so no need to update anything
1387                  * other than the descriptor.
1388                  */
1389                 srp_map_desc(state, dma_addr, dma_len, target->rkey);
1390                 return 0;
1391         }
1392
1393         /*
1394          * Since not all RDMA HW drivers support non-zero page offsets for
1395          * FMR, if we start at an offset into a page, don't merge into the
1396          * current FMR mapping. Finish it out, and use the kernel's MR for
1397          * this sg entry.
1398          */
1399         if ((!dev->use_fast_reg && dma_addr & ~dev->mr_page_mask) ||
1400             dma_len > dev->mr_max_size) {
1401                 ret = srp_finish_mapping(state, ch);
1402                 if (ret)
1403                         return ret;
1404
1405                 srp_map_desc(state, dma_addr, dma_len, target->rkey);
1406                 srp_map_update_start(state, NULL, 0, 0);
1407                 return 0;
1408         }
1409
1410         /*
1411          * If this is the first sg that will be mapped via FMR or via FR, save
1412          * our position. We need to know the first unmapped entry, its index,
1413          * and the first unmapped address within that entry to be able to
1414          * restart mapping after an error.
1415          */
1416         if (!state->unmapped_sg)
1417                 srp_map_update_start(state, sg, sg_index, dma_addr);
1418
1419         while (dma_len) {
1420                 unsigned offset = dma_addr & ~dev->mr_page_mask;
1421                 if (state->npages == dev->max_pages_per_mr || offset != 0) {
1422                         ret = srp_finish_mapping(state, ch);
1423                         if (ret)
1424                                 return ret;
1425
1426                         srp_map_update_start(state, sg, sg_index, dma_addr);
1427                 }
1428
1429                 len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1430
1431                 if (!state->npages)
1432                         state->base_dma_addr = dma_addr;
1433                 state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1434                 state->dma_len += len;
1435                 dma_addr += len;
1436                 dma_len -= len;
1437         }
1438
1439         /*
1440          * If the last entry of the MR wasn't a full page, then we need to
1441          * close it out and start a new one -- we can only merge at page
1442          * boundaries.
1443          */
1444         ret = 0;
1445         if (len != dev->mr_page_size) {
1446                 ret = srp_finish_mapping(state, ch);
1447                 if (!ret)
1448                         srp_map_update_start(state, NULL, 0, 0);
1449         }
1450         return ret;
1451 }
1452
1453 static int srp_map_sg(struct srp_map_state *state, struct srp_rdma_ch *ch,
1454                       struct srp_request *req, struct scatterlist *scat,
1455                       int count)
1456 {
1457         struct srp_target_port *target = ch->target;
1458         struct srp_device *dev = target->srp_host->srp_dev;
1459         struct ib_device *ibdev = dev->dev;
1460         struct scatterlist *sg;
1461         int i;
1462         bool use_mr;
1463
1464         state->desc     = req->indirect_desc;
1465         state->pages    = req->map_page;
1466         if (dev->use_fast_reg) {
1467                 state->next_fr = req->fr_list;
1468                 use_mr = !!ch->fr_pool;
1469         } else {
1470                 state->next_fmr = req->fmr_list;
1471                 use_mr = !!ch->fmr_pool;
1472         }
1473
1474         for_each_sg(scat, sg, count, i) {
1475                 if (srp_map_sg_entry(state, ch, sg, i, use_mr)) {
1476                         /*
1477                          * Memory registration failed, so backtrack to the
1478                          * first unmapped entry and continue on without using
1479                          * memory registration.
1480                          */
1481                         dma_addr_t dma_addr;
1482                         unsigned int dma_len;
1483
1484 backtrack:
1485                         sg = state->unmapped_sg;
1486                         i = state->unmapped_index;
1487
1488                         dma_addr = ib_sg_dma_address(ibdev, sg);
1489                         dma_len = ib_sg_dma_len(ibdev, sg);
1490                         dma_len -= (state->unmapped_addr - dma_addr);
1491                         dma_addr = state->unmapped_addr;
1492                         use_mr = false;
1493                         srp_map_desc(state, dma_addr, dma_len, target->rkey);
1494                 }
1495         }
1496
1497         if (use_mr && srp_finish_mapping(state, ch))
1498                 goto backtrack;
1499
1500         req->nmdesc = state->nmdesc;
1501
1502         return 0;
1503 }
1504
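/*
 * srp_map_data() - map the data buffer of a SCSI command
 *
 * Returns the length of the resulting SRP_CMD IU, including the data
 * descriptors, or a negative errno on failure. A single mapped sg
 * entry (unless register_always is set) is sent as a direct
 * descriptor; otherwise an indirect descriptor table is built.
 */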
1505 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1506                         struct srp_request *req)
1507 {
1508         struct srp_target_port *target = ch->target;
1509         struct scatterlist *scat;
1510         struct srp_cmd *cmd = req->cmd->buf;
1511         int len, nents, count;
1512         struct srp_device *dev;
1513         struct ib_device *ibdev;
1514         struct srp_map_state state;
1515         struct srp_indirect_buf *indirect_hdr;
1516         u32 table_len;
1517         u8 fmt;
1518
1519         if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1520                 return sizeof (struct srp_cmd);
1521
1522         if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1523             scmnd->sc_data_direction != DMA_TO_DEVICE) {
1524                 shost_printk(KERN_WARNING, target->scsi_host,
1525                              PFX "Unhandled data direction %d\n",
1526                              scmnd->sc_data_direction);
1527                 return -EINVAL;
1528         }
1529
1530         nents = scsi_sg_count(scmnd);
1531         scat  = scsi_sglist(scmnd);
1532
1533         dev = target->srp_host->srp_dev;
1534         ibdev = dev->dev;
1535
1536         count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1537         if (unlikely(count == 0))
1538                 return -EIO;
1539
1540         fmt = SRP_DATA_DESC_DIRECT;
1541         len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
1542
1543         if (count == 1 && !register_always) {
1544                 /*
1545                  * The midlayer only generated a single gather/scatter
1546                  * entry, or DMA mapping coalesced everything to a
1547                  * single entry.  So a direct descriptor along with
1548                  * the DMA MR suffices.
1549                  */
1550                 struct srp_direct_buf *buf = (void *) cmd->add_data;
1551
1552                 buf->va  = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1553                 buf->key = cpu_to_be32(target->rkey);
1554                 buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
1555
1556                 req->nmdesc = 0;
1557                 goto map_complete;
1558         }
1559
1560         /*
1561          * We have more than one scatter/gather entry, so build our indirect
1562          * descriptor table, trying to merge as many entries as we can.
1563          */
1564         indirect_hdr = (void *) cmd->add_data;
1565
1566         ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1567                                    target->indirect_size, DMA_TO_DEVICE);
1568
1569         memset(&state, 0, sizeof(state));
1570         srp_map_sg(&state, ch, req, scat, count);
1571
1572         /* We've mapped the request, now pull as much of the indirect
1573          * descriptor table as we can into the command buffer. If this
1574          * target is not using an external indirect table, we are
1575          * guaranteed to fit into the command, as the SCSI layer won't
1576          * give us more S/G entries than we allow.
1577          */
1578         if (state.ndesc == 1) {
1579                 /*
1580                  * Memory registration collapsed the sg-list into one entry,
1581                  * so use a direct descriptor.
1582                  */
1583                 struct srp_direct_buf *buf = (void *) cmd->add_data;
1584
1585                 *buf = req->indirect_desc[0];
1586                 goto map_complete;
1587         }
1588
1589         if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1590                                                 !target->allow_ext_sg)) {
1591                 shost_printk(KERN_ERR, target->scsi_host,
1592                              "Could not fit S/G list into SRP_CMD\n");
1593                 return -EIO;
1594         }
1595
1596         count = min(state.ndesc, target->cmd_sg_cnt);
1597         table_len = state.ndesc * sizeof (struct srp_direct_buf);
1598
1599         fmt = SRP_DATA_DESC_INDIRECT;
1600         len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
1601         len += count * sizeof (struct srp_direct_buf);
1602
1603         memcpy(indirect_hdr->desc_list, req->indirect_desc,
1604                count * sizeof (struct srp_direct_buf));
1605
1606         indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1607         indirect_hdr->table_desc.key = cpu_to_be32(target->rkey);
1608         indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1609         indirect_hdr->len = cpu_to_be32(state.total_len);
1610
1611         if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1612                 cmd->data_out_desc_cnt = count;
1613         else
1614                 cmd->data_in_desc_cnt = count;
1615
1616         ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1617                                       DMA_TO_DEVICE);
1618
1619 map_complete:
1620         if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1621                 cmd->buf_fmt = fmt << 4;
1622         else
1623                 cmd->buf_fmt = fmt;
1624
1625         return len;
1626 }
1627
1628 /*
1629  * Return an IU to the free pool and, unless it is a response IU, give back a request limit credit
1630  */
1631 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1632                           enum srp_iu_type iu_type)
1633 {
1634         unsigned long flags;
1635
1636         spin_lock_irqsave(&ch->lock, flags);
1637         list_add(&iu->list, &ch->free_tx);
1638         if (iu_type != SRP_IU_RSP)
1639                 ++ch->req_lim;
1640         spin_unlock_irqrestore(&ch->lock, flags);
1641 }
1642
1643 /*
1644  * Must be called with ch->lock held to protect req_lim and free_tx.
1645  * If IU is not sent, it must be returned using srp_put_tx_iu().
1646  *
1647  * Note:
1648  * An upper limit for the number of allocated information units for each
1649  * request type is:
1650  * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1651  *   more than Scsi_Host.can_queue requests.
1652  * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1653  * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1654  *   one unanswered SRP request to an initiator.
1655  */
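/*
 * Example: with ch->req_lim == SRP_TSK_MGMT_SQ_SIZE, a request for an
 * SRP_IU_CMD is refused (and target->zero_req_lim is incremented)
 * because the remaining credits are reserved for task management,
 * while an SRP_IU_TSK_MGMT request still succeeds since its reserve
 * is zero.
 */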
1656 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1657                                       enum srp_iu_type iu_type)
1658 {
1659         struct srp_target_port *target = ch->target;
1660         s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1661         struct srp_iu *iu;
1662
1663         srp_send_completion(ch->send_cq, ch);
1664
1665         if (list_empty(&ch->free_tx))
1666                 return NULL;
1667
1668         /* Initiator responses to target requests do not consume credits */
1669         if (iu_type != SRP_IU_RSP) {
1670                 if (ch->req_lim <= rsv) {
1671                         ++target->zero_req_lim;
1672                         return NULL;
1673                 }
1674
1675                 --ch->req_lim;
1676         }
1677
1678         iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1679         list_del(&iu->list);
1680         return iu;
1681 }
1682
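/*
 * srp_post_send() - post a single-SGE signaled send work request
 *
 * Only the IU's DMA address, the length of the valid payload and
 * target->lkey are needed to build the ib_send_wr; the IU pointer is
 * stored in wr_id so that the send completion can return the IU to
 * the free list.
 */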
1683 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1684 {
1685         struct srp_target_port *target = ch->target;
1686         struct ib_sge list;
1687         struct ib_send_wr wr, *bad_wr;
1688
1689         list.addr   = iu->dma;
1690         list.length = len;
1691         list.lkey   = target->lkey;
1692
1693         wr.next       = NULL;
1694         wr.wr_id      = (uintptr_t) iu;
1695         wr.sg_list    = &list;
1696         wr.num_sge    = 1;
1697         wr.opcode     = IB_WR_SEND;
1698         wr.send_flags = IB_SEND_SIGNALED;
1699
1700         return ib_post_send(ch->qp, &wr, &bad_wr);
1701 }
1702
1703 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1704 {
1705         struct srp_target_port *target = ch->target;
1706         struct ib_recv_wr wr, *bad_wr;
1707         struct ib_sge list;
1708
1709         list.addr   = iu->dma;
1710         list.length = iu->size;
1711         list.lkey   = target->lkey;
1712
1713         wr.next     = NULL;
1714         wr.wr_id    = (uintptr_t) iu;
1715         wr.sg_list  = &list;
1716         wr.num_sge  = 1;
1717
1718         return ib_post_recv(ch->qp, &wr, &bad_wr);
1719 }
1720
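/*
 * srp_process_rsp() - handle an SRP_RSP information unit
 *
 * Task management responses complete ch->tsk_mgmt_done. For command
 * responses the request is looked up by tag, the request limit delta
 * is credited back, sense data and residual counts are copied into
 * the SCSI command, and scsi_done() is invoked.
 */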
1721 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
1722 {
1723         struct srp_target_port *target = ch->target;
1724         struct srp_request *req;
1725         struct scsi_cmnd *scmnd;
1726         unsigned long flags;
1727
1728         if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
1729                 spin_lock_irqsave(&ch->lock, flags);
1730                 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1731                 spin_unlock_irqrestore(&ch->lock, flags);
1732
1733                 ch->tsk_mgmt_status = -1;
1734                 if (be32_to_cpu(rsp->resp_data_len) >= 4)
1735                         ch->tsk_mgmt_status = rsp->data[3];
1736                 complete(&ch->tsk_mgmt_done);
1737         } else {
1738                 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
1739                 if (scmnd) {
1740                         req = (void *)scmnd->host_scribble;
1741                         scmnd = srp_claim_req(ch, req, NULL, scmnd);
1742                 }
1743                 if (!scmnd) {
1744                         shost_printk(KERN_ERR, target->scsi_host,
1745                                      "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
1746                                      rsp->tag, ch - target->ch, ch->qp->qp_num);
1747
1748                         spin_lock_irqsave(&ch->lock, flags);
1749                         ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1750                         spin_unlock_irqrestore(&ch->lock, flags);
1751
1752                         return;
1753                 }
1754                 scmnd->result = rsp->status;
1755
1756                 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
1757                         memcpy(scmnd->sense_buffer, rsp->data +
1758                                be32_to_cpu(rsp->resp_data_len),
1759                                min_t(int, be32_to_cpu(rsp->sense_data_len),
1760                                      SCSI_SENSE_BUFFERSIZE));
1761                 }
1762
1763                 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
1764                         scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1765                 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
1766                         scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
1767                 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
1768                         scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1769                 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
1770                         scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
1771
1772                 srp_free_req(ch, req, scmnd,
1773                              be32_to_cpu(rsp->req_lim_delta));
1774
1775                 scmnd->host_scribble = NULL;
1776                 scmnd->scsi_done(scmnd);
1777         }
1778 }
1779
1780 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
1781                                void *rsp, int len)
1782 {
1783         struct srp_target_port *target = ch->target;
1784         struct ib_device *dev = target->srp_host->srp_dev->dev;
1785         unsigned long flags;
1786         struct srp_iu *iu;
1787         int err;
1788
1789         spin_lock_irqsave(&ch->lock, flags);
1790         ch->req_lim += req_delta;
1791         iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
1792         spin_unlock_irqrestore(&ch->lock, flags);
1793
1794         if (!iu) {
1795                 shost_printk(KERN_ERR, target->scsi_host, PFX
1796                              "no IU available to send response\n");
1797                 return 1;
1798         }
1799
1800         ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
1801         memcpy(iu->buf, rsp, len);
1802         ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
1803
1804         err = srp_post_send(ch, iu, len);
1805         if (err) {
1806                 shost_printk(KERN_ERR, target->scsi_host, PFX
1807                              "unable to post response: %d\n", err);
1808                 srp_put_tx_iu(ch, iu, SRP_IU_RSP);
1809         }
1810
1811         return err;
1812 }
1813
1814 static void srp_process_cred_req(struct srp_rdma_ch *ch,
1815                                  struct srp_cred_req *req)
1816 {
1817         struct srp_cred_rsp rsp = {
1818                 .opcode = SRP_CRED_RSP,
1819                 .tag = req->tag,
1820         };
1821         s32 delta = be32_to_cpu(req->req_lim_delta);
1822
1823         if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1824                 shost_printk(KERN_ERR, ch->target->scsi_host, PFX
1825                              "problems processing SRP_CRED_REQ\n");
1826 }
1827
1828 static void srp_process_aer_req(struct srp_rdma_ch *ch,
1829                                 struct srp_aer_req *req)
1830 {
1831         struct srp_target_port *target = ch->target;
1832         struct srp_aer_rsp rsp = {
1833                 .opcode = SRP_AER_RSP,
1834                 .tag = req->tag,
1835         };
1836         s32 delta = be32_to_cpu(req->req_lim_delta);
1837
1838         shost_printk(KERN_ERR, target->scsi_host, PFX
1839                      "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
1840
1841         if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1842                 shost_printk(KERN_ERR, target->scsi_host, PFX
1843                              "problems processing SRP_AER_REQ\n");
1844 }
1845
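/*
 * srp_handle_recv() - dispatch a received information unit by opcode
 *
 * After the IU has been processed it is posted again on the receive
 * queue so that the number of outstanding receive buffers remains
 * constant.
 */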
1846 static void srp_handle_recv(struct srp_rdma_ch *ch, struct ib_wc *wc)
1847 {
1848         struct srp_target_port *target = ch->target;
1849         struct ib_device *dev = target->srp_host->srp_dev->dev;
1850         struct srp_iu *iu = (struct srp_iu *) (uintptr_t) wc->wr_id;
1851         int res;
1852         u8 opcode;
1853
1854         ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
1855                                    DMA_FROM_DEVICE);
1856
1857         opcode = *(u8 *) iu->buf;
1858
1859         if (0) {
1860                 shost_printk(KERN_ERR, target->scsi_host,
1861                              PFX "recv completion, opcode 0x%02x\n", opcode);
1862                 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
1863                                iu->buf, wc->byte_len, true);
1864         }
1865
1866         switch (opcode) {
1867         case SRP_RSP:
1868                 srp_process_rsp(ch, iu->buf);
1869                 break;
1870
1871         case SRP_CRED_REQ:
1872                 srp_process_cred_req(ch, iu->buf);
1873                 break;
1874
1875         case SRP_AER_REQ:
1876                 srp_process_aer_req(ch, iu->buf);
1877                 break;
1878
1879         case SRP_T_LOGOUT:
1880                 /* XXX Handle target logout */
1881                 shost_printk(KERN_WARNING, target->scsi_host,
1882                              PFX "Got target logout request\n");
1883                 break;
1884
1885         default:
1886                 shost_printk(KERN_WARNING, target->scsi_host,
1887                              PFX "Unhandled SRP opcode 0x%02x\n", opcode);
1888                 break;
1889         }
1890
1891         ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
1892                                       DMA_FROM_DEVICE);
1893
1894         res = srp_post_recv(ch, iu);
1895         if (res != 0)
1896                 shost_printk(KERN_ERR, target->scsi_host,
1897                              PFX "Recv failed with error code %d\n", res);
1898 }
1899
1900 /**
1901  * srp_tl_err_work() - handle a transport layer error
1902  * @work: Work structure embedded in an SRP target port.
1903  *
1904  * Note: This function may get invoked before the rport has been created,
1905  * hence the target->rport test.
1906  */
1907 static void srp_tl_err_work(struct work_struct *work)
1908 {
1909         struct srp_target_port *target;
1910
1911         target = container_of(work, struct srp_target_port, tl_err_work);
1912         if (target->rport)
1913                 srp_start_tl_fail_timers(target->rport);
1914 }
1915
1916 static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status,
1917                               bool send_err, struct srp_rdma_ch *ch)
1918 {
1919         struct srp_target_port *target = ch->target;
1920
1921         if (wr_id == SRP_LAST_WR_ID) {
1922                 complete(&ch->done);
1923                 return;
1924         }
1925
1926         if (ch->connected && !target->qp_in_error) {
1927                 if (wr_id & LOCAL_INV_WR_ID_MASK) {
1928                         shost_printk(KERN_ERR, target->scsi_host, PFX
1929                                      "LOCAL_INV failed with status %s (%d)\n",
1930                                      ib_wc_status_msg(wc_status), wc_status);
1931                 } else if (wr_id & FAST_REG_WR_ID_MASK) {
1932                         shost_printk(KERN_ERR, target->scsi_host, PFX
1933                                      "FAST_REG_MR failed status %s (%d)\n",
1934                                      ib_wc_status_msg(wc_status), wc_status);
1935                 } else {
1936                         shost_printk(KERN_ERR, target->scsi_host,
1937                                      PFX "failed %s status %s (%d) for iu %p\n",
1938                                      send_err ? "send" : "receive",
1939                                      ib_wc_status_msg(wc_status), wc_status,
1940                                      (void *)(uintptr_t)wr_id);
1941                 }
1942                 queue_work(system_long_wq, &target->tl_err_work);
1943         }
1944         target->qp_in_error = true;
1945 }
1946
1947 static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr)
1948 {
1949         struct srp_rdma_ch *ch = ch_ptr;
1950         struct ib_wc wc;
1951
1952         ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
1953         while (ib_poll_cq(cq, 1, &wc) > 0) {
1954                 if (likely(wc.status == IB_WC_SUCCESS)) {
1955                         srp_handle_recv(ch, &wc);
1956                 } else {
1957                         srp_handle_qp_err(wc.wr_id, wc.status, false, ch);
1958                 }
1959         }
1960 }
1961
1962 static void srp_send_completion(struct ib_cq *cq, void *ch_ptr)
1963 {
1964         struct srp_rdma_ch *ch = ch_ptr;
1965         struct ib_wc wc;
1966         struct srp_iu *iu;
1967
1968         while (ib_poll_cq(cq, 1, &wc) > 0) {
1969                 if (likely(wc.status == IB_WC_SUCCESS)) {
1970                         iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
1971                         list_add(&iu->list, &ch->free_tx);
1972                 } else {
1973                         srp_handle_qp_err(wc.wr_id, wc.status, true, ch);
1974                 }
1975         }
1976 }
1977
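/*
 * srp_queuecommand() - queue a SCSI command on one of the RDMA channels
 *
 * The channel is selected from the blk-mq hardware queue number encoded
 * in the command's unique tag, and the tag index addresses the
 * per-channel request ring.
 */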
1978 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
1979 {
1980         struct srp_target_port *target = host_to_target(shost);
1981         struct srp_rport *rport = target->rport;
1982         struct srp_rdma_ch *ch;
1983         struct srp_request *req;
1984         struct srp_iu *iu;
1985         struct srp_cmd *cmd;
1986         struct ib_device *dev;
1987         unsigned long flags;
1988         u32 tag;
1989         u16 idx;
1990         int len, ret;
1991         const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
1992
1993         /*
1994          * The SCSI EH thread is the only context from which srp_queuecommand()
1995          * can get invoked for blocked devices (SDEV_BLOCK /
1996          * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
1997          * locking the rport mutex if invoked from inside the SCSI EH.
1998          */
1999         if (in_scsi_eh)
2000                 mutex_lock(&rport->mutex);
2001
2002         scmnd->result = srp_chkready(target->rport);
2003         if (unlikely(scmnd->result))
2004                 goto err;
2005
2006         WARN_ON_ONCE(scmnd->request->tag < 0);
2007         tag = blk_mq_unique_tag(scmnd->request);
2008         ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2009         idx = blk_mq_unique_tag_to_tag(tag);
2010         WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2011                   dev_name(&shost->shost_gendev), tag, idx,
2012                   target->req_ring_size);
2013
2014         spin_lock_irqsave(&ch->lock, flags);
2015         iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2016         spin_unlock_irqrestore(&ch->lock, flags);
2017
2018         if (!iu)
2019                 goto err;
2020
2021         req = &ch->req_ring[idx];
2022         dev = target->srp_host->srp_dev->dev;
2023         ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
2024                                    DMA_TO_DEVICE);
2025
2026         scmnd->host_scribble = (void *) req;
2027
2028         cmd = iu->buf;
2029         memset(cmd, 0, sizeof *cmd);
2030
2031         cmd->opcode = SRP_CMD;
2032         int_to_scsilun(scmnd->device->lun, &cmd->lun);
2033         cmd->tag    = tag;
2034         memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2035
2036         req->scmnd    = scmnd;
2037         req->cmd      = iu;
2038
2039         len = srp_map_data(scmnd, ch, req);
2040         if (len < 0) {
2041                 shost_printk(KERN_ERR, target->scsi_host,
2042                              PFX "Failed to map data (%d)\n", len);
2043                 /*
2044                  * If we ran out of memory descriptors (-ENOMEM) because an
2045                  * application is queuing many requests with more than
2046                  * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2047                  * to reduce queue depth temporarily.
2048                  */
2049                 scmnd->result = len == -ENOMEM ?
2050                         DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2051                 goto err_iu;
2052         }
2053
2054         ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2055                                       DMA_TO_DEVICE);
2056
2057         if (srp_post_send(ch, iu, len)) {
2058                 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2059                 goto err_unmap;
2060         }
2061
2062         ret = 0;
2063
2064 unlock_rport:
2065         if (in_scsi_eh)
2066                 mutex_unlock(&rport->mutex);
2067
2068         return ret;
2069
2070 err_unmap:
2071         srp_unmap_data(scmnd, ch, req);
2072
2073 err_iu:
2074         srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2075
2076         /*
2077          * Clear the request so that loops iterating over the request ring
2078          * never encounter a dangling SCSI command pointer.
2079          */
2080         req->scmnd = NULL;
2081
2082 err:
2083         if (scmnd->result) {
2084                 scmnd->scsi_done(scmnd);
2085                 ret = 0;
2086         } else {
2087                 ret = SCSI_MLQUEUE_HOST_BUSY;
2088         }
2089
2090         goto unlock_rport;
2091 }
2092
2093 /*
2094  * Note: the resources allocated in this function are freed in
2095  * srp_free_ch_ib().
2096  */
2097 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2098 {
2099         struct srp_target_port *target = ch->target;
2100         int i;
2101
2102         ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2103                               GFP_KERNEL);
2104         if (!ch->rx_ring)
2105                 goto err_no_ring;
2106         ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2107                               GFP_KERNEL);
2108         if (!ch->tx_ring)
2109                 goto err_no_ring;
2110
2111         for (i = 0; i < target->queue_size; ++i) {
2112                 ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2113                                               ch->max_ti_iu_len,
2114                                               GFP_KERNEL, DMA_FROM_DEVICE);
2115                 if (!ch->rx_ring[i])
2116                         goto err;
2117         }
2118
2119         for (i = 0; i < target->queue_size; ++i) {
2120                 ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2121                                               target->max_iu_len,
2122                                               GFP_KERNEL, DMA_TO_DEVICE);
2123                 if (!ch->tx_ring[i])
2124                         goto err;
2125
2126                 list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2127         }
2128
2129         return 0;
2130
2131 err:
2132         for (i = 0; i < target->queue_size; ++i) {
2133                 srp_free_iu(target->srp_host, ch->rx_ring[i]);
2134                 srp_free_iu(target->srp_host, ch->tx_ring[i]);
2135         }
2136
2137
2138 err_no_ring:
2139         kfree(ch->tx_ring);
2140         ch->tx_ring = NULL;
2141         kfree(ch->rx_ring);
2142         ch->rx_ring = NULL;
2143
2144         return -ENOMEM;
2145 }
2146
2147 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2148 {
2149         uint64_t T_tr_ns, max_compl_time_ms;
2150         uint32_t rq_tmo_jiffies;
2151
2152         /*
2153          * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2154          * table 91), both the QP timeout and the retry count have to be set
2155          * for RC QP's during the RTR to RTS transition.
2156          */
2157         WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2158                      (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2159
2160         /*
2161          * Set target->rq_tmo_jiffies to one second more than the largest time
2162          * it can take before an error completion is generated. See also
2163          * C9-140..142 in the IBTA spec for more information about how to
2164          * convert the QP Local ACK Timeout value to nanoseconds.
2165          */
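        /*
         * Worked example (illustrative values): timeout = 14 and
         * retry_cnt = 7 give T_tr_ns = 4096 * 2^14 ns ~= 67.1 ms,
         * max_compl_time_ms ~= 7 * 4 * 67.1 ~= 1879 ms and hence
         * rq_tmo_jiffies = msecs_to_jiffies(2879), i.e. about 2.9 s.
         */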
2166         T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2167         max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2168         do_div(max_compl_time_ms, NSEC_PER_MSEC);
2169         rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2170
2171         return rq_tmo_jiffies;
2172 }
2173
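/*
 * srp_cm_rep_handler() - process an IB CM REP message
 *
 * Records the login response parameters, allocates the IU rings on the
 * first connect, moves the QP through RTR and RTS, posts the initial
 * receive buffers and sends the RTU. ch->status ends up as 0 on
 * success or as a negative errno on failure.
 */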
2174 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2175                                const struct srp_login_rsp *lrsp,
2176                                struct srp_rdma_ch *ch)
2177 {
2178         struct srp_target_port *target = ch->target;
2179         struct ib_qp_attr *qp_attr = NULL;
2180         int attr_mask = 0;
2181         int ret;
2182         int i;
2183
2184         if (lrsp->opcode == SRP_LOGIN_RSP) {
2185                 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2186                 ch->req_lim       = be32_to_cpu(lrsp->req_lim_delta);
2187
2188                 /*
2189                  * Reserve credits for task management so we don't
2190                  * bounce requests back to the SCSI mid-layer.
2191                  */
2192                 target->scsi_host->can_queue
2193                         = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2194                               target->scsi_host->can_queue);
2195                 target->scsi_host->cmd_per_lun
2196                         = min_t(int, target->scsi_host->can_queue,
2197                                 target->scsi_host->cmd_per_lun);
2198         } else {
2199                 shost_printk(KERN_WARNING, target->scsi_host,
2200                              PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2201                 ret = -ECONNRESET;
2202                 goto error;
2203         }
2204
2205         if (!ch->rx_ring) {
2206                 ret = srp_alloc_iu_bufs(ch);
2207                 if (ret)
2208                         goto error;
2209         }
2210
2211         ret = -ENOMEM;
2212         qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
2213         if (!qp_attr)
2214                 goto error;
2215
2216         qp_attr->qp_state = IB_QPS_RTR;
2217         ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2218         if (ret)
2219                 goto error_free;
2220
2221         ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2222         if (ret)
2223                 goto error_free;
2224
2225         for (i = 0; i < target->queue_size; i++) {
2226                 struct srp_iu *iu = ch->rx_ring[i];
2227
2228                 ret = srp_post_recv(ch, iu);
2229                 if (ret)
2230                         goto error_free;
2231         }
2232
2233         qp_attr->qp_state = IB_QPS_RTS;
2234         ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2235         if (ret)
2236                 goto error_free;
2237
2238         target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2239
2240         ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2241         if (ret)
2242                 goto error_free;
2243
2244         ret = ib_send_cm_rtu(cm_id, NULL, 0);
2245
2246 error_free:
2247         kfree(qp_attr);
2248
2249 error:
2250         ch->status = ret;
2251 }
2252
2253 static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
2254                                struct ib_cm_event *event,
2255                                struct srp_rdma_ch *ch)
2256 {
2257         struct srp_target_port *target = ch->target;
2258         struct Scsi_Host *shost = target->scsi_host;
2259         struct ib_class_port_info *cpi;
2260         int opcode;
2261
2262         switch (event->param.rej_rcvd.reason) {
2263         case IB_CM_REJ_PORT_CM_REDIRECT:
2264                 cpi = event->param.rej_rcvd.ari;
2265                 ch->path.dlid = cpi->redirect_lid;
2266                 ch->path.pkey = cpi->redirect_pkey;
2267                 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2268                 memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
2269
2270                 ch->status = ch->path.dlid ?
2271                         SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2272                 break;
2273
2274         case IB_CM_REJ_PORT_REDIRECT:
2275                 if (srp_target_is_topspin(target)) {
2276                         /*
2277                          * Topspin/Cisco SRP gateways incorrectly send
2278                          * reject reason code 25 when they mean 24
2279                          * (port redirect).
2280                          */
2281                         memcpy(ch->path.dgid.raw,
2282                                event->param.rej_rcvd.ari, 16);
2283
2284                         shost_printk(KERN_DEBUG, shost,
2285                                      PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2286                                      be64_to_cpu(ch->path.dgid.global.subnet_prefix),
2287                                      be64_to_cpu(ch->path.dgid.global.interface_id));
2288
2289                         ch->status = SRP_PORT_REDIRECT;
2290                 } else {
2291                         shost_printk(KERN_WARNING, shost,
2292                                      "  REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2293                         ch->status = -ECONNRESET;
2294                 }
2295                 break;
2296
2297         case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2298                 shost_printk(KERN_WARNING, shost,
2299                             "  REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2300                 ch->status = -ECONNRESET;
2301                 break;
2302
2303         case IB_CM_REJ_CONSUMER_DEFINED:
2304                 opcode = *(u8 *) event->private_data;
2305                 if (opcode == SRP_LOGIN_REJ) {
2306                         struct srp_login_rej *rej = event->private_data;
2307                         u32 reason = be32_to_cpu(rej->reason);
2308
2309                         if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2310                                 shost_printk(KERN_WARNING, shost,
2311                                              PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2312                         else
2313                                 shost_printk(KERN_WARNING, shost, PFX
2314                                              "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2315                                              target->sgid.raw,
2316                                              target->orig_dgid.raw, reason);
2317                 } else
2318                         shost_printk(KERN_WARNING, shost,
2319                                      "  REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
2320                                      " opcode 0x%02x\n", opcode);
2321                 ch->status = -ECONNRESET;
2322                 break;
2323
2324         case IB_CM_REJ_STALE_CONN:
2325                 shost_printk(KERN_WARNING, shost, "  REJ reason: stale connection\n");
2326                 ch->status = SRP_STALE_CONN;
2327                 break;
2328
2329         default:
2330                 shost_printk(KERN_WARNING, shost, "  REJ reason 0x%x\n",
2331                              event->param.rej_rcvd.reason);
2332                 ch->status = -ECONNRESET;
2333         }
2334 }
2335
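/*
 * srp_cm_handler() - IB CM event handler for an RDMA channel
 *
 * For events that conclude a login attempt (REQ error, REP, REJ and
 * timewait exit) ch->status is updated and ch->done is completed so
 * that the connect path can continue.
 */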
2336 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2337 {
2338         struct srp_rdma_ch *ch = cm_id->context;
2339         struct srp_target_port *target = ch->target;
2340         int comp = 0;
2341
2342         switch (event->event) {
2343         case IB_CM_REQ_ERROR:
2344                 shost_printk(KERN_DEBUG, target->scsi_host,
2345                              PFX "Sending CM REQ failed\n");
2346                 comp = 1;
2347                 ch->status = -ECONNRESET;
2348                 break;
2349
2350         case IB_CM_REP_RECEIVED:
2351                 comp = 1;
2352                 srp_cm_rep_handler(cm_id, event->private_data, ch);
2353                 break;
2354
2355         case IB_CM_REJ_RECEIVED:
2356                 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2357                 comp = 1;
2358
2359                 srp_cm_rej_handler(cm_id, event, ch);
2360                 break;
2361
2362         case IB_CM_DREQ_RECEIVED:
2363                 shost_printk(KERN_WARNING, target->scsi_host,
2364                              PFX "DREQ received - connection closed\n");
2365                 ch->connected = false;
2366                 if (ib_send_cm_drep(cm_id, NULL, 0))
2367                         shost_printk(KERN_ERR, target->scsi_host,
2368                                      PFX "Sending CM DREP failed\n");
2369                 queue_work(system_long_wq, &target->tl_err_work);
2370                 break;
2371
2372         case IB_CM_TIMEWAIT_EXIT:
2373                 shost_printk(KERN_ERR, target->scsi_host,
2374                              PFX "connection closed\n");
2375                 comp = 1;
2376
2377                 ch->status = 0;
2378                 break;
2379
2380         case IB_CM_MRA_RECEIVED:
2381         case IB_CM_DREQ_ERROR:
2382         case IB_CM_DREP_RECEIVED:
2383                 break;
2384
2385         default:
2386                 shost_printk(KERN_WARNING, target->scsi_host,
2387                              PFX "Unhandled CM event %d\n", event->event);
2388                 break;
2389         }
2390
2391         if (comp)
2392                 complete(&ch->done);
2393
2394         return 0;
2395 }
2396
2397 /**
2398  * srp_change_queue_depth - set the device queue depth
2399  * @sdev: scsi device struct
2400  * @qdepth: requested queue depth
2401  *
2402  * Returns queue depth.
2403  */
2404 static int
2405 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2406 {
2407         if (!sdev->tagged_supported)
2408                 qdepth = 1;
2409         return scsi_change_queue_depth(sdev, qdepth);
2410 }
2411
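/*
 * srp_send_tsk_mgmt() - send an SRP task management request
 *
 * Returns 0 if a response was received within SRP_ABORT_TIMEOUT_MS and
 * -1 otherwise (not connected, no free IU, post failure or timeout).
 * The response status itself is stored in ch->tsk_mgmt_status.
 */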
2412 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2413                              u8 func)
2414 {
2415         struct srp_target_port *target = ch->target;
2416         struct srp_rport *rport = target->rport;
2417         struct ib_device *dev = target->srp_host->srp_dev->dev;
2418         struct srp_iu *iu;
2419         struct srp_tsk_mgmt *tsk_mgmt;
2420
2421         if (!ch->connected || target->qp_in_error)
2422                 return -1;
2423
2424         init_completion(&ch->tsk_mgmt_done);
2425
2426         /*
2427          * Lock the rport mutex to prevent srp_create_ch_ib() from being
2428          * invoked while a task management function is being sent.
2429          */
2430         mutex_lock(&rport->mutex);
2431         spin_lock_irq(&ch->lock);
2432         iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2433         spin_unlock_irq(&ch->lock);
2434
2435         if (!iu) {
2436                 mutex_unlock(&rport->mutex);
2437
2438                 return -1;
2439         }
2440
2441         ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2442                                    DMA_TO_DEVICE);
2443         tsk_mgmt = iu->buf;
2444         memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2445
2446         tsk_mgmt->opcode        = SRP_TSK_MGMT;
2447         int_to_scsilun(lun, &tsk_mgmt->lun);
2448         tsk_mgmt->tag           = req_tag | SRP_TAG_TSK_MGMT;
2449         tsk_mgmt->tsk_mgmt_func = func;
2450         tsk_mgmt->task_tag      = req_tag;
2451
2452         ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2453                                       DMA_TO_DEVICE);
2454         if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2455                 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2456                 mutex_unlock(&rport->mutex);
2457
2458                 return -1;
2459         }
2460         mutex_unlock(&rport->mutex);
2461
2462         if (!wait_for_completion_timeout(&ch->tsk_mgmt_done,
2463                                          msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
2464                 return -1;
2465
2466         return 0;
2467 }
2468
2469 static int srp_abort(struct scsi_cmnd *scmnd)
2470 {
2471         struct srp_target_port *target = host_to_target(scmnd->device->host);
2472         struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2473         u32 tag;
2474         u16 ch_idx;
2475         struct srp_rdma_ch *ch;
2476         int ret;
2477
2478         shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2479
2480         if (!req)
2481                 return SUCCESS;
2482         tag = blk_mq_unique_tag(scmnd->request);
2483         ch_idx = blk_mq_unique_tag_to_hwq(tag);
2484         if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2485                 return SUCCESS;
2486         ch = &target->ch[ch_idx];
2487         if (!srp_claim_req(ch, req, NULL, scmnd))
2488                 return SUCCESS;
2489         shost_printk(KERN_ERR, target->scsi_host,
2490                      "Sending SRP abort for tag %#x\n", tag);
2491         if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2492                               SRP_TSK_ABORT_TASK) == 0)
2493                 ret = SUCCESS;
2494         else if (target->rport->state == SRP_RPORT_LOST)
2495                 ret = FAST_IO_FAIL;
2496         else
2497                 ret = FAILED;
2498         srp_free_req(ch, req, scmnd, 0);
2499         scmnd->result = DID_ABORT << 16;
2500         scmnd->scsi_done(scmnd);
2501
2502         return ret;
2503 }
2504
2505 static int srp_reset_device(struct scsi_cmnd *scmnd)
2506 {
2507         struct srp_target_port *target = host_to_target(scmnd->device->host);
2508         struct srp_rdma_ch *ch;
2509         int i, j;
2510
2511         shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2512
2513         ch = &target->ch[0];
2514         if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2515                               SRP_TSK_LUN_RESET))
2516                 return FAILED;
2517         if (ch->tsk_mgmt_status)
2518                 return FAILED;
2519
2520         for (i = 0; i < target->ch_count; i++) {
2521                 ch = &target->ch[i];
2522                 for (j = 0; j < target->req_ring_size; ++j) {
2523                         struct srp_request *req = &ch->req_ring[j];
2524
2525                         srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
2526                 }
2527         }
2528
2529         return SUCCESS;
2530 }
2531
2532 static int srp_reset_host(struct scsi_cmnd *scmnd)
2533 {
2534         struct srp_target_port *target = host_to_target(scmnd->device->host);
2535
2536         shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2537
2538         return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2539 }
2540
2541 static int srp_slave_configure(struct scsi_device *sdev)
2542 {
2543         struct Scsi_Host *shost = sdev->host;
2544         struct srp_target_port *target = host_to_target(shost);
2545         struct request_queue *q = sdev->request_queue;
2546         unsigned long timeout;
2547
2548         if (sdev->type == TYPE_DISK) {
2549                 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
2550                 blk_queue_rq_timeout(q, timeout);
2551         }
2552
2553         return 0;
2554 }
2555
2556 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
2557                            char *buf)
2558 {
2559         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2560
2561         return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
2562 }
2563
2564 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
2565                              char *buf)
2566 {
2567         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2568
2569         return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
2570 }
2571
2572 static ssize_t show_service_id(struct device *dev,
2573                                struct device_attribute *attr, char *buf)
2574 {
2575         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2576
2577         return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id));
2578 }
2579
2580 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
2581                          char *buf)
2582 {
2583         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2584
2585         return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey));
2586 }
2587
2588 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2589                          char *buf)
2590 {
2591         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2592
2593         return sprintf(buf, "%pI6\n", target->sgid.raw);
2594 }
2595
2596 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
2597                          char *buf)
2598 {
2599         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2600         struct srp_rdma_ch *ch = &target->ch[0];
2601
2602         return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
2603 }
2604
2605 static ssize_t show_orig_dgid(struct device *dev,
2606                               struct device_attribute *attr, char *buf)
2607 {
2608         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2609
2610         return sprintf(buf, "%pI6\n", target->orig_dgid.raw);
2611 }
2612
2613 static ssize_t show_req_lim(struct device *dev,
2614                             struct device_attribute *attr, char *buf)
2615 {
2616         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2617         struct srp_rdma_ch *ch;
2618         int i, req_lim = INT_MAX;
2619
2620         for (i = 0; i < target->ch_count; i++) {
2621                 ch = &target->ch[i];
2622                 req_lim = min(req_lim, ch->req_lim);
2623         }
2624         return sprintf(buf, "%d\n", req_lim);
2625 }
2626
2627 static ssize_t show_zero_req_lim(struct device *dev,
2628                                  struct device_attribute *attr, char *buf)
2629 {
2630         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2631
2632         return sprintf(buf, "%d\n", target->zero_req_lim);
2633 }
2634
2635 static ssize_t show_local_ib_port(struct device *dev,
2636                                   struct device_attribute *attr, char *buf)
2637 {
2638         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2639
2640         return sprintf(buf, "%d\n", target->srp_host->port);
2641 }
2642
2643 static ssize_t show_local_ib_device(struct device *dev,
2644                                     struct device_attribute *attr, char *buf)
2645 {
2646         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2647
2648         return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
2649 }
2650
2651 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
2652                              char *buf)
2653 {
2654         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2655
2656         return sprintf(buf, "%d\n", target->ch_count);
2657 }
2658
2659 static ssize_t show_comp_vector(struct device *dev,
2660                                 struct device_attribute *attr, char *buf)
2661 {
2662         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2663
2664         return sprintf(buf, "%d\n", target->comp_vector);
2665 }
2666
2667 static ssize_t show_tl_retry_count(struct device *dev,
2668                                    struct device_attribute *attr, char *buf)
2669 {
2670         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2671
2672         return sprintf(buf, "%d\n", target->tl_retry_count);
2673 }
2674
2675 static ssize_t show_cmd_sg_entries(struct device *dev,
2676                                    struct device_attribute *attr, char *buf)
2677 {
2678         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2679
2680         return sprintf(buf, "%u\n", target->cmd_sg_cnt);
2681 }
2682
2683 static ssize_t show_allow_ext_sg(struct device *dev,
2684                                  struct device_attribute *attr, char *buf)
2685 {
2686         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2687
2688         return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
2689 }
2690
2691 static DEVICE_ATTR(id_ext,          S_IRUGO, show_id_ext,          NULL);
2692 static DEVICE_ATTR(ioc_guid,        S_IRUGO, show_ioc_guid,        NULL);
2693 static DEVICE_ATTR(service_id,      S_IRUGO, show_service_id,      NULL);
2694 static DEVICE_ATTR(pkey,            S_IRUGO, show_pkey,            NULL);
2695 static DEVICE_ATTR(sgid,            S_IRUGO, show_sgid,            NULL);
2696 static DEVICE_ATTR(dgid,            S_IRUGO, show_dgid,            NULL);
2697 static DEVICE_ATTR(orig_dgid,       S_IRUGO, show_orig_dgid,       NULL);
2698 static DEVICE_ATTR(req_lim,         S_IRUGO, show_req_lim,         NULL);
2699 static DEVICE_ATTR(zero_req_lim,    S_IRUGO, show_zero_req_lim,    NULL);
2700 static DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,   NULL);
2701 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
2702 static DEVICE_ATTR(ch_count,        S_IRUGO, show_ch_count,        NULL);
2703 static DEVICE_ATTR(comp_vector,     S_IRUGO, show_comp_vector,     NULL);
2704 static DEVICE_ATTR(tl_retry_count,  S_IRUGO, show_tl_retry_count,  NULL);
2705 static DEVICE_ATTR(cmd_sg_entries,  S_IRUGO, show_cmd_sg_entries,  NULL);
2706 static DEVICE_ATTR(allow_ext_sg,    S_IRUGO, show_allow_ext_sg,    NULL);
2707
2708 static struct device_attribute *srp_host_attrs[] = {
2709         &dev_attr_id_ext,
2710         &dev_attr_ioc_guid,
2711         &dev_attr_service_id,
2712         &dev_attr_pkey,
2713         &dev_attr_sgid,
2714         &dev_attr_dgid,
2715         &dev_attr_orig_dgid,
2716         &dev_attr_req_lim,
2717         &dev_attr_zero_req_lim,
2718         &dev_attr_local_ib_port,
2719         &dev_attr_local_ib_device,
2720         &dev_attr_ch_count,
2721         &dev_attr_comp_vector,
2722         &dev_attr_tl_retry_count,
2723         &dev_attr_cmd_sg_entries,
2724         &dev_attr_allow_ext_sg,
2725         NULL
2726 };
2727
2728 static struct scsi_host_template srp_template = {
2729         .module                         = THIS_MODULE,
2730         .name                           = "InfiniBand SRP initiator",
2731         .proc_name                      = DRV_NAME,
2732         .slave_configure                = srp_slave_configure,
2733         .info                           = srp_target_info,
2734         .queuecommand                   = srp_queuecommand,
2735         .change_queue_depth             = srp_change_queue_depth,
2736         .eh_abort_handler               = srp_abort,
2737         .eh_device_reset_handler        = srp_reset_device,
2738         .eh_host_reset_handler          = srp_reset_host,
2739         .skip_settle_delay              = true,
2740         .sg_tablesize                   = SRP_DEF_SG_TABLESIZE,
2741         .can_queue                      = SRP_DEFAULT_CMD_SQ_SIZE,
2742         .this_id                        = -1,
2743         .cmd_per_lun                    = SRP_DEFAULT_CMD_SQ_SIZE,
2744         .use_clustering                 = ENABLE_CLUSTERING,
2745         .shost_attrs                    = srp_host_attrs,
2746         .use_blk_tags                   = 1,
2747         .track_queue_depth              = 1,
2748 };
2749
2750 static int srp_sdev_count(struct Scsi_Host *host)
2751 {
2752         struct scsi_device *sdev;
2753         int c = 0;
2754
2755         shost_for_each_device(sdev, host)
2756                 c++;
2757
2758         return c;
2759 }
2760
2761 /*
2762  * Return values:
2763  * < 0 upon failure. Caller is responsible for SRP target port cleanup.
2764  * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
2765  *    removal has been scheduled.
2766  * 0 and target->state != SRP_TARGET_REMOVED upon success.
2767  */
2768 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
2769 {
2770         struct srp_rport_identifiers ids;
2771         struct srp_rport *rport;
2772
2773         target->state = SRP_TARGET_SCANNING;
2774         sprintf(target->target_name, "SRP.T10:%016llX",
2775                 be64_to_cpu(target->id_ext));
2776
2777         if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device))
2778                 return -ENODEV;
2779
2780         memcpy(ids.port_id, &target->id_ext, 8);
2781         memcpy(ids.port_id + 8, &target->ioc_guid, 8);
2782         ids.roles = SRP_RPORT_ROLE_TARGET;
2783         rport = srp_rport_add(target->scsi_host, &ids);
2784         if (IS_ERR(rport)) {
2785                 scsi_remove_host(target->scsi_host);
2786                 return PTR_ERR(rport);
2787         }
2788
2789         rport->lld_data = target;
2790         target->rport = rport;
2791
2792         spin_lock(&host->target_lock);
2793         list_add_tail(&target->list, &host->target_list);
2794         spin_unlock(&host->target_lock);
2795
2796         scsi_scan_target(&target->scsi_host->shost_gendev,
2797                          0, target->scsi_id, SCAN_WILD_CARD, 0);
2798
2799         if (srp_connected_ch(target) < target->ch_count ||
2800             target->qp_in_error) {
2801                 shost_printk(KERN_INFO, target->scsi_host,
2802                              PFX "SCSI scan failed - removing SCSI host\n");
2803                 srp_queue_remove_work(target);
2804                 goto out;
2805         }
2806
2807         pr_debug(PFX "%s: SCSI scan succeeded - detected %d LUNs\n",
2808                  dev_name(&target->scsi_host->shost_gendev),
2809                  srp_sdev_count(target->scsi_host));
2810
2811         spin_lock_irq(&target->lock);
2812         if (target->state == SRP_TARGET_SCANNING)
2813                 target->state = SRP_TARGET_LIVE;
2814         spin_unlock_irq(&target->lock);
2815
2816 out:
2817         return 0;
2818 }
2819
2820 static void srp_release_dev(struct device *dev)
2821 {
2822         struct srp_host *host =
2823                 container_of(dev, struct srp_host, dev);
2824
2825         complete(&host->released);
2826 }
2827
2828 static struct class srp_class = {
2829         .name    = "infiniband_srp",
2830         .dev_release = srp_release_dev
2831 };
2832
2833 /**
2834  * srp_conn_unique() - check whether the connection to a target is unique
2835  * @host:   SRP host.
2836  * @target: SRP target port.
2837  */
2838 static bool srp_conn_unique(struct srp_host *host,
2839                             struct srp_target_port *target)
2840 {
2841         struct srp_target_port *t;
2842         bool ret = false;
2843
2844         if (target->state == SRP_TARGET_REMOVED)
2845                 goto out;
2846
2847         ret = true;
2848
2849         spin_lock(&host->target_lock);
2850         list_for_each_entry(t, &host->target_list, list) {
2851                 if (t != target &&
2852                     target->id_ext == t->id_ext &&
2853                     target->ioc_guid == t->ioc_guid &&
2854                     target->initiator_ext == t->initiator_ext) {
2855                         ret = false;
2856                         break;
2857                 }
2858         }
2859         spin_unlock(&host->target_lock);
2860
2861 out:
2862         return ret;
2863 }
2864
2865 /*
2866  * Target ports are added by writing
2867  *
2868  *     id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
2869  *     pkey=<P_Key>,service_id=<service ID>
2870  *
2871  * to the add_target sysfs attribute.
2872  */
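/*
 * For example (hypothetical identifiers; the sysfs path follows the
 * srp-<hca>-<port> naming used for SRP host devices):
 *
 *   echo id_ext=200100e08b000001,ioc_guid=0002c90200402bd4,dgid=fe800000000000000002c90200402bd5,pkey=ffff,service_id=0002c90200402bd4 \
 *     > /sys/class/infiniband_srp/srp-mlx4_0-1/add_target
 */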
2873 enum {
2874         SRP_OPT_ERR             = 0,
2875         SRP_OPT_ID_EXT          = 1 << 0,
2876         SRP_OPT_IOC_GUID        = 1 << 1,
2877         SRP_OPT_DGID            = 1 << 2,
2878         SRP_OPT_PKEY            = 1 << 3,
2879         SRP_OPT_SERVICE_ID      = 1 << 4,
2880         SRP_OPT_MAX_SECT        = 1 << 5,
2881         SRP_OPT_MAX_CMD_PER_LUN = 1 << 6,
2882         SRP_OPT_IO_CLASS        = 1 << 7,
2883         SRP_OPT_INITIATOR_EXT   = 1 << 8,
2884         SRP_OPT_CMD_SG_ENTRIES  = 1 << 9,
2885         SRP_OPT_ALLOW_EXT_SG    = 1 << 10,
2886         SRP_OPT_SG_TABLESIZE    = 1 << 11,
2887         SRP_OPT_COMP_VECTOR     = 1 << 12,
2888         SRP_OPT_TL_RETRY_COUNT  = 1 << 13,
2889         SRP_OPT_QUEUE_SIZE      = 1 << 14,
2890         SRP_OPT_ALL             = (SRP_OPT_ID_EXT       |
2891                                    SRP_OPT_IOC_GUID     |
2892                                    SRP_OPT_DGID         |
2893                                    SRP_OPT_PKEY         |
2894                                    SRP_OPT_SERVICE_ID),
2895 };
2896
2897 static const match_table_t srp_opt_tokens = {
2898         { SRP_OPT_ID_EXT,               "id_ext=%s"             },
2899         { SRP_OPT_IOC_GUID,             "ioc_guid=%s"           },
2900         { SRP_OPT_DGID,                 "dgid=%s"               },
2901         { SRP_OPT_PKEY,                 "pkey=%x"               },
2902         { SRP_OPT_SERVICE_ID,           "service_id=%s"         },
2903         { SRP_OPT_MAX_SECT,             "max_sect=%d"           },
2904         { SRP_OPT_MAX_CMD_PER_LUN,      "max_cmd_per_lun=%d"    },
2905         { SRP_OPT_IO_CLASS,             "io_class=%x"           },
2906         { SRP_OPT_INITIATOR_EXT,        "initiator_ext=%s"      },
2907         { SRP_OPT_CMD_SG_ENTRIES,       "cmd_sg_entries=%u"     },
2908         { SRP_OPT_ALLOW_EXT_SG,         "allow_ext_sg=%u"       },
2909         { SRP_OPT_SG_TABLESIZE,         "sg_tablesize=%u"       },
2910         { SRP_OPT_COMP_VECTOR,          "comp_vector=%u"        },
2911         { SRP_OPT_TL_RETRY_COUNT,       "tl_retry_count=%u"     },
2912         { SRP_OPT_QUEUE_SIZE,           "queue_size=%d"         },
2913         { SRP_OPT_ERR,                  NULL                    }
2914 };
2915
2916 static int srp_parse_options(const char *buf, struct srp_target_port *target)
2917 {
2918         char *options, *sep_opt;
2919         char *p;
2920         char dgid[3];
2921         substring_t args[MAX_OPT_ARGS];
2922         int opt_mask = 0;
2923         int token;
2924         int ret = -EINVAL;
2925         int i;
2926
2927         options = kstrdup(buf, GFP_KERNEL);
2928         if (!options)
2929                 return -ENOMEM;
2930
2931         sep_opt = options;
2932         while ((p = strsep(&sep_opt, ",\n")) != NULL) {
2933                 if (!*p)
2934                         continue;
2935
2936                 token = match_token(p, srp_opt_tokens, args);
2937                 opt_mask |= token;
2938
2939                 switch (token) {
2940                 case SRP_OPT_ID_EXT:
2941                         p = match_strdup(args);
2942                         if (!p) {
2943                                 ret = -ENOMEM;
2944                                 goto out;
2945                         }
2946                         target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
2947                         kfree(p);
2948                         break;
2949
2950                 case SRP_OPT_IOC_GUID:
2951                         p = match_strdup(args);
2952                         if (!p) {
2953                                 ret = -ENOMEM;
2954                                 goto out;
2955                         }
2956                         target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
2957                         kfree(p);
2958                         break;
2959
2960                 case SRP_OPT_DGID:
2961                         p = match_strdup(args);
2962                         if (!p) {
2963                                 ret = -ENOMEM;
2964                                 goto out;
2965                         }
2966                         if (strlen(p) != 32) {
2967                                 pr_warn("bad dest GID parameter '%s'\n", p);
2968                                 kfree(p);
2969                                 goto out;
2970                         }
2971
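                        /*
                         * Parse the 32-character hex GID one byte (two hex
                         * digits) at a time.
                         */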
2972                         for (i = 0; i < 16; ++i) {
2973                                 strlcpy(dgid, p + i * 2, sizeof(dgid));
2974                                 if (sscanf(dgid, "%hhx",
2975                                            &target->orig_dgid.raw[i]) < 1) {
2976                                         ret = -EINVAL;
2977                                         kfree(p);
2978                                         goto out;
2979                                 }
2980                         }
2981                         kfree(p);
2982                         break;
2983
2984                 case SRP_OPT_PKEY:
2985                         if (match_hex(args, &token)) {
2986                                 pr_warn("bad P_Key parameter '%s'\n", p);
2987                                 goto out;
2988                         }
2989                         target->pkey = cpu_to_be16(token);
2990                         break;
2991
2992                 case SRP_OPT_SERVICE_ID:
2993                         p = match_strdup(args);
2994                         if (!p) {
2995                                 ret = -ENOMEM;
2996                                 goto out;
2997                         }
2998                         target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
2999                         kfree(p);
3000                         break;
3001
3002                 case SRP_OPT_MAX_SECT:
3003                         if (match_int(args, &token)) {
3004                                 pr_warn("bad max sect parameter '%s'\n", p);
3005                                 goto out;
3006                         }
3007                         target->scsi_host->max_sectors = token;
3008                         break;
3009
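                /*
                 * queue_size sets the SCSI host queue depth (can_queue); the
                 * RDMA queue is sized SRP_RSP_SQ_SIZE + SRP_TSK_MGMT_SQ_SIZE
                 * entries larger to leave room for response and task
                 * management IUs.
                 */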
3010                 case SRP_OPT_QUEUE_SIZE:
3011                         if (match_int(args, &token) || token < 1) {
3012                                 pr_warn("bad queue_size parameter '%s'\n", p);
3013                                 goto out;
3014                         }
3015                         target->scsi_host->can_queue = token;
3016                         target->queue_size = token + SRP_RSP_SQ_SIZE +
3017                                              SRP_TSK_MGMT_SQ_SIZE;
3018                         if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3019                                 target->scsi_host->cmd_per_lun = token;
3020                         break;
3021
3022                 case SRP_OPT_MAX_CMD_PER_LUN:
3023                         if (match_int(args, &token) || token < 1) {
3024                                 pr_warn("bad max cmd_per_lun parameter '%s'\n",
3025                                         p);
3026                                 goto out;
3027                         }
3028                         target->scsi_host->cmd_per_lun = token;
3029                         break;
3030
3031                 case SRP_OPT_IO_CLASS:
3032                         if (match_hex(args, &token)) {
3033                                 pr_warn("bad IO class parameter '%s'\n", p);
3034                                 goto out;
3035                         }
3036                         if (token != SRP_REV10_IB_IO_CLASS &&
3037                             token != SRP_REV16A_IB_IO_CLASS) {
3038                                 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3039                                         token, SRP_REV10_IB_IO_CLASS,
3040                                         SRP_REV16A_IB_IO_CLASS);
3041                                 goto out;
3042                         }
3043                         target->io_class = token;
3044                         break;
3045
3046                 case SRP_OPT_INITIATOR_EXT:
3047                         p = match_strdup(args);
3048                         if (!p) {
3049                                 ret = -ENOMEM;
3050                                 goto out;
3051                         }
3052                         target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3053                         kfree(p);
3054                         break;
3055
3056                 case SRP_OPT_CMD_SG_ENTRIES:
3057                         if (match_int(args, &token) || token < 1 || token > 255) {
3058                                 pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3059                                         p);
3060                                 goto out;
3061                         }
3062                         target->cmd_sg_cnt = token;
3063                         break;
3064
3065                 case SRP_OPT_ALLOW_EXT_SG:
3066                         if (match_int(args, &token)) {
3067                                 pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3068                                 goto out;
3069                         }
3070                         target->allow_ext_sg = !!token;
3071                         break;
3072
3073                 case SRP_OPT_SG_TABLESIZE:
3074                         if (match_int(args, &token) || token < 1 ||
3075                                         token > SCSI_MAX_SG_CHAIN_SEGMENTS) {
3076                                 pr_warn("bad max sg_tablesize parameter '%s'\n",
3077                                         p);
3078                                 goto out;
3079                         }
3080                         target->sg_tablesize = token;
3081                         break;
3082
3083                 case SRP_OPT_COMP_VECTOR:
3084                         if (match_int(args, &token) || token < 0) {
3085                                 pr_warn("bad comp_vector parameter '%s'\n", p);
3086                                 goto out;
3087                         }
3088                         target->comp_vector = token;
3089                         break;
3090
3091                 case SRP_OPT_TL_RETRY_COUNT:
3092                         if (match_int(args, &token) || token < 2 || token > 7) {
3093                                 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3094                                         p);
3095                                 goto out;
3096                         }
3097                         target->tl_retry_count = token;
3098                         break;
3099
3100                 default:
3101                         pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3102                                 p);
3103                         goto out;
3104                 }
3105         }
3106
3107         if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
3108                 ret = 0;
3109         else
3110                 for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
3111                         if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
3112                             !(srp_opt_tokens[i].token & opt_mask))
3113                                 pr_warn("target creation request is missing parameter '%s'\n",
3114                                         srp_opt_tokens[i].pattern);
3115
3116         if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3117             && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3118                 pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3119                         target->scsi_host->cmd_per_lun,
3120                         target->scsi_host->can_queue);
3121
3122 out:
3123         kfree(options);
3124         return ret;
3125 }
3126
3127 static ssize_t srp_create_target(struct device *dev,
3128                                  struct device_attribute *attr,
3129                                  const char *buf, size_t count)
3130 {
3131         struct srp_host *host =
3132                 container_of(dev, struct srp_host, dev);
3133         struct Scsi_Host *target_host;
3134         struct srp_target_port *target;
3135         struct srp_rdma_ch *ch;
3136         struct srp_device *srp_dev = host->srp_dev;
3137         struct ib_device *ibdev = srp_dev->dev;
3138         int ret, node_idx, node, cpu, i;
3139         bool multich = false;
3140
3141         target_host = scsi_host_alloc(&srp_template,
3142                                       sizeof (struct srp_target_port));
3143         if (!target_host)
3144                 return -ENOMEM;
3145
3146         target_host->transportt  = ib_srp_transport_template;
3147         target_host->max_channel = 0;
3148         target_host->max_id      = 1;
3149         target_host->max_lun     = -1LL;
3150         target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3151
3152         target = host_to_target(target_host);
3153
3154         target->io_class        = SRP_REV16A_IB_IO_CLASS;
3155         target->scsi_host       = target_host;
3156         target->srp_host        = host;
3157         target->lkey            = host->srp_dev->pd->local_dma_lkey;
3158         target->rkey            = host->srp_dev->mr->rkey;
3159         target->cmd_sg_cnt      = cmd_sg_entries;
3160         target->sg_tablesize    = indirect_sg_entries ? : cmd_sg_entries;
3161         target->allow_ext_sg    = allow_ext_sg;
3162         target->tl_retry_count  = 7;
3163         target->queue_size      = SRP_DEFAULT_QUEUE_SIZE;
3164
3165         /*
3166          * Prevent the SCSI host from being removed by srp_remove_target()
3167          * before this function returns.
3168          */
3169         scsi_host_get(target->scsi_host);
3170
3171         mutex_lock(&host->add_target_mutex);
3172
3173         ret = srp_parse_options(buf, target);
3174         if (ret)
3175                 goto out;
3176
3177         ret = scsi_init_shared_tag_map(target_host, target_host->can_queue);
3178         if (ret)
3179                 goto out;
3180
3181         target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3182
3183         if (!srp_conn_unique(target->srp_host, target)) {
3184                 shost_printk(KERN_INFO, target->scsi_host,
3185                              PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3186                              be64_to_cpu(target->id_ext),
3187                              be64_to_cpu(target->ioc_guid),
3188                              be64_to_cpu(target->initiator_ext));
3189                 ret = -EEXIST;
3190                 goto out;
3191         }
3192
3193         if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3194             target->cmd_sg_cnt < target->sg_tablesize) {
3195                 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3196                 target->sg_tablesize = target->cmd_sg_cnt;
3197         }
3198
3199         target_host->sg_tablesize = target->sg_tablesize;
3200         target->indirect_size = target->sg_tablesize *
3201                                 sizeof (struct srp_direct_buf);
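        /*
         * Largest SRP_CMD IU this initiator will send: the command itself,
         * one indirect buffer descriptor and up to cmd_sg_cnt direct
         * descriptors embedded in the IU.
         */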
3202         target->max_iu_len = sizeof (struct srp_cmd) +
3203                              sizeof (struct srp_indirect_buf) +
3204                              target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
3205
3206         INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3207         INIT_WORK(&target->remove_work, srp_remove_work);
3208         spin_lock_init(&target->lock);
3209         ret = ib_query_gid(ibdev, host->port, 0, &target->sgid);
3210         if (ret)
3211                 goto out;
3212
3213         ret = -ENOMEM;
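        /*
         * Number of RDMA channels: ch_count if it has been set, otherwise
         * min(4 * #online NUMA nodes, #completion vectors), capped at the
         * number of online CPUs and raised to at least one channel per node.
         */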
3214         target->ch_count = max_t(unsigned, num_online_nodes(),
3215                                  min(ch_count ? :
3216                                      min(4 * num_online_nodes(),
3217                                          ibdev->num_comp_vectors),
3218                                      num_online_cpus()));
3219         target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3220                              GFP_KERNEL);
3221         if (!target->ch)
3222                 goto out;
3223
3224         node_idx = 0;
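        /*
         * Distribute the channels and the HCA completion vectors evenly
         * over the online NUMA nodes; within each node, assign one channel
         * per online CPU until that node's share has been used up.
         */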
3225         for_each_online_node(node) {
3226                 const int ch_start = (node_idx * target->ch_count /
3227                                       num_online_nodes());
3228                 const int ch_end = ((node_idx + 1) * target->ch_count /
3229                                     num_online_nodes());
3230                 const int cv_start = (node_idx * ibdev->num_comp_vectors /
3231                                       num_online_nodes() + target->comp_vector)
3232                                      % ibdev->num_comp_vectors;
3233                 const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
3234                                     num_online_nodes() + target->comp_vector)
3235                                    % ibdev->num_comp_vectors;
3236                 int cpu_idx = 0;
3237
3238                 for_each_online_cpu(cpu) {
3239                         if (cpu_to_node(cpu) != node)
3240                                 continue;
3241                         if (ch_start + cpu_idx >= ch_end)
3242                                 continue;
3243                         ch = &target->ch[ch_start + cpu_idx];
3244                         ch->target = target;
3245                         ch->comp_vector = cv_start == cv_end ? cv_start :
3246                                 cv_start + cpu_idx % (cv_end - cv_start);
3247                         spin_lock_init(&ch->lock);
3248                         INIT_LIST_HEAD(&ch->free_tx);
3249                         ret = srp_new_cm_id(ch);
3250                         if (ret)
3251                                 goto err_disconnect;
3252
3253                         ret = srp_create_ch_ib(ch);
3254                         if (ret)
3255                                 goto err_disconnect;
3256
3257                         ret = srp_alloc_req_data(ch);
3258                         if (ret)
3259                                 goto err_disconnect;
3260
3261                         ret = srp_connect_ch(ch, multich);
3262                         if (ret) {
3263                                 shost_printk(KERN_ERR, target->scsi_host,
3264                                              PFX "Connection %d/%d failed\n",
3265                                              ch_start + cpu_idx,
3266                                              target->ch_count);
3267                                 if (node_idx == 0 && cpu_idx == 0) {
3268                                         goto err_disconnect;
3269                                 } else {
3270                                         srp_free_ch_ib(target, ch);
3271                                         srp_free_req_data(target, ch);
3272                                         target->ch_count = ch - target->ch;
3273                                         goto connected;
3274                                 }
3275                         }
3276
3277                         multich = true;
3278                         cpu_idx++;
3279                 }
3280                 node_idx++;
3281         }
3282
3283 connected:
3284         target->scsi_host->nr_hw_queues = target->ch_count;
3285
3286         ret = srp_add_target(host, target);
3287         if (ret)
3288                 goto err_disconnect;
3289
3290         if (target->state != SRP_TARGET_REMOVED) {
3291                 shost_printk(KERN_DEBUG, target->scsi_host, PFX
3292                              "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3293                              be64_to_cpu(target->id_ext),
3294                              be64_to_cpu(target->ioc_guid),
3295                              be16_to_cpu(target->pkey),
3296                              be64_to_cpu(target->service_id),
3297                              target->sgid.raw, target->orig_dgid.raw);
3298         }
3299
3300         ret = count;
3301
3302 out:
3303         mutex_unlock(&host->add_target_mutex);
3304
3305         scsi_host_put(target->scsi_host);
3306         if (ret < 0)
3307                 scsi_host_put(target->scsi_host);
3308
3309         return ret;
3310
3311 err_disconnect:
3312         srp_disconnect_target(target);
3313
3314         for (i = 0; i < target->ch_count; i++) {
3315                 ch = &target->ch[i];
3316                 srp_free_ch_ib(target, ch);
3317                 srp_free_req_data(target, ch);
3318         }
3319
3320         kfree(target->ch);
3321         goto out;
3322 }
3323
3324 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
3325
3326 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
3327                           char *buf)
3328 {
3329         struct srp_host *host = container_of(dev, struct srp_host, dev);
3330
3331         return sprintf(buf, "%s\n", host->srp_dev->dev->name);
3332 }
3333
3334 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
3335
3336 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
3337                          char *buf)
3338 {
3339         struct srp_host *host = container_of(dev, struct srp_host, dev);
3340
3341         return sprintf(buf, "%d\n", host->port);
3342 }
3343
3344 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
3345
3346 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
3347 {
3348         struct srp_host *host;
3349
3350         host = kzalloc(sizeof *host, GFP_KERNEL);
3351         if (!host)
3352                 return NULL;
3353
3354         INIT_LIST_HEAD(&host->target_list);
3355         spin_lock_init(&host->target_lock);
3356         init_completion(&host->released);
3357         mutex_init(&host->add_target_mutex);
3358         host->srp_dev = device;
3359         host->port = port;
3360
3361         host->dev.class = &srp_class;
3362         host->dev.parent = device->dev->dma_device;
3363         dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
3364
3365         if (device_register(&host->dev))
3366                 goto free_host;
3367         if (device_create_file(&host->dev, &dev_attr_add_target))
3368                 goto err_class;
3369         if (device_create_file(&host->dev, &dev_attr_ibdev))
3370                 goto err_class;
3371         if (device_create_file(&host->dev, &dev_attr_port))
3372                 goto err_class;
3373
3374         return host;
3375
3376 err_class:
3377         device_unregister(&host->dev);
3378
3379 free_host:
3380         kfree(host);
3381
3382         return NULL;
3383 }
3384
3385 static void srp_add_one(struct ib_device *device)
3386 {
3387         struct srp_device *srp_dev;
3388         struct ib_device_attr *dev_attr;
3389         struct srp_host *host;
3390         int mr_page_shift, p;
3391         u64 max_pages_per_mr;
3392
3393         dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
3394         if (!dev_attr)
3395                 return;
3396
3397         if (ib_query_device(device, dev_attr)) {
3398                 pr_warn("Query device failed for %s\n", device->name);
3399                 goto free_attr;
3400         }
3401
3402         srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL);
3403         if (!srp_dev)
3404                 goto free_attr;
3405
3406         srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
3407                             device->map_phys_fmr && device->unmap_fmr);
3408         srp_dev->has_fr = (dev_attr->device_cap_flags &
3409                            IB_DEVICE_MEM_MGT_EXTENSIONS);
3410         if (!srp_dev->has_fmr && !srp_dev->has_fr)
3411                 dev_warn(&device->dev, "neither FMR nor FR is supported\n");
3412
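        /*
         * Use fast registration when it is the only MR scheme available, or
         * when both FR and FMR are supported and prefer_fr has been set.
         */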
3413         srp_dev->use_fast_reg = (srp_dev->has_fr &&
3414                                  (!srp_dev->has_fmr || prefer_fr));
3415
3416         /*
3417          * Use the smallest page size supported by the HCA, down to a
3418          * minimum of 4096 bytes. We're unlikely to build large sglists
3419          * out of smaller entries.
3420          */
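        /*
         * For example, if the smallest page size the HCA supports is 4 KiB
         * (lowest set bit of page_size_cap is bit 12), ffs() returns 13 and
         * mr_page_shift becomes 12, i.e. 4096-byte MR pages.
         */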
3421         mr_page_shift           = max(12, ffs(dev_attr->page_size_cap) - 1);
3422         srp_dev->mr_page_size   = 1 << mr_page_shift;
3423         srp_dev->mr_page_mask   = ~((u64) srp_dev->mr_page_size - 1);
3424         max_pages_per_mr        = dev_attr->max_mr_size;
3425         do_div(max_pages_per_mr, srp_dev->mr_page_size);
3426         srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
3427                                           max_pages_per_mr);
3428         if (srp_dev->use_fast_reg) {
3429                 srp_dev->max_pages_per_mr =
3430                         min_t(u32, srp_dev->max_pages_per_mr,
3431                               dev_attr->max_fast_reg_page_list_len);
3432         }
3433         srp_dev->mr_max_size    = srp_dev->mr_page_size *
3434                                    srp_dev->max_pages_per_mr;
3435         pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
3436                  device->name, mr_page_shift, dev_attr->max_mr_size,
3437                  dev_attr->max_fast_reg_page_list_len,
3438                  srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
3439
3440         INIT_LIST_HEAD(&srp_dev->dev_list);
3441
3442         srp_dev->dev = device;
3443         srp_dev->pd  = ib_alloc_pd(device);
3444         if (IS_ERR(srp_dev->pd))
3445                 goto free_dev;
3446
3447         srp_dev->mr = ib_get_dma_mr(srp_dev->pd,
3448                                     IB_ACCESS_LOCAL_WRITE |
3449                                     IB_ACCESS_REMOTE_READ |
3450                                     IB_ACCESS_REMOTE_WRITE);
3451         if (IS_ERR(srp_dev->mr))
3452                 goto err_pd;
3453
3454         for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
3455                 host = srp_add_port(srp_dev, p);
3456                 if (host)
3457                         list_add_tail(&host->list, &srp_dev->dev_list);
3458         }
3459
3460         ib_set_client_data(device, &srp_client, srp_dev);
3461
3462         goto free_attr;
3463
3464 err_pd:
3465         ib_dealloc_pd(srp_dev->pd);
3466
3467 free_dev:
3468         kfree(srp_dev);
3469
3470 free_attr:
3471         kfree(dev_attr);
3472 }
3473
3474 static void srp_remove_one(struct ib_device *device, void *client_data)
3475 {
3476         struct srp_device *srp_dev;
3477         struct srp_host *host, *tmp_host;
3478         struct srp_target_port *target;
3479
3480         srp_dev = client_data;
3481         if (!srp_dev)
3482                 return;
3483
3484         list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
3485                 device_unregister(&host->dev);
3486                 /*
3487                  * Wait for the sysfs entry to go away, so that no new
3488                  * target ports can be created.
3489                  */
3490                 wait_for_completion(&host->released);
3491
3492                 /*
3493                  * Remove all target ports.
3494                  */
3495                 spin_lock(&host->target_lock);
3496                 list_for_each_entry(target, &host->target_list, list)
3497                         srp_queue_remove_work(target);
3498                 spin_unlock(&host->target_lock);
3499
3500                 /*
3501                  * Wait for tl_err and target port removal tasks.
3502                  */
3503                 flush_workqueue(system_long_wq);
3504                 flush_workqueue(srp_remove_wq);
3505
3506                 kfree(host);
3507         }
3508
3509         ib_dereg_mr(srp_dev->mr);
3510         ib_dealloc_pd(srp_dev->pd);
3511
3512         kfree(srp_dev);
3513 }
3514
3515 static struct srp_function_template ib_srp_transport_functions = {
3516         .has_rport_state         = true,
3517         .reset_timer_if_blocked  = true,
3518         .reconnect_delay         = &srp_reconnect_delay,
3519         .fast_io_fail_tmo        = &srp_fast_io_fail_tmo,
3520         .dev_loss_tmo            = &srp_dev_loss_tmo,
3521         .reconnect               = srp_rport_reconnect,
3522         .rport_delete            = srp_rport_delete,
3523         .terminate_rport_io      = srp_terminate_io,
3524 };
3525
3526 static int __init srp_init_module(void)
3527 {
3528         int ret;
3529
3530         BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *));
3531
3532         if (srp_sg_tablesize) {
3533                 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
3534                 if (!cmd_sg_entries)
3535                         cmd_sg_entries = srp_sg_tablesize;
3536         }
3537
3538         if (!cmd_sg_entries)
3539                 cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
3540
3541         if (cmd_sg_entries > 255) {
3542                 pr_warn("Clamping cmd_sg_entries to 255\n");
3543                 cmd_sg_entries = 255;
3544         }
3545
3546         if (!indirect_sg_entries)
3547                 indirect_sg_entries = cmd_sg_entries;
3548         else if (indirect_sg_entries < cmd_sg_entries) {
3549                 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
3550                         cmd_sg_entries);
3551                 indirect_sg_entries = cmd_sg_entries;
3552         }
3553
3554         srp_remove_wq = create_workqueue("srp_remove");
3555         if (!srp_remove_wq) {
3556                 ret = -ENOMEM;
3557                 goto out;
3558         }
3559
3560         ret = -ENOMEM;
3561         ib_srp_transport_template =
3562                 srp_attach_transport(&ib_srp_transport_functions);
3563         if (!ib_srp_transport_template)
3564                 goto destroy_wq;
3565
3566         ret = class_register(&srp_class);
3567         if (ret) {
3568                 pr_err("couldn't register class infiniband_srp\n");
3569                 goto release_tr;
3570         }
3571
3572         ib_sa_register_client(&srp_sa_client);
3573
3574         ret = ib_register_client(&srp_client);
3575         if (ret) {
3576                 pr_err("couldn't register IB client\n");
3577                 goto unreg_sa;
3578         }
3579
3580 out:
3581         return ret;
3582
3583 unreg_sa:
3584         ib_sa_unregister_client(&srp_sa_client);
3585         class_unregister(&srp_class);
3586
3587 release_tr:
3588         srp_release_transport(ib_srp_transport_template);
3589
3590 destroy_wq:
3591         destroy_workqueue(srp_remove_wq);
3592         goto out;
3593 }
3594
3595 static void __exit srp_cleanup_module(void)
3596 {
3597         ib_unregister_client(&srp_client);
3598         ib_sa_unregister_client(&srp_sa_client);
3599         class_unregister(&srp_class);
3600         srp_release_transport(ib_srp_transport_template);
3601         destroy_workqueue(srp_remove_wq);
3602 }
3603
3604 module_init(srp_init_module);
3605 module_exit(srp_cleanup_module);