drivers/infiniband/ulp/srp/ib_srp.c
/*
 * Copyright (c) 2005 Cisco Systems.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/parser.h>
#include <linux/random.h>
#include <linux/jiffies.h>

#include <linux/atomic.h>

#include <scsi/scsi.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_dbg.h>
#include <scsi/scsi_tcq.h>
#include <scsi/srp.h>
#include <scsi/scsi_transport_srp.h>

#include "ib_srp.h"

#define DRV_NAME        "ib_srp"
#define PFX             DRV_NAME ": "
#define DRV_VERSION     "1.0"
#define DRV_RELDATE     "July 1, 2013"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator "
                   "v" DRV_VERSION " (" DRV_RELDATE ")");
MODULE_LICENSE("Dual BSD/GPL");
static unsigned int srp_sg_tablesize;
static unsigned int cmd_sg_entries;
static unsigned int indirect_sg_entries;
static bool allow_ext_sg;
static bool prefer_fr;
static bool register_always;
static int topspin_workarounds = 1;

module_param(srp_sg_tablesize, uint, 0444);
MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");

module_param(cmd_sg_entries, uint, 0444);
MODULE_PARM_DESC(cmd_sg_entries,
                 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");

module_param(indirect_sg_entries, uint, 0444);
MODULE_PARM_DESC(indirect_sg_entries,
                 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SCSI_MAX_SG_CHAIN_SEGMENTS) ")");

module_param(allow_ext_sg, bool, 0444);
MODULE_PARM_DESC(allow_ext_sg,
                 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");

module_param(topspin_workarounds, int, 0444);
MODULE_PARM_DESC(topspin_workarounds,
                 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");

module_param(prefer_fr, bool, 0444);
MODULE_PARM_DESC(prefer_fr,
                 "Whether to use fast registration if both FMR and fast registration are supported");

module_param(register_always, bool, 0444);
MODULE_PARM_DESC(register_always,
                 "Use memory registration even for contiguous memory regions");
static struct kernel_param_ops srp_tmo_ops;

static int srp_reconnect_delay = 10;
module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
                S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");

static int srp_fast_io_fail_tmo = 15;
module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
                S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(fast_io_fail_tmo,
                 "Number of seconds between the observation of a transport"
                 " layer error and failing all I/O. \"off\" means that this"
                 " functionality is disabled.");

static int srp_dev_loss_tmo = 600;
module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
                S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dev_loss_tmo,
                 "Maximum number of seconds that the SRP transport should"
                 " insulate transport layer errors. After this time has been"
                 " exceeded the SCSI host is removed. Should be"
                 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
                 " if fast_io_fail_tmo has not been set. \"off\" means that"
                 " this functionality is disabled.");

static unsigned ch_count;
module_param(ch_count, uint, 0444);
MODULE_PARM_DESC(ch_count,
                 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
static void srp_add_one(struct ib_device *device);
static void srp_remove_one(struct ib_device *device);
static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr);
static void srp_send_completion(struct ib_cq *cq, void *ch_ptr);
static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);

static struct scsi_transport_template *ib_srp_transport_template;
static struct workqueue_struct *srp_remove_wq;

static struct ib_client srp_client = {
        .name   = "srp",
        .add    = srp_add_one,
        .remove = srp_remove_one
};

static struct ib_sa_client srp_sa_client;

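/*
 * get/set handlers for the reconnect_delay, fast_io_fail_tmo and
 * dev_loss_tmo module parameters. A negative timeout is reported as "off",
 * and srp_tmo_valid() rejects combinations of the three values that would
 * leave the transport layer in an inconsistent state.
 */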
static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
{
        int tmo = *(int *)kp->arg;

        if (tmo >= 0)
                return sprintf(buffer, "%d", tmo);
        else
                return sprintf(buffer, "off");
}

static int srp_tmo_set(const char *val, const struct kernel_param *kp)
{
        int tmo, res;

        if (strncmp(val, "off", 3) != 0) {
                res = kstrtoint(val, 0, &tmo);
                if (res)
                        goto out;
        } else {
                tmo = -1;
        }
        if (kp->arg == &srp_reconnect_delay)
                res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
                                    srp_dev_loss_tmo);
        else if (kp->arg == &srp_fast_io_fail_tmo)
                res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
        else
                res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
                                    tmo);
        if (res)
                goto out;
        *(int *)kp->arg = tmo;

out:
        return res;
}

static struct kernel_param_ops srp_tmo_ops = {
        .get = srp_tmo_get,
        .set = srp_tmo_set,
};

static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
{
        return (struct srp_target_port *) host->hostdata;
}

static const char *srp_target_info(struct Scsi_Host *host)
{
        return host_to_target(host)->target_name;
}

static int srp_target_is_topspin(struct srp_target_port *target)
{
        static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
        static const u8 cisco_oui[3]   = { 0x00, 0x1b, 0x0d };

        return topspin_workarounds &&
                (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
                 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
}

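/*
 * Allocate an SRP information unit (IU): a buffer of @size bytes plus the
 * DMA mapping that lets the HCA access it in @direction.
 */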
static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
                                   gfp_t gfp_mask,
                                   enum dma_data_direction direction)
{
        struct srp_iu *iu;

        iu = kmalloc(sizeof *iu, gfp_mask);
        if (!iu)
                goto out;

        iu->buf = kzalloc(size, gfp_mask);
        if (!iu->buf)
                goto out_free_iu;

        iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
                                    direction);
        if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
                goto out_free_buf;

        iu->size      = size;
        iu->direction = direction;

        return iu;

out_free_buf:
        kfree(iu->buf);
out_free_iu:
        kfree(iu);
out:
        return NULL;
}

static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
{
        if (!iu)
                return;

        ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
                            iu->direction);
        kfree(iu->buf);
        kfree(iu);
}

static void srp_qp_event(struct ib_event *event, void *context)
{
        pr_debug("QP event %d\n", event->event);
}

static int srp_init_qp(struct srp_target_port *target,
                       struct ib_qp *qp)
{
        struct ib_qp_attr *attr;
        int ret;

        attr = kmalloc(sizeof *attr, GFP_KERNEL);
        if (!attr)
                return -ENOMEM;

        ret = ib_find_pkey(target->srp_host->srp_dev->dev,
                           target->srp_host->port,
                           be16_to_cpu(target->pkey),
                           &attr->pkey_index);
        if (ret)
                goto out;

        attr->qp_state        = IB_QPS_INIT;
        attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
                                 IB_ACCESS_REMOTE_WRITE);
        attr->port_num        = target->srp_host->port;

        ret = ib_modify_qp(qp, attr,
                           IB_QP_STATE          |
                           IB_QP_PKEY_INDEX     |
                           IB_QP_ACCESS_FLAGS   |
                           IB_QP_PORT);

out:
        kfree(attr);
        return ret;
}

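/*
 * Allocate a new CM ID for @ch and reset the cached path information to the
 * original target parameters. Any previous CM ID is destroyed only after
 * its replacement has been created successfully.
 */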
static int srp_new_cm_id(struct srp_rdma_ch *ch)
{
        struct srp_target_port *target = ch->target;
        struct ib_cm_id *new_cm_id;

        new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
                                    srp_cm_handler, ch);
        if (IS_ERR(new_cm_id))
                return PTR_ERR(new_cm_id);

        if (ch->cm_id)
                ib_destroy_cm_id(ch->cm_id);
        ch->cm_id = new_cm_id;
        ch->path.sgid = target->sgid;
        ch->path.dgid = target->orig_dgid;
        ch->path.pkey = target->pkey;
        ch->path.service_id = target->service_id;

        return 0;
}

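/*
 * Create an FMR pool sized to match the SCSI host queue depth; unmapped
 * ("dirty") FMRs are flushed once a quarter of the pool is dirty.
 */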
static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
{
        struct srp_device *dev = target->srp_host->srp_dev;
        struct ib_fmr_pool_param fmr_param;

        memset(&fmr_param, 0, sizeof(fmr_param));
        fmr_param.pool_size         = target->scsi_host->can_queue;
        fmr_param.dirty_watermark   = fmr_param.pool_size / 4;
        fmr_param.cache             = 1;
        fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
        fmr_param.page_shift        = ilog2(dev->mr_page_size);
        fmr_param.access            = (IB_ACCESS_LOCAL_WRITE |
                                       IB_ACCESS_REMOTE_WRITE |
                                       IB_ACCESS_REMOTE_READ);

        return ib_create_fmr_pool(dev->pd, &fmr_param);
}

/**
 * srp_destroy_fr_pool() - free the resources owned by a pool
 * @pool: Fast registration pool to be destroyed.
 */
static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
{
        int i;
        struct srp_fr_desc *d;

        if (!pool)
                return;

        for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
                if (d->frpl)
                        ib_free_fast_reg_page_list(d->frpl);
                if (d->mr)
                        ib_dereg_mr(d->mr);
        }
        kfree(pool);
}

/**
 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
 * @device:            IB device to allocate fast registration descriptors for.
 * @pd:                Protection domain associated with the FR descriptors.
 * @pool_size:         Number of descriptors to allocate.
 * @max_page_list_len: Maximum fast registration work request page list length.
 */
static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
                                              struct ib_pd *pd, int pool_size,
                                              int max_page_list_len)
{
        struct srp_fr_pool *pool;
        struct srp_fr_desc *d;
        struct ib_mr *mr;
        struct ib_fast_reg_page_list *frpl;
        int i, ret = -EINVAL;

        if (pool_size <= 0)
                goto err;
        ret = -ENOMEM;
        pool = kzalloc(sizeof(struct srp_fr_pool) +
                       pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
        if (!pool)
                goto err;
        pool->size = pool_size;
        pool->max_page_list_len = max_page_list_len;
        spin_lock_init(&pool->lock);
        INIT_LIST_HEAD(&pool->free_list);

        for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
                mr = ib_alloc_fast_reg_mr(pd, max_page_list_len);
                if (IS_ERR(mr)) {
                        ret = PTR_ERR(mr);
                        goto destroy_pool;
                }
                d->mr = mr;
                frpl = ib_alloc_fast_reg_page_list(device, max_page_list_len);
                if (IS_ERR(frpl)) {
                        ret = PTR_ERR(frpl);
                        goto destroy_pool;
                }
                d->frpl = frpl;
                list_add_tail(&d->entry, &pool->free_list);
        }

out:
        return pool;

destroy_pool:
        srp_destroy_fr_pool(pool);

err:
        pool = ERR_PTR(ret);
        goto out;
}

/**
 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
 * @pool: Pool to obtain descriptor from.
 */
static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
{
        struct srp_fr_desc *d = NULL;
        unsigned long flags;

        spin_lock_irqsave(&pool->lock, flags);
        if (!list_empty(&pool->free_list)) {
                d = list_first_entry(&pool->free_list, typeof(*d), entry);
                list_del(&d->entry);
        }
        spin_unlock_irqrestore(&pool->lock, flags);

        return d;
}

/**
 * srp_fr_pool_put() - put an FR descriptor back in the free list
 * @pool: Pool the descriptor was allocated from.
 * @desc: Pointer to an array of fast registration descriptor pointers.
 * @n:    Number of descriptors to put back.
 *
 * Note: The caller must already have queued an invalidation request for
 * desc->mr->rkey before calling this function.
 */
static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
                            int n)
{
        unsigned long flags;
        int i;

        spin_lock_irqsave(&pool->lock, flags);
        for (i = 0; i < n; i++)
                list_add(&desc[i]->entry, &pool->free_list);
        spin_unlock_irqrestore(&pool->lock, flags);
}

static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
{
        struct srp_device *dev = target->srp_host->srp_dev;

        return srp_create_fr_pool(dev->dev, dev->pd,
                                  target->scsi_host->can_queue,
                                  dev->max_pages_per_mr);
}

/**
 * srp_destroy_qp() - destroy an RDMA queue pair
 * @ch: SRP RDMA channel.
 *
 * Change a queue pair into the error state and wait until all receive
 * completions have been processed before destroying it. This prevents
 * the receive completion handler from accessing the queue pair while it
 * is being destroyed.
 */
static void srp_destroy_qp(struct srp_rdma_ch *ch)
{
        struct srp_target_port *target = ch->target;
        static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
        static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID };
        struct ib_recv_wr *bad_wr;
        int ret;

        /* Destroying a QP and reusing ch->done is only safe if not connected */
        WARN_ON_ONCE(target->connected);

        ret = ib_modify_qp(ch->qp, &attr, IB_QP_STATE);
        WARN_ONCE(ret, "ib_modify_qp() returned %d\n", ret);
        if (ret)
                goto out;

        init_completion(&ch->done);
        ret = ib_post_recv(ch->qp, &wr, &bad_wr);
        WARN_ONCE(ret, "ib_post_recv() returned %d\n", ret);
        if (ret == 0)
                wait_for_completion(&ch->done);

out:
        ib_destroy_qp(ch->qp);
}

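/*
 * Create the completion queues, the queue pair and the memory registration
 * pool for @ch. On reconnect the previous resources are destroyed only
 * after their replacements have been allocated, so a failure leaves the
 * channel in its old state.
 */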
static int srp_create_ch_ib(struct srp_rdma_ch *ch)
{
        struct srp_target_port *target = ch->target;
        struct srp_device *dev = target->srp_host->srp_dev;
        struct ib_qp_init_attr *init_attr;
        struct ib_cq *recv_cq, *send_cq;
        struct ib_qp *qp;
        struct ib_fmr_pool *fmr_pool = NULL;
        struct srp_fr_pool *fr_pool = NULL;
        const int m = 1 + dev->use_fast_reg;
        int ret;

        init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
        if (!init_attr)
                return -ENOMEM;

        /* + 1 for SRP_LAST_WR_ID */
        recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch,
                               target->queue_size + 1, ch->comp_vector);
        if (IS_ERR(recv_cq)) {
                ret = PTR_ERR(recv_cq);
                goto err;
        }

        send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, ch,
                               m * target->queue_size, ch->comp_vector);
        if (IS_ERR(send_cq)) {
                ret = PTR_ERR(send_cq);
                goto err_recv_cq;
        }

        ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);

        init_attr->event_handler       = srp_qp_event;
        init_attr->cap.max_send_wr     = m * target->queue_size;
        init_attr->cap.max_recv_wr     = target->queue_size + 1;
        init_attr->cap.max_recv_sge    = 1;
        init_attr->cap.max_send_sge    = 1;
        init_attr->sq_sig_type         = IB_SIGNAL_REQ_WR;
        init_attr->qp_type             = IB_QPT_RC;
        init_attr->send_cq             = send_cq;
        init_attr->recv_cq             = recv_cq;

        qp = ib_create_qp(dev->pd, init_attr);
        if (IS_ERR(qp)) {
                ret = PTR_ERR(qp);
                goto err_send_cq;
        }

        ret = srp_init_qp(target, qp);
        if (ret)
                goto err_qp;

        if (dev->use_fast_reg && dev->has_fr) {
                fr_pool = srp_alloc_fr_pool(target);
                if (IS_ERR(fr_pool)) {
                        ret = PTR_ERR(fr_pool);
                        shost_printk(KERN_WARNING, target->scsi_host, PFX
                                     "FR pool allocation failed (%d)\n", ret);
                        goto err_qp;
                }
                if (ch->fr_pool)
                        srp_destroy_fr_pool(ch->fr_pool);
                ch->fr_pool = fr_pool;
        } else if (!dev->use_fast_reg && dev->has_fmr) {
                fmr_pool = srp_alloc_fmr_pool(target);
                if (IS_ERR(fmr_pool)) {
                        ret = PTR_ERR(fmr_pool);
                        shost_printk(KERN_WARNING, target->scsi_host, PFX
                                     "FMR pool allocation failed (%d)\n", ret);
                        goto err_qp;
                }
                if (ch->fmr_pool)
                        ib_destroy_fmr_pool(ch->fmr_pool);
                ch->fmr_pool = fmr_pool;
        }

        if (ch->qp)
                srp_destroy_qp(ch);
        if (ch->recv_cq)
                ib_destroy_cq(ch->recv_cq);
        if (ch->send_cq)
                ib_destroy_cq(ch->send_cq);

        ch->qp = qp;
        ch->recv_cq = recv_cq;
        ch->send_cq = send_cq;

        kfree(init_attr);
        return 0;

err_qp:
        ib_destroy_qp(qp);

err_send_cq:
        ib_destroy_cq(send_cq);

err_recv_cq:
        ib_destroy_cq(recv_cq);

err:
        kfree(init_attr);
        return ret;
}

/*
 * Note: this function may be called without srp_alloc_iu_bufs() having been
 * invoked. Hence the ch->[rt]x_ring checks.
 */
static void srp_free_ch_ib(struct srp_target_port *target,
                           struct srp_rdma_ch *ch)
{
        struct srp_device *dev = target->srp_host->srp_dev;
        int i;

        if (!ch->target)
                return;

        if (ch->cm_id) {
                ib_destroy_cm_id(ch->cm_id);
                ch->cm_id = NULL;
        }

        /* If srp_new_cm_id() succeeded but srp_create_ch_ib() did not, return. */
        if (!ch->qp)
                return;

        if (dev->use_fast_reg) {
                if (ch->fr_pool)
                        srp_destroy_fr_pool(ch->fr_pool);
        } else {
                if (ch->fmr_pool)
                        ib_destroy_fmr_pool(ch->fmr_pool);
        }
        srp_destroy_qp(ch);
        ib_destroy_cq(ch->send_cq);
        ib_destroy_cq(ch->recv_cq);

        /*
         * Prevent the SCSI error handler from using this channel after it
         * has been freed: the error handler may keep trying to perform
         * recovery actions after scsi_remove_host() has returned.
         */
        ch->target = NULL;

        ch->qp = NULL;
        ch->send_cq = ch->recv_cq = NULL;

        if (ch->rx_ring) {
                for (i = 0; i < target->queue_size; ++i)
                        srp_free_iu(target->srp_host, ch->rx_ring[i]);
                kfree(ch->rx_ring);
                ch->rx_ring = NULL;
        }
        if (ch->tx_ring) {
                for (i = 0; i < target->queue_size; ++i)
                        srp_free_iu(target->srp_host, ch->tx_ring[i]);
                kfree(ch->tx_ring);
                ch->tx_ring = NULL;
        }
}

static void srp_path_rec_completion(int status,
                                    struct ib_sa_path_rec *pathrec,
                                    void *ch_ptr)
{
        struct srp_rdma_ch *ch = ch_ptr;
        struct srp_target_port *target = ch->target;

        ch->status = status;
        if (status)
                shost_printk(KERN_ERR, target->scsi_host,
                             PFX "Got failed path rec status %d\n", status);
        else
                ch->path = *pathrec;
        complete(&ch->done);
}

static int srp_lookup_path(struct srp_rdma_ch *ch)
{
        struct srp_target_port *target = ch->target;
        int ret;

        ch->path.numb_path = 1;

        init_completion(&ch->done);

        ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
                                               target->srp_host->srp_dev->dev,
                                               target->srp_host->port,
                                               &ch->path,
                                               IB_SA_PATH_REC_SERVICE_ID |
                                               IB_SA_PATH_REC_DGID       |
                                               IB_SA_PATH_REC_SGID       |
                                               IB_SA_PATH_REC_NUMB_PATH  |
                                               IB_SA_PATH_REC_PKEY,
                                               SRP_PATH_REC_TIMEOUT_MS,
                                               GFP_KERNEL,
                                               srp_path_rec_completion,
                                               ch, &ch->path_query);
        if (ch->path_query_id < 0)
                return ch->path_query_id;

        ret = wait_for_completion_interruptible(&ch->done);
        if (ret < 0)
                return ret;

        if (ch->status < 0)
                shost_printk(KERN_WARNING, target->scsi_host,
                             PFX "Path record query failed\n");

        return ch->status;
}

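/*
 * Build an SRP_LOGIN_REQ and send it as the private data of an IB CM REQ.
 * The port identifier byte order depends on the I/O class reported by the
 * target: rev. 10 targets use the obsolete pre-rev. 16a layout.
 */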
static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
{
        struct srp_target_port *target = ch->target;
        struct {
                struct ib_cm_req_param param;
                struct srp_login_req   priv;
        } *req = NULL;
        int status;

        req = kzalloc(sizeof *req, GFP_KERNEL);
        if (!req)
                return -ENOMEM;

        req->param.primary_path               = &ch->path;
        req->param.alternate_path             = NULL;
        req->param.service_id                 = target->service_id;
        req->param.qp_num                     = ch->qp->qp_num;
        req->param.qp_type                    = ch->qp->qp_type;
        req->param.private_data               = &req->priv;
        req->param.private_data_len           = sizeof req->priv;
        req->param.flow_control               = 1;

        get_random_bytes(&req->param.starting_psn, 4);
        req->param.starting_psn              &= 0xffffff;

        /*
         * Pick some arbitrary defaults here; we could make these
         * module parameters if anyone cared about setting them.
         */
        req->param.responder_resources        = 4;
        req->param.remote_cm_response_timeout = 20;
        req->param.local_cm_response_timeout  = 20;
        req->param.retry_count                = target->tl_retry_count;
        req->param.rnr_retry_count            = 7;
        req->param.max_cm_retries             = 15;

        req->priv.opcode        = SRP_LOGIN_REQ;
        req->priv.tag           = 0;
        req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
        req->priv.req_buf_fmt   = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
                                              SRP_BUF_FORMAT_INDIRECT);
        req->priv.req_flags     = (multich ? SRP_MULTICHAN_MULTI :
                                   SRP_MULTICHAN_SINGLE);
        /*
         * In the published SRP specification (draft rev. 16a), the
         * port identifier format is 8 bytes of ID extension followed
         * by 8 bytes of GUID.  Older drafts put the two halves in the
         * opposite order, so that the GUID comes first.
         *
         * Targets conforming to these obsolete drafts can be
         * recognized by the I/O Class they report.
         */
        if (target->io_class == SRP_REV10_IB_IO_CLASS) {
                memcpy(req->priv.initiator_port_id,
                       &target->sgid.global.interface_id, 8);
                memcpy(req->priv.initiator_port_id + 8,
                       &target->initiator_ext, 8);
                memcpy(req->priv.target_port_id,     &target->ioc_guid, 8);
                memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
        } else {
                memcpy(req->priv.initiator_port_id,
                       &target->initiator_ext, 8);
                memcpy(req->priv.initiator_port_id + 8,
                       &target->sgid.global.interface_id, 8);
                memcpy(req->priv.target_port_id,     &target->id_ext, 8);
                memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
        }

        /*
         * Topspin/Cisco SRP targets will reject our login unless we
         * zero out the first 8 bytes of our initiator port ID and set
         * the second 8 bytes to the local node GUID.
         */
        if (srp_target_is_topspin(target)) {
                shost_printk(KERN_DEBUG, target->scsi_host,
                             PFX "Topspin/Cisco initiator port ID workaround "
                             "activated for target GUID %016llx\n",
                             (unsigned long long) be64_to_cpu(target->ioc_guid));
                memset(req->priv.initiator_port_id, 0, 8);
                memcpy(req->priv.initiator_port_id + 8,
                       &target->srp_host->srp_dev->dev->node_guid, 8);
        }

        status = ib_send_cm_req(ch->cm_id, &req->param);

        kfree(req);

        return status;
}

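/*
 * Transition @target to the SRP_TARGET_REMOVED state and schedule the
 * removal work. Returns false if removal had already been scheduled.
 */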
static bool srp_queue_remove_work(struct srp_target_port *target)
{
        bool changed = false;

        spin_lock_irq(&target->lock);
        if (target->state != SRP_TARGET_REMOVED) {
                target->state = SRP_TARGET_REMOVED;
                changed = true;
        }
        spin_unlock_irq(&target->lock);

        if (changed)
                queue_work(srp_remove_wq, &target->remove_work);

        return changed;
}

static bool srp_change_conn_state(struct srp_target_port *target,
                                  bool connected)
{
        bool changed = false;

        spin_lock_irq(&target->lock);
        if (target->connected != connected) {
                target->connected = connected;
                changed = true;
        }
        spin_unlock_irq(&target->lock);

        return changed;
}

static void srp_disconnect_target(struct srp_target_port *target)
{
        struct srp_rdma_ch *ch;
        int i;

        if (srp_change_conn_state(target, false)) {
                /* XXX should send SRP_I_LOGOUT request */

                for (i = 0; i < target->ch_count; i++) {
                        ch = &target->ch[i];
                        if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
                                shost_printk(KERN_DEBUG, target->scsi_host,
                                             PFX "Sending CM DREQ failed\n");
                        }
                }
        }
}

static void srp_free_req_data(struct srp_target_port *target,
                              struct srp_rdma_ch *ch)
{
        struct srp_device *dev = target->srp_host->srp_dev;
        struct ib_device *ibdev = dev->dev;
        struct srp_request *req;
        int i;

        if (!ch->target || !ch->req_ring)
                return;

        for (i = 0; i < target->req_ring_size; ++i) {
                req = &ch->req_ring[i];
                if (dev->use_fast_reg)
                        kfree(req->fr_list);
                else
                        kfree(req->fmr_list);
                kfree(req->map_page);
                if (req->indirect_dma_addr) {
                        ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
                                            target->indirect_size,
                                            DMA_TO_DEVICE);
                }
                kfree(req->indirect_desc);
        }

        kfree(ch->req_ring);
        ch->req_ring = NULL;
}

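/*
 * Allocate the request ring of @ch together with the per-request MR lists,
 * page arrays and DMA-mapped indirect descriptor tables. On failure the
 * caller is expected to clean up via srp_free_req_data().
 */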
static int srp_alloc_req_data(struct srp_rdma_ch *ch)
{
        struct srp_target_port *target = ch->target;
        struct srp_device *srp_dev = target->srp_host->srp_dev;
        struct ib_device *ibdev = srp_dev->dev;
        struct srp_request *req;
        void *mr_list;
        dma_addr_t dma_addr;
        int i, ret = -ENOMEM;

        ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
                               GFP_KERNEL);
        if (!ch->req_ring)
                goto out;

        for (i = 0; i < target->req_ring_size; ++i) {
                req = &ch->req_ring[i];
                mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
                                  GFP_KERNEL);
                if (!mr_list)
                        goto out;
                if (srp_dev->use_fast_reg)
                        req->fr_list = mr_list;
                else
                        req->fmr_list = mr_list;
                req->map_page = kmalloc(srp_dev->max_pages_per_mr *
                                        sizeof(void *), GFP_KERNEL);
                if (!req->map_page)
                        goto out;
                req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
                if (!req->indirect_desc)
                        goto out;

                dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
                                             target->indirect_size,
                                             DMA_TO_DEVICE);
                if (ib_dma_mapping_error(ibdev, dma_addr))
                        goto out;

                req->indirect_dma_addr = dma_addr;
        }
        ret = 0;

out:
        return ret;
}

/**
 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
 * @shost: SCSI host whose attributes to remove from sysfs.
 *
 * Note: Any attributes defined in the host template that did not exist
 * before invocation of this function will be ignored.
 */
static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
{
        struct device_attribute **attr;

        for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
                device_remove_file(&shost->shost_dev, *attr);
}

static void srp_remove_target(struct srp_target_port *target)
{
        struct srp_rdma_ch *ch;
        int i;

        WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);

        srp_del_scsi_host_attr(target->scsi_host);
        srp_rport_get(target->rport);
        srp_remove_host(target->scsi_host);
        scsi_remove_host(target->scsi_host);
        srp_stop_rport_timers(target->rport);
        srp_disconnect_target(target);
        for (i = 0; i < target->ch_count; i++) {
                ch = &target->ch[i];
                srp_free_ch_ib(target, ch);
        }
        cancel_work_sync(&target->tl_err_work);
        srp_rport_put(target->rport);
        for (i = 0; i < target->ch_count; i++) {
                ch = &target->ch[i];
                srp_free_req_data(target, ch);
        }
        kfree(target->ch);
        target->ch = NULL;

        spin_lock(&target->srp_host->target_lock);
        list_del(&target->list);
        spin_unlock(&target->srp_host->target_lock);

        scsi_host_put(target->scsi_host);
}

static void srp_remove_work(struct work_struct *work)
{
        struct srp_target_port *target =
                container_of(work, struct srp_target_port, remove_work);

        WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);

        srp_remove_target(target);
}

static void srp_rport_delete(struct srp_rport *rport)
{
        struct srp_target_port *target = rport->lld_data;

        srp_queue_remove_work(target);
}

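/*
 * Look up a path record and log in to the target, retrying the login as
 * long as the target answers with a port or DLID redirect.
 */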
static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
{
        struct srp_target_port *target = ch->target;
        int ret;

        WARN_ON_ONCE(!multich && target->connected);

        target->qp_in_error = false;

        ret = srp_lookup_path(ch);
        if (ret)
                return ret;

        while (1) {
                init_completion(&ch->done);
                ret = srp_send_req(ch, multich);
                if (ret)
                        return ret;
                ret = wait_for_completion_interruptible(&ch->done);
                if (ret < 0)
                        return ret;

                /*
                 * The CM event handling code will set status to
                 * SRP_PORT_REDIRECT if we get a port redirect REJ
                 * back, or SRP_DLID_REDIRECT if we get a lid/qp
                 * redirect REJ back.
                 */
                switch (ch->status) {
                case 0:
                        srp_change_conn_state(target, true);
                        return 0;

                case SRP_PORT_REDIRECT:
                        ret = srp_lookup_path(ch);
                        if (ret)
                                return ret;
                        break;

                case SRP_DLID_REDIRECT:
                        break;

                case SRP_STALE_CONN:
                        shost_printk(KERN_ERR, target->scsi_host, PFX
                                     "giving up on stale connection\n");
                        ch->status = -ECONNRESET;
                        return ch->status;

                default:
                        return ch->status;
                }
        }
}

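/* Post a work request that invalidates @rkey on the queue pair of @ch. */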
static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey)
{
        struct ib_send_wr *bad_wr;
        struct ib_send_wr wr = {
                .opcode             = IB_WR_LOCAL_INV,
                .wr_id              = LOCAL_INV_WR_ID_MASK,
                .next               = NULL,
                .num_sge            = 0,
                .send_flags         = 0,
                .ex.invalidate_rkey = rkey,
        };

        return ib_post_send(ch->qp, &wr, &bad_wr);
}

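/*
 * Undo the memory registrations and the DMA mapping set up by
 * srp_map_data(). With fast registration the rkeys are invalidated
 * asynchronously; failure to post an INV WR is treated as a transport
 * layer error.
 */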
static void srp_unmap_data(struct scsi_cmnd *scmnd,
                           struct srp_rdma_ch *ch,
                           struct srp_request *req)
{
        struct srp_target_port *target = ch->target;
        struct srp_device *dev = target->srp_host->srp_dev;
        struct ib_device *ibdev = dev->dev;
        int i, res;

        if (!scsi_sglist(scmnd) ||
            (scmnd->sc_data_direction != DMA_TO_DEVICE &&
             scmnd->sc_data_direction != DMA_FROM_DEVICE))
                return;

        if (dev->use_fast_reg) {
                struct srp_fr_desc **pfr;

                for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
                        res = srp_inv_rkey(ch, (*pfr)->mr->rkey);
                        if (res < 0) {
                                shost_printk(KERN_ERR, target->scsi_host, PFX
                                  "Queueing INV WR for rkey %#x failed (%d)\n",
                                  (*pfr)->mr->rkey, res);
                                queue_work(system_long_wq,
                                           &target->tl_err_work);
                        }
                }
                if (req->nmdesc)
                        srp_fr_pool_put(ch->fr_pool, req->fr_list,
                                        req->nmdesc);
        } else {
                struct ib_pool_fmr **pfmr;

                for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
                        ib_fmr_pool_unmap(*pfmr);
        }

        ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
                        scmnd->sc_data_direction);
}

/**
 * srp_claim_req - Take ownership of the scmnd associated with a request.
 * @ch: SRP RDMA channel.
 * @req: SRP request.
 * @sdev: If not NULL, only take ownership for this SCSI device.
 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
 *         ownership of @req->scmnd if it equals @scmnd.
 *
 * Return value:
 * Either NULL or a pointer to the SCSI command the caller became owner of.
 */
static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
                                       struct srp_request *req,
                                       struct scsi_device *sdev,
                                       struct scsi_cmnd *scmnd)
{
        unsigned long flags;

        spin_lock_irqsave(&ch->lock, flags);
        if (req->scmnd &&
            (!sdev || req->scmnd->device == sdev) &&
            (!scmnd || req->scmnd == scmnd)) {
                scmnd = req->scmnd;
                req->scmnd = NULL;
        } else {
                scmnd = NULL;
        }
        spin_unlock_irqrestore(&ch->lock, flags);

        return scmnd;
}

/**
 * srp_free_req() - Unmap data and add request to the free request list.
 * @ch:     SRP RDMA channel.
 * @req:    Request to be freed.
 * @scmnd:  SCSI command associated with @req.
 * @req_lim_delta: Amount to be added to @target->req_lim.
 */
static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
                         struct scsi_cmnd *scmnd, s32 req_lim_delta)
{
        unsigned long flags;

        srp_unmap_data(scmnd, ch, req);

        spin_lock_irqsave(&ch->lock, flags);
        ch->req_lim += req_lim_delta;
        spin_unlock_irqrestore(&ch->lock, flags);
}

static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
                           struct scsi_device *sdev, int result)
{
        struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);

        if (scmnd) {
                srp_free_req(ch, req, scmnd, 0);
                scmnd->result = result;
                scmnd->scsi_done(scmnd);
        }
}

static void srp_terminate_io(struct srp_rport *rport)
{
        struct srp_target_port *target = rport->lld_data;
        struct srp_rdma_ch *ch;
        struct Scsi_Host *shost = target->scsi_host;
        struct scsi_device *sdev;
        int i, j;

        /*
         * Invoking srp_terminate_io() while srp_queuecommand() is running
         * is not safe. Hence the warning statement below.
         */
        shost_for_each_device(sdev, shost)
                WARN_ON_ONCE(sdev->request_queue->request_fn_active);

        for (i = 0; i < target->ch_count; i++) {
                ch = &target->ch[i];

                for (j = 0; j < target->req_ring_size; ++j) {
                        struct srp_request *req = &ch->req_ring[j];

                        srp_finish_req(ch, req, NULL,
                                       DID_TRANSPORT_FAILFAST << 16);
                }
        }
}

/*
 * It is up to the caller to ensure that srp_rport_reconnect() calls are
 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
 * srp_reset_device() or srp_reset_host() calls will occur while this function
 * is in progress. One way to realize that is not to call this function
 * directly but to call srp_reconnect_rport() instead, since that function
 * serializes calls of this function via rport->mutex and also blocks
 * srp_queuecommand() calls before invoking this function.
 */
static int srp_rport_reconnect(struct srp_rport *rport)
{
        struct srp_target_port *target = rport->lld_data;
        struct srp_rdma_ch *ch;
        int i, j, ret = 0;
        bool multich = false;

        srp_disconnect_target(target);

        if (target->state == SRP_TARGET_SCANNING)
                return -ENODEV;

        /*
         * Now get a new local CM ID so that we avoid confusing the target in
         * case things are really fouled up. Doing so also ensures that all CM
         * callbacks will have finished before a new QP is allocated.
         */
        for (i = 0; i < target->ch_count; i++) {
                ch = &target->ch[i];
                if (!ch->target)
                        break;
                ret += srp_new_cm_id(ch);
        }
        for (i = 0; i < target->ch_count; i++) {
                ch = &target->ch[i];
                if (!ch->target)
                        break;
                for (j = 0; j < target->req_ring_size; ++j) {
                        struct srp_request *req = &ch->req_ring[j];

                        srp_finish_req(ch, req, NULL, DID_RESET << 16);
                }
        }
        for (i = 0; i < target->ch_count; i++) {
                ch = &target->ch[i];
                if (!ch->target)
                        break;
                /*
                 * Whether or not creating a new CM ID succeeded, create a new
                 * QP. This guarantees that all completion callback function
                 * invocations have finished before request resetting starts.
                 */
                ret += srp_create_ch_ib(ch);

                INIT_LIST_HEAD(&ch->free_tx);
                for (j = 0; j < target->queue_size; ++j)
                        list_add(&ch->tx_ring[j]->list, &ch->free_tx);
        }
        for (i = 0; i < target->ch_count; i++) {
                ch = &target->ch[i];
                if (ret || !ch->target) {
                        if (i > 1)
                                ret = 0;
                        break;
                }
                ret = srp_connect_ch(ch, multich);
                multich = true;
        }

        if (ret == 0)
                shost_printk(KERN_INFO, target->scsi_host,
                             PFX "reconnect succeeded\n");

        return ret;
}

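/*
 * Append one SRP direct buffer descriptor (address, rkey and length) to
 * the descriptor list tracked in @state.
 */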
static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
                         unsigned int dma_len, u32 rkey)
{
        struct srp_direct_buf *desc = state->desc;

        desc->va = cpu_to_be64(dma_addr);
        desc->key = cpu_to_be32(rkey);
        desc->len = cpu_to_be32(dma_len);

        state->total_len += dma_len;
        state->desc++;
        state->ndesc++;
}

static int srp_map_finish_fmr(struct srp_map_state *state,
                              struct srp_rdma_ch *ch)
{
        struct ib_pool_fmr *fmr;
        u64 io_addr = 0;

        fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
                                   state->npages, io_addr);
        if (IS_ERR(fmr))
                return PTR_ERR(fmr);

        *state->next_fmr++ = fmr;
        state->nmdesc++;

        srp_map_desc(state, 0, state->dma_len, fmr->fmr->rkey);

        return 0;
}

static int srp_map_finish_fr(struct srp_map_state *state,
                             struct srp_rdma_ch *ch)
{
        struct srp_target_port *target = ch->target;
        struct srp_device *dev = target->srp_host->srp_dev;
        struct ib_send_wr *bad_wr;
        struct ib_send_wr wr;
        struct srp_fr_desc *desc;
        u32 rkey;

        desc = srp_fr_pool_get(ch->fr_pool);
        if (!desc)
                return -ENOMEM;

        rkey = ib_inc_rkey(desc->mr->rkey);
        ib_update_fast_reg_key(desc->mr, rkey);

        memcpy(desc->frpl->page_list, state->pages,
               sizeof(state->pages[0]) * state->npages);

        memset(&wr, 0, sizeof(wr));
        wr.opcode = IB_WR_FAST_REG_MR;
        wr.wr_id = FAST_REG_WR_ID_MASK;
        wr.wr.fast_reg.iova_start = state->base_dma_addr;
        wr.wr.fast_reg.page_list = desc->frpl;
        wr.wr.fast_reg.page_list_len = state->npages;
        wr.wr.fast_reg.page_shift = ilog2(dev->mr_page_size);
        wr.wr.fast_reg.length = state->dma_len;
        wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
                                       IB_ACCESS_REMOTE_READ |
                                       IB_ACCESS_REMOTE_WRITE);
        wr.wr.fast_reg.rkey = desc->mr->lkey;

        *state->next_fr++ = desc;
        state->nmdesc++;

        srp_map_desc(state, state->base_dma_addr, state->dma_len,
                     desc->mr->rkey);

        return ib_post_send(ch->qp, &wr, &bad_wr);
}

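/*
 * Register the pages accumulated in @state, either through the FMR pool or
 * via a fast registration work request. A single page is described directly
 * with the global rkey unless register_always has been set.
 */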
static int srp_finish_mapping(struct srp_map_state *state,
                              struct srp_rdma_ch *ch)
{
        struct srp_target_port *target = ch->target;
        int ret = 0;

        if (state->npages == 0)
                return 0;

        if (state->npages == 1 && !register_always)
                srp_map_desc(state, state->base_dma_addr, state->dma_len,
                             target->rkey);
        else
                ret = target->srp_host->srp_dev->use_fast_reg ?
                        srp_map_finish_fr(state, ch) :
                        srp_map_finish_fmr(state, ch);

        if (ret == 0) {
                state->npages = 0;
                state->dma_len = 0;
        }

        return ret;
}

static void srp_map_update_start(struct srp_map_state *state,
                                 struct scatterlist *sg, int sg_index,
                                 dma_addr_t dma_addr)
{
        state->unmapped_sg = sg;
        state->unmapped_index = sg_index;
        state->unmapped_addr = dma_addr;
}

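/*
 * Add one scatterlist entry to the mapping state. The entry is split into
 * mr_page_size chunks, and the current registration is closed out whenever
 * a page offset or the MR size limit would prevent merging.
 */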
static int srp_map_sg_entry(struct srp_map_state *state,
                            struct srp_rdma_ch *ch,
                            struct scatterlist *sg, int sg_index,
                            bool use_mr)
{
        struct srp_target_port *target = ch->target;
        struct srp_device *dev = target->srp_host->srp_dev;
        struct ib_device *ibdev = dev->dev;
        dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
        unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
        unsigned int len;
        int ret;

        if (!dma_len)
                return 0;

        if (!use_mr) {
                /*
                 * Once we're in direct map mode for a request, we don't
                 * go back to FMR or FR mode, so no need to update anything
                 * other than the descriptor.
                 */
                srp_map_desc(state, dma_addr, dma_len, target->rkey);
                return 0;
        }

        /*
         * Since not all RDMA HW drivers support non-zero page offsets for
         * FMR, if we start at an offset into a page, don't merge into the
         * current FMR mapping. Finish it out, and use the kernel's MR for
         * this sg entry.
         */
        if ((!dev->use_fast_reg && dma_addr & ~dev->mr_page_mask) ||
            dma_len > dev->mr_max_size) {
                ret = srp_finish_mapping(state, ch);
                if (ret)
                        return ret;

                srp_map_desc(state, dma_addr, dma_len, target->rkey);
                srp_map_update_start(state, NULL, 0, 0);
                return 0;
        }

        /*
         * If this is the first sg that will be mapped via FMR or via FR, save
         * our position. We need to know the first unmapped entry, its index,
         * and the first unmapped address within that entry to be able to
         * restart mapping after an error.
         */
        if (!state->unmapped_sg)
                srp_map_update_start(state, sg, sg_index, dma_addr);

        while (dma_len) {
                unsigned offset = dma_addr & ~dev->mr_page_mask;
                if (state->npages == dev->max_pages_per_mr || offset != 0) {
                        ret = srp_finish_mapping(state, ch);
                        if (ret)
                                return ret;

                        srp_map_update_start(state, sg, sg_index, dma_addr);
                }

                len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);

                if (!state->npages)
                        state->base_dma_addr = dma_addr;
                state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
                state->dma_len += len;
                dma_addr += len;
                dma_len -= len;
        }

        /*
         * If the last entry of the MR wasn't a full page, then we need to
         * close it out and start a new one -- we can only merge at page
         * boundaries.
         */
        ret = 0;
        if (len != dev->mr_page_size) {
                ret = srp_finish_mapping(state, ch);
                if (!ret)
                        srp_map_update_start(state, NULL, 0, 0);
        }
        return ret;
}

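/*
 * Map all scatterlist entries of @req. If memory registration fails,
 * backtrack to the first unmapped entry and describe the remainder with
 * direct descriptors that use the global rkey.
 */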
1458 static int srp_map_sg(struct srp_map_state *state, struct srp_rdma_ch *ch,
1459                       struct srp_request *req, struct scatterlist *scat,
1460                       int count)
1461 {
1462         struct srp_target_port *target = ch->target;
1463         struct srp_device *dev = target->srp_host->srp_dev;
1464         struct ib_device *ibdev = dev->dev;
1465         struct scatterlist *sg;
1466         int i;
1467         bool use_mr;
1468
1469         state->desc     = req->indirect_desc;
1470         state->pages    = req->map_page;
1471         if (dev->use_fast_reg) {
1472                 state->next_fr = req->fr_list;
1473                 use_mr = !!ch->fr_pool;
1474         } else {
1475                 state->next_fmr = req->fmr_list;
1476                 use_mr = !!ch->fmr_pool;
1477         }
1478
1479         for_each_sg(scat, sg, count, i) {
1480                 if (srp_map_sg_entry(state, ch, sg, i, use_mr)) {
1481                         /*
1482                          * Memory registration failed, so backtrack to the
1483                          * first unmapped entry and continue on without using
1484                          * memory registration.
1485                          */
1486                         dma_addr_t dma_addr;
1487                         unsigned int dma_len;
1488
1489 backtrack:
1490                         sg = state->unmapped_sg;
1491                         i = state->unmapped_index;
1492
1493                         dma_addr = ib_sg_dma_address(ibdev, sg);
1494                         dma_len = ib_sg_dma_len(ibdev, sg);
1495                         dma_len -= (state->unmapped_addr - dma_addr);
1496                         dma_addr = state->unmapped_addr;
1497                         use_mr = false;
1498                         srp_map_desc(state, dma_addr, dma_len, target->rkey);
1499                 }
1500         }
1501
1502         if (use_mr && srp_finish_mapping(state, ch))
1503                 goto backtrack;
1504
1505         req->nmdesc = state->nmdesc;
1506
1507         return 0;
1508 }
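
/*
 * Sketch of the backtrack path above (descriptive only): when
 * registering sg entry i fails, state->unmapped_sg, state->unmapped_index
 * and state->unmapped_addr still point at the first entry whose pages
 * were queued but not yet registered.  Mapping restarts from there with
 * use_mr = false, so that entry and every later one are emitted as plain
 * memory descriptors that rely on target->rkey instead of a newly
 * registered MR.
 */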
1509
1510 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1511                         struct srp_request *req)
1512 {
1513         struct srp_target_port *target = ch->target;
1514         struct scatterlist *scat;
1515         struct srp_cmd *cmd = req->cmd->buf;
1516         int len, nents, count;
1517         struct srp_device *dev;
1518         struct ib_device *ibdev;
1519         struct srp_map_state state;
1520         struct srp_indirect_buf *indirect_hdr;
1521         u32 table_len;
1522         u8 fmt;
1523
1524         if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1525                 return sizeof (struct srp_cmd);
1526
1527         if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1528             scmnd->sc_data_direction != DMA_TO_DEVICE) {
1529                 shost_printk(KERN_WARNING, target->scsi_host,
1530                              PFX "Unhandled data direction %d\n",
1531                              scmnd->sc_data_direction);
1532                 return -EINVAL;
1533         }
1534
1535         nents = scsi_sg_count(scmnd);
1536         scat  = scsi_sglist(scmnd);
1537
1538         dev = target->srp_host->srp_dev;
1539         ibdev = dev->dev;
1540
1541         count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1542         if (unlikely(count == 0))
1543                 return -EIO;
1544
1545         fmt = SRP_DATA_DESC_DIRECT;
1546         len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
1547
1548         if (count == 1 && !register_always) {
1549                 /*
1550                  * The midlayer only generated a single gather/scatter
1551                  * entry, or DMA mapping coalesced everything to a
1552                  * single entry.  So a direct descriptor along with
1553                  * the DMA MR suffices.
1554                  */
1555                 struct srp_direct_buf *buf = (void *) cmd->add_data;
1556
1557                 buf->va  = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1558                 buf->key = cpu_to_be32(target->rkey);
1559                 buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
1560
1561                 req->nmdesc = 0;
1562                 goto map_complete;
1563         }
1564
1565         /*
1566          * We have more than one scatter/gather entry, so build our indirect
1567          * descriptor table, trying to merge as many entries as we can.
1568          */
1569         indirect_hdr = (void *) cmd->add_data;
1570
1571         ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1572                                    target->indirect_size, DMA_TO_DEVICE);
1573
1574         memset(&state, 0, sizeof(state));
1575         srp_map_sg(&state, ch, req, scat, count);
1576
1577         /* We've mapped the request, now pull as much of the indirect
1578          * descriptor table as we can into the command buffer. If this
1579          * target is not using an external indirect table, we are
1580          * guaranteed to fit into the command, as the SCSI layer won't
1581          * give us more S/G entries than we allow.
1582          */
1583         if (state.ndesc == 1) {
1584                 /*
1585                  * Memory registration collapsed the sg-list into one entry,
1586                  * so use a direct descriptor.
1587                  */
1588                 struct srp_direct_buf *buf = (void *) cmd->add_data;
1589
1590                 *buf = req->indirect_desc[0];
1591                 goto map_complete;
1592         }
1593
1594         if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1595                      !target->allow_ext_sg)) {
1596                 shost_printk(KERN_ERR, target->scsi_host,
1597                              "Could not fit S/G list into SRP_CMD\n");
1598                 return -EIO;
1599         }
1600
1601         count = min(state.ndesc, target->cmd_sg_cnt);
1602         table_len = state.ndesc * sizeof (struct srp_direct_buf);
1603
1604         fmt = SRP_DATA_DESC_INDIRECT;
1605         len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
1606         len += count * sizeof (struct srp_direct_buf);
1607
1608         memcpy(indirect_hdr->desc_list, req->indirect_desc,
1609                count * sizeof (struct srp_direct_buf));
1610
1611         indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1612         indirect_hdr->table_desc.key = cpu_to_be32(target->rkey);
1613         indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1614         indirect_hdr->len = cpu_to_be32(state.total_len);
1615
1616         if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1617                 cmd->data_out_desc_cnt = count;
1618         else
1619                 cmd->data_in_desc_cnt = count;
1620
1621         ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1622                                       DMA_TO_DEVICE);
1623
1624 map_complete:
1625         if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1626                 cmd->buf_fmt = fmt << 4;
1627         else
1628                 cmd->buf_fmt = fmt;
1629
1630         return len;
1631 }
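
/*
 * Example of the length computation above (hypothetical sizes): if
 * mapping produced state.ndesc = 4 descriptors and target->cmd_sg_cnt is
 * at least 4, the returned IU length is sizeof(struct srp_cmd) +
 * sizeof(struct srp_indirect_buf) + 4 * sizeof(struct srp_direct_buf),
 * while table_len covers all four struct srp_direct_buf entries whether
 * or not they fit inside the command itself.
 */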
1632
1633 /*
1634  * Return an IU and possibly a credit to the free pool
1635  */
1636 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1637                           enum srp_iu_type iu_type)
1638 {
1639         unsigned long flags;
1640
1641         spin_lock_irqsave(&ch->lock, flags);
1642         list_add(&iu->list, &ch->free_tx);
1643         if (iu_type != SRP_IU_RSP)
1644                 ++ch->req_lim;
1645         spin_unlock_irqrestore(&ch->lock, flags);
1646 }
1647
1648 /*
1649  * Must be called with ch->lock held to protect req_lim and free_tx.
1650  * If IU is not sent, it must be returned using srp_put_tx_iu().
1651  *
1652  * Note:
1653  * An upper limit for the number of allocated information units for each
1654  * request type is:
1655  * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1656  *   more than Scsi_Host.can_queue requests.
1657  * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1658  * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1659  *   one unanswered SRP request to an initiator.
1660  */
1661 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1662                                       enum srp_iu_type iu_type)
1663 {
1664         struct srp_target_port *target = ch->target;
1665         s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1666         struct srp_iu *iu;
1667
1668         srp_send_completion(ch->send_cq, ch);
1669
1670         if (list_empty(&ch->free_tx))
1671                 return NULL;
1672
1673         /* Initiator responses to target requests do not consume credits */
1674         if (iu_type != SRP_IU_RSP) {
1675                 if (ch->req_lim <= rsv) {
1676                         ++target->zero_req_lim;
1677                         return NULL;
1678                 }
1679
1680                 --ch->req_lim;
1681         }
1682
1683         iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1684         list_del(&iu->list);
1685         return iu;
1686 }
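
/*
 * Typical calling pattern (a sketch that mirrors srp_queuecommand() and
 * srp_response_common() below):
 *
 *	spin_lock_irqsave(&ch->lock, flags);
 *	iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
 *	spin_unlock_irqrestore(&ch->lock, flags);
 *
 *	if (iu && srp_post_send(ch, iu, len))
 *		srp_put_tx_iu(ch, iu, SRP_IU_CMD);
 *
 * where the failure path hands back both the IU and its credit.
 */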
1687
1688 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1689 {
1690         struct srp_target_port *target = ch->target;
1691         struct ib_sge list;
1692         struct ib_send_wr wr, *bad_wr;
1693
1694         list.addr   = iu->dma;
1695         list.length = len;
1696         list.lkey   = target->lkey;
1697
1698         wr.next       = NULL;
1699         wr.wr_id      = (uintptr_t) iu;
1700         wr.sg_list    = &list;
1701         wr.num_sge    = 1;
1702         wr.opcode     = IB_WR_SEND;
1703         wr.send_flags = IB_SEND_SIGNALED;
1704
1705         return ib_post_send(ch->qp, &wr, &bad_wr);
1706 }
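
/*
 * Note that srp_post_send() asks for a signaled completion on every
 * send: srp_send_completion() depends on getting one work completion
 * per IU in order to move the IU back onto ch->free_tx.
 */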
1707
1708 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1709 {
1710         struct srp_target_port *target = ch->target;
1711         struct ib_recv_wr wr, *bad_wr;
1712         struct ib_sge list;
1713
1714         list.addr   = iu->dma;
1715         list.length = iu->size;
1716         list.lkey   = target->lkey;
1717
1718         wr.next     = NULL;
1719         wr.wr_id    = (uintptr_t) iu;
1720         wr.sg_list  = &list;
1721         wr.num_sge  = 1;
1722
1723         return ib_post_recv(ch->qp, &wr, &bad_wr);
1724 }
1725
1726 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
1727 {
1728         struct srp_target_port *target = ch->target;
1729         struct srp_request *req;
1730         struct scsi_cmnd *scmnd;
1731         unsigned long flags;
1732
1733         if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
1734                 spin_lock_irqsave(&ch->lock, flags);
1735                 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1736                 spin_unlock_irqrestore(&ch->lock, flags);
1737
1738                 ch->tsk_mgmt_status = -1;
1739                 if (be32_to_cpu(rsp->resp_data_len) >= 4)
1740                         ch->tsk_mgmt_status = rsp->data[3];
1741                 complete(&ch->tsk_mgmt_done);
1742         } else {
1743                 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
1744                 if (scmnd) {
1745                         req = (void *)scmnd->host_scribble;
1746                         scmnd = srp_claim_req(ch, req, NULL, scmnd);
1747                 }
1748                 if (!scmnd) {
1749                         shost_printk(KERN_ERR, target->scsi_host,
1750                                      "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
1751                                      rsp->tag, ch - target->ch, ch->qp->qp_num);
1752
1753                         spin_lock_irqsave(&ch->lock, flags);
1754                         ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1755                         spin_unlock_irqrestore(&ch->lock, flags);
1756
1757                         return;
1758                 }
1759                 scmnd->result = rsp->status;
1760
1761                 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
1762                         memcpy(scmnd->sense_buffer, rsp->data +
1763                                be32_to_cpu(rsp->resp_data_len),
1764                                min_t(int, be32_to_cpu(rsp->sense_data_len),
1765                                      SCSI_SENSE_BUFFERSIZE));
1766                 }
1767
1768                 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
1769                         scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1770                 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
1771                         scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
1772                 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
1773                         scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1774                 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
1775                         scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
1776
1777                 srp_free_req(ch, req, scmnd,
1778                              be32_to_cpu(rsp->req_lim_delta));
1779
1780                 scmnd->host_scribble = NULL;
1781                 scmnd->scsi_done(scmnd);
1782         }
1783 }
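
/*
 * Residual handling example (illustrative values): for a 4096-byte read
 * of which the target transferred only 3072 bytes, the target sets
 * SRP_RSP_FLAG_DIUNDER with data_in_res_cnt = 1024, and the positive
 * residual tells the SCSI layer how much of the buffer was left unfilled.
 */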
1784
1785 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
1786                                void *rsp, int len)
1787 {
1788         struct srp_target_port *target = ch->target;
1789         struct ib_device *dev = target->srp_host->srp_dev->dev;
1790         unsigned long flags;
1791         struct srp_iu *iu;
1792         int err;
1793
1794         spin_lock_irqsave(&ch->lock, flags);
1795         ch->req_lim += req_delta;
1796         iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
1797         spin_unlock_irqrestore(&ch->lock, flags);
1798
1799         if (!iu) {
1800                 shost_printk(KERN_ERR, target->scsi_host, PFX
1801                              "no IU available to send response\n");
1802                 return 1;
1803         }
1804
1805         ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
1806         memcpy(iu->buf, rsp, len);
1807         ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
1808
1809         err = srp_post_send(ch, iu, len);
1810         if (err) {
1811                 shost_printk(KERN_ERR, target->scsi_host, PFX
1812                              "unable to post response: %d\n", err);
1813                 srp_put_tx_iu(ch, iu, SRP_IU_RSP);
1814         }
1815
1816         return err;
1817 }
1818
1819 static void srp_process_cred_req(struct srp_rdma_ch *ch,
1820                                  struct srp_cred_req *req)
1821 {
1822         struct srp_cred_rsp rsp = {
1823                 .opcode = SRP_CRED_RSP,
1824                 .tag = req->tag,
1825         };
1826         s32 delta = be32_to_cpu(req->req_lim_delta);
1827
1828         if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1829                 shost_printk(KERN_ERR, ch->target->scsi_host, PFX
1830                              "problems processing SRP_CRED_REQ\n");
1831 }
1832
1833 static void srp_process_aer_req(struct srp_rdma_ch *ch,
1834                                 struct srp_aer_req *req)
1835 {
1836         struct srp_target_port *target = ch->target;
1837         struct srp_aer_rsp rsp = {
1838                 .opcode = SRP_AER_RSP,
1839                 .tag = req->tag,
1840         };
1841         s32 delta = be32_to_cpu(req->req_lim_delta);
1842
1843         shost_printk(KERN_ERR, target->scsi_host, PFX
1844                      "ignoring AER for LUN %llu\n", be64_to_cpu(req->lun));
1845
1846         if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1847                 shost_printk(KERN_ERR, target->scsi_host, PFX
1848                              "problems processing SRP_AER_REQ\n");
1849 }
1850
1851 static void srp_handle_recv(struct srp_rdma_ch *ch, struct ib_wc *wc)
1852 {
1853         struct srp_target_port *target = ch->target;
1854         struct ib_device *dev = target->srp_host->srp_dev->dev;
1855         struct srp_iu *iu = (struct srp_iu *) (uintptr_t) wc->wr_id;
1856         int res;
1857         u8 opcode;
1858
1859         ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
1860                                    DMA_FROM_DEVICE);
1861
1862         opcode = *(u8 *) iu->buf;
1863
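        /*
         * Debugging aid: change the "if (0)" below to "if (1)" to
         * hex-dump every received IU.
         */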
1864         if (0) {
1865                 shost_printk(KERN_ERR, target->scsi_host,
1866                              PFX "recv completion, opcode 0x%02x\n", opcode);
1867                 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
1868                                iu->buf, wc->byte_len, true);
1869         }
1870
1871         switch (opcode) {
1872         case SRP_RSP:
1873                 srp_process_rsp(ch, iu->buf);
1874                 break;
1875
1876         case SRP_CRED_REQ:
1877                 srp_process_cred_req(ch, iu->buf);
1878                 break;
1879
1880         case SRP_AER_REQ:
1881                 srp_process_aer_req(ch, iu->buf);
1882                 break;
1883
1884         case SRP_T_LOGOUT:
1885                 /* XXX Handle target logout */
1886                 shost_printk(KERN_WARNING, target->scsi_host,
1887                              PFX "Got target logout request\n");
1888                 break;
1889
1890         default:
1891                 shost_printk(KERN_WARNING, target->scsi_host,
1892                              PFX "Unhandled SRP opcode 0x%02x\n", opcode);
1893                 break;
1894         }
1895
1896         ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
1897                                       DMA_FROM_DEVICE);
1898
1899         res = srp_post_recv(ch, iu);
1900         if (res != 0)
1901                 shost_printk(KERN_ERR, target->scsi_host,
1902                              PFX "Recv failed with error code %d\n", res);
1903 }
1904
1905 /**
1906  * srp_tl_err_work() - handle a transport layer error
1907  * @work: Work structure embedded in an SRP target port.
1908  *
1909  * Note: This function may get invoked before the rport has been created,
1910  * hence the target->rport test.
1911  */
1912 static void srp_tl_err_work(struct work_struct *work)
1913 {
1914         struct srp_target_port *target;
1915
1916         target = container_of(work, struct srp_target_port, tl_err_work);
1917         if (target->rport)
1918                 srp_start_tl_fail_timers(target->rport);
1919 }
1920
1921 static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status,
1922                               bool send_err, struct srp_rdma_ch *ch)
1923 {
1924         struct srp_target_port *target = ch->target;
1925
1926         if (wr_id == SRP_LAST_WR_ID) {
1927                 complete(&ch->done);
1928                 return;
1929         }
1930
1931         if (target->connected && !target->qp_in_error) {
1932                 if (wr_id & LOCAL_INV_WR_ID_MASK) {
1933                         shost_printk(KERN_ERR, target->scsi_host, PFX
1934                                      "LOCAL_INV failed with status %d\n",
1935                                      wc_status);
1936                 } else if (wr_id & FAST_REG_WR_ID_MASK) {
1937                         shost_printk(KERN_ERR, target->scsi_host, PFX
1938                                      "FAST_REG_MR failed status %d\n",
1939                                      wc_status);
1940                 } else {
1941                         shost_printk(KERN_ERR, target->scsi_host,
1942                                      PFX "failed %s status %d for iu %p\n",
1943                                      send_err ? "send" : "receive",
1944                                      wc_status, (void *)(uintptr_t)wr_id);
1945                 }
1946                 queue_work(system_long_wq, &target->tl_err_work);
1947         }
1948         target->qp_in_error = true;
1949 }
1950
1951 static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr)
1952 {
1953         struct srp_rdma_ch *ch = ch_ptr;
1954         struct ib_wc wc;
1955
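        /*
         * Re-arm the CQ before draining it: a completion that arrives
         * while we are still polling triggers this handler again instead
         * of being missed.
         */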
1956         ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
1957         while (ib_poll_cq(cq, 1, &wc) > 0) {
1958                 if (likely(wc.status == IB_WC_SUCCESS)) {
1959                         srp_handle_recv(ch, &wc);
1960                 } else {
1961                         srp_handle_qp_err(wc.wr_id, wc.status, false, ch);
1962                 }
1963         }
1964 }
1965
1966 static void srp_send_completion(struct ib_cq *cq, void *ch_ptr)
1967 {
1968         struct srp_rdma_ch *ch = ch_ptr;
1969         struct ib_wc wc;
1970         struct srp_iu *iu;
1971
1972         while (ib_poll_cq(cq, 1, &wc) > 0) {
1973                 if (likely(wc.status == IB_WC_SUCCESS)) {
1974                         iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
1975                         list_add(&iu->list, &ch->free_tx);
1976                 } else {
1977                         srp_handle_qp_err(wc.wr_id, wc.status, true, ch);
1978                 }
1979         }
1980 }
1981
1982 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
1983 {
1984         struct srp_target_port *target = host_to_target(shost);
1985         struct srp_rport *rport = target->rport;
1986         struct srp_rdma_ch *ch;
1987         struct srp_request *req;
1988         struct srp_iu *iu;
1989         struct srp_cmd *cmd;
1990         struct ib_device *dev;
1991         unsigned long flags;
1992         u32 tag;
1993         u16 idx;
1994         int len, ret;
1995         const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
1996
1997         /*
1998          * The SCSI EH thread is the only context from which srp_queuecommand()
1999          * can get invoked for blocked devices (SDEV_BLOCK /
2000          * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
2001          * locking the rport mutex if invoked from inside the SCSI EH.
2002          */
2003         if (in_scsi_eh)
2004                 mutex_lock(&rport->mutex);
2005
2006         scmnd->result = srp_chkready(target->rport);
2007         if (unlikely(scmnd->result))
2008                 goto err;
2009
2010         WARN_ON_ONCE(scmnd->request->tag < 0);
2011         tag = blk_mq_unique_tag(scmnd->request);
2012         ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2013         idx = blk_mq_unique_tag_to_tag(tag);
2014         WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2015                   dev_name(&shost->shost_gendev), tag, idx,
2016                   target->req_ring_size);
2017
2018         spin_lock_irqsave(&ch->lock, flags);
2019         iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2020         spin_unlock_irqrestore(&ch->lock, flags);
2021
2022         if (!iu)
2023                 goto err;
2024
2025         req = &ch->req_ring[idx];
2026         dev = target->srp_host->srp_dev->dev;
2027         ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
2028                                    DMA_TO_DEVICE);
2029
2030         scmnd->host_scribble = (void *) req;
2031
2032         cmd = iu->buf;
2033         memset(cmd, 0, sizeof *cmd);
2034
2035         cmd->opcode = SRP_CMD;
2036         cmd->lun    = cpu_to_be64((u64) scmnd->device->lun << 48);
2037         cmd->tag    = tag;
2038         memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2039
2040         req->scmnd    = scmnd;
2041         req->cmd      = iu;
2042
2043         len = srp_map_data(scmnd, ch, req);
2044         if (len < 0) {
2045                 shost_printk(KERN_ERR, target->scsi_host,
2046                              PFX "Failed to map data (%d)\n", len);
2047                 /*
2048                  * If we ran out of memory descriptors (-ENOMEM) because an
2049                  * application is queuing many requests with more than
2050                  * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2051                  * to reduce queue depth temporarily.
2052                  */
2053                 scmnd->result = len == -ENOMEM ?
2054                         DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2055                 goto err_iu;
2056         }
2057
2058         ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2059                                       DMA_TO_DEVICE);
2060
2061         if (srp_post_send(ch, iu, len)) {
2062                 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2063                 goto err_unmap;
2064         }
2065
2066         ret = 0;
2067
2068 unlock_rport:
2069         if (in_scsi_eh)
2070                 mutex_unlock(&rport->mutex);
2071
2072         return ret;
2073
2074 err_unmap:
2075         srp_unmap_data(scmnd, ch, req);
2076
2077 err_iu:
2078         srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2079
2080         /*
2081          * Ensure that the loops that iterate over the request ring
2082          * never encounter a dangling SCSI command pointer.
2083          */
2084         req->scmnd = NULL;
2085
2086 err:
2087         if (scmnd->result) {
2088                 scmnd->scsi_done(scmnd);
2089                 ret = 0;
2090         } else {
2091                 ret = SCSI_MLQUEUE_HOST_BUSY;
2092         }
2093
2094         goto unlock_rport;
2095 }
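
/*
 * Aside on the tag arithmetic in srp_queuecommand(): blk_mq_unique_tag()
 * packs the hardware queue number into the upper 16 bits and the
 * per-queue tag into the lower 16 bits, so a unique tag of 0x0002000a,
 * for example, selects channel target->ch[2] and request ring slot 10
 * (assuming the target was configured with at least three channels).
 */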
2096
2097 /*
2098  * Note: the resources allocated in this function are freed in
2099  * srp_free_ch_ib().
2100  */
2101 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2102 {
2103         struct srp_target_port *target = ch->target;
2104         int i;
2105
2106         ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2107                               GFP_KERNEL);
2108         if (!ch->rx_ring)
2109                 goto err_no_ring;
2110         ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2111                               GFP_KERNEL);
2112         if (!ch->tx_ring)
2113                 goto err_no_ring;
2114
2115         for (i = 0; i < target->queue_size; ++i) {
2116                 ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2117                                               ch->max_ti_iu_len,
2118                                               GFP_KERNEL, DMA_FROM_DEVICE);
2119                 if (!ch->rx_ring[i])
2120                         goto err;
2121         }
2122
2123         for (i = 0; i < target->queue_size; ++i) {
2124                 ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2125                                               target->max_iu_len,
2126                                               GFP_KERNEL, DMA_TO_DEVICE);
2127                 if (!ch->tx_ring[i])
2128                         goto err;
2129
2130                 list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2131         }
2132
2133         return 0;
2134
2135 err:
2136         for (i = 0; i < target->queue_size; ++i) {
2137                 srp_free_iu(target->srp_host, ch->rx_ring[i]);
2138                 srp_free_iu(target->srp_host, ch->tx_ring[i]);
2139         }
2140
2142 err_no_ring:
2143         kfree(ch->tx_ring);
2144         ch->tx_ring = NULL;
2145         kfree(ch->rx_ring);
2146         ch->rx_ring = NULL;
2147
2148         return -ENOMEM;
2149 }
2150
2151 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2152 {
2153         uint64_t T_tr_ns, max_compl_time_ms;
2154         uint32_t rq_tmo_jiffies;
2155
2156         /*
2157          * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2158          * table 91), both the QP timeout and the retry count have to be set
2159          * for RC QP's during the RTR to RTS transition.
2160          */
2161         WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2162                      (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2163
2164         /*
2165          * Set target->rq_tmo_jiffies to one second more than the largest time
2166          * it can take before an error completion is generated. See also
2167          * C9-140..142 in the IBTA spec for more information about how to
2168          * convert the QP Local ACK Timeout value to nanoseconds.
2169          */
2170         T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2171         max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2172         do_div(max_compl_time_ms, NSEC_PER_MSEC);
2173         rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2174
2175         return rq_tmo_jiffies;
2176 }
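
/*
 * Worked example (illustrative values): with qp_attr->timeout = 14 and
 * qp_attr->retry_cnt = 7, T_tr_ns = 4096 * 2^14 = 67,108,864 ns, the
 * largest possible completion delay is 7 * 4 * T_tr_ns, about 1879 ms,
 * and the resulting timeout is msecs_to_jiffies(1879 + 1000), i.e.
 * roughly 2.9 seconds.
 */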
2177
2178 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2179                                struct srp_login_rsp *lrsp,
2180                                struct srp_rdma_ch *ch)
2181 {
2182         struct srp_target_port *target = ch->target;
2183         struct ib_qp_attr *qp_attr = NULL;
2184         int attr_mask = 0;
2185         int ret;
2186         int i;
2187
2188         if (lrsp->opcode == SRP_LOGIN_RSP) {
2189                 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2190                 ch->req_lim       = be32_to_cpu(lrsp->req_lim_delta);
2191
2192                 /*
2193                  * Reserve credits for task management so we don't
2194                  * bounce requests back to the SCSI mid-layer.
2195                  */
2196                 target->scsi_host->can_queue
2197                         = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2198                               target->scsi_host->can_queue);
2199                 target->scsi_host->cmd_per_lun
2200                         = min_t(int, target->scsi_host->can_queue,
2201                                 target->scsi_host->cmd_per_lun);
2202         } else {
2203                 shost_printk(KERN_WARNING, target->scsi_host,
2204                              PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2205                 ret = -ECONNRESET;
2206                 goto error;
2207         }
2208
2209         if (!ch->rx_ring) {
2210                 ret = srp_alloc_iu_bufs(ch);
2211                 if (ret)
2212                         goto error;
2213         }
2214
2215         ret = -ENOMEM;
2216         qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
2217         if (!qp_attr)
2218                 goto error;
2219
2220         qp_attr->qp_state = IB_QPS_RTR;
2221         ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2222         if (ret)
2223                 goto error_free;
2224
2225         ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2226         if (ret)
2227                 goto error_free;
2228
2229         for (i = 0; i < target->queue_size; i++) {
2230                 struct srp_iu *iu = ch->rx_ring[i];
2231
2232                 ret = srp_post_recv(ch, iu);
2233                 if (ret)
2234                         goto error_free;
2235         }
2236
2237         qp_attr->qp_state = IB_QPS_RTS;
2238         ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2239         if (ret)
2240                 goto error_free;
2241
2242         target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2243
2244         ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2245         if (ret)
2246                 goto error_free;
2247
2248         ret = ib_send_cm_rtu(cm_id, NULL, 0);
2249
2250 error_free:
2251         kfree(qp_attr);
2252
2253 error:
2254         ch->status = ret;
2255 }
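
/*
 * The sequence above is the standard IB CM dance on the active side:
 * move the QP to RTR, pre-post one receive per ring entry so that no
 * incoming IU can be dropped, move the QP to RTS (whose timeout and
 * retry count feed srp_compute_rq_tmo()), and only then send the RTU
 * that lets the target start transmitting.
 */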
2256
2257 static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
2258                                struct ib_cm_event *event,
2259                                struct srp_rdma_ch *ch)
2260 {
2261         struct srp_target_port *target = ch->target;
2262         struct Scsi_Host *shost = target->scsi_host;
2263         struct ib_class_port_info *cpi;
2264         int opcode;
2265
2266         switch (event->param.rej_rcvd.reason) {
2267         case IB_CM_REJ_PORT_CM_REDIRECT:
2268                 cpi = event->param.rej_rcvd.ari;
2269                 ch->path.dlid = cpi->redirect_lid;
2270                 ch->path.pkey = cpi->redirect_pkey;
2271                 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2272                 memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
2273
2274                 ch->status = ch->path.dlid ?
2275                         SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2276                 break;
2277
2278         case IB_CM_REJ_PORT_REDIRECT:
2279                 if (srp_target_is_topspin(target)) {
2280                         /*
2281                          * Topspin/Cisco SRP gateways incorrectly send
2282                          * reject reason code 25 when they mean 24
2283                          * (port redirect).
2284                          */
2285                         memcpy(ch->path.dgid.raw,
2286                                event->param.rej_rcvd.ari, 16);
2287
2288                         shost_printk(KERN_DEBUG, shost,
2289                                      PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2290                                      be64_to_cpu(ch->path.dgid.global.subnet_prefix),
2291                                      be64_to_cpu(ch->path.dgid.global.interface_id));
2292
2293                         ch->status = SRP_PORT_REDIRECT;
2294                 } else {
2295                         shost_printk(KERN_WARNING, shost,
2296                                      "  REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2297                         ch->status = -ECONNRESET;
2298                 }
2299                 break;
2300
2301         case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2302                 shost_printk(KERN_WARNING, shost,
2303                             "  REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2304                 ch->status = -ECONNRESET;
2305                 break;
2306
2307         case IB_CM_REJ_CONSUMER_DEFINED:
2308                 opcode = *(u8 *) event->private_data;
2309                 if (opcode == SRP_LOGIN_REJ) {
2310                         struct srp_login_rej *rej = event->private_data;
2311                         u32 reason = be32_to_cpu(rej->reason);
2312
2313                         if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2314                                 shost_printk(KERN_WARNING, shost,
2315                                              PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2316                         else
2317                                 shost_printk(KERN_WARNING, shost, PFX
2318                                              "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2319                                              target->sgid.raw,
2320                                              target->orig_dgid.raw, reason);
2321                 } else
2322                         shost_printk(KERN_WARNING, shost,
2323                                      "  REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n",
2324                                      opcode);
2325                 ch->status = -ECONNRESET;
2326                 break;
2327
2328         case IB_CM_REJ_STALE_CONN:
2329                 shost_printk(KERN_WARNING, shost, "  REJ reason: stale connection\n");
2330                 ch->status = SRP_STALE_CONN;
2331                 break;
2332
2333         default:
2334                 shost_printk(KERN_WARNING, shost, "  REJ reason 0x%x\n",
2335                              event->param.rej_rcvd.reason);
2336                 ch->status = -ECONNRESET;
2337         }
2338 }
2339
2340 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2341 {
2342         struct srp_rdma_ch *ch = cm_id->context;
2343         struct srp_target_port *target = ch->target;
2344         int comp = 0;
2345
2346         switch (event->event) {
2347         case IB_CM_REQ_ERROR:
2348                 shost_printk(KERN_DEBUG, target->scsi_host,
2349                              PFX "Sending CM REQ failed\n");
2350                 comp = 1;
2351                 ch->status = -ECONNRESET;
2352                 break;
2353
2354         case IB_CM_REP_RECEIVED:
2355                 comp = 1;
2356                 srp_cm_rep_handler(cm_id, event->private_data, ch);
2357                 break;
2358
2359         case IB_CM_REJ_RECEIVED:
2360                 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2361                 comp = 1;
2362
2363                 srp_cm_rej_handler(cm_id, event, ch);
2364                 break;
2365
2366         case IB_CM_DREQ_RECEIVED:
2367                 shost_printk(KERN_WARNING, target->scsi_host,
2368                              PFX "DREQ received - connection closed\n");
2369                 srp_change_conn_state(target, false);
2370                 if (ib_send_cm_drep(cm_id, NULL, 0))
2371                         shost_printk(KERN_ERR, target->scsi_host,
2372                                      PFX "Sending CM DREP failed\n");
2373                 queue_work(system_long_wq, &target->tl_err_work);
2374                 break;
2375
2376         case IB_CM_TIMEWAIT_EXIT:
2377                 shost_printk(KERN_ERR, target->scsi_host,
2378                              PFX "connection closed\n");
2379                 comp = 1;
2380
2381                 ch->status = 0;
2382                 break;
2383
2384         case IB_CM_MRA_RECEIVED:
2385         case IB_CM_DREQ_ERROR:
2386         case IB_CM_DREP_RECEIVED:
2387                 break;
2388
2389         default:
2390                 shost_printk(KERN_WARNING, target->scsi_host,
2391                              PFX "Unhandled CM event %d\n", event->event);
2392                 break;
2393         }
2394
2395         if (comp)
2396                 complete(&ch->done);
2397
2398         return 0;
2399 }
2400
2401 /**
2402  * srp_change_queue_depth - setting device queue depth
2403  * @sdev: scsi device struct
2404  * @qdepth: requested queue depth
2405  * @reason: SCSI_QDEPTH_DEFAULT
2406  * (see include/scsi/scsi_host.h for definition)
2407  *
2408  * Returns queue depth.
2409  */
2410 static int
2411 srp_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason)
2412 {
2413         struct Scsi_Host *shost = sdev->host;
2414         int max_depth;
2415
2416         max_depth = shost->can_queue;
2417         if (!sdev->tagged_supported)
2418                 max_depth = 1;
2419         if (qdepth > max_depth)
2420                 qdepth = max_depth;
2421         scsi_adjust_queue_depth(sdev, qdepth);
2422         return sdev->queue_depth;
2423 }
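
/*
 * For example (hypothetical values): with shost->can_queue = 64, a
 * request for qdepth = 128 is clamped to 64, while a device without
 * tagged command support is always limited to a queue depth of 1.
 */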
2424
2425 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag,
2426                              unsigned int lun, u8 func)
2427 {
2428         struct srp_target_port *target = ch->target;
2429         struct srp_rport *rport = target->rport;
2430         struct ib_device *dev = target->srp_host->srp_dev->dev;
2431         struct srp_iu *iu;
2432         struct srp_tsk_mgmt *tsk_mgmt;
2433
2434         if (!target->connected || target->qp_in_error)
2435                 return -1;
2436
2437         init_completion(&ch->tsk_mgmt_done);
2438
2439         /*
2440          * Lock the rport mutex to prevent srp_create_ch_ib() from being
2441          * invoked while a task management function is being sent.
2442          */
2443         mutex_lock(&rport->mutex);
2444         spin_lock_irq(&ch->lock);
2445         iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2446         spin_unlock_irq(&ch->lock);
2447
2448         if (!iu) {
2449                 mutex_unlock(&rport->mutex);
2450
2451                 return -1;
2452         }
2453
2454         ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2455                                    DMA_TO_DEVICE);
2456         tsk_mgmt = iu->buf;
2457         memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2458
2459         tsk_mgmt->opcode        = SRP_TSK_MGMT;
2460         tsk_mgmt->lun           = cpu_to_be64((u64) lun << 48);
2461         tsk_mgmt->tag           = req_tag | SRP_TAG_TSK_MGMT;
2462         tsk_mgmt->tsk_mgmt_func = func;
2463         tsk_mgmt->task_tag      = req_tag;
2464
2465         ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2466                                       DMA_TO_DEVICE);
2467         if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2468                 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2469                 mutex_unlock(&rport->mutex);
2470
2471                 return -1;
2472         }
2473         mutex_unlock(&rport->mutex);
2474
2475         if (!wait_for_completion_timeout(&ch->tsk_mgmt_done,
2476                                          msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
2477                 return -1;
2478
2479         return 0;
2480 }
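
/*
 * Note on the tag encoding above: setting SRP_TAG_TSK_MGMT keeps task
 * management tags disjoint from normal command tags, which is what lets
 * srp_process_rsp() route any response with that bit set to
 * ch->tsk_mgmt_done instead of to a SCSI command.
 */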
2481
2482 static int srp_abort(struct scsi_cmnd *scmnd)
2483 {
2484         struct srp_target_port *target = host_to_target(scmnd->device->host);
2485         struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2486         u32 tag;
2487         u16 ch_idx;
2488         struct srp_rdma_ch *ch;
2489         int ret;
2490
2491         shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2492
2493         if (!req)
2494                 return SUCCESS;
2495         tag = blk_mq_unique_tag(scmnd->request);
2496         ch_idx = blk_mq_unique_tag_to_hwq(tag);
2497         if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2498                 return SUCCESS;
2499         ch = &target->ch[ch_idx];
2500         if (!srp_claim_req(ch, req, NULL, scmnd))
2501                 return SUCCESS;
2502         shost_printk(KERN_ERR, target->scsi_host,
2503                      "Sending SRP abort for tag %#x\n", tag);
2504         if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2505                               SRP_TSK_ABORT_TASK) == 0)
2506                 ret = SUCCESS;
2507         else if (target->rport->state == SRP_RPORT_LOST)
2508                 ret = FAST_IO_FAIL;
2509         else
2510                 ret = FAILED;
2511         srp_free_req(ch, req, scmnd, 0);
2512         scmnd->result = DID_ABORT << 16;
2513         scmnd->scsi_done(scmnd);
2514
2515         return ret;
2516 }
2517
2518 static int srp_reset_device(struct scsi_cmnd *scmnd)
2519 {
2520         struct srp_target_port *target = host_to_target(scmnd->device->host);
2521         struct srp_rdma_ch *ch;
2522         int i, j;
2523
2524         shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2525
2526         ch = &target->ch[0];
2527         if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2528                               SRP_TSK_LUN_RESET))
2529                 return FAILED;
2530         if (ch->tsk_mgmt_status)
2531                 return FAILED;
2532
2533         for (i = 0; i < target->ch_count; i++) {
2534                 ch = &target->ch[i];
2535                 for (j = 0; j < target->req_ring_size; ++j) {
2536                         struct srp_request *req = &ch->req_ring[j];
2537
2538                         srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
2539                 }
2540         }
2541
2542         return SUCCESS;
2543 }
2544
2545 static int srp_reset_host(struct scsi_cmnd *scmnd)
2546 {
2547         struct srp_target_port *target = host_to_target(scmnd->device->host);
2548
2549         shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2550
2551         return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2552 }
2553
2554 static int srp_slave_configure(struct scsi_device *sdev)
2555 {
2556         struct Scsi_Host *shost = sdev->host;
2557         struct srp_target_port *target = host_to_target(shost);
2558         struct request_queue *q = sdev->request_queue;
2559         unsigned long timeout;
2560
2561         if (sdev->type == TYPE_DISK) {
2562                 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
2563                 blk_queue_rq_timeout(q, timeout);
2564         }
2565
2566         return 0;
2567 }
2568
2569 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
2570                            char *buf)
2571 {
2572         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2573
2574         return sprintf(buf, "0x%016llx\n",
2575                        (unsigned long long) be64_to_cpu(target->id_ext));
2576 }
2577
2578 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
2579                              char *buf)
2580 {
2581         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2582
2583         return sprintf(buf, "0x%016llx\n",
2584                        (unsigned long long) be64_to_cpu(target->ioc_guid));
2585 }
2586
2587 static ssize_t show_service_id(struct device *dev,
2588                                struct device_attribute *attr, char *buf)
2589 {
2590         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2591
2592         return sprintf(buf, "0x%016llx\n",
2593                        (unsigned long long) be64_to_cpu(target->service_id));
2594 }
2595
2596 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
2597                          char *buf)
2598 {
2599         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2600
2601         return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey));
2602 }
2603
2604 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2605                          char *buf)
2606 {
2607         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2608
2609         return sprintf(buf, "%pI6\n", target->sgid.raw);
2610 }
2611
2612 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
2613                          char *buf)
2614 {
2615         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2616         struct srp_rdma_ch *ch = &target->ch[0];
2617
2618         return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
2619 }
2620
2621 static ssize_t show_orig_dgid(struct device *dev,
2622                               struct device_attribute *attr, char *buf)
2623 {
2624         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2625
2626         return sprintf(buf, "%pI6\n", target->orig_dgid.raw);
2627 }
2628
2629 static ssize_t show_req_lim(struct device *dev,
2630                             struct device_attribute *attr, char *buf)
2631 {
2632         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2633         struct srp_rdma_ch *ch;
2634         int i, req_lim = INT_MAX;
2635
2636         for (i = 0; i < target->ch_count; i++) {
2637                 ch = &target->ch[i];
2638                 req_lim = min(req_lim, ch->req_lim);
2639         }
2640         return sprintf(buf, "%d\n", req_lim);
2641 }
2642
2643 static ssize_t show_zero_req_lim(struct device *dev,
2644                                  struct device_attribute *attr, char *buf)
2645 {
2646         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2647
2648         return sprintf(buf, "%d\n", target->zero_req_lim);
2649 }
2650
2651 static ssize_t show_local_ib_port(struct device *dev,
2652                                   struct device_attribute *attr, char *buf)
2653 {
2654         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2655
2656         return sprintf(buf, "%d\n", target->srp_host->port);
2657 }
2658
2659 static ssize_t show_local_ib_device(struct device *dev,
2660                                     struct device_attribute *attr, char *buf)
2661 {
2662         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2663
2664         return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
2665 }
2666
2667 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
2668                              char *buf)
2669 {
2670         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2671
2672         return sprintf(buf, "%d\n", target->ch_count);
2673 }
2674
2675 static ssize_t show_comp_vector(struct device *dev,
2676                                 struct device_attribute *attr, char *buf)
2677 {
2678         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2679
2680         return sprintf(buf, "%d\n", target->comp_vector);
2681 }
2682
2683 static ssize_t show_tl_retry_count(struct device *dev,
2684                                    struct device_attribute *attr, char *buf)
2685 {
2686         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2687
2688         return sprintf(buf, "%d\n", target->tl_retry_count);
2689 }
2690
2691 static ssize_t show_cmd_sg_entries(struct device *dev,
2692                                    struct device_attribute *attr, char *buf)
2693 {
2694         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2695
2696         return sprintf(buf, "%u\n", target->cmd_sg_cnt);
2697 }
2698
2699 static ssize_t show_allow_ext_sg(struct device *dev,
2700                                  struct device_attribute *attr, char *buf)
2701 {
2702         struct srp_target_port *target = host_to_target(class_to_shost(dev));
2703
2704         return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
2705 }
2706
2707 static DEVICE_ATTR(id_ext,          S_IRUGO, show_id_ext,          NULL);
2708 static DEVICE_ATTR(ioc_guid,        S_IRUGO, show_ioc_guid,        NULL);
2709 static DEVICE_ATTR(service_id,      S_IRUGO, show_service_id,      NULL);
2710 static DEVICE_ATTR(pkey,            S_IRUGO, show_pkey,            NULL);
2711 static DEVICE_ATTR(sgid,            S_IRUGO, show_sgid,            NULL);
2712 static DEVICE_ATTR(dgid,            S_IRUGO, show_dgid,            NULL);
2713 static DEVICE_ATTR(orig_dgid,       S_IRUGO, show_orig_dgid,       NULL);
2714 static DEVICE_ATTR(req_lim,         S_IRUGO, show_req_lim,         NULL);
2715 static DEVICE_ATTR(zero_req_lim,    S_IRUGO, show_zero_req_lim,    NULL);
2716 static DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,   NULL);
2717 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
2718 static DEVICE_ATTR(ch_count,        S_IRUGO, show_ch_count,        NULL);
2719 static DEVICE_ATTR(comp_vector,     S_IRUGO, show_comp_vector,     NULL);
2720 static DEVICE_ATTR(tl_retry_count,  S_IRUGO, show_tl_retry_count,  NULL);
2721 static DEVICE_ATTR(cmd_sg_entries,  S_IRUGO, show_cmd_sg_entries,  NULL);
2722 static DEVICE_ATTR(allow_ext_sg,    S_IRUGO, show_allow_ext_sg,    NULL);
2723
2724 static struct device_attribute *srp_host_attrs[] = {
2725         &dev_attr_id_ext,
2726         &dev_attr_ioc_guid,
2727         &dev_attr_service_id,
2728         &dev_attr_pkey,
2729         &dev_attr_sgid,
2730         &dev_attr_dgid,
2731         &dev_attr_orig_dgid,
2732         &dev_attr_req_lim,
2733         &dev_attr_zero_req_lim,
2734         &dev_attr_local_ib_port,
2735         &dev_attr_local_ib_device,
2736         &dev_attr_ch_count,
2737         &dev_attr_comp_vector,
2738         &dev_attr_tl_retry_count,
2739         &dev_attr_cmd_sg_entries,
2740         &dev_attr_allow_ext_sg,
2741         NULL
2742 };
2743
2744 static struct scsi_host_template srp_template = {
2745         .module                         = THIS_MODULE,
2746         .name                           = "InfiniBand SRP initiator",
2747         .proc_name                      = DRV_NAME,
2748         .slave_configure                = srp_slave_configure,
2749         .info                           = srp_target_info,
2750         .queuecommand                   = srp_queuecommand,
2751         .change_queue_depth             = srp_change_queue_depth,
2752         .change_queue_type              = scsi_change_queue_type,
2753         .eh_abort_handler               = srp_abort,
2754         .eh_device_reset_handler        = srp_reset_device,
2755         .eh_host_reset_handler          = srp_reset_host,
2756         .skip_settle_delay              = true,
2757         .sg_tablesize                   = SRP_DEF_SG_TABLESIZE,
2758         .can_queue                      = SRP_DEFAULT_CMD_SQ_SIZE,
2759         .this_id                        = -1,
2760         .cmd_per_lun                    = SRP_DEFAULT_CMD_SQ_SIZE,
2761         .use_clustering                 = ENABLE_CLUSTERING,
2762         .shost_attrs                    = srp_host_attrs,
2763         .use_blk_tags                   = 1,
2764         .track_queue_depth              = 1,
2765 };
2766
2767 static int srp_sdev_count(struct Scsi_Host *host)
2768 {
2769         struct scsi_device *sdev;
2770         int c = 0;
2771
2772         shost_for_each_device(sdev, host)
2773                 c++;
2774
2775         return c;
2776 }
2777
2778 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
2779 {
2780         struct srp_rport_identifiers ids;
2781         struct srp_rport *rport;
2782
2783         target->state = SRP_TARGET_SCANNING;
2784         sprintf(target->target_name, "SRP.T10:%016llX",
2785                  (unsigned long long) be64_to_cpu(target->id_ext));
2786
2787         if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device))
2788                 return -ENODEV;
2789
2790         memcpy(ids.port_id, &target->id_ext, 8);
2791         memcpy(ids.port_id + 8, &target->ioc_guid, 8);
2792         ids.roles = SRP_RPORT_ROLE_TARGET;
2793         rport = srp_rport_add(target->scsi_host, &ids);
2794         if (IS_ERR(rport)) {
2795                 scsi_remove_host(target->scsi_host);
2796                 return PTR_ERR(rport);
2797         }
2798
2799         rport->lld_data = target;
2800         target->rport = rport;
2801
2802         spin_lock(&host->target_lock);
2803         list_add_tail(&target->list, &host->target_list);
2804         spin_unlock(&host->target_lock);
2805
2806         scsi_scan_target(&target->scsi_host->shost_gendev,
2807                          0, target->scsi_id, SCAN_WILD_CARD, 0);
2808
2809         if (!target->connected || target->qp_in_error) {
2810                 shost_printk(KERN_INFO, target->scsi_host,
2811                              PFX "SCSI scan failed - removing SCSI host\n");
2812                 srp_queue_remove_work(target);
2813                 goto out;
2814         }
2815
2816         pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
2817                  dev_name(&target->scsi_host->shost_gendev),
2818                  srp_sdev_count(target->scsi_host));
2819
2820         spin_lock_irq(&target->lock);
2821         if (target->state == SRP_TARGET_SCANNING)
2822                 target->state = SRP_TARGET_LIVE;
2823         spin_unlock_irq(&target->lock);
2824
2825 out:
2826         return 0;
2827 }
2828
2829 static void srp_release_dev(struct device *dev)
2830 {
2831         struct srp_host *host =
2832                 container_of(dev, struct srp_host, dev);
2833
2834         complete(&host->released);
2835 }
2836
2837 static struct class srp_class = {
2838         .name    = "infiniband_srp",
2839         .dev_release = srp_release_dev
2840 };
2841
2842 /**
2843  * srp_conn_unique() - check whether the connection to a target is unique
2844  * @host:   SRP host.
2845  * @target: SRP target port.
2846  */
2847 static bool srp_conn_unique(struct srp_host *host,
2848                             struct srp_target_port *target)
2849 {
2850         struct srp_target_port *t;
2851         bool ret = false;
2852
2853         if (target->state == SRP_TARGET_REMOVED)
2854                 goto out;
2855
2856         ret = true;
2857
2858         spin_lock(&host->target_lock);
2859         list_for_each_entry(t, &host->target_list, list) {
2860                 if (t != target &&
2861                     target->id_ext == t->id_ext &&
2862                     target->ioc_guid == t->ioc_guid &&
2863                     target->initiator_ext == t->initiator_ext) {
2864                         ret = false;
2865                         break;
2866                 }
2867         }
2868         spin_unlock(&host->target_lock);
2869
2870 out:
2871         return ret;
2872 }
2873
2874 /*
2875  * Target ports are added by writing
2876  *
2877  *     id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
2878  *     pkey=<P_Key>,service_id=<service ID>
2879  *
2880  * to the add_target sysfs attribute.
2881  */
2882 enum {
2883         SRP_OPT_ERR             = 0,
2884         SRP_OPT_ID_EXT          = 1 << 0,
2885         SRP_OPT_IOC_GUID        = 1 << 1,
2886         SRP_OPT_DGID            = 1 << 2,
2887         SRP_OPT_PKEY            = 1 << 3,
2888         SRP_OPT_SERVICE_ID      = 1 << 4,
2889         SRP_OPT_MAX_SECT        = 1 << 5,
2890         SRP_OPT_MAX_CMD_PER_LUN = 1 << 6,
2891         SRP_OPT_IO_CLASS        = 1 << 7,
2892         SRP_OPT_INITIATOR_EXT   = 1 << 8,
2893         SRP_OPT_CMD_SG_ENTRIES  = 1 << 9,
2894         SRP_OPT_ALLOW_EXT_SG    = 1 << 10,
2895         SRP_OPT_SG_TABLESIZE    = 1 << 11,
2896         SRP_OPT_COMP_VECTOR     = 1 << 12,
2897         SRP_OPT_TL_RETRY_COUNT  = 1 << 13,
2898         SRP_OPT_QUEUE_SIZE      = 1 << 14,
2899         SRP_OPT_ALL             = (SRP_OPT_ID_EXT       |
2900                                    SRP_OPT_IOC_GUID     |
2901                                    SRP_OPT_DGID         |
2902                                    SRP_OPT_PKEY         |
2903                                    SRP_OPT_SERVICE_ID),
2904 };
2905
2906 static const match_table_t srp_opt_tokens = {
2907         { SRP_OPT_ID_EXT,               "id_ext=%s"             },
2908         { SRP_OPT_IOC_GUID,             "ioc_guid=%s"           },
2909         { SRP_OPT_DGID,                 "dgid=%s"               },
2910         { SRP_OPT_PKEY,                 "pkey=%x"               },
2911         { SRP_OPT_SERVICE_ID,           "service_id=%s"         },
2912         { SRP_OPT_MAX_SECT,             "max_sect=%d"           },
2913         { SRP_OPT_MAX_CMD_PER_LUN,      "max_cmd_per_lun=%d"    },
2914         { SRP_OPT_IO_CLASS,             "io_class=%x"           },
2915         { SRP_OPT_INITIATOR_EXT,        "initiator_ext=%s"      },
2916         { SRP_OPT_CMD_SG_ENTRIES,       "cmd_sg_entries=%u"     },
2917         { SRP_OPT_ALLOW_EXT_SG,         "allow_ext_sg=%u"       },
2918         { SRP_OPT_SG_TABLESIZE,         "sg_tablesize=%u"       },
2919         { SRP_OPT_COMP_VECTOR,          "comp_vector=%u"        },
2920         { SRP_OPT_TL_RETRY_COUNT,       "tl_retry_count=%u"     },
2921         { SRP_OPT_QUEUE_SIZE,           "queue_size=%d"         },
2922         { SRP_OPT_ERR,                  NULL                    }
2923 };
2924
2925 static int srp_parse_options(const char *buf, struct srp_target_port *target)
2926 {
2927         char *options, *sep_opt;
2928         char *p;
2929         char dgid[3];
2930         substring_t args[MAX_OPT_ARGS];
2931         int opt_mask = 0;
2932         int token;
2933         int ret = -EINVAL;
2934         int i;
2935
2936         options = kstrdup(buf, GFP_KERNEL);
2937         if (!options)
2938                 return -ENOMEM;
2939
2940         sep_opt = options;
2941         while ((p = strsep(&sep_opt, ",")) != NULL) {
2942                 if (!*p)
2943                         continue;
2944
2945                 token = match_token(p, srp_opt_tokens, args);
2946                 opt_mask |= token;
2947
2948                 switch (token) {
2949                 case SRP_OPT_ID_EXT:
2950                         p = match_strdup(args);
2951                         if (!p) {
2952                                 ret = -ENOMEM;
2953                                 goto out;
2954                         }
2955                         target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
2956                         kfree(p);
2957                         break;
2958
2959                 case SRP_OPT_IOC_GUID:
2960                         p = match_strdup(args);
2961                         if (!p) {
2962                                 ret = -ENOMEM;
2963                                 goto out;
2964                         }
2965                         target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
2966                         kfree(p);
2967                         break;
2968
2969                 case SRP_OPT_DGID:
2970                         p = match_strdup(args);
2971                         if (!p) {
2972                                 ret = -ENOMEM;
2973                                 goto out;
2974                         }
2975                         if (strlen(p) != 32) {
2976                                 pr_warn("bad dest GID parameter '%s'\n", p);
2977                                 kfree(p);
2978                                 goto out;
2979                         }
2980
2981                         for (i = 0; i < 16; ++i) {
2982                                 strlcpy(dgid, p + i * 2, sizeof(dgid));
2983                                 if (sscanf(dgid, "%hhx",
2984                                            &target->orig_dgid.raw[i]) < 1) {
2985                                         ret = -EINVAL;
2986                                         kfree(p);
2987                                         goto out;
2988                                 }
2989                         }
2990                         kfree(p);
2991                         break;
2992
2993                 case SRP_OPT_PKEY:
2994                         if (match_hex(args, &token)) {
2995                                 pr_warn("bad P_Key parameter '%s'\n", p);
2996                                 goto out;
2997                         }
2998                         target->pkey = cpu_to_be16(token);
2999                         break;
3000
3001                 case SRP_OPT_SERVICE_ID:
3002                         p = match_strdup(args);
3003                         if (!p) {
3004                                 ret = -ENOMEM;
3005                                 goto out;
3006                         }
3007                         target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
3008                         kfree(p);
3009                         break;
3010
3011                 case SRP_OPT_MAX_SECT:
3012                         if (match_int(args, &token)) {
3013                                 pr_warn("bad max sect parameter '%s'\n", p);
3014                                 goto out;
3015                         }
3016                         target->scsi_host->max_sectors = token;
3017                         break;
3018
3019                 case SRP_OPT_QUEUE_SIZE:
3020                         if (match_int(args, &token) || token < 1) {
3021                                 pr_warn("bad queue_size parameter '%s'\n", p);
3022                                 goto out;
3023                         }
3024                         target->scsi_host->can_queue = token;
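                     /*
                      * Reserve queue slots beyond the SCSI queue depth
                      * for RSP and task-management IUs.
                      */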
3025                         target->queue_size = token + SRP_RSP_SQ_SIZE +
3026                                              SRP_TSK_MGMT_SQ_SIZE;
3027                         if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3028                                 target->scsi_host->cmd_per_lun = token;
3029                         break;
3030
3031                 case SRP_OPT_MAX_CMD_PER_LUN:
3032                         if (match_int(args, &token) || token < 1) {
3033                                 pr_warn("bad max cmd_per_lun parameter '%s'\n",
3034                                         p);
3035                                 goto out;
3036                         }
3037                         target->scsi_host->cmd_per_lun = token;
3038                         break;
3039
3040                 case SRP_OPT_IO_CLASS:
3041                         if (match_hex(args, &token)) {
3042                                 pr_warn("bad IO class parameter '%s'\n", p);
3043                                 goto out;
3044                         }
3045                         if (token != SRP_REV10_IB_IO_CLASS &&
3046                             token != SRP_REV16A_IB_IO_CLASS) {
3047                                 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3048                                         token, SRP_REV10_IB_IO_CLASS,
3049                                         SRP_REV16A_IB_IO_CLASS);
3050                                 goto out;
3051                         }
3052                         target->io_class = token;
3053                         break;
3054
3055                 case SRP_OPT_INITIATOR_EXT:
3056                         p = match_strdup(args);
3057                         if (!p) {
3058                                 ret = -ENOMEM;
3059                                 goto out;
3060                         }
3061                         target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3062                         kfree(p);
3063                         break;
3064
3065                 case SRP_OPT_CMD_SG_ENTRIES:
3066                         if (match_int(args, &token) || token < 1 || token > 255) {
3067                                 pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3068                                         p);
3069                                 goto out;
3070                         }
3071                         target->cmd_sg_cnt = token;
3072                         break;
3073
3074                 case SRP_OPT_ALLOW_EXT_SG:
3075                         if (match_int(args, &token)) {
3076                                 pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3077                                 goto out;
3078                         }
3079                         target->allow_ext_sg = !!token;
3080                         break;
3081
3082                 case SRP_OPT_SG_TABLESIZE:
3083                         if (match_int(args, &token) || token < 1 ||
3084                                         token > SCSI_MAX_SG_CHAIN_SEGMENTS) {
3085                                 pr_warn("bad max sg_tablesize parameter '%s'\n",
3086                                         p);
3087                                 goto out;
3088                         }
3089                         target->sg_tablesize = token;
3090                         break;
3091
3092                 case SRP_OPT_COMP_VECTOR:
3093                         if (match_int(args, &token) || token < 0) {
3094                                 pr_warn("bad comp_vector parameter '%s'\n", p);
3095                                 goto out;
3096                         }
3097                         target->comp_vector = token;
3098                         break;
3099
3100                 case SRP_OPT_TL_RETRY_COUNT:
3101                         if (match_int(args, &token) || token < 2 || token > 7) {
3102                                 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3103                                         p);
3104                                 goto out;
3105                         }
3106                         target->tl_retry_count = token;
3107                         break;
3108
3109                 default:
3110                         pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3111                                 p);
3112                         goto out;
3113                 }
3114         }
3115
3116         if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
3117                 ret = 0;
3118         else
3119                 for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
3120                         if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
3121                             !(srp_opt_tokens[i].token & opt_mask))
3122                                 pr_warn("target creation request is missing parameter '%s'\n",
3123                                         srp_opt_tokens[i].pattern);
3124
3125         if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue &&
3126             (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3127                 pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3128                         target->scsi_host->cmd_per_lun,
3129                         target->scsi_host->can_queue);
3130
3131 out:
3132         kfree(options);
3133         return ret;
3134 }
3135
3136 static ssize_t srp_create_target(struct device *dev,
3137                                  struct device_attribute *attr,
3138                                  const char *buf, size_t count)
3139 {
3140         struct srp_host *host =
3141                 container_of(dev, struct srp_host, dev);
3142         struct Scsi_Host *target_host;
3143         struct srp_target_port *target;
3144         struct srp_rdma_ch *ch;
3145         struct srp_device *srp_dev = host->srp_dev;
3146         struct ib_device *ibdev = srp_dev->dev;
3147         int ret, node_idx, node, cpu, i;
3148         bool multich = false;
3149
3150         target_host = scsi_host_alloc(&srp_template,
3151                                       sizeof (struct srp_target_port));
3152         if (!target_host)
3153                 return -ENOMEM;
3154
3155         target_host->transportt  = ib_srp_transport_template;
3156         target_host->max_channel = 0;
3157         target_host->max_id      = 1;
3158         target_host->max_lun     = SRP_MAX_LUN;
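             /*
              * Size of the cdb field of struct srp_cmd, written as a sizeof
              * on a null pointer cast; equivalent to
              * FIELD_SIZEOF(struct srp_cmd, cdb).
              */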
3159         target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3160
3161         target = host_to_target(target_host);
3162
3163         target->io_class        = SRP_REV16A_IB_IO_CLASS;
3164         target->scsi_host       = target_host;
3165         target->srp_host        = host;
3166         target->lkey            = host->srp_dev->mr->lkey;
3167         target->rkey            = host->srp_dev->mr->rkey;
3168         target->cmd_sg_cnt      = cmd_sg_entries;
3169         target->sg_tablesize    = indirect_sg_entries ? : cmd_sg_entries;
3170         target->allow_ext_sg    = allow_ext_sg;
3171         target->tl_retry_count  = 7;
3172         target->queue_size      = SRP_DEFAULT_QUEUE_SIZE;
3173
3174         /*
3175          * Prevent the SCSI host from being removed by srp_remove_target()
3176          * before this function returns.
3177          */
3178         scsi_host_get(target->scsi_host);
3179
3180         mutex_lock(&host->add_target_mutex);
3181
3182         ret = srp_parse_options(buf, target);
3183         if (ret)
3184                 goto err;
3185
3186         ret = scsi_init_shared_tag_map(target_host, target_host->can_queue);
3187         if (ret)
3188                 goto err;
3189
3190         target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3191
3192         if (!srp_conn_unique(target->srp_host, target)) {
3193                 shost_printk(KERN_INFO, target->scsi_host,
3194                              PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3195                              be64_to_cpu(target->id_ext),
3196                              be64_to_cpu(target->ioc_guid),
3197                              be64_to_cpu(target->initiator_ext));
3198                 ret = -EEXIST;
3199                 goto err;
3200         }
3201
3202         if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3203             target->cmd_sg_cnt < target->sg_tablesize) {
3204                 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3205                 target->sg_tablesize = target->cmd_sg_cnt;
3206         }
3207
3208         target_host->sg_tablesize = target->sg_tablesize;
3209         target->indirect_size = target->sg_tablesize *
3210                                 sizeof (struct srp_direct_buf);
3211         target->max_iu_len = sizeof (struct srp_cmd) +
3212                              sizeof (struct srp_indirect_buf) +
3213                              target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
3214
3215         INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3216         INIT_WORK(&target->remove_work, srp_remove_work);
3217         spin_lock_init(&target->lock);
3218         ret = ib_query_gid(ibdev, host->port, 0, &target->sgid);
3219         if (ret)
3220                 goto err;
3221
3222         ret = -ENOMEM;
3223         target->ch_count = max_t(unsigned, num_online_nodes(),
3224                                  min(ch_count ? :
3225                                      min(4 * num_online_nodes(),
3226                                          ibdev->num_comp_vectors),
3227                                      num_online_cpus()));
3228         target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3229                              GFP_KERNEL);
3230         if (!target->ch)
3231                 goto err;
3232
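             /*
              * Spread the channels across the online NUMA nodes and give each
              * node its own slice of the completion vectors. Illustrative
              * numbers (hypothetical configuration, assuming at least four
              * online CPUs per node): with two online nodes, ch_count == 8,
              * num_comp_vectors == 4 and comp_vector == 0, node 0 gets
              * channels 0..3 on completion vectors 0..1 and node 1 gets
              * channels 4..7 on vectors 2..3.
              */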
3233         node_idx = 0;
3234         for_each_online_node(node) {
3235                 const int ch_start = (node_idx * target->ch_count /
3236                                       num_online_nodes());
3237                 const int ch_end = ((node_idx + 1) * target->ch_count /
3238                                     num_online_nodes());
3239                 const int cv_start = (node_idx * ibdev->num_comp_vectors /
3240                                       num_online_nodes() + target->comp_vector)
3241                                      % ibdev->num_comp_vectors;
3242                 const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
3243                                     num_online_nodes() + target->comp_vector)
3244                                    % ibdev->num_comp_vectors;
3245                 int cpu_idx = 0;
3246
3247                 for_each_online_cpu(cpu) {
3248                         if (cpu_to_node(cpu) != node)
3249                                 continue;
3250                         if (ch_start + cpu_idx >= ch_end)
3251                                 continue;
3252                         ch = &target->ch[ch_start + cpu_idx];
3253                         ch->target = target;
3254                         ch->comp_vector = cv_start == cv_end ? cv_start :
3255                                 cv_start + cpu_idx % (cv_end - cv_start);
3256                         spin_lock_init(&ch->lock);
3257                         INIT_LIST_HEAD(&ch->free_tx);
3258                         ret = srp_new_cm_id(ch);
3259                         if (ret)
3260                                 goto err_disconnect;
3261
3262                         ret = srp_create_ch_ib(ch);
3263                         if (ret)
3264                                 goto err_disconnect;
3265
3266                         ret = srp_alloc_req_data(ch);
3267                         if (ret)
3268                                 goto err_disconnect;
3269
3270                         ret = srp_connect_ch(ch, multich);
3271                         if (ret) {
3272                                 shost_printk(KERN_ERR, target->scsi_host,
3273                                              PFX "Connection %d/%d failed\n",
3274                                              ch_start + cpu_idx,
3275                                              target->ch_count);
3276                                 if (node_idx == 0 && cpu_idx == 0) {
3277                                         goto err_disconnect;
3278                                 } else {
3279                                         srp_free_ch_ib(target, ch);
3280                                         srp_free_req_data(target, ch);
3281                                         target->ch_count = ch - target->ch;
3282                                         break;
3283                                 }
3284                         }
3285
3286                         multich = true;
3287                         cpu_idx++;
3288                 }
3289                 node_idx++;
3290         }
3291
3292         target->scsi_host->nr_hw_queues = target->ch_count;
3293
3294         ret = srp_add_target(host, target);
3295         if (ret)
3296                 goto err_disconnect;
3297
3298         if (target->state != SRP_TARGET_REMOVED) {
3299                 shost_printk(KERN_DEBUG, target->scsi_host, PFX
3300                              "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3301                              be64_to_cpu(target->id_ext),
3302                              be64_to_cpu(target->ioc_guid),
3303                              be16_to_cpu(target->pkey),
3304                              be64_to_cpu(target->service_id),
3305                              target->sgid.raw, target->orig_dgid.raw);
3306         }
3307
3308         ret = count;
3309
3310 out:
3311         mutex_unlock(&host->add_target_mutex);
3312
3313         scsi_host_put(target->scsi_host);
3314
3315         return ret;
3316
3317 err_disconnect:
3318         srp_disconnect_target(target);
3319
3320         for (i = 0; i < target->ch_count; i++) {
3321                 ch = &target->ch[i];
3322                 srp_free_ch_ib(target, ch);
3323                 srp_free_req_data(target, ch);
3324         }
3325
3326         kfree(target->ch);
3327
3328 err:
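             /*
              * Drop the reference obtained via scsi_host_alloc(); the
              * reference taken via scsi_host_get() above is dropped at the
              * out label.
              */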
3329         scsi_host_put(target_host);
3330         goto out;
3331 }
3332
3333 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
3334
3335 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
3336                           char *buf)
3337 {
3338         struct srp_host *host = container_of(dev, struct srp_host, dev);
3339
3340         return sprintf(buf, "%s\n", host->srp_dev->dev->name);
3341 }
3342
3343 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
3344
3345 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
3346                          char *buf)
3347 {
3348         struct srp_host *host = container_of(dev, struct srp_host, dev);
3349
3350         return sprintf(buf, "%d\n", host->port);
3351 }
3352
3353 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
3354
3355 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
3356 {
3357         struct srp_host *host;
3358
3359         host = kzalloc(sizeof *host, GFP_KERNEL);
3360         if (!host)
3361                 return NULL;
3362
3363         INIT_LIST_HEAD(&host->target_list);
3364         spin_lock_init(&host->target_lock);
3365         init_completion(&host->released);
3366         mutex_init(&host->add_target_mutex);
3367         host->srp_dev = device;
3368         host->port = port;
3369
3370         host->dev.class = &srp_class;
3371         host->dev.parent = device->dev->dma_device;
3372         dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
3373
3374         if (device_register(&host->dev))
3375                 goto free_host;
3376         if (device_create_file(&host->dev, &dev_attr_add_target))
3377                 goto err_class;
3378         if (device_create_file(&host->dev, &dev_attr_ibdev))
3379                 goto err_class;
3380         if (device_create_file(&host->dev, &dev_attr_port))
3381                 goto err_class;
3382
3383         return host;
3384
3385 err_class:
3386         device_unregister(&host->dev);
3387
3388 free_host:
3389         kfree(host);
3390
3391         return NULL;
3392 }
3393
3394 static void srp_add_one(struct ib_device *device)
3395 {
3396         struct srp_device *srp_dev;
3397         struct ib_device_attr *dev_attr;
3398         struct srp_host *host;
3399         int mr_page_shift, s, e, p;
3400         u64 max_pages_per_mr;
3401
3402         dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
3403         if (!dev_attr)
3404                 return;
3405
3406         if (ib_query_device(device, dev_attr)) {
3407                 pr_warn("Query device failed for %s\n", device->name);
3408                 goto free_attr;
3409         }
3410
3411         srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL);
3412         if (!srp_dev)
3413                 goto free_attr;
3414
3415         srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
3416                             device->map_phys_fmr && device->unmap_fmr);
3417         srp_dev->has_fr = (dev_attr->device_cap_flags &
3418                            IB_DEVICE_MEM_MGT_EXTENSIONS);
3419         if (!srp_dev->has_fmr && !srp_dev->has_fr)
3420                 dev_warn(&device->dev, "neither FMR nor FR is supported\n");
3421
3422         srp_dev->use_fast_reg = (srp_dev->has_fr &&
3423                                  (!srp_dev->has_fmr || prefer_fr));
3424
3425         /*
3426          * Use the smallest page size supported by the HCA, down to a
3427          * minimum of 4096 bytes. We're unlikely to build large sglists
3428          * out of smaller entries.
3429          */
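             /*
              * Worked example (hypothetical HCA): if the lowest bit set in
              * page_size_cap is bit 12, mr_page_shift becomes 12 and
              * mr_page_size 4096 bytes; max_mr_size == 16 MiB would then
              * yield 4096 pages per MR, possibly capped further by
              * SRP_MAX_PAGES_PER_MR below.
              */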
3430         mr_page_shift           = max(12, ffs(dev_attr->page_size_cap) - 1);
3431         srp_dev->mr_page_size   = 1 << mr_page_shift;
3432         srp_dev->mr_page_mask   = ~((u64) srp_dev->mr_page_size - 1);
3433         max_pages_per_mr        = dev_attr->max_mr_size;
3434         do_div(max_pages_per_mr, srp_dev->mr_page_size);
3435         srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
3436                                           max_pages_per_mr);
3437         if (srp_dev->use_fast_reg) {
3438                 srp_dev->max_pages_per_mr =
3439                         min_t(u32, srp_dev->max_pages_per_mr,
3440                               dev_attr->max_fast_reg_page_list_len);
3441         }
3442         srp_dev->mr_max_size    = srp_dev->mr_page_size *
3443                                    srp_dev->max_pages_per_mr;
3444         pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
3445                  device->name, mr_page_shift, dev_attr->max_mr_size,
3446                  dev_attr->max_fast_reg_page_list_len,
3447                  srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
3448
3449         INIT_LIST_HEAD(&srp_dev->dev_list);
3450
3451         srp_dev->dev = device;
3452         srp_dev->pd  = ib_alloc_pd(device);
3453         if (IS_ERR(srp_dev->pd))
3454                 goto free_dev;
3455
3456         srp_dev->mr = ib_get_dma_mr(srp_dev->pd,
3457                                     IB_ACCESS_LOCAL_WRITE |
3458                                     IB_ACCESS_REMOTE_READ |
3459                                     IB_ACCESS_REMOTE_WRITE);
3460         if (IS_ERR(srp_dev->mr))
3461                 goto err_pd;
3462
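             /*
              * IB switches expose a single management port numbered 0;
              * CAs and routers number their ports 1..phys_port_cnt.
              */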
3463         if (device->node_type == RDMA_NODE_IB_SWITCH) {
3464                 s = 0;
3465                 e = 0;
3466         } else {
3467                 s = 1;
3468                 e = device->phys_port_cnt;
3469         }
3470
3471         for (p = s; p <= e; ++p) {
3472                 host = srp_add_port(srp_dev, p);
3473                 if (host)
3474                         list_add_tail(&host->list, &srp_dev->dev_list);
3475         }
3476
3477         ib_set_client_data(device, &srp_client, srp_dev);
3478
3479         goto free_attr;
3480
3481 err_pd:
3482         ib_dealloc_pd(srp_dev->pd);
3483
3484 free_dev:
3485         kfree(srp_dev);
3486
3487 free_attr:
3488         kfree(dev_attr);
3489 }
3490
3491 static void srp_remove_one(struct ib_device *device)
3492 {
3493         struct srp_device *srp_dev;
3494         struct srp_host *host, *tmp_host;
3495         struct srp_target_port *target;
3496
3497         srp_dev = ib_get_client_data(device, &srp_client);
3498         if (!srp_dev)
3499                 return;
3500
3501         list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
3502                 device_unregister(&host->dev);
3503                 /*
3504                  * Wait for the sysfs entry to go away, so that no new
3505                  * target ports can be created.
3506                  */
3507                 wait_for_completion(&host->released);
3508
3509                 /*
3510                  * Remove all target ports.
3511                  */
3512                 spin_lock(&host->target_lock);
3513                 list_for_each_entry(target, &host->target_list, list)
3514                         srp_queue_remove_work(target);
3515                 spin_unlock(&host->target_lock);
3516
3517                 /*
3518                  * Wait for tl_err and target port removal tasks.
3519                  */
3520                 flush_workqueue(system_long_wq);
3521                 flush_workqueue(srp_remove_wq);
3522
3523                 kfree(host);
3524         }
3525
3526         ib_dereg_mr(srp_dev->mr);
3527         ib_dealloc_pd(srp_dev->pd);
3528
3529         kfree(srp_dev);
3530 }
3531
3532 static struct srp_function_template ib_srp_transport_functions = {
3533         .has_rport_state         = true,
3534         .reset_timer_if_blocked  = true,
3535         .reconnect_delay         = &srp_reconnect_delay,
3536         .fast_io_fail_tmo        = &srp_fast_io_fail_tmo,
3537         .dev_loss_tmo            = &srp_dev_loss_tmo,
3538         .reconnect               = srp_rport_reconnect,
3539         .rport_delete            = srp_rport_delete,
3540         .terminate_rport_io      = srp_terminate_io,
3541 };
3542
3543 static int __init srp_init_module(void)
3544 {
3545         int ret;
3546
3547         BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *));
3548
3549         if (srp_sg_tablesize) {
3550                 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
3551                 if (!cmd_sg_entries)
3552                         cmd_sg_entries = srp_sg_tablesize;
3553         }
3554
3555         if (!cmd_sg_entries)
3556                 cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
3557
3558         if (cmd_sg_entries > 255) {
3559                 pr_warn("Clamping cmd_sg_entries to 255\n");
3560                 cmd_sg_entries = 255;
3561         }
3562
3563         if (!indirect_sg_entries)
3564                 indirect_sg_entries = cmd_sg_entries;
3565         else if (indirect_sg_entries < cmd_sg_entries) {
3566                 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
3567                         cmd_sg_entries);
3568                 indirect_sg_entries = cmd_sg_entries;
3569         }
3570
3571         srp_remove_wq = create_workqueue("srp_remove");
3572         if (!srp_remove_wq) {
3573                 ret = -ENOMEM;
3574                 goto out;
3575         }
3576
3577         ret = -ENOMEM;
3578         ib_srp_transport_template =
3579                 srp_attach_transport(&ib_srp_transport_functions);
3580         if (!ib_srp_transport_template)
3581                 goto destroy_wq;
3582
3583         ret = class_register(&srp_class);
3584         if (ret) {
3585                 pr_err("couldn't register class infiniband_srp\n");
3586                 goto release_tr;
3587         }
3588
3589         ib_sa_register_client(&srp_sa_client);
3590
3591         ret = ib_register_client(&srp_client);
3592         if (ret) {
3593                 pr_err("couldn't register IB client\n");
3594                 goto unreg_sa;
3595         }
3596
3597 out:
3598         return ret;
3599
3600 unreg_sa:
3601         ib_sa_unregister_client(&srp_sa_client);
3602         class_unregister(&srp_class);
3603
3604 release_tr:
3605         srp_release_transport(ib_srp_transport_template);
3606
3607 destroy_wq:
3608         destroy_workqueue(srp_remove_wq);
3609         goto out;
3610 }
3611
3612 static void __exit srp_cleanup_module(void)
3613 {
3614         ib_unregister_client(&srp_client);
3615         ib_sa_unregister_client(&srp_sa_client);
3616         class_unregister(&srp_class);
3617         srp_release_transport(ib_srp_transport_template);
3618         destroy_workqueue(srp_remove_wq);
3619 }
3620
3621 module_init(srp_init_module);
3622 module_exit(srp_cleanup_module);