mlx4_core: Implement mechanism for reserved Q_Keys
drivers/net/ethernet/mellanox/mlx4/main.c
/*
 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/delay.h>

#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>

#include "mlx4.h"
#include "fw.h"
#include "icm.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);

struct workqueue_struct *mlx4_wq;

#ifdef CONFIG_MLX4_DEBUG

int mlx4_debug_level = 0;
module_param_named(debug_level, mlx4_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");

#endif /* CONFIG_MLX4_DEBUG */

#ifdef CONFIG_PCI_MSI

static int msi_x = 1;
module_param(msi_x, int, 0444);
MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");

#else /* CONFIG_PCI_MSI */

#define msi_x (0)

#endif /* CONFIG_PCI_MSI */

static int num_vfs;
module_param(num_vfs, int, 0444);
MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0");

static int probe_vf;
module_param(probe_vf, int, 0644);
MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)");

int mlx4_log_num_mgm_entry_size = 10;
module_param_named(log_num_mgm_entry_size,
                        mlx4_log_num_mgm_entry_size, int, 0444);
MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, which defines the "
                                         "number of QPs per MCG; for example, "
                                         "10 gives 248. Range: 9 <= "
                                         "log_num_mgm_entry_size <= 12");

#define MLX4_VF                                        (1 << 0)

#define HCA_GLOBAL_CAP_MASK            0
#define PF_CONTEXT_BEHAVIOUR_MASK      0

static char mlx4_version[] __devinitdata =
        DRV_NAME ": Mellanox ConnectX core driver v"
        DRV_VERSION " (" DRV_RELDATE ")\n";

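/* Default resource limits; mlx4_init_hca() copies this profile and
 * hands it to mlx4_make_profile() to size the ICM.
 */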
static struct mlx4_profile default_profile = {
        .num_qp         = 1 << 18,
        .num_srq        = 1 << 16,
        .rdmarc_per_qp  = 1 << 4,
        .num_cq         = 1 << 16,
        .num_mcg        = 1 << 13,
        .num_mpt        = 1 << 19,
        .num_mtt        = 1 << 20, /* It is really the number of MTT segments */
};

static int log_num_mac = 7;
module_param_named(log_num_mac, log_num_mac, int, 0444);
MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");

static int log_num_vlan;
module_param_named(log_num_vlan, log_num_vlan, int, 0444);
MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)");
/* Log2 max number of VLANs per ETH port (0-7) */
#define MLX4_LOG_NUM_VLANS 7

static bool use_prio;
module_param_named(use_prio, use_prio, bool, 0444);
MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports "
                  "(0/1, default 0)");

int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-7)");

static int port_type_array[2] = {MLX4_PORT_TYPE_NONE, MLX4_PORT_TYPE_NONE};
static int arr_argc = 2;
module_param_array(port_type_array, int, &arr_argc, 0444);
MODULE_PARM_DESC(port_type_array, "Array of port types: HW_DEFAULT (0) is default, "
                                "1 for IB, 2 for Ethernet");

struct mlx4_port_config {
        struct list_head list;
        enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
        struct pci_dev *pdev;
};

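/* Validate a requested port-type configuration: mixing types on one HCA
 * requires DPDP support, an ETH port may not precede an IB port, and
 * each requested type must be supported by the corresponding port.
 */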
int mlx4_check_port_params(struct mlx4_dev *dev,
                           enum mlx4_port_type *port_type)
{
        int i;

        for (i = 0; i < dev->caps.num_ports - 1; i++) {
                if (port_type[i] != port_type[i + 1]) {
                        if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
                                mlx4_err(dev, "Only same port types supported "
                                         "on this HCA, aborting.\n");
                                return -EINVAL;
                        }
                        if (port_type[i] == MLX4_PORT_TYPE_ETH &&
                            port_type[i + 1] == MLX4_PORT_TYPE_IB)
                                return -EINVAL;
                }
        }

        for (i = 0; i < dev->caps.num_ports; i++) {
                if (!(port_type[i] & dev->caps.supported_type[i+1])) {
                        mlx4_err(dev, "Requested port type for port %d is not "
                                      "supported on this HCA\n", i + 1);
                        return -EINVAL;
                }
        }
        return 0;
}

static void mlx4_set_port_mask(struct mlx4_dev *dev)
{
        int i;

        for (i = 1; i <= dev->caps.num_ports; ++i)
                dev->caps.port_mask[i] = dev->caps.port_type[i];
}

static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
        int err;
        int i;

        err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
        if (err) {
                mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
                return err;
        }

        if (dev_cap->min_page_sz > PAGE_SIZE) {
                mlx4_err(dev, "HCA minimum page size of %d bigger than "
                         "kernel PAGE_SIZE of %ld, aborting.\n",
                         dev_cap->min_page_sz, PAGE_SIZE);
                return -ENODEV;
        }
        if (dev_cap->num_ports > MLX4_MAX_PORTS) {
                mlx4_err(dev, "HCA has %d ports, but we only support %d, "
                         "aborting.\n",
                         dev_cap->num_ports, MLX4_MAX_PORTS);
                return -ENODEV;
        }

        if (dev_cap->uar_size > pci_resource_len(dev->pdev, 2)) {
                mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than "
                         "PCI resource 2 size of 0x%llx, aborting.\n",
                         dev_cap->uar_size,
                         (unsigned long long) pci_resource_len(dev->pdev, 2));
                return -ENODEV;
        }

        dev->caps.num_ports          = dev_cap->num_ports;
        dev->phys_caps.num_phys_eqs  = MLX4_MAX_EQ_NUM;
        for (i = 1; i <= dev->caps.num_ports; ++i) {
                dev->caps.vl_cap[i]         = dev_cap->max_vl[i];
                dev->caps.ib_mtu_cap[i]     = dev_cap->ib_mtu[i];
                dev->caps.gid_table_len[i]  = dev_cap->max_gids[i];
                dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i];
                dev->caps.port_width_cap[i] = dev_cap->max_port_width[i];
                dev->caps.eth_mtu_cap[i]    = dev_cap->eth_mtu[i];
                dev->caps.def_mac[i]        = dev_cap->def_mac[i];
                dev->caps.supported_type[i] = dev_cap->supported_port_types[i];
                dev->caps.suggested_type[i] = dev_cap->suggested_type[i];
                dev->caps.default_sense[i]  = dev_cap->default_sense[i];
                dev->caps.trans_type[i]     = dev_cap->trans_type[i];
                dev->caps.vendor_oui[i]     = dev_cap->vendor_oui[i];
                dev->caps.wavelength[i]     = dev_cap->wavelength[i];
                dev->caps.trans_code[i]     = dev_cap->trans_code[i];
        }

        dev->caps.uar_page_size      = PAGE_SIZE;
        dev->caps.num_uars           = dev_cap->uar_size / PAGE_SIZE;
        dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
        dev->caps.bf_reg_size        = dev_cap->bf_reg_size;
        dev->caps.bf_regs_per_page   = dev_cap->bf_regs_per_page;
        dev->caps.max_sq_sg          = dev_cap->max_sq_sg;
        dev->caps.max_rq_sg          = dev_cap->max_rq_sg;
        dev->caps.max_wqes           = dev_cap->max_qp_sz;
        dev->caps.max_qp_init_rdma   = dev_cap->max_requester_per_qp;
        dev->caps.max_srq_wqes       = dev_cap->max_srq_sz;
        dev->caps.max_srq_sge        = dev_cap->max_rq_sg - 1;
        dev->caps.reserved_srqs      = dev_cap->reserved_srqs;
        dev->caps.max_sq_desc_sz     = dev_cap->max_sq_desc_sz;
        dev->caps.max_rq_desc_sz     = dev_cap->max_rq_desc_sz;
        dev->caps.num_qp_per_mgm     = mlx4_get_qp_per_mgm(dev);
        /*
         * Subtract 1 from the limit because we need to allocate a
         * spare CQE so the HCA HW can tell the difference between an
         * empty CQ and a full CQ.
         */
        dev->caps.max_cqes           = dev_cap->max_cq_sz - 1;
        dev->caps.reserved_cqs       = dev_cap->reserved_cqs;
        dev->caps.reserved_eqs       = dev_cap->reserved_eqs;
        dev->caps.reserved_mtts      = dev_cap->reserved_mtts;
        dev->caps.reserved_mrws      = dev_cap->reserved_mrws;

        /* The first 128 UARs are used for EQ doorbells */
        dev->caps.reserved_uars      = max_t(int, 128, dev_cap->reserved_uars);
        dev->caps.reserved_pds       = dev_cap->reserved_pds;
        dev->caps.reserved_xrcds     = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
                                        dev_cap->reserved_xrcds : 0;
        dev->caps.max_xrcds          = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
                                        dev_cap->max_xrcds : 0;
        dev->caps.mtt_entry_sz       = dev_cap->mtt_entry_sz;

        dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
        dev->caps.page_size_cap      = ~(u32) (dev_cap->min_page_sz - 1);
        dev->caps.flags              = dev_cap->flags;
        dev->caps.flags2             = dev_cap->flags2;
        dev->caps.bmme_flags         = dev_cap->bmme_flags;
        dev->caps.reserved_lkey      = dev_cap->reserved_lkey;
        dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
        dev->caps.max_gso_sz         = dev_cap->max_gso_sz;
        dev->caps.max_rss_tbl_sz     = dev_cap->max_rss_tbl_sz;

        /* Sense port is always allowed on supported devices for ConnectX-1 and -2 */
        if (dev->pdev->device != 0x1003)
                dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;

        dev->caps.log_num_macs  = log_num_mac;
        dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
        dev->caps.log_num_prios = use_prio ? 3 : 0;

        for (i = 1; i <= dev->caps.num_ports; ++i) {
                dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE;
                if (dev->caps.supported_type[i]) {
                        /* if only ETH is supported - assign ETH */
                        if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH)
                                dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
                        /* if only IB is supported,
                         * assign IB only if SRIOV is off */
                        else if (dev->caps.supported_type[i] ==
                                 MLX4_PORT_TYPE_IB) {
                                if (dev->flags & MLX4_FLAG_SRIOV)
                                        dev->caps.port_type[i] =
                                                MLX4_PORT_TYPE_NONE;
                                else
                                        dev->caps.port_type[i] =
                                                MLX4_PORT_TYPE_IB;
                        /* if IB and ETH are supported,
                         * first of all check if SRIOV is on */
                        } else if (dev->flags & MLX4_FLAG_SRIOV)
                                dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
                        else {
                                /* In non-SRIOV mode, we set the port type
                                 * according to user selection of port type;
                                 * if the user selected none, take the FW hint */
                                if (port_type_array[i-1] == MLX4_PORT_TYPE_NONE)
                                        dev->caps.port_type[i] = dev->caps.suggested_type[i] ?
                                                MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB;
                                else
                                        dev->caps.port_type[i] = port_type_array[i-1];
                        }
                }
                /*
                 * Link sensing is allowed on the port if 3 conditions are true:
                 * 1. Both protocols are supported on the port.
                 * 2. Different types are supported on the port.
                 * 3. FW declared that it supports link sensing.
                 */
                mlx4_priv(dev)->sense.sense_allowed[i] =
                        ((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) &&
                         (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
                         (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT));

                /*
                 * If "default_sense" bit is set, we move the port to "AUTO" mode
                 * and perform sense_port FW command to try and set the correct
                 * port type from beginning
                 */
                if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) {
                        enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE;
                        dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO;
                        mlx4_SENSE_PORT(dev, i, &sensed_port);
                        if (sensed_port != MLX4_PORT_TYPE_NONE)
                                dev->caps.port_type[i] = sensed_port;
                } else {
                        dev->caps.possible_type[i] = dev->caps.port_type[i];
                }

                if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) {
                        dev->caps.log_num_macs = dev_cap->log_max_macs[i];
                        mlx4_warn(dev, "Requested number of MACs is too large "
                                  "for port %d, reducing to %d.\n",
                                  i, 1 << dev->caps.log_num_macs);
                }
                if (dev->caps.log_num_vlans > dev_cap->log_max_vlans[i]) {
                        dev->caps.log_num_vlans = dev_cap->log_max_vlans[i];
                        mlx4_warn(dev, "Requested number of VLANs is too large "
                                  "for port %d, reducing to %d.\n",
                                  i, 1 << dev->caps.log_num_vlans);
                }
        }

        dev->caps.max_counters = 1 << ilog2(dev_cap->max_counters);

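        /* Carve out the reserved QP ranges: QPs owned by the FW, the
         * per-port Ethernet/FC address-steering QPs (one per
         * MAC x VLAN x priority combination), and the FC exchange QPs.
         */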
        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
                dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
                (1 << dev->caps.log_num_macs) *
                (1 << dev->caps.log_num_vlans) *
                (1 << dev->caps.log_num_prios) *
                dev->caps.num_ports;
        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;

        dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
                dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] +
                dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
                dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];

        return 0;
}
/* Check for live VFs and return their count */
static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        struct mlx4_slave_state *s_state;
        int i;
        int ret = 0;

        for (i = 1; i < dev->num_slaves; ++i) { /* slave 0 is the PPF */
                s_state = &priv->mfunc.master.slave_state[i];
                if (s_state->active && s_state->last_cmd !=
                    MLX4_COMM_CMD_RESET) {
                        mlx4_warn(dev, "%s: slave: %d is still active\n",
                                  __func__, i);
                        ret++;
                }
        }
        return ret;
}

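/* Map a special QP number to its reserved Q_Key: tunnel QPs get
 * MLX4_RESERVED_QKEY_BASE plus their offset from base_tunnel_sqpn,
 * other proxy special QPs get the base plus their offset from
 * sqp_start.  QPNs outside the range [sqp_start, base_tunnel_sqpn +
 * 8 * MLX4_MFUNC_MAX) have no reserved Q_Key.
 */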
int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey)
{
        u32 qk = MLX4_RESERVED_QKEY_BASE;

        if (qpn >= dev->caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX ||
            qpn < dev->caps.sqp_start)
                return -EINVAL;

        if (qpn >= dev->caps.base_tunnel_sqpn)
                /* tunnel qp */
                qk += qpn - dev->caps.base_tunnel_sqpn;
        else
                qk += qpn - dev->caps.sqp_start;
        *qkey = qk;
        return 0;
}
EXPORT_SYMBOL(mlx4_get_parav_qkey);

int mlx4_is_slave_active(struct mlx4_dev *dev, int slave)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        struct mlx4_slave_state *s_slave;

        if (!mlx4_is_master(dev))
                return 0;

        s_slave = &priv->mfunc.master.slave_state[slave];
        return !!s_slave->active;
}
EXPORT_SYMBOL(mlx4_is_slave_active);

static int mlx4_slave_cap(struct mlx4_dev *dev)
{
        int                        err;
        u32                        page_size;
        struct mlx4_dev_cap        dev_cap;
        struct mlx4_func_cap       func_cap;
        struct mlx4_init_hca_param hca_param;
        int                        i;

        memset(&hca_param, 0, sizeof(hca_param));
        err = mlx4_QUERY_HCA(dev, &hca_param);
        if (err) {
                mlx4_err(dev, "QUERY_HCA command failed, aborting.\n");
                return err;
        }

        /* fail if the HCA has an unknown capability */
        if ((hca_param.global_caps | HCA_GLOBAL_CAP_MASK) !=
            HCA_GLOBAL_CAP_MASK) {
                mlx4_err(dev, "Unknown hca global capabilities\n");
                return -ENOSYS;
        }

        mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz;

        memset(&dev_cap, 0, sizeof(dev_cap));
        dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp;
        err = mlx4_dev_cap(dev, &dev_cap);
        if (err) {
                mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
                return err;
        }

        err = mlx4_QUERY_FW(dev);
        if (err)
                mlx4_err(dev, "QUERY_FW command failed: could not get FW version.\n");

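        /* page_size_cap is a mask of the supported page sizes (set in
         * mlx4_dev_cap() as ~(min_page_sz - 1)), so the two's-complement
         * expression below recovers the HCA's minimum page size. */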
        page_size = ~dev->caps.page_size_cap + 1;
        mlx4_warn(dev, "HCA minimum page size:%d\n", page_size);
        if (page_size > PAGE_SIZE) {
                mlx4_err(dev, "HCA minimum page size of %d bigger than "
                         "kernel PAGE_SIZE of %ld, aborting.\n",
                         page_size, PAGE_SIZE);
                return -ENODEV;
        }

        /* the slave gets its UAR page size from the QUERY_HCA FW command */
        dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12);

        /* TODO: relax this assumption */
        if (dev->caps.uar_page_size != PAGE_SIZE) {
                mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %ld\n",
                         dev->caps.uar_page_size, PAGE_SIZE);
                return -ENODEV;
        }

        memset(&func_cap, 0, sizeof(func_cap));
        err = mlx4_QUERY_FUNC_CAP(dev, &func_cap);
        if (err) {
                mlx4_err(dev, "QUERY_FUNC_CAP command failed, aborting.\n");
                return err;
        }

        if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) !=
            PF_CONTEXT_BEHAVIOUR_MASK) {
                mlx4_err(dev, "Unknown pf context behaviour\n");
                return -ENOSYS;
        }

        dev->caps.num_ports             = func_cap.num_ports;
        dev->caps.num_qps               = func_cap.qp_quota;
        dev->caps.num_srqs              = func_cap.srq_quota;
        dev->caps.num_cqs               = func_cap.cq_quota;
        dev->caps.num_eqs               = func_cap.max_eq;
        dev->caps.reserved_eqs          = func_cap.reserved_eq;
        dev->caps.num_mpts              = func_cap.mpt_quota;
        dev->caps.num_mtts              = func_cap.mtt_quota;
        dev->caps.num_pds               = MLX4_NUM_PDS;
        dev->caps.num_mgms              = 0;
        dev->caps.num_amgms             = 0;

        if (dev->caps.num_ports > MLX4_MAX_PORTS) {
                mlx4_err(dev, "HCA has %d ports, but we only support %d, "
                         "aborting.\n", dev->caps.num_ports, MLX4_MAX_PORTS);
                return -ENODEV;
        }

        for (i = 1; i <= dev->caps.num_ports; ++i)
                dev->caps.port_mask[i] = dev->caps.port_type[i];

        if (dev->caps.uar_page_size * (dev->caps.num_uars -
                                       dev->caps.reserved_uars) >
                                       pci_resource_len(dev->pdev, 2)) {
                mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than "
                         "PCI resource 2 size of 0x%llx, aborting.\n",
                         dev->caps.uar_page_size * dev->caps.num_uars,
                         (unsigned long long) pci_resource_len(dev->pdev, 2));
                return -ENODEV;
        }

        return 0;
}

/*
 * Change the port configuration of the device.
 * Every user of this function must hold the port mutex.
 */
int mlx4_change_port_types(struct mlx4_dev *dev,
                           enum mlx4_port_type *port_types)
{
        int err = 0;
        int change = 0;
        int port;

        for (port = 0; port < dev->caps.num_ports; port++) {
                /* Change the port type only if the new type is different
                 * from the current, and not set to Auto */
                if (port_types[port] != dev->caps.port_type[port + 1])
                        change = 1;
        }
        if (change) {
                mlx4_unregister_device(dev);
                for (port = 1; port <= dev->caps.num_ports; port++) {
                        mlx4_CLOSE_PORT(dev, port);
                        dev->caps.port_type[port] = port_types[port - 1];
                        err = mlx4_SET_PORT(dev, port);
                        if (err) {
                                mlx4_err(dev, "Failed to set port %d, "
                                              "aborting\n", port);
                                goto out;
                        }
                }
                mlx4_set_port_mask(dev);
                err = mlx4_register_device(dev);
        }

out:
        return err;
}

static ssize_t show_port_type(struct device *dev,
                              struct device_attribute *attr,
                              char *buf)
{
        struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
                                                   port_attr);
        struct mlx4_dev *mdev = info->dev;
        char type[8];

        sprintf(type, "%s",
                (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ?
                "ib" : "eth");
        if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO)
                sprintf(buf, "auto (%s)\n", type);
        else
                sprintf(buf, "%s\n", type);

        return strlen(buf);
}

static ssize_t set_port_type(struct device *dev,
                             struct device_attribute *attr,
                             const char *buf, size_t count)
{
        struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
                                                   port_attr);
        struct mlx4_dev *mdev = info->dev;
        struct mlx4_priv *priv = mlx4_priv(mdev);
        enum mlx4_port_type types[MLX4_MAX_PORTS];
        enum mlx4_port_type new_types[MLX4_MAX_PORTS];
        int i;
        int err = 0;

        if (!strcmp(buf, "ib\n"))
                info->tmp_type = MLX4_PORT_TYPE_IB;
        else if (!strcmp(buf, "eth\n"))
                info->tmp_type = MLX4_PORT_TYPE_ETH;
        else if (!strcmp(buf, "auto\n"))
                info->tmp_type = MLX4_PORT_TYPE_AUTO;
        else {
                mlx4_err(mdev, "%s is not a supported port type\n", buf);
                return -EINVAL;
        }

        mlx4_stop_sense(mdev);
        mutex_lock(&priv->port_mutex);
        /* Possible type is always the one that was delivered */
        mdev->caps.possible_type[info->port] = info->tmp_type;

        for (i = 0; i < mdev->caps.num_ports; i++) {
                types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type :
                                        mdev->caps.possible_type[i+1];
                if (types[i] == MLX4_PORT_TYPE_AUTO)
                        types[i] = mdev->caps.port_type[i+1];
        }

        if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
            !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) {
                for (i = 1; i <= mdev->caps.num_ports; i++) {
                        if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) {
                                mdev->caps.possible_type[i] = mdev->caps.port_type[i];
                                err = -EINVAL;
                        }
                }
        }
        if (err) {
                mlx4_err(mdev, "Auto sensing is not supported on this HCA. "
                               "Set only 'eth' or 'ib' for both ports "
                               "(should be the same)\n");
                goto out;
        }

        mlx4_do_sense_ports(mdev, new_types, types);

        err = mlx4_check_port_params(mdev, new_types);
        if (err)
                goto out;

        /* We are about to apply the changes after the configuration
         * was verified, no need to remember the temporary types
         * any more */
        for (i = 0; i < mdev->caps.num_ports; i++)
                priv->port[i + 1].tmp_type = 0;

        err = mlx4_change_port_types(mdev, new_types);

out:
        mlx4_start_sense(mdev);
        mutex_unlock(&priv->port_mutex);
        return err ? err : count;
}

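/* Port MTU values as encoded by the IBTA specification: 1..5 map to
 * 256..4096 bytes.
 */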
enum ibta_mtu {
        IB_MTU_256  = 1,
        IB_MTU_512  = 2,
        IB_MTU_1024 = 3,
        IB_MTU_2048 = 4,
        IB_MTU_4096 = 5
};

static inline int int_to_ibta_mtu(int mtu)
{
        switch (mtu) {
        case 256:  return IB_MTU_256;
        case 512:  return IB_MTU_512;
        case 1024: return IB_MTU_1024;
        case 2048: return IB_MTU_2048;
        case 4096: return IB_MTU_4096;
        default: return -1;
        }
}

static inline int ibta_mtu_to_int(enum ibta_mtu mtu)
{
        switch (mtu) {
        case IB_MTU_256:  return  256;
        case IB_MTU_512:  return  512;
        case IB_MTU_1024: return 1024;
        case IB_MTU_2048: return 2048;
        case IB_MTU_4096: return 4096;
        default: return -1;
        }
}

static ssize_t show_port_ib_mtu(struct device *dev,
                             struct device_attribute *attr,
                             char *buf)
{
        struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
                                                   port_mtu_attr);
        struct mlx4_dev *mdev = info->dev;

        if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH)
                mlx4_warn(mdev, "port-level MTU is only used for IB ports\n");

        sprintf(buf, "%d\n",
                        ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port]));
        return strlen(buf);
}

static ssize_t set_port_ib_mtu(struct device *dev,
                             struct device_attribute *attr,
                             const char *buf, size_t count)
{
        struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
                                                   port_mtu_attr);
        struct mlx4_dev *mdev = info->dev;
        struct mlx4_priv *priv = mlx4_priv(mdev);
        int err, port, mtu, ibta_mtu = -1;

        if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) {
                mlx4_warn(mdev, "port-level MTU is only used for IB ports\n");
                return -EINVAL;
        }

        err = sscanf(buf, "%d", &mtu);
        if (err > 0)
                ibta_mtu = int_to_ibta_mtu(mtu);

        if (err <= 0 || ibta_mtu < 0) {
                mlx4_err(mdev, "%s is not a valid IBTA MTU\n", buf);
                return -EINVAL;
        }

        mdev->caps.port_ib_mtu[info->port] = ibta_mtu;

        mlx4_stop_sense(mdev);
        mutex_lock(&priv->port_mutex);
        mlx4_unregister_device(mdev);
        for (port = 1; port <= mdev->caps.num_ports; port++) {
                mlx4_CLOSE_PORT(mdev, port);
                err = mlx4_SET_PORT(mdev, port);
                if (err) {
                        mlx4_err(mdev, "Failed to set port %d, "
                                      "aborting\n", port);
                        goto err_set_port;
                }
        }
        err = mlx4_register_device(mdev);
err_set_port:
        mutex_unlock(&priv->port_mutex);
        mlx4_start_sense(mdev);
        return err ? err : count;
}

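/* Allocate ICM memory for the firmware area, map it with the MAP_FA
 * command and then start the firmware with RUN_FW.
 */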
static int mlx4_load_fw(struct mlx4_dev *dev)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        int err;

        priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages,
                                         GFP_HIGHUSER | __GFP_NOWARN, 0);
        if (!priv->fw.fw_icm) {
                mlx4_err(dev, "Couldn't allocate FW area, aborting.\n");
                return -ENOMEM;
        }

        err = mlx4_MAP_FA(dev, priv->fw.fw_icm);
        if (err) {
                mlx4_err(dev, "MAP_FA command failed, aborting.\n");
                goto err_free;
        }

        err = mlx4_RUN_FW(dev);
        if (err) {
                mlx4_err(dev, "RUN_FW command failed, aborting.\n");
                goto err_unmap_fa;
        }

        return 0;

err_unmap_fa:
        mlx4_UNMAP_FA(dev);

err_free:
        mlx4_free_icm(dev, priv->fw.fw_icm, 0);
        return err;
}

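/* Map the cMPT (collapsed MPT) ICM regions for the QP, SRQ, CQ and EQ
 * contexts; each type's region starts at
 * cmpt_base + ((type * cmpt_entry_sz) << MLX4_CMPT_SHIFT).
 */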
static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
                                int cmpt_entry_sz)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        int err;
        int num_eqs;

        err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table,
                                  cmpt_base +
                                  ((u64) (MLX4_CMPT_TYPE_QP *
                                          cmpt_entry_sz) << MLX4_CMPT_SHIFT),
                                  cmpt_entry_sz, dev->caps.num_qps,
                                  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
                                  0, 0);
        if (err)
                goto err;

        err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table,
                                  cmpt_base +
                                  ((u64) (MLX4_CMPT_TYPE_SRQ *
                                          cmpt_entry_sz) << MLX4_CMPT_SHIFT),
                                  cmpt_entry_sz, dev->caps.num_srqs,
                                  dev->caps.reserved_srqs, 0, 0);
        if (err)
                goto err_qp;

        err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table,
                                  cmpt_base +
                                  ((u64) (MLX4_CMPT_TYPE_CQ *
                                          cmpt_entry_sz) << MLX4_CMPT_SHIFT),
                                  cmpt_entry_sz, dev->caps.num_cqs,
                                  dev->caps.reserved_cqs, 0, 0);
        if (err)
                goto err_srq;

        num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs :
                  dev->caps.num_eqs;
        err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
                                  cmpt_base +
                                  ((u64) (MLX4_CMPT_TYPE_EQ *
                                          cmpt_entry_sz) << MLX4_CMPT_SHIFT),
                                  cmpt_entry_sz, num_eqs, num_eqs, 0, 0);
        if (err)
                goto err_cq;

        return 0;

err_cq:
        mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);

err_srq:
        mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);

err_qp:
        mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

err:
        return err;
}

static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
                         struct mlx4_init_hca_param *init_hca, u64 icm_size)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        u64 aux_pages;
        int num_eqs;
        int err;

        err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
        if (err) {
                mlx4_err(dev, "SET_ICM_SIZE command failed, aborting.\n");
                return err;
        }

        mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory.\n",
                 (unsigned long long) icm_size >> 10,
                 (unsigned long long) aux_pages << 2);

        priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages,
                                          GFP_HIGHUSER | __GFP_NOWARN, 0);
        if (!priv->fw.aux_icm) {
                mlx4_err(dev, "Couldn't allocate aux memory, aborting.\n");
                return -ENOMEM;
        }

        err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm);
        if (err) {
                mlx4_err(dev, "MAP_ICM_AUX command failed, aborting.\n");
                goto err_free_aux;
        }

        err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz);
        if (err) {
                mlx4_err(dev, "Failed to map cMPT context memory, aborting.\n");
                goto err_unmap_aux;
        }

        num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs :
                  dev->caps.num_eqs;
        err = mlx4_init_icm_table(dev, &priv->eq_table.table,
                                  init_hca->eqc_base, dev_cap->eqc_entry_sz,
                                  num_eqs, num_eqs, 0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map EQ context memory, aborting.\n");
                goto err_unmap_cmpt;
        }

        /*
         * Reserved MTT entries must be aligned up to a cacheline
         * boundary, since the FW will write to them, while the driver
         * writes to all other MTT entries. (The variable
         * dev->caps.mtt_entry_sz below is really the MTT segment
         * size, not the raw entry size)
         */
        dev->caps.reserved_mtts =
                ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz,
                      dma_get_cache_alignment()) / dev->caps.mtt_entry_sz;

        err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
                                  init_hca->mtt_base,
                                  dev->caps.mtt_entry_sz,
                                  dev->caps.num_mtts,
                                  dev->caps.reserved_mtts, 1, 0);
        if (err) {
                mlx4_err(dev, "Failed to map MTT context memory, aborting.\n");
                goto err_unmap_eq;
        }

        err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table,
                                  init_hca->dmpt_base,
                                  dev_cap->dmpt_entry_sz,
                                  dev->caps.num_mpts,
                                  dev->caps.reserved_mrws, 1, 1);
        if (err) {
                mlx4_err(dev, "Failed to map dMPT context memory, aborting.\n");
                goto err_unmap_mtt;
        }

        err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table,
                                  init_hca->qpc_base,
                                  dev_cap->qpc_entry_sz,
                                  dev->caps.num_qps,
                                  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
                                  0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map QP context memory, aborting.\n");
                goto err_unmap_dmpt;
        }

        err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table,
                                  init_hca->auxc_base,
                                  dev_cap->aux_entry_sz,
                                  dev->caps.num_qps,
                                  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
                                  0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n");
                goto err_unmap_qp;
        }

        err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table,
                                  init_hca->altc_base,
                                  dev_cap->altc_entry_sz,
                                  dev->caps.num_qps,
                                  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
                                  0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map ALTC context memory, aborting.\n");
                goto err_unmap_auxc;
        }

        err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table,
                                  init_hca->rdmarc_base,
                                  dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift,
                                  dev->caps.num_qps,
                                  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
                                  0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n");
                goto err_unmap_altc;
        }

        err = mlx4_init_icm_table(dev, &priv->cq_table.table,
                                  init_hca->cqc_base,
                                  dev_cap->cqc_entry_sz,
                                  dev->caps.num_cqs,
                                  dev->caps.reserved_cqs, 0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map CQ context memory, aborting.\n");
                goto err_unmap_rdmarc;
        }

        err = mlx4_init_icm_table(dev, &priv->srq_table.table,
                                  init_hca->srqc_base,
                                  dev_cap->srq_entry_sz,
                                  dev->caps.num_srqs,
                                  dev->caps.reserved_srqs, 0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map SRQ context memory, aborting.\n");
                goto err_unmap_cq;
        }

        /*
         * It's not strictly required, but for simplicity just map the
         * whole multicast group table now.  The table isn't very big
         * and it's a lot easier than trying to track ref counts.
         */
        err = mlx4_init_icm_table(dev, &priv->mcg_table.table,
                                  init_hca->mc_base,
                                  mlx4_get_mgm_entry_size(dev),
                                  dev->caps.num_mgms + dev->caps.num_amgms,
                                  dev->caps.num_mgms + dev->caps.num_amgms,
                                  0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map MCG context memory, aborting.\n");
                goto err_unmap_srq;
        }

        return 0;

err_unmap_srq:
        mlx4_cleanup_icm_table(dev, &priv->srq_table.table);

err_unmap_cq:
        mlx4_cleanup_icm_table(dev, &priv->cq_table.table);

err_unmap_rdmarc:
        mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);

err_unmap_altc:
        mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);

err_unmap_auxc:
        mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);

err_unmap_qp:
        mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);

err_unmap_dmpt:
        mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);

err_unmap_mtt:
        mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);

err_unmap_eq:
        mlx4_cleanup_icm_table(dev, &priv->eq_table.table);

err_unmap_cmpt:
        mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
        mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
        mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
        mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

err_unmap_aux:
        mlx4_UNMAP_ICM_AUX(dev);

err_free_aux:
        mlx4_free_icm(dev, priv->fw.aux_icm, 0);

        return err;
}

static void mlx4_free_icms(struct mlx4_dev *dev)
{
        struct mlx4_priv *priv = mlx4_priv(dev);

        mlx4_cleanup_icm_table(dev, &priv->mcg_table.table);
        mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
        mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
        mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
        mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
        mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
        mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
        mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
        mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
        mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
        mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
        mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
        mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
        mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

        mlx4_UNMAP_ICM_AUX(dev);
        mlx4_free_icm(dev, priv->fw.aux_icm, 0);
}

static void mlx4_slave_exit(struct mlx4_dev *dev)
{
        struct mlx4_priv *priv = mlx4_priv(dev);

        down(&priv->cmd.slave_sem);
        if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME))
                mlx4_warn(dev, "Failed to close slave function.\n");
        up(&priv->cmd.slave_sem);
}

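/* Create a write-combining mapping of the BlueFlame registers, which
 * follow the num_uars UAR pages at the start of BAR 2.
 */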
static int map_bf_area(struct mlx4_dev *dev)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        resource_size_t bf_start;
        resource_size_t bf_len;
        int err = 0;

        if (!dev->caps.bf_reg_size)
                return -ENXIO;

        bf_start = pci_resource_start(dev->pdev, 2) +
                        (dev->caps.num_uars << PAGE_SHIFT);
        bf_len = pci_resource_len(dev->pdev, 2) -
                        (dev->caps.num_uars << PAGE_SHIFT);
        priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
        if (!priv->bf_mapping)
                err = -ENOMEM;

        return err;
}

static void unmap_bf_area(struct mlx4_dev *dev)
{
        if (mlx4_priv(dev)->bf_mapping)
                io_mapping_free(mlx4_priv(dev)->bf_mapping);
}

static void mlx4_close_hca(struct mlx4_dev *dev)
{
        unmap_bf_area(dev);
        if (mlx4_is_slave(dev))
                mlx4_slave_exit(dev);
        else {
                mlx4_CLOSE_HCA(dev, 0);
                mlx4_free_icms(dev);
                mlx4_UNMAP_FA(dev);
                mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
        }
}

static int mlx4_init_slave(struct mlx4_dev *dev)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        u64 dma = (u64) priv->mfunc.vhcr_dma;
        int num_of_reset_retries = NUM_OF_RESET_RETRIES;
        int ret_from_reset = 0;
        u32 slave_read;
        u32 cmd_channel_ver;

        down(&priv->cmd.slave_sem);
        priv->cmd.max_cmds = 1;
        mlx4_warn(dev, "Sending reset\n");
        ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
                                       MLX4_COMM_TIME);
        /* if we are in the middle of FLR, the slave will retry
         * NUM_OF_RESET_RETRIES times before giving up */
        if (ret_from_reset) {
                if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) {
                        msleep(SLEEP_TIME_IN_RESET);
                        while (ret_from_reset && num_of_reset_retries) {
                                mlx4_warn(dev, "slave is currently in the "
                                          "middle of FLR. Retrying... "
                                          "(try num:%d)\n",
                                          (NUM_OF_RESET_RETRIES -
                                           num_of_reset_retries + 1));
                                ret_from_reset =
                                        mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET,
                                                      0, MLX4_COMM_TIME);
                                num_of_reset_retries = num_of_reset_retries - 1;
                        }
                } else
                        goto err;
        }

        /* check the driver version - the slave I/F revision
         * must match the master's */
        slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
        cmd_channel_ver = mlx4_comm_get_version();

        if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) !=
                MLX4_COMM_GET_IF_REV(slave_read)) {
                mlx4_err(dev, "slave driver version is not supported"
                         " by the master\n");
                goto err;
        }

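        /* Hand the 64-bit VHCR DMA address to the master 16 bits at a
         * time, high word first; VHCR_EN carries the lowest word and
         * enables the command channel. */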
        mlx4_warn(dev, "Sending vhcr0\n");
        if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48,
                                                    MLX4_COMM_TIME))
                goto err;
        if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32,
                                                    MLX4_COMM_TIME))
                goto err;
        if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16,
                                                    MLX4_COMM_TIME))
                goto err;
        if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME))
                goto err;
        up(&priv->cmd.slave_sem);
        return 0;

err:
        mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0);
        up(&priv->cmd.slave_sem);
        return -EIO;
}

static int mlx4_init_hca(struct mlx4_dev *dev)
{
        struct mlx4_priv          *priv = mlx4_priv(dev);
        struct mlx4_adapter        adapter;
        struct mlx4_dev_cap        dev_cap;
        struct mlx4_mod_stat_cfg   mlx4_cfg;
        struct mlx4_profile        profile;
        struct mlx4_init_hca_param init_hca;
        u64 icm_size;
        int err;

        if (!mlx4_is_slave(dev)) {
                err = mlx4_QUERY_FW(dev);
                if (err) {
                        if (err == -EACCES)
                                mlx4_info(dev, "non-primary physical function, skipping.\n");
                        else
                                mlx4_err(dev, "QUERY_FW command failed, aborting.\n");
                        goto unmap_bf;
                }

                err = mlx4_load_fw(dev);
                if (err) {
                        mlx4_err(dev, "Failed to start FW, aborting.\n");
                        goto unmap_bf;
                }

                mlx4_cfg.log_pg_sz_m = 1;
                mlx4_cfg.log_pg_sz = 0;
                err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
                if (err)
                        mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");

                err = mlx4_dev_cap(dev, &dev_cap);
                if (err) {
                        mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
                        goto err_stop_fw;
                }

                profile = default_profile;

                icm_size = mlx4_make_profile(dev, &profile, &dev_cap,
                                             &init_hca);
                if ((long long) icm_size < 0) {
                        err = icm_size;
                        goto err_stop_fw;
                }

                dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;

                init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
                init_hca.uar_page_sz = PAGE_SHIFT - 12;

                err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
                if (err)
                        goto err_stop_fw;

                err = mlx4_INIT_HCA(dev, &init_hca);
                if (err) {
                        mlx4_err(dev, "INIT_HCA command failed, aborting.\n");
                        goto err_free_icm;
                }
        } else {
                err = mlx4_init_slave(dev);
                if (err) {
                        mlx4_err(dev, "Failed to initialize slave\n");
                        goto unmap_bf;
                }

                err = mlx4_slave_cap(dev);
                if (err) {
                        mlx4_err(dev, "Failed to obtain slave caps\n");
                        goto err_close;
                }
        }

        if (map_bf_area(dev))
                mlx4_dbg(dev, "Failed to map blue flame area\n");

        /* Only the master sets the ports; all the others get them from it. */
        if (!mlx4_is_slave(dev))
                mlx4_set_port_mask(dev);

        err = mlx4_QUERY_ADAPTER(dev, &adapter);
        if (err) {
                mlx4_err(dev, "QUERY_ADAPTER command failed, aborting.\n");
                goto err_close;
        }

        priv->eq_table.inta_pin = adapter.inta_pin;
        memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id);

        return 0;

err_close:
        mlx4_close_hca(dev);

err_free_icm:
        if (!mlx4_is_slave(dev))
                mlx4_free_icms(dev);

err_stop_fw:
        if (!mlx4_is_slave(dev)) {
                mlx4_UNMAP_FA(dev);
                mlx4_free_icm(dev, priv->fw.fw_icm, 0);
        }
unmap_bf:
        unmap_bf_area(dev);
        return err;
}

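/* max_counters is a power of two (see mlx4_dev_cap()), so nent - 1
 * below can serve directly as the bitmap mask.
 */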
static int mlx4_init_counters_table(struct mlx4_dev *dev)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        int nent;

        if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
                return -ENOENT;

        nent = dev->caps.max_counters;
        return mlx4_bitmap_init(&priv->counters_bitmap, nent, nent - 1, 0, 0);
}

static void mlx4_cleanup_counters_table(struct mlx4_dev *dev)
{
        mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap);
}

int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
{
        struct mlx4_priv *priv = mlx4_priv(dev);

        if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
                return -ENOENT;

        *idx = mlx4_bitmap_alloc(&priv->counters_bitmap);
        if (*idx == -1)
                return -ENOMEM;

        return 0;
}

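/* In multi-function mode, counter allocation is delegated to the master
 * through the ALLOC_RES command wrapper; otherwise take an index
 * straight from the counters bitmap.
 */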
1328 int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
1329 {
1330         u64 out_param;
1331         int err;
1332
1333         if (mlx4_is_mfunc(dev)) {
1334                 err = mlx4_cmd_imm(dev, 0, &out_param, RES_COUNTER,
1335                                    RES_OP_RESERVE, MLX4_CMD_ALLOC_RES,
1336                                    MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
1337                 if (!err)
1338                         *idx = get_param_l(&out_param);
1339
1340                 return err;
1341         }
1342         return __mlx4_counter_alloc(dev, idx);
1343 }
1344 EXPORT_SYMBOL_GPL(mlx4_counter_alloc);
1345
1346 void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
1347 {
1348         mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx);
1350 }
1351
1352 void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
1353 {
1354         u64 in_param;
1355
1356         if (mlx4_is_mfunc(dev)) {
1357                 set_param_l(&in_param, idx);
1358                 mlx4_cmd(dev, in_param, RES_COUNTER, RES_OP_RESERVE,
1359                          MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
1360                          MLX4_CMD_WRAPPED);
1361                 return;
1362         }
1363         __mlx4_counter_free(dev, idx);
1364 }
1365 EXPORT_SYMBOL_GPL(mlx4_counter_free);
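/*
 * Usage sketch (hypothetical caller, not part of this file): a consumer
 * of these exports would typically allocate one counter per port and
 * tolerate -ENOENT on HCAs without the capability:
 *
 *	u32 idx;
 *	int err = mlx4_counter_alloc(dev, &idx);
 *	if (err && err != -ENOENT)
 *		return err;
 *	...
 *	if (!err)
 *		mlx4_counter_free(dev, idx);
 */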
1366
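/*
 * Bring up the software resource tables in dependency order: UARs (plus
 * the kernel access region), PDs, XRCDs, MRs and EQs; then switch
 * firmware commands from polling to event mode and verify interrupt
 * delivery with a NOP; finally CQs, SRQs, QPs, multicast groups and
 * counters.  On the PF this also programs per-port IB capabilities and
 * MTU.  Each error label below unwinds exactly the steps taken so far.
 */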
1367 static int mlx4_setup_hca(struct mlx4_dev *dev)
1368 {
1369         struct mlx4_priv *priv = mlx4_priv(dev);
1370         int err;
1371         int port;
1372         __be32 ib_port_default_caps;
1373
1374         err = mlx4_init_uar_table(dev);
1375         if (err) {
1376                 mlx4_err(dev, "Failed to initialize "
1377                          "user access region table, aborting.\n");
1378                 return err;
1379         }
1380
1381         err = mlx4_uar_alloc(dev, &priv->driver_uar);
1382         if (err) {
1383                 mlx4_err(dev, "Failed to allocate driver access region, "
1384                          "aborting.\n");
1385                 goto err_uar_table_free;
1386         }
1387
1388         priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
1389         if (!priv->kar) {
1390                 mlx4_err(dev, "Couldn't map kernel access region, "
1391                          "aborting.\n");
1392                 err = -ENOMEM;
1393                 goto err_uar_free;
1394         }
1395
1396         err = mlx4_init_pd_table(dev);
1397         if (err) {
1398                 mlx4_err(dev, "Failed to initialize "
1399                          "protection domain table, aborting.\n");
1400                 goto err_kar_unmap;
1401         }
1402
1403         err = mlx4_init_xrcd_table(dev);
1404         if (err) {
1405                 mlx4_err(dev, "Failed to initialize "
1406                          "reliable connection domain table, aborting.\n");
1407                 goto err_pd_table_free;
1408         }
1409
1410         err = mlx4_init_mr_table(dev);
1411         if (err) {
1412                 mlx4_err(dev, "Failed to initialize "
1413                          "memory region table, aborting.\n");
1414                 goto err_xrcd_table_free;
1415         }
1416
1417         err = mlx4_init_eq_table(dev);
1418         if (err) {
1419                 mlx4_err(dev, "Failed to initialize "
1420                          "event queue table, aborting.\n");
1421                 goto err_mr_table_free;
1422         }
1423
1424         err = mlx4_cmd_use_events(dev);
1425         if (err) {
1426                 mlx4_err(dev, "Failed to switch to event-driven "
1427                          "firmware commands, aborting.\n");
1428                 goto err_eq_table_free;
1429         }
1430
1431         err = mlx4_NOP(dev);
1432         if (err) {
1433                 if (dev->flags & MLX4_FLAG_MSI_X) {
1434                         mlx4_warn(dev, "NOP command failed to generate MSI-X "
1435                                   "interrupt (IRQ %d).\n",
1436                                   priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
1437                         mlx4_warn(dev, "Trying again without MSI-X.\n");
1438                 } else {
1439                         mlx4_err(dev, "NOP command failed to generate interrupt "
1440                                  "(IRQ %d), aborting.\n",
1441                                  priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
1442                         mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
1443                 }
1444
1445                 goto err_cmd_poll;
1446         }
1447
1448         mlx4_dbg(dev, "NOP command IRQ test passed\n");
1449
1450         err = mlx4_init_cq_table(dev);
1451         if (err) {
1452                 mlx4_err(dev, "Failed to initialize "
1453                          "completion queue table, aborting.\n");
1454                 goto err_cmd_poll;
1455         }
1456
1457         err = mlx4_init_srq_table(dev);
1458         if (err) {
1459                 mlx4_err(dev, "Failed to initialize "
1460                          "shared receive queue table, aborting.\n");
1461                 goto err_cq_table_free;
1462         }
1463
1464         err = mlx4_init_qp_table(dev);
1465         if (err) {
1466                 mlx4_err(dev, "Failed to initialize "
1467                          "queue pair table, aborting.\n");
1468                 goto err_srq_table_free;
1469         }
1470
1471         if (!mlx4_is_slave(dev)) {
1472                 err = mlx4_init_mcg_table(dev);
1473                 if (err) {
1474                         mlx4_err(dev, "Failed to initialize "
1475                                  "multicast group table, aborting.\n");
1476                         goto err_qp_table_free;
1477                 }
1478         }
1479
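        /* Counters are an optional HCA capability; -ENOENT just means
         * the device does not have them, which is not fatal. */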
1480         err = mlx4_init_counters_table(dev);
1481         if (err && err != -ENOENT) {
1482                 mlx4_err(dev, "Failed to initialize counters table, aborting.\n");
1483                 goto err_mcg_table_free;
1484         }
1485
1486         if (!mlx4_is_slave(dev)) {
1487                 for (port = 1; port <= dev->caps.num_ports; port++) {
1488                         ib_port_default_caps = 0;
1489                         err = mlx4_get_port_ib_caps(dev, port,
1490                                                     &ib_port_default_caps);
1491                         if (err)
1492                                 mlx4_warn(dev, "failed to get port %d default "
1493                                           "ib capabilities (%d). Continuing "
1494                                           "with caps = 0\n", port, err);
1495                         dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
1496
1497                         /* initialize per-slave default ib port capabilities */
1498                         if (mlx4_is_master(dev)) {
1499                                 int i;
1500                                 for (i = 0; i < dev->num_slaves; i++) {
1501                                         if (i == mlx4_master_func_num(dev))
1502                                                 continue;
1503                                         priv->mfunc.master.slave_state[i].ib_cap_mask[port] =
1504                                                         ib_port_default_caps;
1505                                 }
1506                         }
1507
1508                         if (mlx4_is_mfunc(dev))
1509                                 dev->caps.port_ib_mtu[port] = IB_MTU_2048;
1510                         else
1511                                 dev->caps.port_ib_mtu[port] = IB_MTU_4096;
1512
1513                         err = mlx4_SET_PORT(dev, port);
1514                         if (err) {
1515                                 mlx4_err(dev, "Failed to set port %d, aborting\n",
1516                                         port);
1517                                 goto err_counters_table_free;
1518                         }
1519                 }
1520         }
1521
1522         return 0;
1523
1524 err_counters_table_free:
1525         mlx4_cleanup_counters_table(dev);
1526
1527 err_mcg_table_free:
1528         mlx4_cleanup_mcg_table(dev);
1529
1530 err_qp_table_free:
1531         mlx4_cleanup_qp_table(dev);
1532
1533 err_srq_table_free:
1534         mlx4_cleanup_srq_table(dev);
1535
1536 err_cq_table_free:
1537         mlx4_cleanup_cq_table(dev);
1538
1539 err_cmd_poll:
1540         mlx4_cmd_use_polling(dev);
1541
1542 err_eq_table_free:
1543         mlx4_cleanup_eq_table(dev);
1544
1545 err_mr_table_free:
1546         mlx4_cleanup_mr_table(dev);
1547
1548 err_xrcd_table_free:
1549         mlx4_cleanup_xrcd_table(dev);
1550
1551 err_pd_table_free:
1552         mlx4_cleanup_pd_table(dev);
1553
1554 err_kar_unmap:
1555         iounmap(priv->kar);
1556
1557 err_uar_free:
1558         mlx4_uar_free(dev, &priv->driver_uar);
1559
1560 err_uar_table_free:
1561         mlx4_cleanup_uar_table(dev);
1562         return err;
1563 }
1564
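/*
 * MSI-X sizing: request up to (num_online_cpus() + 1) completion vectors
 * per port plus MSIX_LEGACY_SZ, capped by MAX_MSIX and by the EQs the
 * device actually has.  Multi-function devices are limited to 2 vectors
 * (one for completions, one for async events/command completions).  If
 * fewer than MSIX_LEGACY_SZ + num_ports * MIN_MSIX_P_PORT vectors are
 * granted we run in legacy mode with all EQs shared (comp_pool = 0); on
 * complete failure we drop to INTx with a single completion vector.
 */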
1565 static void mlx4_enable_msi_x(struct mlx4_dev *dev)
1566 {
1567         struct mlx4_priv *priv = mlx4_priv(dev);
1568         struct msix_entry *entries;
1569         int nreq = min_t(int, dev->caps.num_ports *
1570                          min_t(int, num_online_cpus() + 1, MAX_MSIX_P_PORT)
1571                                 + MSIX_LEGACY_SZ, MAX_MSIX);
1572         int err;
1573         int i;
1574
1575         if (msi_x) {
1576                 /* In multi-function mode each function gets 2 MSI-X vectors:
1577                  * one for data path completions and the other for async events
1578                  * or command completions. */
1579                 if (mlx4_is_mfunc(dev)) {
1580                         nreq = 2;
1581                 } else {
1582                         nreq = min_t(int, dev->caps.num_eqs -
1583                                      dev->caps.reserved_eqs, nreq);
1584                 }
1585
1586                 entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
1587                 if (!entries)
1588                         goto no_msi;
1589
1590                 for (i = 0; i < nreq; ++i)
1591                         entries[i].entry = i;
1592
1593         retry:
1594                 err = pci_enable_msix(dev->pdev, entries, nreq);
1595                 if (err) {
1596                         /* Try again if at least 2 vectors are available */
1597                         if (err > 1) {
1598                                 mlx4_info(dev, "Requested %d vectors, "
1599                                           "but only %d MSI-X vectors available, "
1600                                           "trying again\n", nreq, err);
1601                                 nreq = err;
1602                                 goto retry;
1603                         }
1604                         kfree(entries);
1605                         goto no_msi;
1606                 }
1607
1608                 if (nreq <
1609                     MSIX_LEGACY_SZ + dev->caps.num_ports * MIN_MSIX_P_PORT) {
1610                         /* Working in legacy mode, all EQs shared */
1611                         dev->caps.comp_pool           = 0;
1612                         dev->caps.num_comp_vectors = nreq - 1;
1613                 } else {
1614                         dev->caps.comp_pool           = nreq - MSIX_LEGACY_SZ;
1615                         dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1;
1616                 }
1617                 for (i = 0; i < nreq; ++i)
1618                         priv->eq_table.eq[i].irq = entries[i].vector;
1619
1620                 dev->flags |= MLX4_FLAG_MSI_X;
1621
1622                 kfree(entries);
1623                 return;
1624         }
1625
1626 no_msi:
1627         dev->caps.num_comp_vectors = 1;
1628         dev->caps.comp_pool        = 0;
1629
1630         for (i = 0; i < 2; ++i)
1631                 priv->eq_table.eq[i].irq = dev->pdev->irq;
1632 }
1633
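/*
 * Per-port init: on the PF this seeds the MAC/VLAN tables and the base
 * QPN for Ethernet steering; for every function it publishes two sysfs
 * attributes under the PCI device, mlx4_port<N> (link type) and
 * mlx4_port<N>_mtu.  Both are read-only in multi-function mode, since
 * only the PF may change port configuration.
 */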
1634 static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
1635 {
1636         struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
1637         int err = 0;
1638
1639         info->dev = dev;
1640         info->port = port;
1641         if (!mlx4_is_slave(dev)) {
1642                 INIT_RADIX_TREE(&info->mac_tree, GFP_KERNEL);
1643                 mlx4_init_mac_table(dev, &info->mac_table);
1644                 mlx4_init_vlan_table(dev, &info->vlan_table);
1645                 info->base_qpn =
1646                         dev->caps.reserved_qps_base[MLX4_QP_REGION_ETH_ADDR] +
1647                         (port - 1) * (1 << log_num_mac);
1648         }
1649
1650         sprintf(info->dev_name, "mlx4_port%d", port);
1651         info->port_attr.attr.name = info->dev_name;
1652         if (mlx4_is_mfunc(dev))
1653                 info->port_attr.attr.mode = S_IRUGO;
1654         else {
1655                 info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
1656                 info->port_attr.store     = set_port_type;
1657         }
1658         info->port_attr.show      = show_port_type;
1659         sysfs_attr_init(&info->port_attr.attr);
1660
1661         err = device_create_file(&dev->pdev->dev, &info->port_attr);
1662         if (err) {
1663                 mlx4_err(dev, "Failed to create file for port %d\n", port);
1664                 info->port = -1;
1665         }
1666
1667         sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
1668         info->port_mtu_attr.attr.name = info->dev_mtu_name;
1669         if (mlx4_is_mfunc(dev))
1670                 info->port_mtu_attr.attr.mode = S_IRUGO;
1671         else {
1672                 info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR;
1673                 info->port_mtu_attr.store     = set_port_ib_mtu;
1674         }
1675         info->port_mtu_attr.show      = show_port_ib_mtu;
1676         sysfs_attr_init(&info->port_mtu_attr.attr);
1677
1678         err = device_create_file(&dev->pdev->dev, &info->port_mtu_attr);
1679         if (err) {
1680                 mlx4_err(dev, "Failed to create mtu file for port %d\n", port);
1681                 device_remove_file(&info->dev->pdev->dev, &info->port_attr);
1682                 info->port = -1;
1683         }
1684
1685         return err;
1686 }
1687
1688 static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
1689 {
1690         if (info->port < 0)
1691                 return;
1692
1693         device_remove_file(&info->dev->pdev->dev, &info->port_attr);
1694         device_remove_file(&info->dev->pdev->dev, &info->port_mtu_attr);
1695 }
1696
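/*
 * Steering bookkeeping: one struct mlx4_steer per port, each holding a
 * pair of list heads per steer type (promiscuous QPs and steering
 * entries).  mlx4_clear_steering() below walks and frees those lists,
 * including any duplicate QP entries hanging off a steering entry.
 */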
1697 static int mlx4_init_steering(struct mlx4_dev *dev)
1698 {
1699         struct mlx4_priv *priv = mlx4_priv(dev);
1700         int num_entries = dev->caps.num_ports;
1701         int i, j;
1702
1703         priv->steer = kcalloc(num_entries, sizeof(struct mlx4_steer), GFP_KERNEL);
1704         if (!priv->steer)
1705                 return -ENOMEM;
1706
1707         for (i = 0; i < num_entries; i++)
1708                 for (j = 0; j < MLX4_NUM_STEERS; j++) {
1709                         INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]);
1710                         INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]);
1711                 }
1712         return 0;
1713 }
1714
1715 static void mlx4_clear_steering(struct mlx4_dev *dev)
1716 {
1717         struct mlx4_priv *priv = mlx4_priv(dev);
1718         struct mlx4_steer_index *entry, *tmp_entry;
1719         struct mlx4_promisc_qp *pqp, *tmp_pqp;
1720         int num_entries = dev->caps.num_ports;
1721         int i, j;
1722
1723         for (i = 0; i < num_entries; i++) {
1724                 for (j = 0; j < MLX4_NUM_STEERS; j++) {
1725                         list_for_each_entry_safe(pqp, tmp_pqp,
1726                                                  &priv->steer[i].promisc_qps[j],
1727                                                  list) {
1728                                 list_del(&pqp->list);
1729                                 kfree(pqp);
1730                         }
1731                         list_for_each_entry_safe(entry, tmp_entry,
1732                                                  &priv->steer[i].steer_entries[j],
1733                                                  list) {
1734                                 list_del(&entry->list);
1735                                 list_for_each_entry_safe(pqp, tmp_pqp,
1736                                                          &entry->duplicates,
1737                                                          list) {
1738                                         list_del(&pqp->list);
1739                                         kfree(pqp);
1740                                 }
1741                                 kfree(entry);
1742                         }
1743                 }
1744         }
1745         kfree(priv->steer);
1746 }
1747
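/*
 * Flatten (slot, function) into a single index (8 functions per slot in
 * conventional PCI addressing).  The result is compared against the
 * probe_vf module parameter to decide which virtual functions this
 * driver instance should probe.
 */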
1748 static int extended_func_num(struct pci_dev *pdev)
1749 {
1750         return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn);
1751 }
1752
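/*
 * Single-PF ownership: a 4-byte register in BAR 0 acts as a semaphore.
 * A zero read means this function may take the device; a non-zero read
 * means another instance already owns it, so the PF path is skipped.
 * Ownership is released by writing 0 back and letting the device settle.
 */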
1753 #define MLX4_OWNER_BASE 0x8069c
1754 #define MLX4_OWNER_SIZE 4
1755
1756 static int mlx4_get_ownership(struct mlx4_dev *dev)
1757 {
1758         void __iomem *owner;
1759         u32 ret;
1760
1761         owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE,
1762                         MLX4_OWNER_SIZE);
1763         if (!owner) {
1764                 mlx4_err(dev, "Failed to obtain ownership bit\n");
1765                 return -ENOMEM;
1766         }
1767
1768         ret = readl(owner);
1769         iounmap(owner);
1770         return (int) !!ret;
1771 }
1772
1773 static void mlx4_free_ownership(struct mlx4_dev *dev)
1774 {
1775         void __iomem *owner;
1776
1777         owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE,
1778                         MLX4_OWNER_SIZE);
1779         if (!owner) {
1780                 mlx4_err(dev, "Failed to obtain ownership bit\n");
1781                 return;
1782         }
1783         writel(0, owner);
1784         msleep(1000);
1785         iounmap(owner);
1786 }
1787
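/*
 * Probe path: enable and map the PCI device, detect VF vs. PF (taking
 * device ownership and optionally enabling SR-IOV on the PF), reset the
 * HCA, then bring up the command interface, HCA, interrupts, steering
 * and resource tables, and finally the per-port info and child devices.
 * Every failure unwinds through the error labels at the bottom in
 * reverse order.
 */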
1788 static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
1789 {
1790         struct mlx4_priv *priv;
1791         struct mlx4_dev *dev;
1792         int err;
1793         int port;
1794
1795         pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));
1796
1797         if (num_vfs > MLX4_MAX_NUM_VF) {
1798                 printk(KERN_ERR "There are more VFs (%d) than allowed (%d)\n",
1799                        num_vfs, MLX4_MAX_NUM_VF);
1800                 return -EINVAL;
1801         }
1802         err = pci_enable_device(pdev);
1803         if (err) {
1804                 dev_err(&pdev->dev, "Cannot enable PCI device, "
1805                         "aborting.\n");
1806                 return err;
1807         }
1808         /*
1809          * Check for BARs.
1810          */
1811         if (((id == NULL) || !(id->driver_data & MLX4_VF)) &&
1812             !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
1813                 dev_err(&pdev->dev, "Missing DCS, aborting "
1814                         "(id == 0x%p, id->driver_data: 0x%lx, "
1815                         "pci_resource_flags(pdev, 0): 0x%lx)\n", id,
1816                         id ? id->driver_data : 0, pci_resource_flags(pdev, 0));
1817                 err = -ENODEV;
1818                 goto err_disable_pdev;
1819         }
1820         if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
1821                 dev_err(&pdev->dev, "Missing UAR, aborting.\n");
1822                 err = -ENODEV;
1823                 goto err_disable_pdev;
1824         }
1825
1826         err = pci_request_regions(pdev, DRV_NAME);
1827         if (err) {
1828                 dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
1829                 goto err_disable_pdev;
1830         }
1831
1832         pci_set_master(pdev);
1833
1834         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1835         if (err) {
1836                 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n");
1837                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1838                 if (err) {
1839                         dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
1840                         goto err_release_regions;
1841                 }
1842         }
1843         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1844         if (err) {
1845                 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit "
1846                          "consistent PCI DMA mask.\n");
1847                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1848                 if (err) {
1849                         dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
1850                                 "aborting.\n");
1851                         goto err_release_regions;
1852                 }
1853         }
1854
1855         /* Allow large DMA segments, up to the firmware limit of 1 GB */
1856         dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
1857
1858         priv = kzalloc(sizeof *priv, GFP_KERNEL);
1859         if (!priv) {
1860                 dev_err(&pdev->dev, "Device struct alloc failed, "
1861                         "aborting.\n");
1862                 err = -ENOMEM;
1863                 goto err_release_regions;
1864         }
1865
1866         dev       = &priv->dev;
1867         dev->pdev = pdev;
1868         INIT_LIST_HEAD(&priv->ctx_list);
1869         spin_lock_init(&priv->ctx_lock);
1870
1871         mutex_init(&priv->port_mutex);
1872
1873         INIT_LIST_HEAD(&priv->pgdir_list);
1874         mutex_init(&priv->pgdir_mutex);
1875
1876         INIT_LIST_HEAD(&priv->bf_list);
1877         mutex_init(&priv->bf_mutex);
1878
1879         dev->rev_id = pdev->revision;
1880         /* Detect if this device is a virtual function */
1881         if (id && id->driver_data & MLX4_VF) {
1882                 /* When acting as PF, we normally skip VFs unless explicitly
1883                  * requested to probe them. */
1884                 if (num_vfs && extended_func_num(pdev) > probe_vf) {
1885                         mlx4_warn(dev, "Skipping virtual function: %d\n",
1886                                   extended_func_num(pdev));
1887                         err = -ENODEV;
1888                         goto err_free_dev;
1889                 }
1890                 mlx4_warn(dev, "Detected virtual function - running in slave mode\n");
1891                 dev->flags |= MLX4_FLAG_SLAVE;
1892         } else {
1893                 /* We reset the device and enable SRIOV only for physical
1894                  * devices.  Try to claim ownership on the device;
1895                  * if already taken, skip -- do not allow multiple PFs */
1896                 err = mlx4_get_ownership(dev);
1897                 if (err) {
1898                         if (err < 0)
1899                                 goto err_free_dev;
1900                         else {
1901                                 mlx4_warn(dev, "Multiple PFs not yet supported."
1902                                           " Skipping PF.\n");
1903                                 err = -EINVAL;
1904                                 goto err_free_dev;
1905                         }
1906                 }
1907
1908                 if (num_vfs) {
1909                         mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", num_vfs);
1910                         err = pci_enable_sriov(pdev, num_vfs);
1911                         if (err) {
1912                                 mlx4_err(dev, "Failed to enable SR-IOV, "
1913                                          "continuing without SR-IOV enabled"
1914                                          " (err = %d).\n", err);
1915                                 err = 0;
1916                         } else {
1917                                 mlx4_warn(dev, "Running in master mode\n");
1918                                 dev->flags |= MLX4_FLAG_SRIOV |
1919                                               MLX4_FLAG_MASTER;
1920                                 dev->num_vfs = num_vfs;
1921                         }
1922                 }
1923
1924                 /*
1925                  * Now reset the HCA before we touch the PCI capabilities or
1926                  * attempt a firmware command, since a boot ROM may have left
1927                  * the HCA in an undefined state.
1928                  */
1929                 err = mlx4_reset(dev);
1930                 if (err) {
1931                         mlx4_err(dev, "Failed to reset HCA, aborting.\n");
1932                         goto err_rel_own;
1933                 }
1934         }
1935
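        /*
         * slave_start is re-entered when mlx4_init_hca() returns -EACCES:
         * this function is not the primary PF, so the command interface
         * is torn down and reinitialized in slave mode.
         */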
1936 slave_start:
1937         err = mlx4_cmd_init(dev);
1938         if (err) {
1939                 mlx4_err(dev, "Failed to init command interface, aborting.\n");
1939                 goto err_sriov;
1940         }
1941
1942         /* In slave functions, the communication channel must be initialized
1943          * before posting commands. Also, init num_slaves before calling
1944          * mlx4_init_hca */
1945         if (mlx4_is_mfunc(dev)) {
1946                 if (mlx4_is_master(dev))
1947                         dev->num_slaves = MLX4_MAX_NUM_SLAVES;
1948                 else {
1949                         dev->num_slaves = 0;
1950                         err = mlx4_multi_func_init(dev);
1951                         if (err) {
1952                                 mlx4_err(dev, "Failed to init slave mfunc"
1952                                          " interface, aborting.\n");
1953                                 goto err_cmd;
1954                         }
1955                 }
1956         }
1957
1958         err = mlx4_init_hca(dev);
1959         if (err) {
1960                 if (err == -EACCES) {
1961                         /* Not the primary physical function;
1962                          * retry, running in slave mode. */
1963                         mlx4_cmd_cleanup(dev);
1964                         dev->flags |= MLX4_FLAG_SLAVE;
1965                         dev->flags &= ~MLX4_FLAG_MASTER;
1966                         goto slave_start;
1967                 } else
1968                         goto err_mfunc;
1969         }
1970
1971         /* In master functions, the communication channel must be initialized
1972          * after obtaining its address from fw */
1973         if (mlx4_is_master(dev)) {
1974                 err = mlx4_multi_func_init(dev);
1975                 if (err) {
1976                         mlx4_err(dev, "Failed to init master mfunc "
1977                                  "interface, aborting.\n");
1977                         goto err_close;
1978                 }
1979         }
1980
1981         err = mlx4_alloc_eq_table(dev);
1982         if (err)
1983                 goto err_master_mfunc;
1984
1985         priv->msix_ctl.pool_bm = 0;
1986         mutex_init(&priv->msix_ctl.pool_lock);
1987
1988         mlx4_enable_msi_x(dev);
1989         if (mlx4_is_mfunc(dev) &&
1990             !(dev->flags & MLX4_FLAG_MSI_X)) {
1991                 mlx4_err(dev, "INTx is not supported in multi-function mode, "
1992                          "aborting.\n");
1993                 err = -EOPNOTSUPP;
1994                 goto err_free_eq;
1994         }
1995
1996         if (!mlx4_is_slave(dev)) {
1997                 err = mlx4_init_steering(dev);
1998                 if (err)
1999                         goto err_free_eq;
2000         }
2001
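        /*
         * If HCA setup fails with -EBUSY while MSI-X is enabled on a
         * single-function device, fall back to INTx with a single
         * completion vector and retry once.
         */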
2002         err = mlx4_setup_hca(dev);
2003         if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) &&
2004             !mlx4_is_mfunc(dev)) {
2005                 dev->flags &= ~MLX4_FLAG_MSI_X;
2006                 dev->caps.num_comp_vectors = 1;
2007                 dev->caps.comp_pool        = 0;
2008                 pci_disable_msix(pdev);
2009                 err = mlx4_setup_hca(dev);
2010         }
2011
2012         if (err)
2013                 goto err_steer;
2014
2015         for (port = 1; port <= dev->caps.num_ports; port++) {
2016                 err = mlx4_init_port_info(dev, port);
2017                 if (err)
2018                         goto err_port;
2019         }
2020
2021         err = mlx4_register_device(dev);
2022         if (err)
2023                 goto err_port;
2024
2025         mlx4_sense_init(dev);
2026         mlx4_start_sense(dev);
2027
2028         pci_set_drvdata(pdev, dev);
2029
2030         return 0;
2031
2032 err_port:
2033         for (--port; port >= 1; --port)
2034                 mlx4_cleanup_port_info(&priv->port[port]);
2035
2036         mlx4_cleanup_counters_table(dev);
2037         mlx4_cleanup_mcg_table(dev);
2038         mlx4_cleanup_qp_table(dev);
2039         mlx4_cleanup_srq_table(dev);
2040         mlx4_cleanup_cq_table(dev);
2041         mlx4_cmd_use_polling(dev);
2042         mlx4_cleanup_eq_table(dev);
2043         mlx4_cleanup_mr_table(dev);
2044         mlx4_cleanup_xrcd_table(dev);
2045         mlx4_cleanup_pd_table(dev);
2046         mlx4_cleanup_uar_table(dev);
2047
2048 err_steer:
2049         if (!mlx4_is_slave(dev))
2050                 mlx4_clear_steering(dev);
2051
2052 err_free_eq:
2053         mlx4_free_eq_table(dev);
2054
2055 err_master_mfunc:
2056         if (mlx4_is_master(dev))
2057                 mlx4_multi_func_cleanup(dev);
2058
2059 err_close:
2060         if (dev->flags & MLX4_FLAG_MSI_X)
2061                 pci_disable_msix(pdev);
2062
2063         mlx4_close_hca(dev);
2064
2065 err_mfunc:
2066         if (mlx4_is_slave(dev))
2067                 mlx4_multi_func_cleanup(dev);
2068
2069 err_cmd:
2070         mlx4_cmd_cleanup(dev);
2071
2072 err_sriov:
2073         if (dev->flags & MLX4_FLAG_SRIOV)
2074                 pci_disable_sriov(pdev);
2075
2076 err_rel_own:
2077         if (!mlx4_is_slave(dev))
2078                 mlx4_free_ownership(dev);
2079
2080 err_free_dev:
2081         kfree(priv);
2082
2083 err_release_regions:
2084         pci_release_regions(pdev);
2085
2086 err_disable_pdev:
2087         pci_disable_device(pdev);
2088         pci_set_drvdata(pdev, NULL);
2089         return err;
2090 }
2091
2092 static int __devinit mlx4_init_one(struct pci_dev *pdev,
2093                                    const struct pci_device_id *id)
2094 {
2095         printk_once(KERN_INFO "%s", mlx4_version);
2096
2097         return __mlx4_init_one(pdev, id);
2098 }
2099
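/*
 * Teardown mirrors the probe path in reverse: stop port sensing,
 * unregister child devices, close ports, free the resource tables and
 * the resource tracker (master only), unmap the kernel access region,
 * shut down the HCA and command interface, and finally release MSI-X,
 * SR-IOV, device ownership and the PCI resources.
 */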
2100 static void mlx4_remove_one(struct pci_dev *pdev)
2101 {
2102         struct mlx4_dev  *dev  = pci_get_drvdata(pdev);
2103         struct mlx4_priv *priv = mlx4_priv(dev);
2104         int p;
2105
2106         if (dev) {
2107                 /* In SR-IOV mode it is not allowed to unload the PF's
2108                  * driver while there are active VFs. */
2109                 if (mlx4_is_master(dev)) {
2110                         if (mlx4_how_many_lives_vf(dev))
2111                                 printk(KERN_ERR "Removing PF while there are assigned VFs!\n");
2112                 }
2113                 mlx4_stop_sense(dev);
2114                 mlx4_unregister_device(dev);
2115
2116                 for (p = 1; p <= dev->caps.num_ports; p++) {
2117                         mlx4_cleanup_port_info(&priv->port[p]);
2118                         mlx4_CLOSE_PORT(dev, p);
2119                 }
2120
2121                 if (mlx4_is_master(dev))
2122                         mlx4_free_resource_tracker(dev,
2123                                                    RES_TR_FREE_SLAVES_ONLY);
2124
2125                 mlx4_cleanup_counters_table(dev);
2126                 mlx4_cleanup_mcg_table(dev);
2127                 mlx4_cleanup_qp_table(dev);
2128                 mlx4_cleanup_srq_table(dev);
2129                 mlx4_cleanup_cq_table(dev);
2130                 mlx4_cmd_use_polling(dev);
2131                 mlx4_cleanup_eq_table(dev);
2132                 mlx4_cleanup_mr_table(dev);
2133                 mlx4_cleanup_xrcd_table(dev);
2134                 mlx4_cleanup_pd_table(dev);
2135
2136                 if (mlx4_is_master(dev))
2137                         mlx4_free_resource_tracker(dev,
2138                                                    RES_TR_FREE_STRUCTS_ONLY);
2139
2140                 iounmap(priv->kar);
2141                 mlx4_uar_free(dev, &priv->driver_uar);
2142                 mlx4_cleanup_uar_table(dev);
2143                 if (!mlx4_is_slave(dev))
2144                         mlx4_clear_steering(dev);
2145                 mlx4_free_eq_table(dev);
2146                 if (mlx4_is_master(dev))
2147                         mlx4_multi_func_cleanup(dev);
2148                 mlx4_close_hca(dev);
2149                 if (mlx4_is_slave(dev))
2150                         mlx4_multi_func_cleanup(dev);
2151                 mlx4_cmd_cleanup(dev);
2152
2153                 if (dev->flags & MLX4_FLAG_MSI_X)
2154                         pci_disable_msix(pdev);
2155                 if (dev->flags & MLX4_FLAG_SRIOV) {
2156                         mlx4_warn(dev, "Disabling SR-IOV\n");
2157                         pci_disable_sriov(pdev);
2158                 }
2159
2160                 if (!mlx4_is_slave(dev))
2161                         mlx4_free_ownership(dev);
2162                 kfree(priv);
2163                 pci_release_regions(pdev);
2164                 pci_disable_device(pdev);
2165                 pci_set_drvdata(pdev, NULL);
2166         }
2167 }
2168
2169 int mlx4_restart_one(struct pci_dev *pdev)
2170 {
2171         mlx4_remove_one(pdev);
2172         return __mlx4_init_one(pdev, NULL);
2173 }
2174
2175 static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = {
2176         /* MT25408 "Hermon" SDR */
2177         { PCI_VDEVICE(MELLANOX, 0x6340), 0 },
2178         /* MT25408 "Hermon" DDR */
2179         { PCI_VDEVICE(MELLANOX, 0x634a), 0 },
2180         /* MT25408 "Hermon" QDR */
2181         { PCI_VDEVICE(MELLANOX, 0x6354), 0 },
2182         /* MT25408 "Hermon" DDR PCIe gen2 */
2183         { PCI_VDEVICE(MELLANOX, 0x6732), 0 },
2184         /* MT25408 "Hermon" QDR PCIe gen2 */
2185         { PCI_VDEVICE(MELLANOX, 0x673c), 0 },
2186         /* MT25408 "Hermon" EN 10GigE */
2187         { PCI_VDEVICE(MELLANOX, 0x6368), 0 },
2188         /* MT25408 "Hermon" EN 10GigE PCIe gen2 */
2189         { PCI_VDEVICE(MELLANOX, 0x6750), 0 },
2190         /* MT25458 ConnectX EN 10GBASE-T 10GigE */
2191         { PCI_VDEVICE(MELLANOX, 0x6372), 0 },
2192         /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
2193         { PCI_VDEVICE(MELLANOX, 0x675a), 0 },
2194         /* MT26468 ConnectX EN 10GigE PCIe gen2*/
2195         { PCI_VDEVICE(MELLANOX, 0x6764), 0 },
2196         /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
2197         { PCI_VDEVICE(MELLANOX, 0x6746), 0 },
2198         /* MT26478 ConnectX2 40GigE PCIe gen2 */
2199         { PCI_VDEVICE(MELLANOX, 0x676e), 0 },
2200         /* MT25400 Family [ConnectX-2 Virtual Function] */
2201         { PCI_VDEVICE(MELLANOX, 0x1002), MLX4_VF },
2202         /* MT27500 Family [ConnectX-3] */
2203         { PCI_VDEVICE(MELLANOX, 0x1003), 0 },
2204         /* MT27500 Family [ConnectX-3 Virtual Function] */
2205         { PCI_VDEVICE(MELLANOX, 0x1004), MLX4_VF },
2206         { PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */
2207         { PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */
2208         { PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */
2209         { PCI_VDEVICE(MELLANOX, 0x1008), 0 }, /* MT27521 Family */
2210         { PCI_VDEVICE(MELLANOX, 0x1009), 0 }, /* MT27530 Family */
2211         { PCI_VDEVICE(MELLANOX, 0x100a), 0 }, /* MT27531 Family */
2212         { PCI_VDEVICE(MELLANOX, 0x100b), 0 }, /* MT27540 Family */
2213         { PCI_VDEVICE(MELLANOX, 0x100c), 0 }, /* MT27541 Family */
2214         { PCI_VDEVICE(MELLANOX, 0x100d), 0 }, /* MT27550 Family */
2215         { PCI_VDEVICE(MELLANOX, 0x100e), 0 }, /* MT27551 Family */
2216         { PCI_VDEVICE(MELLANOX, 0x100f), 0 }, /* MT27560 Family */
2217         { PCI_VDEVICE(MELLANOX, 0x1010), 0 }, /* MT27561 Family */
2218         { 0, }
2219 };
2220
2221 MODULE_DEVICE_TABLE(pci, mlx4_pci_table);
2222
2223 static struct pci_driver mlx4_driver = {
2224         .name           = DRV_NAME,
2225         .id_table       = mlx4_pci_table,
2226         .probe          = mlx4_init_one,
2227         .remove         = __devexit_p(mlx4_remove_one)
2228 };
2229
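/*
 * Sanity-check module parameters at init: log_num_mac must be in [0, 7],
 * log_mtts_per_seg in [1, 7], log_num_vlan is accepted but ignored, and
 * an unsupported ETH/IB port-type combination is rewritten to IB/IB.
 */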
2230 static int __init mlx4_verify_params(void)
2231 {
2232         if ((log_num_mac < 0) || (log_num_mac > 7)) {
2233                 pr_warning("mlx4_core: bad log_num_mac: %d\n", log_num_mac);
2234                 return -1;
2235         }
2236
2237         if (log_num_vlan != 0)
2238                 pr_warning("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
2239                            MLX4_LOG_NUM_VLANS);
2240
2241         if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 7)) {
2242                 pr_warning("mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg);
2243                 return -1;
2244         }
2245
2246         /* Check if module param for ports type has legal combination */
2247         if (!port_type_array[0] && port_type_array[1]) {
2248                 printk(KERN_WARNING "Module parameter configuration ETH/IB is not supported. Switching to default configuration IB/IB\n");
2249                 port_type_array[0] = true;
2250         }
2251
2252         return 0;
2253 }
2254
2255 static int __init mlx4_init(void)
2256 {
2257         int ret;
2258
2259         if (mlx4_verify_params())
2260                 return -EINVAL;
2261
2262         mlx4_catas_init();
2263
2264         mlx4_wq = create_singlethread_workqueue("mlx4");
2265         if (!mlx4_wq)
2266                 return -ENOMEM;
2267
2268         ret = pci_register_driver(&mlx4_driver);
2269         return ret < 0 ? ret : 0;
2270 }
2271
2272 static void __exit mlx4_cleanup(void)
2273 {
2274         pci_unregister_driver(&mlx4_driver);
2275         destroy_workqueue(mlx4_wq);
2276 }
2277
2278 module_init(mlx4_init);
2279 module_exit(mlx4_cleanup);