/*
 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/delay.h>
#include <linux/kmod.h>

#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>

#include "mlx4.h"
#include "fw.h"
#include "icm.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);

struct workqueue_struct *mlx4_wq;

#ifdef CONFIG_MLX4_DEBUG

int mlx4_debug_level = 0;
module_param_named(debug_level, mlx4_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");

#endif /* CONFIG_MLX4_DEBUG */

#ifdef CONFIG_PCI_MSI

static int msi_x = 1;
module_param(msi_x, int, 0444);
MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");

#else /* CONFIG_PCI_MSI */

#define msi_x (0)

#endif /* CONFIG_PCI_MSI */

static uint8_t num_vfs[3] = {0, 0, 0};
static int num_vfs_argc;
module_param_array(num_vfs, byte, &num_vfs_argc, 0444);
MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0\n"
                          "num_vfs=port1,port2,port1+2");

static uint8_t probe_vf[3] = {0, 0, 0};
static int probe_vfs_argc;
module_param_array(probe_vf, byte, &probe_vfs_argc, 0444);
MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)\n"
                           "probe_vf=port1,port2,port1+2");

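/* Illustrative usage only (a sketch, not part of the driver): the three
 * array slots are single-port-1 VFs, single-port-2 VFs, and dual-port VFs,
 * per the parameter descriptions above, so a hypothetical invocation such as
 *
 *     modprobe mlx4_core num_vfs=8,8,2 probe_vf=1,1,0
 *
 * would request eight VFs on each single port plus two dual-port VFs, with
 * one single-port VF per port probed by the PF driver itself.
 */
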
int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
module_param_named(log_num_mgm_entry_size,
                        mlx4_log_num_mgm_entry_size, int, 0444);
MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, which defines the"
                                         " number of QPs per MCG, for example:"
                                         " 10 gives 248. range: 7 <="
                                         " log_num_mgm_entry_size <= 12."
                                         " To activate device managed"
                                         " flow steering when available, set to -1");

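/* A hedged worked example of the "10 gives 248" figure above, using the
 * B0-steering formula that appears in slave_adjust_steering_mode() below:
 * an MGM entry of 2^log bytes holds 4 * (2^log / 16 - 2) QPs, so
 * log_num_mgm_entry_size = 10 gives 4 * (1024 / 16 - 2) = 248 QPs per MCG.
 */
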
static bool enable_64b_cqe_eqe = true;
module_param(enable_64b_cqe_eqe, bool, 0444);
MODULE_PARM_DESC(enable_64b_cqe_eqe,
                 "Enable 64 byte CQEs/EQEs when the FW supports this (default: True)");

#define PF_CONTEXT_BEHAVIOUR_MASK       (MLX4_FUNC_CAP_64B_EQE_CQE | \
                                         MLX4_FUNC_CAP_EQE_CQE_STRIDE | \
                                         MLX4_FUNC_CAP_DMFS_A0_STATIC)

#define RESET_PERSIST_MASK_FLAGS        (MLX4_FLAG_SRIOV)

static char mlx4_version[] =
        DRV_NAME ": Mellanox ConnectX core driver v"
        DRV_VERSION " (" DRV_RELDATE ")\n";

static struct mlx4_profile default_profile = {
        .num_qp         = 1 << 18,
        .num_srq        = 1 << 16,
        .rdmarc_per_qp  = 1 << 4,
        .num_cq         = 1 << 16,
        .num_mcg        = 1 << 13,
        .num_mpt        = 1 << 19,
        .num_mtt        = 1 << 20, /* It is really the number of MTT segments */
};

static struct mlx4_profile low_mem_profile = {
        .num_qp         = 1 << 17,
        .num_srq        = 1 << 6,
        .rdmarc_per_qp  = 1 << 4,
        .num_cq         = 1 << 8,
        .num_mcg        = 1 << 8,
        .num_mpt        = 1 << 9,
        .num_mtt        = 1 << 7,
};

static int log_num_mac = 7;
module_param_named(log_num_mac, log_num_mac, int, 0444);
MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");

static int log_num_vlan;
module_param_named(log_num_vlan, log_num_vlan, int, 0444);
MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)");
/* Log2 max number of VLANs per ETH port (0-7) */
#define MLX4_LOG_NUM_VLANS 7
#define MLX4_MIN_LOG_NUM_VLANS 0
#define MLX4_MIN_LOG_NUM_MAC 1

static bool use_prio;
module_param_named(use_prio, use_prio, bool, 0444);
MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports (deprecated)");

int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-7)");

static int port_type_array[2] = {MLX4_PORT_TYPE_NONE, MLX4_PORT_TYPE_NONE};
static int arr_argc = 2;
module_param_array(port_type_array, int, &arr_argc, 0444);
MODULE_PARM_DESC(port_type_array, "Array of port types: HW_DEFAULT (0) is default, "
                                "1 for IB, 2 for Ethernet");

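/* Illustrative usage only (a sketch): on a hypothetical dual-port VPI
 * adapter,
 *
 *     modprobe mlx4_core port_type_array=1,2
 *
 * would request IB on port 1 and Ethernet on port 2, while a 0 in a slot
 * keeps the firmware default for that port.
 */
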
struct mlx4_port_config {
        struct list_head list;
        enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
        struct pci_dev *pdev;
};

static atomic_t pf_loading = ATOMIC_INIT(0);

static inline void mlx4_set_num_reserved_uars(struct mlx4_dev *dev,
                                              struct mlx4_dev_cap *dev_cap)
{
        /* reserved_uars is counted in system-page-size units, so an
         * adjustment is applied when the UAR page size is smaller than
         * the system page size.
         */
        dev->caps.reserved_uars =
                max_t(int,
                      mlx4_get_num_reserved_uar(dev),
                      dev_cap->reserved_uars /
                        (1 << (PAGE_SHIFT - dev->uar_page_shift)));
}

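/* A hedged worked example of the adjustment above (values hypothetical):
 * on a 64KB-page system (PAGE_SHIFT = 16) with a 4KB UAR page
 * (uar_page_shift = 12), one system page holds 1 << (16 - 12) = 16 UAR
 * pages, so a firmware-reported reservation of, say, 256 UARs shrinks to
 * 256 / 16 = 16 system-page units, subject to the
 * mlx4_get_num_reserved_uar() floor.
 */
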
int mlx4_check_port_params(struct mlx4_dev *dev,
                           enum mlx4_port_type *port_type)
{
        int i;

        if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
                for (i = 0; i < dev->caps.num_ports - 1; i++) {
                        if (port_type[i] != port_type[i + 1]) {
                                mlx4_err(dev, "Only same port types supported on this HCA, aborting\n");
                                return -EINVAL;
                        }
                }
        }

        for (i = 0; i < dev->caps.num_ports; i++) {
                if (!(port_type[i] & dev->caps.supported_type[i+1])) {
                        mlx4_err(dev, "Requested port type for port %d is not supported on this HCA\n",
                                 i + 1);
                        return -EINVAL;
                }
        }
        return 0;
}

static void mlx4_set_port_mask(struct mlx4_dev *dev)
{
        int i;

        for (i = 1; i <= dev->caps.num_ports; ++i)
                dev->caps.port_mask[i] = dev->caps.port_type[i];
}

enum {
        MLX4_QUERY_FUNC_NUM_SYS_EQS = 1 << 0,
};

static int mlx4_query_func(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
        int err = 0;
        struct mlx4_func func;

        if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
                err = mlx4_QUERY_FUNC(dev, &func, 0);
                if (err) {
                        mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n");
                        return err;
                }
                dev_cap->max_eqs = func.max_eq;
                dev_cap->reserved_eqs = func.rsvd_eqs;
                dev_cap->reserved_uars = func.rsvd_uars;
                err |= MLX4_QUERY_FUNC_NUM_SYS_EQS;
        }
        return err;
}

static void mlx4_enable_cqe_eqe_stride(struct mlx4_dev *dev)
{
        struct mlx4_caps *dev_cap = &dev->caps;

        /* FW not supporting or cancelled by user */
        if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_EQE_STRIDE) ||
            !(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_CQE_STRIDE))
                return;

        /* Must have 64B CQE/EQE enabled by FW to use the bigger stride.
         * When FW has NCSI it may decide not to report 64B CQE/EQEs.
         */
        if (!(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_EQE) ||
            !(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_CQE)) {
                dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
                dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
                return;
        }

        if (cache_line_size() == 128 || cache_line_size() == 256) {
                mlx4_dbg(dev, "Enabling CQE stride, cacheLine supported\n");
                /* Changing the real data inside CQE size to 32B */
                dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
                dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;

                if (mlx4_is_master(dev))
                        dev_cap->function_caps |= MLX4_FUNC_CAP_EQE_CQE_STRIDE;
        } else {
                if (cache_line_size() != 32 && cache_line_size() != 64)
                        mlx4_dbg(dev, "Disabling CQE stride, cacheLine size unsupported\n");
                dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
                dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
        }
}

static int _mlx4_dev_port(struct mlx4_dev *dev, int port,
                          struct mlx4_port_cap *port_cap)
{
        dev->caps.vl_cap[port]                   = port_cap->max_vl;
        dev->caps.ib_mtu_cap[port]               = port_cap->ib_mtu;
        dev->phys_caps.gid_phys_table_len[port]  = port_cap->max_gids;
        dev->phys_caps.pkey_phys_table_len[port] = port_cap->max_pkeys;
        /* set gid and pkey table operating lengths by default
         * to non-sriov values
         */
        dev->caps.gid_table_len[port]  = port_cap->max_gids;
        dev->caps.pkey_table_len[port] = port_cap->max_pkeys;
        dev->caps.port_width_cap[port] = port_cap->max_port_width;
        dev->caps.eth_mtu_cap[port]    = port_cap->eth_mtu;
        dev->caps.def_mac[port]        = port_cap->def_mac;
        dev->caps.supported_type[port] = port_cap->supported_port_types;
        dev->caps.suggested_type[port] = port_cap->suggested_type;
        dev->caps.default_sense[port]  = port_cap->default_sense;
        dev->caps.trans_type[port]     = port_cap->trans_type;
        dev->caps.vendor_oui[port]     = port_cap->vendor_oui;
        dev->caps.wavelength[port]     = port_cap->wavelength;
        dev->caps.trans_code[port]     = port_cap->trans_code;

        return 0;
}

static int mlx4_dev_port(struct mlx4_dev *dev, int port,
                         struct mlx4_port_cap *port_cap)
{
        int err = 0;

        err = mlx4_QUERY_PORT(dev, port, port_cap);

        if (err)
                mlx4_err(dev, "QUERY_PORT command failed.\n");

        return err;
}

static inline void mlx4_enable_ignore_fcs(struct mlx4_dev *dev)
{
        if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_IGNORE_FCS))
                return;

        if (mlx4_is_mfunc(dev)) {
                mlx4_dbg(dev, "SRIOV mode - Disabling Ignore FCS\n");
                dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS;
                return;
        }

        if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)) {
                mlx4_dbg(dev,
                         "Keep FCS is not supported - Disabling Ignore FCS\n");
                dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS;
                return;
        }
}

#define MLX4_A0_STEERING_TABLE_SIZE     256
static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
        int err;
        int i;

        err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
        if (err) {
                mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
                return err;
        }
        mlx4_dev_cap_dump(dev, dev_cap);

        if (dev_cap->min_page_sz > PAGE_SIZE) {
                mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
                         dev_cap->min_page_sz, PAGE_SIZE);
                return -ENODEV;
        }
        if (dev_cap->num_ports > MLX4_MAX_PORTS) {
                mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
                         dev_cap->num_ports, MLX4_MAX_PORTS);
                return -ENODEV;
        }

        if (dev_cap->uar_size > pci_resource_len(dev->persist->pdev, 2)) {
                mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
                         dev_cap->uar_size,
                         (unsigned long long)
                         pci_resource_len(dev->persist->pdev, 2));
                return -ENODEV;
        }

        dev->caps.num_ports          = dev_cap->num_ports;
        dev->caps.num_sys_eqs        = dev_cap->num_sys_eqs;
        dev->phys_caps.num_phys_eqs  = dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS ?
                                       dev->caps.num_sys_eqs :
                                       MLX4_MAX_EQ_NUM;
        for (i = 1; i <= dev->caps.num_ports; ++i) {
                err = _mlx4_dev_port(dev, i, dev_cap->port_cap + i);
                if (err) {
                        mlx4_err(dev, "QUERY_PORT command failed, aborting\n");
                        return err;
                }
        }

        dev->caps.uar_page_size      = PAGE_SIZE;
        dev->caps.num_uars           = dev_cap->uar_size / PAGE_SIZE;
        dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
        dev->caps.bf_reg_size        = dev_cap->bf_reg_size;
        dev->caps.bf_regs_per_page   = dev_cap->bf_regs_per_page;
        dev->caps.max_sq_sg          = dev_cap->max_sq_sg;
        dev->caps.max_rq_sg          = dev_cap->max_rq_sg;
        dev->caps.max_wqes           = dev_cap->max_qp_sz;
        dev->caps.max_qp_init_rdma   = dev_cap->max_requester_per_qp;
        dev->caps.max_srq_wqes       = dev_cap->max_srq_sz;
        dev->caps.max_srq_sge        = dev_cap->max_rq_sg - 1;
        dev->caps.reserved_srqs      = dev_cap->reserved_srqs;
        dev->caps.max_sq_desc_sz     = dev_cap->max_sq_desc_sz;
        dev->caps.max_rq_desc_sz     = dev_cap->max_rq_desc_sz;
        /*
         * Subtract 1 from the limit because we need to allocate a
         * spare CQE so the HCA HW can tell the difference between an
         * empty CQ and a full CQ.
         */
        dev->caps.max_cqes           = dev_cap->max_cq_sz - 1;
        dev->caps.reserved_cqs       = dev_cap->reserved_cqs;
        dev->caps.reserved_eqs       = dev_cap->reserved_eqs;
        dev->caps.reserved_mtts      = dev_cap->reserved_mtts;
        dev->caps.reserved_mrws      = dev_cap->reserved_mrws;

        dev->caps.reserved_pds       = dev_cap->reserved_pds;
        dev->caps.reserved_xrcds     = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
                                        dev_cap->reserved_xrcds : 0;
        dev->caps.max_xrcds          = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
                                        dev_cap->max_xrcds : 0;
        dev->caps.mtt_entry_sz       = dev_cap->mtt_entry_sz;

        dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
        dev->caps.page_size_cap      = ~(u32) (dev_cap->min_page_sz - 1);
        dev->caps.flags              = dev_cap->flags;
        dev->caps.flags2             = dev_cap->flags2;
        dev->caps.bmme_flags         = dev_cap->bmme_flags;
        dev->caps.reserved_lkey      = dev_cap->reserved_lkey;
        dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
        dev->caps.max_gso_sz         = dev_cap->max_gso_sz;
        dev->caps.max_rss_tbl_sz     = dev_cap->max_rss_tbl_sz;

        /* Save uar page shift */
        if (!mlx4_is_slave(dev)) {
                /* A virtual PCI function needs to determine the UAR page size
                 * from firmware; only the master PCI function can set it.
                 */
                dev->uar_page_shift = DEFAULT_UAR_PAGE_SHIFT;
                mlx4_set_num_reserved_uars(dev, dev_cap);
        }

        if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN) {
                struct mlx4_init_hca_param hca_param;

                memset(&hca_param, 0, sizeof(hca_param));
                err = mlx4_QUERY_HCA(dev, &hca_param);
                /* Turn off the PHV_EN flag in case phv_check_en is set.
                 * phv_check_en is a HW check that parses the packet and
                 * verifies that the phv bit was reported correctly in the
                 * WQE. To allow QinQ, the PHV_EN flag should be set and
                 * phv_check_en must be cleared, otherwise QinQ packets
                 * will be dropped by the HW.
                 */
                if (err || hca_param.phv_check_en)
                        dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_PHV_EN;
        }

        /* Sense port is always allowed on supported devices for ConnectX-1 and -2 */
        if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT)
                dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
        /* Don't do sense port on multifunction devices (for now at least) */
        if (mlx4_is_mfunc(dev))
                dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;

        if (mlx4_low_memory_profile()) {
                dev->caps.log_num_macs  = MLX4_MIN_LOG_NUM_MAC;
                dev->caps.log_num_vlans = MLX4_MIN_LOG_NUM_VLANS;
        } else {
                dev->caps.log_num_macs  = log_num_mac;
                dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
        }

        for (i = 1; i <= dev->caps.num_ports; ++i) {
                dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE;
                if (dev->caps.supported_type[i]) {
                        /* if only ETH is supported - assign ETH */
                        if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH)
                                dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
                        /* if only IB is supported, assign IB */
                        else if (dev->caps.supported_type[i] ==
                                 MLX4_PORT_TYPE_IB)
                                dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
                        else {
                                /* if IB and ETH are supported, we set the port
                                 * type according to user selection of port type;
                                 * if user selected none, take the FW hint */
                                if (port_type_array[i - 1] == MLX4_PORT_TYPE_NONE)
                                        dev->caps.port_type[i] = dev->caps.suggested_type[i] ?
                                                MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB;
                                else
                                        dev->caps.port_type[i] = port_type_array[i - 1];
                        }
                }
                /*
                 * Link sensing is allowed on the port if 3 conditions are true:
                 * 1. Both protocols are supported on the port.
                 * 2. Different types are supported on the port.
                 * 3. FW declared that it supports link sensing.
                 */
                mlx4_priv(dev)->sense.sense_allowed[i] =
                        ((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) &&
                         (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
                         (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT));

                /*
                 * If the "default_sense" bit is set, we move the port to "AUTO" mode
                 * and perform a sense_port FW command to try and set the correct
                 * port type from the beginning.
                 */
                if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) {
                        enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE;

                        dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO;
                        mlx4_SENSE_PORT(dev, i, &sensed_port);
                        if (sensed_port != MLX4_PORT_TYPE_NONE)
                                dev->caps.port_type[i] = sensed_port;
                } else {
                        dev->caps.possible_type[i] = dev->caps.port_type[i];
                }

                if (dev->caps.log_num_macs > dev_cap->port_cap[i].log_max_macs) {
                        dev->caps.log_num_macs = dev_cap->port_cap[i].log_max_macs;
                        mlx4_warn(dev, "Requested number of MACs is too high for port %d, reducing to %d\n",
                                  i, 1 << dev->caps.log_num_macs);
                }
                if (dev->caps.log_num_vlans > dev_cap->port_cap[i].log_max_vlans) {
                        dev->caps.log_num_vlans = dev_cap->port_cap[i].log_max_vlans;
                        mlx4_warn(dev, "Requested number of VLANs is too high for port %d, reducing to %d\n",
                                  i, 1 << dev->caps.log_num_vlans);
                }
        }

        if (mlx4_is_master(dev) && (dev->caps.num_ports == 2) &&
            (port_type_array[0] == MLX4_PORT_TYPE_IB) &&
            (port_type_array[1] == MLX4_PORT_TYPE_ETH)) {
                mlx4_warn(dev,
                          "Granular QoS per VF not supported with IB/Eth configuration\n");
                dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_QOS_VPP;
        }

        dev->caps.max_counters = dev_cap->max_counters;

        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
                dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
                (1 << dev->caps.log_num_macs) *
                (1 << dev->caps.log_num_vlans) *
                dev->caps.num_ports;
        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;

        if (dev_cap->dmfs_high_rate_qpn_base > 0 &&
            dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN)
                dev->caps.dmfs_high_rate_qpn_base = dev_cap->dmfs_high_rate_qpn_base;
        else
                dev->caps.dmfs_high_rate_qpn_base =
                        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];

        if (dev_cap->dmfs_high_rate_qpn_range > 0 &&
            dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN) {
                dev->caps.dmfs_high_rate_qpn_range = dev_cap->dmfs_high_rate_qpn_range;
                dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DEFAULT;
                dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_FS_A0;
        } else {
                dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_NOT_SUPPORTED;
                dev->caps.dmfs_high_rate_qpn_base =
                        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
                dev->caps.dmfs_high_rate_qpn_range = MLX4_A0_STEERING_TABLE_SIZE;
        }

        dev->caps.rl_caps = dev_cap->rl_caps;

        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_RSS_RAW_ETH] =
                dev->caps.dmfs_high_rate_qpn_range;

        dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
                dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] +
                dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
                dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];

        dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;

        if (!enable_64b_cqe_eqe && !mlx4_is_slave(dev)) {
                if (dev_cap->flags &
                    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
                        mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
                        dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
                        dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
                }

                if (dev_cap->flags2 &
                    (MLX4_DEV_CAP_FLAG2_CQE_STRIDE |
                     MLX4_DEV_CAP_FLAG2_EQE_STRIDE)) {
                        mlx4_warn(dev, "Disabling EQE/CQE stride per user request\n");
                        dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
                        dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
                }
        }

        if ((dev->caps.flags &
            (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) &&
            mlx4_is_master(dev))
                dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;

        if (!mlx4_is_slave(dev)) {
                mlx4_enable_cqe_eqe_stride(dev);
                dev->caps.alloc_res_qp_mask =
                        (dev->caps.bf_reg_size ? MLX4_RESERVE_ETH_BF_QP : 0) |
                        MLX4_RESERVE_A0_QP;

                if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) &&
                    dev->caps.flags & MLX4_DEV_CAP_FLAG_SET_ETH_SCHED) {
                        mlx4_warn(dev, "Old device ETS support detected\n");
                        mlx4_warn(dev, "Consider upgrading device FW.\n");
                        dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_ETS_CFG;
                }

        } else {
                dev->caps.alloc_res_qp_mask = 0;
        }

        mlx4_enable_ignore_fcs(dev);

        return 0;
}

static int mlx4_get_pcie_dev_link_caps(struct mlx4_dev *dev,
                                       enum pci_bus_speed *speed,
                                       enum pcie_link_width *width)
{
        u32 lnkcap1, lnkcap2;
        int err1, err2;

#define  PCIE_MLW_CAP_SHIFT 4   /* start of MLW mask in link capabilities */

        *speed = PCI_SPEED_UNKNOWN;
        *width = PCIE_LNK_WIDTH_UNKNOWN;

        err1 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP,
                                          &lnkcap1);
        err2 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP2,
                                          &lnkcap2);
        if (!err2 && lnkcap2) { /* PCIe r3.0-compliant */
                if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB)
                        *speed = PCIE_SPEED_8_0GT;
                else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_5_0GB)
                        *speed = PCIE_SPEED_5_0GT;
                else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_2_5GB)
                        *speed = PCIE_SPEED_2_5GT;
        }
        if (!err1) {
                *width = (lnkcap1 & PCI_EXP_LNKCAP_MLW) >> PCIE_MLW_CAP_SHIFT;
                if (!lnkcap2) { /* pre-r3.0 */
                        if (lnkcap1 & PCI_EXP_LNKCAP_SLS_5_0GB)
                                *speed = PCIE_SPEED_5_0GT;
                        else if (lnkcap1 & PCI_EXP_LNKCAP_SLS_2_5GB)
                                *speed = PCIE_SPEED_2_5GT;
                }
        }

        if (*speed == PCI_SPEED_UNKNOWN || *width == PCIE_LNK_WIDTH_UNKNOWN) {
                return err1 ? err1 :
                        err2 ? err2 : -EINVAL;
        }
        return 0;
}

static void mlx4_check_pcie_caps(struct mlx4_dev *dev)
{
        enum pcie_link_width width, width_cap;
        enum pci_bus_speed speed, speed_cap;
        int err;

#define PCIE_SPEED_STR(speed) \
        (speed == PCIE_SPEED_8_0GT ? "8.0GT/s" : \
         speed == PCIE_SPEED_5_0GT ? "5.0GT/s" : \
         speed == PCIE_SPEED_2_5GT ? "2.5GT/s" : \
         "Unknown")

        err = mlx4_get_pcie_dev_link_caps(dev, &speed_cap, &width_cap);
        if (err) {
                mlx4_warn(dev,
                          "Unable to determine PCIe device BW capabilities\n");
                return;
        }

        err = pcie_get_minimum_link(dev->persist->pdev, &speed, &width);
        if (err || speed == PCI_SPEED_UNKNOWN ||
            width == PCIE_LNK_WIDTH_UNKNOWN) {
                mlx4_warn(dev,
                          "Unable to determine PCI device chain minimum BW\n");
                return;
        }

        if (width != width_cap || speed != speed_cap)
                mlx4_warn(dev,
                          "PCIe BW is different than device's capability\n");

        mlx4_info(dev, "PCIe link speed is %s, device supports %s\n",
                  PCIE_SPEED_STR(speed), PCIE_SPEED_STR(speed_cap));
        mlx4_info(dev, "PCIe link width is x%d, device supports x%d\n",
                  width, width_cap);
}

/* Check whether there are live (non-reset) VFs and return their count. */
static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        struct mlx4_slave_state *s_state;
        int i;
        int ret = 0;

        for (i = 1/* the PPF is 0 */; i < dev->num_slaves; ++i) {
                s_state = &priv->mfunc.master.slave_state[i];
                if (s_state->active && s_state->last_cmd !=
                    MLX4_COMM_CMD_RESET) {
                        mlx4_warn(dev, "%s: slave: %d is still active\n",
                                  __func__, i);
                        ret++;
                }
        }
        return ret;
}

int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey)
{
        u32 qk = MLX4_RESERVED_QKEY_BASE;

        if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX ||
            qpn < dev->phys_caps.base_proxy_sqpn)
                return -EINVAL;

        if (qpn >= dev->phys_caps.base_tunnel_sqpn)
                /* tunnel qp */
                qk += qpn - dev->phys_caps.base_tunnel_sqpn;
        else
                qk += qpn - dev->phys_caps.base_proxy_sqpn;
        *qkey = qk;
        return 0;
}
EXPORT_SYMBOL(mlx4_get_parav_qkey);

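/* A hedged sketch of the mapping above (the base values are illustrative,
 * not taken from real hardware): with base_proxy_sqpn = 0x100 and
 * base_tunnel_sqpn = 0x200, a proxy QPN of 0x104 yields
 * qkey = MLX4_RESERVED_QKEY_BASE + 4, while a tunnel QPN of 0x203 yields
 * qkey = MLX4_RESERVED_QKEY_BASE + 3; any QPN outside
 * [base_proxy_sqpn, base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX) is rejected
 * with -EINVAL.
 */
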
void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val)
{
        struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

        if (!mlx4_is_master(dev))
                return;

        priv->virt2phys_pkey[slave][port - 1][i] = val;
}
EXPORT_SYMBOL(mlx4_sync_pkey_table);

void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid)
{
        struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

        if (!mlx4_is_master(dev))
                return;

        priv->slave_node_guids[slave] = guid;
}
EXPORT_SYMBOL(mlx4_put_slave_node_guid);

__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave)
{
        struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

        if (!mlx4_is_master(dev))
                return 0;

        return priv->slave_node_guids[slave];
}
EXPORT_SYMBOL(mlx4_get_slave_node_guid);

int mlx4_is_slave_active(struct mlx4_dev *dev, int slave)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        struct mlx4_slave_state *s_slave;

        if (!mlx4_is_master(dev))
                return 0;

        s_slave = &priv->mfunc.master.slave_state[slave];
        return !!s_slave->active;
}
EXPORT_SYMBOL(mlx4_is_slave_active);

static void slave_adjust_steering_mode(struct mlx4_dev *dev,
                                       struct mlx4_dev_cap *dev_cap,
                                       struct mlx4_init_hca_param *hca_param)
{
        dev->caps.steering_mode = hca_param->steering_mode;
        if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
                dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
                dev->caps.fs_log_max_ucast_qp_range_size =
                        dev_cap->fs_log_max_ucast_qp_range_size;
        } else {
                dev->caps.num_qp_per_mgm =
                        4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2);
        }

        mlx4_dbg(dev, "Steering mode is: %s\n",
                 mlx4_steering_mode_str(dev->caps.steering_mode));
}

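/* A hedged worked example of the B0-steering fallback above: for a
 * hypothetical log_mc_entry_sz of 9 (512-byte MGM entries), the formula
 * gives 4 * (512 / 16 - 2) = 120 QPs per MCG. The log_num_mgm_entry_size
 * module parameter description near the top of this file applies the same
 * formula with log = 10 to arrive at 248.
 */
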
static int mlx4_slave_cap(struct mlx4_dev *dev)
{
        int                        err;
        u32                        page_size;
        struct mlx4_dev_cap        dev_cap;
        struct mlx4_func_cap       func_cap;
        struct mlx4_init_hca_param hca_param;
        u8                         i;

        memset(&hca_param, 0, sizeof(hca_param));
        err = mlx4_QUERY_HCA(dev, &hca_param);
        if (err) {
                mlx4_err(dev, "QUERY_HCA command failed, aborting\n");
                return err;
        }

        /* Fail if the HCA has an unknown global capability;
         * at this time, global_caps should always be zero.
         */
        if (hca_param.global_caps) {
                mlx4_err(dev, "Unknown hca global capabilities\n");
                return -ENOSYS;
        }

        mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz;

        dev->caps.hca_core_clock = hca_param.hca_core_clock;

        memset(&dev_cap, 0, sizeof(dev_cap));
        dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp;
        err = mlx4_dev_cap(dev, &dev_cap);
        if (err) {
                mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
                return err;
        }

        err = mlx4_QUERY_FW(dev);
        if (err)
                mlx4_err(dev, "QUERY_FW command failed: could not get FW version\n");

        page_size = ~dev->caps.page_size_cap + 1;
        mlx4_warn(dev, "HCA minimum page size:%d\n", page_size);
        if (page_size > PAGE_SIZE) {
                mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
                         page_size, PAGE_SIZE);
                return -ENODEV;
        }

        /* Set uar_page_shift for VF */
        dev->uar_page_shift = hca_param.uar_page_sz + 12;

        /* Make sure the master uar page size is valid */
        if (dev->uar_page_shift > PAGE_SHIFT) {
                mlx4_err(dev,
                         "Invalid configuration: uar page size is larger than system page size\n");
                return -ENODEV;
        }

        /* Set reserved_uars based on the uar_page_shift */
        mlx4_set_num_reserved_uars(dev, &dev_cap);

        /* Although the uar page size in FW differs from the system page size,
         * upper software layers (mlx4_ib, mlx4_en and part of mlx4_core)
         * still work with the assumption that uar page size == system page size.
         */
        dev->caps.uar_page_size = PAGE_SIZE;

        memset(&func_cap, 0, sizeof(func_cap));
        err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap);
        if (err) {
                mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d)\n",
                         err);
                return err;
        }

        if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) !=
            PF_CONTEXT_BEHAVIOUR_MASK) {
                mlx4_err(dev, "Unknown pf context behaviour %x known flags %x\n",
                         func_cap.pf_context_behaviour, PF_CONTEXT_BEHAVIOUR_MASK);
                return -ENOSYS;
        }

        dev->caps.num_ports             = func_cap.num_ports;
        dev->quotas.qp                  = func_cap.qp_quota;
        dev->quotas.srq                 = func_cap.srq_quota;
        dev->quotas.cq                  = func_cap.cq_quota;
        dev->quotas.mpt                 = func_cap.mpt_quota;
        dev->quotas.mtt                 = func_cap.mtt_quota;
        dev->caps.num_qps               = 1 << hca_param.log_num_qps;
        dev->caps.num_srqs              = 1 << hca_param.log_num_srqs;
        dev->caps.num_cqs               = 1 << hca_param.log_num_cqs;
        dev->caps.num_mpts              = 1 << hca_param.log_mpt_sz;
        dev->caps.num_eqs               = func_cap.max_eq;
        dev->caps.reserved_eqs          = func_cap.reserved_eq;
        dev->caps.reserved_lkey         = func_cap.reserved_lkey;
        dev->caps.num_pds               = MLX4_NUM_PDS;
        dev->caps.num_mgms              = 0;
        dev->caps.num_amgms             = 0;

        if (dev->caps.num_ports > MLX4_MAX_PORTS) {
                mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
                         dev->caps.num_ports, MLX4_MAX_PORTS);
                return -ENODEV;
        }

        mlx4_replace_zero_macs(dev);

        dev->caps.qp0_qkey = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
        dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
        dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
        dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
        dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);

        if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
            !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy ||
            !dev->caps.qp0_qkey) {
                err = -ENOMEM;
                goto err_mem;
        }

        for (i = 1; i <= dev->caps.num_ports; ++i) {
                err = mlx4_QUERY_FUNC_CAP(dev, i, &func_cap);
                if (err) {
                        mlx4_err(dev, "QUERY_FUNC_CAP port command failed for port %d, aborting (%d)\n",
                                 i, err);
                        goto err_mem;
                }
                dev->caps.qp0_qkey[i - 1] = func_cap.qp0_qkey;
                dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn;
                dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn;
                dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn;
                dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn;
                dev->caps.port_mask[i] = dev->caps.port_type[i];
                dev->caps.phys_port_id[i] = func_cap.phys_port_id;
                err = mlx4_get_slave_pkey_gid_tbl_len(dev, i,
                                                      &dev->caps.gid_table_len[i],
                                                      &dev->caps.pkey_table_len[i]);
                if (err)
                        goto err_mem;
        }

        if (dev->caps.uar_page_size * (dev->caps.num_uars -
                                       dev->caps.reserved_uars) >
                                       pci_resource_len(dev->persist->pdev,
                                                        2)) {
                mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
                         dev->caps.uar_page_size * dev->caps.num_uars,
                         (unsigned long long)
                         pci_resource_len(dev->persist->pdev, 2));
                err = -ENOMEM;
                goto err_mem;
        }

        if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
                dev->caps.eqe_size   = 64;
                dev->caps.eqe_factor = 1;
        } else {
                dev->caps.eqe_size   = 32;
                dev->caps.eqe_factor = 0;
        }

        if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
                dev->caps.cqe_size   = 64;
                dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
        } else {
                dev->caps.cqe_size   = 32;
        }

        if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_EQE_STRIDE_ENABLED) {
                dev->caps.eqe_size = hca_param.eqe_size;
                dev->caps.eqe_factor = 0;
        }

        if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_CQE_STRIDE_ENABLED) {
                dev->caps.cqe_size = hca_param.cqe_size;
                /* The user still needs to know when the CQE is larger than 32B */
                dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
        }

        dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
        mlx4_warn(dev, "Timestamping is not supported in slave mode\n");

        slave_adjust_steering_mode(dev, &dev_cap, &hca_param);
        mlx4_dbg(dev, "RSS support for IP fragments is %s\n",
                 hca_param.rss_ip_frags ? "on" : "off");

        if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP &&
            dev->caps.bf_reg_size)
                dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_ETH_BF_QP;

        if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_A0_RES_QP)
                dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_A0_QP;

        return 0;

err_mem:
        kfree(dev->caps.qp0_qkey);
        kfree(dev->caps.qp0_tunnel);
        kfree(dev->caps.qp0_proxy);
        kfree(dev->caps.qp1_tunnel);
        kfree(dev->caps.qp1_proxy);
        dev->caps.qp0_qkey = NULL;
        dev->caps.qp0_tunnel = NULL;
        dev->caps.qp0_proxy = NULL;
        dev->caps.qp1_tunnel = NULL;
        dev->caps.qp1_proxy = NULL;

        return err;
}

static void mlx4_request_modules(struct mlx4_dev *dev)
{
        int port;
        int has_ib_port = false;
        int has_eth_port = false;
#define EN_DRV_NAME     "mlx4_en"
#define IB_DRV_NAME     "mlx4_ib"

        for (port = 1; port <= dev->caps.num_ports; port++) {
                if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB)
                        has_ib_port = true;
                else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
                        has_eth_port = true;
        }

        if (has_eth_port)
                request_module_nowait(EN_DRV_NAME);
        if (has_ib_port || (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
                request_module_nowait(IB_DRV_NAME);
}

/*
 * Change the port configuration of the device.
 * Every user of this function must hold the port mutex.
 */
int mlx4_change_port_types(struct mlx4_dev *dev,
                           enum mlx4_port_type *port_types)
{
        int err = 0;
        int change = 0;
        int port;

        for (port = 0; port < dev->caps.num_ports; port++) {
                /* Change the port type only if the new type is different
                 * from the current, and not set to Auto */
                if (port_types[port] != dev->caps.port_type[port + 1])
                        change = 1;
        }
        if (change) {
                mlx4_unregister_device(dev);
                for (port = 1; port <= dev->caps.num_ports; port++) {
                        mlx4_CLOSE_PORT(dev, port);
                        dev->caps.port_type[port] = port_types[port - 1];
                        err = mlx4_SET_PORT(dev, port, -1);
                        if (err) {
                                mlx4_err(dev, "Failed to set port %d, aborting\n",
                                         port);
                                goto out;
                        }
                }
                mlx4_set_port_mask(dev);
                err = mlx4_register_device(dev);
                if (err) {
                        mlx4_err(dev, "Failed to register device\n");
                        goto out;
                }
                mlx4_request_modules(dev);
        }

out:
        return err;
}

static ssize_t show_port_type(struct device *dev,
                              struct device_attribute *attr,
                              char *buf)
{
        struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
                                                   port_attr);
        struct mlx4_dev *mdev = info->dev;
        char type[8];

        sprintf(type, "%s",
                (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ?
                "ib" : "eth");
        if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO)
                sprintf(buf, "auto (%s)\n", type);
        else
                sprintf(buf, "%s\n", type);

        return strlen(buf);
}

static ssize_t set_port_type(struct device *dev,
                             struct device_attribute *attr,
                             const char *buf, size_t count)
{
        struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
                                                   port_attr);
        struct mlx4_dev *mdev = info->dev;
        struct mlx4_priv *priv = mlx4_priv(mdev);
        enum mlx4_port_type types[MLX4_MAX_PORTS];
        enum mlx4_port_type new_types[MLX4_MAX_PORTS];
        static DEFINE_MUTEX(set_port_type_mutex);
        int i;
        int err = 0;

        mutex_lock(&set_port_type_mutex);

        if (!strcmp(buf, "ib\n"))
                info->tmp_type = MLX4_PORT_TYPE_IB;
        else if (!strcmp(buf, "eth\n"))
                info->tmp_type = MLX4_PORT_TYPE_ETH;
        else if (!strcmp(buf, "auto\n"))
                info->tmp_type = MLX4_PORT_TYPE_AUTO;
        else {
                mlx4_err(mdev, "%s is not a supported port type\n", buf);
                err = -EINVAL;
                goto err_out;
        }

        mlx4_stop_sense(mdev);
        mutex_lock(&priv->port_mutex);
        /* Possible type is always the one that was delivered */
        mdev->caps.possible_type[info->port] = info->tmp_type;

        for (i = 0; i < mdev->caps.num_ports; i++) {
                types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type :
                                        mdev->caps.possible_type[i+1];
                if (types[i] == MLX4_PORT_TYPE_AUTO)
                        types[i] = mdev->caps.port_type[i+1];
        }

        if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
            !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) {
                for (i = 1; i <= mdev->caps.num_ports; i++) {
                        if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) {
                                mdev->caps.possible_type[i] = mdev->caps.port_type[i];
                                err = -EINVAL;
                        }
                }
        }
        if (err) {
                mlx4_err(mdev, "Auto sensing is not supported on this HCA. Set only 'eth' or 'ib' for both ports (should be the same)\n");
                goto out;
        }

        mlx4_do_sense_ports(mdev, new_types, types);

        err = mlx4_check_port_params(mdev, new_types);
        if (err)
                goto out;

        /* We are about to apply the changes after the configuration
         * was verified, so there is no need to remember the temporary
         * types any more */
        for (i = 0; i < mdev->caps.num_ports; i++)
                priv->port[i + 1].tmp_type = 0;

        err = mlx4_change_port_types(mdev, new_types);

out:
        mlx4_start_sense(mdev);
        mutex_unlock(&priv->port_mutex);
err_out:
        mutex_unlock(&set_port_type_mutex);

        return err ? err : count;
}

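/* Illustrative usage of the sysfs pair above (the exact PCI device path is
 * hypothetical and depends on the host's topology):
 *
 *     cat /sys/bus/pci/devices/0000:03:00.0/mlx4_port1       # e.g. "auto (eth)"
 *     echo ib > /sys/bus/pci/devices/0000:03:00.0/mlx4_port1
 *
 * Writes accept "ib", "eth" or "auto"; the change is applied to all ports
 * together once mlx4_check_port_params() validates the combination.
 */
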
enum ibta_mtu {
        IB_MTU_256  = 1,
        IB_MTU_512  = 2,
        IB_MTU_1024 = 3,
        IB_MTU_2048 = 4,
        IB_MTU_4096 = 5
};

static inline int int_to_ibta_mtu(int mtu)
{
        switch (mtu) {
        case 256:  return IB_MTU_256;
        case 512:  return IB_MTU_512;
        case 1024: return IB_MTU_1024;
        case 2048: return IB_MTU_2048;
        case 4096: return IB_MTU_4096;
        default: return -1;
        }
}

static inline int ibta_mtu_to_int(enum ibta_mtu mtu)
{
        switch (mtu) {
        case IB_MTU_256:  return  256;
        case IB_MTU_512:  return  512;
        case IB_MTU_1024: return 1024;
        case IB_MTU_2048: return 2048;
        case IB_MTU_4096: return 4096;
        default: return -1;
        }
}

static ssize_t show_port_ib_mtu(struct device *dev,
                                struct device_attribute *attr,
                                char *buf)
{
        struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
                                                   port_mtu_attr);
        struct mlx4_dev *mdev = info->dev;

        if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH)
                mlx4_warn(mdev, "port level mtu is only used for IB ports\n");

        sprintf(buf, "%d\n",
                        ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port]));
        return strlen(buf);
}

static ssize_t set_port_ib_mtu(struct device *dev,
                               struct device_attribute *attr,
                               const char *buf, size_t count)
{
        struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
                                                   port_mtu_attr);
        struct mlx4_dev *mdev = info->dev;
        struct mlx4_priv *priv = mlx4_priv(mdev);
        int err, port, mtu, ibta_mtu = -1;

        if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) {
                mlx4_warn(mdev, "port level mtu is only used for IB ports\n");
                return -EINVAL;
        }

        err = kstrtoint(buf, 0, &mtu);
        if (!err)
                ibta_mtu = int_to_ibta_mtu(mtu);

        if (err || ibta_mtu < 0) {
                mlx4_err(mdev, "%s is an invalid IBTA mtu\n", buf);
                return -EINVAL;
        }

        mdev->caps.port_ib_mtu[info->port] = ibta_mtu;

        mlx4_stop_sense(mdev);
        mutex_lock(&priv->port_mutex);
        mlx4_unregister_device(mdev);
        for (port = 1; port <= mdev->caps.num_ports; port++) {
                mlx4_CLOSE_PORT(mdev, port);
                err = mlx4_SET_PORT(mdev, port, -1);
                if (err) {
                        mlx4_err(mdev, "Failed to set port %d, aborting\n",
                                 port);
                        goto err_set_port;
                }
        }
        err = mlx4_register_device(mdev);
err_set_port:
        mutex_unlock(&priv->port_mutex);
        mlx4_start_sense(mdev);
        return err ? err : count;
}

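/* Illustrative usage (the device path is hypothetical): writing a plain
 * integer MTU converts it to the IBTA encoding above (256..4096 map to
 * 1..5) and re-registers the device, e.g.
 *
 *     echo 2048 > /sys/bus/pci/devices/0000:03:00.0/mlx4_port1_mtu
 *
 * Any value outside the five IBTA sizes is rejected with -EINVAL.
 */
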
1254 /* bond for multi-function device */
1255 #define MAX_MF_BOND_ALLOWED_SLAVES 63
1256 static int mlx4_mf_bond(struct mlx4_dev *dev)
1257 {
1258         int err = 0;
1259         struct mlx4_slaves_pport slaves_port1;
1260         struct mlx4_slaves_pport slaves_port2;
1261         DECLARE_BITMAP(slaves_port_1_2, MLX4_MFUNC_MAX);
1262
1263         slaves_port1 = mlx4_phys_to_slaves_pport(dev, 1);
1264         slaves_port2 = mlx4_phys_to_slaves_pport(dev, 2);
1265         bitmap_and(slaves_port_1_2,
1266                    slaves_port1.slaves, slaves_port2.slaves,
1267                    dev->persist->num_vfs + 1);
1268
1269         /* only single port vfs are allowed */
1270         if (bitmap_weight(slaves_port_1_2, dev->persist->num_vfs + 1) > 1) {
1271                 mlx4_warn(dev, "HA mode unsupported for dual ported VFs\n");
1272                 return -EINVAL;
1273         }
1274
1275         /* limit on maximum allowed VFs */
1276         if ((bitmap_weight(slaves_port1.slaves, dev->persist->num_vfs + 1) +
1277             bitmap_weight(slaves_port2.slaves, dev->persist->num_vfs + 1)) >
1278             MAX_MF_BOND_ALLOWED_SLAVES)
1279                 return -EINVAL;
1280
1281         if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) {
1282                 mlx4_warn(dev, "HA mode unsupported for NON DMFS steering\n");
1283                 return -EINVAL;
1284         }
1285
1286         err = mlx4_bond_mac_table(dev);
1287         if (err)
1288                 return err;
1289         err = mlx4_bond_vlan_table(dev);
1290         if (err)
1291                 goto err1;
1292         err = mlx4_bond_fs_rules(dev);
1293         if (err)
1294                 goto err2;
1295
1296         return 0;
1297 err2:
1298         (void)mlx4_unbond_vlan_table(dev);
1299 err1:
1300         (void)mlx4_unbond_mac_table(dev);
1301         return err;
1302 }
1303
1304 static int mlx4_mf_unbond(struct mlx4_dev *dev)
1305 {
1306         int ret, ret1;
1307
1308         ret = mlx4_unbond_fs_rules(dev);
1309         if (ret)
1310                 mlx4_warn(dev, "multifunction unbond for flow rules failed (%d)\n", ret);
1311         ret1 = mlx4_unbond_mac_table(dev);
1312         if (ret1) {
1313                 mlx4_warn(dev, "multifunction unbond for MAC table failed (%d)\n", ret1);
1314                 ret = ret1;
1315         }
1316         ret1 = mlx4_unbond_vlan_table(dev);
1317         if (ret1) {
1318                 mlx4_warn(dev, "multifunction unbond for VLAN table failed (%d)\n", ret1);
1319                 ret = ret1;
1320         }
1321         return ret;
1322 }
1323
1324 int mlx4_bond(struct mlx4_dev *dev)
1325 {
1326         int ret = 0;
1327         struct mlx4_priv *priv = mlx4_priv(dev);
1328
1329         mutex_lock(&priv->bond_mutex);
1330
1331         if (!mlx4_is_bonded(dev)) {
1332                 ret = mlx4_do_bond(dev, true);
1333                 if (ret)
1334                         mlx4_err(dev, "Failed to bond device: %d\n", ret);
1335                 if (!ret && mlx4_is_master(dev)) {
1336                         ret = mlx4_mf_bond(dev);
1337                         if (ret) {
1338                                 mlx4_err(dev, "bond for multifunction failed\n");
1339                                 mlx4_do_bond(dev, false);
1340                         }
1341                 }
1342         }
1343
1344         mutex_unlock(&priv->bond_mutex);
1345         if (!ret)
1346                 mlx4_dbg(dev, "Device is bonded\n");
1347
1348         return ret;
1349 }
1350 EXPORT_SYMBOL_GPL(mlx4_bond);
1351
1352 int mlx4_unbond(struct mlx4_dev *dev)
1353 {
1354         int ret = 0;
1355         struct mlx4_priv *priv = mlx4_priv(dev);
1356
1357         mutex_lock(&priv->bond_mutex);
1358
1359         if (mlx4_is_bonded(dev)) {
1360                 int ret2 = 0;
1361
1362                 ret = mlx4_do_bond(dev, false);
1363                 if (ret)
1364                         mlx4_err(dev, "Failed to unbond device: %d\n", ret);
1365                 if (mlx4_is_master(dev))
1366                         ret2 = mlx4_mf_unbond(dev);
1367                 if (ret2) {
1368                         mlx4_warn(dev, "Failed to unbond device for multifunction (%d)\n", ret2);
1369                         ret = ret2;
1370                 }
1371         }
1372
1373         mutex_unlock(&priv->bond_mutex);
1374         if (!ret)
1375                 mlx4_dbg(dev, "Device is unbonded\n");
1376
1377         return ret;
1378 }
1379 EXPORT_SYMBOL_GPL(mlx4_unbond);
1380
1381
1382 int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p)
1383 {
1384         u8 port1 = v2p->port1;
1385         u8 port2 = v2p->port2;
1386         struct mlx4_priv *priv = mlx4_priv(dev);
1387         int err;
1388
1389         if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP))
1390                 return -ENOTSUPP;
1391
1392         mutex_lock(&priv->bond_mutex);
1393
1394         /* zero means keep current mapping for this port */
1395         if (port1 == 0)
1396                 port1 = priv->v2p.port1;
1397         if (port2 == 0)
1398                 port2 = priv->v2p.port2;
1399
1400         if ((port1 < 1) || (port1 > MLX4_MAX_PORTS) ||
1401             (port2 < 1) || (port2 > MLX4_MAX_PORTS) ||
1402             (port1 == 2 && port2 == 1)) {
1403                 /* besides the range checks, cross mapping (port1 to phys 2
1404                  * with port2 to phys 1) makes no sense and is not allowed */
1405                 err = -EINVAL;
1406         } else if ((port1 == priv->v2p.port1) &&
1407                  (port2 == priv->v2p.port2)) {
1408                 err = 0;
1409         } else {
1410                 err = mlx4_virt2phy_port_map(dev, port1, port2);
1411                 if (!err) {
1412                         mlx4_dbg(dev, "port map changed: [%d][%d]\n",
1413                                  port1, port2);
1414                         priv->v2p.port1 = port1;
1415                         priv->v2p.port2 = port2;
1416                 } else {
1417                         mlx4_err(dev, "Failed to change port map: %d\n", err);
1418                 }
1419         }
1420
1421         mutex_unlock(&priv->bond_mutex);
1422         return err;
1423 }
1424 EXPORT_SYMBOL_GPL(mlx4_port_map_set);
1425
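/*
 * A minimal, hypothetical sketch (not part of this driver) of how a
 * consumer could drive the HA bonding API above; the trigger (e.g. a
 * netdev bonding event) and the rollback policy are assumptions:
 */
static int example_enable_ha(struct mlx4_dev *dev)
{
        struct mlx4_port_map v2p = { .port1 = 1, .port2 = 1 };
        int err;

        err = mlx4_bond(dev);           /* merge the two ports' resources */
        if (err)
                return err;

        /* steer both virtual ports to physical port 1 */
        err = mlx4_port_map_set(dev, &v2p);
        if (err)
                mlx4_unbond(dev);       /* roll back on failure */
        return err;
}
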
1426 static int mlx4_load_fw(struct mlx4_dev *dev)
1427 {
1428         struct mlx4_priv *priv = mlx4_priv(dev);
1429         int err;
1430
1431         priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages,
1432                                          GFP_HIGHUSER | __GFP_NOWARN, 0);
1433         if (!priv->fw.fw_icm) {
1434                 mlx4_err(dev, "Couldn't allocate FW area, aborting\n");
1435                 return -ENOMEM;
1436         }
1437
1438         err = mlx4_MAP_FA(dev, priv->fw.fw_icm);
1439         if (err) {
1440                 mlx4_err(dev, "MAP_FA command failed, aborting\n");
1441                 goto err_free;
1442         }
1443
1444         err = mlx4_RUN_FW(dev);
1445         if (err) {
1446                 mlx4_err(dev, "RUN_FW command failed, aborting\n");
1447                 goto err_unmap_fa;
1448         }
1449
1450         return 0;
1451
1452 err_unmap_fa:
1453         mlx4_UNMAP_FA(dev);
1454
1455 err_free:
1456         mlx4_free_icm(dev, priv->fw.fw_icm, 0);
1457         return err;
1458 }
1459
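/*
 * The cMPT region is carved into four equal windows, one per resource
 * type (QP, SRQ, CQ, EQ); window N starts at
 * cmpt_base + N * (cmpt_entry_sz << MLX4_CMPT_SHIFT).
 */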
1460 static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
1461                                 int cmpt_entry_sz)
1462 {
1463         struct mlx4_priv *priv = mlx4_priv(dev);
1464         int err;
1465         int num_eqs;
1466
1467         err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table,
1468                                   cmpt_base +
1469                                   ((u64) (MLX4_CMPT_TYPE_QP *
1470                                           cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1471                                   cmpt_entry_sz, dev->caps.num_qps,
1472                                   dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1473                                   0, 0);
1474         if (err)
1475                 goto err;
1476
1477         err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table,
1478                                   cmpt_base +
1479                                   ((u64) (MLX4_CMPT_TYPE_SRQ *
1480                                           cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1481                                   cmpt_entry_sz, dev->caps.num_srqs,
1482                                   dev->caps.reserved_srqs, 0, 0);
1483         if (err)
1484                 goto err_qp;
1485
1486         err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table,
1487                                   cmpt_base +
1488                                   ((u64) (MLX4_CMPT_TYPE_CQ *
1489                                           cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1490                                   cmpt_entry_sz, dev->caps.num_cqs,
1491                                   dev->caps.reserved_cqs, 0, 0);
1492         if (err)
1493                 goto err_srq;
1494
1495         num_eqs = dev->phys_caps.num_phys_eqs;
1496         err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
1497                                   cmpt_base +
1498                                   ((u64) (MLX4_CMPT_TYPE_EQ *
1499                                           cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1500                                   cmpt_entry_sz, num_eqs, num_eqs, 0, 0);
1501         if (err)
1502                 goto err_cq;
1503
1504         return 0;
1505
1506 err_cq:
1507         mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
1508
1509 err_srq:
1510         mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
1511
1512 err_qp:
1513         mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
1514
1515 err:
1516         return err;
1517 }
1518
1519 static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
1520                          struct mlx4_init_hca_param *init_hca, u64 icm_size)
1521 {
1522         struct mlx4_priv *priv = mlx4_priv(dev);
1523         u64 aux_pages;
1524         int num_eqs;
1525         int err;
1526
1527         err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
1528         if (err) {
1529                 mlx4_err(dev, "SET_ICM_SIZE command failed, aborting\n");
1530                 return err;
1531         }
1532
1533         mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory\n",
1534                  (unsigned long long) icm_size >> 10,
1535                  (unsigned long long) aux_pages << 2);
1536
1537         priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages,
1538                                           GFP_HIGHUSER | __GFP_NOWARN, 0);
1539         if (!priv->fw.aux_icm) {
1540                 mlx4_err(dev, "Couldn't allocate aux memory, aborting\n");
1541                 return -ENOMEM;
1542         }
1543
1544         err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm);
1545         if (err) {
1546                 mlx4_err(dev, "MAP_ICM_AUX command failed, aborting\n");
1547                 goto err_free_aux;
1548         }
1549
1550         err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz);
1551         if (err) {
1552                 mlx4_err(dev, "Failed to map cMPT context memory, aborting\n");
1553                 goto err_unmap_aux;
1554         }
1555
1556
1557         num_eqs = dev->phys_caps.num_phys_eqs;
1558         err = mlx4_init_icm_table(dev, &priv->eq_table.table,
1559                                   init_hca->eqc_base, dev_cap->eqc_entry_sz,
1560                                   num_eqs, num_eqs, 0, 0);
1561         if (err) {
1562                 mlx4_err(dev, "Failed to map EQ context memory, aborting\n");
1563                 goto err_unmap_cmpt;
1564         }
1565
1566         /*
1567          * Reserved MTT entries must be aligned up to a cacheline
1568          * boundary, since the FW will write to them, while the driver
1569          * writes to all other MTT entries. (The variable
1570          * dev->caps.mtt_entry_sz below is really the MTT segment
1571          * size, not the raw entry size)
1572          */
1573         dev->caps.reserved_mtts =
1574                 ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz,
1575                       dma_get_cache_alignment()) / dev->caps.mtt_entry_sz;
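        /* illustrative numbers: with mtt_entry_sz = 64, reserved_mtts = 10
         * and a 256-byte cache line, ALIGN(640, 256) = 768 bytes, i.e. the
         * reservation is rounded up to 12 entries
         */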
1576
1577         err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
1578                                   init_hca->mtt_base,
1579                                   dev->caps.mtt_entry_sz,
1580                                   dev->caps.num_mtts,
1581                                   dev->caps.reserved_mtts, 1, 0);
1582         if (err) {
1583                 mlx4_err(dev, "Failed to map MTT context memory, aborting\n");
1584                 goto err_unmap_eq;
1585         }
1586
1587         err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table,
1588                                   init_hca->dmpt_base,
1589                                   dev_cap->dmpt_entry_sz,
1590                                   dev->caps.num_mpts,
1591                                   dev->caps.reserved_mrws, 1, 1);
1592         if (err) {
1593                 mlx4_err(dev, "Failed to map dMPT context memory, aborting\n");
1594                 goto err_unmap_mtt;
1595         }
1596
1597         err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table,
1598                                   init_hca->qpc_base,
1599                                   dev_cap->qpc_entry_sz,
1600                                   dev->caps.num_qps,
1601                                   dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1602                                   0, 0);
1603         if (err) {
1604                 mlx4_err(dev, "Failed to map QP context memory, aborting\n");
1605                 goto err_unmap_dmpt;
1606         }
1607
1608         err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table,
1609                                   init_hca->auxc_base,
1610                                   dev_cap->aux_entry_sz,
1611                                   dev->caps.num_qps,
1612                                   dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1613                                   0, 0);
1614         if (err) {
1615                 mlx4_err(dev, "Failed to map AUXC context memory, aborting\n");
1616                 goto err_unmap_qp;
1617         }
1618
1619         err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table,
1620                                   init_hca->altc_base,
1621                                   dev_cap->altc_entry_sz,
1622                                   dev->caps.num_qps,
1623                                   dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1624                                   0, 0);
1625         if (err) {
1626                 mlx4_err(dev, "Failed to map ALTC context memory, aborting\n");
1627                 goto err_unmap_auxc;
1628         }
1629
1630         err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table,
1631                                   init_hca->rdmarc_base,
1632                                   dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift,
1633                                   dev->caps.num_qps,
1634                                   dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1635                                   0, 0);
1636         if (err) {
1637                 mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n");
1638                 goto err_unmap_altc;
1639         }
1640
1641         err = mlx4_init_icm_table(dev, &priv->cq_table.table,
1642                                   init_hca->cqc_base,
1643                                   dev_cap->cqc_entry_sz,
1644                                   dev->caps.num_cqs,
1645                                   dev->caps.reserved_cqs, 0, 0);
1646         if (err) {
1647                 mlx4_err(dev, "Failed to map CQ context memory, aborting\n");
1648                 goto err_unmap_rdmarc;
1649         }
1650
1651         err = mlx4_init_icm_table(dev, &priv->srq_table.table,
1652                                   init_hca->srqc_base,
1653                                   dev_cap->srq_entry_sz,
1654                                   dev->caps.num_srqs,
1655                                   dev->caps.reserved_srqs, 0, 0);
1656         if (err) {
1657                 mlx4_err(dev, "Failed to map SRQ context memory, aborting\n");
1658                 goto err_unmap_cq;
1659         }
1660
1661         /*
1662          * For flow steering device managed mode it is required to use
1663          * mlx4_init_icm_table. For B0 steering mode it's not strictly
1664          * required, but for simplicity just map the whole multicast
1665          * group table now.  The table isn't very big and it's a lot
1666          * easier than trying to track ref counts.
1667          */
1668         err = mlx4_init_icm_table(dev, &priv->mcg_table.table,
1669                                   init_hca->mc_base,
1670                                   mlx4_get_mgm_entry_size(dev),
1671                                   dev->caps.num_mgms + dev->caps.num_amgms,
1672                                   dev->caps.num_mgms + dev->caps.num_amgms,
1673                                   0, 0);
1674         if (err) {
1675                 mlx4_err(dev, "Failed to map MCG context memory, aborting\n");
1676                 goto err_unmap_srq;
1677         }
1678
1679         return 0;
1680
1681 err_unmap_srq:
1682         mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
1683
1684 err_unmap_cq:
1685         mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
1686
1687 err_unmap_rdmarc:
1688         mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
1689
1690 err_unmap_altc:
1691         mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
1692
1693 err_unmap_auxc:
1694         mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
1695
1696 err_unmap_qp:
1697         mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
1698
1699 err_unmap_dmpt:
1700         mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
1701
1702 err_unmap_mtt:
1703         mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
1704
1705 err_unmap_eq:
1706         mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
1707
1708 err_unmap_cmpt:
1709         mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
1710         mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
1711         mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
1712         mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
1713
1714 err_unmap_aux:
1715         mlx4_UNMAP_ICM_AUX(dev);
1716
1717 err_free_aux:
1718         mlx4_free_icm(dev, priv->fw.aux_icm, 0);
1719
1720         return err;
1721 }
1722
1723 static void mlx4_free_icms(struct mlx4_dev *dev)
1724 {
1725         struct mlx4_priv *priv = mlx4_priv(dev);
1726
1727         mlx4_cleanup_icm_table(dev, &priv->mcg_table.table);
1728         mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
1729         mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
1730         mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
1731         mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
1732         mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
1733         mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
1734         mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
1735         mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
1736         mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
1737         mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
1738         mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
1739         mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
1740         mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
1741
1742         mlx4_UNMAP_ICM_AUX(dev);
1743         mlx4_free_icm(dev, priv->fw.aux_icm, 0);
1744 }
1745
1746 static void mlx4_slave_exit(struct mlx4_dev *dev)
1747 {
1748         struct mlx4_priv *priv = mlx4_priv(dev);
1749
1750         mutex_lock(&priv->cmd.slave_cmd_mutex);
1751         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP,
1752                           MLX4_COMM_TIME))
1753                 mlx4_warn(dev, "Failed to close slave function\n");
1754         mutex_unlock(&priv->cmd.slave_cmd_mutex);
1755 }
1756
1757 static int map_bf_area(struct mlx4_dev *dev)
1758 {
1759         struct mlx4_priv *priv = mlx4_priv(dev);
1760         resource_size_t bf_start;
1761         resource_size_t bf_len;
1762         int err = 0;
1763
1764         if (!dev->caps.bf_reg_size)
1765                 return -ENXIO;
1766
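        /* BAR 2 holds the UAR pages first (num_uars << PAGE_SHIFT bytes);
         * the rest of the BAR is the BlueFlame area, mapped here as
         * write-combining
         */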
1767         bf_start = pci_resource_start(dev->persist->pdev, 2) +
1768                         (dev->caps.num_uars << PAGE_SHIFT);
1769         bf_len = pci_resource_len(dev->persist->pdev, 2) -
1770                         (dev->caps.num_uars << PAGE_SHIFT);
1771         priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
1772         if (!priv->bf_mapping)
1773                 err = -ENOMEM;
1774
1775         return err;
1776 }
1777
1778 static void unmap_bf_area(struct mlx4_dev *dev)
1779 {
1780         if (mlx4_priv(dev)->bf_mapping)
1781                 io_mapping_free(mlx4_priv(dev)->bf_mapping);
1782 }
1783
1784 cycle_t mlx4_read_clock(struct mlx4_dev *dev)
1785 {
1786         u32 clockhi, clocklo, clockhi1;
1787         cycle_t cycles;
1788         int i;
1789         struct mlx4_priv *priv = mlx4_priv(dev);
1790
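        /* read hi, lo, then hi again: if the high word is unchanged, the
         * low word did not wrap between the reads and the combined 64-bit
         * value is consistent; otherwise retry (bounded to 10 attempts)
         */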
1791         for (i = 0; i < 10; i++) {
1792                 clockhi = swab32(readl(priv->clock_mapping));
1793                 clocklo = swab32(readl(priv->clock_mapping + 4));
1794                 clockhi1 = swab32(readl(priv->clock_mapping));
1795                 if (clockhi == clockhi1)
1796                         break;
1797         }
1798
1799         cycles = (u64) clockhi << 32 | (u64) clocklo;
1800
1801         return cycles;
1802 }
1803 EXPORT_SYMBOL_GPL(mlx4_read_clock);
1804
1805
1806 static int map_internal_clock(struct mlx4_dev *dev)
1807 {
1808         struct mlx4_priv *priv = mlx4_priv(dev);
1809
1810         priv->clock_mapping =
1811                 ioremap(pci_resource_start(dev->persist->pdev,
1812                                            priv->fw.clock_bar) +
1813                         priv->fw.clock_offset, MLX4_CLOCK_SIZE);
1814
1815         if (!priv->clock_mapping)
1816                 return -ENOMEM;
1817
1818         return 0;
1819 }
1820
1821 int mlx4_get_internal_clock_params(struct mlx4_dev *dev,
1822                                    struct mlx4_clock_params *params)
1823 {
1824         struct mlx4_priv *priv = mlx4_priv(dev);
1825
1826         if (mlx4_is_slave(dev))
1827                 return -ENOTSUPP;
1828
1829         if (!params)
1830                 return -EINVAL;
1831
1832         params->bar = priv->fw.clock_bar;
1833         params->offset = priv->fw.clock_offset;
1834         params->size = MLX4_CLOCK_SIZE;
1835
1836         return 0;
1837 }
1838 EXPORT_SYMBOL_GPL(mlx4_get_internal_clock_params);
1839
1840 static void unmap_internal_clock(struct mlx4_dev *dev)
1841 {
1842         struct mlx4_priv *priv = mlx4_priv(dev);
1843
1844         if (priv->clock_mapping)
1845                 iounmap(priv->clock_mapping);
1846 }
1847
1848 static void mlx4_close_hca(struct mlx4_dev *dev)
1849 {
1850         unmap_internal_clock(dev);
1851         unmap_bf_area(dev);
1852         if (mlx4_is_slave(dev))
1853                 mlx4_slave_exit(dev);
1854         else {
1855                 mlx4_CLOSE_HCA(dev, 0);
1856                 mlx4_free_icms(dev);
1857         }
1858 }
1859
1860 static void mlx4_close_fw(struct mlx4_dev *dev)
1861 {
1862         if (!mlx4_is_slave(dev)) {
1863                 mlx4_UNMAP_FA(dev);
1864                 mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
1865         }
1866 }
1867
1868 static int mlx4_comm_check_offline(struct mlx4_dev *dev)
1869 {
1870 #define COMM_CHAN_OFFLINE_OFFSET 0x09
1871
1872         u32 comm_flags;
1873         u32 offline_bit;
1874         unsigned long end;
1875         struct mlx4_priv *priv = mlx4_priv(dev);
1876
1877         end = msecs_to_jiffies(MLX4_COMM_OFFLINE_TIME_OUT) + jiffies;
1878         while (time_before(jiffies, end)) {
1879                 comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm +
1880                                           MLX4_COMM_CHAN_FLAGS));
1881                 offline_bit = (comm_flags &
1882                                (u32)(1 << COMM_CHAN_OFFLINE_OFFSET));
1883                 if (!offline_bit)
1884                         return 0;
1885                 /* In some cases, e.g. as part of the AER/reset flow, the PF
1886                  * needs around 100 msec to load. We therefore sleep for 100 msec
1887                  * to allow other tasks to make use of that CPU during this
1888                  * time interval.
1889                  */
1890                 msleep(100);
1891         }
1892         mlx4_err(dev, "Communication channel is offline.\n");
1893         return -EIO;
1894 }
1895
1896 static void mlx4_reset_vf_support(struct mlx4_dev *dev)
1897 {
1898 #define COMM_CHAN_RST_OFFSET 0x1e
1899
1900         struct mlx4_priv *priv = mlx4_priv(dev);
1901         u32 comm_rst;
1902         u32 comm_caps;
1903
1904         comm_caps = swab32(readl((__iomem char *)priv->mfunc.comm +
1905                                  MLX4_COMM_CHAN_CAPS));
1906         comm_rst = (comm_caps & (u32)(1 << COMM_CHAN_RST_OFFSET));
1907
1908         if (comm_rst)
1909                 dev->caps.vf_caps |= MLX4_VF_CAP_FLAG_RESET;
1910 }
1911
1912 static int mlx4_init_slave(struct mlx4_dev *dev)
1913 {
1914         struct mlx4_priv *priv = mlx4_priv(dev);
1915         u64 dma = (u64) priv->mfunc.vhcr_dma;
1916         int ret_from_reset = 0;
1917         u32 slave_read;
1918         u32 cmd_channel_ver;
1919
1920         if (atomic_read(&pf_loading)) {
1921                 mlx4_warn(dev, "PF is not ready - Deferring probe\n");
1922                 return -EPROBE_DEFER;
1923         }
1924
1925         mutex_lock(&priv->cmd.slave_cmd_mutex);
1926         priv->cmd.max_cmds = 1;
1927         if (mlx4_comm_check_offline(dev)) {
1928                 mlx4_err(dev, "PF is not responsive, skipping initialization\n");
1929                 goto err_offline;
1930         }
1931
1932         mlx4_reset_vf_support(dev);
1933         mlx4_warn(dev, "Sending reset\n");
1934         ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
1935                                        MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME);
1936         /* if we are in the middle of FLR, the slave will retry
1937          * NUM_OF_RESET_RETRIES times before giving up */
1938         if (ret_from_reset) {
1939                 if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) {
1940                         mlx4_warn(dev, "slave is currently in the middle of FLR - Deferring probe\n");
1941                         mutex_unlock(&priv->cmd.slave_cmd_mutex);
1942                         return -EPROBE_DEFER;
1943                 } else
1944                         goto err;
1945         }
1946
1947         /* check the driver version - the slave I/F revision
1948          * must match the master's */
1949         slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
1950         cmd_channel_ver = mlx4_comm_get_version();
1951
1952         if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) !=
1953                 MLX4_COMM_GET_IF_REV(slave_read)) {
1954                 mlx4_err(dev, "slave driver version is not supported by the master\n");
1955                 goto err;
1956         }
1957
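        /* hand the 64-bit VHCR DMA address to the PF 16 bits at a time,
         * most significant word first; VHCR_EN carries the low word and
         * enables the channel
         */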
1958         mlx4_warn(dev, "Sending vhcr0\n");
1959         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48,
1960                              MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
1961                 goto err;
1962         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32,
1963                              MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
1964                 goto err;
1965         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16,
1966                              MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
1967                 goto err;
1968         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma,
1969                           MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
1970                 goto err;
1971
1972         mutex_unlock(&priv->cmd.slave_cmd_mutex);
1973         return 0;
1974
1975 err:
1976         mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP, 0);
1977 err_offline:
1978         mutex_unlock(&priv->cmd.slave_cmd_mutex);
1979         return -EIO;
1980 }
1981
1982 static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev)
1983 {
1984         int i;
1985
1986         for (i = 1; i <= dev->caps.num_ports; i++) {
1987                 if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
1988                         dev->caps.gid_table_len[i] =
1989                                 mlx4_get_slave_num_gids(dev, 0, i);
1990                 else
1991                         dev->caps.gid_table_len[i] = 1;
1992                 dev->caps.pkey_table_len[i] =
1993                         dev->phys_caps.pkey_phys_table_len[i] - 1;
1994         }
1995 }
1996
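/*
 * Pick the smallest log2 MGM entry size whose entry can hold
 * qp_per_entry QPs: a (1 << i)-byte entry has (1 << i) / 16 - 2 member
 * slots of 4 QPs each, so e.g. i = 9 (512 bytes) fits 4 * 30 = 120 QPs.
 */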
1997 static int choose_log_fs_mgm_entry_size(int qp_per_entry)
1998 {
1999         int i;
2000
2001         for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE;
2002               i++) {
2003                 if (qp_per_entry <= 4 * ((1 << i) / 16 - 2))
2004                         break;
2005         }
2006
2007         return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1;
2008 }
2009
2010 static const char *dmfs_high_rate_steering_mode_str(int dmfs_high_steer_mode)
2011 {
2012         switch (dmfs_high_steer_mode) {
2013         case MLX4_STEERING_DMFS_A0_DEFAULT:
2014                 return "default performance";
2015
2016         case MLX4_STEERING_DMFS_A0_DYNAMIC:
2017                 return "dynamic hybrid mode";
2018
2019         case MLX4_STEERING_DMFS_A0_STATIC:
2020                 return "performance optimized for limited rule configuration (static)";
2021
2022         case MLX4_STEERING_DMFS_A0_DISABLE:
2023                 return "disabled performance optimized steering";
2024
2025         case MLX4_STEERING_DMFS_A0_NOT_SUPPORTED:
2026                 return "performance optimized steering not supported";
2027
2028         default:
2029                 return "Unrecognized mode";
2030         }
2031 }
2032
2033 #define MLX4_DMFS_A0_STEERING                   (1UL << 2)
2034
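/*
 * Steering mode is chosen in order of preference: device-managed flow
 * steering (DMFS) when log_num_mgm_entry_size <= 0 and the firmware
 * capabilities allow it, then B0 when both UC and MC VEP steering flags
 * are set, with A0 as the fallback.  A non-positive
 * log_num_mgm_entry_size is treated as a bit mask of DMFS options;
 * MLX4_DMFS_A0_STEERING requests the static high-rate optimization.
 */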
2035 static void choose_steering_mode(struct mlx4_dev *dev,
2036                                  struct mlx4_dev_cap *dev_cap)
2037 {
2038         if (mlx4_log_num_mgm_entry_size <= 0) {
2039                 if ((-mlx4_log_num_mgm_entry_size) & MLX4_DMFS_A0_STEERING) {
2040                         if (dev->caps.dmfs_high_steer_mode ==
2041                             MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
2042                                 mlx4_err(dev, "DMFS high rate mode not supported\n");
2043                         else
2044                                 dev->caps.dmfs_high_steer_mode =
2045                                         MLX4_STEERING_DMFS_A0_STATIC;
2046                 }
2047         }
2048
2049         if (mlx4_log_num_mgm_entry_size <= 0 &&
2050             dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN &&
2051             (!mlx4_is_mfunc(dev) ||
2052              (dev_cap->fs_max_num_qp_per_entry >=
2053              (dev->persist->num_vfs + 1))) &&
2054             choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >=
2055                 MLX4_MIN_MGM_LOG_ENTRY_SIZE) {
2056                 dev->oper_log_mgm_entry_size =
2057                         choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry);
2058                 dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED;
2059                 dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
2060                 dev->caps.fs_log_max_ucast_qp_range_size =
2061                         dev_cap->fs_log_max_ucast_qp_range_size;
2062         } else {
2063                 if (dev->caps.dmfs_high_steer_mode !=
2064                     MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
2065                         dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DISABLE;
2066                 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER &&
2067                     dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
2068                         dev->caps.steering_mode = MLX4_STEERING_MODE_B0;
2069                 else {
2070                         dev->caps.steering_mode = MLX4_STEERING_MODE_A0;
2071
2072                         if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER ||
2073                             dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
2074                                 mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags set to use B0 steering - falling back to A0 steering mode\n");
2075                 }
2076                 dev->oper_log_mgm_entry_size =
2077                         mlx4_log_num_mgm_entry_size > 0 ?
2078                         mlx4_log_num_mgm_entry_size :
2079                         MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
2080                 dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev);
2081         }
2082         mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, modparam log_num_mgm_entry_size = %d\n",
2083                  mlx4_steering_mode_str(dev->caps.steering_mode),
2084                  dev->oper_log_mgm_entry_size,
2085                  mlx4_log_num_mgm_entry_size);
2086 }
2087
2088 static void choose_tunnel_offload_mode(struct mlx4_dev *dev,
2089                                        struct mlx4_dev_cap *dev_cap)
2090 {
2091         if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED &&
2092             dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS)
2093                 dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_VXLAN;
2094         else
2095                 dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_NONE;
2096
2097         mlx4_dbg(dev, "Tunneling offload mode is: %s\n",  (dev->caps.tunnel_offload_mode
2098                  == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) ? "vxlan" : "none");
2099 }
2100
2101 static int mlx4_validate_optimized_steering(struct mlx4_dev *dev)
2102 {
2103         int i;
2104         struct mlx4_port_cap port_cap;
2105
2106         if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
2107                 return -EINVAL;
2108
2109         for (i = 1; i <= dev->caps.num_ports; i++) {
2110                 if (mlx4_dev_port(dev, i, &port_cap)) {
2111                         mlx4_err(dev,
2112                                  "QUERY_DEV_CAP command failed, can't verify DMFS high rate steering\n");
2113                 } else if ((dev->caps.dmfs_high_steer_mode !=
2114                             MLX4_STEERING_DMFS_A0_DEFAULT) &&
2115                            (port_cap.dmfs_optimized_state ==
2116                             !!(dev->caps.dmfs_high_steer_mode ==
2117                             MLX4_STEERING_DMFS_A0_DISABLE))) {
2118                         mlx4_err(dev,
2119                                  "DMFS high rate steer mode mismatch: driver requested %s but FW reports %s\n",
2120                                  dmfs_high_rate_steering_mode_str(
2121                                         dev->caps.dmfs_high_steer_mode),
2122                                  (port_cap.dmfs_optimized_state ?
2123                                         "enabled" : "disabled"));
2124                 }
2125         }
2126
2127         return 0;
2128 }
2129
2130 static int mlx4_init_fw(struct mlx4_dev *dev)
2131 {
2132         struct mlx4_mod_stat_cfg   mlx4_cfg;
2133         int err = 0;
2134
2135         if (!mlx4_is_slave(dev)) {
2136                 err = mlx4_QUERY_FW(dev);
2137                 if (err) {
2138                         if (err == -EACCES)
2139                                 mlx4_info(dev, "non-primary physical function, skipping\n");
2140                         else
2141                                 mlx4_err(dev, "QUERY_FW command failed, aborting\n");
2142                         return err;
2143                 }
2144
2145                 err = mlx4_load_fw(dev);
2146                 if (err) {
2147                         mlx4_err(dev, "Failed to start FW, aborting\n");
2148                         return err;
2149                 }
2150
2151                 mlx4_cfg.log_pg_sz_m = 1;
2152                 mlx4_cfg.log_pg_sz = 0;
2153                 err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
2154                 if (err)
2155                         mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
2156         }
2157
2158         return err;
2159 }
2160
2161 static int mlx4_init_hca(struct mlx4_dev *dev)
2162 {
2163         struct mlx4_priv          *priv = mlx4_priv(dev);
2164         struct mlx4_adapter        adapter;
2165         struct mlx4_dev_cap        dev_cap;
2166         struct mlx4_profile        profile;
2167         struct mlx4_init_hca_param init_hca;
2168         u64 icm_size;
2169         struct mlx4_config_dev_params params;
2170         int err;
2171
2172         if (!mlx4_is_slave(dev)) {
2173                 err = mlx4_dev_cap(dev, &dev_cap);
2174                 if (err) {
2175                         mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
2176                         return err;
2177                 }
2178
2179                 choose_steering_mode(dev, &dev_cap);
2180                 choose_tunnel_offload_mode(dev, &dev_cap);
2181
2182                 if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC &&
2183                     mlx4_is_master(dev))
2184                         dev->caps.function_caps |= MLX4_FUNC_CAP_DMFS_A0_STATIC;
2185
2186                 err = mlx4_get_phys_port_id(dev);
2187                 if (err)
2188                         mlx4_err(dev, "Failed to get physical port id\n");
2189
2190                 if (mlx4_is_master(dev))
2191                         mlx4_parav_master_pf_caps(dev);
2192
2193                 if (mlx4_low_memory_profile()) {
2194                         mlx4_info(dev, "Running from within kdump kernel. Using low memory profile\n");
2195                         profile = low_mem_profile;
2196                 } else {
2197                         profile = default_profile;
2198                 }
2199                 if (dev->caps.steering_mode ==
2200                     MLX4_STEERING_MODE_DEVICE_MANAGED)
2201                         profile.num_mcg = MLX4_FS_NUM_MCG;
2202
2203                 icm_size = mlx4_make_profile(dev, &profile, &dev_cap,
2204                                              &init_hca);
2205                 if ((long long) icm_size < 0) {
2206                         err = icm_size;
2207                         return err;
2208                 }
2209
2210                 dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;
2211
2212                 /* Always set UAR page size 4KB, set log_uar_sz accordingly */
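                /* (uar_page_sz is encoded as log2(UAR page size) - 12,
                 *  so the default 4KB page is encoded as 0)
                 */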
2213                 init_hca.log_uar_sz = ilog2(dev->caps.num_uars) +
2214                                       PAGE_SHIFT -
2215                                       DEFAULT_UAR_PAGE_SHIFT;
2216                 init_hca.uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12;
2217
2218                 init_hca.mw_enabled = 0;
2219                 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
2220                     dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)
2221                         init_hca.mw_enabled = INIT_HCA_TPT_MW_ENABLE;
2222
2223                 err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
2224                 if (err)
2225                         return err;
2226
2227                 err = mlx4_INIT_HCA(dev, &init_hca);
2228                 if (err) {
2229                         mlx4_err(dev, "INIT_HCA command failed, aborting\n");
2230                         goto err_free_icm;
2231                 }
2232
2233                 if (dev_cap.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
2234                         err = mlx4_query_func(dev, &dev_cap);
2235                         if (err < 0) {
2236                                 mlx4_err(dev, "QUERY_FUNC command failed, aborting\n");
2237                                 goto err_close;
2238                         } else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) {
2239                                 dev->caps.num_eqs = dev_cap.max_eqs;
2240                                 dev->caps.reserved_eqs = dev_cap.reserved_eqs;
2241                                 dev->caps.reserved_uars = dev_cap.reserved_uars;
2242                         }
2243                 }
2244
2245                 /*
2246                  * If TS is supported by FW
2247                  * read HCA frequency by QUERY_HCA command
2248                  */
2249                 if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) {
2250                         memset(&init_hca, 0, sizeof(init_hca));
2251                         err = mlx4_QUERY_HCA(dev, &init_hca);
2252                         if (err) {
2253                                 mlx4_err(dev, "QUERY_HCA command failed, disabling timestamping\n");
2254                                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
2255                         } else {
2256                                 dev->caps.hca_core_clock =
2257                                         init_hca.hca_core_clock;
2258                         }
2259
2260                         /* In case we got HCA frequency 0 - disable timestamping
2261                          * to avoid dividing by zero
2262                          */
2263                         if (!dev->caps.hca_core_clock) {
2264                                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
2265                                 mlx4_err(dev,
2266                                          "HCA frequency is 0 - timestamping is not supported\n");
2267                         } else if (map_internal_clock(dev)) {
2268                                 /*
2269                                  * mapping the internal clock failed;
2270                                  * disable timestamping
2271                                  */
2272                                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
2273                                 mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported\n");
2274                         }
2275                 }
2276
2277                 if (dev->caps.dmfs_high_steer_mode !=
2278                     MLX4_STEERING_DMFS_A0_NOT_SUPPORTED) {
2279                         if (mlx4_validate_optimized_steering(dev))
2280                                 mlx4_warn(dev, "Optimized steering validation failed\n");
2281
2282                         if (dev->caps.dmfs_high_steer_mode ==
2283                             MLX4_STEERING_DMFS_A0_DISABLE) {
2284                                 dev->caps.dmfs_high_rate_qpn_base =
2285                                         dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
2286                                 dev->caps.dmfs_high_rate_qpn_range =
2287                                         MLX4_A0_STEERING_TABLE_SIZE;
2288                         }
2289
2290                         mlx4_dbg(dev, "DMFS high rate steer mode is: %s\n",
2291                                  dmfs_high_rate_steering_mode_str(
2292                                         dev->caps.dmfs_high_steer_mode));
2293                 }
2294         } else {
2295                 err = mlx4_init_slave(dev);
2296                 if (err) {
2297                         if (err != -EPROBE_DEFER)
2298                                 mlx4_err(dev, "Failed to initialize slave\n");
2299                         return err;
2300                 }
2301
2302                 err = mlx4_slave_cap(dev);
2303                 if (err) {
2304                         mlx4_err(dev, "Failed to obtain slave caps\n");
2305                         goto err_close;
2306                 }
2307         }
2308
2309         if (map_bf_area(dev))
2310                 mlx4_dbg(dev, "Failed to map blue flame area\n");
2311
2312         /* Only the master sets the ports; all other functions get them from it */
2313         if (!mlx4_is_slave(dev))
2314                 mlx4_set_port_mask(dev);
2315
2316         err = mlx4_QUERY_ADAPTER(dev, &adapter);
2317         if (err) {
2318                 mlx4_err(dev, "QUERY_ADAPTER command failed, aborting\n");
2319                 goto unmap_bf;
2320         }
2321
2322         /* Query CONFIG_DEV parameters */
2323         err = mlx4_config_dev_retrieval(dev, &params);
2324         if (err && err != -ENOTSUPP) {
2325                 mlx4_err(dev, "Failed to query CONFIG_DEV parameters\n");
2326         } else if (!err) {
2327                 dev->caps.rx_checksum_flags_port[1] = params.rx_csum_flags_port_1;
2328                 dev->caps.rx_checksum_flags_port[2] = params.rx_csum_flags_port_2;
2329         }
2330         priv->eq_table.inta_pin = adapter.inta_pin;
2331         memcpy(dev->board_id, adapter.board_id, sizeof(dev->board_id));
2332
2333         return 0;
2334
2335 unmap_bf:
2336         unmap_internal_clock(dev);
2337         unmap_bf_area(dev);
2338
2339         if (mlx4_is_slave(dev)) {
2340                 kfree(dev->caps.qp0_qkey);
2341                 kfree(dev->caps.qp0_tunnel);
2342                 kfree(dev->caps.qp0_proxy);
2343                 kfree(dev->caps.qp1_tunnel);
2344                 kfree(dev->caps.qp1_proxy);
2345         }
2346
2347 err_close:
2348         if (mlx4_is_slave(dev))
2349                 mlx4_slave_exit(dev);
2350         else
2351                 mlx4_CLOSE_HCA(dev, 0);
2352
2353 err_free_icm:
2354         if (!mlx4_is_slave(dev))
2355                 mlx4_free_icms(dev);
2356
2357         return err;
2358 }
2359
2360 static int mlx4_init_counters_table(struct mlx4_dev *dev)
2361 {
2362         struct mlx4_priv *priv = mlx4_priv(dev);
2363         int nent_pow2;
2364
2365         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2366                 return -ENOENT;
2367
2368         if (!dev->caps.max_counters)
2369                 return -ENOSPC;
2370
2371         nent_pow2 = roundup_pow_of_two(dev->caps.max_counters);
2372         /* reserve last counter index for sink counter */
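        /* e.g. max_counters = 40: nent_pow2 = 64 with the top 25 indices
         * reserved, so 0..38 remain allocatable and the last valid
         * index, 39, is kept for the sink counter
         */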
2373         return mlx4_bitmap_init(&priv->counters_bitmap, nent_pow2,
2374                                 nent_pow2 - 1, 0,
2375                                 nent_pow2 - dev->caps.max_counters + 1);
2376 }
2377
2378 static void mlx4_cleanup_counters_table(struct mlx4_dev *dev)
2379 {
2380         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2381                 return;
2382
2383         if (!dev->caps.max_counters)
2384                 return;
2385
2386         mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap);
2387 }
2388
2389 static void mlx4_cleanup_default_counters(struct mlx4_dev *dev)
2390 {
2391         struct mlx4_priv *priv = mlx4_priv(dev);
2392         int port;
2393
2394         for (port = 0; port < dev->caps.num_ports; port++)
2395                 if (priv->def_counter[port] != -1)
2396                         mlx4_counter_free(dev,  priv->def_counter[port]);
2397 }
2398
2399 static int mlx4_allocate_default_counters(struct mlx4_dev *dev)
2400 {
2401         struct mlx4_priv *priv = mlx4_priv(dev);
2402         int port, err = 0;
2403         u32 idx;
2404
2405         for (port = 0; port < dev->caps.num_ports; port++)
2406                 priv->def_counter[port] = -1;
2407
2408         for (port = 0; port < dev->caps.num_ports; port++) {
2409                 err = mlx4_counter_alloc(dev, &idx);
2410
2411                 if (!err || err == -ENOSPC) {
2412                         priv->def_counter[port] = idx;
2413                 } else if (err == -ENOENT) {
2414                         err = 0;
2415                         continue;
2416                 } else if (mlx4_is_slave(dev) && err == -EINVAL) {
2417                         priv->def_counter[port] = MLX4_SINK_COUNTER_INDEX(dev);
2418                         mlx4_warn(dev, "can't allocate counter from old PF driver, using index %d\n",
2419                                   MLX4_SINK_COUNTER_INDEX(dev));
2420                         err = 0;
2421                 } else {
2422                         mlx4_err(dev, "%s: failed to allocate default counter port %d err %d\n",
2423                                  __func__, port + 1, err);
2424                         mlx4_cleanup_default_counters(dev);
2425                         return err;
2426                 }
2427
2428                 mlx4_dbg(dev, "%s: default counter index %d for port %d\n",
2429                          __func__, priv->def_counter[port], port + 1);
2430         }
2431
2432         return err;
2433 }
2434
2435 int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
2436 {
2437         struct mlx4_priv *priv = mlx4_priv(dev);
2438
2439         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2440                 return -ENOENT;
2441
2442         *idx = mlx4_bitmap_alloc(&priv->counters_bitmap);
2443         if (*idx == -1) {
2444                 *idx = MLX4_SINK_COUNTER_INDEX(dev);
2445                 return -ENOSPC;
2446         }
2447
2448         return 0;
2449 }
2450
2451 int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
2452 {
2453         u64 out_param;
2454         int err;
2455
2456         if (mlx4_is_mfunc(dev)) {
2457                 err = mlx4_cmd_imm(dev, 0, &out_param, RES_COUNTER,
2458                                    RES_OP_RESERVE, MLX4_CMD_ALLOC_RES,
2459                                    MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
2460                 if (!err)
2461                         *idx = get_param_l(&out_param);
2462
2463                 return err;
2464         }
2465         return __mlx4_counter_alloc(dev, idx);
2466 }
2467 EXPORT_SYMBOL_GPL(mlx4_counter_alloc);
2468
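/*
 * A minimal, hypothetical caller sketch (not part of this driver) for
 * the exported counter API; the declarations are assumed to come from
 * <linux/mlx4/device.h>:
 */
static void example_counter_roundtrip(struct mlx4_dev *dev)
{
        u32 idx;

        if (mlx4_counter_alloc(dev, &idx))
                return;                 /* no counter available */

        /* ... attach idx to a QP context and read its statistics ... */

        mlx4_counter_free(dev, idx);
}
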
2469 static int __mlx4_clear_if_stat(struct mlx4_dev *dev,
2470                                 u8 counter_index)
2471 {
2472         struct mlx4_cmd_mailbox *if_stat_mailbox;
2473         int err;
2474         u32 if_stat_in_mod = (counter_index & 0xff) | MLX4_QUERY_IF_STAT_RESET;
2475
2476         if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev);
2477         if (IS_ERR(if_stat_mailbox))
2478                 return PTR_ERR(if_stat_mailbox);
2479
2480         err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, if_stat_in_mod, 0,
2481                            MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C,
2482                            MLX4_CMD_NATIVE);
2483
2484         mlx4_free_cmd_mailbox(dev, if_stat_mailbox);
2485         return err;
2486 }
2487
2488 void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
2489 {
2490         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2491                 return;
2492
2493         if (idx == MLX4_SINK_COUNTER_INDEX(dev))
2494                 return;
2495
2496         __mlx4_clear_if_stat(dev, idx);
2497
2498         mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx, MLX4_USE_RR);
2500 }
2501
2502 void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
2503 {
2504         u64 in_param = 0;
2505
2506         if (mlx4_is_mfunc(dev)) {
2507                 set_param_l(&in_param, idx);
2508                 mlx4_cmd(dev, in_param, RES_COUNTER, RES_OP_RESERVE,
2509                          MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
2510                          MLX4_CMD_WRAPPED);
2511                 return;
2512         }
2513         __mlx4_counter_free(dev, idx);
2514 }
2515 EXPORT_SYMBOL_GPL(mlx4_counter_free);
2516
2517 int mlx4_get_default_counter_index(struct mlx4_dev *dev, int port)
2518 {
2519         struct mlx4_priv *priv = mlx4_priv(dev);
2520
2521         return priv->def_counter[port - 1];
2522 }
2523 EXPORT_SYMBOL_GPL(mlx4_get_default_counter_index);
2524
2525 void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry, int port)
2526 {
2527         struct mlx4_priv *priv = mlx4_priv(dev);
2528
2529         priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
2530 }
2531 EXPORT_SYMBOL_GPL(mlx4_set_admin_guid);
2532
2533 __be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port)
2534 {
2535         struct mlx4_priv *priv = mlx4_priv(dev);
2536
2537         return priv->mfunc.master.vf_admin[entry].vport[port].guid;
2538 }
2539 EXPORT_SYMBOL_GPL(mlx4_get_admin_guid);
2540
2541 void mlx4_set_random_admin_guid(struct mlx4_dev *dev, int entry, int port)
2542 {
2543         struct mlx4_priv *priv = mlx4_priv(dev);
2544         __be64 guid;
2545
2546         /* entry 0 is the HW-assigned GUID; don't overwrite it */
2547         if (entry == 0)
2548                 return;
2549
2550         get_random_bytes((char *)&guid, sizeof(guid));
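        /* in EUI-64 terms: clear the group (multicast) bit and set the
         * locally-administered bit of the random GUID
         */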
2551         guid &= ~(cpu_to_be64(1ULL << 56));
2552         guid |= cpu_to_be64(1ULL << 57);
2553         priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
2554 }
2555
2556 static int mlx4_setup_hca(struct mlx4_dev *dev)
2557 {
2558         struct mlx4_priv *priv = mlx4_priv(dev);
2559         int err;
2560         int port;
2561         __be32 ib_port_default_caps;
2562
2563         err = mlx4_init_uar_table(dev);
2564         if (err) {
2565                 mlx4_err(dev, "Failed to initialize user access region table, aborting\n");
2566                 return err;
2567         }
2568
2569         err = mlx4_uar_alloc(dev, &priv->driver_uar);
2570         if (err) {
2571                 mlx4_err(dev, "Failed to allocate driver access region, aborting\n");
2572                 goto err_uar_table_free;
2573         }
2574
2575         priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
2576         if (!priv->kar) {
2577                 mlx4_err(dev, "Couldn't map kernel access region, aborting\n");
2578                 err = -ENOMEM;
2579                 goto err_uar_free;
2580         }
2581
2582         err = mlx4_init_pd_table(dev);
2583         if (err) {
2584                 mlx4_err(dev, "Failed to initialize protection domain table, aborting\n");
2585                 goto err_kar_unmap;
2586         }
2587
2588         err = mlx4_init_xrcd_table(dev);
2589         if (err) {
2590                 mlx4_err(dev, "Failed to initialize reliable connection domain table, aborting\n");
2591                 goto err_pd_table_free;
2592         }
2593
2594         err = mlx4_init_mr_table(dev);
2595         if (err) {
2596                 mlx4_err(dev, "Failed to initialize memory region table, aborting\n");
2597                 goto err_xrcd_table_free;
2598         }
2599
2600         if (!mlx4_is_slave(dev)) {
2601                 err = mlx4_init_mcg_table(dev);
2602                 if (err) {
2603                         mlx4_err(dev, "Failed to initialize multicast group table, aborting\n");
2604                         goto err_mr_table_free;
2605                 }
2606                 err = mlx4_config_mad_demux(dev);
2607                 if (err) {
2608                         mlx4_err(dev, "Failed in config_mad_demux, aborting\n");
2609                         goto err_mcg_table_free;
2610                 }
2611         }
2612
2613         err = mlx4_init_eq_table(dev);
2614         if (err) {
2615                 mlx4_err(dev, "Failed to initialize event queue table, aborting\n");
2616                 goto err_mcg_table_free;
2617         }
2618
2619         err = mlx4_cmd_use_events(dev);
2620         if (err) {
2621                 mlx4_err(dev, "Failed to switch to event-driven firmware commands, aborting\n");
2622                 goto err_eq_table_free;
2623         }
2624
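        /* issue a NOP that completes on the async EQ to verify that
         * interrupt delivery actually works before going any further
         */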
2625         err = mlx4_NOP(dev);
2626         if (err) {
2627                 if (dev->flags & MLX4_FLAG_MSI_X) {
2628                         mlx4_warn(dev, "NOP command failed to generate MSI-X interrupt (IRQ %d)\n",
2629                                   priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
2630                         mlx4_warn(dev, "Trying again without MSI-X\n");
2631                 } else {
2632                         mlx4_err(dev, "NOP command failed to generate interrupt (IRQ %d), aborting\n",
2633                                  priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
2634                         mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
2635                 }
2636
2637                 goto err_cmd_poll;
2638         }
2639
2640         mlx4_dbg(dev, "NOP command IRQ test passed\n");
2641
2642         err = mlx4_init_cq_table(dev);
2643         if (err) {
2644                 mlx4_err(dev, "Failed to initialize completion queue table, aborting\n");
2645                 goto err_cmd_poll;
2646         }
2647
2648         err = mlx4_init_srq_table(dev);
2649         if (err) {
2650                 mlx4_err(dev, "Failed to initialize shared receive queue table, aborting\n");
2651                 goto err_cq_table_free;
2652         }
2653
2654         err = mlx4_init_qp_table(dev);
2655         if (err) {
2656                 mlx4_err(dev, "Failed to initialize queue pair table, aborting\n");
2657                 goto err_srq_table_free;
2658         }
2659
2660         if (!mlx4_is_slave(dev)) {
2661                 err = mlx4_init_counters_table(dev);
2662                 if (err && err != -ENOENT) {
2663                         mlx4_err(dev, "Failed to initialize counters table, aborting\n");
2664                         goto err_qp_table_free;
2665                 }
2666         }
2667
2668         err = mlx4_allocate_default_counters(dev);
2669         if (err) {
2670                 mlx4_err(dev, "Failed to allocate default counters, aborting\n");
2671                 goto err_counters_table_free;
2672         }
2673
2674         if (!mlx4_is_slave(dev)) {
2675                 for (port = 1; port <= dev->caps.num_ports; port++) {
2676                         ib_port_default_caps = 0;
2677                         err = mlx4_get_port_ib_caps(dev, port,
2678                                                     &ib_port_default_caps);
2679                         if (err)
2680                                 mlx4_warn(dev, "failed to get port %d default ib capabilities (%d). Continuing with caps = 0\n",
2681                                           port, err);
2682                         dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
2683
2684                         /* initialize per-slave default ib port capabilities */
2685                         if (mlx4_is_master(dev)) {
2686                                 int i;
2687                                 for (i = 0; i < dev->num_slaves; i++) {
2688                                         if (i == mlx4_master_func_num(dev))
2689                                                 continue;
2690                                         priv->mfunc.master.slave_state[i].ib_cap_mask[port] =
2691                                                 ib_port_default_caps;
2692                                 }
2693                         }
2694
2695                         if (mlx4_is_mfunc(dev))
2696                                 dev->caps.port_ib_mtu[port] = IB_MTU_2048;
2697                         else
2698                                 dev->caps.port_ib_mtu[port] = IB_MTU_4096;
2699
2700                         err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ?
2701                                             dev->caps.pkey_table_len[port] : -1);
2702                         if (err) {
2703                                 mlx4_err(dev, "Failed to set port %d, aborting\n",
2704                                          port);
2705                                 goto err_default_counters_free;
2706                         }
2707                 }
2708         }
2709
2710         return 0;
2711
2712 err_default_counters_free:
2713         mlx4_cleanup_default_counters(dev);
2714
2715 err_counters_table_free:
2716         if (!mlx4_is_slave(dev))
2717                 mlx4_cleanup_counters_table(dev);
2718
2719 err_qp_table_free:
2720         mlx4_cleanup_qp_table(dev);
2721
2722 err_srq_table_free:
2723         mlx4_cleanup_srq_table(dev);
2724
2725 err_cq_table_free:
2726         mlx4_cleanup_cq_table(dev);
2727
2728 err_cmd_poll:
2729         mlx4_cmd_use_polling(dev);
2730
2731 err_eq_table_free:
2732         mlx4_cleanup_eq_table(dev);
2733
2734 err_mcg_table_free:
2735         if (!mlx4_is_slave(dev))
2736                 mlx4_cleanup_mcg_table(dev);
2737
2738 err_mr_table_free:
2739         mlx4_cleanup_mr_table(dev);
2740
2741 err_xrcd_table_free:
2742         mlx4_cleanup_xrcd_table(dev);
2743
2744 err_pd_table_free:
2745         mlx4_cleanup_pd_table(dev);
2746
2747 err_kar_unmap:
2748         iounmap(priv->kar);
2749
2750 err_uar_free:
2751         mlx4_uar_free(dev, &priv->driver_uar);
2752
2753 err_uar_table_free:
2754         mlx4_cleanup_uar_table(dev);
2755         return err;
2756 }
2757
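/*
 * Compute the affinity hint for a completion EQ: completion vectors are
 * laid out per port, so the target CPU is the EQ's offset within its
 * port's range, with one slot skipped for the asynchronous EQ.  E.g.,
 * with two completion EQs per port, eqn 3 on port 2 gives
 * requested_cpu = 3 - 2 - 1 = 0, the first CPU serving port 2.
 */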
2758 static int mlx4_init_affinity_hint(struct mlx4_dev *dev, int port, int eqn)
2759 {
2760         int requested_cpu = 0;
2761         struct mlx4_priv *priv = mlx4_priv(dev);
2762         struct mlx4_eq *eq;
2763         int off = 0;
2764         int i;
2765
2766         if (eqn > dev->caps.num_comp_vectors)
2767                 return -EINVAL;
2768
2769         for (i = 1; i < port; i++)
2770                 off += mlx4_get_eqs_per_port(dev, i);
2771
2772         requested_cpu = eqn - off - !!(eqn > MLX4_EQ_ASYNC);
2773
2774         /* Meaning EQs are shared, and this call comes from the second port */
2775         if (requested_cpu < 0)
2776                 return 0;
2777
2778         eq = &priv->eq_table.eq[eqn];
2779
2780         if (!zalloc_cpumask_var(&eq->affinity_mask, GFP_KERNEL))
2781                 return -ENOMEM;
2782
2783         cpumask_set_cpu(requested_cpu, eq->affinity_mask);
2784
2785         return 0;
2786 }
2787
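/*
 * Size the MSI-X request as one vector per online CPU per port for
 * completion EQs plus one for the asynchronous EQ, clamped to what the
 * firmware exposes (num_eqs - reserved_eqs) and to MAX_MSIX.  If fewer
 * than two vectors can be allocated, fall back to legacy INTx with a
 * single shared completion vector on pdev->irq.
 */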
2788 static void mlx4_enable_msi_x(struct mlx4_dev *dev)
2789 {
2790         struct mlx4_priv *priv = mlx4_priv(dev);
2791         struct msix_entry *entries;
2792         int i;
2793         int port = 0;
2794
2795         if (msi_x) {
2796                 int nreq = dev->caps.num_ports * num_online_cpus() + 1;
2797
2798                 nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs,
2799                              nreq);
2800                 if (nreq > MAX_MSIX)
2801                         nreq = MAX_MSIX;
2802
2803                 entries = kcalloc(nreq, sizeof(*entries), GFP_KERNEL);
2804                 if (!entries)
2805                         goto no_msi;
2806
2807                 for (i = 0; i < nreq; ++i)
2808                         entries[i].entry = i;
2809
2810                 nreq = pci_enable_msix_range(dev->persist->pdev, entries, 2,
2811                                              nreq);
2812
2813                 if (nreq < 0 || nreq < MLX4_EQ_ASYNC) {
2814                         kfree(entries);
2815                         goto no_msi;
2816                 }
2817                 /* 1 is reserved for events (asynchronous EQ) */
2818                 dev->caps.num_comp_vectors = nreq - 1;
2819
2820                 priv->eq_table.eq[MLX4_EQ_ASYNC].irq = entries[0].vector;
2821                 bitmap_zero(priv->eq_table.eq[MLX4_EQ_ASYNC].actv_ports.ports,
2822                             dev->caps.num_ports);
2823
2824                 for (i = 0; i < dev->caps.num_comp_vectors + 1; i++) {
2825                         if (i == MLX4_EQ_ASYNC)
2826                                 continue;
2827
2828                         priv->eq_table.eq[i].irq =
2829                                 entries[i + 1 - !!(i > MLX4_EQ_ASYNC)].vector;
2830
2831                         if (MLX4_IS_LEGACY_EQ_MODE(dev->caps)) {
2832                                 bitmap_fill(priv->eq_table.eq[i].actv_ports.ports,
2833                                             dev->caps.num_ports);
2834                                 /* We don't set affinity hint when there
2835                                  * aren't enough EQs
2836                                  */
2837                         } else {
2838                                 set_bit(port,
2839                                         priv->eq_table.eq[i].actv_ports.ports);
2840                                 if (mlx4_init_affinity_hint(dev, port + 1, i))
2841                                         mlx4_warn(dev, "Couldn't init hint cpumask for EQ %d\n",
2842                                                   i);
2843                         }
2844                         /* We divide the EQs evenly between the two ports.
2845                          * (dev->caps.num_comp_vectors / dev->caps.num_ports)
2846                          * refers to the number of EQs per port
2847                          * (i.e. eqs_per_port). Theoretically, we would like to
2848                          * write something like (i + 1) % eqs_per_port == 0.
2849                          * However, since there's an asynchronous EQ, we have
2850                          * to skip over it by comparing this condition to
2851                          * !!((i + 1) > MLX4_EQ_ASYNC).
2852                          */
2853                         if ((dev->caps.num_comp_vectors > dev->caps.num_ports) &&
2854                             ((i + 1) %
2855                              (dev->caps.num_comp_vectors / dev->caps.num_ports)) ==
2856                             !!((i + 1) > MLX4_EQ_ASYNC))
2857                                 /* If dev->caps.num_comp_vectors < dev->caps.num_ports,
2858                                  * everything is shared anyway.
2859                                  */
2860                                 port++;
2861                 }
2862
2863                 dev->flags |= MLX4_FLAG_MSI_X;
2864
2865                 kfree(entries);
2866                 return;
2867         }
2868
2869 no_msi:
2870         dev->caps.num_comp_vectors = 1;
2871
2872         BUG_ON(MLX4_EQ_ASYNC >= 2);
2873         for (i = 0; i < 2; ++i) {
2874                 priv->eq_table.eq[i].irq = dev->persist->pdev->irq;
2875                 if (i != MLX4_EQ_ASYNC) {
2876                         bitmap_fill(priv->eq_table.eq[i].actv_ports.ports,
2877                                     dev->caps.num_ports);
2878                 }
2879         }
2880 }
2881
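/*
 * Publish two sysfs attributes per port under the PCI device:
 * mlx4_port<N> (port link type) and mlx4_port<N>_mtu (IB MTU).  Both
 * are writable only when the device is not multi-function; otherwise
 * they are read-only.  info->port is set to -1 on failure so that
 * mlx4_cleanup_port_info() knows there is nothing to remove.
 */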
2882 static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
2883 {
2884         struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
2885         int err = 0;
2886
2887         info->dev = dev;
2888         info->port = port;
2889         if (!mlx4_is_slave(dev)) {
2890                 mlx4_init_mac_table(dev, &info->mac_table);
2891                 mlx4_init_vlan_table(dev, &info->vlan_table);
2892                 mlx4_init_roce_gid_table(dev, &info->gid_table);
2893                 info->base_qpn = mlx4_get_base_qpn(dev, port);
2894         }
2895
2896         sprintf(info->dev_name, "mlx4_port%d", port);
2897         info->port_attr.attr.name = info->dev_name;
2898         if (mlx4_is_mfunc(dev))
2899                 info->port_attr.attr.mode = S_IRUGO;
2900         else {
2901                 info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
2902                 info->port_attr.store     = set_port_type;
2903         }
2904         info->port_attr.show      = show_port_type;
2905         sysfs_attr_init(&info->port_attr.attr);
2906
2907         err = device_create_file(&dev->persist->pdev->dev, &info->port_attr);
2908         if (err) {
2909                 mlx4_err(dev, "Failed to create file for port %d\n", port);
2910                 info->port = -1;
2911         }
2912
2913         sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
2914         info->port_mtu_attr.attr.name = info->dev_mtu_name;
2915         if (mlx4_is_mfunc(dev))
2916                 info->port_mtu_attr.attr.mode = S_IRUGO;
2917         else {
2918                 info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR;
2919                 info->port_mtu_attr.store     = set_port_ib_mtu;
2920         }
2921         info->port_mtu_attr.show      = show_port_ib_mtu;
2922         sysfs_attr_init(&info->port_mtu_attr.attr);
2923
2924         err = device_create_file(&dev->persist->pdev->dev,
2925                                  &info->port_mtu_attr);
2926         if (err) {
2927                 mlx4_err(dev, "Failed to create mtu file for port %d\n", port);
2928                 device_remove_file(&info->dev->persist->pdev->dev,
2929                                    &info->port_attr);
2930                 info->port = -1;
2931         }
2932
2933         return err;
2934 }
2935
2936 static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
2937 {
2938         if (info->port < 0)
2939                 return;
2940
2941         device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr);
2942         device_remove_file(&info->dev->persist->pdev->dev,
2943                            &info->port_mtu_attr);
2944 #ifdef CONFIG_RFS_ACCEL
2945         free_irq_cpu_rmap(info->rmap);
2946         info->rmap = NULL;
2947 #endif
2948 }
2949
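/*
 * Allocate per-port multicast steering state: for each steering type,
 * a list of promiscuous QPs and a list of steering entries (each entry
 * in turn carrying a list of duplicates, freed in mlx4_clear_steering()).
 */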
2950 static int mlx4_init_steering(struct mlx4_dev *dev)
2951 {
2952         struct mlx4_priv *priv = mlx4_priv(dev);
2953         int num_entries = dev->caps.num_ports;
2954         int i, j;
2955
2956         priv->steer = kcalloc(num_entries, sizeof(struct mlx4_steer), GFP_KERNEL);
2957         if (!priv->steer)
2958                 return -ENOMEM;
2959
2960         for (i = 0; i < num_entries; i++)
2961                 for (j = 0; j < MLX4_NUM_STEERS; j++) {
2962                         INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]);
2963                         INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]);
2964                 }
2965         return 0;
2966 }
2967
2968 static void mlx4_clear_steering(struct mlx4_dev *dev)
2969 {
2970         struct mlx4_priv *priv = mlx4_priv(dev);
2971         struct mlx4_steer_index *entry, *tmp_entry;
2972         struct mlx4_promisc_qp *pqp, *tmp_pqp;
2973         int num_entries = dev->caps.num_ports;
2974         int i, j;
2975
2976         for (i = 0; i < num_entries; i++) {
2977                 for (j = 0; j < MLX4_NUM_STEERS; j++) {
2978                         list_for_each_entry_safe(pqp, tmp_pqp,
2979                                                  &priv->steer[i].promisc_qps[j],
2980                                                  list) {
2981                                 list_del(&pqp->list);
2982                                 kfree(pqp);
2983                         }
2984                         list_for_each_entry_safe(entry, tmp_entry,
2985                                                  &priv->steer[i].steer_entries[j],
2986                                                  list) {
2987                                 list_del(&entry->list);
2988                                 list_for_each_entry_safe(pqp, tmp_pqp,
2989                                                          &entry->duplicates,
2990                                                          list) {
2991                                         list_del(&pqp->list);
2992                                         kfree(pqp);
2993                                 }
2994                                 kfree(entry);
2995                         }
2996                 }
2997         }
2998         kfree(priv->steer);
2999 }
3000
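/*
 * Flatten a PCI devfn into a single function index: devfn packs the
 * slot in bits 7:3 and the function in bits 2:0, so slot * 8 + func
 * enumerates functions across slots.  This lets __mlx4_init_one()
 * match a VF's position against the cumulative per-port probe_vf
 * quotas.
 */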
3001 static int extended_func_num(struct pci_dev *pdev)
3002 {
3003         return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn);
3004 }
3005
3006 #define MLX4_OWNER_BASE 0x8069c
3007 #define MLX4_OWNER_SIZE 4
3008
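/*
 * Single-PF ownership is arbitrated through a 4-byte semaphore in
 * BAR 0 at offset MLX4_OWNER_BASE: a nonzero read means another
 * function already owns the device; writing zero releases it.  The
 * msleep(1000) on release presumably gives the firmware time to
 * observe the write before the mapping goes away.
 */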
3009 static int mlx4_get_ownership(struct mlx4_dev *dev)
3010 {
3011         void __iomem *owner;
3012         u32 ret;
3013
3014         if (pci_channel_offline(dev->persist->pdev))
3015                 return -EIO;
3016
3017         owner = ioremap(pci_resource_start(dev->persist->pdev, 0) +
3018                         MLX4_OWNER_BASE,
3019                         MLX4_OWNER_SIZE);
3020         if (!owner) {
3021                 mlx4_err(dev, "Failed to obtain ownership bit\n");
3022                 return -ENOMEM;
3023         }
3024
3025         ret = readl(owner);
3026         iounmap(owner);
3027         return (int) !!ret;
3028 }
3029
3030 static void mlx4_free_ownership(struct mlx4_dev *dev)
3031 {
3032         void __iomem *owner;
3033
3034         if (pci_channel_offline(dev->persist->pdev))
3035                 return;
3036
3037         owner = ioremap(pci_resource_start(dev->persist->pdev, 0) +
3038                         MLX4_OWNER_BASE,
3039                         MLX4_OWNER_SIZE);
3040         if (!owner) {
3041                 mlx4_err(dev, "Failed to obtain ownership bit\n");
3042                 return;
3043         }
3044         writel(0, owner);
3045         msleep(1000);
3046         iounmap(owner);
3047 }
3048
3049 #define SRIOV_VALID_STATE(flags) (!!((flags) & MLX4_FLAG_SRIOV) ==\
3050                                   !!((flags) & MLX4_FLAG_MASTER))
3051
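/*
 * Enable SR-IOV and switch the device into master mode.  On the reset
 * path (reset_flow) only the dev_vfs bookkeeping is reallocated, since
 * the VFs survived the reset.  On any failure, MLX4_FLAG_MASTER is
 * stripped from the returned flags so the caller continues as an
 * ordinary PF without SR-IOV.
 */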
3052 static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev,
3053                              u8 total_vfs, int existing_vfs, int reset_flow)
3054 {
3055         u64 dev_flags = dev->flags;
3056         int err = 0;
3057         int fw_enabled_sriov_vfs = min(pci_sriov_get_totalvfs(pdev),
3058                                         MLX4_MAX_NUM_VF);
3059
3060         if (reset_flow) {
3061                 dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs),
3062                                        GFP_KERNEL);
3063                 if (!dev->dev_vfs)
3064                         goto free_mem;
3065                 return dev_flags;
3066         }
3067
3068         atomic_inc(&pf_loading);
3069         if (dev->flags & MLX4_FLAG_SRIOV) {
3070                 if (existing_vfs != total_vfs) {
3071                         mlx4_err(dev, "SR-IOV was already enabled, but with num_vfs (%d) different than requested (%d)\n",
3072                                  existing_vfs, total_vfs);
3073                         total_vfs = existing_vfs;
3074                 }
3075         }
3076
3077         dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs), GFP_KERNEL);
3078         if (!dev->dev_vfs) {
3079                 mlx4_err(dev, "Failed to allocate memory for VFs\n");
3080                 goto disable_sriov;
3081         }
3082
3083         if (!(dev->flags & MLX4_FLAG_SRIOV)) {
3084                 if (total_vfs > fw_enabled_sriov_vfs) {
3085                         mlx4_err(dev, "requested VFs (%d) > available VFs (%d). Continuing without SR-IOV\n",
3086                                  total_vfs, fw_enabled_sriov_vfs);
3087                         err = -ENOMEM;
3088                         goto disable_sriov;
3089                 }
3090                 mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", total_vfs);
3091                 err = pci_enable_sriov(pdev, total_vfs);
3092         }
3093         if (err) {
3094                 mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n",
3095                          err);
3096                 goto disable_sriov;
3097         } else {
3098                 mlx4_warn(dev, "Running in master mode\n");
3099                 dev_flags |= MLX4_FLAG_SRIOV |
3100                         MLX4_FLAG_MASTER;
3101                 dev_flags &= ~MLX4_FLAG_SLAVE;
3102                 dev->persist->num_vfs = total_vfs;
3103         }
3104         return dev_flags;
3105
3106 disable_sriov:
3107         atomic_dec(&pf_loading);
3108 free_mem:
3109         dev->persist->num_vfs = 0;
3110         kfree(dev->dev_vfs);
3111         dev->dev_vfs = NULL;
3112         return dev_flags & ~MLX4_FLAG_MASTER;
3113 }
3114
3115 enum {
3116         MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64 = -1,
3117 };
3118
3119 static int mlx4_check_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
3120                               int *nvfs)
3121 {
3122         int requested_vfs = nvfs[0] + nvfs[1] + nvfs[2];
3123         /* Checking for 64 VFs as a limitation of CX2 */
3124         if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_80_VFS) &&
3125             requested_vfs >= 64) {
3126                 mlx4_err(dev, "Requested %d VFs, but FW does not support more than 64\n",
3127                          requested_vfs);
3128                 return MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64;
3129         }
3130         return 0;
3131 }
3132
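/*
 * Bring the device all the way up.  Note the slave_start retry label:
 * a function that discovers it is not the primary PF
 * (mlx4_init_hca() returns -EACCES), or legacy FW that needs SR-IOV
 * enabled before QUERY_DEV_CAP reports its final capabilities, tears
 * the command interface down and jumps back to slave_start to
 * reinitialize in the new mode.
 */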
3133 static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
3134                          int total_vfs, int *nvfs, struct mlx4_priv *priv,
3135                          int reset_flow)
3136 {
3137         struct mlx4_dev *dev;
3138         unsigned sum = 0;
3139         int err;
3140         int port;
3141         int i;
3142         struct mlx4_dev_cap *dev_cap = NULL;
3143         int existing_vfs = 0;
3144
3145         dev = &priv->dev;
3146
3147         INIT_LIST_HEAD(&priv->ctx_list);
3148         spin_lock_init(&priv->ctx_lock);
3149
3150         mutex_init(&priv->port_mutex);
3151         mutex_init(&priv->bond_mutex);
3152
3153         INIT_LIST_HEAD(&priv->pgdir_list);
3154         mutex_init(&priv->pgdir_mutex);
3155
3156         INIT_LIST_HEAD(&priv->bf_list);
3157         mutex_init(&priv->bf_mutex);
3158
3159         dev->rev_id = pdev->revision;
3160         dev->numa_node = dev_to_node(&pdev->dev);
3161
3162         /* Detect if this device is a virtual function */
3163         if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
3164                 mlx4_warn(dev, "Detected virtual function - running in slave mode\n");
3165                 dev->flags |= MLX4_FLAG_SLAVE;
3166         } else {
3167                 /* We reset the device and enable SRIOV only for physical
3168                  * devices.  Try to claim ownership on the device;
3169                  * if already taken, skip -- do not allow multiple PFs */
3170                 err = mlx4_get_ownership(dev);
3171                 if (err) {
3172                         if (err < 0)
3173                                 return err;
3174                         else {
3175                                 mlx4_warn(dev, "Multiple PFs not yet supported - Skipping PF\n");
3176                                 return -EINVAL;
3177                         }
3178                 }
3179
3180                 atomic_set(&priv->opreq_count, 0);
3181                 INIT_WORK(&priv->opreq_task, mlx4_opreq_action);
3182
3183                 /*
3184                  * Now reset the HCA before we touch the PCI capabilities or
3185                  * attempt a firmware command, since a boot ROM may have left
3186                  * the HCA in an undefined state.
3187                  */
3188                 err = mlx4_reset(dev);
3189                 if (err) {
3190                         mlx4_err(dev, "Failed to reset HCA, aborting\n");
3191                         goto err_sriov;
3192                 }
3193
3194                 if (total_vfs) {
3195                         dev->flags = MLX4_FLAG_MASTER;
3196                         existing_vfs = pci_num_vf(pdev);
3197                         if (existing_vfs)
3198                                 dev->flags |= MLX4_FLAG_SRIOV;
3199                         dev->persist->num_vfs = total_vfs;
3200                 }
3201         }
3202
3203         /* On load, remove any previous indication of internal error;
3204          * the device is up.
3205          */
3206         dev->persist->state = MLX4_DEVICE_STATE_UP;
3207
3208 slave_start:
3209         err = mlx4_cmd_init(dev);
3210         if (err) {
3211                 mlx4_err(dev, "Failed to init command interface, aborting\n");
3212                 goto err_sriov;
3213         }
3214
3215         /* In slave functions, the communication channel must be initialized
3216          * before posting commands. Also, init num_slaves before calling
3217          * mlx4_init_hca */
3218         if (mlx4_is_mfunc(dev)) {
3219                 if (mlx4_is_master(dev)) {
3220                         dev->num_slaves = MLX4_MAX_NUM_SLAVES;
3221
3222                 } else {
3223                         dev->num_slaves = 0;
3224                         err = mlx4_multi_func_init(dev);
3225                         if (err) {
3226                                 mlx4_err(dev, "Failed to init slave mfunc interface, aborting\n");
3227                                 goto err_cmd;
3228                         }
3229                 }
3230         }
3231
3232         err = mlx4_init_fw(dev);
3233         if (err) {
3234                 mlx4_err(dev, "Failed to init fw, aborting.\n");
3235                 goto err_mfunc;
3236         }
3237
3238         if (mlx4_is_master(dev)) {
3239                 /* When we hit the goto slave_start below, dev_cap is already initialized */
3240                 if (!dev_cap) {
3241                         dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);
3242
3243                         if (!dev_cap) {
3244                                 err = -ENOMEM;
3245                                 goto err_fw;
3246                         }
3247
3248                         err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
3249                         if (err) {
3250                                 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
3251                                 goto err_fw;
3252                         }
3253
3254                         if (mlx4_check_dev_cap(dev, dev_cap, nvfs))
3255                                 goto err_fw;
3256
3257                         if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
3258                                 u64 dev_flags = mlx4_enable_sriov(dev, pdev,
3259                                                                   total_vfs,
3260                                                                   existing_vfs,
3261                                                                   reset_flow);
3262
3263                                 mlx4_close_fw(dev);
3264                                 mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
3265                                 dev->flags = dev_flags;
3266                                 if (!SRIOV_VALID_STATE(dev->flags)) {
3267                                         mlx4_err(dev, "Invalid SRIOV state\n");
3268                                         goto err_sriov;
3269                                 }
3270                                 err = mlx4_reset(dev);
3271                                 if (err) {
3272                                         mlx4_err(dev, "Failed to reset HCA, aborting.\n");
3273                                         goto err_sriov;
3274                                 }
3275                                 goto slave_start;
3276                         }
3277                 } else {
3278                         /* Legacy mode FW requires SRIOV to be enabled before
3279                          * doing QUERY_DEV_CAP, since max_eq's value is different if
3280                          * SRIOV is enabled.
3281                          */
3282                         memset(dev_cap, 0, sizeof(*dev_cap));
3283                         err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
3284                         if (err) {
3285                                 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
3286                                 goto err_fw;
3287                         }
3288
3289                         if (mlx4_check_dev_cap(dev, dev_cap, nvfs))
3290                                 goto err_fw;
3291                 }
3292         }
3293
3294         err = mlx4_init_hca(dev);
3295         if (err) {
3296                 if (err == -EACCES) {
3297                         /* Not primary Physical function
3298                          * Running in slave mode */
3299                         mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
3300                         /* We're not a PF */
3301                         if (dev->flags & MLX4_FLAG_SRIOV) {
3302                                 if (!existing_vfs)
3303                                         pci_disable_sriov(pdev);
3304                                 if (mlx4_is_master(dev) && !reset_flow)
3305                                         atomic_dec(&pf_loading);
3306                                 dev->flags &= ~MLX4_FLAG_SRIOV;
3307                         }
3308                         if (!mlx4_is_slave(dev))
3309                                 mlx4_free_ownership(dev);
3310                         dev->flags |= MLX4_FLAG_SLAVE;
3311                         dev->flags &= ~MLX4_FLAG_MASTER;
3312                         goto slave_start;
3313                 } else
3314                         goto err_fw;
3315         }
3316
3317         if (mlx4_is_master(dev) && (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
3318                 u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs,
3319                                                   existing_vfs, reset_flow);
3320
3321                 if ((dev->flags ^ dev_flags) & (MLX4_FLAG_MASTER | MLX4_FLAG_SLAVE)) {
3322                         mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_VHCR);
3323                         dev->flags = dev_flags;
3324                         err = mlx4_cmd_init(dev);
3325                         if (err) {
3326                                 /* Only VHCR is cleaned up, so could still
3327                                  * send FW commands
3328                                  */
3329                                 mlx4_err(dev, "Failed to init VHCR command interface, aborting\n");
3330                                 goto err_close;
3331                         }
3332                 } else {
3333                         dev->flags = dev_flags;
3334                 }
3335
3336                 if (!SRIOV_VALID_STATE(dev->flags)) {
3337                         mlx4_err(dev, "Invalid SRIOV state\n");
3338                         goto err_close;
3339                 }
3340         }
3341
3342         /* Check whether the device is functioning at its maximum possible
3343          * speed. This call has no return code; it just warns the user if the
3344          * PCI Express capabilities of the device are under-satisfied by the bus.
3345          */
3346         if (!mlx4_is_slave(dev))
3347                 mlx4_check_pcie_caps(dev);
3348
3349         /* In master functions, the communication channel must be initialized
3350          * after obtaining its address from fw */
3351         if (mlx4_is_master(dev)) {
3352                 if (dev->caps.num_ports < 2 &&
3353                     num_vfs_argc > 1) {
3354                         err = -EINVAL;
3355                         mlx4_err(dev,
3356                                  "Error: Trying to configure VFs on port 2, but HCA has only %d physical ports\n",
3357                                  dev->caps.num_ports);
3358                         goto err_close;
3359                 }
3360                 memcpy(dev->persist->nvfs, nvfs, sizeof(dev->persist->nvfs));
3361
3362                 for (i = 0;
3363                      i < sizeof(dev->persist->nvfs)/
3364                      sizeof(dev->persist->nvfs[0]); i++) {
3365                         unsigned j;
3366
3367                         for (j = 0; j < dev->persist->nvfs[i]; ++sum, ++j) {
3368                                 dev->dev_vfs[sum].min_port = i < 2 ? i + 1 : 1;
3369                                 dev->dev_vfs[sum].n_ports = i < 2 ? 1 :
3370                                         dev->caps.num_ports;
3371                         }
3372                 }
3373
3377                 err = mlx4_multi_func_init(dev);
3378                 if (err) {
3379                         mlx4_err(dev, "Failed to init master mfunc interface, aborting.\n");
3380                         goto err_close;
3381                 }
3382         }
3383
3384         err = mlx4_alloc_eq_table(dev);
3385         if (err)
3386                 goto err_master_mfunc;
3387
3388         bitmap_zero(priv->msix_ctl.pool_bm, MAX_MSIX);
3389         mutex_init(&priv->msix_ctl.pool_lock);
3390
3391         mlx4_enable_msi_x(dev);
3392         if ((mlx4_is_mfunc(dev)) &&
3393             !(dev->flags & MLX4_FLAG_MSI_X)) {
3394                 err = -ENOSYS;
3395                 mlx4_err(dev, "INTx is not supported in multi-function mode, aborting\n");
3396                 goto err_free_eq;
3397         }
3398
3399         if (!mlx4_is_slave(dev)) {
3400                 err = mlx4_init_steering(dev);
3401                 if (err)
3402                         goto err_disable_msix;
3403         }
3404
3405         err = mlx4_setup_hca(dev);
3406         if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) &&
3407             !mlx4_is_mfunc(dev)) {
3408                 dev->flags &= ~MLX4_FLAG_MSI_X;
3409                 dev->caps.num_comp_vectors = 1;
3410                 pci_disable_msix(pdev);
3411                 err = mlx4_setup_hca(dev);
3412         }
3413
3414         if (err)
3415                 goto err_steer;
3416
3417         mlx4_init_quotas(dev);
3418         /* When the PF's resources are ready, arm its comm channel to
3419          * enable receiving commands.
3420          */
3421         if (mlx4_is_master(dev)) {
3422                 err = mlx4_ARM_COMM_CHANNEL(dev);
3423                 if (err) {
3424                         mlx4_err(dev, "Failed to arm comm channel EQ: %x\n",
3425                                  err);
3426                         goto err_steer;
3427                 }
3428         }
3429
3430         for (port = 1; port <= dev->caps.num_ports; port++) {
3431                 err = mlx4_init_port_info(dev, port);
3432                 if (err)
3433                         goto err_port;
3434         }
3435
3436         priv->v2p.port1 = 1;
3437         priv->v2p.port2 = 2;
3438
3439         err = mlx4_register_device(dev);
3440         if (err)
3441                 goto err_port;
3442
3443         mlx4_request_modules(dev);
3444
3445         mlx4_sense_init(dev);
3446         mlx4_start_sense(dev);
3447
3448         priv->removed = 0;
3449
3450         if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow)
3451                 atomic_dec(&pf_loading);
3452
3453         kfree(dev_cap);
3454         return 0;
3455
3456 err_port:
3457         for (--port; port >= 1; --port)
3458                 mlx4_cleanup_port_info(&priv->port[port]);
3459
3460         mlx4_cleanup_default_counters(dev);
3461         if (!mlx4_is_slave(dev))
3462                 mlx4_cleanup_counters_table(dev);
3463         mlx4_cleanup_qp_table(dev);
3464         mlx4_cleanup_srq_table(dev);
3465         mlx4_cleanup_cq_table(dev);
3466         mlx4_cmd_use_polling(dev);
3467         mlx4_cleanup_eq_table(dev);
3468         mlx4_cleanup_mcg_table(dev);
3469         mlx4_cleanup_mr_table(dev);
3470         mlx4_cleanup_xrcd_table(dev);
3471         mlx4_cleanup_pd_table(dev);
3472         mlx4_cleanup_uar_table(dev);
3473
3474 err_steer:
3475         if (!mlx4_is_slave(dev))
3476                 mlx4_clear_steering(dev);
3477
3478 err_disable_msix:
3479         if (dev->flags & MLX4_FLAG_MSI_X)
3480                 pci_disable_msix(pdev);
3481
3482 err_free_eq:
3483         mlx4_free_eq_table(dev);
3484
3485 err_master_mfunc:
3486         if (mlx4_is_master(dev)) {
3487                 mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY);
3488                 mlx4_multi_func_cleanup(dev);
3489         }
3490
3491         if (mlx4_is_slave(dev)) {
3492                 kfree(dev->caps.qp0_qkey);
3493                 kfree(dev->caps.qp0_tunnel);
3494                 kfree(dev->caps.qp0_proxy);
3495                 kfree(dev->caps.qp1_tunnel);
3496                 kfree(dev->caps.qp1_proxy);
3497         }
3498
3499 err_close:
3500         mlx4_close_hca(dev);
3501
3502 err_fw:
3503         mlx4_close_fw(dev);
3504
3505 err_mfunc:
3506         if (mlx4_is_slave(dev))
3507                 mlx4_multi_func_cleanup(dev);
3508
3509 err_cmd:
3510         mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
3511
3512 err_sriov:
3513         if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) {
3514                 pci_disable_sriov(pdev);
3515                 dev->flags &= ~MLX4_FLAG_SRIOV;
3516         }
3517
3518         if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow)
3519                 atomic_dec(&pf_loading);
3520
3521         kfree(priv->dev.dev_vfs);
3522
3523         if (!mlx4_is_slave(dev))
3524                 mlx4_free_ownership(dev);
3525
3526         kfree(dev_cap);
3527         return err;
3528 }
3529
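/*
 * param_map translates the position of a num_vfs/probe_vf module
 * parameter value into an nvfs[] slot (0 = port 1, 1 = port 2,
 * 2 = both ports), keyed by how many values were passed.  E.g.
 * num_vfs=2 applies to both ports (param_map[0] is {2, 0, 0}), while
 * num_vfs=2,3 assigns 2 VFs to port 1 and 3 to port 2 (param_map[1]
 * is {0, 1, 2}).
 */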
3530 static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
3531                            struct mlx4_priv *priv)
3532 {
3533         int err;
3534         int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
3535         int prb_vf[MLX4_MAX_PORTS + 1] = {0, 0, 0};
3536         const int param_map[MLX4_MAX_PORTS + 1][MLX4_MAX_PORTS + 1] = {
3537                 {2, 0, 0}, {0, 1, 2}, {0, 1, 2} };
3538         unsigned total_vfs = 0;
3539         unsigned int i;
3540
3541         pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));
3542
3543         err = pci_enable_device(pdev);
3544         if (err) {
3545                 dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
3546                 return err;
3547         }
3548
3549         /* Due to the requirement that all VFs and the PF are *guaranteed* 2 MACs
3550          * per port, we must limit the number of VFs to 63 (since there are
3551          * 128 MACs)
3552          */
3553         for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) && i < num_vfs_argc;
3554              total_vfs += nvfs[param_map[num_vfs_argc - 1][i]], i++) {
3555                 nvfs[param_map[num_vfs_argc - 1][i]] = num_vfs[i];
3556                 if (nvfs[i] < 0) {
3557                         dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n");
3558                         err = -EINVAL;
3559                         goto err_disable_pdev;
3560                 }
3561         }
3562         for (i = 0; i < sizeof(prb_vf)/sizeof(prb_vf[0]) && i < probe_vfs_argc;
3563              i++) {
3564                 prb_vf[param_map[probe_vfs_argc - 1][i]] = probe_vf[i];
3565                 if (prb_vf[i] < 0 || prb_vf[i] > nvfs[i]) {
3566                         dev_err(&pdev->dev, "probe_vf module parameter cannot be negative or greater than num_vfs\n");
3567                         err = -EINVAL;
3568                         goto err_disable_pdev;
3569                 }
3570         }
3571         if (total_vfs > MLX4_MAX_NUM_VF) {
3572                 dev_err(&pdev->dev,
3573                         "Requested more VFs (%d) than allowed by HW (%d)\n",
3574                         total_vfs, MLX4_MAX_NUM_VF);
3575                 err = -EINVAL;
3576                 goto err_disable_pdev;
3577         }
3578
3579         for (i = 0; i < MLX4_MAX_PORTS; i++) {
3580                 if (nvfs[i] + nvfs[2] > MLX4_MAX_NUM_VF_P_PORT) {
3581                         dev_err(&pdev->dev,
3582                                 "Requested more VFs (%d) for port (%d) than allowed by driver (%d)\n",
3583                                 nvfs[i] + nvfs[2], i + 1,
3584                                 MLX4_MAX_NUM_VF_P_PORT);
3585                         err = -EINVAL;
3586                         goto err_disable_pdev;
3587                 }
3588         }
3589
3590         /* Check for BARs. */
3591         if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
3592             !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
3593                 dev_err(&pdev->dev, "Missing DCS, aborting (driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n",
3594                         pci_dev_data, pci_resource_flags(pdev, 0));
3595                 err = -ENODEV;
3596                 goto err_disable_pdev;
3597         }
3598         if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
3599                 dev_err(&pdev->dev, "Missing UAR, aborting\n");
3600                 err = -ENODEV;
3601                 goto err_disable_pdev;
3602         }
3603
3604         err = pci_request_regions(pdev, DRV_NAME);
3605         if (err) {
3606                 dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
3607                 goto err_disable_pdev;
3608         }
3609
3610         pci_set_master(pdev);
3611
3612         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
3613         if (err) {
3614                 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
3615                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
3616                 if (err) {
3617                         dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
3618                         goto err_release_regions;
3619                 }
3620         }
3621         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
3622         if (err) {
3623                 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
3624                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
3625                 if (err) {
3626                         dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, aborting\n");
3627                         goto err_release_regions;
3628                 }
3629         }
3630
3631         /* Allow large DMA segments, up to the firmware limit of 1 GB */
3632         dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
3633         /* Detect if this device is a virtual function */
3634         if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
3635                 /* When acting as a PF, we normally skip VFs unless explicitly
3636                  * requested to probe them.
3637                  */
3638                 if (total_vfs) {
3639                         unsigned vfs_offset = 0;
3640
3641                         for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) &&
3642                              vfs_offset + nvfs[i] < extended_func_num(pdev);
3643                              vfs_offset += nvfs[i], i++)
3644                                 ;
3645                         if (i == sizeof(nvfs)/sizeof(nvfs[0])) {
3646                                 err = -ENODEV;
3647                                 goto err_release_regions;
3648                         }
3649                         if ((extended_func_num(pdev) - vfs_offset)
3650                             > prb_vf[i]) {
3651                                 dev_warn(&pdev->dev, "Skipping virtual function:%d\n",
3652                                          extended_func_num(pdev));
3653                                 err = -ENODEV;
3654                                 goto err_release_regions;
3655                         }
3656                 }
3657         }
3658
3659         err = mlx4_catas_init(&priv->dev);
3660         if (err)
3661                 goto err_release_regions;
3662
3663         err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 0);
3664         if (err)
3665                 goto err_catas;
3666
3667         return 0;
3668
3669 err_catas:
3670         mlx4_catas_end(&priv->dev);
3671
3672 err_release_regions:
3673         pci_release_regions(pdev);
3674
3675 err_disable_pdev:
3676         pci_disable_device(pdev);
3677         pci_set_drvdata(pdev, NULL);
3678         return err;
3679 }
3680
3681 static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
3682 {
3683         struct mlx4_priv *priv;
3684         struct mlx4_dev *dev;
3685         int ret;
3686
3687         printk_once(KERN_INFO "%s", mlx4_version);
3688
3689         priv = kzalloc(sizeof(*priv), GFP_KERNEL);
3690         if (!priv)
3691                 return -ENOMEM;
3692
3693         dev       = &priv->dev;
3694         dev->persist = kzalloc(sizeof(*dev->persist), GFP_KERNEL);
3695         if (!dev->persist) {
3696                 kfree(priv);
3697                 return -ENOMEM;
3698         }
3699         dev->persist->pdev = pdev;
3700         dev->persist->dev = dev;
3701         pci_set_drvdata(pdev, dev->persist);
3702         priv->pci_dev_data = id->driver_data;
3703         mutex_init(&dev->persist->device_state_mutex);
3704         mutex_init(&dev->persist->interface_state_mutex);
3705
3706         ret =  __mlx4_init_one(pdev, id->driver_data, priv);
3707         if (ret) {
3708                 kfree(dev->persist);
3709                 kfree(priv);
3710         } else {
3711                 pci_save_state(pdev);
3712         }
3713
3714         return ret;
3715 }
3716
3717 static void mlx4_clean_dev(struct mlx4_dev *dev)
3718 {
3719         struct mlx4_dev_persistent *persist = dev->persist;
3720         struct mlx4_priv *priv = mlx4_priv(dev);
3721         unsigned long   flags = (dev->flags & RESET_PERSIST_MASK_FLAGS);
3722
3723         memset(priv, 0, sizeof(*priv));
3724         priv->dev.persist = persist;
3725         priv->dev.flags = flags;
3726 }
3727
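/*
 * Tear down everything mlx4_load_one() set up, in reverse order, while
 * preserving what a restart needs: the current port types are saved in
 * the persistent state, and mlx4_clean_dev() wipes the private area
 * but carries the persist pointer and the RESET_PERSIST_MASK_FLAGS
 * bits of dev->flags across the reload.
 */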
3728 static void mlx4_unload_one(struct pci_dev *pdev)
3729 {
3730         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3731         struct mlx4_dev  *dev  = persist->dev;
3732         struct mlx4_priv *priv = mlx4_priv(dev);
3733         int               pci_dev_data;
3734         int p, i;
3735
3736         if (priv->removed)
3737                 return;
3738
3739         /* Save the current port types for later restoration */
3740         for (i = 0; i < dev->caps.num_ports; i++) {
3741                 dev->persist->curr_port_type[i] = dev->caps.port_type[i + 1];
3742                 dev->persist->curr_port_poss_type[i] = dev->caps.
3743                                                        possible_type[i + 1];
3744         }
3745
3746         pci_dev_data = priv->pci_dev_data;
3747
3748         mlx4_stop_sense(dev);
3749         mlx4_unregister_device(dev);
3750
3751         for (p = 1; p <= dev->caps.num_ports; p++) {
3752                 mlx4_cleanup_port_info(&priv->port[p]);
3753                 mlx4_CLOSE_PORT(dev, p);
3754         }
3755
3756         if (mlx4_is_master(dev))
3757                 mlx4_free_resource_tracker(dev,
3758                                            RES_TR_FREE_SLAVES_ONLY);
3759
3760         mlx4_cleanup_default_counters(dev);
3761         if (!mlx4_is_slave(dev))
3762                 mlx4_cleanup_counters_table(dev);
3763         mlx4_cleanup_qp_table(dev);
3764         mlx4_cleanup_srq_table(dev);
3765         mlx4_cleanup_cq_table(dev);
3766         mlx4_cmd_use_polling(dev);
3767         mlx4_cleanup_eq_table(dev);
3768         mlx4_cleanup_mcg_table(dev);
3769         mlx4_cleanup_mr_table(dev);
3770         mlx4_cleanup_xrcd_table(dev);
3771         mlx4_cleanup_pd_table(dev);
3772
3773         if (mlx4_is_master(dev))
3774                 mlx4_free_resource_tracker(dev,
3775                                            RES_TR_FREE_STRUCTS_ONLY);
3776
3777         iounmap(priv->kar);
3778         mlx4_uar_free(dev, &priv->driver_uar);
3779         mlx4_cleanup_uar_table(dev);
3780         if (!mlx4_is_slave(dev))
3781                 mlx4_clear_steering(dev);
3782         mlx4_free_eq_table(dev);
3783         if (mlx4_is_master(dev))
3784                 mlx4_multi_func_cleanup(dev);
3785         mlx4_close_hca(dev);
3786         mlx4_close_fw(dev);
3787         if (mlx4_is_slave(dev))
3788                 mlx4_multi_func_cleanup(dev);
3789         mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
3790
3791         if (dev->flags & MLX4_FLAG_MSI_X)
3792                 pci_disable_msix(pdev);
3793
3794         if (!mlx4_is_slave(dev))
3795                 mlx4_free_ownership(dev);
3796
3797         kfree(dev->caps.qp0_qkey);
3798         kfree(dev->caps.qp0_tunnel);
3799         kfree(dev->caps.qp0_proxy);
3800         kfree(dev->caps.qp1_tunnel);
3801         kfree(dev->caps.qp1_proxy);
3802         kfree(dev->dev_vfs);
3803
3804         mlx4_clean_dev(dev);
3805         priv->pci_dev_data = pci_dev_data;
3806         priv->removed = 1;
3807 }
3808
3809 static void mlx4_remove_one(struct pci_dev *pdev)
3810 {
3811         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3812         struct mlx4_dev  *dev  = persist->dev;
3813         struct mlx4_priv *priv = mlx4_priv(dev);
3814         int active_vfs = 0;
3815
3816         mutex_lock(&persist->interface_state_mutex);
3817         persist->interface_state |= MLX4_INTERFACE_STATE_DELETION;
3818         mutex_unlock(&persist->interface_state_mutex);
3819
3820         /* Disabling SR-IOV is not allowed while there are active VFs */
3821         if (mlx4_is_master(dev) && dev->flags & MLX4_FLAG_SRIOV) {
3822                 active_vfs = mlx4_how_many_lives_vf(dev);
3823                 if (active_vfs) {
3824                         pr_warn("Removing PF when there are active VFs!\n");
3825                         pr_warn("Will not disable SR-IOV.\n");
3826                 }
3827         }
3828
3829         /* The device is marked for deletion; proceed without the lock,
3830          * letting other tasks terminate.
3831          */
3832         if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
3833                 mlx4_unload_one(pdev);
3834         else
3835                 mlx4_info(dev, "%s: interface is down\n", __func__);
3836         mlx4_catas_end(dev);
3837         if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) {
3838                 mlx4_warn(dev, "Disabling SR-IOV\n");
3839                 pci_disable_sriov(pdev);
3840         }
3841
3842         pci_release_regions(pdev);
3843         pci_disable_device(pdev);
3844         kfree(dev->persist);
3845         kfree(priv);
3846         pci_set_drvdata(pdev, NULL);
3847 }
3848
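/*
 * Re-apply the port types saved by mlx4_unload_one() after a reload,
 * with port sensing stopped so that it does not race with the explicit
 * type change.
 */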
3849 static int restore_current_port_types(struct mlx4_dev *dev,
3850                                       enum mlx4_port_type *types,
3851                                       enum mlx4_port_type *poss_types)
3852 {
3853         struct mlx4_priv *priv = mlx4_priv(dev);
3854         int err, i;
3855
3856         mlx4_stop_sense(dev);
3857
3858         mutex_lock(&priv->port_mutex);
3859         for (i = 0; i < dev->caps.num_ports; i++)
3860                 dev->caps.possible_type[i + 1] = poss_types[i];
3861         err = mlx4_change_port_types(dev, types);
3862         mlx4_start_sense(dev);
3863         mutex_unlock(&priv->port_mutex);
3864
3865         return err;
3866 }
3867
3868 int mlx4_restart_one(struct pci_dev *pdev)
3869 {
3870         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3871         struct mlx4_dev  *dev  = persist->dev;
3872         struct mlx4_priv *priv = mlx4_priv(dev);
3873         int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
3874         int pci_dev_data, err, total_vfs;
3875
3876         pci_dev_data = priv->pci_dev_data;
3877         total_vfs = dev->persist->num_vfs;
3878         memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));
3879
3880         mlx4_unload_one(pdev);
3881         err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 1);
3882         if (err) {
3883                 mlx4_err(dev, "%s: ERROR: mlx4_load_one failed, pci_name=%s, err=%d\n",
3884                          __func__, pci_name(pdev), err);
3885                 return err;
3886         }
3887
3888         err = restore_current_port_types(dev, dev->persist->curr_port_type,
3889                                          dev->persist->curr_port_poss_type);
3890         if (err)
3891                 mlx4_err(dev, "could not restore original port types (%d)\n",
3892                          err);
3893
3894         return err;
3895 }
3896
3897 static const struct pci_device_id mlx4_pci_table[] = {
3898         /* MT25408 "Hermon" SDR */
3899         { PCI_VDEVICE(MELLANOX, 0x6340), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3900         /* MT25408 "Hermon" DDR */
3901         { PCI_VDEVICE(MELLANOX, 0x634a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3902         /* MT25408 "Hermon" QDR */
3903         { PCI_VDEVICE(MELLANOX, 0x6354), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3904         /* MT25408 "Hermon" DDR PCIe gen2 */
3905         { PCI_VDEVICE(MELLANOX, 0x6732), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3906         /* MT25408 "Hermon" QDR PCIe gen2 */
3907         { PCI_VDEVICE(MELLANOX, 0x673c), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3908         /* MT25408 "Hermon" EN 10GigE */
3909         { PCI_VDEVICE(MELLANOX, 0x6368), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3910         /* MT25408 "Hermon" EN 10GigE PCIe gen2 */
3911         { PCI_VDEVICE(MELLANOX, 0x6750), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3912         /* MT25458 ConnectX EN 10GBASE-T 10GigE */
3913         { PCI_VDEVICE(MELLANOX, 0x6372), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3914         /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
3915         { PCI_VDEVICE(MELLANOX, 0x675a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3916         /* MT26468 ConnectX EN 10GigE PCIe gen2 */
3917         { PCI_VDEVICE(MELLANOX, 0x6764), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3918         /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
3919         { PCI_VDEVICE(MELLANOX, 0x6746), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3920         /* MT26478 ConnectX2 40GigE PCIe gen2 */
3921         { PCI_VDEVICE(MELLANOX, 0x676e), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3922         /* MT25400 Family [ConnectX-2 Virtual Function] */
3923         { PCI_VDEVICE(MELLANOX, 0x1002), MLX4_PCI_DEV_IS_VF },
3924         /* MT27500 Family [ConnectX-3] */
3925         { PCI_VDEVICE(MELLANOX, 0x1003), 0 },
3926         /* MT27500 Family [ConnectX-3 Virtual Function] */
3927         { PCI_VDEVICE(MELLANOX, 0x1004), MLX4_PCI_DEV_IS_VF },
3928         { PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */
3929         { PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */
3930         { PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */
3931         { PCI_VDEVICE(MELLANOX, 0x1008), 0 }, /* MT27521 Family */
3932         { PCI_VDEVICE(MELLANOX, 0x1009), 0 }, /* MT27530 Family */
3933         { PCI_VDEVICE(MELLANOX, 0x100a), 0 }, /* MT27531 Family */
3934         { PCI_VDEVICE(MELLANOX, 0x100b), 0 }, /* MT27540 Family */
3935         { PCI_VDEVICE(MELLANOX, 0x100c), 0 }, /* MT27541 Family */
3936         { PCI_VDEVICE(MELLANOX, 0x100d), 0 }, /* MT27550 Family */
3937         { PCI_VDEVICE(MELLANOX, 0x100e), 0 }, /* MT27551 Family */
3938         { PCI_VDEVICE(MELLANOX, 0x100f), 0 }, /* MT27560 Family */
3939         { PCI_VDEVICE(MELLANOX, 0x1010), 0 }, /* MT27561 Family */
3940         { 0, }
3941 };
3942
3943 MODULE_DEVICE_TABLE(pci, mlx4_pci_table);
3944
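/*
 * PCI error recovery: error_detected moves the device into the
 * internal-error state and unloads it under interface_state_mutex;
 * slot_reset then re-enables the device and, unless the interface is
 * already up, reruns mlx4_load_one() with reset_flow set so the SR-IOV
 * bookkeeping is rebuilt without re-enabling VFs that survived the
 * reset.
 */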
3945 static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
3946                                               pci_channel_state_t state)
3947 {
3948         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3949
3950         mlx4_err(persist->dev, "mlx4_pci_err_detected was called\n");
3951         mlx4_enter_error_state(persist);
3952
3953         mutex_lock(&persist->interface_state_mutex);
3954         if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
3955                 mlx4_unload_one(pdev);
3956
3957         mutex_unlock(&persist->interface_state_mutex);
3958         if (state == pci_channel_io_perm_failure)
3959                 return PCI_ERS_RESULT_DISCONNECT;
3960
3961         pci_disable_device(pdev);
3962         return PCI_ERS_RESULT_NEED_RESET;
3963 }
3964
3965 static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
3966 {
3967         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3968         struct mlx4_dev  *dev  = persist->dev;
3969         struct mlx4_priv *priv = mlx4_priv(dev);
3970         int               ret;
3971         int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
3972         int total_vfs;
3973
3974         mlx4_err(dev, "mlx4_pci_slot_reset was called\n");
3975         ret = pci_enable_device(pdev);
3976         if (ret) {
3977                 mlx4_err(dev, "Can not re-enable device, ret=%d\n", ret);
3978                 return PCI_ERS_RESULT_DISCONNECT;
3979         }
3980
3981         pci_set_master(pdev);
3982         pci_restore_state(pdev);
3983         pci_save_state(pdev);
3984
3985         total_vfs = dev->persist->num_vfs;
3986         memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));
3987
3988         mutex_lock(&persist->interface_state_mutex);
3989         if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) {
3990                 ret = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs, nvfs,
3991                                     priv, 1);
3992                 if (ret) {
3993                         mlx4_err(dev, "%s: mlx4_load_one failed, ret=%d\n",
3994                                  __func__,  ret);
3995                         goto end;
3996                 }
3997
3998                 ret = restore_current_port_types(dev, dev->persist->
3999                                                  curr_port_type, dev->persist->
4000                                                  curr_port_poss_type);
4001                 if (ret)
4002                         mlx4_err(dev, "could not restore original port types (%d)\n", ret);
4003         }
4004 end:
4005         mutex_unlock(&persist->interface_state_mutex);
4006
4007         return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
4008 }
4009
4010 static void mlx4_shutdown(struct pci_dev *pdev)
4011 {
4012         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
4013
4014         mlx4_info(persist->dev, "mlx4_shutdown was called\n");
4015         mutex_lock(&persist->interface_state_mutex);
4016         if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
4017                 mlx4_unload_one(pdev);
4018         mutex_unlock(&persist->interface_state_mutex);
4019 }
4020
4021 static const struct pci_error_handlers mlx4_err_handler = {
4022         .error_detected = mlx4_pci_err_detected,
4023         .slot_reset     = mlx4_pci_slot_reset,
4024 };
4025
4026 static struct pci_driver mlx4_driver = {
4027         .name           = DRV_NAME,
4028         .id_table       = mlx4_pci_table,
4029         .probe          = mlx4_init_one,
4030         .shutdown       = mlx4_shutdown,
4031         .remove         = mlx4_remove_one,
4032         .err_handler    = &mlx4_err_handler,
4033 };
4034
4035 static int __init mlx4_verify_params(void)
4036 {
4037         if ((log_num_mac < 0) || (log_num_mac > 7)) {
4038                 pr_warn("mlx4_core: bad log_num_mac: %d\n", log_num_mac);
4039                 return -1;
4040         }
4041
4042         if (log_num_vlan != 0)
4043                 pr_warn("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
4044                         MLX4_LOG_NUM_VLANS);
4045
4046         if (use_prio != 0)
4047                 pr_warn("mlx4_core: use_prio - obsolete module param, ignored\n");
4048
4049         if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 7)) {
4050                 pr_warn("mlx4_core: bad log_mtts_per_seg: %d\n",
4051                         log_mtts_per_seg);
4052                 return -1;
4053         }
4054
4055         /* Check if module param for ports type has legal combination */
4056         if (port_type_array[0] == false && port_type_array[1] == true) {
4057                 pr_warn("Module parameter configuration ETH/IB is not supported. Switching to default configuration IB/IB\n");
4058                 port_type_array[0] = true;
4059         }
4060
4061         if (mlx4_log_num_mgm_entry_size < -7 ||
4062             (mlx4_log_num_mgm_entry_size > 0 &&
4063              (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE ||
4064               mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE))) {
4065                 pr_warn("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not in legal range (-7..0 or %d..%d)\n",
4066                         mlx4_log_num_mgm_entry_size,
4067                         MLX4_MIN_MGM_LOG_ENTRY_SIZE,
4068                         MLX4_MAX_MGM_LOG_ENTRY_SIZE);
4069                 return -1;
4070         }
4071
4072         return 0;
4073 }
4074
4075 static int __init mlx4_init(void)
4076 {
4077         int ret;
4078
4079         if (mlx4_verify_params())
4080                 return -EINVAL;
4081
4083         mlx4_wq = create_singlethread_workqueue("mlx4");
4084         if (!mlx4_wq)
4085                 return -ENOMEM;
4086
4087         ret = pci_register_driver(&mlx4_driver);
4088         if (ret < 0)
4089                 destroy_workqueue(mlx4_wq);
4090         return ret < 0 ? ret : 0;
4091 }
4092
4093 static void __exit mlx4_cleanup(void)
4094 {
4095         pci_unregister_driver(&mlx4_driver);
4096         destroy_workqueue(mlx4_wq);
4097 }
4098
4099 module_init(mlx4_init);
4100 module_exit(mlx4_cleanup);