drivers/net/ethernet/mellanox/mlx4/main.c
1 /*
2  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
4  * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
5  * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  */
35
36 #include <linux/module.h>
37 #include <linux/init.h>
38 #include <linux/errno.h>
39 #include <linux/pci.h>
40 #include <linux/dma-mapping.h>
41 #include <linux/slab.h>
42 #include <linux/io-mapping.h>
43 #include <linux/delay.h>
44 #include <linux/kmod.h>
45 #include <net/devlink.h>
46
47 #include <linux/mlx4/device.h>
48 #include <linux/mlx4/doorbell.h>
49
50 #include "mlx4.h"
51 #include "fw.h"
52 #include "icm.h"
53
54 MODULE_AUTHOR("Roland Dreier");
55 MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver");
56 MODULE_LICENSE("Dual BSD/GPL");
57 MODULE_VERSION(DRV_VERSION);
58
59 struct workqueue_struct *mlx4_wq;
60
61 #ifdef CONFIG_MLX4_DEBUG
62
63 int mlx4_debug_level = 0;
64 module_param_named(debug_level, mlx4_debug_level, int, 0644);
65 MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
66
67 #endif /* CONFIG_MLX4_DEBUG */
68
69 #ifdef CONFIG_PCI_MSI
70
71 static int msi_x = 1;
72 module_param(msi_x, int, 0444);
73 MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");
74
75 #else /* CONFIG_PCI_MSI */
76
77 #define msi_x (0)
78
79 #endif /* CONFIG_PCI_MSI */
80
81 static uint8_t num_vfs[3] = {0, 0, 0};
82 static int num_vfs_argc;
83 module_param_array(num_vfs, byte, &num_vfs_argc, 0444);
84 MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0\n"
85                           "num_vfs=port1,port2,port1+2");
86
87 static uint8_t probe_vf[3] = {0, 0, 0};
88 static int probe_vfs_argc;
89 module_param_array(probe_vf, byte, &probe_vfs_argc, 0444);
90 MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)\n"
91                            "probe_vf=port1,port2,port1+2");
92
93 int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
94 module_param_named(log_num_mgm_entry_size,
95                         mlx4_log_num_mgm_entry_size, int, 0444);
96 MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, which defines the num"
97                                          " of qp per mcg, for example:"
98                                          " 10 gives 248. Range: 7 <="
99                                          " log_num_mgm_entry_size <= 12."
100                                          " To activate device managed"
101                                          " flow steering when available, set to -1");
102
103 static bool enable_64b_cqe_eqe = true;
104 module_param(enable_64b_cqe_eqe, bool, 0444);
105 MODULE_PARM_DESC(enable_64b_cqe_eqe,
106                  "Enable 64 byte CQEs/EQEs when the FW supports this (default: True)");
107
108 static bool enable_4k_uar;
109 module_param(enable_4k_uar, bool, 0444);
110 MODULE_PARM_DESC(enable_4k_uar,
111                  "Enable using 4K UAR. Should not be enabled if have VFs which do not support 4K UARs (default: false)");
112
113 #define PF_CONTEXT_BEHAVIOUR_MASK       (MLX4_FUNC_CAP_64B_EQE_CQE | \
114                                          MLX4_FUNC_CAP_EQE_CQE_STRIDE | \
115                                          MLX4_FUNC_CAP_DMFS_A0_STATIC)
116
117 #define RESET_PERSIST_MASK_FLAGS        (MLX4_FLAG_SRIOV)
118
119 static char mlx4_version[] =
120         DRV_NAME ": Mellanox ConnectX core driver v"
121         DRV_VERSION " (" DRV_RELDATE ")\n";
122
123 static struct mlx4_profile default_profile = {
124         .num_qp         = 1 << 18,
125         .num_srq        = 1 << 16,
126         .rdmarc_per_qp  = 1 << 4,
127         .num_cq         = 1 << 16,
128         .num_mcg        = 1 << 13,
129         .num_mpt        = 1 << 19,
130         .num_mtt        = 1 << 20, /* It is really num mtt segments */
131 };
132
133 static struct mlx4_profile low_mem_profile = {
134         .num_qp         = 1 << 17,
135         .num_srq        = 1 << 6,
136         .rdmarc_per_qp  = 1 << 4,
137         .num_cq         = 1 << 8,
138         .num_mcg        = 1 << 8,
139         .num_mpt        = 1 << 9,
140         .num_mtt        = 1 << 7,
141 };
142
143 static int log_num_mac = 7;
144 module_param_named(log_num_mac, log_num_mac, int, 0444);
145 MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");
146
147 static int log_num_vlan;
148 module_param_named(log_num_vlan, log_num_vlan, int, 0444);
149 MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)");
150 /* Log2 max number of VLANs per ETH port (0-7) */
151 #define MLX4_LOG_NUM_VLANS 7
152 #define MLX4_MIN_LOG_NUM_VLANS 0
153 #define MLX4_MIN_LOG_NUM_MAC 1
154
155 static bool use_prio;
156 module_param_named(use_prio, use_prio, bool, 0444);
157 MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports (deprecated)");
158
159 int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
160 module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
161 MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-7)");
162
163 static int port_type_array[2] = {MLX4_PORT_TYPE_NONE, MLX4_PORT_TYPE_NONE};
164 static int arr_argc = 2;
165 module_param_array(port_type_array, int, &arr_argc, 0444);
166 MODULE_PARM_DESC(port_type_array, "Array of port types: HW_DEFAULT (0) is default, "
167                                 "1 for IB, 2 for Ethernet");
168
169 struct mlx4_port_config {
170         struct list_head list;
171         enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
172         struct pci_dev *pdev;
173 };
174
175 static atomic_t pf_loading = ATOMIC_INIT(0);
176
177 static inline void mlx4_set_num_reserved_uars(struct mlx4_dev *dev,
178                                               struct mlx4_dev_cap *dev_cap)
179 {
180         /* reserved_uars is calculated in units of the system page size.
181          * Therefore, an adjustment is applied when the UAR page size is
182          * smaller than the system page size.
183          */
184         dev->caps.reserved_uars =
185                 max_t(int,
186                       mlx4_get_num_reserved_uar(dev),
187                       dev_cap->reserved_uars /
188                         (1 << (PAGE_SHIFT - dev->uar_page_shift)));
189 }
190
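/* Validate the port types requested for each port: on HCAs without DPDP
 * support all ports must use the same type, and every requested type must
 * be among the types the HCA reports as supported for that port.
 */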
191 int mlx4_check_port_params(struct mlx4_dev *dev,
192                            enum mlx4_port_type *port_type)
193 {
194         int i;
195
196         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
197                 for (i = 0; i < dev->caps.num_ports - 1; i++) {
198                         if (port_type[i] != port_type[i + 1]) {
199                                 mlx4_err(dev, "Only same port types supported on this HCA, aborting\n");
200                                 return -EINVAL;
201                         }
202                 }
203         }
204
205         for (i = 0; i < dev->caps.num_ports; i++) {
206                 if (!(port_type[i] & dev->caps.supported_type[i+1])) {
207                         mlx4_err(dev, "Requested port type for port %d is not supported on this HCA\n",
208                                  i + 1);
209                         return -EINVAL;
210                 }
211         }
212         return 0;
213 }
214
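/* Refresh the per-port protocol mask from the currently configured port types. */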
215 static void mlx4_set_port_mask(struct mlx4_dev *dev)
216 {
217         int i;
218
219         for (i = 1; i <= dev->caps.num_ports; ++i)
220                 dev->caps.port_mask[i] = dev->caps.port_type[i];
221 }
222
223 enum {
224         MLX4_QUERY_FUNC_NUM_SYS_EQS = 1 << 0,
225 };
226
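/* When the device exposes per-function SYS_EQs, override the EQ and UAR
 * limits with the values reported by QUERY_FUNC and flag the caller via
 * MLX4_QUERY_FUNC_NUM_SYS_EQS.
 */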
227 static int mlx4_query_func(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
228 {
229         int err = 0;
230         struct mlx4_func func;
231
232         if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
233                 err = mlx4_QUERY_FUNC(dev, &func, 0);
234                 if (err) {
235                         mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n");
236                         return err;
237                 }
238                 dev_cap->max_eqs = func.max_eq;
239                 dev_cap->reserved_eqs = func.rsvd_eqs;
240                 dev_cap->reserved_uars = func.rsvd_uars;
241                 err |= MLX4_QUERY_FUNC_NUM_SYS_EQS;
242         }
243         return err;
244 }
245
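/* Use the CQE/EQE stride format (64B entries carrying 32B of data) when the
 * firmware supports it and the CPU cache line is 128 or 256 bytes; otherwise
 * clear the stride capability bits and keep the regular CQE/EQE layout.
 */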
246 static void mlx4_enable_cqe_eqe_stride(struct mlx4_dev *dev)
247 {
248         struct mlx4_caps *dev_cap = &dev->caps;
249
250         /* FW does not support it, or it was cancelled by the user */
251         if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_EQE_STRIDE) ||
252             !(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_CQE_STRIDE))
253                 return;
254
255         /* Must have 64B CQE_EQE enabled by FW to use the bigger stride.
256          * When FW has NCSI it may decide not to report 64B CQE/EQEs.
257          */
258         if (!(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_EQE) ||
259             !(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_CQE)) {
260                 dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
261                 dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
262                 return;
263         }
264
265         if (cache_line_size() == 128 || cache_line_size() == 256) {
266                 mlx4_dbg(dev, "Enabling CQE stride, cacheLine supported\n");
267                 /* Changing the real data inside CQE size to 32B */
268                 dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
269                 dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
270
271                 if (mlx4_is_master(dev))
272                         dev_cap->function_caps |= MLX4_FUNC_CAP_EQE_CQE_STRIDE;
273         } else {
274                 if (cache_line_size() != 32  && cache_line_size() != 64)
275                         mlx4_dbg(dev, "Disabling CQE stride, cacheLine size unsupported\n");
276                 dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
277                 dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
278         }
279 }
280
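/* Copy the per-port limits reported by QUERY_PORT into dev->caps and
 * dev->phys_caps.
 */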
281 static int _mlx4_dev_port(struct mlx4_dev *dev, int port,
282                           struct mlx4_port_cap *port_cap)
283 {
284         dev->caps.vl_cap[port]      = port_cap->max_vl;
285         dev->caps.ib_mtu_cap[port]          = port_cap->ib_mtu;
286         dev->phys_caps.gid_phys_table_len[port]  = port_cap->max_gids;
287         dev->phys_caps.pkey_phys_table_len[port] = port_cap->max_pkeys;
288         /* set gid and pkey table operating lengths by default
289          * to non-sriov values
290          */
291         dev->caps.gid_table_len[port]  = port_cap->max_gids;
292         dev->caps.pkey_table_len[port] = port_cap->max_pkeys;
293         dev->caps.port_width_cap[port] = port_cap->max_port_width;
294         dev->caps.eth_mtu_cap[port]    = port_cap->eth_mtu;
295         dev->caps.max_tc_eth           = port_cap->max_tc_eth;
296         dev->caps.def_mac[port]        = port_cap->def_mac;
297         dev->caps.supported_type[port] = port_cap->supported_port_types;
298         dev->caps.suggested_type[port] = port_cap->suggested_type;
299         dev->caps.default_sense[port] = port_cap->default_sense;
300         dev->caps.trans_type[port]          = port_cap->trans_type;
301         dev->caps.vendor_oui[port]     = port_cap->vendor_oui;
302         dev->caps.wavelength[port]     = port_cap->wavelength;
303         dev->caps.trans_code[port]     = port_cap->trans_code;
304
305         return 0;
306 }
307
308 static int mlx4_dev_port(struct mlx4_dev *dev, int port,
309                          struct mlx4_port_cap *port_cap)
310 {
311         int err = 0;
312
313         err = mlx4_QUERY_PORT(dev, port, port_cap);
314
315         if (err)
316                 mlx4_err(dev, "QUERY_PORT command failed.\n");
317
318         return err;
319 }
320
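/* Ignore-FCS stays enabled only on non-multifunction devices that also
 * support keeping the FCS; otherwise clear the capability bit.
 */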
321 static inline void mlx4_enable_ignore_fcs(struct mlx4_dev *dev)
322 {
323         if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_IGNORE_FCS))
324                 return;
325
326         if (mlx4_is_mfunc(dev)) {
327                 mlx4_dbg(dev, "SRIOV mode - Disabling Ignore FCS\n");
328                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS;
329                 return;
330         }
331
332         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)) {
333                 mlx4_dbg(dev,
334                          "Keep FCS is not supported - Disabling Ignore FCS\n");
335                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS;
336                 return;
337         }
338 }
339
340 #define MLX4_A0_STEERING_TABLE_SIZE     256
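/* Query the device capabilities, validate them against kernel and PCI
 * limits, and populate dev->caps, including per-port defaults and the
 * module-parameter overrides.
 */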
341 static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
342 {
343         int err;
344         int i;
345
346         err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
347         if (err) {
348                 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
349                 return err;
350         }
351         mlx4_dev_cap_dump(dev, dev_cap);
352
353         if (dev_cap->min_page_sz > PAGE_SIZE) {
354                 mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
355                          dev_cap->min_page_sz, PAGE_SIZE);
356                 return -ENODEV;
357         }
358         if (dev_cap->num_ports > MLX4_MAX_PORTS) {
359                 mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
360                          dev_cap->num_ports, MLX4_MAX_PORTS);
361                 return -ENODEV;
362         }
363
364         if (dev_cap->uar_size > pci_resource_len(dev->persist->pdev, 2)) {
365                 mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
366                          dev_cap->uar_size,
367                          (unsigned long long)
368                          pci_resource_len(dev->persist->pdev, 2));
369                 return -ENODEV;
370         }
371
372         dev->caps.num_ports          = dev_cap->num_ports;
373         dev->caps.num_sys_eqs = dev_cap->num_sys_eqs;
374         dev->phys_caps.num_phys_eqs = dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS ?
375                                       dev->caps.num_sys_eqs :
376                                       MLX4_MAX_EQ_NUM;
377         for (i = 1; i <= dev->caps.num_ports; ++i) {
378                 err = _mlx4_dev_port(dev, i, dev_cap->port_cap + i);
379                 if (err) {
380                         mlx4_err(dev, "QUERY_PORT command failed, aborting\n");
381                         return err;
382                 }
383         }
384
385         dev->caps.uar_page_size      = PAGE_SIZE;
386         dev->caps.num_uars           = dev_cap->uar_size / PAGE_SIZE;
387         dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
388         dev->caps.bf_reg_size        = dev_cap->bf_reg_size;
389         dev->caps.bf_regs_per_page   = dev_cap->bf_regs_per_page;
390         dev->caps.max_sq_sg          = dev_cap->max_sq_sg;
391         dev->caps.max_rq_sg          = dev_cap->max_rq_sg;
392         dev->caps.max_wqes           = dev_cap->max_qp_sz;
393         dev->caps.max_qp_init_rdma   = dev_cap->max_requester_per_qp;
394         dev->caps.max_srq_wqes       = dev_cap->max_srq_sz;
395         dev->caps.max_srq_sge        = dev_cap->max_rq_sg - 1;
396         dev->caps.reserved_srqs      = dev_cap->reserved_srqs;
397         dev->caps.max_sq_desc_sz     = dev_cap->max_sq_desc_sz;
398         dev->caps.max_rq_desc_sz     = dev_cap->max_rq_desc_sz;
399         /*
400          * Subtract 1 from the limit because we need to allocate a
401          * spare CQE so the HCA HW can tell the difference between an
402          * empty CQ and a full CQ.
403          */
404         dev->caps.max_cqes           = dev_cap->max_cq_sz - 1;
405         dev->caps.reserved_cqs       = dev_cap->reserved_cqs;
406         dev->caps.reserved_eqs       = dev_cap->reserved_eqs;
407         dev->caps.reserved_mtts      = dev_cap->reserved_mtts;
408         dev->caps.reserved_mrws      = dev_cap->reserved_mrws;
409
410         dev->caps.reserved_pds       = dev_cap->reserved_pds;
411         dev->caps.reserved_xrcds     = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
412                                         dev_cap->reserved_xrcds : 0;
413         dev->caps.max_xrcds          = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
414                                         dev_cap->max_xrcds : 0;
415         dev->caps.mtt_entry_sz       = dev_cap->mtt_entry_sz;
416
417         dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
418         dev->caps.page_size_cap      = ~(u32) (dev_cap->min_page_sz - 1);
419         dev->caps.flags              = dev_cap->flags;
420         dev->caps.flags2             = dev_cap->flags2;
421         dev->caps.bmme_flags         = dev_cap->bmme_flags;
422         dev->caps.reserved_lkey      = dev_cap->reserved_lkey;
423         dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
424         dev->caps.max_gso_sz         = dev_cap->max_gso_sz;
425         dev->caps.max_rss_tbl_sz     = dev_cap->max_rss_tbl_sz;
426
427         /* Save uar page shift */
428         if (!mlx4_is_slave(dev)) {
429                 /* Virtual PCI function needs to determine UAR page size from
430                  * firmware. Only master PCI function can set the uar page size
431                  */
432                 if (enable_4k_uar)
433                         dev->uar_page_shift = DEFAULT_UAR_PAGE_SHIFT;
434                 else
435                         dev->uar_page_shift = PAGE_SHIFT;
436
437                 mlx4_set_num_reserved_uars(dev, dev_cap);
438         }
439
440         if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN) {
441                 struct mlx4_init_hca_param hca_param;
442
443                 memset(&hca_param, 0, sizeof(hca_param));
444                 err = mlx4_QUERY_HCA(dev, &hca_param);
445                 /* Turn off PHV_EN flag in case phv_check_en is set.
446                  * phv_check_en is a HW check that parses the packet and verifies
447                  * the phv bit was reported correctly in the wqe. To allow QinQ,
448                  * the PHV_EN flag should be set and phv_check_en must be cleared,
449                  * otherwise QinQ packets will be dropped by the HW.
450                  */
451                 if (err || hca_param.phv_check_en)
452                         dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_PHV_EN;
453         }
454
455         /* Sense port always allowed on supported devices for ConnectX-1 and -2 */
456         if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT)
457                 dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
458         /* Don't do sense port on multifunction devices (for now at least) */
459         if (mlx4_is_mfunc(dev))
460                 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
461
462         if (mlx4_low_memory_profile()) {
463                 dev->caps.log_num_macs  = MLX4_MIN_LOG_NUM_MAC;
464                 dev->caps.log_num_vlans = MLX4_MIN_LOG_NUM_VLANS;
465         } else {
466                 dev->caps.log_num_macs  = log_num_mac;
467                 dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
468         }
469
470         for (i = 1; i <= dev->caps.num_ports; ++i) {
471                 dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE;
472                 if (dev->caps.supported_type[i]) {
473                         /* if only ETH is supported - assign ETH */
474                         if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH)
475                                 dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
476                         /* if only IB is supported, assign IB */
477                         else if (dev->caps.supported_type[i] ==
478                                  MLX4_PORT_TYPE_IB)
479                                 dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
480                         else {
481                                 /* if IB and ETH are supported, we set the port
482                                  * type according to user selection of port type;
483                                  * if user selected none, take the FW hint */
484                                 if (port_type_array[i - 1] == MLX4_PORT_TYPE_NONE)
485                                         dev->caps.port_type[i] = dev->caps.suggested_type[i] ?
486                                                 MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB;
487                                 else
488                                         dev->caps.port_type[i] = port_type_array[i - 1];
489                         }
490                 }
491                 /*
492                  * Link sensing is allowed on the port if 3 conditions are true:
493                  * 1. Both protocols are supported on the port.
494                  * 2. The device supports different port types (DPDP).
495                  * 3. FW declared that it supports link sensing.
496                  */
497                 mlx4_priv(dev)->sense.sense_allowed[i] =
498                         ((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) &&
499                          (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
500                          (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT));
501
502                 /*
503                  * If "default_sense" bit is set, we move the port to "AUTO" mode
504                  * and perform sense_port FW command to try and set the correct
505                  * port type from beginning
506                  */
507                 if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) {
508                         enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE;
509                         dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO;
510                         mlx4_SENSE_PORT(dev, i, &sensed_port);
511                         if (sensed_port != MLX4_PORT_TYPE_NONE)
512                                 dev->caps.port_type[i] = sensed_port;
513                 } else {
514                         dev->caps.possible_type[i] = dev->caps.port_type[i];
515                 }
516
517                 if (dev->caps.log_num_macs > dev_cap->port_cap[i].log_max_macs) {
518                         dev->caps.log_num_macs = dev_cap->port_cap[i].log_max_macs;
519                         mlx4_warn(dev, "Requested number of MACs is too high for port %d, reducing to %d\n",
520                                   i, 1 << dev->caps.log_num_macs);
521                 }
522                 if (dev->caps.log_num_vlans > dev_cap->port_cap[i].log_max_vlans) {
523                         dev->caps.log_num_vlans = dev_cap->port_cap[i].log_max_vlans;
524                         mlx4_warn(dev, "Requested number of VLANs is too high for port %d, reducing to %d\n",
525                                   i, 1 << dev->caps.log_num_vlans);
526                 }
527         }
528
529         if (mlx4_is_master(dev) && (dev->caps.num_ports == 2) &&
530             (port_type_array[0] == MLX4_PORT_TYPE_IB) &&
531             (port_type_array[1] == MLX4_PORT_TYPE_ETH)) {
532                 mlx4_warn(dev,
533                           "Granular QoS per VF not supported with IB/Eth configuration\n");
534                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_QOS_VPP;
535         }
536
537         dev->caps.max_counters = dev_cap->max_counters;
538
539         dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
540         dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
541                 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
542                 (1 << dev->caps.log_num_macs) *
543                 (1 << dev->caps.log_num_vlans) *
544                 dev->caps.num_ports;
545         dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;
546
547         if (dev_cap->dmfs_high_rate_qpn_base > 0 &&
548             dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN)
549                 dev->caps.dmfs_high_rate_qpn_base = dev_cap->dmfs_high_rate_qpn_base;
550         else
551                 dev->caps.dmfs_high_rate_qpn_base =
552                         dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
553
554         if (dev_cap->dmfs_high_rate_qpn_range > 0 &&
555             dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN) {
556                 dev->caps.dmfs_high_rate_qpn_range = dev_cap->dmfs_high_rate_qpn_range;
557                 dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DEFAULT;
558                 dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_FS_A0;
559         } else {
560                 dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_NOT_SUPPORTED;
561                 dev->caps.dmfs_high_rate_qpn_base =
562                         dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
563                 dev->caps.dmfs_high_rate_qpn_range = MLX4_A0_STEERING_TABLE_SIZE;
564         }
565
566         dev->caps.rl_caps = dev_cap->rl_caps;
567
568         dev->caps.reserved_qps_cnt[MLX4_QP_REGION_RSS_RAW_ETH] =
569                 dev->caps.dmfs_high_rate_qpn_range;
570
571         dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
572                 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] +
573                 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
574                 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];
575
576         dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;
577
578         if (!enable_64b_cqe_eqe && !mlx4_is_slave(dev)) {
579                 if (dev_cap->flags &
580                     (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
581                         mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
582                         dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
583                         dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
584                 }
585
586                 if (dev_cap->flags2 &
587                     (MLX4_DEV_CAP_FLAG2_CQE_STRIDE |
588                      MLX4_DEV_CAP_FLAG2_EQE_STRIDE)) {
589                         mlx4_warn(dev, "Disabling EQE/CQE stride per user request\n");
590                         dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
591                         dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
592                 }
593         }
594
595         if ((dev->caps.flags &
596             (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) &&
597             mlx4_is_master(dev))
598                 dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;
599
600         if (!mlx4_is_slave(dev)) {
601                 mlx4_enable_cqe_eqe_stride(dev);
602                 dev->caps.alloc_res_qp_mask =
603                         (dev->caps.bf_reg_size ? MLX4_RESERVE_ETH_BF_QP : 0) |
604                         MLX4_RESERVE_A0_QP;
605
606                 if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) &&
607                     dev->caps.flags & MLX4_DEV_CAP_FLAG_SET_ETH_SCHED) {
608                         mlx4_warn(dev, "Old device ETS support detected\n");
609                         mlx4_warn(dev, "Consider upgrading device FW.\n");
610                         dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_ETS_CFG;
611                 }
612
613         } else {
614                 dev->caps.alloc_res_qp_mask = 0;
615         }
616
617         mlx4_enable_ignore_fcs(dev);
618
619         return 0;
620 }
621
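/* Read PCI_EXP_LNKCAP/LNKCAP2 to determine the maximum PCIe speed and link
 * width the device itself supports.
 */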
622 static int mlx4_get_pcie_dev_link_caps(struct mlx4_dev *dev,
623                                        enum pci_bus_speed *speed,
624                                        enum pcie_link_width *width)
625 {
626         u32 lnkcap1, lnkcap2;
627         int err1, err2;
628
629 #define  PCIE_MLW_CAP_SHIFT 4   /* start of MLW mask in link capabilities */
630
631         *speed = PCI_SPEED_UNKNOWN;
632         *width = PCIE_LNK_WIDTH_UNKNOWN;
633
634         err1 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP,
635                                           &lnkcap1);
636         err2 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP2,
637                                           &lnkcap2);
638         if (!err2 && lnkcap2) { /* PCIe r3.0-compliant */
639                 if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB)
640                         *speed = PCIE_SPEED_8_0GT;
641                 else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_5_0GB)
642                         *speed = PCIE_SPEED_5_0GT;
643                 else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_2_5GB)
644                         *speed = PCIE_SPEED_2_5GT;
645         }
646         if (!err1) {
647                 *width = (lnkcap1 & PCI_EXP_LNKCAP_MLW) >> PCIE_MLW_CAP_SHIFT;
648                 if (!lnkcap2) { /* pre-r3.0 */
649                         if (lnkcap1 & PCI_EXP_LNKCAP_SLS_5_0GB)
650                                 *speed = PCIE_SPEED_5_0GT;
651                         else if (lnkcap1 & PCI_EXP_LNKCAP_SLS_2_5GB)
652                                 *speed = PCIE_SPEED_2_5GT;
653                 }
654         }
655
656         if (*speed == PCI_SPEED_UNKNOWN || *width == PCIE_LNK_WIDTH_UNKNOWN) {
657                 return err1 ? err1 :
658                         err2 ? err2 : -EINVAL;
659         }
660         return 0;
661 }
662
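/* Compare the minimum link speed and width available along the PCIe chain
 * with the device capabilities and warn when they differ.
 */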
663 static void mlx4_check_pcie_caps(struct mlx4_dev *dev)
664 {
665         enum pcie_link_width width, width_cap;
666         enum pci_bus_speed speed, speed_cap;
667         int err;
668
669 #define PCIE_SPEED_STR(speed) \
670         (speed == PCIE_SPEED_8_0GT ? "8.0GT/s" : \
671          speed == PCIE_SPEED_5_0GT ? "5.0GT/s" : \
672          speed == PCIE_SPEED_2_5GT ? "2.5GT/s" : \
673          "Unknown")
674
675         err = mlx4_get_pcie_dev_link_caps(dev, &speed_cap, &width_cap);
676         if (err) {
677                 mlx4_warn(dev,
678                           "Unable to determine PCIe device BW capabilities\n");
679                 return;
680         }
681
682         err = pcie_get_minimum_link(dev->persist->pdev, &speed, &width);
683         if (err || speed == PCI_SPEED_UNKNOWN ||
684             width == PCIE_LNK_WIDTH_UNKNOWN) {
685                 mlx4_warn(dev,
686                           "Unable to determine PCI device chain minimum BW\n");
687                 return;
688         }
689
690         if (width != width_cap || speed != speed_cap)
691                 mlx4_warn(dev,
692                           "PCIe BW is different than device's capability\n");
693
694         mlx4_info(dev, "PCIe link speed is %s, device supports %s\n",
695                   PCIE_SPEED_STR(speed), PCIE_SPEED_STR(speed_cap));
696         mlx4_info(dev, "PCIe link width is x%d, device supports x%d\n",
697                   width, width_cap);
698         return;
699 }
700
701 /* Check whether there are live VFs and return how many of them exist */
702 static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
703 {
704         struct mlx4_priv *priv = mlx4_priv(dev);
705         struct mlx4_slave_state *s_state;
706         int i;
707         int ret = 0;
708
709         for (i = 1/*the ppf is 0*/; i < dev->num_slaves; ++i) {
710                 s_state = &priv->mfunc.master.slave_state[i];
711                 if (s_state->active && s_state->last_cmd !=
712                     MLX4_COMM_CMD_RESET) {
713                         mlx4_warn(dev, "%s: slave: %d is still active\n",
714                                   __func__, i);
715                         ret++;
716                 }
717         }
718         return ret;
719 }
720
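/* Return the reserved paravirtualized qkey for a proxy or tunnel special QP;
 * QPNs outside the special QP range are rejected.
 */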
721 int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey)
722 {
723         u32 qk = MLX4_RESERVED_QKEY_BASE;
724
725         if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX ||
726             qpn < dev->phys_caps.base_proxy_sqpn)
727                 return -EINVAL;
728
729         if (qpn >= dev->phys_caps.base_tunnel_sqpn)
730                 /* tunnel qp */
731                 qk += qpn - dev->phys_caps.base_tunnel_sqpn;
732         else
733                 qk += qpn - dev->phys_caps.base_proxy_sqpn;
734         *qkey = qk;
735         return 0;
736 }
737 EXPORT_SYMBOL(mlx4_get_parav_qkey);
738
739 void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val)
740 {
741         struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
742
743         if (!mlx4_is_master(dev))
744                 return;
745
746         priv->virt2phys_pkey[slave][port - 1][i] = val;
747 }
748 EXPORT_SYMBOL(mlx4_sync_pkey_table);
749
750 void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid)
751 {
752         struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
753
754         if (!mlx4_is_master(dev))
755                 return;
756
757         priv->slave_node_guids[slave] = guid;
758 }
759 EXPORT_SYMBOL(mlx4_put_slave_node_guid);
760
761 __be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave)
762 {
763         struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
764
765         if (!mlx4_is_master(dev))
766                 return 0;
767
768         return priv->slave_node_guids[slave];
769 }
770 EXPORT_SYMBOL(mlx4_get_slave_node_guid);
771
772 int mlx4_is_slave_active(struct mlx4_dev *dev, int slave)
773 {
774         struct mlx4_priv *priv = mlx4_priv(dev);
775         struct mlx4_slave_state *s_slave;
776
777         if (!mlx4_is_master(dev))
778                 return 0;
779
780         s_slave = &priv->mfunc.master.slave_state[slave];
781         return !!s_slave->active;
782 }
783 EXPORT_SYMBOL(mlx4_is_slave_active);
784
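/* Adopt the steering mode chosen by the PF and size the per-MCG QP limit
 * accordingly.
 */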
785 static void slave_adjust_steering_mode(struct mlx4_dev *dev,
786                                        struct mlx4_dev_cap *dev_cap,
787                                        struct mlx4_init_hca_param *hca_param)
788 {
789         dev->caps.steering_mode = hca_param->steering_mode;
790         if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
791                 dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
792                 dev->caps.fs_log_max_ucast_qp_range_size =
793                         dev_cap->fs_log_max_ucast_qp_range_size;
794         } else
795                 dev->caps.num_qp_per_mgm =
796                         4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2);
797
798         mlx4_dbg(dev, "Steering mode is: %s\n",
799                  mlx4_steering_mode_str(dev->caps.steering_mode));
800 }
801
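/* Build dev->caps for a slave (VF) function from the QUERY_HCA,
 * QUERY_DEV_CAP and QUERY_FUNC_CAP results.
 */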
802 static int mlx4_slave_cap(struct mlx4_dev *dev)
803 {
804         int                        err;
805         u32                        page_size;
806         struct mlx4_dev_cap        dev_cap;
807         struct mlx4_func_cap       func_cap;
808         struct mlx4_init_hca_param hca_param;
809         u8                         i;
810
811         memset(&hca_param, 0, sizeof(hca_param));
812         err = mlx4_QUERY_HCA(dev, &hca_param);
813         if (err) {
814                 mlx4_err(dev, "QUERY_HCA command failed, aborting\n");
815                 return err;
816         }
817
818         /* Fail if the HCA has an unknown global capability;
819          * at this time, global_caps should always be zeroed.
820          */
821         if (hca_param.global_caps) {
822                 mlx4_err(dev, "Unknown hca global capabilities\n");
823                 return -ENOSYS;
824         }
825
826         mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz;
827
828         dev->caps.hca_core_clock = hca_param.hca_core_clock;
829
830         memset(&dev_cap, 0, sizeof(dev_cap));
831         dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp;
832         err = mlx4_dev_cap(dev, &dev_cap);
833         if (err) {
834                 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
835                 return err;
836         }
837
838         err = mlx4_QUERY_FW(dev);
839         if (err)
840                 mlx4_err(dev, "QUERY_FW command failed: could not get FW version\n");
841
842         page_size = ~dev->caps.page_size_cap + 1;
843         mlx4_warn(dev, "HCA minimum page size:%d\n", page_size);
844         if (page_size > PAGE_SIZE) {
845                 mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
846                          page_size, PAGE_SIZE);
847                 return -ENODEV;
848         }
849
850         /* Set uar_page_shift for VF */
851         dev->uar_page_shift = hca_param.uar_page_sz + 12;
852
853         /* Make sure the master uar page size is valid */
854         if (dev->uar_page_shift > PAGE_SHIFT) {
855                 mlx4_err(dev,
856                          "Invalid configuration: uar page size is larger than system page size\n");
857                 return  -ENODEV;
858         }
859
860         /* Set reserved_uars based on the uar_page_shift */
861         mlx4_set_num_reserved_uars(dev, &dev_cap);
862
863         /* Although uar page size in FW differs from system page size,
864          * upper software layers (mlx4_ib, mlx4_en and part of mlx4_core)
865          * still work with the assumption that uar page size == system page size
866          */
867         dev->caps.uar_page_size = PAGE_SIZE;
868
869         memset(&func_cap, 0, sizeof(func_cap));
870         err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap);
871         if (err) {
872                 mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d)\n",
873                          err);
874                 return err;
875         }
876
877         if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) !=
878             PF_CONTEXT_BEHAVIOUR_MASK) {
879                 mlx4_err(dev, "Unknown pf context behaviour %x known flags %x\n",
880                          func_cap.pf_context_behaviour, PF_CONTEXT_BEHAVIOUR_MASK);
881                 return -ENOSYS;
882         }
883
884         dev->caps.num_ports             = func_cap.num_ports;
885         dev->quotas.qp                  = func_cap.qp_quota;
886         dev->quotas.srq                 = func_cap.srq_quota;
887         dev->quotas.cq                  = func_cap.cq_quota;
888         dev->quotas.mpt                 = func_cap.mpt_quota;
889         dev->quotas.mtt                 = func_cap.mtt_quota;
890         dev->caps.num_qps               = 1 << hca_param.log_num_qps;
891         dev->caps.num_srqs              = 1 << hca_param.log_num_srqs;
892         dev->caps.num_cqs               = 1 << hca_param.log_num_cqs;
893         dev->caps.num_mpts              = 1 << hca_param.log_mpt_sz;
894         dev->caps.num_eqs               = func_cap.max_eq;
895         dev->caps.reserved_eqs          = func_cap.reserved_eq;
896         dev->caps.reserved_lkey         = func_cap.reserved_lkey;
897         dev->caps.num_pds               = MLX4_NUM_PDS;
898         dev->caps.num_mgms              = 0;
899         dev->caps.num_amgms             = 0;
900
901         if (dev->caps.num_ports > MLX4_MAX_PORTS) {
902                 mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
903                          dev->caps.num_ports, MLX4_MAX_PORTS);
904                 return -ENODEV;
905         }
906
907         mlx4_replace_zero_macs(dev);
908
909         dev->caps.qp0_qkey = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
910         dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
911         dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
912         dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
913         dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
914
915         if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
916             !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy ||
917             !dev->caps.qp0_qkey) {
918                 err = -ENOMEM;
919                 goto err_mem;
920         }
921
922         for (i = 1; i <= dev->caps.num_ports; ++i) {
923                 err = mlx4_QUERY_FUNC_CAP(dev, i, &func_cap);
924                 if (err) {
925                         mlx4_err(dev, "QUERY_FUNC_CAP port command failed for port %d, aborting (%d)\n",
926                                  i, err);
927                         goto err_mem;
928                 }
929                 dev->caps.qp0_qkey[i - 1] = func_cap.qp0_qkey;
930                 dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn;
931                 dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn;
932                 dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn;
933                 dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn;
934                 dev->caps.port_mask[i] = dev->caps.port_type[i];
935                 dev->caps.phys_port_id[i] = func_cap.phys_port_id;
936                 err = mlx4_get_slave_pkey_gid_tbl_len(dev, i,
937                                                       &dev->caps.gid_table_len[i],
938                                                       &dev->caps.pkey_table_len[i]);
939                 if (err)
940                         goto err_mem;
941         }
942
943         if (dev->caps.uar_page_size * (dev->caps.num_uars -
944                                        dev->caps.reserved_uars) >
945                                        pci_resource_len(dev->persist->pdev,
946                                                         2)) {
947                 mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
948                          dev->caps.uar_page_size * dev->caps.num_uars,
949                          (unsigned long long)
950                          pci_resource_len(dev->persist->pdev, 2));
951                 err = -ENOMEM;
952                 goto err_mem;
953         }
954
955         if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
956                 dev->caps.eqe_size   = 64;
957                 dev->caps.eqe_factor = 1;
958         } else {
959                 dev->caps.eqe_size   = 32;
960                 dev->caps.eqe_factor = 0;
961         }
962
963         if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
964                 dev->caps.cqe_size   = 64;
965                 dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
966         } else {
967                 dev->caps.cqe_size   = 32;
968         }
969
970         if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_EQE_STRIDE_ENABLED) {
971                 dev->caps.eqe_size = hca_param.eqe_size;
972                 dev->caps.eqe_factor = 0;
973         }
974
975         if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_CQE_STRIDE_ENABLED) {
976                 dev->caps.cqe_size = hca_param.cqe_size;
977                 /* User still needs to know when CQE > 32B */
978                 dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
979         }
980
981         dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
982         mlx4_warn(dev, "Timestamping is not supported in slave mode\n");
983
984         slave_adjust_steering_mode(dev, &dev_cap, &hca_param);
985         mlx4_dbg(dev, "RSS support for IP fragments is %s\n",
986                  hca_param.rss_ip_frags ? "on" : "off");
987
988         if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP &&
989             dev->caps.bf_reg_size)
990                 dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_ETH_BF_QP;
991
992         if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_A0_RES_QP)
993                 dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_A0_QP;
994
995         return 0;
996
997 err_mem:
998         kfree(dev->caps.qp0_qkey);
999         kfree(dev->caps.qp0_tunnel);
1000         kfree(dev->caps.qp0_proxy);
1001         kfree(dev->caps.qp1_tunnel);
1002         kfree(dev->caps.qp1_proxy);
1003         dev->caps.qp0_qkey = NULL;
1004         dev->caps.qp0_tunnel = NULL;
1005         dev->caps.qp0_proxy = NULL;
1006         dev->caps.qp1_tunnel = NULL;
1007         dev->caps.qp1_proxy = NULL;
1008
1009         return err;
1010 }
1011
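/* Asynchronously request mlx4_en and/or mlx4_ib depending on the configured
 * port types; mlx4_ib is also requested when the device supports IBoE.
 */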
1012 static void mlx4_request_modules(struct mlx4_dev *dev)
1013 {
1014         int port;
1015         int has_ib_port = false;
1016         int has_eth_port = false;
1017 #define EN_DRV_NAME     "mlx4_en"
1018 #define IB_DRV_NAME     "mlx4_ib"
1019
1020         for (port = 1; port <= dev->caps.num_ports; port++) {
1021                 if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB)
1022                         has_ib_port = true;
1023                 else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
1024                         has_eth_port = true;
1025         }
1026
1027         if (has_eth_port)
1028                 request_module_nowait(EN_DRV_NAME);
1029         if (has_ib_port || (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
1030                 request_module_nowait(IB_DRV_NAME);
1031 }
1032
1033 /*
1034  * Change the port configuration of the device.
1035  * Every user of this function must hold the port mutex.
1036  */
1037 int mlx4_change_port_types(struct mlx4_dev *dev,
1038                            enum mlx4_port_type *port_types)
1039 {
1040         int err = 0;
1041         int change = 0;
1042         int port;
1043
1044         for (port = 0; port <  dev->caps.num_ports; port++) {
1045                 /* Change the port type only if the new type is different
1046                  * from the current, and not set to Auto */
1047                 if (port_types[port] != dev->caps.port_type[port + 1])
1048                         change = 1;
1049         }
1050         if (change) {
1051                 mlx4_unregister_device(dev);
1052                 for (port = 1; port <= dev->caps.num_ports; port++) {
1053                         mlx4_CLOSE_PORT(dev, port);
1054                         dev->caps.port_type[port] = port_types[port - 1];
1055                         err = mlx4_SET_PORT(dev, port, -1);
1056                         if (err) {
1057                                 mlx4_err(dev, "Failed to set port %d, aborting\n",
1058                                          port);
1059                                 goto out;
1060                         }
1061                 }
1062                 mlx4_set_port_mask(dev);
1063                 err = mlx4_register_device(dev);
1064                 if (err) {
1065                         mlx4_err(dev, "Failed to register device\n");
1066                         goto out;
1067                 }
1068                 mlx4_request_modules(dev);
1069         }
1070
1071 out:
1072         return err;
1073 }
1074
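/* sysfs show handler for the port type attribute: prints "ib" or "eth",
 * wrapped in "auto (...)" when the type is auto-sensed.
 */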
1075 static ssize_t show_port_type(struct device *dev,
1076                               struct device_attribute *attr,
1077                               char *buf)
1078 {
1079         struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
1080                                                    port_attr);
1081         struct mlx4_dev *mdev = info->dev;
1082         char type[8];
1083
1084         sprintf(type, "%s",
1085                 (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ?
1086                 "ib" : "eth");
1087         if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO)
1088                 sprintf(buf, "auto (%s)\n", type);
1089         else
1090                 sprintf(buf, "%s\n", type);
1091
1092         return strlen(buf);
1093 }
1094
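/* Apply a requested port type change: validate it against DPDP and port
 * sensing support, then reconfigure all ports under the port mutex.
 */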
1095 static int __set_port_type(struct mlx4_port_info *info,
1096                            enum mlx4_port_type port_type)
1097 {
1098         struct mlx4_dev *mdev = info->dev;
1099         struct mlx4_priv *priv = mlx4_priv(mdev);
1100         enum mlx4_port_type types[MLX4_MAX_PORTS];
1101         enum mlx4_port_type new_types[MLX4_MAX_PORTS];
1102         int i;
1103         int err = 0;
1104
1105         mlx4_stop_sense(mdev);
1106         mutex_lock(&priv->port_mutex);
1107         info->tmp_type = port_type;
1108
1109         /* Possible type is always the one that was delivered */
1110         mdev->caps.possible_type[info->port] = info->tmp_type;
1111
1112         for (i = 0; i < mdev->caps.num_ports; i++) {
1113                 types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type :
1114                                         mdev->caps.possible_type[i+1];
1115                 if (types[i] == MLX4_PORT_TYPE_AUTO)
1116                         types[i] = mdev->caps.port_type[i+1];
1117         }
1118
1119         if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
1120             !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) {
1121                 for (i = 1; i <= mdev->caps.num_ports; i++) {
1122                         if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) {
1123                                 mdev->caps.possible_type[i] = mdev->caps.port_type[i];
1124                                 err = -EINVAL;
1125                         }
1126                 }
1127         }
1128         if (err) {
1129                 mlx4_err(mdev, "Auto sensing is not supported on this HCA. Set only 'eth' or 'ib' for both ports (should be the same)\n");
1130                 goto out;
1131         }
1132
1133         mlx4_do_sense_ports(mdev, new_types, types);
1134
1135         err = mlx4_check_port_params(mdev, new_types);
1136         if (err)
1137                 goto out;
1138
1139         /* We are about to apply the changes after the configuration
1140          * was verified; there is no need to remember the temporary types
1141          * any more */
1142         for (i = 0; i < mdev->caps.num_ports; i++)
1143                 priv->port[i + 1].tmp_type = 0;
1144
1145         err = mlx4_change_port_types(mdev, new_types);
1146
1147 out:
1148         mlx4_start_sense(mdev);
1149         mutex_unlock(&priv->port_mutex);
1150
1151         return err;
1152 }
1153
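/* sysfs store handler for the port type attribute: accepts "ib", "eth" or
 * "auto" and forwards the request to __set_port_type() under a global mutex.
 */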
1154 static ssize_t set_port_type(struct device *dev,
1155                              struct device_attribute *attr,
1156                              const char *buf, size_t count)
1157 {
1158         struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
1159                                                    port_attr);
1160         struct mlx4_dev *mdev = info->dev;
1161         enum mlx4_port_type port_type;
1162         static DEFINE_MUTEX(set_port_type_mutex);
1163         int err;
1164
1165         mutex_lock(&set_port_type_mutex);
1166
1167         if (!strcmp(buf, "ib\n")) {
1168                 port_type = MLX4_PORT_TYPE_IB;
1169         } else if (!strcmp(buf, "eth\n")) {
1170                 port_type = MLX4_PORT_TYPE_ETH;
1171         } else if (!strcmp(buf, "auto\n")) {
1172                 port_type = MLX4_PORT_TYPE_AUTO;
1173         } else {
1174                 mlx4_err(mdev, "%s is not a supported port type\n", buf);
1175                 err = -EINVAL;
1176                 goto err_out;
1177         }
1178
1179         err = __set_port_type(info, port_type);
1180
1181 err_out:
1182         mutex_unlock(&set_port_type_mutex);
1183
1184         return err ? err : count;
1185 }
1186
1187 enum ibta_mtu {
1188         IB_MTU_256  = 1,
1189         IB_MTU_512  = 2,
1190         IB_MTU_1024 = 3,
1191         IB_MTU_2048 = 4,
1192         IB_MTU_4096 = 5
1193 };
1194
1195 static inline int int_to_ibta_mtu(int mtu)
1196 {
1197         switch (mtu) {
1198         case 256:  return IB_MTU_256;
1199         case 512:  return IB_MTU_512;
1200         case 1024: return IB_MTU_1024;
1201         case 2048: return IB_MTU_2048;
1202         case 4096: return IB_MTU_4096;
1203         default: return -1;
1204         }
1205 }
1206
1207 static inline int ibta_mtu_to_int(enum ibta_mtu mtu)
1208 {
1209         switch (mtu) {
1210         case IB_MTU_256:  return  256;
1211         case IB_MTU_512:  return  512;
1212         case IB_MTU_1024: return 1024;
1213         case IB_MTU_2048: return 2048;
1214         case IB_MTU_4096: return 4096;
1215         default: return -1;
1216         }
1217 }
1218
1219 static ssize_t show_port_ib_mtu(struct device *dev,
1220                              struct device_attribute *attr,
1221                              char *buf)
1222 {
1223         struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
1224                                                    port_mtu_attr);
1225         struct mlx4_dev *mdev = info->dev;
1226
1227         if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH)
1228                 mlx4_warn(mdev, "port level mtu is only used for IB ports\n");
1229
1230         sprintf(buf, "%d\n",
1231                         ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port]));
1232         return strlen(buf);
1233 }
1234
1235 static ssize_t set_port_ib_mtu(struct device *dev,
1236                              struct device_attribute *attr,
1237                              const char *buf, size_t count)
1238 {
1239         struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
1240                                                    port_mtu_attr);
1241         struct mlx4_dev *mdev = info->dev;
1242         struct mlx4_priv *priv = mlx4_priv(mdev);
1243         int err, port, mtu, ibta_mtu = -1;
1244
1245         if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) {
1246                 mlx4_warn(mdev, "port level mtu is only used for IB ports\n");
1247                 return -EINVAL;
1248         }
1249
1250         err = kstrtoint(buf, 0, &mtu);
1251         if (!err)
1252                 ibta_mtu = int_to_ibta_mtu(mtu);
1253
1254         if (err || ibta_mtu < 0) {
1255                 mlx4_err(mdev, "%s is an invalid IBTA mtu\n", buf);
1256                 return -EINVAL;
1257         }
1258
1259         mdev->caps.port_ib_mtu[info->port] = ibta_mtu;
1260
1261         mlx4_stop_sense(mdev);
1262         mutex_lock(&priv->port_mutex);
1263         mlx4_unregister_device(mdev);
1264         for (port = 1; port <= mdev->caps.num_ports; port++) {
1265                 mlx4_CLOSE_PORT(mdev, port);
1266                 err = mlx4_SET_PORT(mdev, port, -1);
1267                 if (err) {
1268                         mlx4_err(mdev, "Failed to set port %d, aborting\n",
1269                                  port);
1270                         goto err_set_port;
1271                 }
1272         }
1273         err = mlx4_register_device(mdev);
1274 err_set_port:
1275         mutex_unlock(&priv->port_mutex);
1276         mlx4_start_sense(mdev);
1277         return err ? err : count;
1278 }
1279
1280 /* bond for multi-function device */
1281 #define MAX_MF_BOND_ALLOWED_SLAVES 63
1282 static int mlx4_mf_bond(struct mlx4_dev *dev)
1283 {
1284         int err = 0;
1285         int nvfs;
1286         struct mlx4_slaves_pport slaves_port1;
1287         struct mlx4_slaves_pport slaves_port2;
1288         DECLARE_BITMAP(slaves_port_1_2, MLX4_MFUNC_MAX);
1289
1290         slaves_port1 = mlx4_phys_to_slaves_pport(dev, 1);
1291         slaves_port2 = mlx4_phys_to_slaves_pport(dev, 2);
1292         bitmap_and(slaves_port_1_2,
1293                    slaves_port1.slaves, slaves_port2.slaves,
1294                    dev->persist->num_vfs + 1);
1295
1296         /* only single port vfs are allowed */
1297         if (bitmap_weight(slaves_port_1_2, dev->persist->num_vfs + 1) > 1) {
1298                 mlx4_warn(dev, "HA mode unsupported for dual ported VFs\n");
1299                 return -EINVAL;
1300         }
1301
1302         /* the number of virtual functions is the total number of functions
1303          * minus one physical function for each port.
1304          */
1305         nvfs = bitmap_weight(slaves_port1.slaves, dev->persist->num_vfs + 1) +
1306                 bitmap_weight(slaves_port2.slaves, dev->persist->num_vfs + 1) - 2;
1307
1308         /* limit on maximum allowed VFs */
1309         if (nvfs > MAX_MF_BOND_ALLOWED_SLAVES) {
1310                 mlx4_warn(dev, "HA mode is not supported for %d VFs (max %d are allowed)\n",
1311                           nvfs, MAX_MF_BOND_ALLOWED_SLAVES);
1312                 return -EINVAL;
1313         }
1314
1315         if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) {
1316                 mlx4_warn(dev, "HA mode unsupported for NON DMFS steering\n");
1317                 return -EINVAL;
1318         }
1319
1320         err = mlx4_bond_mac_table(dev);
1321         if (err)
1322                 return err;
1323         err = mlx4_bond_vlan_table(dev);
1324         if (err)
1325                 goto err1;
1326         err = mlx4_bond_fs_rules(dev);
1327         if (err)
1328                 goto err2;
1329
1330         return 0;
1331 err2:
1332         (void)mlx4_unbond_vlan_table(dev);
1333 err1:
1334         (void)mlx4_unbond_mac_table(dev);
1335         return err;
1336 }
1337
1338 static int mlx4_mf_unbond(struct mlx4_dev *dev)
1339 {
1340         int ret, ret1;
1341
1342         ret = mlx4_unbond_fs_rules(dev);
1343         if (ret)
1344                 mlx4_warn(dev, "multifunction unbond for flow rules failedi (%d)\n", ret);
1345         ret1 = mlx4_unbond_mac_table(dev);
1346         if (ret1) {
1347                 mlx4_warn(dev, "multifunction unbond for MAC table failed (%d)\n", ret1);
1348                 ret = ret1;
1349         }
1350         ret1 = mlx4_unbond_vlan_table(dev);
1351         if (ret1) {
1352                 mlx4_warn(dev, "multifunction unbond for VLAN table failed (%d)\n", ret1);
1353                 ret = ret1;
1354         }
1355         return ret;
1356 }
1357
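/*
 * mlx4_bond() - bond the device's two ports.  Under bond_mutex, perform
 * the low-level bond via mlx4_do_bond(); on the master, also bond the
 * shared multi-function tables via mlx4_mf_bond(), rolling the bond back
 * if that fails.
 */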
1358 int mlx4_bond(struct mlx4_dev *dev)
1359 {
1360         int ret = 0;
1361         struct mlx4_priv *priv = mlx4_priv(dev);
1362
1363         mutex_lock(&priv->bond_mutex);
1364
1365         if (!mlx4_is_bonded(dev)) {
1366                 ret = mlx4_do_bond(dev, true);
1367                 if (ret)
1368                         mlx4_err(dev, "Failed to bond device: %d\n", ret);
1369                 if (!ret && mlx4_is_master(dev)) {
1370                         ret = mlx4_mf_bond(dev);
1371                         if (ret) {
1372                                 mlx4_err(dev, "bond for multifunction failed\n");
1373                                 mlx4_do_bond(dev, false);
1374                         }
1375                 }
1376         }
1377
1378         mutex_unlock(&priv->bond_mutex);
1379         if (!ret)
1380                 mlx4_dbg(dev, "Device is bonded\n");
1381
1382         return ret;
1383 }
1384 EXPORT_SYMBOL_GPL(mlx4_bond);
1385
1386 int mlx4_unbond(struct mlx4_dev *dev)
1387 {
1388         int ret = 0;
1389         struct mlx4_priv *priv = mlx4_priv(dev);
1390
1391         mutex_lock(&priv->bond_mutex);
1392
1393         if (mlx4_is_bonded(dev)) {
1394                 int ret2 = 0;
1395
1396                 ret = mlx4_do_bond(dev, false);
1397                 if (ret)
1398                         mlx4_err(dev, "Failed to unbond device: %d\n", ret);
1399                 if (mlx4_is_master(dev))
1400                         ret2 = mlx4_mf_unbond(dev);
1401                 if (ret2) {
1402                         mlx4_warn(dev, "Failed to unbond device for multifunction (%d)\n", ret2);
1403                         ret = ret2;
1404                 }
1405         }
1406
1407         mutex_unlock(&priv->bond_mutex);
1408         if (!ret)
1409                 mlx4_dbg(dev, "Device is unbonded\n");
1410
1411         return ret;
1412 }
1413 EXPORT_SYMBOL_GPL(mlx4_unbond);
1414
1415
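/*
 * mlx4_port_map_set() - set the virtual-to-physical port mapping.
 * Requires the PORT_REMAP capability.  A value of zero keeps the current
 * mapping for that port; the cross mapping (port1 == 2 && port2 == 1) is
 * rejected.  The new mapping is programmed with mlx4_virt2phy_port_map()
 * and cached in priv->v2p.
 */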
1416 int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p)
1417 {
1418         u8 port1 = v2p->port1;
1419         u8 port2 = v2p->port2;
1420         struct mlx4_priv *priv = mlx4_priv(dev);
1421         int err;
1422
1423         if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP))
1424                 return -ENOTSUPP;
1425
1426         mutex_lock(&priv->bond_mutex);
1427
1428         /* zero means keep current mapping for this port */
1429         if (port1 == 0)
1430                 port1 = priv->v2p.port1;
1431         if (port2 == 0)
1432                 port2 = priv->v2p.port2;
1433
1434         if ((port1 < 1) || (port1 > MLX4_MAX_PORTS) ||
1435             (port2 < 1) || (port2 > MLX4_MAX_PORTS) ||
1436             (port1 == 2 && port2 == 1)) {
1437                 /* besides the boundary checks, cross mapping makes
1438                  * no sense and is therefore not allowed */
1439                 err = -EINVAL;
1440         } else if ((port1 == priv->v2p.port1) &&
1441                  (port2 == priv->v2p.port2)) {
1442                 err = 0;
1443         } else {
1444                 err = mlx4_virt2phy_port_map(dev, port1, port2);
1445                 if (!err) {
1446                         mlx4_dbg(dev, "port map changed: [%d][%d]\n",
1447                                  port1, port2);
1448                         priv->v2p.port1 = port1;
1449                         priv->v2p.port2 = port2;
1450                 } else {
1451                         mlx4_err(dev, "Failed to change port mape: %d\n", err);
1452                 }
1453         }
1454
1455         mutex_unlock(&priv->bond_mutex);
1456         return err;
1457 }
1458 EXPORT_SYMBOL_GPL(mlx4_port_map_set);
1459
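/*
 * mlx4_load_fw() - allocate ICM for the firmware area, map it with the
 * MAP_FA command and start the firmware with RUN_FW, unwinding the
 * mapping and the allocation on failure.
 */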
1460 static int mlx4_load_fw(struct mlx4_dev *dev)
1461 {
1462         struct mlx4_priv *priv = mlx4_priv(dev);
1463         int err;
1464
1465         priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages,
1466                                          GFP_HIGHUSER | __GFP_NOWARN, 0);
1467         if (!priv->fw.fw_icm) {
1468                 mlx4_err(dev, "Couldn't allocate FW area, aborting\n");
1469                 return -ENOMEM;
1470         }
1471
1472         err = mlx4_MAP_FA(dev, priv->fw.fw_icm);
1473         if (err) {
1474                 mlx4_err(dev, "MAP_FA command failed, aborting\n");
1475                 goto err_free;
1476         }
1477
1478         err = mlx4_RUN_FW(dev);
1479         if (err) {
1480                 mlx4_err(dev, "RUN_FW command failed, aborting\n");
1481                 goto err_unmap_fa;
1482         }
1483
1484         return 0;
1485
1486 err_unmap_fa:
1487         mlx4_UNMAP_FA(dev);
1488
1489 err_free:
1490         mlx4_free_icm(dev, priv->fw.fw_icm, 0);
1491         return err;
1492 }
1493
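/*
 * mlx4_init_cmpt_table() - map the cMPT ICM tables.  One table is set up
 * per object type (QP, SRQ, CQ, EQ), each at its own offset of
 * (type * cmpt_entry_sz) << MLX4_CMPT_SHIFT within cmpt_base.
 */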
1494 static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
1495                                 int cmpt_entry_sz)
1496 {
1497         struct mlx4_priv *priv = mlx4_priv(dev);
1498         int err;
1499         int num_eqs;
1500
1501         err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table,
1502                                   cmpt_base +
1503                                   ((u64) (MLX4_CMPT_TYPE_QP *
1504                                           cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1505                                   cmpt_entry_sz, dev->caps.num_qps,
1506                                   dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1507                                   0, 0);
1508         if (err)
1509                 goto err;
1510
1511         err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table,
1512                                   cmpt_base +
1513                                   ((u64) (MLX4_CMPT_TYPE_SRQ *
1514                                           cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1515                                   cmpt_entry_sz, dev->caps.num_srqs,
1516                                   dev->caps.reserved_srqs, 0, 0);
1517         if (err)
1518                 goto err_qp;
1519
1520         err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table,
1521                                   cmpt_base +
1522                                   ((u64) (MLX4_CMPT_TYPE_CQ *
1523                                           cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1524                                   cmpt_entry_sz, dev->caps.num_cqs,
1525                                   dev->caps.reserved_cqs, 0, 0);
1526         if (err)
1527                 goto err_srq;
1528
1529         num_eqs = dev->phys_caps.num_phys_eqs;
1530         err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
1531                                   cmpt_base +
1532                                   ((u64) (MLX4_CMPT_TYPE_EQ *
1533                                           cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1534                                   cmpt_entry_sz, num_eqs, num_eqs, 0, 0);
1535         if (err)
1536                 goto err_cq;
1537
1538         return 0;
1539
1540 err_cq:
1541         mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
1542
1543 err_srq:
1544         mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
1545
1546 err_qp:
1547         mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
1548
1549 err:
1550         return err;
1551 }
1552
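/*
 * mlx4_init_icm() - map all HCA context memory (ICM).  SET_ICM_SIZE
 * reports how much auxiliary ICM the requested icm_size needs; the aux
 * area is then allocated and mapped, followed by the cMPT, EQ, MTT,
 * dMPT, QP (QPC/AUXC/ALTC/RDMARC), CQ, SRQ and multicast group tables.
 * Any failure unwinds the tables mapped so far.
 */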
1553 static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
1554                          struct mlx4_init_hca_param *init_hca, u64 icm_size)
1555 {
1556         struct mlx4_priv *priv = mlx4_priv(dev);
1557         u64 aux_pages;
1558         int num_eqs;
1559         int err;
1560
1561         err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
1562         if (err) {
1563                 mlx4_err(dev, "SET_ICM_SIZE command failed, aborting\n");
1564                 return err;
1565         }
1566
1567         mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory\n",
1568                  (unsigned long long) icm_size >> 10,
1569                  (unsigned long long) aux_pages << 2);
1570
1571         priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages,
1572                                           GFP_HIGHUSER | __GFP_NOWARN, 0);
1573         if (!priv->fw.aux_icm) {
1574                 mlx4_err(dev, "Couldn't allocate aux memory, aborting\n");
1575                 return -ENOMEM;
1576         }
1577
1578         err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm);
1579         if (err) {
1580                 mlx4_err(dev, "MAP_ICM_AUX command failed, aborting\n");
1581                 goto err_free_aux;
1582         }
1583
1584         err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz);
1585         if (err) {
1586                 mlx4_err(dev, "Failed to map cMPT context memory, aborting\n");
1587                 goto err_unmap_aux;
1588         }
1589
1590
1591         num_eqs = dev->phys_caps.num_phys_eqs;
1592         err = mlx4_init_icm_table(dev, &priv->eq_table.table,
1593                                   init_hca->eqc_base, dev_cap->eqc_entry_sz,
1594                                   num_eqs, num_eqs, 0, 0);
1595         if (err) {
1596                 mlx4_err(dev, "Failed to map EQ context memory, aborting\n");
1597                 goto err_unmap_cmpt;
1598         }
1599
1600         /*
1601          * Reserved MTT entries must be aligned up to a cacheline
1602          * boundary, since the FW will write to them, while the driver
1603          * writes to all other MTT entries. (The variable
1604          * dev->caps.mtt_entry_sz below is really the MTT segment
1605          * size, not the raw entry size)
1606          */
1607         dev->caps.reserved_mtts =
1608                 ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz,
1609                       dma_get_cache_alignment()) / dev->caps.mtt_entry_sz;
1610
1611         err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
1612                                   init_hca->mtt_base,
1613                                   dev->caps.mtt_entry_sz,
1614                                   dev->caps.num_mtts,
1615                                   dev->caps.reserved_mtts, 1, 0);
1616         if (err) {
1617                 mlx4_err(dev, "Failed to map MTT context memory, aborting\n");
1618                 goto err_unmap_eq;
1619         }
1620
1621         err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table,
1622                                   init_hca->dmpt_base,
1623                                   dev_cap->dmpt_entry_sz,
1624                                   dev->caps.num_mpts,
1625                                   dev->caps.reserved_mrws, 1, 1);
1626         if (err) {
1627                 mlx4_err(dev, "Failed to map dMPT context memory, aborting\n");
1628                 goto err_unmap_mtt;
1629         }
1630
1631         err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table,
1632                                   init_hca->qpc_base,
1633                                   dev_cap->qpc_entry_sz,
1634                                   dev->caps.num_qps,
1635                                   dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1636                                   0, 0);
1637         if (err) {
1638                 mlx4_err(dev, "Failed to map QP context memory, aborting\n");
1639                 goto err_unmap_dmpt;
1640         }
1641
1642         err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table,
1643                                   init_hca->auxc_base,
1644                                   dev_cap->aux_entry_sz,
1645                                   dev->caps.num_qps,
1646                                   dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1647                                   0, 0);
1648         if (err) {
1649                 mlx4_err(dev, "Failed to map AUXC context memory, aborting\n");
1650                 goto err_unmap_qp;
1651         }
1652
1653         err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table,
1654                                   init_hca->altc_base,
1655                                   dev_cap->altc_entry_sz,
1656                                   dev->caps.num_qps,
1657                                   dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1658                                   0, 0);
1659         if (err) {
1660                 mlx4_err(dev, "Failed to map ALTC context memory, aborting\n");
1661                 goto err_unmap_auxc;
1662         }
1663
1664         err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table,
1665                                   init_hca->rdmarc_base,
1666                                   dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift,
1667                                   dev->caps.num_qps,
1668                                   dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1669                                   0, 0);
1670         if (err) {
1671                 mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n");
1672                 goto err_unmap_altc;
1673         }
1674
1675         err = mlx4_init_icm_table(dev, &priv->cq_table.table,
1676                                   init_hca->cqc_base,
1677                                   dev_cap->cqc_entry_sz,
1678                                   dev->caps.num_cqs,
1679                                   dev->caps.reserved_cqs, 0, 0);
1680         if (err) {
1681                 mlx4_err(dev, "Failed to map CQ context memory, aborting\n");
1682                 goto err_unmap_rdmarc;
1683         }
1684
1685         err = mlx4_init_icm_table(dev, &priv->srq_table.table,
1686                                   init_hca->srqc_base,
1687                                   dev_cap->srq_entry_sz,
1688                                   dev->caps.num_srqs,
1689                                   dev->caps.reserved_srqs, 0, 0);
1690         if (err) {
1691                 mlx4_err(dev, "Failed to map SRQ context memory, aborting\n");
1692                 goto err_unmap_cq;
1693         }
1694
1695         /*
1696          * For flow steering device managed mode it is required to use
1697          * mlx4_init_icm_table. For B0 steering mode it's not strictly
1698          * required, but for simplicity just map the whole multicast
1699          * group table now.  The table isn't very big and it's a lot
1700          * easier than trying to track ref counts.
1701          */
1702         err = mlx4_init_icm_table(dev, &priv->mcg_table.table,
1703                                   init_hca->mc_base,
1704                                   mlx4_get_mgm_entry_size(dev),
1705                                   dev->caps.num_mgms + dev->caps.num_amgms,
1706                                   dev->caps.num_mgms + dev->caps.num_amgms,
1707                                   0, 0);
1708         if (err) {
1709                 mlx4_err(dev, "Failed to map MCG context memory, aborting\n");
1710                 goto err_unmap_srq;
1711         }
1712
1713         return 0;
1714
1715 err_unmap_srq:
1716         mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
1717
1718 err_unmap_cq:
1719         mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
1720
1721 err_unmap_rdmarc:
1722         mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
1723
1724 err_unmap_altc:
1725         mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
1726
1727 err_unmap_auxc:
1728         mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
1729
1730 err_unmap_qp:
1731         mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
1732
1733 err_unmap_dmpt:
1734         mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
1735
1736 err_unmap_mtt:
1737         mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
1738
1739 err_unmap_eq:
1740         mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
1741
1742 err_unmap_cmpt:
1743         mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
1744         mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
1745         mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
1746         mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
1747
1748 err_unmap_aux:
1749         mlx4_UNMAP_ICM_AUX(dev);
1750
1751 err_free_aux:
1752         mlx4_free_icm(dev, priv->fw.aux_icm, 0);
1753
1754         return err;
1755 }
1756
1757 static void mlx4_free_icms(struct mlx4_dev *dev)
1758 {
1759         struct mlx4_priv *priv = mlx4_priv(dev);
1760
1761         mlx4_cleanup_icm_table(dev, &priv->mcg_table.table);
1762         mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
1763         mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
1764         mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
1765         mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
1766         mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
1767         mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
1768         mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
1769         mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
1770         mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
1771         mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
1772         mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
1773         mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
1774         mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
1775
1776         mlx4_UNMAP_ICM_AUX(dev);
1777         mlx4_free_icm(dev, priv->fw.aux_icm, 0);
1778 }
1779
1780 static void mlx4_slave_exit(struct mlx4_dev *dev)
1781 {
1782         struct mlx4_priv *priv = mlx4_priv(dev);
1783
1784         mutex_lock(&priv->cmd.slave_cmd_mutex);
1785         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP,
1786                           MLX4_COMM_TIME))
1787                 mlx4_warn(dev, "Failed to close slave function\n");
1788         mutex_unlock(&priv->cmd.slave_cmd_mutex);
1789 }
1790
1791 static int map_bf_area(struct mlx4_dev *dev)
1792 {
1793         struct mlx4_priv *priv = mlx4_priv(dev);
1794         resource_size_t bf_start;
1795         resource_size_t bf_len;
1796         int err = 0;
1797
1798         if (!dev->caps.bf_reg_size)
1799                 return -ENXIO;
1800
1801         bf_start = pci_resource_start(dev->persist->pdev, 2) +
1802                         (dev->caps.num_uars << PAGE_SHIFT);
1803         bf_len = pci_resource_len(dev->persist->pdev, 2) -
1804                         (dev->caps.num_uars << PAGE_SHIFT);
1805         priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
1806         if (!priv->bf_mapping)
1807                 err = -ENOMEM;
1808
1809         return err;
1810 }
1811
1812 static void unmap_bf_area(struct mlx4_dev *dev)
1813 {
1814         if (mlx4_priv(dev)->bf_mapping)
1815                 io_mapping_free(mlx4_priv(dev)->bf_mapping);
1816 }
1817
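/*
 * mlx4_read_clock() - read the 64-bit internal device clock from the
 * mapped clock area.  The high word is read before and after the low
 * word and the read is retried (up to 10 times) until the two high
 * words match, so the returned value is a consistent snapshot.
 */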
1818 cycle_t mlx4_read_clock(struct mlx4_dev *dev)
1819 {
1820         u32 clockhi, clocklo, clockhi1;
1821         cycle_t cycles;
1822         int i;
1823         struct mlx4_priv *priv = mlx4_priv(dev);
1824
1825         for (i = 0; i < 10; i++) {
1826                 clockhi = swab32(readl(priv->clock_mapping));
1827                 clocklo = swab32(readl(priv->clock_mapping + 4));
1828                 clockhi1 = swab32(readl(priv->clock_mapping));
1829                 if (clockhi == clockhi1)
1830                         break;
1831         }
1832
1833         cycles = (u64) clockhi << 32 | (u64) clocklo;
1834
1835         return cycles;
1836 }
1837 EXPORT_SYMBOL_GPL(mlx4_read_clock);
1838
1839
1840 static int map_internal_clock(struct mlx4_dev *dev)
1841 {
1842         struct mlx4_priv *priv = mlx4_priv(dev);
1843
1844         priv->clock_mapping =
1845                 ioremap(pci_resource_start(dev->persist->pdev,
1846                                            priv->fw.clock_bar) +
1847                         priv->fw.clock_offset, MLX4_CLOCK_SIZE);
1848
1849         if (!priv->clock_mapping)
1850                 return -ENOMEM;
1851
1852         return 0;
1853 }
1854
1855 int mlx4_get_internal_clock_params(struct mlx4_dev *dev,
1856                                    struct mlx4_clock_params *params)
1857 {
1858         struct mlx4_priv *priv = mlx4_priv(dev);
1859
1860         if (mlx4_is_slave(dev))
1861                 return -ENOTSUPP;
1862
1863         if (!params)
1864                 return -EINVAL;
1865
1866         params->bar = priv->fw.clock_bar;
1867         params->offset = priv->fw.clock_offset;
1868         params->size = MLX4_CLOCK_SIZE;
1869
1870         return 0;
1871 }
1872 EXPORT_SYMBOL_GPL(mlx4_get_internal_clock_params);
1873
1874 static void unmap_internal_clock(struct mlx4_dev *dev)
1875 {
1876         struct mlx4_priv *priv = mlx4_priv(dev);
1877
1878         if (priv->clock_mapping)
1879                 iounmap(priv->clock_mapping);
1880 }
1881
1882 static void mlx4_close_hca(struct mlx4_dev *dev)
1883 {
1884         unmap_internal_clock(dev);
1885         unmap_bf_area(dev);
1886         if (mlx4_is_slave(dev))
1887                 mlx4_slave_exit(dev);
1888         else {
1889                 mlx4_CLOSE_HCA(dev, 0);
1890                 mlx4_free_icms(dev);
1891         }
1892 }
1893
1894 static void mlx4_close_fw(struct mlx4_dev *dev)
1895 {
1896         if (!mlx4_is_slave(dev)) {
1897                 mlx4_UNMAP_FA(dev);
1898                 mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
1899         }
1900 }
1901
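/*
 * mlx4_comm_check_offline() - poll the comm channel flags until the
 * offline bit clears, for at most MLX4_COMM_OFFLINE_TIME_OUT msec,
 * sleeping 100 msec between reads.  Returns 0 once the channel is
 * online, or -EIO if it stays offline.
 */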
1902 static int mlx4_comm_check_offline(struct mlx4_dev *dev)
1903 {
1904 #define COMM_CHAN_OFFLINE_OFFSET 0x09
1905
1906         u32 comm_flags;
1907         u32 offline_bit;
1908         unsigned long end;
1909         struct mlx4_priv *priv = mlx4_priv(dev);
1910
1911         end = msecs_to_jiffies(MLX4_COMM_OFFLINE_TIME_OUT) + jiffies;
1912         while (time_before(jiffies, end)) {
1913                 comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm +
1914                                           MLX4_COMM_CHAN_FLAGS));
1915                 offline_bit = (comm_flags &
1916                                (u32)(1 << COMM_CHAN_OFFLINE_OFFSET));
1917                 if (!offline_bit)
1918                         return 0;
1919                 /* There are cases, as part of the AER/Reset flow, in which the
1920                  * PF needs around 100 msec to load. We therefore sleep for 100 msec
1921                  * to allow other tasks to make use of that CPU during this
1922                  * time interval.
1923                  */
1924                 msleep(100);
1925         }
1926         mlx4_err(dev, "Communication channel is offline.\n");
1927         return -EIO;
1928 }
1929
1930 static void mlx4_reset_vf_support(struct mlx4_dev *dev)
1931 {
1932 #define COMM_CHAN_RST_OFFSET 0x1e
1933
1934         struct mlx4_priv *priv = mlx4_priv(dev);
1935         u32 comm_rst;
1936         u32 comm_caps;
1937
1938         comm_caps = swab32(readl((__iomem char *)priv->mfunc.comm +
1939                                  MLX4_COMM_CHAN_CAPS));
1940         comm_rst = (comm_caps & (u32)(1 << COMM_CHAN_RST_OFFSET));
1941
1942         if (comm_rst)
1943                 dev->caps.vf_caps |= MLX4_VF_CAP_FLAG_RESET;
1944 }
1945
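/*
 * mlx4_init_slave() - initialize a slave (VF) function over the comm
 * channel: defer the probe while the PF is still loading, make sure the
 * channel is online, send a RESET, check that the command interface
 * revision matches the master's, and then hand the VHCR DMA address to
 * the PF piece by piece (VHCR0, VHCR1, VHCR2, VHCR_EN).
 */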
1946 static int mlx4_init_slave(struct mlx4_dev *dev)
1947 {
1948         struct mlx4_priv *priv = mlx4_priv(dev);
1949         u64 dma = (u64) priv->mfunc.vhcr_dma;
1950         int ret_from_reset = 0;
1951         u32 slave_read;
1952         u32 cmd_channel_ver;
1953
1954         if (atomic_read(&pf_loading)) {
1955                 mlx4_warn(dev, "PF is not ready - Deferring probe\n");
1956                 return -EPROBE_DEFER;
1957         }
1958
1959         mutex_lock(&priv->cmd.slave_cmd_mutex);
1960         priv->cmd.max_cmds = 1;
1961         if (mlx4_comm_check_offline(dev)) {
1962                 mlx4_err(dev, "PF is not responsive, skipping initialization\n");
1963                 goto err_offline;
1964         }
1965
1966         mlx4_reset_vf_support(dev);
1967         mlx4_warn(dev, "Sending reset\n");
1968         ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
1969                                        MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME);
1970         /* if we are in the middle of FLR, the slave will try
1971          * NUM_OF_RESET_RETRIES times before leaving. */
1972         if (ret_from_reset) {
1973                 if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) {
1974                         mlx4_warn(dev, "slave is currently in the middle of FLR - Deferring probe\n");
1975                         mutex_unlock(&priv->cmd.slave_cmd_mutex);
1976                         return -EPROBE_DEFER;
1977                 } else
1978                         goto err;
1979         }
1980
1981         /* check the driver version - the slave I/F revision
1982          * must match the master's */
1983         slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
1984         cmd_channel_ver = mlx4_comm_get_version();
1985
1986         if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) !=
1987                 MLX4_COMM_GET_IF_REV(slave_read)) {
1988                 mlx4_err(dev, "slave driver version is not supported by the master\n");
1989                 goto err;
1990         }
1991
1992         mlx4_warn(dev, "Sending vhcr0\n");
1993         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48,
1994                              MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
1995                 goto err;
1996         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32,
1997                              MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
1998                 goto err;
1999         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16,
2000                              MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
2001                 goto err;
2002         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma,
2003                           MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
2004                 goto err;
2005
2006         mutex_unlock(&priv->cmd.slave_cmd_mutex);
2007         return 0;
2008
2009 err:
2010         mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP, 0);
2011 err_offline:
2012         mutex_unlock(&priv->cmd.slave_cmd_mutex);
2013         return -EIO;
2014 }
2015
2016 static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev)
2017 {
2018         int i;
2019
2020         for (i = 1; i <= dev->caps.num_ports; i++) {
2021                 if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
2022                         dev->caps.gid_table_len[i] =
2023                                 mlx4_get_slave_num_gids(dev, 0, i);
2024                 else
2025                         dev->caps.gid_table_len[i] = 1;
2026                 dev->caps.pkey_table_len[i] =
2027                         dev->phys_caps.pkey_phys_table_len[i] - 1;
2028         }
2029 }
2030
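/*
 * choose_log_fs_mgm_entry_size() - pick the smallest MGM log entry size
 * whose entry can hold qp_per_entry QPs, i.e. the smallest i with
 * qp_per_entry <= 4 * ((1 << i) / 16 - 2).  For example, a 4KB entry
 * (i = 12) holds up to 4 * (256 - 2) = 1016 QPs.  Returns -1 if even
 * MLX4_MAX_MGM_LOG_ENTRY_SIZE is too small.
 */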
2031 static int choose_log_fs_mgm_entry_size(int qp_per_entry)
2032 {
2033         int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE;
2034
2035         for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE;
2036               i++) {
2037                 if (qp_per_entry <= 4 * ((1 << i) / 16 - 2))
2038                         break;
2039         }
2040
2041         return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1;
2042 }
2043
2044 static const char *dmfs_high_rate_steering_mode_str(int dmfs_high_steer_mode)
2045 {
2046         switch (dmfs_high_steer_mode) {
2047         case MLX4_STEERING_DMFS_A0_DEFAULT:
2048                 return "default performance";
2049
2050         case MLX4_STEERING_DMFS_A0_DYNAMIC:
2051                 return "dynamic hybrid mode";
2052
2053         case MLX4_STEERING_DMFS_A0_STATIC:
2054                 return "performance optimized for limited rule configuration (static)";
2055
2056         case MLX4_STEERING_DMFS_A0_DISABLE:
2057                 return "disabled performance optimized steering";
2058
2059         case MLX4_STEERING_DMFS_A0_NOT_SUPPORTED:
2060                 return "performance optimized steering not supported";
2061
2062         default:
2063                 return "Unrecognized mode";
2064         }
2065 }
2066
2067 #define MLX4_DMFS_A0_STEERING                   (1UL << 2)
2068
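/*
 * choose_steering_mode() - select the flow steering mode.  Device-managed
 * flow steering (DMFS) is used when the module parameter allows it, the
 * device advertises FS_EN, there are enough QPs per MGM entry for all
 * functions, and a suitable MGM entry size exists.  Otherwise fall back
 * to B0 steering (if both UC_STEER and MC_STEER are set) or A0 steering,
 * with the MGM entry size taken from the module parameter or the default.
 */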
2069 static void choose_steering_mode(struct mlx4_dev *dev,
2070                                  struct mlx4_dev_cap *dev_cap)
2071 {
2072         if (mlx4_log_num_mgm_entry_size <= 0) {
2073                 if ((-mlx4_log_num_mgm_entry_size) & MLX4_DMFS_A0_STEERING) {
2074                         if (dev->caps.dmfs_high_steer_mode ==
2075                             MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
2076                                 mlx4_err(dev, "DMFS high rate mode not supported\n");
2077                         else
2078                                 dev->caps.dmfs_high_steer_mode =
2079                                         MLX4_STEERING_DMFS_A0_STATIC;
2080                 }
2081         }
2082
2083         if (mlx4_log_num_mgm_entry_size <= 0 &&
2084             dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN &&
2085             (!mlx4_is_mfunc(dev) ||
2086              (dev_cap->fs_max_num_qp_per_entry >=
2087              (dev->persist->num_vfs + 1))) &&
2088             choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >=
2089                 MLX4_MIN_MGM_LOG_ENTRY_SIZE) {
2090                 dev->oper_log_mgm_entry_size =
2091                         choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry);
2092                 dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED;
2093                 dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
2094                 dev->caps.fs_log_max_ucast_qp_range_size =
2095                         dev_cap->fs_log_max_ucast_qp_range_size;
2096         } else {
2097                 if (dev->caps.dmfs_high_steer_mode !=
2098                     MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
2099                         dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DISABLE;
2100                 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER &&
2101                     dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
2102                         dev->caps.steering_mode = MLX4_STEERING_MODE_B0;
2103                 else {
2104                         dev->caps.steering_mode = MLX4_STEERING_MODE_A0;
2105
2106                         if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER ||
2107                             dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
2108                                 mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags set to use B0 steering - falling back to A0 steering mode\n");
2109                 }
2110                 dev->oper_log_mgm_entry_size =
2111                         mlx4_log_num_mgm_entry_size > 0 ?
2112                         mlx4_log_num_mgm_entry_size :
2113                         MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
2114                 dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev);
2115         }
2116         mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, modparam log_num_mgm_entry_size = %d\n",
2117                  mlx4_steering_mode_str(dev->caps.steering_mode),
2118                  dev->oper_log_mgm_entry_size,
2119                  mlx4_log_num_mgm_entry_size);
2120 }
2121
2122 static void choose_tunnel_offload_mode(struct mlx4_dev *dev,
2123                                        struct mlx4_dev_cap *dev_cap)
2124 {
2125         if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED &&
2126             dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS)
2127                 dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_VXLAN;
2128         else
2129                 dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_NONE;
2130
2131         mlx4_dbg(dev, "Tunneling offload mode is: %s\n",  (dev->caps.tunnel_offload_mode
2132                  == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) ? "vxlan" : "none");
2133 }
2134
2135 static int mlx4_validate_optimized_steering(struct mlx4_dev *dev)
2136 {
2137         int i;
2138         struct mlx4_port_cap port_cap;
2139
2140         if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
2141                 return -EINVAL;
2142
2143         for (i = 1; i <= dev->caps.num_ports; i++) {
2144                 if (mlx4_dev_port(dev, i, &port_cap)) {
2145                         mlx4_err(dev,
2146                                  "QUERY_DEV_CAP command failed, can't veify DMFS high rate steering.\n");
2147                 } else if ((dev->caps.dmfs_high_steer_mode !=
2148                             MLX4_STEERING_DMFS_A0_DEFAULT) &&
2149                            (port_cap.dmfs_optimized_state ==
2150                             !!(dev->caps.dmfs_high_steer_mode ==
2151                             MLX4_STEERING_DMFS_A0_DISABLE))) {
2152                         mlx4_err(dev,
2153                                  "DMFS high rate steer mode differ, driver requested %s but %s in FW.\n",
2154                                  dmfs_high_rate_steering_mode_str(
2155                                         dev->caps.dmfs_high_steer_mode),
2156                                  (port_cap.dmfs_optimized_state ?
2157                                         "enabled" : "disabled"));
2158                 }
2159         }
2160
2161         return 0;
2162 }
2163
2164 static int mlx4_init_fw(struct mlx4_dev *dev)
2165 {
2166         struct mlx4_mod_stat_cfg   mlx4_cfg;
2167         int err = 0;
2168
2169         if (!mlx4_is_slave(dev)) {
2170                 err = mlx4_QUERY_FW(dev);
2171                 if (err) {
2172                         if (err == -EACCES)
2173                                 mlx4_info(dev, "non-primary physical function, skipping\n");
2174                         else
2175                                 mlx4_err(dev, "QUERY_FW command failed, aborting\n");
2176                         return err;
2177                 }
2178
2179                 err = mlx4_load_fw(dev);
2180                 if (err) {
2181                         mlx4_err(dev, "Failed to start FW, aborting\n");
2182                         return err;
2183                 }
2184
2185                 mlx4_cfg.log_pg_sz_m = 1;
2186                 mlx4_cfg.log_pg_sz = 0;
2187                 err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
2188                 if (err)
2189                         mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
2190         }
2191
2192         return err;
2193 }
2194
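/*
 * mlx4_init_hca() - bring the HCA up.  On the PF: query the device caps,
 * choose the steering and tunnel offload modes, build the resource
 * profile, map ICM and run INIT_HCA, then optionally map the internal
 * clock for timestamping.  On a VF: run the slave init handshake and
 * fetch the slave caps.  Both paths then map the blue flame area and
 * query the adapter and the CONFIG_DEV parameters.
 */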
2195 static int mlx4_init_hca(struct mlx4_dev *dev)
2196 {
2197         struct mlx4_priv          *priv = mlx4_priv(dev);
2198         struct mlx4_adapter        adapter;
2199         struct mlx4_dev_cap        dev_cap;
2200         struct mlx4_profile        profile;
2201         struct mlx4_init_hca_param init_hca;
2202         u64 icm_size;
2203         struct mlx4_config_dev_params params;
2204         int err;
2205
2206         if (!mlx4_is_slave(dev)) {
2207                 err = mlx4_dev_cap(dev, &dev_cap);
2208                 if (err) {
2209                         mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
2210                         return err;
2211                 }
2212
2213                 choose_steering_mode(dev, &dev_cap);
2214                 choose_tunnel_offload_mode(dev, &dev_cap);
2215
2216                 if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC &&
2217                     mlx4_is_master(dev))
2218                         dev->caps.function_caps |= MLX4_FUNC_CAP_DMFS_A0_STATIC;
2219
2220                 err = mlx4_get_phys_port_id(dev);
2221                 if (err)
2222                         mlx4_err(dev, "Fail to get physical port id\n");
2223
2224                 if (mlx4_is_master(dev))
2225                         mlx4_parav_master_pf_caps(dev);
2226
2227                 if (mlx4_low_memory_profile()) {
2228                         mlx4_info(dev, "Running from within kdump kernel. Using low memory profile\n");
2229                         profile = low_mem_profile;
2230                 } else {
2231                         profile = default_profile;
2232                 }
2233                 if (dev->caps.steering_mode ==
2234                     MLX4_STEERING_MODE_DEVICE_MANAGED)
2235                         profile.num_mcg = MLX4_FS_NUM_MCG;
2236
2237                 icm_size = mlx4_make_profile(dev, &profile, &dev_cap,
2238                                              &init_hca);
2239                 if ((long long) icm_size < 0) {
2240                         err = icm_size;
2241                         return err;
2242                 }
2243
2244                 dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;
2245
2246                 if (enable_4k_uar) {
2247                         init_hca.log_uar_sz = ilog2(dev->caps.num_uars) +
2248                                                     PAGE_SHIFT - DEFAULT_UAR_PAGE_SHIFT;
2249                         init_hca.uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12;
2250                 } else {
2251                         init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
2252                         init_hca.uar_page_sz = PAGE_SHIFT - 12;
2253                 }
2254
2255                 init_hca.mw_enabled = 0;
2256                 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
2257                     dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)
2258                         init_hca.mw_enabled = INIT_HCA_TPT_MW_ENABLE;
2259
2260                 err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
2261                 if (err)
2262                         return err;
2263
2264                 err = mlx4_INIT_HCA(dev, &init_hca);
2265                 if (err) {
2266                         mlx4_err(dev, "INIT_HCA command failed, aborting\n");
2267                         goto err_free_icm;
2268                 }
2269
2270                 if (dev_cap.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
2271                         err = mlx4_query_func(dev, &dev_cap);
2272                         if (err < 0) {
2273                                 mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n");
2274                                 goto err_close;
2275                         } else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) {
2276                                 dev->caps.num_eqs = dev_cap.max_eqs;
2277                                 dev->caps.reserved_eqs = dev_cap.reserved_eqs;
2278                                 dev->caps.reserved_uars = dev_cap.reserved_uars;
2279                         }
2280                 }
2281
2282                 /*
2283                  * If TS is supported by FW
2284                  * read HCA frequency by QUERY_HCA command
2285                  */
2286                 if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) {
2287                         memset(&init_hca, 0, sizeof(init_hca));
2288                         err = mlx4_QUERY_HCA(dev, &init_hca);
2289                         if (err) {
2290                                 mlx4_err(dev, "QUERY_HCA command failed, disable timestamp\n");
2291                                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
2292                         } else {
2293                                 dev->caps.hca_core_clock =
2294                                         init_hca.hca_core_clock;
2295                         }
2296
2297                         /* In case we got HCA frequency 0 - disable timestamping
2298                          * to avoid dividing by zero
2299                          */
2300                         if (!dev->caps.hca_core_clock) {
2301                                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
2302                                 mlx4_err(dev,
2303                                          "HCA frequency is 0 - timestamping is not supported\n");
2304                         } else if (map_internal_clock(dev)) {
2305                                 /*
2306                                  * Map internal clock,
2307                                  * in case of failure disable timestamping
2308                                  */
2309                                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
2310                                 mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported\n");
2311                         }
2312                 }
2313
2314                 if (dev->caps.dmfs_high_steer_mode !=
2315                     MLX4_STEERING_DMFS_A0_NOT_SUPPORTED) {
2316                         if (mlx4_validate_optimized_steering(dev))
2317                                 mlx4_warn(dev, "Optimized steering validation failed\n");
2318
2319                         if (dev->caps.dmfs_high_steer_mode ==
2320                             MLX4_STEERING_DMFS_A0_DISABLE) {
2321                                 dev->caps.dmfs_high_rate_qpn_base =
2322                                         dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
2323                                 dev->caps.dmfs_high_rate_qpn_range =
2324                                         MLX4_A0_STEERING_TABLE_SIZE;
2325                         }
2326
2327                         mlx4_dbg(dev, "DMFS high rate steer mode is: %s\n",
2328                                  dmfs_high_rate_steering_mode_str(
2329                                         dev->caps.dmfs_high_steer_mode));
2330                 }
2331         } else {
2332                 err = mlx4_init_slave(dev);
2333                 if (err) {
2334                         if (err != -EPROBE_DEFER)
2335                                 mlx4_err(dev, "Failed to initialize slave\n");
2336                         return err;
2337                 }
2338
2339                 err = mlx4_slave_cap(dev);
2340                 if (err) {
2341                         mlx4_err(dev, "Failed to obtain slave caps\n");
2342                         goto err_close;
2343                 }
2344         }
2345
2346         if (map_bf_area(dev))
2347                 mlx4_dbg(dev, "Failed to map blue flame area\n");
2348
2349         /* Only the master sets the ports; all the rest get them from it. */
2350         if (!mlx4_is_slave(dev))
2351                 mlx4_set_port_mask(dev);
2352
2353         err = mlx4_QUERY_ADAPTER(dev, &adapter);
2354         if (err) {
2355                 mlx4_err(dev, "QUERY_ADAPTER command failed, aborting\n");
2356                 goto unmap_bf;
2357         }
2358
2359         /* Query CONFIG_DEV parameters */
2360         err = mlx4_config_dev_retrieval(dev, &params);
2361         if (err && err != -ENOTSUPP) {
2362                 mlx4_err(dev, "Failed to query CONFIG_DEV parameters\n");
2363         } else if (!err) {
2364                 dev->caps.rx_checksum_flags_port[1] = params.rx_csum_flags_port_1;
2365                 dev->caps.rx_checksum_flags_port[2] = params.rx_csum_flags_port_2;
2366         }
2367         priv->eq_table.inta_pin = adapter.inta_pin;
2368         memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id);
2369
2370         return 0;
2371
2372 unmap_bf:
2373         unmap_internal_clock(dev);
2374         unmap_bf_area(dev);
2375
2376         if (mlx4_is_slave(dev)) {
2377                 kfree(dev->caps.qp0_qkey);
2378                 kfree(dev->caps.qp0_tunnel);
2379                 kfree(dev->caps.qp0_proxy);
2380                 kfree(dev->caps.qp1_tunnel);
2381                 kfree(dev->caps.qp1_proxy);
2382         }
2383
2384 err_close:
2385         if (mlx4_is_slave(dev))
2386                 mlx4_slave_exit(dev);
2387         else
2388                 mlx4_CLOSE_HCA(dev, 0);
2389
2390 err_free_icm:
2391         if (!mlx4_is_slave(dev))
2392                 mlx4_free_icms(dev);
2393
2394         return err;
2395 }
2396
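/*
 * mlx4_init_counters_table() - set up the counters bitmap.  The bitmap
 * is sized up to the next power of two, the last index is reserved for
 * the sink counter, and the round-up padding is also marked reserved, so
 * only max_counters - 1 real counters can be allocated.
 */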
2397 static int mlx4_init_counters_table(struct mlx4_dev *dev)
2398 {
2399         struct mlx4_priv *priv = mlx4_priv(dev);
2400         int nent_pow2;
2401
2402         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2403                 return -ENOENT;
2404
2405         if (!dev->caps.max_counters)
2406                 return -ENOSPC;
2407
2408         nent_pow2 = roundup_pow_of_two(dev->caps.max_counters);
2409         /* reserve last counter index for sink counter */
2410         return mlx4_bitmap_init(&priv->counters_bitmap, nent_pow2,
2411                                 nent_pow2 - 1, 0,
2412                                 nent_pow2 - dev->caps.max_counters + 1);
2413 }
2414
2415 static void mlx4_cleanup_counters_table(struct mlx4_dev *dev)
2416 {
2417         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2418                 return;
2419
2420         if (!dev->caps.max_counters)
2421                 return;
2422
2423         mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap);
2424 }
2425
2426 static void mlx4_cleanup_default_counters(struct mlx4_dev *dev)
2427 {
2428         struct mlx4_priv *priv = mlx4_priv(dev);
2429         int port;
2430
2431         for (port = 0; port < dev->caps.num_ports; port++)
2432                 if (priv->def_counter[port] != -1)
2433                         mlx4_counter_free(dev,  priv->def_counter[port]);
2434 }
2435
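/*
 * mlx4_allocate_default_counters() - allocate one default counter per
 * port.  -ENOSPC is tolerated, -ENOENT (no counter support) is skipped,
 * and -EINVAL from an old PF driver makes a slave fall back to the sink
 * counter index; any other error frees the counters allocated so far and
 * fails.
 */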
2436 static int mlx4_allocate_default_counters(struct mlx4_dev *dev)
2437 {
2438         struct mlx4_priv *priv = mlx4_priv(dev);
2439         int port, err = 0;
2440         u32 idx;
2441
2442         for (port = 0; port < dev->caps.num_ports; port++)
2443                 priv->def_counter[port] = -1;
2444
2445         for (port = 0; port < dev->caps.num_ports; port++) {
2446                 err = mlx4_counter_alloc(dev, &idx);
2447
2448                 if (!err || err == -ENOSPC) {
2449                         priv->def_counter[port] = idx;
2450                 } else if (err == -ENOENT) {
2451                         err = 0;
2452                         continue;
2453                 } else if (mlx4_is_slave(dev) && err == -EINVAL) {
2454                         priv->def_counter[port] = MLX4_SINK_COUNTER_INDEX(dev);
2455                         mlx4_warn(dev, "can't allocate counter from old PF driver, using index %d\n",
2456                                   MLX4_SINK_COUNTER_INDEX(dev));
2457                         err = 0;
2458                 } else {
2459                         mlx4_err(dev, "%s: failed to allocate default counter port %d err %d\n",
2460                                  __func__, port + 1, err);
2461                         mlx4_cleanup_default_counters(dev);
2462                         return err;
2463                 }
2464
2465                 mlx4_dbg(dev, "%s: default counter index %d for port %d\n",
2466                          __func__, priv->def_counter[port], port + 1);
2467         }
2468
2469         return err;
2470 }
2471
2472 int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
2473 {
2474         struct mlx4_priv *priv = mlx4_priv(dev);
2475
2476         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2477                 return -ENOENT;
2478
2479         *idx = mlx4_bitmap_alloc(&priv->counters_bitmap);
2480         if (*idx == -1) {
2481                 *idx = MLX4_SINK_COUNTER_INDEX(dev);
2482                 return -ENOSPC;
2483         }
2484
2485         return 0;
2486 }
2487
2488 int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
2489 {
2490         u64 out_param;
2491         int err;
2492
2493         if (mlx4_is_mfunc(dev)) {
2494                 err = mlx4_cmd_imm(dev, 0, &out_param, RES_COUNTER,
2495                                    RES_OP_RESERVE, MLX4_CMD_ALLOC_RES,
2496                                    MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
2497                 if (!err)
2498                         *idx = get_param_l(&out_param);
2499
2500                 return err;
2501         }
2502         return __mlx4_counter_alloc(dev, idx);
2503 }
2504 EXPORT_SYMBOL_GPL(mlx4_counter_alloc);
2505
2506 static int __mlx4_clear_if_stat(struct mlx4_dev *dev,
2507                                 u8 counter_index)
2508 {
2509         struct mlx4_cmd_mailbox *if_stat_mailbox;
2510         int err;
2511         u32 if_stat_in_mod = (counter_index & 0xff) | MLX4_QUERY_IF_STAT_RESET;
2512
2513         if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev);
2514         if (IS_ERR(if_stat_mailbox))
2515                 return PTR_ERR(if_stat_mailbox);
2516
2517         err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, if_stat_in_mod, 0,
2518                            MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C,
2519                            MLX4_CMD_NATIVE);
2520
2521         mlx4_free_cmd_mailbox(dev, if_stat_mailbox);
2522         return err;
2523 }
2524
2525 void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
2526 {
2527         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2528                 return;
2529
2530         if (idx == MLX4_SINK_COUNTER_INDEX(dev))
2531                 return;
2532
2533         __mlx4_clear_if_stat(dev, idx);
2534
2535         mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx, MLX4_USE_RR);
2536         return;
2537 }
2538
2539 void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
2540 {
2541         u64 in_param = 0;
2542
2543         if (mlx4_is_mfunc(dev)) {
2544                 set_param_l(&in_param, idx);
2545                 mlx4_cmd(dev, in_param, RES_COUNTER, RES_OP_RESERVE,
2546                          MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
2547                          MLX4_CMD_WRAPPED);
2548                 return;
2549         }
2550         __mlx4_counter_free(dev, idx);
2551 }
2552 EXPORT_SYMBOL_GPL(mlx4_counter_free);
2553
2554 int mlx4_get_default_counter_index(struct mlx4_dev *dev, int port)
2555 {
2556         struct mlx4_priv *priv = mlx4_priv(dev);
2557
2558         return priv->def_counter[port - 1];
2559 }
2560 EXPORT_SYMBOL_GPL(mlx4_get_default_counter_index);
2561
2562 void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry, int port)
2563 {
2564         struct mlx4_priv *priv = mlx4_priv(dev);
2565
2566         priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
2567 }
2568 EXPORT_SYMBOL_GPL(mlx4_set_admin_guid);
2569
2570 __be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port)
2571 {
2572         struct mlx4_priv *priv = mlx4_priv(dev);
2573
2574         return priv->mfunc.master.vf_admin[entry].vport[port].guid;
2575 }
2576 EXPORT_SYMBOL_GPL(mlx4_get_admin_guid);
2577
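/*
 * mlx4_set_random_admin_guid() - assign a random admin GUID to a VF
 * port.  Entry 0 is the HW GUID and is left untouched.  Bit 56 of the
 * random value is cleared and bit 57 is set before it is stored as the
 * port's admin GUID.
 */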
2578 void mlx4_set_random_admin_guid(struct mlx4_dev *dev, int entry, int port)
2579 {
2580         struct mlx4_priv *priv = mlx4_priv(dev);
2581         __be64 guid;
2582
2583         /* hw GUID */
2584         if (entry == 0)
2585                 return;
2586
2587         get_random_bytes((char *)&guid, sizeof(guid));
2588         guid &= ~(cpu_to_be64(1ULL << 56));
2589         guid |= cpu_to_be64(1ULL << 57);
2590         priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
2591 }
2592
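/*
 * mlx4_setup_hca() - create the driver-side resource tables (UAR, PD,
 * XRCD, MR, MCG, EQ, CQ, SRQ, QP, counters), switch the command
 * interface to event mode, verify interrupt delivery with a NOP command,
 * and, on the PF, program each port's IB capabilities and default IB MTU
 * and call SET_PORT for it.
 */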
2593 static int mlx4_setup_hca(struct mlx4_dev *dev)
2594 {
2595         struct mlx4_priv *priv = mlx4_priv(dev);
2596         int err;
2597         int port;
2598         __be32 ib_port_default_caps;
2599
2600         err = mlx4_init_uar_table(dev);
2601         if (err) {
2602                 mlx4_err(dev, "Failed to initialize user access region table, aborting\n");
2603                  return err;
2604         }
2605
2606         err = mlx4_uar_alloc(dev, &priv->driver_uar);
2607         if (err) {
2608                 mlx4_err(dev, "Failed to allocate driver access region, aborting\n");
2609                 goto err_uar_table_free;
2610         }
2611
2612         priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
2613         if (!priv->kar) {
2614                 mlx4_err(dev, "Couldn't map kernel access region, aborting\n");
2615                 err = -ENOMEM;
2616                 goto err_uar_free;
2617         }
2618
2619         err = mlx4_init_pd_table(dev);
2620         if (err) {
2621                 mlx4_err(dev, "Failed to initialize protection domain table, aborting\n");
2622                 goto err_kar_unmap;
2623         }
2624
2625         err = mlx4_init_xrcd_table(dev);
2626         if (err) {
2627                 mlx4_err(dev, "Failed to initialize reliable connection domain table, aborting\n");
2628                 goto err_pd_table_free;
2629         }
2630
2631         err = mlx4_init_mr_table(dev);
2632         if (err) {
2633                 mlx4_err(dev, "Failed to initialize memory region table, aborting\n");
2634                 goto err_xrcd_table_free;
2635         }
2636
2637         if (!mlx4_is_slave(dev)) {
2638                 err = mlx4_init_mcg_table(dev);
2639                 if (err) {
2640                         mlx4_err(dev, "Failed to initialize multicast group table, aborting\n");
2641                         goto err_mr_table_free;
2642                 }
2643                 err = mlx4_config_mad_demux(dev);
2644                 if (err) {
2645                         mlx4_err(dev, "Failed in config_mad_demux, aborting\n");
2646                         goto err_mcg_table_free;
2647                 }
2648         }
2649
2650         err = mlx4_init_eq_table(dev);
2651         if (err) {
2652                 mlx4_err(dev, "Failed to initialize event queue table, aborting\n");
2653                 goto err_mcg_table_free;
2654         }
2655
2656         err = mlx4_cmd_use_events(dev);
2657         if (err) {
2658                 mlx4_err(dev, "Failed to switch to event-driven firmware commands, aborting\n");
2659                 goto err_eq_table_free;
2660         }
2661
2662         err = mlx4_NOP(dev);
2663         if (err) {
2664                 if (dev->flags & MLX4_FLAG_MSI_X) {
2665                         mlx4_warn(dev, "NOP command failed to generate MSI-X interrupt IRQ %d)\n",
2666                                   priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
2667                         mlx4_warn(dev, "Trying again without MSI-X\n");
2668                 } else {
2669                         mlx4_err(dev, "NOP command failed to generate interrupt (IRQ %d), aborting\n",
2670                                  priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
2671                         mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
2672                 }
2673
2674                 goto err_cmd_poll;
2675         }
2676
2677         mlx4_dbg(dev, "NOP command IRQ test passed\n");
2678
2679         err = mlx4_init_cq_table(dev);
2680         if (err) {
2681                 mlx4_err(dev, "Failed to initialize completion queue table, aborting\n");
2682                 goto err_cmd_poll;
2683         }
2684
2685         err = mlx4_init_srq_table(dev);
2686         if (err) {
2687                 mlx4_err(dev, "Failed to initialize shared receive queue table, aborting\n");
2688                 goto err_cq_table_free;
2689         }
2690
2691         err = mlx4_init_qp_table(dev);
2692         if (err) {
2693                 mlx4_err(dev, "Failed to initialize queue pair table, aborting\n");
2694                 goto err_srq_table_free;
2695         }
2696
2697         if (!mlx4_is_slave(dev)) {
2698                 err = mlx4_init_counters_table(dev);
2699                 if (err && err != -ENOENT) {
2700                         mlx4_err(dev, "Failed to initialize counters table, aborting\n");
2701                         goto err_qp_table_free;
2702                 }
2703         }
2704
2705         err = mlx4_allocate_default_counters(dev);
2706         if (err) {
2707                 mlx4_err(dev, "Failed to allocate default counters, aborting\n");
2708                 goto err_counters_table_free;
2709         }
2710
2711         if (!mlx4_is_slave(dev)) {
2712                 for (port = 1; port <= dev->caps.num_ports; port++) {
2713                         ib_port_default_caps = 0;
2714                         err = mlx4_get_port_ib_caps(dev, port,
2715                                                     &ib_port_default_caps);
2716                         if (err)
2717                                 mlx4_warn(dev, "failed to get port %d default ib capabilities (%d). Continuing with caps = 0\n",
2718                                           port, err);
2719                         dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
2720
2721                         /* initialize per-slave default ib port capabilities */
2722                         if (mlx4_is_master(dev)) {
2723                                 int i;
2724                                 for (i = 0; i < dev->num_slaves; i++) {
2725                                         if (i == mlx4_master_func_num(dev))
2726                                                 continue;
2727                                         priv->mfunc.master.slave_state[i].ib_cap_mask[port] =
2728                                                 ib_port_default_caps;
2729                                 }
2730                         }
2731
2732                         if (mlx4_is_mfunc(dev))
2733                                 dev->caps.port_ib_mtu[port] = IB_MTU_2048;
2734                         else
2735                                 dev->caps.port_ib_mtu[port] = IB_MTU_4096;
2736
2737                         err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ?
2738                                             dev->caps.pkey_table_len[port] : -1);
2739                         if (err) {
2740                                 mlx4_err(dev, "Failed to set port %d, aborting\n",
2741                                          port);
2742                                 goto err_default_counters_free;
2743                         }
2744                 }
2745         }
2746
2747         return 0;
2748
2749 err_default_counters_free:
2750         mlx4_cleanup_default_counters(dev);
2751
2752 err_counters_table_free:
2753         if (!mlx4_is_slave(dev))
2754                 mlx4_cleanup_counters_table(dev);
2755
2756 err_qp_table_free:
2757         mlx4_cleanup_qp_table(dev);
2758
2759 err_srq_table_free:
2760         mlx4_cleanup_srq_table(dev);
2761
2762 err_cq_table_free:
2763         mlx4_cleanup_cq_table(dev);
2764
2765 err_cmd_poll:
2766         mlx4_cmd_use_polling(dev);
2767
2768 err_eq_table_free:
2769         mlx4_cleanup_eq_table(dev);
2770
2771 err_mcg_table_free:
2772         if (!mlx4_is_slave(dev))
2773                 mlx4_cleanup_mcg_table(dev);
2774
2775 err_mr_table_free:
2776         mlx4_cleanup_mr_table(dev);
2777
2778 err_xrcd_table_free:
2779         mlx4_cleanup_xrcd_table(dev);
2780
2781 err_pd_table_free:
2782         mlx4_cleanup_pd_table(dev);
2783
2784 err_kar_unmap:
2785         iounmap(priv->kar);
2786
2787 err_uar_free:
2788         mlx4_uar_free(dev, &priv->driver_uar);
2789
2790 err_uar_table_free:
2791         mlx4_cleanup_uar_table(dev);
2792         return err;
2793 }
2794
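     /*
      * Build an affinity hint cpumask for completion EQ @eqn serving @port:
      * the CPU chosen is the EQ's index within that port's share of the
      * vectors.  Returns 0 without setting a hint when the EQs are shared
      * between ports and this call comes from the second port.
      */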
2795 static int mlx4_init_affinity_hint(struct mlx4_dev *dev, int port, int eqn)
2796 {
2797         int requested_cpu = 0;
2798         struct mlx4_priv *priv = mlx4_priv(dev);
2799         struct mlx4_eq *eq;
2800         int off = 0;
2801         int i;
2802
2803         if (eqn > dev->caps.num_comp_vectors)
2804                 return -EINVAL;
2805
2806         for (i = 1; i < port; i++)
2807                 off += mlx4_get_eqs_per_port(dev, i);
2808
2809         requested_cpu = eqn - off - !!(eqn > MLX4_EQ_ASYNC);
2810
2811         /* Meaning EQs are shared, and this call comes from the second port */
2812         if (requested_cpu < 0)
2813                 return 0;
2814
2815         eq = &priv->eq_table.eq[eqn];
2816
2817         if (!zalloc_cpumask_var(&eq->affinity_mask, GFP_KERNEL))
2818                 return -ENOMEM;
2819
2820         cpumask_set_cpu(requested_cpu, eq->affinity_mask);
2821
2822         return 0;
2823 }
2824
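     /*
      * Try to enable MSI-X: request one vector per online CPU per port plus
      * one for the asynchronous EQ, capped by the device's free EQs and
      * MAX_MSIX, then spread the completion vectors across the ports.  On
      * any failure, fall back to a single completion vector sharing the
      * legacy INTx interrupt.
      */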
2825 static void mlx4_enable_msi_x(struct mlx4_dev *dev)
2826 {
2827         struct mlx4_priv *priv = mlx4_priv(dev);
2828         struct msix_entry *entries;
2829         int i;
2830         int port = 0;
2831
2832         if (msi_x) {
2833                 int nreq = dev->caps.num_ports * num_online_cpus() + 1;
2834
2835                 nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs,
2836                              nreq);
2837                 if (nreq > MAX_MSIX)
2838                         nreq = MAX_MSIX;
2839
2840                 entries = kcalloc(nreq, sizeof(*entries), GFP_KERNEL);
2841                 if (!entries)
2842                         goto no_msi;
2843
2844                 for (i = 0; i < nreq; ++i)
2845                         entries[i].entry = i;
2846
2847                 nreq = pci_enable_msix_range(dev->persist->pdev, entries, 2,
2848                                              nreq);
2849
2850                 if (nreq < 0 || nreq < MLX4_EQ_ASYNC) {
2851                         kfree(entries);
2852                         goto no_msi;
2853                 }
2854                 /* 1 is reserved for events (asynchronous EQ) */
2855                 dev->caps.num_comp_vectors = nreq - 1;
2856
2857                 priv->eq_table.eq[MLX4_EQ_ASYNC].irq = entries[0].vector;
2858                 bitmap_zero(priv->eq_table.eq[MLX4_EQ_ASYNC].actv_ports.ports,
2859                             dev->caps.num_ports);
2860
2861                 for (i = 0; i < dev->caps.num_comp_vectors + 1; i++) {
2862                         if (i == MLX4_EQ_ASYNC)
2863                                 continue;
2864
2865                         priv->eq_table.eq[i].irq =
2866                                 entries[i + 1 - !!(i > MLX4_EQ_ASYNC)].vector;
2867
2868                         if (MLX4_IS_LEGACY_EQ_MODE(dev->caps)) {
2869                                 bitmap_fill(priv->eq_table.eq[i].actv_ports.ports,
2870                                             dev->caps.num_ports);
2871                                 /* We don't set affinity hint when there
2872                                  * aren't enough EQs
2873                                  */
2874                         } else {
2875                                 set_bit(port,
2876                                         priv->eq_table.eq[i].actv_ports.ports);
2877                                 if (mlx4_init_affinity_hint(dev, port + 1, i))
2878                                         mlx4_warn(dev, "Couldn't init hint cpumask for EQ %d\n",
2879                                                   i);
2880                         }
2881                         /* We divide the EQs evenly between the ports:
2882                          * (dev->caps.num_comp_vectors / dev->caps.num_ports)
2883                          * is the number of EQs per port
2884                          * (i.e. eqs_per_port). Ideally we would simply write
2885                          * (i + 1) % eqs_per_port == 0.
2886                          * However, since there's an asynchronous EQ, we have
2887                          * to skip over it by comparing this condition to
2888                          * !!((i + 1) > MLX4_EQ_ASYNC).
2889                          */
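                             /* For example (assuming MLX4_EQ_ASYNC is 0): with two
                              * ports and eight completion vectors, eqs_per_port is
                              * 4, so the port index advances after completion EQs
                              * 1-4 (port 1) and again after EQs 5-8 (port 2).
                              */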
2890                         if ((dev->caps.num_comp_vectors > dev->caps.num_ports) &&
2891                             ((i + 1) %
2892                              (dev->caps.num_comp_vectors / dev->caps.num_ports)) ==
2893                             !!((i + 1) > MLX4_EQ_ASYNC))
2894                                 /* If dev->caps.num_comp_vectors < dev->caps.num_ports,
2895                                  * everything is shared anyway.
2896                                  */
2897                                 port++;
2898                 }
2899
2900                 dev->flags |= MLX4_FLAG_MSI_X;
2901
2902                 kfree(entries);
2903                 return;
2904         }
2905
2906 no_msi:
2907         dev->caps.num_comp_vectors = 1;
2908
2909         BUG_ON(MLX4_EQ_ASYNC >= 2);
2910         for (i = 0; i < 2; ++i) {
2911                 priv->eq_table.eq[i].irq = dev->persist->pdev->irq;
2912                 if (i != MLX4_EQ_ASYNC) {
2913                         bitmap_fill(priv->eq_table.eq[i].actv_ports.ports,
2914                                     dev->caps.num_ports);
2915                 }
2916         }
2917 }
2918
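     /*
      * Register the devlink port and create the per-port sysfs attributes
      * (port type and IB MTU).  The store handlers are installed, and the
      * attributes made writable, only when the device is not multi-function.
      */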
2919 static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
2920 {
2921         struct devlink *devlink = priv_to_devlink(mlx4_priv(dev));
2922         struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
2923         int err;
2924
2925         err = devlink_port_register(devlink, &info->devlink_port, port);
2926         if (err)
2927                 return err;
2928
2929         info->dev = dev;
2930         info->port = port;
2931         if (!mlx4_is_slave(dev)) {
2932                 mlx4_init_mac_table(dev, &info->mac_table);
2933                 mlx4_init_vlan_table(dev, &info->vlan_table);
2934                 mlx4_init_roce_gid_table(dev, &info->gid_table);
2935                 info->base_qpn = mlx4_get_base_qpn(dev, port);
2936         }
2937
2938         sprintf(info->dev_name, "mlx4_port%d", port);
2939         info->port_attr.attr.name = info->dev_name;
2940         if (mlx4_is_mfunc(dev))
2941                 info->port_attr.attr.mode = S_IRUGO;
2942         else {
2943                 info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
2944                 info->port_attr.store     = set_port_type;
2945         }
2946         info->port_attr.show      = show_port_type;
2947         sysfs_attr_init(&info->port_attr.attr);
2948
2949         err = device_create_file(&dev->persist->pdev->dev, &info->port_attr);
2950         if (err) {
2951                 mlx4_err(dev, "Failed to create file for port %d\n", port);
2952                 devlink_port_unregister(&info->devlink_port);
2953                 info->port = -1;
2954         }
2955
2956         sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
2957         info->port_mtu_attr.attr.name = info->dev_mtu_name;
2958         if (mlx4_is_mfunc(dev))
2959                 info->port_mtu_attr.attr.mode = S_IRUGO;
2960         else {
2961                 info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR;
2962                 info->port_mtu_attr.store     = set_port_ib_mtu;
2963         }
2964         info->port_mtu_attr.show      = show_port_ib_mtu;
2965         sysfs_attr_init(&info->port_mtu_attr.attr);
2966
2967         err = device_create_file(&dev->persist->pdev->dev,
2968                                  &info->port_mtu_attr);
2969         if (err) {
2970                 mlx4_err(dev, "Failed to create mtu file for port %d\n", port);
2971                 device_remove_file(&info->dev->persist->pdev->dev,
2972                                    &info->port_attr);
2973                 info->port = -1;
2974         }
2975
2976         return err;
2977 }
2978
2979 static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
2980 {
2981         if (info->port < 0)
2982                 return;
2983
2984         device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr);
2985         device_remove_file(&info->dev->persist->pdev->dev,
2986                            &info->port_mtu_attr);
2987 #ifdef CONFIG_RFS_ACCEL
2988         free_irq_cpu_rmap(info->rmap);
2989         info->rmap = NULL;
2990 #endif
2991 }
2992
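     /*
      * Allocate the per-port steering state and initialize the promiscuous
      * QP and steering entry lists for every steering mode.
      */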
2993 static int mlx4_init_steering(struct mlx4_dev *dev)
2994 {
2995         struct mlx4_priv *priv = mlx4_priv(dev);
2996         int num_entries = dev->caps.num_ports;
2997         int i, j;
2998
2999         priv->steer = kcalloc(num_entries, sizeof(struct mlx4_steer), GFP_KERNEL);
3000         if (!priv->steer)
3001                 return -ENOMEM;
3002
3003         for (i = 0; i < num_entries; i++)
3004                 for (j = 0; j < MLX4_NUM_STEERS; j++) {
3005                         INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]);
3006                         INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]);
3007                 }
3008         return 0;
3009 }
3010
3011 static void mlx4_clear_steering(struct mlx4_dev *dev)
3012 {
3013         struct mlx4_priv *priv = mlx4_priv(dev);
3014         struct mlx4_steer_index *entry, *tmp_entry;
3015         struct mlx4_promisc_qp *pqp, *tmp_pqp;
3016         int num_entries = dev->caps.num_ports;
3017         int i, j;
3018
3019         for (i = 0; i < num_entries; i++) {
3020                 for (j = 0; j < MLX4_NUM_STEERS; j++) {
3021                         list_for_each_entry_safe(pqp, tmp_pqp,
3022                                                  &priv->steer[i].promisc_qps[j],
3023                                                  list) {
3024                                 list_del(&pqp->list);
3025                                 kfree(pqp);
3026                         }
3027                         list_for_each_entry_safe(entry, tmp_entry,
3028                                                  &priv->steer[i].steer_entries[j],
3029                                                  list) {
3030                                 list_del(&entry->list);
3031                                 list_for_each_entry_safe(pqp, tmp_pqp,
3032                                                          &entry->duplicates,
3033                                                          list) {
3034                                         list_del(&pqp->list);
3035                                         kfree(pqp);
3036                                 }
3037                                 kfree(entry);
3038                         }
3039                 }
3040         }
3041         kfree(priv->steer);
3042 }
3043
3044 static int extended_func_num(struct pci_dev *pdev)
3045 {
3046         return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn);
3047 }
3048
3049 #define MLX4_OWNER_BASE 0x8069c
3050 #define MLX4_OWNER_SIZE 4
3051
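     /*
      * The 32-bit word at MLX4_OWNER_BASE in BAR 0 acts as a device
      * ownership semaphore: a nonzero read means another function already
      * owns the device (the read is assumed to claim the semaphore when it
      * returns zero), and writing zero releases it again.
      */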
3052 static int mlx4_get_ownership(struct mlx4_dev *dev)
3053 {
3054         void __iomem *owner;
3055         u32 ret;
3056
3057         if (pci_channel_offline(dev->persist->pdev))
3058                 return -EIO;
3059
3060         owner = ioremap(pci_resource_start(dev->persist->pdev, 0) +
3061                         MLX4_OWNER_BASE,
3062                         MLX4_OWNER_SIZE);
3063         if (!owner) {
3064                 mlx4_err(dev, "Failed to obtain ownership bit\n");
3065                 return -ENOMEM;
3066         }
3067
3068         ret = readl(owner);
3069         iounmap(owner);
3070         return (int) !!ret;
3071 }
3072
3073 static void mlx4_free_ownership(struct mlx4_dev *dev)
3074 {
3075         void __iomem *owner;
3076
3077         if (pci_channel_offline(dev->persist->pdev))
3078                 return;
3079
3080         owner = ioremap(pci_resource_start(dev->persist->pdev, 0) +
3081                         MLX4_OWNER_BASE,
3082                         MLX4_OWNER_SIZE);
3083         if (!owner) {
3084                 mlx4_err(dev, "Failed to obtain ownership bit\n");
3085                 return;
3086         }
3087         writel(0, owner);
3088         msleep(1000);
3089         iounmap(owner);
3090 }
3091
3092 #define SRIOV_VALID_STATE(flags) (!!((flags) & MLX4_FLAG_SRIOV) ==\
3093                                   !!((flags) & MLX4_FLAG_MASTER))
3094
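     /*
      * Enable SR-IOV for @total_vfs VFs, unless this is a reset flow in
      * which the VFs already exist and only dev->dev_vfs is reallocated.
      * Returns the updated device flags; on failure the MASTER flag is
      * cleared so the caller can continue without SR-IOV.
      */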
3095 static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev,
3096                              u8 total_vfs, int existing_vfs, int reset_flow)
3097 {
3098         u64 dev_flags = dev->flags;
3099         int err = 0;
3100         int fw_enabled_sriov_vfs = min(pci_sriov_get_totalvfs(pdev),
3101                                         MLX4_MAX_NUM_VF);
3102
3103         if (reset_flow) {
3104                 dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs),
3105                                        GFP_KERNEL);
3106                 if (!dev->dev_vfs)
3107                         goto free_mem;
3108                 return dev_flags;
3109         }
3110
3111         atomic_inc(&pf_loading);
3112         if (dev->flags & MLX4_FLAG_SRIOV) {
3113                 if (existing_vfs != total_vfs) {
3114                         mlx4_err(dev, "SR-IOV was already enabled, but with num_vfs (%d) different than requested (%d)\n",
3115                                  existing_vfs, total_vfs);
3116                         total_vfs = existing_vfs;
3117                 }
3118         }
3119
3120         dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs), GFP_KERNEL);
3121         if (!dev->dev_vfs) {
3122                 mlx4_err(dev, "Failed to allocate memory for VFs\n");
3123                 goto disable_sriov;
3124         }
3125
3126         if (!(dev->flags & MLX4_FLAG_SRIOV)) {
3127                 if (total_vfs > fw_enabled_sriov_vfs) {
3128                         mlx4_err(dev, "requested VFs (%d) > available VFs (%d). Continuing without SR-IOV\n",
3129                                  total_vfs, fw_enabled_sriov_vfs);
3130                         err = -ENOMEM;
3131                         goto disable_sriov;
3132                 }
3133                 mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", total_vfs);
3134                 err = pci_enable_sriov(pdev, total_vfs);
3135         }
3136         if (err) {
3137                 mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n",
3138                          err);
3139                 goto disable_sriov;
3140         } else {
3141                 mlx4_warn(dev, "Running in master mode\n");
3142                 dev_flags |= MLX4_FLAG_SRIOV |
3143                         MLX4_FLAG_MASTER;
3144                 dev_flags &= ~MLX4_FLAG_SLAVE;
3145                 dev->persist->num_vfs = total_vfs;
3146         }
3147         return dev_flags;
3148
3149 disable_sriov:
3150         atomic_dec(&pf_loading);
3151 free_mem:
3152         dev->persist->num_vfs = 0;
3153         kfree(dev->dev_vfs);
3154         dev->dev_vfs = NULL;
3155         return dev_flags & ~MLX4_FLAG_MASTER;
3156 }
3157
3158 enum {
3159         MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64 = -1,
3160 };
3161
3162 static int mlx4_check_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
3163                               int *nvfs)
3164 {
3165         int requested_vfs = nvfs[0] + nvfs[1] + nvfs[2];
3166         /* Checking for 64 VFs as a limitation of CX2 */
3167         if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_80_VFS) &&
3168             requested_vfs >= 64) {
3169                 mlx4_err(dev, "Requested %d VFs, but FW does not support more than 64\n",
3170                          requested_vfs);
3171                 return MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64;
3172         }
3173         return 0;
3174 }
3175
3176 static int mlx4_pci_enable_device(struct mlx4_dev *dev)
3177 {
3178         struct pci_dev *pdev = dev->persist->pdev;
3179         int err = 0;
3180
3181         mutex_lock(&dev->persist->pci_status_mutex);
3182         if (dev->persist->pci_status == MLX4_PCI_STATUS_DISABLED) {
3183                 err = pci_enable_device(pdev);
3184                 if (!err)
3185                         dev->persist->pci_status = MLX4_PCI_STATUS_ENABLED;
3186         }
3187         mutex_unlock(&dev->persist->pci_status_mutex);
3188
3189         return err;
3190 }
3191
3192 static void mlx4_pci_disable_device(struct mlx4_dev *dev)
3193 {
3194         struct pci_dev *pdev = dev->persist->pdev;
3195
3196         mutex_lock(&dev->persist->pci_status_mutex);
3197         if (dev->persist->pci_status == MLX4_PCI_STATUS_ENABLED) {
3198                 pci_disable_device(pdev);
3199                 dev->persist->pci_status = MLX4_PCI_STATUS_DISABLED;
3200         }
3201         mutex_unlock(&dev->persist->pci_status_mutex);
3202 }
3203
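     /*
      * Bring up one HCA instance: claim ownership and reset the device (PF
      * only), initialize the command interface, firmware and HCA, enable
      * SR-IOV and MSI-X as requested, set up all resource tables and ports,
      * and finally register the device.  A PF that turns out not to be the
      * primary function (-EACCES from mlx4_init_hca) restarts itself in
      * slave mode via the slave_start label.
      */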
3204 static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
3205                          int total_vfs, int *nvfs, struct mlx4_priv *priv,
3206                          int reset_flow)
3207 {
3208         struct mlx4_dev *dev;
3209         unsigned sum = 0;
3210         int err;
3211         int port;
3212         int i;
3213         struct mlx4_dev_cap *dev_cap = NULL;
3214         int existing_vfs = 0;
3215
3216         dev = &priv->dev;
3217
3218         INIT_LIST_HEAD(&priv->ctx_list);
3219         spin_lock_init(&priv->ctx_lock);
3220
3221         mutex_init(&priv->port_mutex);
3222         mutex_init(&priv->bond_mutex);
3223
3224         INIT_LIST_HEAD(&priv->pgdir_list);
3225         mutex_init(&priv->pgdir_mutex);
3226         spin_lock_init(&priv->cmd.context_lock);
3227
3228         INIT_LIST_HEAD(&priv->bf_list);
3229         mutex_init(&priv->bf_mutex);
3230
3231         dev->rev_id = pdev->revision;
3232         dev->numa_node = dev_to_node(&pdev->dev);
3233
3234         /* Detect if this device is a virtual function */
3235         if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
3236                 mlx4_warn(dev, "Detected virtual function - running in slave mode\n");
3237                 dev->flags |= MLX4_FLAG_SLAVE;
3238         } else {
3239                 /* We reset the device and enable SRIOV only for physical
3240                  * devices.  Try to claim ownership on the device;
3241                  * if already taken, skip -- do not allow multiple PFs */
3242                 err = mlx4_get_ownership(dev);
3243                 if (err) {
3244                         if (err < 0)
3245                                 return err;
3246                         else {
3247                                 mlx4_warn(dev, "Multiple PFs not yet supported - Skipping PF\n");
3248                                 return -EINVAL;
3249                         }
3250                 }
3251
3252                 atomic_set(&priv->opreq_count, 0);
3253                 INIT_WORK(&priv->opreq_task, mlx4_opreq_action);
3254
3255                 /*
3256                  * Now reset the HCA before we touch the PCI capabilities or
3257                  * attempt a firmware command, since a boot ROM may have left
3258                  * the HCA in an undefined state.
3259                  */
3260                 err = mlx4_reset(dev);
3261                 if (err) {
3262                         mlx4_err(dev, "Failed to reset HCA, aborting\n");
3263                         goto err_sriov;
3264                 }
3265
3266                 if (total_vfs) {
3267                         dev->flags = MLX4_FLAG_MASTER;
3268                         existing_vfs = pci_num_vf(pdev);
3269                         if (existing_vfs)
3270                                 dev->flags |= MLX4_FLAG_SRIOV;
3271                         dev->persist->num_vfs = total_vfs;
3272                 }
3273         }
3274
3275         /* On load, remove any previous indication of internal error;
3276          * the device is up.
3277          */
3278         dev->persist->state = MLX4_DEVICE_STATE_UP;
3279
3280 slave_start:
3281         err = mlx4_cmd_init(dev);
3282         if (err) {
3283                 mlx4_err(dev, "Failed to init command interface, aborting\n");
3284                 goto err_sriov;
3285         }
3286
3287         /* In slave functions, the communication channel must be initialized
3288          * before posting commands. Also, init num_slaves before calling
3289          * mlx4_init_hca */
3290         if (mlx4_is_mfunc(dev)) {
3291                 if (mlx4_is_master(dev)) {
3292                         dev->num_slaves = MLX4_MAX_NUM_SLAVES;
3293
3294                 } else {
3295                         dev->num_slaves = 0;
3296                         err = mlx4_multi_func_init(dev);
3297                         if (err) {
3298                                 mlx4_err(dev, "Failed to init slave mfunc interface, aborting\n");
3299                                 goto err_cmd;
3300                         }
3301                 }
3302         }
3303
3304         err = mlx4_init_fw(dev);
3305         if (err) {
3306                 mlx4_err(dev, "Failed to init fw, aborting.\n");
3307                 goto err_mfunc;
3308         }
3309
3310         if (mlx4_is_master(dev)) {
3311                 /* When we hit the goto slave_start below, dev_cap is already initialized */
3312                 if (!dev_cap) {
3313                         dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);
3314
3315                         if (!dev_cap) {
3316                                 err = -ENOMEM;
3317                                 goto err_fw;
3318                         }
3319
3320                         err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
3321                         if (err) {
3322                                 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
3323                                 goto err_fw;
3324                         }
3325
3326                         if (mlx4_check_dev_cap(dev, dev_cap, nvfs))
3327                                 goto err_fw;
3328
3329                         if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
3330                                 u64 dev_flags = mlx4_enable_sriov(dev, pdev,
3331                                                                   total_vfs,
3332                                                                   existing_vfs,
3333                                                                   reset_flow);
3334
3335                                 mlx4_close_fw(dev);
3336                                 mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
3337                                 dev->flags = dev_flags;
3338                                 if (!SRIOV_VALID_STATE(dev->flags)) {
3339                                         mlx4_err(dev, "Invalid SRIOV state\n");
3340                                         goto err_sriov;
3341                                 }
3342                                 err = mlx4_reset(dev);
3343                                 if (err) {
3344                                         mlx4_err(dev, "Failed to reset HCA, aborting.\n");
3345                                         goto err_sriov;
3346                                 }
3347                                 goto slave_start;
3348                         }
3349                 } else {
3350                         /* Legacy mode FW requires SRIOV to be enabled before
3351                          * doing QUERY_DEV_CAP, since max_eq's value is different if
3352                          * SRIOV is enabled.
3353                          */
3354                         memset(dev_cap, 0, sizeof(*dev_cap));
3355                         err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
3356                         if (err) {
3357                                 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
3358                                 goto err_fw;
3359                         }
3360
3361                         if (mlx4_check_dev_cap(dev, dev_cap, nvfs))
3362                                 goto err_fw;
3363                 }
3364         }
3365
3366         err = mlx4_init_hca(dev);
3367         if (err) {
3368                 if (err == -EACCES) {
3369                         /* Not primary Physical function
3370                          * Running in slave mode */
3371                         mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
3372                         /* We're not a PF */
3373                         if (dev->flags & MLX4_FLAG_SRIOV) {
3374                                 if (!existing_vfs)
3375                                         pci_disable_sriov(pdev);
3376                                 if (mlx4_is_master(dev) && !reset_flow)
3377                                         atomic_dec(&pf_loading);
3378                                 dev->flags &= ~MLX4_FLAG_SRIOV;
3379                         }
3380                         if (!mlx4_is_slave(dev))
3381                                 mlx4_free_ownership(dev);
3382                         dev->flags |= MLX4_FLAG_SLAVE;
3383                         dev->flags &= ~MLX4_FLAG_MASTER;
3384                         goto slave_start;
3385                 } else
3386                         goto err_fw;
3387         }
3388
3389         if (mlx4_is_master(dev) && (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
3390                 u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs,
3391                                                   existing_vfs, reset_flow);
3392
3393                 if ((dev->flags ^ dev_flags) & (MLX4_FLAG_MASTER | MLX4_FLAG_SLAVE)) {
3394                         mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_VHCR);
3395                         dev->flags = dev_flags;
3396                         err = mlx4_cmd_init(dev);
3397                         if (err) {
3398                                 /* Only VHCR is cleaned up, so we could still
3399                                  * send FW commands
3400                                  */
3401                                 mlx4_err(dev, "Failed to init VHCR command interface, aborting\n");
3402                                 goto err_close;
3403                         }
3404                 } else {
3405                         dev->flags = dev_flags;
3406                 }
3407
3408                 if (!SRIOV_VALID_STATE(dev->flags)) {
3409                         mlx4_err(dev, "Invalid SRIOV state\n");
3410                         goto err_close;
3411                 }
3412         }
3413
3414         /* Check whether the device is functioning at its maximum possible speed.
3415          * There is no return code for this call; it just warns the user if the
3416          * bus does not satisfy the device's PCI Express capabilities.
3417          */
3418         if (!mlx4_is_slave(dev))
3419                 mlx4_check_pcie_caps(dev);
3420
3421         /* In master functions, the communication channel must be initialized
3422          * after obtaining its address from fw */
3423         if (mlx4_is_master(dev)) {
3424                 if (dev->caps.num_ports < 2 &&
3425                     num_vfs_argc > 1) {
3426                         err = -EINVAL;
3427                         mlx4_err(dev,
3428                                  "Error: Trying to configure VFs on port 2, but HCA has only %d physical ports\n",
3429                                  dev->caps.num_ports);
3430                         goto err_close;
3431                 }
3432                 memcpy(dev->persist->nvfs, nvfs, sizeof(dev->persist->nvfs));
3433
3434                 for (i = 0;
3435                      i < sizeof(dev->persist->nvfs)/
3436                      sizeof(dev->persist->nvfs[0]); i++) {
3437                         unsigned j;
3438
3439                         for (j = 0; j < dev->persist->nvfs[i]; ++sum, ++j) {
3440                                 dev->dev_vfs[sum].min_port = i < 2 ? i + 1 : 1;
3441                                 dev->dev_vfs[sum].n_ports = i < 2 ? 1 :
3442                                         dev->caps.num_ports;
3443                         }
3444                 }
3445
3446                 /* In master functions, the communication channel
3447                  * must be initialized after obtaining its address from fw
3448                  */
3449                 err = mlx4_multi_func_init(dev);
3450                 if (err) {
3451                         mlx4_err(dev, "Failed to init master mfunc interface, aborting.\n");
3452                         goto err_close;
3453                 }
3454         }
3455
3456         err = mlx4_alloc_eq_table(dev);
3457         if (err)
3458                 goto err_master_mfunc;
3459
3460         bitmap_zero(priv->msix_ctl.pool_bm, MAX_MSIX);
3461         mutex_init(&priv->msix_ctl.pool_lock);
3462
3463         mlx4_enable_msi_x(dev);
3464         if ((mlx4_is_mfunc(dev)) &&
3465             !(dev->flags & MLX4_FLAG_MSI_X)) {
3466                 err = -ENOSYS;
3467                 mlx4_err(dev, "INTx is not supported in multi-function mode, aborting\n");
3468                 goto err_free_eq;
3469         }
3470
3471         if (!mlx4_is_slave(dev)) {
3472                 err = mlx4_init_steering(dev);
3473                 if (err)
3474                         goto err_disable_msix;
3475         }
3476
3477         err = mlx4_setup_hca(dev);
3478         if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) &&
3479             !mlx4_is_mfunc(dev)) {
3480                 dev->flags &= ~MLX4_FLAG_MSI_X;
3481                 dev->caps.num_comp_vectors = 1;
3482                 pci_disable_msix(pdev);
3483                 err = mlx4_setup_hca(dev);
3484         }
3485
3486         if (err)
3487                 goto err_steer;
3488
3489         mlx4_init_quotas(dev);
3490         /* When PF resources are ready, arm its comm channel to enable
3491          * receiving commands
3492          */
3493         if (mlx4_is_master(dev)) {
3494                 err = mlx4_ARM_COMM_CHANNEL(dev);
3495                 if (err) {
3496                         mlx4_err(dev, "Failed to arm comm channel EQ: %x\n",
3497                                  err);
3498                         goto err_steer;
3499                 }
3500         }
3501
3502         for (port = 1; port <= dev->caps.num_ports; port++) {
3503                 err = mlx4_init_port_info(dev, port);
3504                 if (err)
3505                         goto err_port;
3506         }
3507
3508         priv->v2p.port1 = 1;
3509         priv->v2p.port2 = 2;
3510
3511         err = mlx4_register_device(dev);
3512         if (err)
3513                 goto err_port;
3514
3515         mlx4_request_modules(dev);
3516
3517         mlx4_sense_init(dev);
3518         mlx4_start_sense(dev);
3519
3520         priv->removed = 0;
3521
3522         if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow)
3523                 atomic_dec(&pf_loading);
3524
3525         kfree(dev_cap);
3526         return 0;
3527
3528 err_port:
3529         for (--port; port >= 1; --port)
3530                 mlx4_cleanup_port_info(&priv->port[port]);
3531
3532         mlx4_cleanup_default_counters(dev);
3533         if (!mlx4_is_slave(dev))
3534                 mlx4_cleanup_counters_table(dev);
3535         mlx4_cleanup_qp_table(dev);
3536         mlx4_cleanup_srq_table(dev);
3537         mlx4_cleanup_cq_table(dev);
3538         mlx4_cmd_use_polling(dev);
3539         mlx4_cleanup_eq_table(dev);
3540         mlx4_cleanup_mcg_table(dev);
3541         mlx4_cleanup_mr_table(dev);
3542         mlx4_cleanup_xrcd_table(dev);
3543         mlx4_cleanup_pd_table(dev);
3544         mlx4_cleanup_uar_table(dev);
3545
3546 err_steer:
3547         if (!mlx4_is_slave(dev))
3548                 mlx4_clear_steering(dev);
3549
3550 err_disable_msix:
3551         if (dev->flags & MLX4_FLAG_MSI_X)
3552                 pci_disable_msix(pdev);
3553
3554 err_free_eq:
3555         mlx4_free_eq_table(dev);
3556
3557 err_master_mfunc:
3558         if (mlx4_is_master(dev)) {
3559                 mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY);
3560                 mlx4_multi_func_cleanup(dev);
3561         }
3562
3563         if (mlx4_is_slave(dev)) {
3564                 kfree(dev->caps.qp0_qkey);
3565                 kfree(dev->caps.qp0_tunnel);
3566                 kfree(dev->caps.qp0_proxy);
3567                 kfree(dev->caps.qp1_tunnel);
3568                 kfree(dev->caps.qp1_proxy);
3569         }
3570
3571 err_close:
3572         mlx4_close_hca(dev);
3573
3574 err_fw:
3575         mlx4_close_fw(dev);
3576
3577 err_mfunc:
3578         if (mlx4_is_slave(dev))
3579                 mlx4_multi_func_cleanup(dev);
3580
3581 err_cmd:
3582         mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
3583
3584 err_sriov:
3585         if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) {
3586                 pci_disable_sriov(pdev);
3587                 dev->flags &= ~MLX4_FLAG_SRIOV;
3588         }
3589
3590         if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow)
3591                 atomic_dec(&pf_loading);
3592
3593         kfree(priv->dev.dev_vfs);
3594
3595         if (!mlx4_is_slave(dev))
3596                 mlx4_free_ownership(dev);
3597
3598         kfree(dev_cap);
3599         return err;
3600 }
3601
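     /*
      * Probe-time setup that precedes mlx4_load_one(): enable the PCI
      * device, translate the num_vfs/probe_vf module parameters into
      * per-port counts and validate them against hardware limits, check the
      * BARs, request the PCI regions, configure the DMA masks, and skip VF
      * functions that were not requested for probing.
      */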
3602 static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
3603                            struct mlx4_priv *priv)
3604 {
3605         int err;
3606         int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
3607         int prb_vf[MLX4_MAX_PORTS + 1] = {0, 0, 0};
3608         const int param_map[MLX4_MAX_PORTS + 1][MLX4_MAX_PORTS + 1] = {
3609                 {2, 0, 0}, {0, 1, 2}, {0, 1, 2} };
3610         unsigned total_vfs = 0;
3611         unsigned int i;
3612
3613         pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));
3614
3615         err = mlx4_pci_enable_device(&priv->dev);
3616         if (err) {
3617                 dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
3618                 return err;
3619         }
3620
3621         /* Because every VF and the PF are *guaranteed* 2 MACs per port, we
3622          * must limit the number of VFs to 63: there are 128 MACs, and the PF
3623          * plus 63 VFs consume at most 2 * 64 = 128 of them.
3624          */
3625         for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) && i < num_vfs_argc;
3626              total_vfs += nvfs[param_map[num_vfs_argc - 1][i]], i++) {
3627                 nvfs[param_map[num_vfs_argc - 1][i]] = num_vfs[i];
3628                 if (nvfs[i] < 0) {
3629                         dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n");
3630                         err = -EINVAL;
3631                         goto err_disable_pdev;
3632                 }
3633         }
3634         for (i = 0; i < sizeof(prb_vf)/sizeof(prb_vf[0]) && i < probe_vfs_argc;
3635              i++) {
3636                 prb_vf[param_map[probe_vfs_argc - 1][i]] = probe_vf[i];
3637                 if (prb_vf[i] < 0 || prb_vf[i] > nvfs[i]) {
3638                         dev_err(&pdev->dev, "probe_vf module parameter cannot be negative or greater than num_vfs\n");
3639                         err = -EINVAL;
3640                         goto err_disable_pdev;
3641                 }
3642         }
3643         if (total_vfs > MLX4_MAX_NUM_VF) {
3644                 dev_err(&pdev->dev,
3645                         "Requested more VFs (%d) than allowed by hw (%d)\n",
3646                         total_vfs, MLX4_MAX_NUM_VF);
3647                 err = -EINVAL;
3648                 goto err_disable_pdev;
3649         }
3650
3651         for (i = 0; i < MLX4_MAX_PORTS; i++) {
3652                 if (nvfs[i] + nvfs[2] > MLX4_MAX_NUM_VF_P_PORT) {
3653                         dev_err(&pdev->dev,
3654                                 "Requested more VFs (%d) for port (%d) than allowed by driver (%d)\n",
3655                                 nvfs[i] + nvfs[2], i + 1,
3656                                 MLX4_MAX_NUM_VF_P_PORT);
3657                         err = -EINVAL;
3658                         goto err_disable_pdev;
3659                 }
3660         }
3661
3662         /* Check for BARs. */
3663         if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
3664             !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
3665                 dev_err(&pdev->dev, "Missing DCS, aborting (driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n",
3666                         pci_dev_data, pci_resource_flags(pdev, 0));
3667                 err = -ENODEV;
3668                 goto err_disable_pdev;
3669         }
3670         if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
3671                 dev_err(&pdev->dev, "Missing UAR, aborting\n");
3672                 err = -ENODEV;
3673                 goto err_disable_pdev;
3674         }
3675
3676         err = pci_request_regions(pdev, DRV_NAME);
3677         if (err) {
3678                 dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
3679                 goto err_disable_pdev;
3680         }
3681
3682         pci_set_master(pdev);
3683
3684         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
3685         if (err) {
3686                 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
3687                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
3688                 if (err) {
3689                         dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
3690                         goto err_release_regions;
3691                 }
3692         }
3693         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
3694         if (err) {
3695                 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
3696                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
3697                 if (err) {
3698                         dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, aborting\n");
3699                         goto err_release_regions;
3700                 }
3701         }
3702
3703         /* Allow large DMA segments, up to the firmware limit of 1 GB */
3704         dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
3705         /* Detect if this device is a virtual function */
3706         if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
3707                 /* When acting as pf, we normally skip vfs unless explicitly
3708                  * requested to probe them.
3709                  */
3710                 if (total_vfs) {
3711                         unsigned vfs_offset = 0;
3712
3713                         for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) &&
3714                              vfs_offset + nvfs[i] < extended_func_num(pdev);
3715                              vfs_offset += nvfs[i], i++)
3716                                 ;
3717                         if (i == sizeof(nvfs)/sizeof(nvfs[0])) {
3718                                 err = -ENODEV;
3719                                 goto err_release_regions;
3720                         }
3721                         if ((extended_func_num(pdev) - vfs_offset)
3722                             > prb_vf[i]) {
3723                                 dev_warn(&pdev->dev, "Skipping virtual function:%d\n",
3724                                          extended_func_num(pdev));
3725                                 err = -ENODEV;
3726                                 goto err_release_regions;
3727                         }
3728                 }
3729         }
3730
3731         err = mlx4_catas_init(&priv->dev);
3732         if (err)
3733                 goto err_release_regions;
3734
3735         err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 0);
3736         if (err)
3737                 goto err_catas;
3738
3739         return 0;
3740
3741 err_catas:
3742         mlx4_catas_end(&priv->dev);
3743
3744 err_release_regions:
3745         pci_release_regions(pdev);
3746
3747 err_disable_pdev:
3748         mlx4_pci_disable_device(&priv->dev);
3749         pci_set_drvdata(pdev, NULL);
3750         return err;
3751 }
3752
3753 static int mlx4_devlink_port_type_set(struct devlink_port *devlink_port,
3754                                       enum devlink_port_type port_type)
3755 {
3756         struct mlx4_port_info *info = container_of(devlink_port,
3757                                                    struct mlx4_port_info,
3758                                                    devlink_port);
3759         enum mlx4_port_type mlx4_port_type;
3760
3761         switch (port_type) {
3762         case DEVLINK_PORT_TYPE_AUTO:
3763                 mlx4_port_type = MLX4_PORT_TYPE_AUTO;
3764                 break;
3765         case DEVLINK_PORT_TYPE_ETH:
3766                 mlx4_port_type = MLX4_PORT_TYPE_ETH;
3767                 break;
3768         case DEVLINK_PORT_TYPE_IB:
3769                 mlx4_port_type = MLX4_PORT_TYPE_IB;
3770                 break;
3771         default:
3772                 return -EOPNOTSUPP;
3773         }
3774
3775         return __set_port_type(info, mlx4_port_type);
3776 }
3777
3778 static const struct devlink_ops mlx4_devlink_ops = {
3779         .port_type_set  = mlx4_devlink_port_type_set,
3780 };
3781
3782 static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
3783 {
3784         struct devlink *devlink;
3785         struct mlx4_priv *priv;
3786         struct mlx4_dev *dev;
3787         int ret;
3788
3789         printk_once(KERN_INFO "%s", mlx4_version);
3790
3791         devlink = devlink_alloc(&mlx4_devlink_ops, sizeof(*priv));
3792         if (!devlink)
3793                 return -ENOMEM;
3794         priv = devlink_priv(devlink);
3795
3796         dev       = &priv->dev;
3797         dev->persist = kzalloc(sizeof(*dev->persist), GFP_KERNEL);
3798         if (!dev->persist) {
3799                 ret = -ENOMEM;
3800                 goto err_devlink_free;
3801         }
3802         dev->persist->pdev = pdev;
3803         dev->persist->dev = dev;
3804         pci_set_drvdata(pdev, dev->persist);
3805         priv->pci_dev_data = id->driver_data;
3806         mutex_init(&dev->persist->device_state_mutex);
3807         mutex_init(&dev->persist->interface_state_mutex);
3808         mutex_init(&dev->persist->pci_status_mutex);
3809
3810         ret = devlink_register(devlink, &pdev->dev);
3811         if (ret)
3812                 goto err_persist_free;
3813
3814         ret =  __mlx4_init_one(pdev, id->driver_data, priv);
3815         if (ret)
3816                 goto err_devlink_unregister;
3817
3818         pci_save_state(pdev);
3819         return 0;
3820
3821 err_devlink_unregister:
3822         devlink_unregister(devlink);
3823 err_persist_free:
3824         kfree(dev->persist);
3825 err_devlink_free:
3826         devlink_free(devlink);
3827         return ret;
3828 }
3829
3830 static void mlx4_clean_dev(struct mlx4_dev *dev)
3831 {
3832         struct mlx4_dev_persistent *persist = dev->persist;
3833         struct mlx4_priv *priv = mlx4_priv(dev);
3834         unsigned long   flags = (dev->flags & RESET_PERSIST_MASK_FLAGS);
3835
3836         memset(priv, 0, sizeof(*priv));
3837         priv->dev.persist = persist;
3838         priv->dev.flags = flags;
3839 }
3840
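     /*
      * Tear down everything mlx4_load_one() set up, in reverse order, while
      * preserving the persistent state (current port types, PCI data) that a
      * later restart needs.
      */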
3841 static void mlx4_unload_one(struct pci_dev *pdev)
3842 {
3843         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3844         struct mlx4_dev  *dev  = persist->dev;
3845         struct mlx4_priv *priv = mlx4_priv(dev);
3846         int               pci_dev_data;
3847         int p, i;
3848
3849         if (priv->removed)
3850                 return;
3851
3852         /* saving current ports type for further use */
3853         for (i = 0; i < dev->caps.num_ports; i++) {
3854                 dev->persist->curr_port_type[i] = dev->caps.port_type[i + 1];
3855                 dev->persist->curr_port_poss_type[i] = dev->caps.
3856                                                        possible_type[i + 1];
3857         }
3858
3859         pci_dev_data = priv->pci_dev_data;
3860
3861         mlx4_stop_sense(dev);
3862         mlx4_unregister_device(dev);
3863
3864         for (p = 1; p <= dev->caps.num_ports; p++) {
3865                 mlx4_cleanup_port_info(&priv->port[p]);
3866                 mlx4_CLOSE_PORT(dev, p);
3867         }
3868
3869         if (mlx4_is_master(dev))
3870                 mlx4_free_resource_tracker(dev,
3871                                            RES_TR_FREE_SLAVES_ONLY);
3872
3873         mlx4_cleanup_default_counters(dev);
3874         if (!mlx4_is_slave(dev))
3875                 mlx4_cleanup_counters_table(dev);
3876         mlx4_cleanup_qp_table(dev);
3877         mlx4_cleanup_srq_table(dev);
3878         mlx4_cleanup_cq_table(dev);
3879         mlx4_cmd_use_polling(dev);
3880         mlx4_cleanup_eq_table(dev);
3881         mlx4_cleanup_mcg_table(dev);
3882         mlx4_cleanup_mr_table(dev);
3883         mlx4_cleanup_xrcd_table(dev);
3884         mlx4_cleanup_pd_table(dev);
3885
3886         if (mlx4_is_master(dev))
3887                 mlx4_free_resource_tracker(dev,
3888                                            RES_TR_FREE_STRUCTS_ONLY);
3889
3890         iounmap(priv->kar);
3891         mlx4_uar_free(dev, &priv->driver_uar);
3892         mlx4_cleanup_uar_table(dev);
3893         if (!mlx4_is_slave(dev))
3894                 mlx4_clear_steering(dev);
3895         mlx4_free_eq_table(dev);
3896         if (mlx4_is_master(dev))
3897                 mlx4_multi_func_cleanup(dev);
3898         mlx4_close_hca(dev);
3899         mlx4_close_fw(dev);
3900         if (mlx4_is_slave(dev))
3901                 mlx4_multi_func_cleanup(dev);
3902         mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
3903
3904         if (dev->flags & MLX4_FLAG_MSI_X)
3905                 pci_disable_msix(pdev);
3906
3907         if (!mlx4_is_slave(dev))
3908                 mlx4_free_ownership(dev);
3909
3910         kfree(dev->caps.qp0_qkey);
3911         kfree(dev->caps.qp0_tunnel);
3912         kfree(dev->caps.qp0_proxy);
3913         kfree(dev->caps.qp1_tunnel);
3914         kfree(dev->caps.qp1_proxy);
3915         kfree(dev->dev_vfs);
3916
3917         mlx4_clean_dev(dev);
3918         priv->pci_dev_data = pci_dev_data;
3919         priv->removed = 1;
3920 }
3921
3922 static void mlx4_remove_one(struct pci_dev *pdev)
3923 {
3924         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3925         struct mlx4_dev  *dev  = persist->dev;
3926         struct mlx4_priv *priv = mlx4_priv(dev);
3927         struct devlink *devlink = priv_to_devlink(priv);
3928         int active_vfs = 0;
3929
3930         mutex_lock(&persist->interface_state_mutex);
3931         persist->interface_state |= MLX4_INTERFACE_STATE_DELETION;
3932         mutex_unlock(&persist->interface_state_mutex);
3933
3934         /* Disabling SR-IOV is not allowed while there are active VFs */
3935         if (mlx4_is_master(dev) && dev->flags & MLX4_FLAG_SRIOV) {
3936                 active_vfs = mlx4_how_many_lives_vf(dev);
3937                 if (active_vfs) {
3938                         pr_warn("Removing PF when there are active VFs!\n");
3939                         pr_warn("Will not disable SR-IOV.\n");
3940                 }
3941         }
3942
3943         /* The device is marked for deletion; continue without the lock so
3944          * that other tasks can terminate.
3945          */
3946         if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
3947                 mlx4_unload_one(pdev);
3948         else
3949                 mlx4_info(dev, "%s: interface is down\n", __func__);
3950         mlx4_catas_end(dev);
3951         if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) {
3952                 mlx4_warn(dev, "Disabling SR-IOV\n");
3953                 pci_disable_sriov(pdev);
3954         }
3955
3956         pci_release_regions(pdev);
3957         mlx4_pci_disable_device(dev);
3958         devlink_unregister(devlink);
3959         kfree(dev->persist);
3960         devlink_free(devlink);
3961         pci_set_drvdata(pdev, NULL);
3962 }
3963
3964 static int restore_current_port_types(struct mlx4_dev *dev,
3965                                       enum mlx4_port_type *types,
3966                                       enum mlx4_port_type *poss_types)
3967 {
3968         struct mlx4_priv *priv = mlx4_priv(dev);
3969         int err, i;
3970
3971         mlx4_stop_sense(dev);
3972
3973         mutex_lock(&priv->port_mutex);
3974         for (i = 0; i < dev->caps.num_ports; i++)
3975                 dev->caps.possible_type[i + 1] = poss_types[i];
3976         err = mlx4_change_port_types(dev, types);
3977         mlx4_start_sense(dev);
3978         mutex_unlock(&priv->port_mutex);
3979
3980         return err;
3981 }
3982
3983 int mlx4_restart_one(struct pci_dev *pdev)
3984 {
3985         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3986         struct mlx4_dev  *dev  = persist->dev;
3987         struct mlx4_priv *priv = mlx4_priv(dev);
3988         int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
3989         int pci_dev_data, err, total_vfs;
3990
3991         pci_dev_data = priv->pci_dev_data;
3992         total_vfs = dev->persist->num_vfs;
3993         memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));
3994
3995         mlx4_unload_one(pdev);
3996         err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 1);
3997         if (err) {
3998                 mlx4_err(dev, "%s: ERROR: mlx4_load_one failed, pci_name=%s, err=%d\n",
3999                          __func__, pci_name(pdev), err);
4000                 return err;
4001         }
4002
4003         err = restore_current_port_types(dev, dev->persist->curr_port_type,
4004                                          dev->persist->curr_port_poss_type);
4005         if (err)
4006                 mlx4_err(dev, "could not restore original port types (%d)\n",
4007                          err);
4008
4009         return err;
4010 }
4011
4012 static const struct pci_device_id mlx4_pci_table[] = {
4013         /* MT25408 "Hermon" SDR */
4014         { PCI_VDEVICE(MELLANOX, 0x6340), MLX4_PCI_DEV_FORCE_SENSE_PORT },
4015         /* MT25408 "Hermon" DDR */
4016         { PCI_VDEVICE(MELLANOX, 0x634a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
4017         /* MT25408 "Hermon" QDR */
4018         { PCI_VDEVICE(MELLANOX, 0x6354), MLX4_PCI_DEV_FORCE_SENSE_PORT },
4019         /* MT25408 "Hermon" DDR PCIe gen2 */
4020         { PCI_VDEVICE(MELLANOX, 0x6732), MLX4_PCI_DEV_FORCE_SENSE_PORT },
4021         /* MT25408 "Hermon" QDR PCIe gen2 */
4022         { PCI_VDEVICE(MELLANOX, 0x673c), MLX4_PCI_DEV_FORCE_SENSE_PORT },
4023         /* MT25408 "Hermon" EN 10GigE */
4024         { PCI_VDEVICE(MELLANOX, 0x6368), MLX4_PCI_DEV_FORCE_SENSE_PORT },
4025         /* MT25408 "Hermon" EN 10GigE PCIe gen2 */
4026         { PCI_VDEVICE(MELLANOX, 0x6750), MLX4_PCI_DEV_FORCE_SENSE_PORT },
4027         /* MT25458 ConnectX EN 10GBASE-T 10GigE */
4028         { PCI_VDEVICE(MELLANOX, 0x6372), MLX4_PCI_DEV_FORCE_SENSE_PORT },
4029         /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
4030         { PCI_VDEVICE(MELLANOX, 0x675a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
4031         /* MT26468 ConnectX EN 10GigE PCIe gen2*/
4032         { PCI_VDEVICE(MELLANOX, 0x6764), MLX4_PCI_DEV_FORCE_SENSE_PORT },
4033         /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
4034         { PCI_VDEVICE(MELLANOX, 0x6746), MLX4_PCI_DEV_FORCE_SENSE_PORT },
4035         /* MT26478 ConnectX2 40GigE PCIe gen2 */
4036         { PCI_VDEVICE(MELLANOX, 0x676e), MLX4_PCI_DEV_FORCE_SENSE_PORT },
4037         /* MT25400 Family [ConnectX-2 Virtual Function] */
4038         { PCI_VDEVICE(MELLANOX, 0x1002), MLX4_PCI_DEV_IS_VF },
4039         /* MT27500 Family [ConnectX-3] */
4040         { PCI_VDEVICE(MELLANOX, 0x1003), 0 },
4041         /* MT27500 Family [ConnectX-3 Virtual Function] */
4042         { PCI_VDEVICE(MELLANOX, 0x1004), MLX4_PCI_DEV_IS_VF },
4043         { PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */
4044         { PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */
4045         { PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */
4046         { PCI_VDEVICE(MELLANOX, 0x1008), 0 }, /* MT27521 Family */
4047         { PCI_VDEVICE(MELLANOX, 0x1009), 0 }, /* MT27530 Family */
4048         { PCI_VDEVICE(MELLANOX, 0x100a), 0 }, /* MT27531 Family */
4049         { PCI_VDEVICE(MELLANOX, 0x100b), 0 }, /* MT27540 Family */
4050         { PCI_VDEVICE(MELLANOX, 0x100c), 0 }, /* MT27541 Family */
4051         { PCI_VDEVICE(MELLANOX, 0x100d), 0 }, /* MT27550 Family */
4052         { PCI_VDEVICE(MELLANOX, 0x100e), 0 }, /* MT27551 Family */
4053         { PCI_VDEVICE(MELLANOX, 0x100f), 0 }, /* MT27560 Family */
4054         { PCI_VDEVICE(MELLANOX, 0x1010), 0 }, /* MT27561 Family */
4055         { 0, }
4056 };
4057
4058 MODULE_DEVICE_TABLE(pci, mlx4_pci_table);
4059
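/*
 * PCI error recovery: invoked by the PCI core when an error is detected
 * on the channel.  Move the device into its error state, unload the
 * driver instance if it is up, and report whether a slot reset should
 * be attempted or the device disconnected.
 */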
4060 static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
4061                                               pci_channel_state_t state)
4062 {
4063         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
4064
4065         mlx4_err(persist->dev, "mlx4_pci_err_detected was called\n");
4066         mlx4_enter_error_state(persist);
4067
4068         mutex_lock(&persist->interface_state_mutex);
4069         if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
4070                 mlx4_unload_one(pdev);
4071
4072         mutex_unlock(&persist->interface_state_mutex);
4073         if (state == pci_channel_io_perm_failure)
4074                 return PCI_ERS_RESULT_DISCONNECT;
4075
4076         mlx4_pci_disable_device(persist->dev);
4077         return PCI_ERS_RESULT_NEED_RESET;
4078 }
4079
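/* Re-enable the device after a slot reset and restore its PCI config state. */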
4080 static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
4081 {
4082         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
4083         struct mlx4_dev  *dev  = persist->dev;
4084         int err;
4085
4086         mlx4_err(dev, "mlx4_pci_slot_reset was called\n");
4087         err = mlx4_pci_enable_device(dev);
4088         if (err) {
4089                 mlx4_err(dev, "Cannot re-enable device, err=%d\n", err);
4090                 return PCI_ERS_RESULT_DISCONNECT;
4091         }
4092
4093         pci_set_master(pdev);
4094         pci_restore_state(pdev);
4095         pci_save_state(pdev);
4096         return PCI_ERS_RESULT_RECOVERED;
4097 }
4098
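/*
 * Final stage of PCI error recovery: if the driver instance is not
 * already up, reload it with the previously saved VF configuration and
 * restore the original port types.
 */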
4099 static void mlx4_pci_resume(struct pci_dev *pdev)
4100 {
4101         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
4102         struct mlx4_dev  *dev  = persist->dev;
4103         struct mlx4_priv *priv = mlx4_priv(dev);
4104         int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
4105         int total_vfs;
4106         int err;
4107
4108         mlx4_err(dev, "%s was called\n", __func__);
4109         total_vfs = dev->persist->num_vfs;
4110         memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));
4111
4112         mutex_lock(&persist->interface_state_mutex);
4113         if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) {
4114                 err = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs, nvfs,
4115                                     priv, 1);
4116                 if (err) {
4117                         mlx4_err(dev, "%s: mlx4_load_one failed, err=%d\n",
4118                                  __func__, err);
4119                         goto end;
4120                 }
4121
4122                 err = restore_current_port_types(dev,
4123                                                  dev->persist->curr_port_type,
4124                                                  dev->persist->curr_port_poss_type);
4125                 if (err)
4126                         mlx4_err(dev, "could not restore original port types (%d)\n", err);
4127         }
4128 end:
4129         mutex_unlock(&persist->interface_state_mutex);
4131 }
4132
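/*
 * System shutdown hook: mark the interface as shutting down so that
 * mlx4 clients are notified, then unload the driver instance.
 */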
4133 static void mlx4_shutdown(struct pci_dev *pdev)
4134 {
4135         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
4136
4137         mlx4_info(persist->dev, "mlx4_shutdown was called\n");
4138         mutex_lock(&persist->interface_state_mutex);
4139         if (persist->interface_state & MLX4_INTERFACE_STATE_UP) {
4140                 /* Notify mlx4 clients that the kernel is being shut down */
4141                 persist->interface_state |= MLX4_INTERFACE_STATE_SHUTDOWN;
4142                 mlx4_unload_one(pdev);
4143         }
4144         mutex_unlock(&persist->interface_state_mutex);
4145 }
4146
4147 static const struct pci_error_handlers mlx4_err_handler = {
4148         .error_detected = mlx4_pci_err_detected,
4149         .slot_reset     = mlx4_pci_slot_reset,
4150         .resume         = mlx4_pci_resume,
4151 };
4152
4153 static struct pci_driver mlx4_driver = {
4154         .name           = DRV_NAME,
4155         .id_table       = mlx4_pci_table,
4156         .probe          = mlx4_init_one,
4157         .shutdown       = mlx4_shutdown,
4158         .remove         = mlx4_remove_one,
4159         .err_handler    = &mlx4_err_handler,
4160 };
4161
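/* Sanity-check module parameters before any device is probed. */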
4162 static int __init mlx4_verify_params(void)
4163 {
4164         if ((log_num_mac < 0) || (log_num_mac > 7)) {
4165                 pr_warn("mlx4_core: bad log_num_mac: %d\n", log_num_mac);
4166                 return -1;
4167         }
4168
4169         if (log_num_vlan != 0)
4170                 pr_warn("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
4171                         MLX4_LOG_NUM_VLANS);
4172
4173         if (use_prio != 0)
4174                 pr_warn("mlx4_core: use_prio - obsolete module param, ignored\n");
4175
4176         if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 7)) {
4177                 pr_warn("mlx4_core: bad log_mtts_per_seg: %d\n",
4178                         log_mtts_per_seg);
4179                 return -1;
4180         }
4181
4182         /* Check that the port type module parameters form a legal combination */
4183         if (!port_type_array[0] && port_type_array[1]) {
4184                 pr_warn("Module parameter configuration ETH/IB is not supported. Switching to default configuration IB/IB\n");
4185                 port_type_array[0] = true;
4186         }
4187
4188         if (mlx4_log_num_mgm_entry_size < -7 ||
4189             (mlx4_log_num_mgm_entry_size > 0 &&
4190              (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE ||
4191               mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE))) {
4192                 pr_warn("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not in legal range (-7..0 or %d..%d)\n",
4193                         mlx4_log_num_mgm_entry_size,
4194                         MLX4_MIN_MGM_LOG_ENTRY_SIZE,
4195                         MLX4_MAX_MGM_LOG_ENTRY_SIZE);
4196                 return -1;
4197         }
4198
4199         return 0;
4200 }
4201
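/*
 * Module entry point: validate the module parameters, create the global
 * mlx4 workqueue and register the PCI driver.
 */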
4202 static int __init mlx4_init(void)
4203 {
4204         int ret;
4205
4206         if (mlx4_verify_params())
4207                 return -EINVAL;
4208
4210         mlx4_wq = create_singlethread_workqueue("mlx4");
4211         if (!mlx4_wq)
4212                 return -ENOMEM;
4213
4214         ret = pci_register_driver(&mlx4_driver);
4215         if (ret < 0)
4216                 destroy_workqueue(mlx4_wq);
4217         return ret < 0 ? ret : 0;
4218 }
4219
4220 static void __exit mlx4_cleanup(void)
4221 {
4222         pci_unregister_driver(&mlx4_driver);
4223         destroy_workqueue(mlx4_wq);
4224 }
4225
4226 module_init(mlx4_init);
4227 module_exit(mlx4_cleanup);